Start testing transform_uri
This commit is contained in:
parent
ac0d28c9ec
commit
2e6b42e5c1
|
@ -0,0 +1,157 @@
|
|||
#!/usr/bin/env bash
|
||||
# transform-url
|
||||
# cf. https://tools.ietf.org/html/rfc3986#section-5 and
|
||||
# cf. https://tools.ietf.org/html/rfc3986#section-5.1
|
||||
# cf. also https://tools.ietf.org/html/rfc3986#appendix-B -- regex
|
||||
|
||||
# TEST WITH https://tools.ietf.org/html/rfc3986#section-5.4
|
||||
|
||||
# Resolve a URI reference against a base URI, following the pseudocode of
# RFC 3986 section 5.2.2, then recompose the result per section 5.3.
# Arguments: $1 - base URI
#            $2 - URI reference to resolve against the base
# Outputs:   the target URI on stdout, followed by a newline
# Uses parse_url, merge and remove_dot_segments from this file.
transform_resource() { # 5.2.2
  local -A R=() B=() T=() # reference, base url, target

  # NOTE(security): the output of parse_url is eval'd, and URLs are untrusted
  # input.  This is only safe if parse_url emits fully shell-quoted
  # assignments -- verify before exposing this to arbitrary URLs.
  eval "$(parse_url R "$2")" # XXX CHANGE
  eval "$(parse_url B "$1")"

  # Basically going to follow the pseudocode in the spec.
  # "${A[key]+x}" expands to "x" only when the element is set, so it is the
  # "is this component defined?" test.  Undefined components are never copied
  # into T, which lets the recomposition below use the same test.
  if [[ "${R[scheme]+x}" ]]; then
    T[scheme]="${R[scheme]}"
    if [[ "${R[authority]+x}" ]]; then
      T[authority]="${R[authority]}"
    fi
    T[path]="$(remove_dot_segments "${R[path]-}")"
    if [[ "${R[query]+x}" ]]; then
      T[query]="${R[query]}"
    fi
  else
    if [[ "${R[authority]+x}" ]]; then
      T[authority]="${R[authority]}"
      T[path]="$(remove_dot_segments "${R[path]-}")"
      if [[ "${R[query]+x}" ]]; then
        T[query]="${R[query]}"
      fi
    else
      # An unset path and an empty path both mean "no path in the reference".
      if [[ -z "${R[path]-}" ]]; then
        T[path]="${B[path]-}"
        # The reference's query wins; otherwise inherit the base's query.
        if [[ "${R[query]+x}" ]]; then
          T[query]="${R[query]}"
        elif [[ "${B[query]+x}" ]]; then
          T[query]="${B[query]}"
        fi
      else
        if [[ "${R[path]}" == /* ]]; then
          T[path]="$(remove_dot_segments "${R[path]}")"
        else
          # merge expects "?" as its first argument when the base has no
          # authority component.
          T[path]="$(merge "${B[authority]-?}" \
            "${B[path]-}" "${R[path]}")"
          T[path]="$(remove_dot_segments "${T[path]}")"
        fi
        if [[ "${R[query]+x}" ]]; then
          T[query]="${R[query]}"
        fi
      fi
      if [[ "${B[authority]+x}" ]]; then
        T[authority]="${B[authority]}"
      fi
    fi
    if [[ "${B[scheme]+x}" ]]; then
      T[scheme]="${B[scheme]}"
    fi
  fi
  # The fragment always comes from the reference, never from the base.
  if [[ "${R[fragment]+x}" ]]; then
    T[fragment]="${R[fragment]}"
  fi

  # 5.3 -- recomposition
  local r=""
  if [[ "${T[scheme]+x}" ]]; then
    r="$r${T[scheme]}:"
  fi
  if [[ "${T[authority]+x}" ]]; then
    r="$r//${T[authority]}"
  fi
  r="$r${T[path]-}"
  if [[ "${T[query]+x}" ]]; then
    r="$r?${T[query]}"
  fi
  if [[ "${T[fragment]+x}" ]]; then
    r="$r#${T[fragment]}"
  fi
  printf '%s\n' "$r"
}
|
||||
|
||||
# Merge a relative-path reference with the base URI's path (RFC 3986 5.2.3).
# Arguments: $1 - base authority; "?" (or empty) means "undefined"
#            $2 - base path
#            $3 - reference path
# Outputs:   the merged path on stdout, followed by a newline.  Dot segments
#            are NOT removed here.
merge() { # 5.2.3
  #>If the base URI has a defined authority component and an empty
  #>path, then return a string consisting of "/" concatenated with the
  #>reference's path; otherwise,
  #>return a string consisting of the reference's path component
  #>appended to all but the last segment of the base URI's path (i.e.,
  #>excluding any characters after the right-most "/" in the base URI
  #>path, or excluding the entire base URI path if it does not contain
  #>any "/" characters).
  local base_authority="$1" # if ? is here, it means undefined (see caller)
  local base_path="$2"
  local ref_path="$3"

  # Not part of the RFC: an empty reference path hands back the base path
  # unchanged.
  if [[ -z "$ref_path" ]]; then
    printf '%s\n' "$base_path"
    return
  fi

  # Paths are printed with %s: %q would shell-escape them (";" -> "\;",
  # "" -> "''") and corrupt the resulting URI.
  if [[ "${base_authority:-?}" != "?" && -z "$base_path" ]]; then
    printf '/%s\n' "$ref_path"
  elif [[ "$base_path" == */* ]]; then
    # Keep everything up to and including the right-most "/".
    printf '%s/%s\n' "${base_path%/*}" "$ref_path"
  else
    # No "/" in the base path: the whole base path is dropped.
    printf '%s\n' "$ref_path"
  fi
}
|
||||
|
||||
# I can probably just use normalize_path already in bollux here
|
||||
# Remove "." and ".." segments from a path (RFC 3986 section 5.2.4).
# Arguments: $1 - path to normalise
# Outputs:   the normalised path on stdout, followed by a newline
remove_dot_segments() { # 5.2.4
  local input="$1"
  local output=
  local two_slashes='//' one_slash='/'

  while [[ -n "$input" ]]; do
    if [[ "$input" == ../* ]]; then
      # A. drop a leading "../" ...
      input="${input#../}"
    elif [[ "$input" == ./* ]]; then
      # ... or a leading "./"
      input="${input#./}"
    elif [[ "$input" == /./* ]]; then
      # B. "/./" collapses to "/"
      input="/${input#/./}"
    elif [[ "$input" == /. ]]; then
      # ... and so does a trailing "/." (complete segment only, so that
      # "/.hidden" is left alone)
      input="/"
    elif [[ "$input" == /../* || "$input" == /.. ]]; then
      # C. "/../" or a trailing "/.." collapses to "/" and removes the last
      # output segment together with its preceding "/" (if any).
      if [[ "$input" == /.. ]]; then
        input="/"
      else
        input="/${input#/../}"
      fi
      if [[ "$output" == */* ]]; then
        output="${output%/*}"
      else
        output=
      fi
    elif [[ "$input" == . || "$input" == .. ]]; then
      # D. a lone "." or ".." disappears
      input=
    else
      # E. move the first path segment in the input buffer to the end of
      # the output buffer, including the initial "/" character (if
      # any) and any subsequent characters up to, but not including,
      # the next "/" character or the end of the input buffer.
      if [[ $input =~ ^(/?[^/]*)(/?.*)$ ]]; then
        output="$output${BASH_REMATCH[1]}"
        input="${BASH_REMATCH[2]}"
      else
        # Should be unreachable; bail out instead of looping forever.
        echo NOMATCH >&2
        return 1
      fi
    fi
  done

  # XXX not part of the RFC: one left-to-right pass turning "//" into "/",
  # kept because callers may hand in paths with a doubled separator.
  output="${output//$two_slashes/$one_slash}"
  printf '%s\n' "$output"
}
|
||||
|
||||
# *FINDING* URLS ... IN PURE BASH !!!
|
||||
# Split a URI into its components using the regex from RFC 3986 appendix B.
# Usage:     eval "$(parse_url NAME STRING)" => NAME[scheme], NAME[authority],
#            NAME[path], NAME[query], NAME[fragment]
# Arguments: $1 - name of an (associative) array to fill in
#            $2 - the URI to parse
# Outputs:   one `NAME[component]=value` assignment per NON-EMPTY component,
#            shell-quoted so it is safe to eval; empty components are omitted
# Returns:   0 on success, 1 if STRING does not match, 2 if NAME is not a
#            valid identifier
parse_url() { # eval "$(parse_url NAME STRING)" => NAME[...]
  local name="$1"
  local string="$2"
  local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
  local component

  # NAME is pasted into code that the caller evals, so refuse anything that
  # is not a plain identifier.
  [[ $name =~ ^[A-Za-z_][A-Za-z0-9_]*$ ]] || return 2
  [[ $string =~ $re ]] || return 1

  local scheme="${BASH_REMATCH[2]}"
  local authority="${BASH_REMATCH[4]}"
  local path="${BASH_REMATCH[5]}"
  local query="${BASH_REMATCH[7]}"
  local fragment="${BASH_REMATCH[9]}"

  for component in scheme authority path query fragment; do
    if [[ -n "${!component}" ]]; then
      # %q quotes every shell-special character (spaces, $, backticks,
      # quotes, ...), not only a hand-picked few.
      printf '%s[%s]=%q\n' "$name" "$component" "${!component}"
    fi
  done
  return 0
}
|
||||
|
||||
# ease-of-life functions
|
||||
# isdefined NAME => succeeds when the variable called NAME is set, even if
# its value is the empty string; fails when it is unset.
isdefined() { # isdefined NAME => tests if NAME is defined ONLY
  # ${!1+x} looks up the variable named by $1 and expands to "x" only if
  # that variable is set.
  [[ "${!1+x}" ]]
}
|
||||
# isempty NAME => succeeds only when the variable called NAME is set AND its
# value is the empty string; fails when it is unset or non-empty.
isempty() { # isempty NAME => tests if NAME is empty ONLY
  # ${!1-x} yields "x" for an unset variable, so only a set-but-empty
  # variable expands to nothing and passes the negated test.
  [[ ! "${!1-x}" ]]
}
|
||||
|
||||
# Test driver: trace every command, then resolve the reference given as $2
# against the base URI given as $1 and print the result.
set -x
transform_resource "$@"
|
||||
|
||||
# NEXT ....
|
||||
# NORMALIZATION !!!
|
Loading…
Reference in New Issue