#!/usr/bin/env bash
|
|
# transform-url -- resolve a URI reference against a base URI
# cf. https://tools.ietf.org/html/rfc3986#section-5 (reference resolution)
# cf. https://tools.ietf.org/html/rfc3986#section-5.1 (establishing a base URI)
# cf. also https://tools.ietf.org/html/rfc3986#appendix-B -- regex

# TEST WITH https://tools.ietf.org/html/rfc3986#section-5.4
|
|
|
|
# transform_resource BASE REFERENCE
#
# Resolve the URI reference REFERENCE against the base URI BASE following the
# pseudocode of RFC 3986 section 5.2.2, recompose the result (section 5.3)
# and print it on stdout followed by a newline.
#
# A component is treated as "defined" when parse_url assigned its key, so
# definedness is tested with ${X[key]+x} and possibly-unset keys are read
# with ${X[key]-}. The target array T only receives a key when the component
# it is copied from is defined; recomposition then emits a delimiter exactly
# for the keys that are set.
transform_resource() { # 5.2.2
  local -A R=() B=() T=() # reference, base url, target

  # NOTE(review): eval executes whatever parse_url prints. This is only safe
  # if parse_url emits nothing but properly quoted assignments -- confirm
  # before feeding untrusted URLs through this function.
  eval "$(parse_url R "$2")"
  eval "$(parse_url B "$1")"

  if [[ "${R[scheme]+x}" ]]; then
    # The reference is absolute: take everything from it.
    T[scheme]="${R[scheme]}"
    if [[ "${R[authority]+x}" ]]; then T[authority]="${R[authority]}"; fi
    T[path]="$(remove_dot_segments "${R[path]-}")"
    if [[ "${R[query]+x}" ]]; then T[query]="${R[query]}"; fi
  else
    if [[ "${R[authority]+x}" ]]; then
      # Network-path reference ("//host/..."): only the scheme is inherited.
      T[authority]="${R[authority]}"
      T[path]="$(remove_dot_segments "${R[path]-}")"
      if [[ "${R[query]+x}" ]]; then T[query]="${R[query]}"; fi
    else
      if [[ -z "${R[path]-}" ]]; then
        # Empty reference path ("", "?y", "#s"): keep the base path, and keep
        # the base query too unless the reference brings its own.
        T[path]="${B[path]-}"
        if [[ "${R[query]+x}" ]]; then
          T[query]="${R[query]}"
        elif [[ "${B[query]+x}" ]]; then
          T[query]="${B[query]}"
        fi
      else
        if [[ "${R[path]}" == /* ]]; then
          T[path]="$(remove_dot_segments "${R[path]}")"
        else
          # "?" tells merge that the base has no authority component.
          T[path]="$(merge "${B[authority]-?}" \
            "${B[path]-}" "${R[path]}")"
          T[path]="$(remove_dot_segments "${T[path]}")"
        fi
        if [[ "${R[query]+x}" ]]; then T[query]="${R[query]}"; fi
      fi
      if [[ "${B[authority]+x}" ]]; then T[authority]="${B[authority]}"; fi
    fi
    if [[ "${B[scheme]+x}" ]]; then T[scheme]="${B[scheme]}"; fi
  fi
  if [[ "${R[fragment]+x}" ]]; then T[fragment]="${R[fragment]}"; fi

  # 5.3 -- recomposition
  local r=""
  if [[ "${T[scheme]+x}" ]]; then r+="${T[scheme]}:"; fi
  if [[ "${T[authority]+x}" ]]; then r+="//${T[authority]}"; fi
  r+="${T[path]-}"
  if [[ "${T[query]+x}" ]]; then r+="?${T[query]}"; fi
  if [[ "${T[fragment]+x}" ]]; then r+="#${T[fragment]}"; fi
  printf '%s\n' "$r"
}
|
|
|
|
# merge B_AUTHORITY B_PATH R_PATH
#
# Merge a relative reference path with the base path (RFC 3986 5.2.3) and
# print the result on stdout followed by a newline.
#
#>If the base URI has a defined authority component and an empty
#>path, then return a string consisting of "/" concatenated with the
#>reference's path; otherwise,
#>return a string consisting of the reference's path component
#>appended to all but the last segment of the base URI's path (i.e.,
#>excluding any characters after the right-most "/" in the base URI
#>path, or excluding the entire base URI path if it does not contain
#>any "/" characters).
#
# Arguments:
#   $1 - base authority; "?" (or an empty string) means "undefined"
#   $2 - base path
#   $3 - reference path
#
# The paths are printed with %s: they are URL text, not shell words, so
# shell quoting (%q) would corrupt characters such as ";" and would turn an
# empty string into ''.
merge() { # 5.2.3
  local B_authority="$1" # if ? is here, it means undefined (see caller)
  local B_path="$2"
  local R_path="$3"

  # Not part of 5.2.3: an empty reference path yields the base path itself,
  # with each "//" squeezed to "/" as this function has always done.
  if [[ -z "$R_path" ]]; then
    printf '%s\n' "$B_path" |
      sed 's,//,/,g'
    return
  fi

  if [[ "${B_authority:-?}" != "?" && -z "$B_path" ]]; then
    printf '/%s\n' "$R_path"
  elif [[ "$B_path" == */* ]]; then
    # Keep everything up to and including the right-most "/".
    printf '%s/%s\n' "${B_path%/*}" "$R_path"
  else
    # No "/" in the base path: the whole base path is dropped.
    printf '%s\n' "$R_path"
  fi
}
|
|
|
|
# I can probably just use normalize_path already in bollux here
|
|
# remove_dot_segments PATH
#
# Remove the "." and ".." segments from PATH using the input/output buffer
# algorithm of RFC 3986 section 5.2.4 and print the result on stdout
# followed by a newline.
#
# Only complete segments are treated as dot segments: "/.hidden" and "/..x"
# are ordinary segments and are copied through unchanged.
#
# Beyond the RFC, every "//" in the result is squeezed to "/" (one
# non-overlapping pass), as this function has always done.
remove_dot_segments() { # 5.2.4
  local input="$1"
  local output=
  local segment_re='^(/?[^/]*)(/?.*)$'

  while [[ -n "$input" ]]; do
    if [[ "$input" == ../* ]]; then
      # A. drop a leading "../"
      input="${input#../}"
    elif [[ "$input" == ./* ]]; then
      # A. drop a leading "./"
      input="${input#./}"
    elif [[ "$input" == /./* ]]; then
      # B. "/./rest" -> "/rest"
      input="/${input#/./}"
    elif [[ "$input" == /. ]]; then
      # B. a trailing "/." -> "/"
      input="/"
    elif [[ "$input" == /../* || "$input" == /.. ]]; then
      # C. "/../rest" -> "/rest" and a trailing "/.." -> "/"; either way the
      #    last output segment goes, with its preceding "/" if it has one.
      if [[ "$input" == /.. ]]; then
        input="/"
      else
        input="/${input#/../}"
      fi
      if [[ "$output" == */* ]]; then
        output="${output%/*}"
      else
        output=
      fi
    elif [[ "$input" == . || "$input" == .. ]]; then
      # D. nothing but a dot segment is left
      input=
    else
      # E. move the first path segment in the input buffer to the end of
      # the output buffer, including the initial "/" character (if
      # any) and any subsequent characters up to, but not including,
      # the next "/" character or the end of the input buffer.
      [[ $input =~ $segment_re ]] || echo NOMATCH >&2
      output="$output${BASH_REMATCH[1]}"
      input="${BASH_REMATCH[2]}"
    fi
  done

  printf '%s\n' "$output" |
    sed 's,//,/,g'
}
|
|
|
|
# *FINDING* URLS ... IN PURE BASH !!!
|
|
# parse_url NAME STRING
#
# Split STRING into scheme, authority, path, query and fragment with the
# regular expression from RFC 3986 appendix B, and print one assignment per
# non-empty component, in the form NAME[component]=value. Intended use:
#
#   declare -A NAME; eval "$(parse_url NAME STRING)"
#
# Each value is quoted with printf %q, so eval restores it byte-for-byte and
# nothing inside the URL ("$(...)", backticks, spaces, quotes, ";") is ever
# interpreted by the shell. NAME itself is printed as given and must be a
# valid array name chosen by the caller.
#
# Empty components are not printed, so a present-but-empty query or fragment
# (as in "http://a/b?") cannot be told apart from an absent one.
#
# Returns 0 once the components are printed; non-zero only if the regular
# expression does not match.
parse_url() { # eval "$(parse_url NAME STRING)" => NAME[...]
  local name="$1"
  local string="$2"
  local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
  [[ $string =~ $re ]] || return

  # These names are looked up indirectly (${!c}) in the loop below.
  local scheme="${BASH_REMATCH[2]}"
  local authority="${BASH_REMATCH[4]}"
  local path="${BASH_REMATCH[5]}"
  local query="${BASH_REMATCH[7]}"
  local fragment="${BASH_REMATCH[9]}"

  local c
  for c in scheme authority path query fragment; do
    if [[ -n "${!c}" ]]; then
      printf '%s[%s]=%q\n' "$name" "$c" "${!c}"
    fi
  done
  return 0
}
|
|
|
|
# ease-of-life functions
|
|
# isdefined NAME
# Succeeds (status 0) when the variable called NAME is set, even if its value
# is the empty string; fails (status 1) when it is unset.
isdefined() { # isdefined NAME => tests if NAME is defined ONLY
  # ${!1+x} expands to "x" for a set variable and to nothing otherwise.
  if [[ -n "${!1+x}" ]]; then
    return 0
  fi
  return 1
}
|
|
# isempty NAME
# Succeeds (status 0) only when the variable called NAME is set to the empty
# string; fails (status 1) when it is unset or holds a non-empty value.
isempty() { # isempty NAME => tests if NAME is empty ONLY
  # ${!1-x} expands to "x" when unset, otherwise to the value itself, so the
  # expansion is empty exactly for a set-but-empty variable.
  if [[ -z "${!1-x}" ]]; then
    return 0
  fi
  return 1
}
|
|
|
|
# Script entry point: turn on command tracing (written to stderr), then hand
# all command-line arguments to transform_resource unchanged.
set -o xtrace
transform_resource "$@"
|
|
|
|
# NEXT ....
|
|
# NORMALIZATION !!!
|