Start testing transform_uri

This commit is contained in:
Case Duckworth 2020-05-28 08:37:40 -05:00
parent ac0d28c9ec
commit 2e6b42e5c1
1 changed files with 157 additions and 0 deletions

157
transform_uri.sh Normal file
View File

@ -0,0 +1,157 @@
#!/usr/bin/env bash
# transform-url
# cf. https://tools.ietf.org/html/rfc3986#section-5 and
# cf. https://tools.ietf.org/html/rfc3986#section-5.1
# cf. also https://tools.ietf.org/html/rfc3986#appendix-B -- regex
# TEST WITH https://tools.ietf.org/html/rfc3986#section-5.4
transform_resource() { # 5.2.2
  # Resolve a URI reference against a base URI (RFC 3986 section 5.2.2) and
  # print the recomposed target URI (section 5.3) on stdout.
  #
  # Arguments:
  #   $1 - base URI
  #   $2 - URI reference to resolve against the base
  #
  # parse_url only reports components that are non-empty, so an empty
  # component and an undefined one are not distinguished here.
  declare -A R B T # reference, base url, target
  # NOTE(review): the parse_url output is eval'd; this is only as safe as the
  # escaping parse_url applies to its values -- do not feed untrusted input
  # unless that escaping is known to be complete.
  eval "$(parse_url R "$2")"
  eval "$(parse_url B "$1")"

  # Follows the pseudocode in the spec, flattened into one if/elif chain.
  # The '+x' expansion tests whether an array field is set at all.
  if [[ "${R[scheme]+x}" ]]; then
    # Reference is absolute: take everything from it.
    T[scheme]="${R[scheme]}"
    T[authority]="${R[authority]-}"
    T[path]="$(remove_dot_segments "${R[path]-}")"
    T[query]="${R[query]-}"
  elif [[ "${R[authority]+x}" ]]; then
    # Network-path reference: only the scheme comes from the base.
    T[scheme]="${B[scheme]-}"
    T[authority]="${R[authority]}"
    T[path]="$(remove_dot_segments "${R[path]-}")"
    T[query]="${R[query]-}"
  elif [[ -z "${R[path]-}" ]]; then
    # Empty path ("", "?y", "#s"): keep the base path, and keep the base
    # query too unless the reference brings its own.
    T[scheme]="${B[scheme]-}"
    T[authority]="${B[authority]-}"
    T[path]="${B[path]-}"
    if [[ "${R[query]+x}" ]]; then
      T[query]="${R[query]}"
    else
      T[query]="${B[query]-}"
    fi
  else
    T[scheme]="${B[scheme]-}"
    T[authority]="${B[authority]-}"
    if [[ "${R[path]}" == /* ]]; then
      T[path]="$(remove_dot_segments "${R[path]}")"
    else
      # "?" tells merge that the base has no authority component.
      T[path]="$(merge "${B[authority]-?}" "${B[path]-}" "${R[path]}")"
      T[path]="$(remove_dot_segments "${T[path]}")"
    fi
    T[query]="${R[query]-}"
  fi
  T[fragment]="${R[fragment]-}"

  # 5.3 -- recomposition; components that ended up empty are left out.
  local r=""
  if [[ -n "${T[scheme]}" ]]; then
    r+="${T[scheme]}:"
  fi
  if [[ -n "${T[authority]}" ]]; then
    r+="//${T[authority]}"
  fi
  r+="${T[path]}"
  if [[ -n "${T[query]}" ]]; then
    r+="?${T[query]}"
  fi
  if [[ -n "${T[fragment]}" ]]; then
    r+="#${T[fragment]}"
  fi
  printf '%s\n' "$r"
}
merge() { # 5.2.3
  # Merge a relative-path reference with the base path and print the result.
  #
  #>If the base URI has a defined authority component and an empty
  #>path, then return a string consisting of "/" concatenated with the
  #>reference's path; otherwise,
  #>return a string consisting of the reference's path component
  #>appended to all but the last segment of the base URI's path (i.e.,
  #>excluding any characters after the right-most "/" in the base URI
  #>path, or excluding the entire base URI path if it does not contain
  #>any "/" characters).
  #
  # Arguments:
  #   $1 - base authority; "?" (or empty) means the base has none
  #   $2 - base path
  #   $3 - reference path
  #
  # Output is written with %s, not %q: the result is a URI path, not shell
  # source, so characters such as ";" must come through unescaped.
  local base_authority="$1"
  local base_path="$2"
  local ref_path="$3"

  # Not part of 5.2.3: callers may pass an empty reference path, in which
  # case the base path is handed back verbatim.
  if [[ -z "$ref_path" ]]; then
    printf '%s\n' "$base_path"
    return
  fi

  if [[ "${base_authority:-?}" != "?" && -z "$base_path" ]]; then
    printf '/%s\n' "$ref_path"
    return
  fi

  # Keep everything up to and including the right-most "/" of the base path
  # (nothing at all when the base path has no "/").
  if [[ "$base_path" == */* ]]; then
    base_path="${base_path%/*}/"
  else
    base_path=""
  fi
  printf '%s%s\n' "$base_path" "$ref_path"
}
# I can probably just use normalize_path already in bollux here
remove_dot_segments() { # 5.2.4
  # Print path $1 with its "." and ".." segments resolved, following the
  # input-buffer / output-buffer algorithm of RFC 3986 section 5.2.4.
  #
  # Arguments:
  #   $1 - the path to normalise
  local input="$1"
  local output=""
  local rest segment
  while [[ -n "$input" ]]; do
    if [[ "$input" == ../* ]]; then
      # A. drop a leading "../" ...
      input="${input#../}"
    elif [[ "$input" == ./* ]]; then
      # ... or a leading "./"
      input="${input#./}"
    elif [[ "$input" == /./* ]]; then
      # B. "/./" prefix becomes "/"
      input="/${input#/./}"
    elif [[ "$input" == /. ]]; then
      # B. a lone trailing "/." becomes "/"
      input="/"
    elif [[ "$input" == /../* || "$input" == /.. ]]; then
      # C. "/../" prefix (or a lone trailing "/..") becomes "/", and the
      # last output segment is removed together with its preceding "/".
      if [[ "$input" == /.. ]]; then
        input="/"
      else
        input="/${input#/../}"
      fi
      if [[ "$output" == */* ]]; then
        output="${output%/*}"
      else
        output=""
      fi
    elif [[ "$input" == . || "$input" == .. ]]; then
      # D. nothing but "." or ".." is left
      input=""
    else
      # E. move the first path segment in the input buffer to the end of
      # the output buffer, including the initial "/" character (if
      # any) and any subsequent characters up to, but not including,
      # the next "/" character or the end of the input buffer.
      if [[ "$input" == /* ]]; then
        rest="${input#/}"
        segment="/${rest%%/*}"
      else
        segment="${input%%/*}"
      fi
      output+="$segment"
      input="${input:${#segment}}"
    fi
  done
  # XXX not part of the RFC: runs of "//" are still collapsed to "/" because
  # merge may hand over a path with a doubled slash at the join.
  output="${output//\/\///}"
  printf '%s\n' "$output"
}
# *FINDING* URLS ... IN PURE BASH !!!
parse_url() { # eval "$(parse_url NAME STRING)" => NAME[...]
  # Split STRING into URI components with the regex from RFC 3986 appendix B
  # and print one `NAME[component]=value` assignment per non-empty component
  # (scheme, authority, path, query, fragment), ready to be eval'd by a
  # caller that has declared NAME as an associative array.
  #
  # Arguments:
  #   $1 - name of the caller's associative array
  #   $2 - the URI (or URI reference) to split
  # Returns: non-zero only if the regex match itself fails.
  local name="$1"
  local string="$2"
  local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
  [[ $string =~ $re ]] || return
  local -A part=(
    [scheme]="${BASH_REMATCH[2]}"
    [authority]="${BASH_REMATCH[4]}"
    [path]="${BASH_REMATCH[5]}"
    [query]="${BASH_REMATCH[7]}"
    [fragment]="${BASH_REMATCH[9]}"
  )
  local key
  for key in scheme authority path query fragment; do
    if [[ -n "${part[$key]}" ]]; then
      # %q quotes the value so that eval reproduces it byte for byte;
      # spaces, "$", backticks, quotes and a leading "~" would otherwise be
      # interpreted by the shell.
      printf '%s[%s]=%q\n' "$name" "$key" "${part[$key]}"
    fi
  done
}
# ease-of-life functions
isdefined() { # isdefined NAME => succeeds iff the variable NAME is set
  # "${!1+x}" expands to "x" when the variable named by $1 is set (even to
  # the empty string) and to nothing when it is unset.
  if [[ -n "${!1+x}" ]]; then
    return 0
  fi
  return 1
}
isempty() { # isempty NAME => succeeds iff NAME is set AND holds ""
  # "${!1-x}" substitutes "x" only when the variable named by $1 is unset,
  # so the expansion is empty exactly for a set-but-empty variable.
  if [[ -z "${!1-x}" ]]; then
    return 0
  fi
  return 1
}
# Script entry point: turn on command tracing, then hand the command-line
# arguments (base URI, then reference) straight to transform_resource.
set -o xtrace
transform_resource "$@"
# NEXT ....
# NORMALIZATION !!!