Bring URL transforming up to spec
URL transformation (formerly called "munging") is now, as far as I can tell, fully compliant with the reference-resolution rules of RFC 3986 (section 5.2). It is also implemented in pure bash and is available as a library at https://git.sr.ht/~acdw/shurlie.
This commit is contained in:
parent
482e2659ae
commit
c62e428c16
202
bollux
202
bollux
|
@ -122,7 +122,7 @@ blastoff() { # load a url
|
||||||
URL="$1"
|
URL="$1"
|
||||||
|
|
||||||
if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then
|
if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then
|
||||||
URL="$(run munge_url "$1" "$BOLLUX_URL")"
|
URL="$(run transform_resource "$BOLLUX_URL" "$1")"
|
||||||
fi
|
fi
|
||||||
[[ "$URL" != *://* ]] && URL="$BOLLUX_PROTO://$URL"
|
[[ "$URL" != *://* ]] && URL="$BOLLUX_PROTO://$URL"
|
||||||
URL="$(trim <<<"$URL")"
|
URL="$(trim <<<"$URL")"
|
||||||
|
@ -134,95 +134,133 @@ blastoff() { # load a url
|
||||||
run handle_response "$URL"
|
run handle_response "$URL"
|
||||||
}
|
}
|
||||||
|
|
||||||
munge_url() {
|
transform_resource() { # transform_resource BASE_URL REFERENCE_URL
|
||||||
local -A new old u
|
declare -A R B T # reference, base url, target
|
||||||
eval "$(split_url new <<<"$1")"
|
eval "$(parse_url B "$1")"
|
||||||
for k in "${!new[@]}"; do log d "new[$k]=${new[$k]}"; done
|
eval "$(parse_url R "$2")"
|
||||||
eval "$(split_url old <<<"$2")"
|
# A non-strict parser may ignore a scheme in the reference
|
||||||
for k in "${!old[@]}"; do log d "old[$k]=${old[$k]}"; done
|
# if it is identical to the base URI's scheme.
|
||||||
|
if ! "${STRICT:-true}" && [[ "${R[scheme]}" == "${B[scheme]}" ]]; then
|
||||||
u['scheme']="${new['scheme']:-${old['scheme']:-}}"
|
unset "${R[scheme]}"
|
||||||
u['authority']="${new['authority']:-${old['authority']:-}}"
|
|
||||||
# XXX this whole path thing is wack
|
|
||||||
if [[ "${new['path']+isset}" ]]; then
|
|
||||||
log d 'new path set'
|
|
||||||
if [[ "${new['path']}" == /* ]]; then
|
|
||||||
log d 'new path == /*'
|
|
||||||
u['path']="${new['path']}"
|
|
||||||
elif [[ "${new['authority']}" == "${old['authority']}" || ! "${new['authority']+isset}" ]]; then
|
|
||||||
p="${old['path']:-}/${new['path']}"
|
|
||||||
log d "$p ( $(normalize_path <<<"$p") )"
|
|
||||||
u['path']="$(normalize_path <<<"$p")"
|
|
||||||
else
|
|
||||||
log d 'u path = new path'
|
|
||||||
u['path']="${new['path']}"
|
|
||||||
fi
|
|
||||||
elif [[ "${new['query']+isset}" || "${new['fragment']+isset}" ]]; then
|
|
||||||
log d 'u path = old path'
|
|
||||||
u['path']="${old['path']}"
|
|
||||||
else
|
|
||||||
u['path']="/"
|
|
||||||
fi
|
fi
|
||||||
u['query']="${new['query']:-}"
|
|
||||||
u['fragment']="${new['fragment']:-}"
|
|
||||||
for k in "${!u[@]}"; do log d "u[$k]=${u[$k]}"; done
|
|
||||||
|
|
||||||
run printf '%s%s%s%s%s\n' \
|
# basically pseudo-code from spec ported to bash
|
||||||
"${u['scheme']}" "${u['authority']}" "${u['path']}" \
|
if isdefined "R[scheme]"; then
|
||||||
"${u['query']}" "${u['fragment']}"
|
T[scheme]="${R[scheme]}"
|
||||||
|
isdefined "R[authority]" && T[authority]="${R[authority]}"
|
||||||
|
isdefined R[path] &&
|
||||||
|
T[path]="$(remove_dot_segments "${R[path]}")"
|
||||||
|
isdefined "R[query]" && T[query]="${R[query]}"
|
||||||
|
else
|
||||||
|
if isdefined "R[authority]"; then
|
||||||
|
T[authority]="${R[authority]}"
|
||||||
|
isdefined "R[authority]" &&
|
||||||
|
T[path]="$(remove_dot_segments "${R[path]}")"
|
||||||
|
isdefined R[query] && T[query]="${R[query]}"
|
||||||
|
else
|
||||||
|
if isempty "R[path]"; then
|
||||||
|
T[path]="${B[path]}"
|
||||||
|
if isdefined R[query]; then
|
||||||
|
T[query]="${R[query]}"
|
||||||
|
else
|
||||||
|
T[query]="${B[query]}"
|
||||||
|
fi
|
||||||
|
else
|
||||||
|
if [[ "${R[path]}" == /* ]]; then
|
||||||
|
T[path]="$(remove_dot_segments "${R[path]}")"
|
||||||
|
else
|
||||||
|
T[path]="$(merge_paths "B[authority]" "${B[path]}" "${R[path]}")"
|
||||||
|
T[path]="$(remove_dot_segments "${T[path]}")"
|
||||||
|
fi
|
||||||
|
isdefined R[query] && T[query]="${R[query]}"
|
||||||
|
fi
|
||||||
|
T[authority]="${B[authority]}"
|
||||||
|
fi
|
||||||
|
T[scheme]="${B[scheme]}"
|
||||||
|
fi
|
||||||
|
isdefined R[fragment] && T[fragment]="${R[fragment]}"
|
||||||
|
# cf. 5.3 -- recomposition
|
||||||
|
local r=""
|
||||||
|
isdefined "T[scheme]" && r="$r${T[scheme]}:"
|
||||||
|
isdefined "T[authority]" && r="$r//${T[authority]}"
|
||||||
|
r="$r${T[path]}"
|
||||||
|
isdefined T[query] && r="$r?${T[query]}"
|
||||||
|
isdefined T[fragment] && r="$r#${T[fragment]}"
|
||||||
|
printf '%s\n' "$r"
|
||||||
}
|
}
|
||||||
|
|
||||||
normalize_path() {
|
merge_paths() { # 5.2.3
|
||||||
gawk '{
|
# shellcheck disable=2034
|
||||||
split($0, path, /\//)
|
B_authority="$1"
|
||||||
for (c in path) {
|
B_path="$2"
|
||||||
if (path[c] == "" || path[c] == ".") {
|
R_path="$3"
|
||||||
continue
|
# if R_path is empty, get rid of // in B_path
|
||||||
}
|
if [[ -z "$R_path" ]]; then
|
||||||
if (path[c] == "..") {
|
printf '%s\n' "${B_path//\/\//\//}"
|
||||||
sub(/[^\/]+$/, "", ret)
|
return
|
||||||
continue
|
fi
|
||||||
}
|
|
||||||
if (! ret || match(ret, /\/$/)) {
|
if isdefined "B_authority" && isempty "B_path"; then
|
||||||
slash = ""
|
printf '/%s\n' "${R_path//\/\//\//}"
|
||||||
} else {
|
else
|
||||||
slash = "/"
|
if [[ "$B_path" == */* ]]; then
|
||||||
}
|
B_path="${B_path%/*}/"
|
||||||
ret = ret slash path[c]
|
else
|
||||||
}
|
B_path=""
|
||||||
print (ret ~ /^\// ? "" : "/") ret
|
fi
|
||||||
}'
|
printf '%s/%s\n' "${B_path%/}" "${R_path#/}"
|
||||||
|
fi
|
||||||
}
|
}
|
||||||
|
|
||||||
split_url() {
|
remove_dot_segments() { # 5.2.4
|
||||||
gawk -vvar="$1" '{
|
local input="$1"
|
||||||
if (match($0, /^[A-Za-z]+:/)) {
|
local output=
|
||||||
arr["scheme"] = substr($0, RSTART, RLENGTH)
|
# ^/\.(/|$) - BASH_REMATCH[0]
|
||||||
$0 = substr($0, RLENGTH + 1)
|
while [[ "$input" ]]; do
|
||||||
}
|
if [[ "$input" =~ ^\.\.?/ ]]; then
|
||||||
if (match($0, /^\/\/[^\/?#]+?/) || (match($0, /^[^\/?#]+?/) && scheme)) {
|
input="${input#${BASH_REMATCH[0]}}"
|
||||||
arr["authority"] = substr($0, RSTART, RLENGTH)
|
elif [[ "$input" =~ ^/\.(/|$) ]]; then
|
||||||
$0 = substr($0, RLENGTH + 1)
|
input="/${input#${BASH_REMATCH[0]}}"
|
||||||
}
|
elif [[ "$input" =~ ^/\.\.(/|$) ]]; then
|
||||||
if (match($0, /^\/?[^?#]+/)) {
|
input="/${input#${BASH_REMATCH[0]}}"
|
||||||
arr["path"] = substr($0, RSTART, RLENGTH)
|
[[ "$output" =~ /?[^/]+$ ]]
|
||||||
$0 = substr($0, RLENGTH + 1)
|
output="${output%${BASH_REMATCH[0]}}"
|
||||||
}
|
elif [[ "$input" == . || "$input" == .. ]]; then
|
||||||
if (match($0, /^\?[^#]+/)) {
|
input=
|
||||||
arr["query"] = substr($0, RSTART, RLENGTH)
|
else
|
||||||
$0 = substr($0, RLENGTH + 1)
|
[[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || echo NOMATCH >&2
|
||||||
}
|
output="$output${BASH_REMATCH[1]}"
|
||||||
if (match($0, /^#.*/)) {
|
input="${BASH_REMATCH[2]}"
|
||||||
arr["fragment"] = substr($0, RSTART, RLENGTH)
|
fi
|
||||||
$0 = substr($0, RLENGTH + 1)
|
done
|
||||||
}
|
printf '%s\n' "${output//\/\//\//}"
|
||||||
for (part in arr) {
|
|
||||||
sub(/[[:space:]]+$/, "", arr[part])
|
|
||||||
printf var "[\"%s\"]=\"%s\"\n", part, arr[part]
|
|
||||||
}
|
|
||||||
}'
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
# parse_url NAME STRING
#
# Split STRING into URL components and print one shell assignment per
# component on stdout, meant to be consumed with
#
#     eval "$(parse_url NAME STRING)"    # => NAME[scheme], NAME[path], ...
#
# NAME should already be declared as an associative array by the caller.
# scheme, authority, query and fragment are only emitted when they are
# non-empty; path is always emitted, even when empty.  Values are quoted
# with printf %q so the output is safe to eval.
#
# Returns the status of the regex match if STRING cannot be split.
parse_url() {
	local name="$1"
	local string="$2"
	# component-splitting regex, cf. RFC 3986 appendix B
	local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
	# loop variable: keep it local so a caller's own $c is never clobbered
	local c

	[[ $string =~ $re ]] || return

	local scheme="${BASH_REMATCH[2]}"
	local authority="${BASH_REMATCH[4]}"
	local path="${BASH_REMATCH[5]}"
	local query="${BASH_REMATCH[7]}"
	local fragment="${BASH_REMATCH[9]}"

	for c in scheme authority query fragment; do
		# ${!c} is the value of the local variable named by $c
		if [[ "${!c}" ]]; then
			printf '%s[%s]=%q\n' "$name" "$c" "${!c}"
		fi
	done
	# unclear if the path is always set even if empty but it looks that way
	printf '%s[path]=%q\n' "$name" "$path"
}
|
||||||
|
|
||||||
|
# is a NAME defined ('set' in bash)?
|
||||||
|
isdefined() {
	# usage: isdefined NAME
	# NAME may be a plain variable or an array element such as 'R[path]'.
	# ${!1+x} yields "x" whenever NAME is set, even to the empty string.
	[[ -n "${!1+x}" ]]
}
|
||||||
|
# is a NAME defined AND empty?
|
||||||
|
isempty() {
	# usage: isempty NAME
	# NAME may be a plain variable or an array element such as 'R[path]'.
	# ${!1-x} yields "x" when NAME is unset, so only a NAME that is set
	# to the empty string produces an empty expansion.
	[[ -z "${!1-x}" ]]
}
|
||||||
|
|
||||||
request_url() {
|
request_url() {
|
||||||
local server="$1"
|
local server="$1"
|
||||||
local port="$2"
|
local port="$2"
|
||||||
|
|
Loading…
Reference in New Issue