Change implementation of URL array

This commit is contained in:
Case Duckworth 2020-06-18 08:24:01 -05:00
parent 7dd75ca773
commit 1e06e8f5af
1 changed files with 185 additions and 137 deletions

322
bollux
View File

@ -80,7 +80,7 @@ bollux() {
log d "BOLLUX_URL='$BOLLUX_URL'"
run blastoff "$BOLLUX_URL"
run blastoff -u "$BOLLUX_URL"
}
# process command-line arguments
@ -142,6 +142,8 @@ bollux_config() {
: "${C_LIST:=0}" # list formatting
: "${C_QUOTE:=3}" # quote formatting
: "${C_PRE:=0}" # preformatted text formatting
## state
UC_BLANK=':?:'
}
# quit happily
@ -170,123 +172,167 @@ prompt() { # prompt [-u] PROMPT [READ_ARGS...]
# load a URL
blastoff() { # blastoff [-u] URL
local well_formed=true
local proto url
local u
if [[ "$1" == "-u" ]]; then
well_formed=false
shift
u="$(run uwellform "$2")"
else
u="$1"
fi
url="$1"
if $well_formed && [[ "$1" != "$BOLLUX_URL" ]]; then
url="$(run transform_resource "$BOLLUX_URL" "$1")"
local -a url
run utransform url "$BOLLUX_URL" "$u"
if ! ucdef url[1]; then
run ucset url[1] "$BOLLUX_PROTO"
fi
[[ "$url" != *://* ]] && url="$BOLLUX_PROTO://$url"
url="$(trim_string "$url")"
proto="${url%://*}"
log d "PROTO='$proto' URL='$url'"
{
if declare -Fp "${proto}_request" &>/dev/null; then
run "${proto}_request" "$url"
if declare -Fp "${url[1]}_request" >/dev/null 2>&1; then
run "${url[1]}_request" "$url"
else
die 99 "No request handler for '$proto'!"
die 99 "No request handler for '${url[1]}'"
fi
} | run normalize |
{
if declare -Fp "${proto}_response" &>/dev/null; then
run "${proto}_response" "$url"
else
log x "No response handler for '$proto', passing through"
passthru
fi
}
} | run normalize | {
if declare -Fp "${url[1]}_response" >/dev/null 2>&1; then
run "${url[1]}_response" "$url"
else
log d "No response handler for '${url[1]}', passing thru"
passthru
fi
}
}
# transform a URI according to RFC 3986 sec 5.2.2
transform_resource() { # transform_resource BASE_URL REFERENCE_URL
local -A R B T # reference, base url, target
eval "$(run parse_url B "$1")"
eval "$(run parse_url R "$2")"
# A non-strict parser may ignore a scheme in the reference
# if it is identical to the base URI's scheme.
if ! "${STRICT:-true}" && [[ "${R[scheme]}" == "${B[scheme]}" ]]; then
unset "${R[scheme]}"
# URLS
## https://tools.ietf.org/html/rfc3986
uwellform() {
local u="$1"
if [[ "$u" != *://* ]]; then
u="$BOLLUX_PROTO://$u"
fi
# basically pseudo-code from spec ported to bash
if isdefined "R[scheme]"; then
T[scheme]="${R[scheme]}"
isdefined "R[authority]" && T[authority]="${R[authority]}"
isdefined R[path] &&
T[path]="$(run remove_dot_segments "${R[path]}")"
isdefined "R[query]" && T[query]="${R[query]}"
else
if isdefined "R[authority]"; then
T[authority]="${R[authority]}"
isdefined "R[authority]" &&
T[path]="$(remove_dot_segments "${R[path]}")"
isdefined R[query] && T[query]="${R[query]}"
u="$(trim_string "$u")"
printf '%s\n' "$u"
}
usplit() { # usplit NAME:ARRAY URL:STRING
local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
[[ $2 =~ $re ]] || return $?
local scheme="${BASH_REMATCH[2]}"
local authority="${BASH_REMATCH[4]}"
local path="${BASH_REMATCH[5]}"
local query="${BASH_REMATCH[7]}"
local fragment="${BASH_REMATCH[9]}"
# 0=url 1=scheme 2=authority 3=path 4=query 5=fragment
local i=1 c
for c in scheme authority path query fragment; do
if [[ "${!c}" || "$c" == path ]]; then
printf -v "$1[$i]" '%s' "${!c}"
else
if isempty "R[path]"; then
T[path]="${B[path]}"
if isdefined R[query]; then
T[query]="${R[query]}"
printf -v "$1[$i]" "$UC_BLANK"
fi
((i+=1))
done
printf -v "$1[0]" "$(ujoin "$1")" # inefficient I'm sure
}
# Recompose the URL held in the array named NAME and store it in NAME[0]
# (cf. RFC 3986 sec 5.3, recomposition).
# Array layout: 0=url 1=scheme 2=authority 3=path 4=query 5=fragment
# A component equal to $UC_BLANK counts as undefined and is left out,
# together with its delimiter; the path (index 3) is always appended.
ujoin() { # ujoin NAME:ARRAY
  local -n U="$1"
  # Build the result in a scratch variable and append with +=.  Component
  # text must never be used as a printf format: percent-encoded URLs are
  # full of '%'.  Starting from an empty string also guarantees that an
  # old NAME[0] does not leak into the result when the scheme is undefined.
  local _ujoin_url=""
  if ucdef 'U[1]'; then
    _ujoin_url+="${U[1]}:"
  fi
  if ucdef 'U[2]'; then
    _ujoin_url+="//${U[2]}"
  fi
  _ujoin_url+="${U[3]}"
  if ucdef 'U[4]'; then
    _ujoin_url+="?${U[4]}"
  fi
  if ucdef 'U[5]'; then
    _ujoin_url+="#${U[5]}"
  fi
  U[0]="$_ujoin_url"
  log d "${U[0]}"
}
ucdef() { # ucdef NAME
  # Succeeds unless the variable named NAME holds the $UC_BLANK marker.
  # NAME is read through indirection, so an element reference such as
  # url[1] works as well as a plain variable name.
  [[ ! "${!1}" == "$UC_BLANK" ]]
}
ucblank() { # ucblank NAME
  # Succeeds when the variable named NAME (read through indirection)
  # expands to the empty string.
  [[ "${!1}" == "" ]]
}
# Assign VALUE to one URL component and rebuild the joined URL.
# NAME is an array element reference such as url[2]; after the assignment
# ujoin is called on the array name (NAME with its [subscript] stripped).
ucset() { # ucset NAME VALUE
  # printf -v stores VALUE verbatim.  Building an eval string around it
  # would break on -- and execute -- any value containing a single quote,
  # and URL components can come from user input or server redirects.
  run printf -v "$1" '%s' "$2"
  run ujoin "${1/\[*\]}"
}
utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING
local -a B R # base, reference
local -n T="$1" # target
usplit B "$2"
usplit R "$3"
# initialize T
for ((i=1;i<=5;i++)); do
T[$i]="$UC_BLANK"
done
# 0=url 1=scheme 2=authority 3=path 4=query 5=fragment
if ucdef R[1]; then
T[1]="${R[1]}"
if ucdef R[2]; then
T[2]="${R[2]}"
fi
if ucdef R[3]; then
T[3]="$(pundot "${R[3]}")"
fi
if ucdef R[4]; then
T[4]="${R[4]}"
fi
else
if ucdef R[2]; then
T[2]="${R[2]}"
if ucdef R[2]; then
T[3]="$(pundot "${R[3]}")"
fi
if ucdef R[4]; then
T[4]="${R[4]}"
fi
else
if ucblank R[3]; then
T[3]="${B[3]}"
if ucdef R[4]; then
T[4]="${R[4]}"
else
T[query]="${B[query]}"
T[4]="${B[4]}"
fi
else
if [[ "${R[path]}" == /* ]]; then
T[path]="$(remove_dot_segments "${R[path]}")"
if [[ "${R[3]}" == /* ]]; then
T[3]="$(pundot "${R[3]}")"
else
T[path]="$(merge_paths "B[authority]" "${B[path]}" "${R[path]}")"
T[path]="$(remove_dot_segments "${T[path]}")"
T[3]="$(pmerge B R)"
T[3]="$(pundot "${T[3]}")"
fi
if ucdef R[4]; then
T[4]="${R[4]}"
fi
isdefined R[query] && T[query]="${R[query]}"
fi
T[authority]="${B[authority]}"
T[2]="${B[2]}"
fi
T[scheme]="${B[scheme]}"
T[1]="${B[1]}"
fi
isdefined R[fragment] && T[fragment]="${R[fragment]}"
# cf. 5.3 -- recomposition
local r
isdefined "T[scheme]" && r="$r${T[scheme]}:"
# remove the port from the authority
isdefined "T[authority]" && r="$r//${T[authority]%:*}"
r="$r${T[path]}"
isdefined T[query] && r="$r?${T[query]}"
isdefined T[fragment] && r="$r#${T[fragment]}"
printf '%s\n' "$r"
if ucdef R[5]; then
T[5]="${R[5]}"
fi
ujoin T
}
# merge URL paths according to RFC 3986 sec 5.2.3
merge_paths() { # merge_paths BASE_AUTHORITY BASE_PATH REFERENCE_PATH
  # The locals are handed to isdefined/isempty by NAME, which look them
  # up through indirection.
  # shellcheck disable=2034
  local base_auth="$1" base_path="$2" ref_path="$3"

  # Nothing to merge: the base path is the answer.
  if [[ ! "$ref_path" ]]; then
    printf '%s\n' "${base_path//\/\//\//}"
    return
  fi

  # Base path is empty: the reference hangs directly off the root.
  if isdefined "base_auth" && isempty "base_path"; then
    printf '/%s\n' "${ref_path//\/\//\//}"
    return
  fi

  # Otherwise keep the base path up to its last slash (nothing when it
  # has no slash at all) and append the reference.
  local base_dir=""
  case "$base_path" in
  */*) base_dir="${base_path%/*}/" ;;
  esac
  printf '%s/%s\n' "${base_dir%/}" "${ref_path#/}"
}
# remove dot segments in paths according to RFC 3986 sec 5.2.4
remove_dot_segments() { # remove_dot_segments PATH
pundot() { # pundot PATH:STRING
local input="$1"
local output
while [[ "$input" ]]; do
@ -301,7 +347,7 @@ remove_dot_segments() { # remove_dot_segments PATH
elif [[ "$input" == . || "$input" == .. ]]; then
input=
else
[[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || log debug NOMATCH
[[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || return 1
output="$output${BASH_REMATCH[1]}"
input="${BASH_REMATCH[2]}"
fi
@ -309,36 +355,28 @@ remove_dot_segments() { # remove_dot_segments PATH
printf '%s\n' "${output//\/\//\//}"
}
# parse a url using the reference regex in RFC 3986 appendix B
parse_url() { # eval "$(split_url NAME STRING)" => NAME[...]
local name="$1"
local string="$2"
local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
[[ $string =~ $re ]] || return $?
pmerge() {
local -n b="$1"
local -n r="$2"
local scheme="${BASH_REMATCH[2]}"
local authority="${BASH_REMATCH[4]}"
local path="${BASH_REMATCH[5]}"
local query="${BASH_REMATCH[7]}"
local fragment="${BASH_REMATCH[9]}"
if ucblank r[3]; then
printf '%s\n' "${b[3]//\/\//\//}"
return
fi
for c in scheme authority query fragment; do
[[ "${!c}" ]] &&
run printf '%s[%s]=%q\n' "$name" "$c" "${!c}"
done
# unclear if the path is always set even if empty but it looks that way
run printf '%s[path]=%q\n' "$name" "$path"
if ucdef b[2] && ucblank b[3]; then
printf '/%s\n' "${r[3]//\/\//\//}"
else
local bp=""
if [[ "${b[3]}" == */* ]]; then
bp="${b[3]%/*}"
fi
printf '%s/%s\n' "${bp%/}" "${r[3]#/}"
fi
}
# is a NAME defined ('set' in bash)?
isdefined() { # isdefined NAME
  # ${!1+x} expands to "x" only when the variable named NAME is set
  # (even to the empty string), and to nothing otherwise.
  [[ -n "${!1+x}" ]]
}
# is a NAME defined AND empty?
isempty() { # isempty NAME
  # ${!1-x} substitutes "x" only when the variable named NAME is unset,
  # so the expansion is empty exactly when NAME is set to "".
  [[ -z "${!1-x}" ]]
}
# work with URLs
# https://github.com/dylanaraps/pure-bash-bible/
urlencode() { # urlencode STRING
uencode() { # uencode URL:STRING
local LC_ALL=C
for ((i = 0; i < ${#1}; i++)); do
: "${1:i:1}"
@ -355,7 +393,7 @@ urlencode() { # urlencode STRING
}
# https://github.com/dylanaraps/pure-bash-bible/
urldecode() { # urldecode STRING
udecode() { # udecode URL:STRING
: "${1//+/ }"
printf '%b\n' "${_//%/\\x}"
}
@ -363,19 +401,28 @@ urldecode() { # urldecode STRING
# GEMINI
# https://gemini.circumlunar.space/docs/specification.html
gemini_request() { # gemini_request URL
local url port server
local ssl_cmd
url="$1"
port=1965
server="${url#*://}"
server="${server%%/*}"
local -a url
usplit url "$1"
ssl_cmd=(openssl s_client -crlf -quiet -connect "$server:$port")
ssl_cmd+=(-servername "$server") # SNI
# disable old TLS/SSL versions
ssl_cmd+=(-no_ssl3 -no_tls1 -no_tls1_1)
# get rid of userinfo
ucset url[2] "${url[2]#*@}"
run "${ssl_cmd[@]}" <<<"$url" 2>/dev/null
local port
if [[ "${url[2]}" == *:* ]]; then
port="${url[2]#*:}"
ucset url[2] "${url[2]%:*}"
else
port=1965 # TODO variablize
fi
local ssl_cmd=(
openssl s_client
-crlf -quiet -connect "${url[2]}:$port"
-servername "${url[2]}" # SNI
-no_ssl3 -no_tls1 -no_tls1_1 # disable old TLS/SSL versions
)
run "${ssl_cmd[@]}" <<<"$url"
}
gemini_response() { # gemini_response URL
@ -399,7 +446,7 @@ gemini_response() { # gemini_response URL
10) run prompt "$meta" ;;
11) run prompt "$meta" -s ;; # password input
esac
run blastoff "?$(urlencode "$REPLY")"
run blastoff "?$(uencode "$REPLY")"
;;
2*) # OK
REDIRECTS=0
@ -480,7 +527,7 @@ gopher_response() { # gopher_response URL
cur_server="${BASH_REMATCH[1]}"
type="${BASH_REMATCH[6]:-1}"
run history_append "$url" "" # TODO: get the title ??
run history_append "$url" "" # gopher doesn't really have titles, huh
log d "TYPE='$type'"
@ -618,9 +665,10 @@ display() { # display METADATA [TITLE]
set_title "$title${title:+ - }bollux"
less_cmd=(less -R) # render ANSI color escapes
mklesskey "$BOLLUX_LESSKEY" && less_cmd+=(-k "$BOLLUX_LESSKEY")
local helpline="o:open, g/G:goto, [:back, ]:forward, r:refresh"
less_cmd+=(
-Pm"$(less_prompt_escape "$BOLLUX_URL") - bollux$" # 'status'line
-P='o\:open, g\:goto, [\:back, ]\:forward, r\:refresh$' # helpline
-P="$(less_prompt_escape "$helpline")$" # helpline
-m # start with statusline
+k # float content to the top
)
@ -910,7 +958,7 @@ handle_keypress() { # handle_keypress CODE
run blastoff "$BOLLUX_URL"
;;
53) # G - goto a url (pre-filled with current)
prompt -u GO
run prompt -u GO
run blastoff -u "$REPLY"
;;
*) # 54-57 -- still available for binding