Fix a bug with name collision

2021-03-04 17:49:18 -06:00 · 2021-03-04 17:49:18 -06:00 · 183617a85d
parent 8629f74d16
commit 183617a85d
1 changed files with 445 additions and 310 deletions
--- a/717
+++ b/717
@ -1,8 +1,9 @@
 #!/usr/bin/env bash
-# bollux: a bash gemini client
+################################################################################
+# BOLLUX: a bash gemini client
 # Author: Case Duckworth
 # License: MIT
-# Version: 0.4.0
+# Version: 0.4.1
 #
 # Commentary:
 #
@ -46,6 +47,7 @@
 # [9]: OpenSSL `s_client' online manual
 #      https://www.openssl.org/docs/manmaster/man1/openssl-s_client.html
 #
+################################################################################
 # Code:

 # Program information
@ -62,139 +64,13 @@ usage:
 flags:
 	-h	show this help and exit
 	-q	be quiet: log no messages
-	-v	verbose: log more messages
+	-v	be verbose: log more messages
 parameters:
 	URL	the URL to start in
 		If not provided, the user will be prompted.
 END
 }

-# UTILITY FUNCTIONS ############################################################
-
-# Run a command, but log it first.
-#
-# See `log' for the available levels.
-run() { # run COMMAND...
-	# I have to add a `trap' here for SIGINT to work properly.
-	trap bollux_quit SIGINT
-	log debug "$*"
-	"$@"
-}
-
-# Exit with an error and a message describing it.
-die() { # die EXIT_CODE MESSAGE
-	local ec="$1"
-	shift
-	log error "$*"
-	exit "$ec"
-}
-
-# Exit with success, printing a fun message.
-#
-# The default message is from the wonderful show "Cowboy Bebop."
-bollux_quit() {
-	printf '\e[1m%s\e[0m:\t\e[3m%s\e[0m\n' "$PRGN" "$BOLLUX_BYEMSG"
-	exit
-}
-# SIGINT is C-c, and I want to make sure bollux quits when it's typed.
-trap bollux_quit SIGINT
-
-# Trim leading and trailing whitespace from a string.
-#
-# [1]: #trim-leading-and-trailing-white-space-from-string
-trim_string() { # trim_string STRING
-	: "${1#"${1%%[![:space:]]*}"}"
-	: "${_%"${_##*[![:space:]]}"}"
-	printf '%s\n' "$_"
-}
-
-# Cycle a variable.
-#
-# e.g. 'cycle_list one,two,three' => 'two,three,one'
-cycle_list() { # cycle_list LIST DELIM
-	local list="${!1}" delim="$2"
-	local first="${list%%${delim}*}"
-	local rest="${list#*${delim}}"
-	printf -v "$1" '%s%s%s' "${rest}" "${delim}" "${first}"
-}
-
-# Determine the first element of a delimited list.
-#
-# e.g. 'first one,two,three' => 'one'
-first() { # first LIST DELIM
-	local list="${!1}" delim="$2"
-	printf '%s\n' "${list%%${delim}*}"
-}
-
-# Log a message to stderr (&2).
-#
-# TODO: document
-log() { # log LEVEL MESSAGE
-	[[ "$BOLLUX_LOGLEVEL" == QUIET ]] && return
-	local fmt
-
-	case "$1" in
-	([dD]*) # debug
-		[[ "$BOLLUX_LOGLEVEL" == DEBUG ]] || return
-		fmt=34
-		;;
-	([eE]*) # error
-		fmt=31
-		;;
-	(*) fmt=1 ;;
-	esac
-	shift
-
-	printf >&2 '\e[%sm%s:%s:\e[0m\t%s\n' "$fmt" "$PRGN" "${FUNCNAME[1]}" "$*"
-}
-
-# Set the terminal title.
-set_title() { # set_title STRING
-	printf '\e]2;%s\007' "$*"
-}
-
-# Prompt the user for input.
-#
-# This is a thin wrapper around `read', a bash built-in.  Because of the
-# way bollux messes around with stein and stdout, I need to read directly from
-# the TTY with this function.
-prompt() { # prompt [-u] PROMPT [READ_ARGS...]
-	local read_cmd=(read -e -r)
-	if [[ "$1" == "-u" ]]; then
-		read_cmd+=(-i "$BOLLUX_URL")
-		shift
-	fi
-	local prompt="$1"
-	shift
-	read_cmd+=(-p "$prompt> ")
-	"${read_cmd[@]}" </dev/tty "$@"
-}
-
-
-# Bash built-in replacement for `cat'
-#
-# One of the more pedantic bits of bollux (is 'pedantic' the right word?) --
-# `cat' is more than likely installed on any system with bash, so this function
-# is really just here so I can say that bollux is written as purely in bash as
-# possible.
-passthru() {
-	while IFS= read -r; do
-		printf '%s\n' "$REPLY"
-	done
-}
-
-# Bash built-in replacement for `sleep'
-#
-# The commentary for `passthru' applies here as well, though I didn't write this
-# function -- Dylan Araps did.
-#
-# [1]: #use-read-as-an-alternative-to-the-sleep-command
-sleep() { # sleep SECONDS
-	read -rt "$1" <> <(:) || :
-}
-
-# MAIN BOLLUX DISPATCH FUNCTIONS ###############################################
-
 # Main entry point into `bollux'.
 #
 # See the `if' block at the bottom of this script.
@ -251,10 +127,15 @@ bollux_config() {

 	if [ -f "$BOLLUX_CONFIG" ]; then
 		log debug "Loading config file '$BOLLUX_CONFIG'"
+		# Shellcheck gets mad when we try to source a file behind a
+		# variable -- it doesn't know where it is.  This line ignores
+		# that warning, since the user can put $BOLLUX_CONFIG wherever.
 		# shellcheck disable=1090
 		. "$BOLLUX_CONFIG"
 	else
-		log debug "Can't load config file '$BOLLUX_CONFIG'."
+		# It's an error if bollux can't find the config file, but I
+		# don't want to kill the program over it.
+		log error "Can't load config file '$BOLLUX_CONFIG'."
 	fi

 	## behavior
@ -301,67 +182,185 @@ bollux_config() {
 	UC_BLANK=':?:' # internal use only, should be non-URL chars
 }

-# Load a URL.
+# Initialize bollux state
+bollux_init() {
+	# Run `bollux_cleanup' on QUIT and on every exit.  INT is deliberately
+	# not listed here: the SIGINT trap installed right below would replace
+	# it anyway, and `bollux_quit' ends in `exit', which still fires the
+	# EXIT trap.
+	trap bollux_cleanup QUIT EXIT
+	# Trap `bollux_quit' on interrupt (C-c)
+	trap bollux_quit SIGINT
+
+	# Disable pathname expansion.
+	#
+	# It's very unlikely the user will want to navigate to a file when
+	# answering the GO prompt.
+	set -f
+
+	# Initialize state
+	#
+	# Other than $REDIRECTS, bollux's mutable state includes
+	# $BOLLUX_URL, but that's initialized elsewhere (possibly even by
+	# the user)
+	REDIRECTS=0
+
+	# History
+	#
+	# See also `history_append', `history_back', `history_forward'
+	#
+	# `-g' is required here: a plain `declare' inside a function creates a
+	# variable local to that function, which would vanish as soon as
+	# `bollux_init' returns.
+	declare -ga HISTORY=() # history is kept in a (global) array
+	HN=0                   # position of history in the array
+	run mkdir -p "${BOLLUX_HISTFILE%/*}"
+
+	# Remove $BOLLUX_LESSKEY and re-generate keybindings (to catch rebinds)
+	run rm -f "$BOLLUX_LESSKEY"
+	mklesskey
+}
+
+# Cleanup on exit
+bollux_cleanup() {
+	# Stubbed in case of need in future
+	:
+}
+
+# Exit with success, printing a fun message.
 #
-# I was feeling fancy when I named this function -- a more descriptive name
-# would be 'bollux_goto' or something.
-blastoff() { # blastoff [-u] URL
-	local u
+# The default message is from the wonderful show "Cowboy Bebop."
+bollux_quit() {
+	printf '\e[1m%s\e[0m:\t\e[3m%s\e[0m\n' "$PRGN" "$BOLLUX_BYEMSG"
+	exit
+}

-	# `blastoff' assumes a "well-formed" URL by default -- i.e., a URL with
-	# a protocol string and no extraneous whitespace.  Since bollux can't
-	# trust the user to input a proper URL at a prompt, nor capsule authors
-	# to fully-form their URLs, so the -u flag is necessary for those
-	# use-cases.  Otherwise, bollux knows the URL is well-formed -- or
-	# should be, due to the Gemini specification.
+# UTILITY FUNCTIONS ############################################################
+
+# Run a command, but log it first.
+#
+# See `log' for the available levels.
+run() { # run COMMAND...
+	# I have to add a `trap' here for SIGINT to work properly.
+	trap bollux_quit SIGINT
+	LOG_FUNC=2 log debug "> $*"
+	"$@"
+}
+
+# Log a message to stderr (&2).
+#
+# `log' in this script can take 3 different parameters: `d', `e', and `x', where
+# `x' is any other string (though I usually use `x'), followed by the message to
+# log.  Most messages are either `d' (debug) level or `x' (diagnostic) level,
+# meaning I want to show them all the time or only when bollux is called with
+# `-v' (verbose).  The levels are somewhat arbitrary, like I suspect all logging
+# levels are, but you can read the rest of bollux to see what I've chosen to
+# classify as what.
+log() { # log LEVEL MESSAGE...
+	# 'QUIET' means don't log anything.
+	[[ "$BOLLUX_LOGLEVEL" == QUIET ]] && return
+	local fmt		# ANSI escape code
+
+	case "$1" in
+	([dD]*)			# Debug level -- only print if bollux -v.
+		[[ "$BOLLUX_LOGLEVEL" == DEBUG ]] || return
+		fmt=34		# Blue
+		;;
+	([eE]*)			# Error level -- always print.
+		fmt=31		# Red
+		;;
+	(*)			# Diagnostic level -- print unless QUIET.
+		fmt=1		# Bold
+		;;
+	esac
+	shift
+
+	printf >&2 '\e[%sm%s:%-16s:\e[0m %s\n' \
+		   "$fmt" "$PRGN" "${FUNCNAME[${LOG_FUNC:-1}]}" "$*"
+}
+
+# Exit with an error and a message describing it.
+die() { # die EXIT_CODE MESSAGE
+	local exit_code="$1"
+	shift
+	log error "$*"
+	exit "$exit_code"
+}
+
+# Trim leading and trailing whitespace from a string.
+#
+# [1]: #trim-leading-and-trailing-white-space-from-string
+trim_string() { # trim_string STRING
+	: "${1#"${1%%[![:space:]]*}"}"
+	: "${_%"${_##*[![:space:]]}"}"
+	printf '%s\n' "$_"
+}
+
+# Cycle a variable in a list given a delimiter.
+#
+# e.g. 'list_cycle one,two,three ,' => 'two,three,one'
+list_cycle() { # list_cycle LIST<string> DELIM
+	# I could've set up `list_cycle' to use an array instead of a delimited
+	# string, but the one variable this function is used for is
+	# T_PRE_DISPLAY, which is user-configurable.  I wanted it to be as easy
+	# to configure for users who might not immediately know the bash array
+	# syntax, but can figure out 'variable=value' without much thought.
+	local list="${!1}"		 # Pass the list by name, not value
+	local delim="$2"		 # The delimiter of the string
+
+	# A list that doesn't contain the delimiter has at most one element,
+	# so there is nothing to cycle.  Without this guard both `first' and
+	# `rest' would equal the whole list, turning 'a' into 'a,a'.
+	[[ "$list" == *"$delim"* ]] || return 0
+
+	# The delimiter is quoted inside the patterns so that it's matched
+	# literally rather than as a glob.
+	local first="${list%%"${delim}"*}" # The first element
+	local rest="${list#*"${delim}"}"	 # The rest of the elements
+	# -v prints to the variable specified.
+	printf -v "$1" '%s%s%s' "${rest}" "${delim}" "${first}"
+}
+
+# Set the terminal title.
+set_title() { # set_title TITLE...
+	printf '\e]2;%s\007' "$*"
+}
+
+# Prompt the user for input.
+#
+# This is a thin wrapper around `read', a bash built-in.  Because of the
+# way bollux messes around with stdin and stdout, I need to read directly from
+# the TTY with this function.
+prompt() { # prompt [-u] PROMPT [READ_ARGS...]
+	# `-e' gets the line "interactively", so it can see history and stuff
+	# `-r' reads a "raw" string, i.e., without backslash escaping
+	local read_cmd=(read -e -r)
 	if [[ "$1" == "-u" ]]; then
-		u="$(run uwellform "$2")"
-	else
-		u="$1"
+		# `-i TEXT' uses TEXT as the initial text for `read'
+		read_cmd+=(-i "$BOLLUX_URL")
+		shift
 	fi
+	local prompt="$1"	# How to prompt the user
+	shift
+	read_cmd+=(-p "$prompt> ")
+	"${read_cmd[@]}" </dev/tty "$@"
+}

-	# After ensuring the URL is well-formed, `blastoff' needs to transform
-	# it according to the transform rules of RFC 3986 (see §5.2.2), which
-	# turns relative references into absolute references that bollux can use
-	# in its request to the server.  That's followed by a check that the
-	# protocol is set, defaulting to Gemini if it isn't.
-	#
-	# Implementation detail: because Bash is really stupid when it comes to
-	# arrays, the URL functions u* (see below) work with an array defined
-	# with `local -a' and passed by name, not by value.  Thus, the
-	# `urltransform url ...' instead of `urltransform "${url[@]}"' or
-	# similar.  In addition, the `ucdef' and `ucset' functions take the name
-	# of the array element as parameters, not the element itself.
-	local -a url
-	run utransform url "$BOLLUX_URL" "$u"
-	if ! ucdef url[1]; then
-		run ucset url[1] "$BOLLUX_PROTO"
-	fi
+# Bash built-in replacement for `cat'
+#
+# One of the more pedantic bits of bollux (is 'pedantic' the right word?) --
+# `cat' is more than likely installed on any system with bash, so this function
+# is really just here so I can say that bollux is written as purely in bash as
+# possible.
+passthru() {
+	# Copy stdin to stdout one line at a time.
+	while IFS= read -r; do
+		printf '%s\n' "$REPLY"
+	done
+	# `read' fails at EOF but still leaves any final, unterminated line in
+	# $REPLY.  Emit it (without adding a newline) so no input is lost,
+	# just like `cat' would.
+	if [[ -n "$REPLY" ]]; then
+		printf '%s' "$REPLY"
+	fi
+}

-	# To try and keep `bollux' as extensible as possible, I've written it
-	# only to expect two functions for every protocol it supports:
-	# `x_request' and `x_response', where `x' is the name of the protocol
-	# (the first element of the built `url' array).  `declare -F' looks only
-	# for functions in the current scope, failing if it doesn't exist.
-	#
-	# In between `x_request' and `x_response', `blastoff' normalizes the
-	# line endings to UNIX-style (LF) for ease of display.
-	{
-		if declare -F "${url[1]}_request" >/dev/null 2>&1; then
-			run "${url[1]}_request" "$url"
-		else
-			die 99 "No request handler for '${url[1]}'"
-		fi
-	} | run normalize | {
-		if declare -F "${url[1]}_response" >/dev/null 2>&1; then
-			run "${url[1]}_response" "$url"
-		else
-			log d \
-				"No response handler for '${url[1]}';" \
-				" passing thru"
-			passthru
-		fi
-	}
+# Bash built-in replacement for `sleep'
+#
+# The commentary for `passthru' applies here as well, though I didn't write this
+# function -- Dylan Araps did.
+#
+# [1]: #use-read-as-an-alternative-to-the-sleep-command
+sleep() { # sleep SECONDS
+	read -rt "$1" <> <(:) || :
+}
+
+# Normalize files.
+normalize() {
+	# Remember whether `extglob' was already enabled, so it is only
+	# switched off again if this function switched it on.
+	local had_extglob=false
+	shopt -q extglob && had_extglob=true
+	shopt -s extglob # for the ?(...) pattern in the printf call below
+
+	# `read' returns non-zero on a final line that lacks a trailing
+	# newline, but still stores that line in $REPLY; the extra test keeps
+	# such a line from being dropped.
+	while IFS= read -r || [[ -n "$REPLY" ]]; do
+		# Normalize line endings to Unix-style (LF)
+		printf '%s\n' "${REPLY//$'\r'?($'\n')/}"
+	done
+
+	"$had_extglob" || shopt -u extglob # restore the previous setting
+}

 # URLS #########################################################################
@ -382,16 +381,16 @@ blastoff() { # blastoff [-u] URL
 # trim whitespace.
 #
 # Useful for URLs that were probably input by humans.
-uwellform() {
-	local u="$1"
+uwellform() { # uwellform URL
+	local url="$1"

-	if [[ "$u" != *://* ]]; then
-		u="$BOLLUX_PROTO://$u"
+	if [[ "$url" != *://* ]]; then
+		url="$BOLLUX_PROTO://$url"
 	fi

-	u="$(trim_string "$u")"
+	url="$(trim_string "$url")"

-	printf '%s\n' "$u"
+	printf '%s\n' "$url"
 }

 # Split a URL into its constituent parts, placing them all in the given array.
@ -406,58 +405,94 @@ uwellform() {
 # takes the matched URL, splits it using the regex, then assigns each part to an
 # element of the url array NAME by using `printf -v', which prints to a
 # variable.
-usplit() { # usplit NAME:ARRAY URL:STRING
+usplit() { # usplit URL_ARRAY<name> URL
+	# Note: URL_ARRAY isn't assigned in `usplit', because it should
+	# already exist.  Pass /only/ the name of URL_ARRAY to this
+	# function, not its contents.
 	local re='^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'
-	[[ $2 =~ $re ]] || return $?
+	local u="$2"
+	[[ "$u" =~ $re ]] || {
+		# Save the status right away: the `log' call below resets $?,
+		# so returning $? afterwards would always report success.
+		local exit_code=$?
+		log error "usplit: '$2' doesn't match '$re'"
+		return "$exit_code"
+	}

 	# ShellCheck doesn't see that I'm using these variables in the `for'
 	# loop below, because I'm not technically using them /as/ variables, but
 	# as names to the variables.  The ${!c} formation in the `printf' call
 	# below performs a reverse lookup on the name to get the actual data.
 	# shellcheck disable=2034
-	local url="${BASH_REMATCH[0]}" \
+	local entire_url="${BASH_REMATCH[0]}" \
 	      scheme="${BASH_REMATCH[2]}" \
 	      authority="${BASH_REMATCH[4]}" \
 	      path="${BASH_REMATCH[5]}" \
 	      query="${BASH_REMATCH[7]}" \
 	      fragment="${BASH_REMATCH[9]}"

+	# Iterate through the 5 components of a URL and assign them to elements
+	# of URL_ARRAY, as follows:
 	# 0=url 1=scheme 2=authority 3=path 4=query 5=fragment
-	local i=1 c
+	run printf -v "$1[0]" '%s' "$entire_url"
+	# This loop tests whether the component exists first -- if it
+	# doesn't, the special variable $UC_BLANK is used in the spot
+	# instead.  Bash doesn't have a useful way of differentiating an
+	# /unset/ element of an array, versus an /empty/ element.
+	# The only exception is that 'path' component, which always exists
+	# in a URL (I think the simplest URL possible is '/', the empty
+	# path).
+	#
+	# `i' begins at 1 because the full URL is [0].  `c' is the loop
+	# variable; it's declared local so it doesn't leak into (or clobber a
+	# variable of) the caller.
+	local i=1 c
 	for c in scheme authority path query fragment; do
 		if [[ "${!c}" || "$c" == path ]]; then
-			printf -v "$1[$i]" '%s' "${!c}"
+			run printf -v "$1[$i]" '%s' "${!c}"
 		else
-			printf -v "$1[$i]" '%s' "$UC_BLANK"
+			run printf -v "$1[$i]" '%s' "$UC_BLANK"
 		fi
 		((i += 1))
 	done
-	printf -v "$1[0]" '%s' "$url"
+
 }

-# Join a URL array (NAME) back into a string.
-ujoin() { # ujoin NAME:ARRAY
-	local -n U="$1"
+# Join a URL array, split with `usplit', back into a string, assigning
+# it to the 0th element of the array.
+ujoin() { # ujoin URL_ARRAY<name>
+	# Here's the documentation for the '-n' flag:
+	#
+	# Give each name the nameref attribute, making it a name reference
+	# to another variable. That other variable is defined by the value of
+	# name. All references, assignments, and attribute modifications to
+	# name, except for those using or changing the -n attribute itself,
+	# are performed on the variable referenced by name's value. The
+	# nameref attribute cannot be applied to array variables.
+	#
+	# Pretty handy for passing-by-name!  Except that last part -- "The
+	# nameref attribute cannot be applied to array variables."  However,
+	# I've found a clever hack -- you can use 'printf -v' to print the
+	# value to the array element.
+	local -n URL_ARRAY="$1"

-	if ucdef U[1]; then
-		printf -v U[0] "%s:" "${U[1]}"
+	# For each possible URL component, check if it exists with `ucdef'.
+	# If it does, append it (with the correct component delimiter) to
+	# URL_ARRAY[0].
+	#
+	# The format string of every `printf' below is a literal; the URL
+	# built so far is always passed as an /argument/.  URLs routinely
+	# contain '%' (percent-encoding), which would be misread as a
+	# conversion specification if it ended up in the format string.
+	if ucdef URL_ARRAY[1]; then
+		printf -v 'URL_ARRAY[0]' '%s:' "${URL_ARRAY[1]}"
+	else
+		# No scheme: start from an empty string instead of appending
+		# to whatever URL_ARRAY[0] held before.
+		URL_ARRAY[0]=''
 	fi

-	if ucdef U[2]; then
-		printf -v U[0] "${U[0]}//%s" "${U[2]}"
+	if ucdef URL_ARRAY[2]; then
+		printf -v 'URL_ARRAY[0]' '%s//%s' \
+		       "${URL_ARRAY[0]}" "${URL_ARRAY[2]}"
 	fi

-	printf -v U[0] "${U[0]}%s" "${U[3]}"
+	# The path component is required.
+	printf -v 'URL_ARRAY[0]' '%s%s' "${URL_ARRAY[0]}" "${URL_ARRAY[3]}"

-	if ucdef U[4]; then
-		printf -v U[0] "${U[0]}?%s" "${U[4]}"
+	if ucdef URL_ARRAY[4]; then
+		printf -v 'URL_ARRAY[0]' '%s?%s' \
+		       "${URL_ARRAY[0]}" "${URL_ARRAY[4]}"
 	fi

-	if ucdef U[5]; then
-		printf -v U[0] "${U[0]}#%s" "${U[5]}"
+	if ucdef URL_ARRAY[5]; then
+		printf -v 'URL_ARRAY[0]' '%s#%s' \
+		       "${URL_ARRAY[0]}" "${URL_ARRAY[5]}"
 	fi

-	log d "${U[0]}"
+	log d "${URL_ARRAY[0]}"
 }

 # `ucdef' checks whether a URL component is blank or not -- if a component
@ -466,26 +501,39 @@ ujoin() { # ujoin NAME:ARRAY
 # not going to really be in a URL).  I tried really hard to differentiate an
 # unset array element from a simply empty one, but like, as far as I could tell,
 # you can't do that in Bash.
-ucdef() { # ucdef NAME
-	[[ "${!1}" != "$UC_BLANK" ]]
+ucdef() { # ucdef COMPONENT<name>
+	local component="$1"
+	[[ "${!component}" != "$UC_BLANK" ]]
 }

 # `ucblank' determines whether a URL component is blank (""), as opposed to
 # undefined.
-ucblank() { # ucblank NAME
-	[[ -z "${!1}" ]]
+ucblank() { # ucblank COMPONENT<name>
+	local component="$1"
+	[[ -z "${!component}" ]]
 }

 # `ucset' sets one component of a URL array and setting the 0th element to the
 # new full URL.  Use it instead of directly setting the array element with U[x],
 # because U[0] will fall out of sync with the rest of the contents.
-ucset() { # ucset NAME VALUE
-	run eval "${1}='$2'"
-	run ujoin "${1/\[*\]/}"
+ucset() { # ucset URL_ARRAY_INDEX<name> NEW_VALUE
+	local url_array_component="$1" # Of form 'URL_ARRAY[INDEX]'
+	local value="$2"
+
+	# Assign $value to $url_array_component.
+	#
+	# `printf -v' provides the extra layer of indirection without
+	# re-parsing $value as shell code.  An `eval' here would break on --
+	# or execute -- any value containing a single quote, and URLs can
+	# come straight from a server.
+	run printf -v "$url_array_component" '%s' "$value"
+
+	# Rejoin the URL_ARRAY with the changed value.
+	#
+	# The substitution here strips the array index subscript (i.e.,
+	# URL[4] => URL), passing the name of the full array to `ujoin'.
+	run ujoin "${url_array_component/\[*\]/}"
 }

-# [1]: encode a URL using percent-encoding.
-uencode() { # uencode URL:STRING
+# [1]: Encode a URL using percent-encoding.
+uencode() { # uencode URL
 	local LC_ALL=C
 	for ((i = 0; i < ${#1}; i++)); do
 		: "${1:i:1}"
@ -497,14 +545,14 @@ uencode() { # uencode URL:STRING
 	printf '\n'
 }

-# [1]: decode a percent-encoded URL.
-udecode() { # udecode URL:STRING
+# [1]: Decode a percent-encoded URL.
+udecode() { # udecode URL
 	: "${1//+/ }"
 	printf '%b\n' "${_//%/\\x}"
 }

-# Implement [2] § 5.2.4, "Remove Dot Segments"
-pundot() { # pundot PATH:STRING
+# Implement [2]: 5.2.4, "Remove Dot Segments".
+pundot() { # pundot PATH
 	local input="$1"
 	local output
 	while [[ "$input" ]]; do
@ -527,28 +575,28 @@ pundot() { # pundot PATH:STRING
 	printf '%s\n' "${output//\/\//\//}"
 }

-# Implement [2] § 5.2.3, "Merge Paths"
-pmerge() { # pmerge BASE:ARRAY REFERENCE:ARRAY
-	local -n b="$1"
-	local -n r="$2"
+# Implement [2] Section 5.2.3, "Merge Paths".
+pmerge() { # pmerge BASE_PATH<name> REFERENCE_PATH<name>
+	local -n base_path="$1"
+	local -n reference_path="$2"

-	if ucblank r[3]; then
-		printf '%s\n' "${b[3]//\/\//\//}"
+	if ucblank reference_path[3]; then
+		printf '%s\n' "${base_path[3]//\/\//\//}"
 		return
 	fi

-	if ucdef b[2] && ucblank b[3]; then
-		printf '/%s\n' "${r[3]//\/\//\//}"
+	if ucdef base_path[2] && ucblank base_path[3]; then
+		printf '/%s\n' "${reference_path[3]//\/\//\//}"
 	else
 		local bp=""
-		if [[ "${b[3]}" == */* ]]; then
-			bp="${b[3]%/*}"
+		if [[ "${base_path[3]}" == */* ]]; then
+			bp="${base_path[3]%/*}"
 		fi
-		printf '%s/%s\n' "${bp%/}" "${r[3]#/}"
+		printf '%s/%s\n' "${bp%/}" "${reference_path[3]#/}"
 	fi
 }

-# `utransform' implements [2]6 § 5.2.2, "Transform Resources."
+# `utransform' implements [2] Section 5.2.2, "Transform References."
 #
 # That section conveniently lays out a pseudocode algorithm describing how URL
 # resources should be transformed from one to another.  This function just
@ -624,19 +672,21 @@ utransform() {   # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING
 #
 ################################################################################

-# Request a resource from a gemini server - see [3] §§ 2, 4.
+# Request a resource from a gemini server - see [3] Sections 2, 4.
 gemini_request() { # gemini_request URL
 	local -a url
-	usplit url "$1"
+	run usplit url "$1"
+	log debug "${url[@]}"

 	# Remove user info from the URL.
 	#
 	# URLs can technically be of the form <proto>://<user>:<pass>@<domain>
-	# (see [2], § 3.2, "Authority").  I don't know of any Gemini servers
+	# (see [2] Section 3.2, "Authority").  I don't know of any Gemini servers
 	# that use the <user> or <pass> parts, so `gemini_request' just strips
 	# them from the requested URL.  This will need to be changed if servers
 	# decide to use this method of authentication.
-	ucset url[2] "${url[2]#*@}"
+	log debug "Removing user info from the URL"
+	run ucset url[2] "${url[2]#*@}"

 	# Determine the port to request.
 	#
@ -645,6 +695,7 @@ gemini_request() { # gemini_request URL
 	# port can be specified after the domain, separated with a colon.  The
 	# user can also request a different default port, for whatever reason,
 	# by setting the variable $BOLLUX_GEMINI_PORT.
+	log debug "Determining the port to request"
 	local port
 	if [[ "${url[2]}" == *:* ]]; then
 		port="${url[2]#*:}"
@ -680,7 +731,7 @@ gemini_request() { # gemini_request URL
 	run "${ssl_cmd[@]}" <<<"$url"
 }

-# Handle the gemini response - see [3] § 3.
+# Handle the gemini response - see [3] Section 3.
 gemini_response() { # gemini_response URL
 	local code meta # received on the first line of the response
 	local title	# determined by a clunky heuristic, see read loop: (2*)
@ -705,7 +756,7 @@ gemini_response() { # gemini_response URL
 	done
 	log d "[$code] $meta"

-	# Branch depending on the status code.  See [3], Appendix 1.
+	# Branch depending on the status code.  See [3] Appendix 1.
 	#
 	# Notes:
 	# - All codes other than 3* (Redirects) reset the REDIRECTS counter.
@ -735,7 +786,7 @@ gemini_response() { # gemini_response URL
 		#
 		# This while loop reads through the file looking for a line
 		# starting with `#', which is a level-one heading in text/gemini
-		# (see [3], § 5).  It assumes that the first such heading is the
+		# (see [3] Section 5).  It assumes that the first such heading is the
 		# title of the page, and uses that title for the terminal title
 		# and for the history.
 		local pretitle
@ -771,7 +822,7 @@ gemini_response() { # gemini_response URL
 		# distinction.  I'm not sure what the difference would be in
 		# practice, anyway.
 		#
-		# Per [4], bollux limits the number of redirects a page is
+		# Per [4] bollux limits the number of redirects a page is
 		# allowed to make (by default, five).  Change `$BOLLUX_MAXREDIR'
 		# to customize that limit.
 		((REDIRECTS += 1))
@ -788,7 +839,7 @@ gemini_response() { # gemini_response URL
 		run blastoff "$meta" # TODO: confirm redirect
 		;;
 	(4*) # TEMPORARY ERROR
-		# Since the 4* codes ([3], Appendix 1) are all server issues,
+		# Since the 4* codes ([3] Appendix 1) are all server issues,
 		# bollux can treat them all basically the same.  This is an area
 		# that could use some expansion.
 		local desc="Temporary error"
@ -862,7 +913,7 @@ gemini_response() { # gemini_response URL
 gopher_request() { # gopher_request URL
 	local url="$1"

-	# [7] § 2.1
+	# [7] Section 2.1
 	[[ "$url" =~ gopher://([^/?#:]*)(:([0-9]+))?(/((.))?(/?.*))?$ ]]
 	local server="${BASH_REMATCH[1]}" \
 	      port="${BASH_REMATCH[3]:-$BOLLUX_GOPHER_PORT}" \
@ -881,7 +932,7 @@ gopher_request() { # gopher_request URL
 # Handle a server response.
 gopher_response() { # gopher_response URL
 	local url="$1" pre=false
-	# [7] § 2.1
+	# [7] Section 2.1
 	#
 	# Note that this duplicates the code in `gopher_request'.  There might
 	# be a good way to thread this data through so that it's not computed
@ -896,7 +947,7 @@ gopher_response() { # gopher_response URL
 	# basically, each line in a gophermap starts with a character, its type,
 	# and then is followed by a series of tab-separated fields describing
 	# where that type is and how to display it.  The full list of original
-	# line types can be found in [6] § 3.8, though the types have also been
+	# line types can be found in [6] Section 3.8, though the types have also been
 	# extended over the years.  Since bollux can only display types that are
 	# text-ish, it only concerns itself with those in this case statement.
 	# All the others are simply downloaded.
@ -930,7 +981,7 @@ gopher_response() { # gopher_response URL
 		fi
 		;;
 	(*) # Anything else
-		# The list at [6] § 3.8 includes the following (noted where it
+		# The list at [6] Section 3.8 includes the following (noted where it
 		# might be good to differently handle them in the future):
 		#
 		# 2. Item is a CSO phone-book server            *****
@ -955,7 +1006,7 @@ gopher_response() { # gopher_response URL

 # Convert a gophermap naively to a gemini page.
 #
-# Based strongly on [8], but bash-ified.  Due to the properties of link lines in
+# Based strongly on [8] but bash-ified.  Due to the properties of link lines in
 # gemini, many of the item types in `gemini_reponse' can be linked to the proper
 # protocol handlers here -- so if a user is trying to reach a TCP link through
 # gopher, bollux won't have to handle it, for example.*
@ -1043,7 +1094,8 @@ gopher_convert() {
 # display the fetched content
 display() { # display METADATA [TITLE]
 	local -a less_cmd
-	local i mime charset
+	local mime charset
+
 	# split header line
 	local -a hdr
 	IFS=';' read -ra hdr <<<"$1"
@ -1156,16 +1208,6 @@ END
 	fi
 }

-# normalize files
-normalize() {
-	shopt -s extglob
-	while IFS= read -r; do
-		# normalize line endings
-		printf '%s\n' "${REPLY//$'\r'?($'\n')/}"
-	done
-	shopt -u extglob
-}
-
 # typeset a text/gemini document
 typeset_gemini() {
 	local pre=false
@ -1411,7 +1453,7 @@ handle_keypress() { # handle_keypress CODE
 		run blastoff -u "$REPLY"
 		;;
 	(54) # ` - change alt-text visibility and refresh
-		run cycle_list T_PRE_DISPLAY ,
+		run list_cycle T_PRE_DISPLAY ,
 		run blastoff "$BOLLUX_URL"
 		;;
 	(55) # 55-57 -- still available for binding
@ -1457,7 +1499,19 @@ extract_links() {
 	done
 }

-# download $BOLLUX_URL
+# Download a file.
+#
+# Any non-otherwise-handled MIME type will be downloaded using this function.
+# It uses 'dd' to download the resource to a temporary file, then attempts to
+# move it to $BOLLUX_DOWNDIR (by default, $PWD).  If that's not possible (either
+# because the target file already exists or the 'mv' invocation fails for some
+# reason), `download' logs the error and alerts the user where the temporary
+# file is saved.
+#
+# `download' works by reading the end of the pipe from `display'.  Binary data
+# sometimes arrives corrupted this way -- presumably because of how bash (or
+# the `while read' loops the data passes through) chunks it.  This is an area
+# that requires more research.
 download() {
 	tn="$(mktemp)"
 	log x "Downloading: '$BOLLUX_URL' => '$tn'..."
@ -1472,60 +1526,141 @@ download() {
 	fi
 }

-# initialize bollux
-bollux_init() {
-	# Trap cleanup
-	trap bollux_cleanup INT QUIT EXIT
-	# State
-	REDIRECTS=0
-	set -f
-	# History
-	declare -a HISTORY # history is kept in an array
-	HN=0               # position of history in the array
-	run mkdir -p "${BOLLUX_HISTFILE%/*}"
-	# Remove $BOLLUX_LESSKEY and re-generate keybindings (to catch rebinds)
-	run rm -f "$BOLLUX_LESSKEY"
-	mklesskey
-}
+# HISTORY #####################################################################
+#
+# While bollux saves history to a file ($BOLLUX_HISTFILE), it doesn't /do/
+# anything with the history that's been saved.  When I do implement the history
+# functionality, it'll probably be on top of a file:// protocol, which will make
+# it very simple to also implement bookmarks and the previewing of pages.  In
+# fact, I should be able to implement this change by the weekend (2021-03-07).
+#
+###############################################################################

-# clean up on exit
-bollux_cleanup() {
-	# Stubbed in case of need in future
-	:
-}
-
-# append a URL to history
+# Append a URL to history.
 history_append() { # history_append URL TITLE
-	BOLLUX_URL="$1"
-	# date/time, url, title (best guess)
-	run printf '%(%FT%T)T\t%s\t%s\n' -1 "$1" "$2" >>"$BOLLUX_HISTFILE"
-	HISTORY[$HN]="$BOLLUX_URL"
+	local url="$1"
+	local title="$2"
+
+	# Print the URL and its title (if given) to $BOLLUX_HISTFILE.
+	local fmt=''
+	fmt+='%(%FT%T)T\t'	# %(_)T calls directly to 'strftime'.
+	if (( $# == 2 )); then
+		fmt+='%s\t'	# $url
+		fmt+='%s\n'	# $title
+	else
+		fmt+='%s%s\n'	# printf needs a field for every argument.
+	fi
+	run printf -- "$fmt" -1 "$url" "$title" >>"$BOLLUX_HISTFILE"
+
+	# Add the URL to the HISTORY array and increment the pointer.
+	HISTORY[$HN]="$url"
 	((HN += 1))
+
+	# Update $BOLLUX_URL.
+	BOLLUX_URL="$url"
 }

-# move back in history (session)
+# Move back in session history.
 history_back() {
 	log d "HN=$HN"
+	# We need to subtract 2 from HN because it automatically increases by
+	# one with each call to `history_append'.  If we subtract 1, we'll just
+	# be at the end of the array again, reloading the page.
 	((HN -= 2))
+	
 	if ((HN < 0)); then
 		HN=0
 		log e "Beginning of history."
 		return 1
 	fi
+	
 	run blastoff "${HISTORY[$HN]}"
 }

-# move forward in history (session)
+# Move forward in session history.
 history_forward() {
 	log d "HN=$HN"
+	
 	if ((HN >= ${#HISTORY[@]})); then
 		HN="${#HISTORY[@]}"
 		log e "End of history."
 		return 1
 	fi
+	
 	run blastoff "${HISTORY[$HN]}"
 }

+# Load a URL.
+#
+# I was feeling fancy when I named this function -- a more descriptive name
+# would be 'bollux_goto' or something.
+blastoff() { # blastoff [-u] URL
+	local u
+
+	# `blastoff' assumes a "well-formed" URL by default -- i.e., a URL with
+	# a protocol string and no extraneous whitespace.  bollux can't trust
+	# the user to input a proper URL at a prompt, nor capsule authors to
+	# fully form their URLs, so the -u flag is necessary for those
+	# use-cases: it runs the URL through `uwellform' first.  Otherwise,
+	# bollux knows the URL is well-formed -- or should be, due to the
+	# Gemini specification.
+	if [[ "$1" == "-u" ]]; then
+		u="$(run uwellform "$2")"
+	else
+		u="$1"
+	fi
+
+	# After ensuring the URL is well-formed, `blastoff' needs to transform
+	# it according to the transform rules of RFC 3986 (see Section 5.2.2),
+	# which turns relative references into absolute references that bollux
+	# can use in its request to the server.  That's followed by a check
+	# that the protocol is set, defaulting to $BOLLUX_PROTO if it isn't.
+	#
+	# Implementation detail: because Bash is really stupid when it comes to
+	# arrays, the URL functions u* (defined in the URLS section) work with
+	# an array defined with `local -a' and passed by name, not by value.
+	# Thus, the `utransform url ...' instead of `utransform "${url[@]}"' or
+	# similar.  In addition, the `ucdef' and `ucset' functions take the name
+	# of the array element as parameters, not the element itself.
+	local -a url
+	run utransform url "$BOLLUX_URL" "$u"
+	if ! ucdef url[1]; then
+		run ucset url[1] "$BOLLUX_PROTO"
+	fi
+
+	# To try and keep `bollux' as extensible as possible, I've written it
+	# only to expect two functions for every protocol it supports:
+	# `x_request' and `x_response', where `x' is the name of the protocol
+	# (element 1 of the built `url' array).  `declare -F NAME' succeeds
+	# only if a function called NAME is defined, and fails otherwise.
+	#
+	# In between `x_request' and `x_response', `blastoff' normalizes the
+	# line endings to UNIX-style (LF) for ease of display.
+	#
+	# Note that "$url" on an array expands to its element 0, i.e. the full
+	# URL string.
+	#
+	# NOTE(review): each part of a pipeline normally runs in its own
+	# subshell, so the `die' below presumably ends only that subshell and
+	# not bollux itself -- confirm this is the intended behavior.
+	{
+		if declare -F "${url[1]}_request" >/dev/null 2>&1; then
+			run "${url[1]}_request" "$url"
+		else
+			die 99 "No request handler for '${url[1]}'"
+		fi
+	} | run normalize | {
+		if declare -F "${url[1]}_response" >/dev/null 2>&1; then
+			run "${url[1]}_response" "$url"
+		else
+			log d \
+			    "No response handler for '${url[1]}';" \
+			    " passing thru"
+			passthru
+		fi
+	}
+}
+
+# $BASH_SOURCE is an array that stores the "stack" of source calls in bash.  If
+# the first element of that array is equal to $0 (the name this script was
+# invoked as), that means the user called this script, instead of sourcing it.
+# In that case, and ONLY in that case, should bollux actually enter the main
+# loop of the program.  Otherwise, allow the sourcing environment to simply
+# source this script.
+#
+# This is basically the equivalent of python's 'if __name__ == "__main__":'
+# block.
 if [[ "${BASH_SOURCE[0]}" == "$0" ]]; then
 	${DEBUG:-false} && set -x
 	run bollux "$@"