Keep documenting
continuous-integration/drone/push Build is passing Details

This commit is contained in:
Case Duckworth 2021-03-03 12:56:42 -06:00
parent f0f51d22a1
commit c274c4f723
1 changed files with 270 additions and 162 deletions

432
bollux
View File

@ -23,17 +23,28 @@
# things. That's a major project though, and I'm scared.
#
# The following works were referenced when writing this, and I've tried to
# credit them in comments below. Following each link, I'll include a "short
# code" that I'll use to reference them in those comments, if necessary to keep
# them shorter than 80 characters.
# credit them in comments below. Further in the commentary on this script, I'll
# include the following link numbers to refer to these documents, in order to
# keep the line length as short as possible.
#
# [1]: https://github.com/dylanaraps/pure-bash-bible [PBB]
# [2]: https://tools.ietf.org/html/rfc3986 [URLspec]
# [3]: https://gemini.circumlunar.space/docs/specification.html [GEMspec]
# [4]: https://tools.ietf.org/html/rfc1436 [GOPHERprotocol]
# [5]: https://tools.ietf.org/html/rfc4266 [GOPHERurl]
# [6]: [GOPHER_GEMINI]:
# [1]: Pure Bash Bible
# https://github.com/dylanaraps/pure-bash-bible
# [2]: URL Specification
# https://tools.ietf.org/html/rfc3986
# [3]: Gemini Specification
# https://gemini.circumlunar.space/docs/specification.html
# [4]: Gemini Best Practices
# https://gemini.circumlunar.space/docs/best-practices.gmi
# [5]: Gemini FAQ
# https://gemini.circumlunar.space/docs/faq.gmi
# [6]: Gopher Specification
# https://tools.ietf.org/html/rfc1436
# [7]: Gopher URLs
# https://tools.ietf.org/html/rfc4266
# [8]: Gophermap to Gemini script (by tomasino)
# https://github.com/jamestomasino/dotfiles-minimal/blob/master/bin/gophermap2gemini.awk
# [9]: OpenSSL `s_client' online manual
# https://www.openssl.org/docs/manmaster/man1/openssl-s_client.html
#
# Code:
@ -90,14 +101,14 @@ trap bollux_quit SIGINT
# Bash built-in replacement for `sleep`
#
# PBB: #use-read-as-an-alternative-to-the-sleep-command
# [1]: #use-read-as-an-alternative-to-the-sleep-command
sleep() { # sleep SECONDS
	local duration="$1"
	# Reading from a process substitution that never produces any data
	# simply blocks until the timeout expires.  The timeout makes `read'
	# return non-zero, so that status is swallowed.
	read -r -t "$duration" <> <(:) || true
}
# Trim leading and trailing whitespace from a string.
#
# PBB: #trim-leading-and-trailing-white-space-from-string
# [1]: #trim-leading-and-trailing-white-space-from-string
trim_string() { # trim_string STRING
: "${1#"${1%%[![:space:]]*}"}"
: "${_%"${_##*[![:space:]]}"}"
@ -130,14 +141,14 @@ log() { # log LEVEL MESSAGE
local fmt
case "$1" in
[dD]*) # debug
([dD]*) # debug
[[ "$BOLLUX_LOGLEVEL" == DEBUG ]] || return
fmt=34
;;
[eE]*) # error
([eE]*) # error
fmt=31
;;
*) fmt=1 ;;
(*) fmt=1 ;;
esac
shift
@ -190,14 +201,14 @@ bollux() {
bollux_args() {
while getopts :hvq OPT; do
case "$OPT" in
h)
(h)
bollux_usage
exit
;;
v) BOLLUX_LOGLEVEL=DEBUG ;;
q) BOLLUX_LOGLEVEL=QUIET ;;
:) die 1 "Option -$OPTARG requires an argument" ;;
*) die 1 "Unknown option: -$OPTARG" ;;
(v) BOLLUX_LOGLEVEL=DEBUG ;;
(q) BOLLUX_LOGLEVEL=QUIET ;;
(:) die 1 "Option -$OPTARG requires an argument" ;;
(*) die 1 "Unknown option: -$OPTARG" ;;
esac
done
shift $((OPTIND - 1))
@ -337,7 +348,8 @@ blastoff() { # blastoff [-u] URL
}
}
# URLS: https://tools.ietf.org/html/rfc3986 ####################################
# URLS #########################################################################
# https://tools.ietf.org/html/rfc3986 [2]
#
# Most of these functions are Bash implementations of functionality laid out in
# the linked RFC specification. I'll refer to the section numbers above each
@ -369,8 +381,8 @@ uwellform() {
# Split a URL into its constituent parts, placing them all in the given array.
#
# The regular expression given at the top of the function ($re) is taken
# directly from RFC 3986, Appendix B -- and if the URL provided doesn't match
# it, the function bails.
# directly from [2] Appendix B -- and if the URL provided doesn't match it, the
# function bails.
#
# `usplit' takes advantage of bash's regex abilities: when the regex comparison
# operator `=~' is used, bash populates the array $BASH_REMATCH with the groups
@ -432,8 +444,6 @@ ujoin() { # ujoin NAME:ARRAY
log d "${U[0]}"
}
# Three small utility functions for dealing with URL components.
#
# `ucdef' checks whether a URL component is blank or not -- if a component
# doesn't exist, `usplit' writes $UC_BLANK there instead (which is :?: by
# default, though it really doesn't matter much *what* it is, as long as it's
@ -458,6 +468,75 @@ ucset() { # ucset NAME VALUE
run ujoin "${1/\[*\]/}"
}
# [1]: encode a URL using percent-encoding.
#
# Prints STRING with every character outside the set [a-zA-Z0-9.~_-] replaced
# by `%XX' (upper-case hex), followed by a newline.  LC_ALL is set to C for the
# duration of the function.
uencode() { # uencode URL:STRING
	local LC_ALL=C
	# Keep the loop state local so a caller's own `i' is left untouched.
	local i char
	for ((i = 0; i < ${#1}; i++)); do
		char="${1:i:1}"
		case "$char" in
		([a-zA-Z0-9.~_-]) printf '%s' "$char" ;;
		# A leading quote makes printf use the character's numeric value.
		(*) printf '%%%02X' "'$char" ;;
		esac
	done
	printf '\n'
}
# [1]: decode a percent-encoded URL.
#
# Prints STRING with every `+' turned into a space and every `%XX' turned into
# the byte with hex value XX, followed by a newline.
udecode() { # udecode URL:STRING
	local rest="${1//+/ }"
	local escaped=""
	# `printf %b' treats a backslash as the start of an escape sequence, so
	# double every backslash already present in the input to keep it
	# literal in the output.
	while [[ "$rest" == *\\* ]]; do
		escaped+="${rest%%\\*}\\\\"
		rest="${rest#*\\}"
	done
	escaped+="$rest"
	# Rewrite each `%' as `\x' and let `%b' expand the hex escapes.
	printf '%b\n' "${escaped//%/\\x}"
}
# Implement [2] § 5.2.4, "Remove Dot Segments"
#
# Prints PATH with its `.' and `..' segments resolved and runs of `/' collapsed
# to a single `/', followed by a newline.  Returns 1 (printing nothing) if the
# next segment cannot be split off the remaining input.
pundot() { # pundot PATH:STRING
	local input="$1"
	local output=""
	while [[ -n "$input" ]]; do
		if [[ "$input" =~ ^\.\.?/ ]]; then
			# A: drop a leading "./" or "../".
			input="${input#"${BASH_REMATCH[0]}"}"
		elif [[ "$input" =~ ^/\.(/|$) ]]; then
			# B: a leading "/./" or "/." becomes "/".
			input="/${input#"${BASH_REMATCH[0]}"}"
		elif [[ "$input" =~ ^/\.\.(/|$) ]]; then
			# C: a leading "/../" or "/.." becomes "/", and the last
			# segment already moved to the output is removed.
			input="/${input#"${BASH_REMATCH[0]}"}"
			if [[ "$output" =~ /?[^/]+$ ]]; then
				# The matched text is quoted so that it is removed
				# literally, not interpreted as a glob pattern.
				output="${output%"${BASH_REMATCH[0]}"}"
			fi
		elif [[ "$input" == . || "$input" == .. ]]; then
			# D: a lone "." or ".." is discarded.
			input=
		else
			# E: move the first segment (with its leading "/", if
			# any) from the input to the output.
			[[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || return 1
			output="$output${BASH_REMATCH[1]}"
			input="${BASH_REMATCH[2]}"
		fi
	done
	# Collapse every run of slashes into a single slash.
	while [[ "$output" == *//* ]]; do
		output="${output%%//*}/${output#*//}"
	done
	printf '%s\n' "$output"
}
# Implement [2] § 5.2.3, "Merge Paths"
#
# BASE and REFERENCE are the *names* of arrays; element 3 of each is used as
# the path, and element 2 of BASE is tested with `ucdef' before the "/"-prefix
# case.  The merged path is printed on stdout, followed by a newline:
#
# - REFERENCE path blank: BASE's path, with runs of `/' collapsed.
# - BASE element 2 defined and BASE path blank: "/" plus REFERENCE's path, with
#   runs of `/' collapsed.
# - otherwise: BASE's path up to (not including) its last "/", then "/", then
#   REFERENCE's path minus one leading "/".
pmerge() { # pmerge BASE:ARRAY REFERENCE:ARRAY
	local -n b="$1"
	local -n r="$2"
	local merged

	# The component names are quoted so the shell never treats `r[3]' and
	# friends as filename globs.
	if ucblank 'r[3]'; then
		merged="${b[3]}"
	elif ucdef 'b[2]' && ucblank 'b[3]'; then
		merged="/${r[3]}"
	else
		local bp=""
		if [[ "${b[3]}" == */* ]]; then
			bp="${b[3]%/*}"
		fi
		printf '%s/%s\n' "${bp%/}" "${r[3]#/}"
		return
	fi

	# Collapse every run of slashes into a single slash.
	while [[ "$merged" == *//* ]]; do
		merged="${merged%%//*}/${merged#*//}"
	done
	printf '%s\n' "$merged"
}
# `utransform' implements [2] § 5.2.2, "Transform References."
#
# That section conveniently lays out a pseudocode algorithm describing how URL
# resources should be transformed from one to another. This function just
# implements that pseudocode in Bash, using the helper functions defined above.
utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING
local -a B R # base, reference
local -n T="$1" # target
@ -520,128 +599,136 @@ utransform() { # utransform TARGET:ARRAY BASE:STRING REFERENCE:STRING
ujoin T
}
# Remove dot segments from a path (RFC 3986 § 5.2.4).
#
# Prints PATH with its `.' and `..' segments resolved and runs of `/' collapsed
# to a single `/', followed by a newline.  Returns 1 (printing nothing) if the
# next segment cannot be split off the remaining input.
pundot() { # pundot PATH:STRING
	local input="$1"
	local output=""
	while [[ -n "$input" ]]; do
		if [[ "$input" =~ ^\.\.?/ ]]; then
			# A: drop a leading "./" or "../".
			input="${input#"${BASH_REMATCH[0]}"}"
		elif [[ "$input" =~ ^/\.(/|$) ]]; then
			# B: a leading "/./" or "/." becomes "/".
			input="/${input#"${BASH_REMATCH[0]}"}"
		elif [[ "$input" =~ ^/\.\.(/|$) ]]; then
			# C: a leading "/../" or "/.." becomes "/", and the last
			# segment already moved to the output is removed.
			input="/${input#"${BASH_REMATCH[0]}"}"
			if [[ "$output" =~ /?[^/]+$ ]]; then
				# The matched text is quoted so that it is removed
				# literally, not interpreted as a glob pattern.
				output="${output%"${BASH_REMATCH[0]}"}"
			fi
		elif [[ "$input" == . || "$input" == .. ]]; then
			# D: a lone "." or ".." is discarded.
			input=
		else
			# E: move the first segment (with its leading "/", if
			# any) from the input to the output.
			[[ $input =~ ^(/?[^/]*)(/?.*)$ ]] || return 1
			output="$output${BASH_REMATCH[1]}"
			input="${BASH_REMATCH[2]}"
		fi
	done
	# Collapse every run of slashes into a single slash.
	while [[ "$output" == *//* ]]; do
		output="${output%%//*}/${output#*//}"
	done
	printf '%s\n' "$output"
}
# GEMINI #######################################################################
# https://gemini.circumlunar.space/docs/specification.html [3]
#
# The reason we're all here, folks. Gemini is a new protocol that aims to be a
# middle ground between Gopher and HTTP, blah blah. You know the spiel. I know
# the spiel. It's great stuff!
#
################################################################################
# Merge a reference path onto a base path (RFC 3986 § 5.2.3).
#
# BASE and REFERENCE are the *names* of arrays; element 3 of each is used as
# the path, and element 2 of BASE is tested with `ucdef' before the "/"-prefix
# case.  The merged path is printed on stdout, followed by a newline:
#
# - REFERENCE path blank: BASE's path, with runs of `/' collapsed.
# - BASE element 2 defined and BASE path blank: "/" plus REFERENCE's path, with
#   runs of `/' collapsed.
# - otherwise: BASE's path up to (not including) its last "/", then "/", then
#   REFERENCE's path minus one leading "/".
pmerge() { # pmerge BASE:ARRAY REFERENCE:ARRAY
	local -n b="$1"
	local -n r="$2"
	local merged

	# The component names are quoted so the shell never treats `r[3]' and
	# friends as filename globs.
	if ucblank 'r[3]'; then
		merged="${b[3]}"
	elif ucdef 'b[2]' && ucblank 'b[3]'; then
		merged="/${r[3]}"
	else
		local bp=""
		if [[ "${b[3]}" == */* ]]; then
			bp="${b[3]%/*}"
		fi
		printf '%s/%s\n' "${bp%/}" "${r[3]#/}"
		return
	fi

	# Collapse every run of slashes into a single slash.
	while [[ "$merged" == *//* ]]; do
		merged="${merged%%//*}/${merged#*//}"
	done
	printf '%s\n' "$merged"
}
# PBB: percent-encode a string.
#
# Prints STRING with every character outside the set [a-zA-Z0-9.~_-] replaced
# by `%XX' (upper-case hex), followed by a newline.  LC_ALL is set to C for the
# duration of the function.
uencode() { # uencode URL:STRING
	local LC_ALL=C
	# Keep the loop state local so a caller's own `i' is left untouched.
	local i char
	for ((i = 0; i < ${#1}; i++)); do
		char="${1:i:1}"
		case "$char" in
		[a-zA-Z0-9.~_-])
			printf '%s' "$char"
			;;
		*)
			# A leading quote makes printf use the character's
			# numeric value.
			printf '%%%02X' "'$char"
			;;
		esac
	done
	printf '\n'
}
# PBB: decode a percent-encoded string.
#
# Prints STRING with every `+' turned into a space and every `%XX' turned into
# the byte with hex value XX, followed by a newline.
udecode() { # udecode URL:STRING
	local rest="${1//+/ }"
	local escaped=""
	# `printf %b' treats a backslash as the start of an escape sequence, so
	# double every backslash already present in the input to keep it
	# literal in the output.
	while [[ "$rest" == *\\* ]]; do
		escaped+="${rest%%\\*}\\\\"
		rest="${rest#*\\}"
	done
	escaped+="$rest"
	# Rewrite each `%' as `\x' and let `%b' expand the hex escapes.
	printf '%b\n' "${escaped//%/\\x}"
}
# GEMINI
# https://gemini.circumlunar.space/docs/specification.html
# Request a resource from a gemini server - see [3] §§ 2, 4.
gemini_request() { # gemini_request URL
local -a url
usplit url "$1"
# get rid of userinfo
# Remove user info from the URL.
#
# URLs can technically be of the form <proto>://<user>:<pass>@<domain>
# (see [2], § 3.2, "Authority"). I don't know of any Gemini servers
# that use the <user> or <pass> parts, so `gemini_request' just strips
# them from the requested URL. This will need to be changed if servers
# decide to use this method of authentication.
ucset url[2] "${url[2]#*@}"
# Determine the port to request.
#
# The default port for Gemini is 1965 (the year of the first Gemini
# space mission), but some servers use a different port. In a URL, a
# port can be specified after the domain, separated with a colon. The
# user can also request a different default port, for whatever reason,
# by setting the variable $BOLLUX_GEMINI_PORT.
local port
if [[ "${url[2]}" == *:* ]]; then
port="${url[2]#*:}"
ucset url[2] "${url[2]%:*}"
else
port=1965 # TODO variablize
port="$BOLLUX_GEMINI_PORT"
fi
# Build the SSL command to request the resource.
#
# This is the beating heart of bollux, the command that does all the
# important work of actually fetching the gemini content the user wants
# to read. I've broken it out into an array for ease of editing (and
# now, commenting!).
local ssl_cmd=(
# `s_client' is OpenSSL's reference client implementation. In the
# manual [9] it says not to use it, but who reads the manual,
# anyway?
openssl s_client
-crlf -quiet -connect "${url[2]}:$port"
-servername "${url[2]}" # SNI
-no_ssl3 -no_tls1 -no_tls1_1 # disable old TLS/SSL versions
-crlf # Automatically add CR+LF to line
-quiet # Don't print all the cert stuff
# -ign_eof # `-quiet' implies `-ign_eof'
-connect "${url[2]}:$port" # The server and port to connect
-servername "${url[2]}" # SNI: Server Name Indication
-no_ssl3 -no_tls1 -no_tls1_1 # disable old TLS/SSL versions
)
# Actually request the resource.
#
# I could probably use `printf '%s\r\n' "$url" | run "${ssl_cmd[@]}"',
# and maybe I should. I wrote this little line a while ago.
#
# NOTE(review): "$url" on an array expands to element 0 only; presumably
# that element holds the full, re-joined URL -- confirm against `ujoin'.
run "${ssl_cmd[@]}" <<<"$url"
}
# Handle the gemini response - see [3] § 3.
gemini_response() { # gemini_response URL
local url code meta
local title
url="$1"
local code meta # received on the first line of the response
local title # determined by a clunky heuristic, see read loop: (2*)
local url="$1" # the currently-visited URL.
# we need a loop here so it waits for the first line
# Read the first line.
#
# The first line of a Gemini response is the "header line," which is of
# the format "STATUS METADATA\r\n". I use a `while' loop using `read'
# with a timeout to handle non-responsive servers. Technically,
# METADATA shouldn't exceed 1024 bytes, but I can't think of a good way
# to break at that point -- so bollux is not quite spec-compliant in
# this regard.
#
# Additionally, there are sometimes bugs with caching and
# byte-shifting(?) when trying to download a binary file (see
# `download', below), but I'm not sure how to remedy that issue either.
# It requires more research.
while read -t "$BOLLUX_TIMEOUT" -r code meta ||
{ (($? > 128)) && die 99 "Timeout."; }; do
break
done
log d "[$code] $meta"
# Branch depending on the status code. See [3], Appendix 1.
#
# Notes:
# - All codes other than 3* (Redirects) reset the REDIRECTS counter.
# - I branch on the first digit of the status code, instead of both, to
# minimize the amount of duplicated code I need to write.
case "$code" in
1*) # input
(1*) # INPUT
# Gemini allows GET-style requests, and the INPUT family of
# response codes facilitate them. `10' is for standard input,
# and `11' is for sensitive information, like passwords.
REDIRECTS=0
BOLLUX_URL="$url"
case "$code" in
10) run prompt "$meta" ;;
11) run prompt "$meta" -s ;; # password input
(10) run prompt "$meta" ;;
(11) run prompt "$meta" -s ;; # sensitive input
esac
run history_append "$url" "${title:-}"
run blastoff "?$(uencode "$REPLY")"
;;
2*) # OK
(2*) # OK
# The `20' family of requests is like HTTP's `200' family: it
# means that the request worked and the server is sending the
# requested content.
REDIRECTS=0
BOLLUX_URL="$url"
# read ahead to find a title
# Janky heuristic to guess the title of a page.
#
# This while loop reads through the file looking for a line
# starting with `#', which is a level-one heading in text/gemini
# (see [3], § 5). It assumes that the first such heading is the
# title of the page, and uses that title for the terminal title
# and for the history.
local pretitle
while read -r; do
# Since looping through the file consumes it (that is,
# the file pointer (I think?) moves away from the
# beginning of the file), the content we've read so far
# must be saved in a `pretitle' variable, so it can be
# printed later with the rest of the page.
pretitle="$pretitle$REPLY"$'\n'
if [[ "$REPLY" =~ ^#[[:space:]]*(.*) ]]; then
title="${BASH_REMATCH[1]}"
@ -649,35 +736,55 @@ gemini_response() { # gemini_response URL
fi
done
run history_append "$url" "${title:-}"
# read the body out and pipe it to display
# Print the pretitle and the rest of the document (`passthru' is
# a pure-bash rewrite of `cat'), and pipe it through `display'
# for typesetting.
{
printf '%s' "$pretitle"
passthru
} | run display "$meta" "${title:-}"
;;
3*) # redirect
(3*) # REDIRECT
# Redirects are a fundamental part of any hypertext framework,
# and if I remember correctly, one of the main reasons
# solderpunk and others began thinking about gemini (the others
# being TLS and URLs, I believe).
#
# Note that although [3] specifies both a temporary (30) and
# permanent (31) redirect, bollux isn't smart enough to make a
# distinction. I'm not sure what the difference would be in
# practice, anyway.
#
# Per [4], bollux limits the number of redirects a page is
# allowed to make (by default, five). Change `$BOLLUX_MAXREDIR'
# to customize that limit.
((REDIRECTS += 1))
if ((REDIRECTS > BOLLUX_MAXREDIR)); then
die $((100 + code)) "Too many redirects!"
fi
BOLLUX_URL="$url"
# Another discussion on [4] pertains to the value of alerting
# the user to (A) a cross-domain redirect, or even (B) all
# redirects. I have yet to implement that particular
# functionality, and even when I do implement it I don't think
# (B) will be the default. Perhaps (A) though. No notification
# will also be an option, however.
run blastoff "$meta" # TODO: confirm redirect
;;
4*) # temporary error
(4*) # TEMPORARY ERROR
REDIRECTS=0
die "$((100 + code))" "Temporary error [$code]: $meta"
;;
5*) # permanent error
(5*) # PERMANENT ERROR
REDIRECTS=0
die "$((100 + code))" "Permanent error [$code]: $meta"
;;
6*) # certificate error
(6*) # CERTIFICATE ERROR
REDIRECTS=0
log d "Not implemented: Client certificates"
# TODO: recheck the spec
die "$((100 + code))" "[$code] $meta"
;;
*)
(*)
[[ -z "${code-}" ]] && die 100 "Empty response code."
die "$((100 + code))" "Unknown response code: $code."
;;
@ -720,16 +827,16 @@ gopher_response() { # gopher_response URL
log d "TYPE='$type'"
case "$type" in
0) # text
(0) # text
run display text/plain
;;
1) # menu
(1) # menu
run gopher_convert | run display text/gemini
;;
3) # failure
(3) # failure
die 203 "GOPHER: failed"
;;
7) # search
(7) # search
if [[ "$url" =~ $'\t' ]]; then
run gopher_convert | run display text/gemini
else
@ -737,19 +844,12 @@ gopher_response() { # gopher_response URL
run blastoff "$url $REPLY"
fi
;;
*) # something else
(*) # something else
run download "$url"
;;
esac
}
# 'cat' but in pure bash
#
# Copies stdin to stdout line by line.  Takes no arguments; uses (and
# overwrites) the global $REPLY.
passthru() {
	while IFS= read -r; do
		printf '%s\n' "$REPLY"
	done
	# At end of input `read' fails, but it still stores an unterminated
	# final line in $REPLY.  Emit it (without adding a newline) instead of
	# silently dropping it.
	[[ -z "$REPLY" ]] || printf '%s' "$REPLY"
}
# convert gophermap to text/gemini (probably naive)
gopher_convert() {
local type label path server port regex
@ -768,19 +868,19 @@ gopher_convert() {
continue
fi
case "$type" in
.) # end of file
(.) # end of file
printf '.\n'
break
;;
i) # label
(i) # label
case "$label" in
'#'* | '*'[[:space:]]*)
('#'* | '*'[[:space:]]*)
if $pre; then
printf '%s\n' '```'
pre=false
fi
;;
*)
(*)
if ! $pre; then
printf '%s\n' '```'
pre=true
@ -789,14 +889,14 @@ gopher_convert() {
esac
printf '%s\n' "$label"
;;
h) # html link
(h) # html link
if $pre; then
printf '%s\n' '```'
pre=false
fi
printf '=> %s %s\n' "${path:4}" "$label"
;;
T) # telnet link
(T) # telnet link
if $pre; then
printf '%s\n' '```'
pre=false
@ -804,7 +904,7 @@ gopher_convert() {
printf '=> telnet://%s:%s/%s%s %s\n' \
"$server" "$port" "$type" "$path" "$label"
;;
*) # other type
(*) # other type
if $pre; then
printf '%s\n' '```'
pre=false
@ -822,6 +922,14 @@ gopher_convert() {
exec 9>&-
}
# 'cat' but in pure bash
#
# Copies stdin to stdout line by line.  Takes no arguments; uses (and
# overwrites) the global $REPLY.
passthru() {
	while IFS= read -r; do
		printf '%s\n' "$REPLY"
	done
	# At end of input `read' fails, but it still stores an unterminated
	# final line in $REPLY.  Emit it (without adding a newline) instead of
	# silently dropping it.
	[[ -z "$REPLY" ]] || printf '%s' "$REPLY"
}
# display the fetched content
display() { # display METADATA [TITLE]
local -a less_cmd
@ -839,7 +947,7 @@ display() { # display METADATA [TITLE]
for ((i = 1; i <= "${#hdr[@]}"; i++)); do
h="${hdr[$i]}"
case "$h" in
*charset=*) charset="${h#*=}" ;;
(*charset=*) charset="${h#*=}" ;;
esac
done
@ -849,7 +957,7 @@ display() { # display METADATA [TITLE]
log debug "mime='$mime'; charset='$charset'"
case "$mime" in
text/*)
(text/*)
set_title "$title${title:+ - }bollux"
# render ANSI color escapes and don't wrap pre-formatted blocks
less_cmd=(less -RS)
@ -886,7 +994,7 @@ display() { # display METADATA [TITLE]
run "${less_cmd[@]}" && bollux_quit
} || run handle_keypress "$?"
;;
*) run download "$BOLLUX_URL" ;;
(*) run download "$BOLLUX_URL" ;;
esac
}
@ -896,8 +1004,8 @@ less_prompt_escape() { # less_prompt_escape STRING
for ((i = 0; i < ${#1}; i++)); do
: "${1:i:1}"
case "$_" in
[\?:\.%\\]) printf '\%s' "$_" ;;
*) printf '%s' "$_" ;;
([\?:\.%\\]) printf '\%s' "$_" ;;
(*) printf '%s' "$_" ;;
esac
done
printf '\n'
@ -965,7 +1073,7 @@ typeset_gemini() {
while IFS= read -r; do
case "$REPLY" in
'```'*)
('```'*)
PRE_LINE_FORCE=false
if $pre; then
pre=false
@ -973,28 +1081,28 @@ typeset_gemini() {
pre=true
fi
case "${T_PRE_DISPLAY%%,*}" in
pre)
(pre)
:
;;
alt | both)
(alt | both)
$pre && PRE_LINE_FORCE=true \
gemini_pre "${REPLY#\`\`\`}"
;;
esac
continue
;;
'=>'*)
('=>'*)
: $((ln += 1))
gemini_link "$REPLY" $pre "$ln"
;;
'#'*) gemini_header "$REPLY" $pre ;;
'*'[[:space:]]*)
('#'*) gemini_header "$REPLY" $pre ;;
('*'[[:space:]]*)
gemini_list "$REPLY" $pre
;;
'>'*)
('>'*)
gemini_quote "$REPLY" $pre
;;
*) gemini_text "$REPLY" $pre ;;
(*) gemini_text "$REPLY" $pre ;;
esac
done
}
@ -1103,25 +1211,25 @@ fold_line() { # fold_line [OPTIONS...] WIDTH TEXT
OPTIND=0
while getopts nm:f:l:B:A: OPT; do
case "$OPT" in
n) # -n = no trailing newline
(n) # -n = no trailing newline
newline=false
;;
m) # -m MARGIN = margin for all lines
(m) # -m MARGIN = margin for all lines
margin_all="$OPTARG"
;;
f) # -f MARGIN = margin for first line
(f) # -f MARGIN = margin for first line
margin_first="$OPTARG"
;;
l) # -l LENGTH = length of line before starting fold
(l) # -l LENGTH = length of line before starting fold
ll="$OPTARG"
;;
B) # -B BEFORE = text to insert before each line
(B) # -B BEFORE = text to insert before each line
before="$OPTARG"
;;
A) # -A AFTER = text to insert after each line
(A) # -A AFTER = text to insert after each line
after="$OPTARG"
;;
*) return 1 ;;
(*) return 1 ;;
esac
done
shift "$((OPTIND - 1))"
@ -1159,37 +1267,37 @@ fold_line() { # fold_line [OPTIONS...] WIDTH TEXT
# use the exit code from less (see mklesskey) to do things
handle_keypress() { # handle_keypress CODE
case "$1" in
48) # o - open a link -- show a menu of links on the page
(48) # o - open a link -- show a menu of links on the page
run select_url "$BOLLUX_PAGESRC"
;;
49) # g - goto a url -- input a new url
(49) # g - goto a url -- input a new url
prompt GO
run blastoff -u "$REPLY"
;;
50) # [ - back in the history
(50) # [ - back in the history
run history_back || {
sleep 0.5
run blastoff "$BOLLUX_URL"
}
;;
51) # ] - forward in the history
(51) # ] - forward in the history
run history_forward || {
sleep 0.5
run blastoff "$BOLLUX_URL"
}
;;
52) # r - re-request the current resource
(52) # r - re-request the current resource
run blastoff "$BOLLUX_URL"
;;
53) # G - goto a url (pre-filled with current)
(53) # G - goto a url (pre-filled with current)
run prompt -u GO
run blastoff -u "$REPLY"
;;
54) # ` - change alt-text visibility and refresh
(54) # ` - change alt-text visibility and refresh
run cycle_list T_PRE_DISPLAY ,
run blastoff "$BOLLUX_URL"
;;
55) # 55-57 -- still available for binding
(55) # 55-57 -- still available for binding
die "$?" "less(1) error"
;;
esac
@ -1206,8 +1314,8 @@ select_url() { # select_url FILE
PS3="OPEN> "
select u in "${MAPFILE[@]}"; do
case "$REPLY" in
q) bollux_quit ;;
[^0-9]*) run blastoff -u "$REPLY" && break ;;
(q) bollux_quit ;;
([^0-9]*) run blastoff -u "$REPLY" && break ;;
esac
run blastoff "${u%%[[:space:]]*}" && break
done </dev/tty