From 20395cb8269944b8d336b99c950bc6a90a8329e0 Mon Sep 17 00:00:00 2001 From: rmgr Date: Tue, 15 Sep 2020 20:02:29 +0930 Subject: [PATCH 01/61] Add support for http/https -> gemini proxy --- av98.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/av98.py b/av98.py index f74b532..a0f6581 100755 --- a/av98.py +++ b/av98.py @@ -274,6 +274,7 @@ class GeminiClient(cmd.Cmd): "auto_follow_redirects" : True, "gopher_proxy" : None, "tls_mode" : "tofu", + "http_proxy": None, "cache" : False } @@ -316,8 +317,15 @@ class GeminiClient(cmd.Cmd): # Don't try to speak to servers running other protocols if gi.scheme in ("http", "https"): - webbrowser.open_new_tab(gi.url) - return + if not self.options.get("http_proxy",None): + webbrowser.open_new_tab(gi.url) + return + else: + print("Do you want to try to open this link with a http proxy?") + resp = input("(Y)/N ") + if resp.strip().lower() in ("n","no"): + webbrowser.open_new_tab(gi.url) + return elif gi.scheme == "gopher" and not self.options.get("gopher_proxy", None): print("""AV-98 does not speak Gopher natively. 
However, you can use `set gopher_proxy hostname:port` to tell it about a @@ -543,7 +551,9 @@ you'll be able to transparently follow links to Gopherspace!""") # For Gopher requests, use the configured proxy host, port = self.options["gopher_proxy"].rsplit(":", 1) self._debug("Using gopher proxy: " + self.options["gopher_proxy"]) - + elif gi.scheme in ("http", "https"): + host, port = self.options["http_proxy"].rsplit(":",1) + self._debug("Using http proxy: " + self.options["http_proxy"]) # Do DNS resolution addresses = self._get_addresses(host, port) From ab913ebf54b591d14fb22404d1cda1aab1dbb401 Mon Sep 17 00:00:00 2001 From: rmgr Date: Tue, 15 Sep 2020 20:19:32 +0930 Subject: [PATCH 02/61] Add support for touring a range where the start index is bigger than the end index --- av98.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/av98.py b/av98.py index f74b532..38bdd15 100755 --- a/av98.py +++ b/av98.py @@ -1309,9 +1309,15 @@ Current tour can be listed with `tour ls` and scrubbed with `tour clear`.""" self.waypoints.append(gi) elif len(pair) == 2: # Two endpoints for a range of indices - for n in range(int(pair[0]), int(pair[1]) + 1): - gi = self.lookup[n-1] - self.waypoints.append(gi) + if int(pair[0]) < int(pair[1]): + for n in range(int(pair[0]), int(pair[1]) + 1): + gi = self.lookup[n-1] + self.waypoints.append(gi) + else: + for n in range(int(pair[0]), int(pair[1]) - 1, -1): + gi = self.lookup[n-1] + self.waypoints.append(gi) + else: # Syntax error print("Invalid use of range syntax %s, skipping" % index) From 92da876795fe590a57188848e0edcfc4b3513e81 Mon Sep 17 00:00:00 2001 From: Callum Brown Date: Tue, 22 Sep 2020 17:05:23 +0100 Subject: [PATCH 03/61] Pass extra information to _handle_cert_request This fixes a bug introduced in f45630 when handling a certificate request was factored out of _fetch_over_network. Also make the options consistent in terms of grammar. 
--- av98.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/av98.py b/av98.py index f74b532..8e79d71 100755 --- a/av98.py +++ b/av98.py @@ -484,7 +484,7 @@ you'll be able to transparently follow links to Gopherspace!""") # Client cert elif status.startswith("6"): - self._handle_cert_request(meta) + self._handle_cert_request(meta, status, gi.host) return self._fetch_over_network(gi) # Invalid status @@ -700,7 +700,7 @@ you'll be able to transparently follow links to Gopherspace!""") for _, filename in self.cache.values(): assert os.path.isfile(filename) - def _handle_cert_request(self, meta): + def _handle_cert_request(self, meta, status, host): # Don't do client cert stuff in restricted mode, as in principle # it could be used to fill up the disk by creating a whole lot of @@ -719,7 +719,7 @@ you'll be able to transparently follow links to Gopherspace!""") print("The server did not accept your certificate.") print("You may need to e.g. coordinate with the admin to get your certificate fingerprint whitelisted.") else: - print("The site {} is requesting a client certificate.".format(gi.host)) + print("The site {} is requesting a client certificate.".format(host)) print("This will allow the site to recognise you across requests.") # Give the user choices @@ -727,8 +727,8 @@ you'll be able to transparently follow links to Gopherspace!""") print("1. Give up.") print("2. Generate a new transient certificate.") print("3. Generate a new persistent certificate.") - print("4. Load a previously generated persistent.") - print("5. Load certificate from an external file.") + print("4. Load a previously generated certificate.") + print("5. 
Load a certificate from an external file.") choice = input("> ").strip() if choice == "2": self._generate_transient_cert_cert() From 5dfe62fc63f278626e524728bdfd609da2c79029 Mon Sep 17 00:00:00 2001 From: sario528 <28790463+sario528@users.noreply.github.com> Date: Thu, 15 Apr 2021 05:04:58 -0500 Subject: [PATCH 04/61] Swap GUS for geminispace.info --- av98.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/av98.py b/av98.py index f74b532..a1153ac 100755 --- a/av98.py +++ b/av98.py @@ -1347,8 +1347,8 @@ Use 'ls -l' to see URLs.""" self.page_index = 0 def do_gus(self, line): - """Submit a search query to the GUS search engine.""" - gus = GeminiItem("gemini://gus.guru/search") + """Submit a search query to the Gemini search engine.""" + gus = GeminiItem("gemini://geminispace.info/search") self._go_to_gi(gus.query(line)) def do_history(self, *args): From 6ee2a0716d5a41986f974699f39c995e3d91510d Mon Sep 17 00:00:00 2001 From: Nic Waller Date: Wed, 29 Jun 2022 20:57:34 -0700 Subject: [PATCH 05/61] improve compatibility with Python 3.10 --- av98.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/av98.py b/av98.py index f74b532..c5ee87a 100755 --- a/av98.py +++ b/av98.py @@ -548,8 +548,15 @@ you'll be able to transparently follow links to Gopherspace!""") addresses = self._get_addresses(host, port) # Prepare TLS context - protocol = ssl.PROTOCOL_TLS if sys.version_info.minor >=6 else ssl.PROTOCOL_TLSv1_2 - context = ssl.SSLContext(protocol) + def _newest_supported_protocol(): + if sys.version_info >= (3, 10): + return ssl.PROTOCOL_TLS_CLIENT + elif sys.version_info >= (3, 6): + return ssl.PROTOCOL_TLS + else: + return ssl.PROTOCOL_TLSv1_2 + context = ssl.SSLContext(_newest_supported_protocol()) + # Use CAs or TOFU if self.options["tls_mode"] == "ca": context.verify_mode = ssl.CERT_REQUIRED From 87473fee1b7f50b42a71f940500d19ace365b1e1 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 12 Nov 2023 14:19:23 +0100 Subject: 
[PATCH 06/61] Acknowledge contributors of recently merged PRs. --- av98.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/av98.py b/av98.py index c70dbfc..49db1e3 100755 --- a/av98.py +++ b/av98.py @@ -8,6 +8,10 @@ # - # - Klaus Alexander Seistrup # - govynnus +# - Nik +# - +# - rmgr +# - Aleksey Ryndin import argparse import cmd From e678bca0898a203fad260e9d3210224ba439b859 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 12 Nov 2023 14:49:01 +0100 Subject: [PATCH 07/61] Factor out caching functionality into own class/file. --- av98.py | 81 +++++++------------------------------------------------- cache.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 86 insertions(+), 72 deletions(-) create mode 100644 cache.py diff --git a/av98.py b/av98.py index 49db1e3..9f495b7 100755 --- a/av98.py +++ b/av98.py @@ -55,11 +55,12 @@ try: except ModuleNotFoundError: _HAS_CRYPTOGRAPHY = False +from cache import Cache + _VERSION = "1.0.2dev" _MAX_REDIRECTS = 5 -_MAX_CACHE_SIZE = 10 -_MAX_CACHE_AGE_SECS = 180 + # Command abbreviations _ABBREVS = { @@ -299,8 +300,7 @@ class GeminiClient(cmd.Cmd): self._connect_to_tofu_db() - self.cache = {} - self.cache_timestamps = {} + self.cache = Cache() def _connect_to_tofu_db(self): @@ -347,8 +347,9 @@ you'll be able to transparently follow links to Gopherspace!""") return # Use cache, or hit the network if resource is not cached - if check_cache and self.options["cache"] and self._is_cached(gi.url): - mime, body, tmpfile = self._get_cached(gi.url) + if check_cache and self.options["cache"] and self.cache.check(gi.url): + mime, body, tmpfile = self.cache.get(gi.url) + self.log["cache_hits"] += 1 else: try: gi, mime, body, tmpfile = self._fetch_over_network(gi) @@ -540,7 +541,7 @@ you'll be able to transparently follow links to Gopherspace!""") # Maintain cache and log if self.options["cache"]: - self._add_to_cache(gi.url, mime, self.tmp_filename) + self.cache.add(gi.url, mime, self.tmp_filename) 
self._log_visit(gi, address, size) return gi, mime, body, self.tmp_filename @@ -657,70 +658,6 @@ you'll be able to transparently follow links to Gopherspace!""") return addresses - def _is_cached(self, url): - if url not in self.cache: - return False - now = time.time() - cached = self.cache_timestamps[url] - if now - cached > _MAX_CACHE_AGE_SECS: - self._debug("Expiring old cached copy of resource.") - self._remove_from_cache(url) - return False - self._debug("Found cached copy of resource.") - return True - - def _remove_from_cache(self, url): - self.cache_timestamps.pop(url) - mime, filename = self.cache.pop(url) - os.unlink(filename) - self._validate_cache() - - def _add_to_cache(self, url, mime, filename): - - self.cache_timestamps[url] = time.time() - self.cache[url] = (mime, filename) - if len(self.cache) > _MAX_CACHE_SIZE: - self._trim_cache() - self._validate_cache() - - def _trim_cache(self): - # Order cache entries by age - lru = [(t, u) for (u, t) in self.cache_timestamps.items()] - lru.sort() - # Drop the oldest entry no matter what - _, url = lru[0] - self._debug("Dropping cached copy of {} from full cache.".format(url)) - self._remove_from_cache(url) - # Drop other entries if they are older than the limit - now = time.time() - for cached, url in lru[1:]: - if now - cached > _MAX_CACHE_AGE_SECS: - self._debug("Dropping cached copy of {} from full cache.".format(url)) - self._remove_from_cache(url) - else: - break - self._validate_cache() - - def _get_cached(self, url): - mime, filename = self.cache[url] - self.log["cache_hits"] += 1 - if mime.startswith("text/gemini"): - with open(filename, "r") as fp: - body = fp.read() - return mime, body, filename - else: - return mime, None, filename - - def _empty_cache(self): - for mime, filename in self.cache.values(): - if os.path.exists(filename): - os.unlink(filename) - - def _validate_cache(self): - assert self.cache.keys() == self.cache_timestamps.keys() - for _, filename in self.cache.values(): - assert 
os.path.isfile(filename) - def _handle_cert_request(self, meta, status, host): # Don't do client cert stuff in restricted mode, as in principle @@ -1590,7 +1527,7 @@ current gemini browsing session.""" self.db_conn.commit() self.db_conn.close() # Clean up after ourself - self._empty_cache() + self.cache.empty() if self.tmp_filename and os.path.exists(self.tmp_filename): os.unlink(self.tmp_filename) if self.idx_filename and os.path.exists(self.idx_filename): diff --git a/cache.py b/cache.py new file mode 100644 index 0000000..bdcddf0 --- /dev/null +++ b/cache.py @@ -0,0 +1,77 @@ +_MAX_CACHE_SIZE = 10 +_MAX_CACHE_AGE_SECS = 180 + +import os +import os.path +import time + +class Cache: + + def __init__(self): + + self.cache = {} + self.cache_timestamps = {} + + def check(self, url): + if url not in self.cache: + return False + now = time.time() + cached = self.cache_timestamps[url] + if now - cached > _MAX_CACHE_AGE_SECS: +# self._debug("Expiring old cached copy of resource.") + self._remove(url) + return False +# self._debug("Found cached copy of resource.") + return True + + def _remove(self, url): + self.cache_timestamps.pop(url) + mime, filename = self.cache.pop(url) + os.unlink(filename) + self.validatecache() + + def add(self, url, mime, filename): + + self.cache_timestamps[url] = time.time() + self.cache[url] = (mime, filename) + if len(self.cache) > _MAX_CACHE_SIZE: + self.trim() + self.validatecache() + + def trim(self): + # Order cache entries by age + lru = [(t, u) for (u, t) in self.cache_timestamps.items()] + lru.sort() + # Drop the oldest entry no matter what + _, url = lru[0] +# self._debug("Dropping cached copy of {} from full cache.".format(url)) + self._remove(url) + # Drop other entries if they are older than the limit + now = time.time() + for cached, url in lru[1:]: + if now - cached > _MAX_CACHE_AGE_SECS: +# self._debug("Dropping cached copy of {} from full cache.".format(url)) + self._remove(url) + else: + break + self.validatecache() + + def 
get(self, url): + mime, filename = self.cache[url] + if mime.startswith("text/gemini"): + with open(filename, "r") as fp: + body = fp.read() + return mime, body, filename + else: + return mime, None, filename + + def empty(self): + for mime, filename in self.cache.values(): + if os.path.exists(filename): + os.unlink(filename) + + def validatecache(self): + assert self.cache.keys() == self.cache_timestamps.keys() + for _, filename in self.cache.values(): + assert os.path.isfile(filename) + From 79a6187eacb84fe2c5aa4733b981d4eb0d122c7d Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 12 Nov 2023 15:14:05 +0100 Subject: [PATCH 08/61] Restore debugging output from cache using logging module. --- av98.py | 50 ++++++++++++++++++++++++++++---------------------- cache.py | 11 +++++++---- 2 files changed, 35 insertions(+), 26 deletions(-) diff --git a/av98.py b/av98.py index 9f495b7..6032fd9 100755 --- a/av98.py +++ b/av98.py @@ -24,6 +24,7 @@ import getpass import glob import hashlib import io +import logging import mimetypes import os import os.path @@ -96,11 +97,15 @@ _MIME_HANDLERS = { "text/*": "cat %s", } + # monkey-patch Gemini support in urllib.parse # see https://github.com/python/cpython/blob/master/Lib/urllib/parse.py urllib.parse.uses_relative.append("gemini") urllib.parse.uses_netloc.append("gemini") +ui_out = logging.getLogger("av98_logger") +ui_handler = logging.StreamHandler() +ui_out.addHandler(ui_handler) def fix_ipv6_url(url): if not url.count(":") > 2: # Best way to detect them? @@ -438,7 +443,7 @@ you'll be able to transparently follow links to Gopherspace!""") if not header or header[-1] != '\n': raise RuntimeError("Received invalid header from server!") header = header.strip() - self._debug("Response header: %s." % header) + ui_out.debug("Response header: %s." 
% header) # Validate header status, meta = header.split(maxsplit=1) if header[2:].strip() else (header[:2], "") @@ -483,8 +488,8 @@ you'll be able to transparently follow links to Gopherspace!""") follow = "yes" if follow.strip().lower() not in ("y", "yes"): raise UserAbortException() - self._debug("Following redirect to %s." % new_gi.url) - self._debug("This is consecutive redirect number %d." % len(self.previous_redirectors)) + ui_out.debug("Following redirect to %s." % new_gi.url) + ui_out.debug("This is consecutive redirect number %d." % len(self.previous_redirectors)) self.previous_redirectors.add(gi.url) if status == "31": # Permanent redirect @@ -537,7 +542,7 @@ you'll be able to transparently follow links to Gopherspace!""") size = tmpf.write(body) tmpf.close() self.tmp_filename = tmpf.name - self._debug("Wrote %d byte response to %s." % (size, self.tmp_filename)) + ui_out.debug("Wrote %d byte response to %s." % (size, self.tmp_filename)) # Maintain cache and log if self.options["cache"]: @@ -555,10 +560,10 @@ you'll be able to transparently follow links to Gopherspace!""") elif gi.scheme == "gopher": # For Gopher requests, use the configured proxy host, port = self.options["gopher_proxy"].rsplit(":", 1) - self._debug("Using gopher proxy: " + self.options["gopher_proxy"]) + ui_out.debug("Using gopher proxy: " + self.options["gopher_proxy"]) elif gi.scheme in ("http", "https"): host, port = self.options["http_proxy"].rsplit(":",1) - self._debug("Using http proxy: " + self.options["http_proxy"]) + ui_out.debug("Using http proxy: " + self.options["http_proxy"]) # Do DNS resolution addresses = self._get_addresses(host, port) @@ -605,7 +610,7 @@ you'll be able to transparently follow links to Gopherspace!""") # Connect to remote host by any address possible err = None for address in addresses: - self._debug("Connecting to: " + str(address[4])) + ui_out.debug("Connecting to: " + str(address[4])) s = socket.socket(address[0], address[1]) 
s.settimeout(self.options["timeout"]) s = context.wrap_socket(s, server_hostname = gi.host) @@ -621,8 +626,8 @@ you'll be able to transparently follow links to Gopherspace!""") raise err if sys.version_info.minor >=5: - self._debug("Established {} connection.".format(s.version())) - self._debug("Cipher is: {}.".format(s.cipher())) + ui_out.debug("Established {} connection.".format(s.version())) + ui_out.debug("Cipher is: {}.".format(s.cipher())) # Do TOFU if self.options["tls_mode"] != "ca": @@ -635,7 +640,7 @@ you'll be able to transparently follow links to Gopherspace!""") self.client_certs[gi.host] = self.client_certs["active"] # Send request and wrap response in a file descriptor - self._debug("Sending %s" % gi.url) + ui_out.debug("Sending %s" % gi.url) s.sendall((gi.url + CRLF).encode("UTF-8")) return address, s.makefile(mode = "rb") @@ -767,7 +772,7 @@ you'll be able to transparently follow links to Gopherspace!""") most_frequent_cert = cached_fingerprint if fingerprint == cached_fingerprint: # Matched! - self._debug("TOFU: Accepting previously seen ({} times) certificate {}".format(count, fingerprint)) + ui_out.debug("TOFU: Accepting previously seen ({} times) certificate {}".format(count, fingerprint)) self.db_cur.execute("""UPDATE cert_cache SET last_seen=?, count=? WHERE hostname=? AND address=? AND fingerprint=?""", @@ -785,7 +790,7 @@ you'll be able to transparently follow links to Gopherspace!""") previous_ttl = previous_cert.not_valid_after - now print(previous_ttl) - self._debug("TOFU: Unrecognised certificate {}! Raising the alarm...".format(fingerprint)) + ui_out.debug("TOFU: Unrecognised certificate {}! 
Raising the alarm...".format(fingerprint)) print("****************************************") print("[SECURITY WARNING] Unrecognised certificate!") print("The certificate presented for {} ({}) has never been seen before.".format(host, address)) @@ -812,7 +817,7 @@ you'll be able to transparently follow links to Gopherspace!""") # If not, cache this cert else: - self._debug("TOFU: Blindly trusting first ever certificate for this host!") + ui_out.debug("TOFU: Blindly trusting first ever certificate for this host!") self.db_cur.execute("""INSERT INTO cert_cache VALUES (?, ?, ?, ?, ?, ?)""", (host, address, fingerprint, now, now, 1)) @@ -839,7 +844,7 @@ you'll be able to transparently follow links to Gopherspace!""") else: # Use "xdg-open" as a last resort. cmd_str = "xdg-open %s" - self._debug("Using handler: %s" % cmd_str) + ui_out.debug("Using handler: %s" % cmd_str) return cmd_str def _handle_gemtext(self, body, menu_gi, display=True): @@ -860,7 +865,7 @@ you'll be able to transparently follow links to Gopherspace!""") self.index.append(gi) tmpf.write(self._format_geminiitem(len(self.index), gi) + "\n") except: - self._debug("Skipping possible link: %s" % line) + ui_out.debug("Skipping possible link: %s" % line) elif line.startswith("* "): line = line[1:].lstrip("\t ") tmpf.write(textwrap.fill(line, self.options["width"], @@ -928,12 +933,6 @@ you'll be able to transparently follow links to Gopherspace!""") else: return self.tmp_filename - def _debug(self, debug_text): - if not self.options["debug"]: - return - debug_text = "\x1b[0;32m[DEBUG] " + debug_text + "\x1b[0m" - print(debug_text) - def _load_client_cert(self): """ Interactively load a TLS client certificate from the filesystem in PEM @@ -1025,7 +1024,7 @@ you'll be able to transparently follow links to Gopherspace!""") self.client_certs["active"] = (certfile, keyfile) self.active_cert_domains = [] self.prompt = self.cert_prompt - self._debug("Using ID {} / {}.".format(*self.client_certs["active"])) + 
ui_out.debug("Using ID {} / {}.".format(*self.client_certs["active"])) def _deactivate_client_cert(self): if self.active_is_transient: @@ -1092,6 +1091,13 @@ you'll be able to transparently follow links to Gopherspace!""") if option not in self.options: print("Unrecognised option %s" % option) return + # Enable/disable debugging output + if option == "debug": + if value.lower() == "true": + ui_out.setLevel(logging.DEBUG) + elif value.lower() == "false": + ui_out.setLevel(logging.INFO) + # Validate / convert values if option == "gopher_proxy": if ":" not in value: diff --git a/cache.py b/cache.py index bdcddf0..99d5177 100644 --- a/cache.py +++ b/cache.py @@ -1,10 +1,13 @@ _MAX_CACHE_SIZE = 10 _MAX_CACHE_AGE_SECS = 180 +import logging import os import os.path import time +ui_out = logging.getLogger("av98_logger") + class Cache: def __init__(self): @@ -18,10 +21,10 @@ class Cache: now = time.time() cached = self.cache_timestamps[url] if now - cached > _MAX_CACHE_AGE_SECS: -# self._debug("Expiring old cached copy of resource.") + ui_out.debug("Expiring old cached copy of resource.") self._remove(url) return False -# self._debug("Found cached copy of resource.") + ui_out.debug("Found cached copy of resource.") return True def _remove(self, url): @@ -44,13 +47,13 @@ class Cache: lru.sort() # Drop the oldest entry no matter what _, url = lru[0] -# self._debug("Dropping cached copy of {} from full cache.".format(url)) + ui_out.debug("Dropping cached copy of {} from full cache.".format(url)) self._remove(url) # Drop other entries if they are older than the limit now = time.time() for cached, url in lru[1:]: if now - cached > _MAX_CACHE_AGE_SECS: -# self._debug("Dropping cached copy of {} from full cache.".format(url)) + ui_out.debug("Dropping cached copy of {} from full cache.".format(url)) self._remove(url) else: break From 053dcb7254ac15b9e242c488757a86591e9dead7 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 12 Nov 2023 15:36:54 +0100 Subject: [PATCH 09/61] Factor 
out certificate validation into its own class/file. --- av98.py | 155 ++---------------------------------------------------- tofu.py | 160 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+), 151 deletions(-) create mode 100644 tofu.py diff --git a/av98.py b/av98.py index 6032fd9..23949e2 100755 --- a/av98.py +++ b/av98.py @@ -18,11 +18,9 @@ import cmd import cgi import codecs import collections -import datetime import fnmatch import getpass import glob -import hashlib import io import logging import mimetypes @@ -32,9 +30,7 @@ import random import shlex import shutil import socket -import sqlite3 import ssl -from ssl import CertificateError import subprocess import sys import tempfile @@ -48,16 +44,8 @@ try: except ModuleNotFoundError: import textwrap -try: - from cryptography import x509 - from cryptography.hazmat.backends import default_backend - _HAS_CRYPTOGRAPHY = True - _BACKEND = default_backend() -except ModuleNotFoundError: - _HAS_CRYPTOGRAPHY = False - from cache import Cache - +from tofu import TofuStore _VERSION = "1.0.2dev" _MAX_REDIRECTS = 5 @@ -303,20 +291,9 @@ class GeminiClient(cmd.Cmd): "cache_hits": 0, } - self._connect_to_tofu_db() - + self.tofu_store = TofuStore(self.config_dir) self.cache = Cache() - def _connect_to_tofu_db(self): - - db_path = os.path.join(self.config_dir, "tofu.db") - self.db_conn = sqlite3.connect(db_path) - self.db_cur = self.db_conn.cursor() - - self.db_cur.execute("""CREATE TABLE IF NOT EXISTS cert_cache - (hostname text, address text, fingerprint text, - first_seen date, last_seen date, count integer)""") - def _go_to_gi(self, gi, update_hist=True, check_cache=True, handle=True): """This method might be considered "the heart of AV-98". 
Everything involved in fetching a gemini resource happens here: @@ -632,7 +609,7 @@ you'll be able to transparently follow links to Gopherspace!""") # Do TOFU if self.options["tls_mode"] != "ca": cert = s.getpeercert(binary_form=True) - self._validate_cert(address[4][0], host, cert) + self.tofu_store.validate_cert(address[4][0], host, cert) # Remember that we showed the current cert to this domain... if self.client_certs["active"]: @@ -705,129 +682,6 @@ you'll be able to transparently follow links to Gopherspace!""") print("Giving up.") raise UserAbortException() - def _validate_cert(self, address, host, cert): - """ - Validate a TLS certificate in TOFU mode. - - If the cryptography module is installed: - - Check the certificate Common Name or SAN matches `host` - - Check the certificate's not valid before date is in the past - - Check the certificate's not valid after date is in the future - - Whether the cryptography module is installed or not, check the - certificate's fingerprint against the TOFU database to see if we've - previously encountered a different certificate for this IP address and - hostname. - """ - now = datetime.datetime.utcnow() - if _HAS_CRYPTOGRAPHY: - # Using the cryptography module we can get detailed access - # to the properties of even self-signed certs, unlike in - # the standard ssl library... 
- c = x509.load_der_x509_certificate(cert, _BACKEND) - - # Check certificate validity dates - if c.not_valid_before >= now: - raise CertificateError("Certificate not valid until: {}!".format(c.not_valid_before)) - elif c.not_valid_after <= now: - raise CertificateError("Certificate expired as of: {})!".format(c.not_valid_after)) - - # Check certificate hostnames - names = [] - common_name = c.subject.get_attributes_for_oid(x509.oid.NameOID.COMMON_NAME) - if common_name: - names.append(common_name[0].value) - try: - names.extend([alt.value for alt in c.extensions.get_extension_for_oid(x509.oid.ExtensionOID.SUBJECT_ALTERNATIVE_NAME).value]) - except x509.ExtensionNotFound: - pass - names = set(names) - for name in names: - try: - ssl._dnsname_match(name, host) - break - except CertificateError: - continue - else: - # If we didn't break out, none of the names were valid - raise CertificateError("Hostname does not match certificate common name or any alternative names.") - - sha = hashlib.sha256() - sha.update(cert) - fingerprint = sha.hexdigest() - - # Have we been here before? - self.db_cur.execute("""SELECT fingerprint, first_seen, last_seen, count - FROM cert_cache - WHERE hostname=? AND address=?""", (host, address)) - cached_certs = self.db_cur.fetchall() - - # If so, check for a match - if cached_certs: - max_count = 0 - most_frequent_cert = None - for cached_fingerprint, first, last, count in cached_certs: - if count > max_count: - max_count = count - most_frequent_cert = cached_fingerprint - if fingerprint == cached_fingerprint: - # Matched! - ui_out.debug("TOFU: Accepting previously seen ({} times) certificate {}".format(count, fingerprint)) - self.db_cur.execute("""UPDATE cert_cache - SET last_seen=?, count=? - WHERE hostname=? AND address=? 
AND fingerprint=?""", - (now, count+1, host, address, fingerprint)) - self.db_conn.commit() - break - else: - if _HAS_CRYPTOGRAPHY: - # Load the most frequently seen certificate to see if it has - # expired - certdir = os.path.join(self.config_dir, "cert_cache") - with open(os.path.join(certdir, most_frequent_cert+".crt"), "rb") as fp: - previous_cert = fp.read() - previous_cert = x509.load_der_x509_certificate(previous_cert, _BACKEND) - previous_ttl = previous_cert.not_valid_after - now - print(previous_ttl) - - ui_out.debug("TOFU: Unrecognised certificate {}! Raising the alarm...".format(fingerprint)) - print("****************************************") - print("[SECURITY WARNING] Unrecognised certificate!") - print("The certificate presented for {} ({}) has never been seen before.".format(host, address)) - print("This MIGHT be a Man-in-the-Middle attack.") - print("A different certificate has previously been seen {} times.".format(max_count)) - if _HAS_CRYPTOGRAPHY: - if previous_ttl < datetime.timedelta(): - print("That certificate has expired, which reduces suspicion somewhat.") - else: - print("That certificate is still valid for: {}".format(previous_ttl)) - print("****************************************") - print("Attempt to verify the new certificate fingerprint out-of-band:") - print(fingerprint) - choice = input("Accept this new certificate? 
Y/N ").strip().lower() - if choice in ("y", "yes"): - self.db_cur.execute("""INSERT INTO cert_cache - VALUES (?, ?, ?, ?, ?, ?)""", - (host, address, fingerprint, now, now, 1)) - self.db_conn.commit() - with open(os.path.join(certdir, fingerprint+".crt"), "wb") as fp: - fp.write(cert) - else: - raise Exception("TOFU Failure!") - - # If not, cache this cert - else: - ui_out.debug("TOFU: Blindly trusting first ever certificate for this host!") - self.db_cur.execute("""INSERT INTO cert_cache - VALUES (?, ?, ?, ?, ?, ?)""", - (host, address, fingerprint, now, now, 1)) - self.db_conn.commit() - certdir = os.path.join(self.config_dir, "cert_cache") - if not os.path.exists(certdir): - os.makedirs(certdir) - with open(os.path.join(certdir, fingerprint+".crt"), "wb") as fp: - fp.write(cert) - def _get_handler_cmd(self, mimetype): # Now look for a handler for this mimetype # Consider exact matches before wildcard matches @@ -1530,8 +1384,7 @@ current gemini browsing session.""" def do_quit(self, *args): """Exit AV-98.""" # Close TOFU DB - self.db_conn.commit() - self.db_conn.close() + self.tofu_store.close() # Clean up after ourself self.cache.empty() if self.tmp_filename and os.path.exists(self.tmp_filename): diff --git a/tofu.py b/tofu.py new file mode 100644 index 0000000..7f54b7b --- /dev/null +++ b/tofu.py @@ -0,0 +1,160 @@ +import datetime +import hashlib +import logging +import os +import os.path +import sqlite3 +import ssl +import time + +try: + from cryptography import x509 + from cryptography.hazmat.backends import default_backend + _HAS_CRYPTOGRAPHY = True + _BACKEND = default_backend() +except ModuleNotFoundError: + _HAS_CRYPTOGRAPHY = False + +ui_out = logging.getLogger("av98_logger") + +class TofuStore: + + def __init__(self, config_dir): + + self.config_dir = config_dir + db_path = os.path.join(self.config_dir, "tofu.db") + self.db_conn = sqlite3.connect(db_path) + self.db_cur = self.db_conn.cursor() + + self.db_cur.execute("""CREATE TABLE IF NOT EXISTS 
cert_cache + (hostname text, address text, fingerprint text, + first_seen date, last_seen date, count integer)""") + + def close(self): + + self.db_conn.commit() + self.db_conn.close() + + def validate_cert(self, address, host, cert): + """ + Validate a TLS certificate in TOFU mode. + + If the cryptography module is installed: + - Check the certificate Common Name or SAN matches `host` + - Check the certificate's not valid before date is in the past + - Check the certificate's not valid after date is in the future + + Whether the cryptography module is installed or not, check the + certificate's fingerprint against the TOFU database to see if we've + previously encountered a different certificate for this IP address and + hostname. + """ + now = datetime.datetime.utcnow() + if _HAS_CRYPTOGRAPHY: + # Using the cryptography module we can get detailed access + # to the properties of even self-signed certs, unlike in + # the standard ssl library... + c = x509.load_der_x509_certificate(cert, _BACKEND) + + # Check certificate validity dates + if c.not_valid_before >= now: + raise ssl.CertificateError("Certificate not valid until: {}!".format(c.not_valid_before)) + elif c.not_valid_after <= now: + raise ssl.CertificateError("Certificate expired as of: {})!".format(c.not_valid_after)) + + # Check certificate hostnames + names = [] + common_name = c.subject.get_attributes_for_oid(x509.oid.NameOID.COMMON_NAME) + if common_name: + names.append(common_name[0].value) + try: + names.extend([alt.value for alt in c.extensions.get_extension_for_oid(x509.oid.ExtensionOID.SUBJECT_ALTERNATIVE_NAME).value]) + except x509.ExtensionNotFound: + pass + names = set(names) + for name in names: + try: + ssl._dnsname_match(name, host) + break + except ssl.CertificateError: + continue + else: + # If we didn't break out, none of the names were valid + raise ssl.CertificateError("Hostname does not match certificate common name or any alternative names.") + + sha = hashlib.sha256() + 
sha.update(cert) + fingerprint = sha.hexdigest() + + # Have we been here before? + self.db_cur.execute("""SELECT fingerprint, first_seen, last_seen, count + FROM cert_cache + WHERE hostname=? AND address=?""", (host, address)) + cached_certs = self.db_cur.fetchall() + + # If so, check for a match + if cached_certs: + max_count = 0 + most_frequent_cert = None + for cached_fingerprint, first, last, count in cached_certs: + if count > max_count: + max_count = count + most_frequent_cert = cached_fingerprint + if fingerprint == cached_fingerprint: + # Matched! + ui_out.debug("TOFU: Accepting previously seen ({} times) certificate {}".format(count, fingerprint)) + self.db_cur.execute("""UPDATE cert_cache + SET last_seen=?, count=? + WHERE hostname=? AND address=? AND fingerprint=?""", + (now, count+1, host, address, fingerprint)) + self.db_conn.commit() + break + else: + if _HAS_CRYPTOGRAPHY: + # Load the most frequently seen certificate to see if it has + # expired + certdir = os.path.join(self.config_dir, "cert_cache") + with open(os.path.join(certdir, most_frequent_cert+".crt"), "rb") as fp: + previous_cert = fp.read() + previous_cert = x509.load_der_x509_certificate(previous_cert, _BACKEND) + previous_ttl = previous_cert.not_valid_after - now + print(previous_ttl) + + ui_out.debug("TOFU: Unrecognised certificate {}! 
Raising the alarm...".format(fingerprint)) + print("****************************************") + print("[SECURITY WARNING] Unrecognised certificate!") + print("The certificate presented for {} ({}) has never been seen before.".format(host, address)) + print("This MIGHT be a Man-in-the-Middle attack.") + print("A different certificate has previously been seen {} times.".format(max_count)) + if _HAS_CRYPTOGRAPHY: + if previous_ttl < datetime.timedelta(): + print("That certificate has expired, which reduces suspicion somewhat.") + else: + print("That certificate is still valid for: {}".format(previous_ttl)) + print("****************************************") + print("Attempt to verify the new certificate fingerprint out-of-band:") + print(fingerprint) + choice = input("Accept this new certificate? Y/N ").strip().lower() + if choice in ("y", "yes"): + self.db_cur.execute("""INSERT INTO cert_cache + VALUES (?, ?, ?, ?, ?, ?)""", + (host, address, fingerprint, now, now, 1)) + self.db_conn.commit() + with open(os.path.join(certdir, fingerprint+".crt"), "wb") as fp: + fp.write(cert) + else: + raise Exception("TOFU Failure!") + + # If not, cache this cert + else: + ui_out.debug("TOFU: Blindly trusting first ever certificate for this host!") + self.db_cur.execute("""INSERT INTO cert_cache + VALUES (?, ?, ?, ?, ?, ?)""", + (host, address, fingerprint, now, now, 1)) + self.db_conn.commit() + certdir = os.path.join(self.config_dir, "cert_cache") + if not os.path.exists(certdir): + os.makedirs(certdir) + with open(os.path.join(certdir, fingerprint+".crt"), "wb") as fp: + fp.write(cert) + From cdb2b0282c12970818e20668a321a9e2d8ca5084 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Mon, 13 Nov 2023 20:24:54 +0100 Subject: [PATCH 10/61] Don't expose Cache object's trim method. 
--- cache.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cache.py b/cache.py index 99d5177..4ca55f1 100644 --- a/cache.py +++ b/cache.py @@ -38,10 +38,10 @@ class Cache: self.cache_timestamps[url] = time.time() self.cache[url] = (mime, filename) if len(self.cache) > _MAX_CACHE_SIZE: - self.trim() + self._trim() self.validatecache() - def trim(self): + def _trim(self): # Order cache entries by age lru = [(t, u) for (u, t) in self.cache_timestamps.items()] lru.sort() From c6886d7eb957bd8ca1bfb02779ee251d1ac003a7 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Mon, 13 Nov 2023 23:58:12 +0100 Subject: [PATCH 11/61] Don't clutter go_to_gi() with error logging. --- av98.py | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) diff --git a/av98.py b/av98.py index 23949e2..f2e948e 100755 --- a/av98.py +++ b/av98.py @@ -338,22 +338,7 @@ you'll be able to transparently follow links to Gopherspace!""") except UserAbortException: return except Exception as err: - # Print an error message - if isinstance(err, socket.gaierror): - self.log["dns_failures"] += 1 - print("ERROR: DNS error!") - elif isinstance(err, ConnectionRefusedError): - self.log["refused_connections"] += 1 - print("ERROR: Connection refused!") - elif isinstance(err, ConnectionResetError): - self.log["reset_connections"] += 1 - print("ERROR: Connection reset!") - elif isinstance(err, (TimeoutError, socket.timeout)): - self.log["timeouts"] += 1 - print("""ERROR: Connection timed out! - Slow internet connection? 
Use 'set timeout' to be more patient.""") - else: - print("ERROR: " + str(err)) + self._print_friendly_error(err) return # Pass file to handler, unless we were asked not to @@ -374,6 +359,19 @@ you'll be able to transparently follow links to Gopherspace!""") if update_hist: self._update_history(gi) + def _print_friendly_error(self, err): + if isinstance(err, socket.gaierror): + ui_out.error("ERROR: DNS error!") + elif isinstance(err, ConnectionRefusedError): + ui_out.error("ERROR: Connection refused!") + elif isinstance(err, ConnectionResetError): + ui_out.error("ERROR: Connection reset!") + elif isinstance(err, (TimeoutError, socket.timeout)): + ui_out.error("""ERROR: Connection timed out! +Slow internet connection? Use 'set timeout' to be more patient.""") + else: + ui_out.error("ERROR: " + str(err)) + def _fetch_over_network(self, gi): # Be careful with client certificates! @@ -411,7 +409,18 @@ you'll be able to transparently follow links to Gopherspace!""") if not gi.host: address, f = None, open(gi.path, "rb") else: - address, f = self._send_request(gi) + try: + address, f = self._send_request(gi) + except Exception as err: + if isinstance(err, socket.gaierror): + self.log["dns_failures"] += 1 + elif isinstance(err, ConnectionRefusedError): + self.log["refused_connections"] += 1 + elif isinstance(err, ConnectionResetError): + self.log["reset_connections"] += 1 + elif isinstance(err, (TimeoutError, socket.timeout)): + self.log["timeouts"] += 1 + raise err # Spec dictates should not exceed 1024 bytes, # so maximum valid header length is 1027 bytes. From 588d599cb4d395806f4f45cdb68ee57791f61d36 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Tue, 14 Nov 2023 00:15:32 +0100 Subject: [PATCH 12/61] Add actual support for reading local files, use it for bookmarks. 
--- av98.py | 62 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 34 insertions(+), 28 deletions(-) diff --git a/av98.py b/av98.py index f2e948e..8e1f5f6 100755 --- a/av98.py +++ b/av98.py @@ -318,7 +318,7 @@ However, you can use `set gopher_proxy hostname:port` to tell it about a Gopher-to-Gemini proxy (such as a running Agena instance), in which case you'll be able to transparently follow links to Gopherspace!""") return - elif gi.scheme not in ("gemini", "gopher"): + elif gi.scheme not in ("file", "gemini", "gopher"): print("Sorry, no support for {} links.".format(gi.scheme)) return @@ -328,8 +328,10 @@ you'll be able to transparently follow links to Gopherspace!""") self._go_to_gi(new_gi) return - # Use cache, or hit the network if resource is not cached - if check_cache and self.options["cache"] and self.cache.check(gi.url): + # Use local file, use cache, or hit the network if resource is not cached + if gi.scheme == "file": + mime, body, tmpfile = self._handle_local_file(gi) + elif check_cache and self.options["cache"] and self.cache.check(gi.url): mime, body, tmpfile = self.cache.get(gi.url) self.log["cache_hits"] += 1 else: @@ -372,6 +374,16 @@ Slow internet connection? Use 'set timeout' to be more patient.""") else: ui_out.error("ERROR: " + str(err)) + def _handle_local_file(self, gi): + if gi.path.endswith(".gmi"): # TODO: be better about this + mime = "text/gemini" + with open(gi.path, "r") as fp: + body = fp.read() + else: + mime, noise = mimetypes.guess_type(gi.path) + body = None + return mime, body, gi.path + def _fetch_over_network(self, gi): # Be careful with client certificates! @@ -405,22 +417,19 @@ Slow internet connection? Use 'set timeout' to be more patient.""") print("Remaining unidentified.") self.client_certs.pop(gi.host) - # Is this a local file? 
- if not gi.host: - address, f = None, open(gi.path, "rb") - else: - try: - address, f = self._send_request(gi) - except Exception as err: - if isinstance(err, socket.gaierror): - self.log["dns_failures"] += 1 - elif isinstance(err, ConnectionRefusedError): - self.log["refused_connections"] += 1 - elif isinstance(err, ConnectionResetError): - self.log["reset_connections"] += 1 - elif isinstance(err, (TimeoutError, socket.timeout)): - self.log["timeouts"] += 1 - raise err + # Send request to server + try: + address, f = self._send_request(gi) + except Exception as err: + if isinstance(err, socket.gaierror): + self.log["dns_failures"] += 1 + elif isinstance(err, ConnectionRefusedError): + self.log["refused_connections"] += 1 + elif isinstance(err, ConnectionResetError): + self.log["reset_connections"] += 1 + elif isinstance(err, (TimeoutError, socket.timeout)): + self.log["timeouts"] += 1 + raise err # Spec dictates should not exceed 1024 bytes, # so maximum valid header length is 1027 bytes. @@ -1052,8 +1061,7 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") self._go_to_gi(gi) # or a local file elif os.path.exists(os.path.expanduser(line)): - gi = GeminiItem(None, None, os.path.expanduser(line), - "1", line, False) + gi = GeminiItem("file://" + os.path.abspath(os.path.expanduser(line))) self._go_to_gi(gi) # If this isn't a mark, treat it as a URL else: @@ -1338,13 +1346,11 @@ Bookmarks are stored using the 'add' command.""" if len(args.split()) > 1 or (args and not args.isnumeric()): print("bookmarks command takes a single integer argument!") return - with open(bm_file, "r") as fp: - body = fp.read() - gi = GeminiItem("localhost/" + bm_file) - self._handle_gemtext(body, gi, display = not args) - if args: - # Use argument as a numeric index - self.default(line) + gi = GeminiItem("file://" + os.path.abspath(bm_file)) + self._go_to_gi(gi, update_hist=False, handle = not args) + if args: + # Use argument as a numeric index + self.default(line) ### Help def do_help(self, arg): From e4a44679dc2376aaa194c5e58612cb7cfc6518ce Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Tue, 14 Nov 2023 00:29:59 +0100 Subject: [PATCH 13/61] Fix saving of local files. --- av98.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/av98.py b/av98.py index 8e1f5f6..afeb44b 100755 --- a/av98.py +++ b/av98.py @@ -1312,7 +1312,8 @@ Use 'ls -l' to see URLs.""" # Don't use _get_active_tmpfile() here, because we want to save the # "source code" of menus, not the rendered view - this way AV-98 # can navigate to it later. - shutil.copyfile(self.tmp_filename, filename) + src = gi.path if gi.scheme == "file" else self.tmp_filename + shutil.copyfile(src, filename) print("Saved to %s" % filename) # Restore gi if necessary From 3aedd549e506a710877c56ea19a1e38749a018bd Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Tue, 14 Nov 2023 00:33:48 +0100 Subject: [PATCH 14/61] Remove unecessary duplication of handling code between _handle_gemtext() and _go_to_gi(). 
--- av98.py | 30 ++++++++++++++++++------------ 1 file changed, 18 insertions(+), 12 deletions(-) diff --git a/av98.py b/av98.py index afeb44b..20cd5b2 100755 --- a/av98.py +++ b/av98.py @@ -343,17 +343,24 @@ you'll be able to transparently follow links to Gopherspace!""") self._print_friendly_error(err) return + # Render gemtext, update index + if mime == "text/gemini": + self._handle_gemtext(body, gi) + # Pass file to handler, unless we were asked not to if handle: if mime == "text/gemini": - self._handle_gemtext(body, gi) + targetfile = self.idx_filename + elif gi.scheme == "file": + targetfile = gi.path else: - cmd_str = self._get_handler_cmd(mime) - try: - subprocess.call(shlex.split(cmd_str % tmpfile)) - except FileNotFoundError: - print("Handler program %s not found!" % shlex.split(cmd_str)[0]) - print("You can use the ! command to specify another handler program or pipeline.") + targetfile = tmpfile + cmd_str = self._get_handler_cmd(mime) + try: + subprocess.call(shlex.split(cmd_str % targetfile)) + except FileNotFoundError: + print("Handler program %s not found!" % shlex.split(cmd_str)[0]) + print("You can use the ! command to specify another handler program or pipeline.") # Update state self.gi = gi @@ -719,7 +726,10 @@ Slow internet connection? Use 'set timeout' to be more patient.""") ui_out.debug("Using handler: %s" % cmd_str) return cmd_str - def _handle_gemtext(self, body, menu_gi, display=True): + def _handle_gemtext(self, body, menu_gi): + """Simultaneously parse and render a text/gemini document. + Parsing causes self.index to be populated with GeminiItems. + Rendering causes self.idx_filename to contain a rendered view.""" self.index = [] preformatted = False if self.idx_filename: @@ -763,10 +773,6 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") self.page_index = 0 self.index_index = -1 - if display: - cmd_str = self._get_handler_cmd("text/gemini") - subprocess.call(shlex.split(cmd_str % self.idx_filename)) - def _format_geminiitem(self, index, gi, url=False): protocol = "" if gi.scheme == "gemini" else " %s" % gi.scheme line = "[%d%s] %s" % (index, protocol, gi.name or gi.url) From d689cbd04fbcbb42a6798226f188285b8d422c4c Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Tue, 14 Nov 2023 00:35:47 +0100 Subject: [PATCH 15/61] Fix _get_active_tmpfile to handle local file GIs. --- av98.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/av98.py b/av98.py index 20cd5b2..fde69d7 100755 --- a/av98.py +++ b/av98.py @@ -808,6 +808,8 @@ Slow internet connection? Use 'set timeout' to be more patient.""") def _get_active_tmpfile(self): if self.mime == "text/gemini": return self.idx_filename + elif self.gi.scheme == "file": + return self.gi.path else: return self.tmp_filename From 33e66b439b245c506c595091a648bbb2d186974f Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Tue, 14 Nov 2023 00:36:43 +0100 Subject: [PATCH 16/61] Test for saveability against a fetched GI, not a tempfile. --- av98.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/av98.py b/av98.py index fde69d7..fd99518 100755 --- a/av98.py +++ b/av98.py @@ -1265,7 +1265,7 @@ Use 'ls -l' to see URLs.""" # No arguments given at all # Save current item, if there is one, to a file whose name is # inferred from the gemini path - if not self.tmp_filename: + if not self.gi: print("You need to visit an item first!") return else: From 7f1aa5cbf3a4331e12523ba822fb6fb7b4aab738 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Tue, 14 Nov 2023 00:38:00 +0100 Subject: [PATCH 17/61] Remove unused imports. 
--- av98.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/av98.py b/av98.py index fd99518..74bbe5b 100755 --- a/av98.py +++ b/av98.py @@ -17,16 +17,13 @@ import argparse import cmd import cgi import codecs -import collections import fnmatch import getpass import glob -import io import logging import mimetypes import os import os.path -import random import shlex import shutil import socket From 01da8441411130850ad2ab89b4af0defea4c45b8 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Tue, 14 Nov 2023 08:56:39 +0100 Subject: [PATCH 18/61] Pull response parsing inside of _send_request(). --- av98.py | 37 ++++++++++++++++++++----------------- 1 file changed, 20 insertions(+), 17 deletions(-) diff --git a/av98.py b/av98.py index 74bbe5b..735ff91 100755 --- a/av98.py +++ b/av98.py @@ -423,7 +423,7 @@ Slow internet connection? Use 'set timeout' to be more patient.""") # Send request to server try: - address, f = self._send_request(gi) + status, meta, address, f = self._send_request(gi) except Exception as err: if isinstance(err, socket.gaierror): self.log["dns_failures"] += 1 @@ -435,21 +435,6 @@ Slow internet connection? Use 'set timeout' to be more patient.""") self.log["timeouts"] += 1 raise err - # Spec dictates should not exceed 1024 bytes, - # so maximum valid header length is 1027 bytes. - header = f.readline(1027) - header = header.decode("UTF-8") - if not header or header[-1] != '\n': - raise RuntimeError("Received invalid header from server!") - header = header.strip() - ui_out.debug("Response header: %s." % header) - - # Validate header - status, meta = header.split(maxsplit=1) if header[2:].strip() else (header[:2], "") - if len(meta) > 1024 or len(status) != 2 or not status.isnumeric(): - f.close() - raise RuntimeError("Received invalid header from server!") - # Update redirect loop/maze escaping state if not status.startswith("3"): self.previous_redirectors = set() @@ -641,7 +626,25 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") # Send request and wrap response in a file descriptor ui_out.debug("Sending %s" % gi.url) s.sendall((gi.url + CRLF).encode("UTF-8")) - return address, s.makefile(mode = "rb") + + # Read back response + f = s.makefile(mode = "rb") + # Spec dictates should not exceed 1024 bytes, + # so maximum valid header length is 1027 bytes. + header = f.readline(1027) + header = header.decode("UTF-8") + if not header or header[-1] != '\n': + raise RuntimeError("Received invalid header from server!") + header = header.strip() + ui_out.debug("Response header: %s." % header) + + # Validate header + status, meta = header.split(maxsplit=1) if header[2:].strip() else (header[:2], "") + if len(meta) > 1024 or len(status) != 2 or not status.isnumeric(): + f.close() + raise RuntimeError("Received invalid header from server!") + + return status, meta, address, f def _get_addresses(self, host, port): # DNS lookup - will get IPv4 and IPv6 records if IPv6 is enabled From 713616d55680773ca0555e9634bac63251cd030c Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Tue, 14 Nov 2023 19:11:52 +0100 Subject: [PATCH 19/61] Refactor of networking logic. 1. Move client certificate handling stuff inside of _send_request(). 2. Change _fetch_over_network() to not be recursive, by just looping through calls to _send_request(). This facilitates moving the redirect-tracking state inside _fetch_over_network(), instead of keeping it in GeminiClient. 3. Also allow _fetch_over_network() to save response to a provided filename, and use this to implement do_save(), rather than _go_to_gi(). This avoids the need for awkward gymnastics with the internal state. 
--- av98.py | 301 ++++++++++++++++++++++++++++++-------------------------- 1 file changed, 162 insertions(+), 139 deletions(-) diff --git a/av98.py b/av98.py index 735ff91..e74fdfb 100755 --- a/av98.py +++ b/av98.py @@ -248,7 +248,6 @@ class GeminiClient(cmd.Cmd): self.marks = {} self.page_index = 0 self.permanent_redirects = {} - self.previous_redirectors = set() self.restricted = restricted self.tmp_filename = "" self.visited_hosts = set() @@ -388,7 +387,125 @@ Slow internet connection? Use 'set timeout' to be more patient.""") body = None return mime, body, gi.path - def _fetch_over_network(self, gi): + def _fetch_over_network(self, gi, destination=None): + + previous_redirectors = set() + while True: + # Send request to server + try: + status, meta, address, f = self._send_request(gi) + except Exception as err: + if isinstance(err, socket.gaierror): + self.log["dns_failures"] += 1 + elif isinstance(err, ConnectionRefusedError): + self.log["refused_connections"] += 1 + elif isinstance(err, ConnectionResetError): + self.log["reset_connections"] += 1 + elif isinstance(err, (TimeoutError, socket.timeout)): + self.log["timeouts"] += 1 + raise err + + # Update redirect loop/maze escaping state + if not status.startswith("3"): + previous_redirectors = set() + + # Handle non-SUCCESS headers, which don't have a response body + # Inputs + if status.startswith("1"): + if status == "11": + user_input = getpass.getpass("> ") + else: + user_input = input("> ") + gi = gi.query(user_input) + continue + + # Redirects + elif status.startswith("3"): + new_gi = GeminiItem(gi.absolutise_url(meta)) + if new_gi.url == gi.url: + raise RuntimeError("URL redirects to itself!") + elif new_gi.url in previous_redirectors: + raise RuntimeError("Caught in redirect loop!") + elif len(previous_redirectors) == _MAX_REDIRECTS: + raise RuntimeError("Refusing to follow more than %d consecutive redirects!" 
% _MAX_REDIRECTS) + # Never follow cross-domain redirects without asking + elif new_gi.host != gi.host: + follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url) + # Never follow cross-protocol redirects without asking + elif new_gi.scheme != gi.scheme: + follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url) + # Don't follow *any* redirect without asking if auto-follow is off + elif not self.options["auto_follow_redirects"]: + follow = input("Follow redirect to %s? (y/n) " % new_gi.url) + # Otherwise, follow away + else: + follow = "yes" + if follow.strip().lower() not in ("y", "yes"): + raise UserAbortException() + ui_out.debug("Following redirect to %s." % new_gi.url) + ui_out.debug("This is consecutive redirect number %d." % len(previous_redirectors)) + previous_redirectors.add(gi.url) + if status == "31": + # Permanent redirect + self.permanent_redirects[gi.url] = new_gi.url + gi = new_gi + continue + + # Errors + elif status.startswith("4") or status.startswith("5"): + raise RuntimeError(meta) + + # Client cert + elif status.startswith("6"): + self._handle_cert_request(meta, status, gi.host) + continue + + # Invalid status + elif not status.startswith("2"): + raise RuntimeError("Server returned undefined status code %s!" % status) + + # If we're here, this must be a success and there's a response body, + # so break out of the request loop + assert status.startswith("2") + break + + # Fill in default MIME type or validate a provided one + mime = meta + if mime == "": + mime = "text/gemini; charset=utf-8" + mime, mime_options = cgi.parse_header(mime) + if "charset" in mime_options: + try: + codecs.lookup(mime_options["charset"]) + except LookupError: + raise RuntimeError("Header declared unknown encoding %s" % value) + + # Save response body to disk + body, size, filename = self._write_response_to_file(mime, mime_options, f, destination) + ui_out.debug("Wrote %d byte response to %s." 
% (size, filename)) + + # Maintain cache and log + if self.options["cache"]: + self.cache.add(gi.url, mime, filename) + self._log_visit(gi, address, size) + + return gi, mime, body, filename + + def _send_request(self, gi): + """Send a selector to a given host and port. + Returns the resolved address and binary file with the reply.""" + + # Figure out which host to connect to + if gi.scheme == "gemini": + # For Gemini requests, connect to the host and port specified in the URL + host, port = gi.host, gi.port + elif gi.scheme == "gopher": + # For Gopher requests, use the configured proxy + host, port = self.options["gopher_proxy"].rsplit(":", 1) + ui_out.debug("Using gopher proxy: " + self.options["gopher_proxy"]) + elif gi.scheme in ("http", "https"): + host, port = self.options["http_proxy"].rsplit(":",1) + ui_out.debug("Using http proxy: " + self.options["http_proxy"]) # Be careful with client certificates! # Are we crossing a domain boundary? @@ -421,133 +538,6 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") print("Remaining unidentified.") self.client_certs.pop(gi.host) - # Send request to server - try: - status, meta, address, f = self._send_request(gi) - except Exception as err: - if isinstance(err, socket.gaierror): - self.log["dns_failures"] += 1 - elif isinstance(err, ConnectionRefusedError): - self.log["refused_connections"] += 1 - elif isinstance(err, ConnectionResetError): - self.log["reset_connections"] += 1 - elif isinstance(err, (TimeoutError, socket.timeout)): - self.log["timeouts"] += 1 - raise err - - # Update redirect loop/maze escaping state - if not status.startswith("3"): - self.previous_redirectors = set() - - # Handle non-SUCCESS headers, which don't have a response body - # Inputs - if status.startswith("1"): - print(meta) - if status == "11": - user_input = getpass.getpass("> ") - else: - user_input = input("> ") - return self._fetch_over_network(gi.query(user_input)) - - # Redirects - elif status.startswith("3"): - new_gi = GeminiItem(gi.absolutise_url(meta)) - if new_gi.url == gi.url: - raise RuntimeError("URL redirects to itself!") - elif new_gi.url in self.previous_redirectors: - raise RuntimeError("Caught in redirect loop!") - elif len(self.previous_redirectors) == _MAX_REDIRECTS: - raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS) - # Never follow cross-domain redirects without asking - elif new_gi.host != gi.host: - follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url) - # Never follow cross-protocol redirects without asking - elif new_gi.scheme != gi.scheme: - follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url) - # Don't follow *any* redirect without asking if auto-follow is off - elif not self.options["auto_follow_redirects"]: - follow = input("Follow redirect to %s? 
(y/n) " % new_gi.url) - # Otherwise, follow away - else: - follow = "yes" - if follow.strip().lower() not in ("y", "yes"): - raise UserAbortException() - ui_out.debug("Following redirect to %s." % new_gi.url) - ui_out.debug("This is consecutive redirect number %d." % len(self.previous_redirectors)) - self.previous_redirectors.add(gi.url) - if status == "31": - # Permanent redirect - self.permanent_redirects[gi.url] = new_gi.url - return self._fetch_over_network(new_gi) - - # Errors - elif status.startswith("4") or status.startswith("5"): - raise RuntimeError(meta) - - # Client cert - elif status.startswith("6"): - self._handle_cert_request(meta, status, gi.host) - return self._fetch_over_network(gi) - - # Invalid status - elif not status.startswith("2"): - raise RuntimeError("Server returned undefined status code %s!" % status) - - # If we're here, this must be a success and there's a response body - assert status.startswith("2") - - mime = meta - if mime == "": - mime = "text/gemini; charset=utf-8" - mime, mime_options = cgi.parse_header(mime) - if "charset" in mime_options: - try: - codecs.lookup(mime_options["charset"]) - except LookupError: - raise RuntimeError("Header declared unknown encoding %s" % value) - - # Read the response body over the network - body = f.read() - - # Save the result in a temporary file - ## Set file mode - if mime.startswith("text/"): - mode = "w" - encoding = mime_options.get("charset", "UTF-8") - try: - body = body.decode(encoding) - except UnicodeError: - raise RuntimeError("Could not decode response body using %s encoding declared in header!" % encoding) - else: - mode = "wb" - encoding = None - ## Write - tmpf = tempfile.NamedTemporaryFile(mode, encoding=encoding, delete=False) - size = tmpf.write(body) - tmpf.close() - self.tmp_filename = tmpf.name - ui_out.debug("Wrote %d byte response to %s." 
% (size, self.tmp_filename)) - - # Maintain cache and log - if self.options["cache"]: - self.cache.add(gi.url, mime, self.tmp_filename) - self._log_visit(gi, address, size) - - return gi, mime, body, self.tmp_filename - - def _send_request(self, gi): - """Send a selector to a given host and port. - Returns the resolved address and binary file with the reply.""" - if gi.scheme == "gemini": - # For Gemini requests, connect to the host and port specified in the URL - host, port = gi.host, gi.port - elif gi.scheme == "gopher": - # For Gopher requests, use the configured proxy - host, port = self.options["gopher_proxy"].rsplit(":", 1) - ui_out.debug("Using gopher proxy: " + self.options["gopher_proxy"]) - elif gi.scheme in ("http", "https"): - host, port = self.options["http_proxy"].rsplit(":",1) - ui_out.debug("Using http proxy: " + self.options["http_proxy"]) # Do DNS resolution addresses = self._get_addresses(host, port) @@ -646,6 +636,37 @@ Slow internet connection? Use 'set timeout' to be more patient.""") return status, meta, address, f + def _write_response_to_file(self, mime, mime_options, f, destination): + # Read the response body over the network + body = f.read() + + # Save the result to a temporary file + + ## Determine file mode + if mime.startswith("text/"): + mode = "w" + encoding = mime_options.get("charset", "UTF-8") + try: + body = body.decode(encoding) + except UnicodeError: + raise RuntimeError("Could not decode response body using %s encoding declared in header!" 
% encoding) + else: + mode = "wb" + encoding = None + + ## Use a temporary file if a filename was not provided + if destination: + fp = open(destination, mode, encoding=encoding) + else: + fp = tempfile.NamedTemporaryFile(mode, encoding=encoding, delete=False) + self.tmp_filename = fp.name + + ## Write + size = fp.write(body) + fp.close() + + return body, size, destination or self.tmp_filename + def _get_addresses(self, host, port): # DNS lookup - will get IPv4 and IPv6 records if IPv6 is enabled if ":" in host: @@ -1296,37 +1317,39 @@ Use 'ls -l' to see URLs.""" print("You must provide an index, a filename, or both.") return - # Next, fetch the item to save, if it's not the current one. + # Determine GI to save if index: - last_gi = self.gi try: gi = self.lookup[index-1] - self._go_to_gi(gi, update_hist = False, handle = False) + saving_current = False except IndexError: print ("Index too high!") - self.gi = last_gi return else: gi = self.gi + saving_current = True # Derive filename from current GI's path, if one hasn't been set if not filename: filename = os.path.basename(gi.path) - # Check for filename collisions and actually do the save if safe + # Check for filename collisions if os.path.exists(filename): print("File %s already exists!" % filename) - else: + return + + # Actually do the save operation + if saving_current: # Don't use _get_active_tmpfile() here, because we want to save the # "source code" of menus, not the rendered view - this way AV-98 # can navigate to it later. 
src = gi.path if gi.scheme == "file" else self.tmp_filename shutil.copyfile(src, filename) - print("Saved to %s" % filename) + else: + ## Download an item that's not the current one + self._fetch_over_network(gi, filename) - # Restore gi if necessary - if index != None: - self._go_to_gi(last_gi, handle=False) + print("Saved to %s" % filename) @needs_gi def do_url(self, *args): From 048b04bed20c8ad80d2e50b8b8b7e7ea8270e85c Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Wed, 15 Nov 2023 18:06:33 +0100 Subject: [PATCH 20/61] Extensively refactor the way temporary files are used. Instead of littering /tmp with one file per download plus one per rendered gemtext file, just reuse the same two files over and over. If enabled, caching creates separate copies. Pretty sure this solves Issue #35, but I'll test and close that after also improving the use of cache files... --- av98.py | 155 +++++++++++++++++++++++++------------------------------ cache.py | 18 +++---- 2 files changed, 80 insertions(+), 93 deletions(-) diff --git a/av98.py b/av98.py index e74fdfb..941d437 100755 --- a/av98.py +++ b/av98.py @@ -235,13 +235,16 @@ class GeminiClient(cmd.Cmd): print("Creating config directory {}".format(self.config_dir)) os.makedirs(self.config_dir) + ## Claim two temporary filenames to use as buffers + self.raw_file_buffer = tempfile.NamedTemporaryFile(delete=False).name + self.rendered_file_buffer = tempfile.NamedTemporaryFile(delete=False).name + self.no_cert_prompt = "\x1b[38;5;76m" + "AV-98" + "\x1b[38;5;255m" + "> " + "\x1b[0m" self.cert_prompt = "\x1b[38;5;202m" + "AV-98" + "\x1b[38;5;255m" + "+cert> " + "\x1b[0m" self.prompt = self.no_cert_prompt self.gi = None self.history = [] self.hist_index = 0 - self.idx_filename = "" self.index = [] self.index_index = -1 self.lookup = self.index @@ -249,7 +252,8 @@ class GeminiClient(cmd.Cmd): self.page_index = 0 self.permanent_redirects = {} self.restricted = restricted - self.tmp_filename = "" + self.active_raw_file = "" + 
self.active_rendered_file = "" self.visited_hosts = set() self.waypoints = [] @@ -290,6 +294,9 @@ class GeminiClient(cmd.Cmd): self.tofu_store = TofuStore(self.config_dir) self.cache = Cache() + ui_out.debug("Raw buffer: ", self.raw_file_buffer) + ui_out.debug("Rendered buffer: ", self.rendered_file_buffer) + def _go_to_gi(self, gi, update_hist=True, check_cache=True, handle=True): """This method might be considered "the heart of AV-98". Everything involved in fetching a gemini resource happens here: @@ -326,13 +333,15 @@ you'll be able to transparently follow links to Gopherspace!""") # Use local file, use cache, or hit the network if resource is not cached if gi.scheme == "file": - mime, body, tmpfile = self._handle_local_file(gi) + mime = self._handle_local_file(gi) + self.active_raw_file = gi.path elif check_cache and self.options["cache"] and self.cache.check(gi.url): - mime, body, tmpfile = self.cache.get(gi.url) + mime, self.active_raw_file = self.cache.get(gi.url) self.log["cache_hits"] += 1 else: try: - gi, mime, body, tmpfile = self._fetch_over_network(gi) + gi, mime = self._fetch_over_network(gi) + self.active_raw_file = self.raw_file_buffer except UserAbortException: return except Exception as err: @@ -341,19 +350,16 @@ you'll be able to transparently follow links to Gopherspace!""") # Render gemtext, update index if mime == "text/gemini": - self._handle_gemtext(body, gi) + self._handle_gemtext(gi) + self.active_rendered_file = self.rendered_file_buffer + else: + self.active_rendered_file = self.active_raw_file # Pass file to handler, unless we were asked not to if handle: - if mime == "text/gemini": - targetfile = self.idx_filename - elif gi.scheme == "file": - targetfile = gi.path - else: - targetfile = tmpfile cmd_str = self._get_handler_cmd(mime) try: - subprocess.call(shlex.split(cmd_str % targetfile)) + subprocess.call(shlex.split(cmd_str % self.active_rendered_file)) except FileNotFoundError: print("Handler program %s not found!" 
% shlex.split(cmd_str)[0]) print("You can use the ! command to specify another handler program or pipeline.") @@ -385,7 +391,7 @@ Slow internet connection? Use 'set timeout' to be more patient.""") else: mime, noise = mimetypes.guess_type(gi.path) body = None - return mime, body, gi.path + return mime def _fetch_over_network(self, gi, destination=None): @@ -481,15 +487,15 @@ Slow internet connection? Use 'set timeout' to be more patient.""") raise RuntimeError("Header declared unknown encoding %s" % value) # Save response body to disk - body, size, filename = self._write_response_to_file(mime, mime_options, f, destination) - ui_out.debug("Wrote %d byte response to %s." % (size, filename)) + body, size = self._write_response_to_file(mime, mime_options, f, destination) + ui_out.debug("Wrote %d byte response to %s." % (size, destination)) # Maintain cache and log if self.options["cache"]: - self.cache.add(gi.url, mime, filename) + self.cache.add(gi.url, mime, self.raw_file_buffer) self._log_visit(gi, address, size) - return gi, mime, body, filename + return gi, mime def _send_request(self, gi): """Send a selector to a given host and port. @@ -654,18 +660,12 @@ Slow internet connection? Use 'set timeout' to be more patient.""") mode = "wb" encoding = None - ## Use a temporary file if a filename was not provided - if destination: - fp = open(destination, mode, encoding=encoding) - else: - fp = tempfile.NamedTemporaryFile(mode, encoding=encoding, delete=False) - self.tmp_filename = fp.name - ## Write + fp = open(destination or self.raw_file_buffer, mode=mode, encoding=encoding) size = fp.write(body) fp.close() - return body, size, destination or self.tmp_filename + return body, size def _get_addresses(self, host, port): # DNS lookup - will get IPv4 and IPv6 records if IPv6 is enabled @@ -747,48 +747,48 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") ui_out.debug("Using handler: %s" % cmd_str) return cmd_str - def _handle_gemtext(self, body, menu_gi): + def _handle_gemtext(self, menu_gi): """Simultaneously parse and render a text/gemini document. Parsing causes self.index to be populated with GeminiItems. - Rendering causes self.idx_filename to contain a rendered view.""" + Rendering causes self.rendered_file_buffer to contain a rendered + view.""" self.index = [] preformatted = False - if self.idx_filename: - os.unlink(self.idx_filename) - tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) - self.idx_filename = tmpf.name - for line in body.splitlines(): - if line.startswith("```"): - preformatted = not preformatted - elif preformatted: - tmpf.write(line + "\n") - elif line.startswith("=>"): - try: - gi = GeminiItem.from_map_line(line, menu_gi) - self.index.append(gi) - tmpf.write(self._format_geminiitem(len(self.index), gi) + "\n") - except: - ui_out.debug("Skipping possible link: %s" % line) - elif line.startswith("* "): - line = line[1:].lstrip("\t ") - tmpf.write(textwrap.fill(line, self.options["width"], - initial_indent = "• ", subsequent_indent=" ") + "\n") - elif line.startswith(">"): - line = line[1:].lstrip("\t ") - tmpf.write(textwrap.fill(line, self.options["width"], - initial_indent = "> ", subsequent_indent="> ") + "\n") - elif line.startswith("###"): - line = line[3:].lstrip("\t ") - tmpf.write("\x1b[4m" + line + "\x1b[0m""\n") - elif line.startswith("##"): - line = line[2:].lstrip("\t ") - tmpf.write("\x1b[1m" + line + "\x1b[0m""\n") - elif line.startswith("#"): - line = line[1:].lstrip("\t ") - tmpf.write("\x1b[1m\x1b[4m" + line + "\x1b[0m""\n") - else: - tmpf.write(textwrap.fill(line, self.options["width"]) + "\n") - tmpf.close() + + with open(self.active_raw_file, "r") as fp: + body = fp.read() + with open(self.rendered_file_buffer, "w") as fp: + for line in body.splitlines(): + if line.startswith("```"): + preformatted = not 
preformatted + elif preformatted: + fp.write(line + "\n") + elif line.startswith("=>"): + try: + gi = GeminiItem.from_map_line(line, menu_gi) + self.index.append(gi) + fp.write(self._format_geminiitem(len(self.index), gi) + "\n") + except: + ui_out.debug("Skipping possible link: %s" % line) + elif line.startswith("* "): + line = line[1:].lstrip("\t ") + fp.write(textwrap.fill(line, self.options["width"], + initial_indent = "• ", subsequent_indent=" ") + "\n") + elif line.startswith(">"): + line = line[1:].lstrip("\t ") + fp.write(textwrap.fill(line, self.options["width"], + initial_indent = "> ", subsequent_indent="> ") + "\n") + elif line.startswith("###"): + line = line[3:].lstrip("\t ") + fp.write("\x1b[4m" + line + "\x1b[0m""\n") + elif line.startswith("##"): + line = line[2:].lstrip("\t ") + fp.write("\x1b[1m" + line + "\x1b[0m""\n") + elif line.startswith("#"): + line = line[1:].lstrip("\t ") + fp.write("\x1b[1m\x1b[4m" + line + "\x1b[0m""\n") + else: + fp.write(textwrap.fill(line, self.options["width"]) + "\n") self.lookup = self.index self.page_index = 0 @@ -826,14 +826,6 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") self.log["ipv6_requests"] += 1 self.log["ipv6_bytes_recvd"] += size - def _get_active_tmpfile(self): - if self.mime == "text/gemini": - return self.idx_filename - elif self.gi.scheme == "file": - return self.gi.path - else: - return self.tmp_filename - def _load_client_cert(self): """ Interactively load a TLS client certificate from the filesystem in PEM @@ -1250,27 +1242,27 @@ Use 'ls -l' to see URLs.""" @needs_gi def do_cat(self, *args): """Run most recently visited item through "cat" command.""" - subprocess.call(shlex.split("cat %s" % self._get_active_tmpfile())) + subprocess.call(shlex.split("cat %s" % self.active_rendered_file)) @needs_gi def do_less(self, *args): """Run most recently visited item through "less" command.""" cmd_str = self._get_handler_cmd(self.mime) - cmd_str = cmd_str % self._get_active_tmpfile() + cmd_str = cmd_str % self.active_rendered_file subprocess.call("%s | less -R" % cmd_str, shell=True) @needs_gi def do_fold(self, *args): """Run most recently visited item through "fold" command.""" cmd_str = self._get_handler_cmd(self.mime) - cmd_str = cmd_str % self._get_active_tmpfile() + cmd_str = cmd_str % self.active_rendered_file subprocess.call("%s | fold -w 70 -s" % cmd_str, shell=True) @restricted @needs_gi def do_shell(self, line): """'cat' most recently visited item through a shell pipeline.""" - subprocess.call(("cat %s |" % self._get_active_tmpfile()) + line, shell=True) + subprocess.call(("cat %s |" % self.active_rendered_file) + line, shell=True) @restricted @needs_gi @@ -1340,10 +1332,7 @@ Use 'ls -l' to see URLs.""" # Actually do the save operation if saving_current: - # Don't use _get_active_tmpfile() here, because we want to save the - # "source code" of menus, not the rendered view - this way AV-98 - # can navigate to it later. 
- src = gi.path if gi.scheme == "file" else self.tmp_filename + src = gi.path if gi.scheme == "file" else self.active_raw_file shutil.copyfile(src, filename) else: ## Download an item that's not the current one @@ -1434,10 +1423,8 @@ current gemini browsing session.""" self.tofu_store.close() # Clean up after ourself self.cache.empty() - if self.tmp_filename and os.path.exists(self.tmp_filename): - os.unlink(self.tmp_filename) - if self.idx_filename and os.path.exists(self.idx_filename): - os.unlink(self.idx_filename) + os.unlink(self.raw_file_buffer) + os.unlink(self.rendered_file_buffer) for cert in self.transient_certs_created: for ext in (".crt", ".key"): diff --git a/cache.py b/cache.py index 4ca55f1..097c5d7 100644 --- a/cache.py +++ b/cache.py @@ -4,6 +4,8 @@ _MAX_CACHE_AGE_SECS = 180 import logging import os import os.path +import shutil +import tempfile import time ui_out = logging.getLogger("av98_logger") @@ -34,9 +36,13 @@ class Cache: self.validatecache() def add(self, url, mime, filename): - + # Copy client's buffer file to new cache file + tmpf = tempfile.NamedTemporaryFile(delete=False) + tmpf.close() + shutil.copyfile(filename, tmpf.name) + # Remember details self.cache_timestamps[url] = time.time() - self.cache[url] = (mime, filename) + self.cache[url] = (mime, tmpf.name) if len(self.cache) > _MAX_CACHE_SIZE: self._trim() self.validatecache() @@ -60,13 +66,7 @@ class Cache: self.validatecache() def get(self, url): - mime, filename = self.cache[url] - if mime.startswith("text/gemini"): - with open(filename, "r") as fp: - body = fp.read() - return mime, body, filename - else: - return mime, None, filename + return self.cache[url] def empty(self): for mime, filename in self.cache.values(): From d2fe381c3eb02bc8d04dbae79a68450ac961cfa2 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Wed, 15 Nov 2023 18:19:22 +0100 Subject: [PATCH 21/61] Change Cache to use a TemporaryDirectory for its storage to ensure and simplify thorough cleanups upon shutdown. 
Thanks to Ghost for making me aware of this possibility! --- av98.py | 1 - cache.py | 8 ++------ 2 files changed, 2 insertions(+), 7 deletions(-) diff --git a/av98.py b/av98.py index 941d437..ee77ba3 100755 --- a/av98.py +++ b/av98.py @@ -1422,7 +1422,6 @@ current gemini browsing session.""" # Close TOFU DB self.tofu_store.close() # Clean up after ourself - self.cache.empty() os.unlink(self.raw_file_buffer) os.unlink(self.rendered_file_buffer) diff --git a/cache.py b/cache.py index 097c5d7..2c80afb 100644 --- a/cache.py +++ b/cache.py @@ -16,6 +16,7 @@ class Cache: self.cache = {} self.cache_timestamps = {} + self.tempdir = tempfile.TemporaryDirectory() def check(self, url): if url not in self.cache: @@ -37,7 +38,7 @@ class Cache: def add(self, url, mime, filename): # Copy client's buffer file to new cache file - tmpf = tempfile.NamedTemporaryFile(delete=False) + tmpf = tempfile.NamedTemporaryFile(dir=self.tempdir.name, delete=False) tmpf.close() shutil.copyfile(filename, tmpf.name) # Remember details @@ -68,11 +69,6 @@ class Cache: def get(self, url): return self.cache[url] - def empty(self): - for mime, filename in self.cache.values(): - if os.path.exists(filename): - os.unlink(filename) - def validatecache(self): assert self.cache.keys() == self.cache_timestamps.keys() for _, filename in self.cache.values(): From 480f2cc15f7d40992579813930cf66febff1d2c4 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Wed, 15 Nov 2023 18:29:52 +0100 Subject: [PATCH 22/61] Rejig how do_bookmark() handles an argument to simplify go_to_gi(). 
--- av98.py | 27 ++++++++++++++++----------- 1 file changed, 16 insertions(+), 11 deletions(-) diff --git a/av98.py b/av98.py index ee77ba3..66ef94b 100755 --- a/av98.py +++ b/av98.py @@ -297,7 +297,7 @@ class GeminiClient(cmd.Cmd): ui_out.debug("Raw buffer: ", self.raw_file_buffer) ui_out.debug("Rendered buffer: ", self.rendered_file_buffer) - def _go_to_gi(self, gi, update_hist=True, check_cache=True, handle=True): + def _go_to_gi(self, gi, update_hist=True, check_cache=True): """This method might be considered "the heart of AV-98". Everything involved in fetching a gemini resource happens here: sending the request over the network, parsing the response if @@ -355,14 +355,13 @@ you'll be able to transparently follow links to Gopherspace!""") else: self.active_rendered_file = self.active_raw_file - # Pass file to handler, unless we were asked not to - if handle: - cmd_str = self._get_handler_cmd(mime) - try: - subprocess.call(shlex.split(cmd_str % self.active_rendered_file)) - except FileNotFoundError: - print("Handler program %s not found!" % shlex.split(cmd_str)[0]) - print("You can use the ! command to specify another handler program or pipeline.") + # Pass file to handler + cmd_str = self._get_handler_cmd(mime) + try: + subprocess.call(shlex.split(cmd_str % self.active_rendered_file)) + except FileNotFoundError: + print("Handler program %s not found!" % shlex.split(cmd_str)[0]) + print("You can use the ! command to specify another handler program or pipeline.") # Update state self.gi = gi @@ -1368,10 +1367,16 @@ Bookmarks are stored using the 'add' command.""" print("bookmarks command takes a single integer argument!") return gi = GeminiItem("file://" + os.path.abspath(bm_file)) - self._go_to_gi(gi, update_hist=False, handle = not args) if args: - # Use argument as a numeric index + # Semi-sneaky + # Parses the bookmark file and modifies self.index so that + # self.default(n) works, but avoids invoking a handler so the + # full bookmark list is never seen. 
+ self.active_raw_file = gi.path + self._handle_gemtext(gi) self.default(line) + else: + self._go_to_gi(gi, update_hist=False) ### Help def do_help(self, arg): From 67729fb7113945806f1b70c2ee81c67f8af3916a Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Wed, 15 Nov 2023 18:40:48 +0100 Subject: [PATCH 23/61] Count redirects in black box recorder. --- av98.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/av98.py b/av98.py index 66ef94b..7384b37 100755 --- a/av98.py +++ b/av98.py @@ -289,6 +289,7 @@ class GeminiClient(cmd.Cmd): "reset_connections": 0, "timeouts": 0, "cache_hits": 0, + "redirects_followed": 0 } self.tofu_store = TofuStore(self.config_dir) @@ -450,6 +451,7 @@ Slow internet connection? Use 'set timeout' to be more patient.""") ui_out.debug("Following redirect to %s." % new_gi.url) ui_out.debug("This is consecutive redirect number %d." % len(previous_redirectors)) previous_redirectors.add(gi.url) + self.log["redirects_followed"] += 1 if status == "31": # Permanent redirect self.permanent_redirects[gi.url] = new_gi.url @@ -1417,6 +1419,7 @@ current gemini browsing session.""" lines.append(("Refused connections:", self.log["refused_connections"])) lines.append(("Reset connections:", self.log["reset_connections"])) lines.append(("Cache hits:", self.log["cache_hits"])) + lines.append(("Redirects followed:", self.log["redirects_followed"])) # Print for key, value in lines: print(key.ljust(24) + str(value).rjust(8)) From 23b0597b6d8745b9cab96bb109e1caf92622ce0e Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Wed, 15 Nov 2023 19:38:13 +0100 Subject: [PATCH 24/61] Update email address and copyright years. 
--- av98.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/av98.py b/av98.py index 7384b37..ae4ffd8 100755 --- a/av98.py +++ b/av98.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # AV-98 Gemini client # Dervied from VF-1 (https://github.com/solderpunk/VF-1), -# (C) 2019, 2020 Solderpunk +# (C) 2019, 2020, 2023 Solderpunk # With contributions from: # - danceka # - From 9e2cce7ce07dbf063bc8860de051814bea565a65 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Wed, 15 Nov 2023 19:38:45 +0100 Subject: [PATCH 25/61] Print full tracebacks from exceptions when debugging is enabled. --- av98.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/av98.py b/av98.py index ae4ffd8..e39138e 100755 --- a/av98.py +++ b/av98.py @@ -32,6 +32,7 @@ import subprocess import sys import tempfile import time +import traceback import urllib.parse import uuid import webbrowser @@ -382,6 +383,7 @@ you'll be able to transparently follow links to Gopherspace!""") Slow internet connection? Use 'set timeout' to be more patient.""") else: ui_out.error("ERROR: " + str(err)) + ui_out.debug(traceback.format_exc()) def _handle_local_file(self, gi): if gi.path.endswith(".gmi"): # TODO: be better about this From f78b6ff780e708182e962ec808b10cb7fe4b2ac3 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Wed, 15 Nov 2023 19:45:36 +0100 Subject: [PATCH 26/61] Move handling of permanent redirects inside of _fetch_over_network(). 
--- av98.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/av98.py b/av98.py index e39138e..e30a980 100755 --- a/av98.py +++ b/av98.py @@ -327,12 +327,6 @@ you'll be able to transparently follow links to Gopherspace!""") print("Sorry, no support for {} links.".format(gi.scheme)) return - # Obey permanent redirects - if gi.url in self.permanent_redirects: - new_gi = GeminiItem(self.permanent_redirects[gi.url], name=gi.name) - self._go_to_gi(new_gi) - return - # Use local file, use cache, or hit the network if resource is not cached if gi.scheme == "file": mime = self._handle_local_file(gi) @@ -399,6 +393,11 @@ Slow internet connection? Use 'set timeout' to be more patient.""") previous_redirectors = set() while True: + # Obey permanent redirects + if gi.url in self.permanent_redirects: + gi = GeminiItem(self.permanent_redirects[gi.url], name=gi.name) + continue + # Send request to server try: status, meta, address, f = self._send_request(gi) From 50c43c75b4663b141f04c2a07f4ab0dd375f1066 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Wed, 15 Nov 2023 19:46:01 +0100 Subject: [PATCH 27/61] Provider progress animation when downloading files > 100 KiB. --- av98.py | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/av98.py b/av98.py index e30a980..a9e27de 100755 --- a/av98.py +++ b/av98.py @@ -645,8 +645,22 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") return status, meta, address, f def _write_response_to_file(self, mime, mime_options, f, destination): + spinner_seq = ["|", "/", "-", "\\"] # Read the response body over the network - body = f.read() + body = bytearray([]) + chunk_count = 0 + while True: + chunk = f.read(100*1024) + chunk_count += 1 + if not chunk: + break + body.extend(chunk) + if chunk_count > 1: + spinner = spinner_seq[chunk_count % 4] + if chunk_count < 10: + print("{} Received {} KiB...".format(spinner, chunk_count*100), end="\r") + else: + print("{} Received {} MiB...".format(spinner, chunk_count/10.0), end="\r") # Save the result to a temporary file From 91ff51a0ef00759862bb4b0fff1c12980f065a54 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Wed, 15 Nov 2023 19:47:17 +0100 Subject: [PATCH 28/61] Fix subtle breakage on some certs, when ssl_dnsname_match throws a non-CertificateError exception. --- tofu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tofu.py b/tofu.py index 7f54b7b..50c6c76 100644 --- a/tofu.py +++ b/tofu.py @@ -76,7 +76,7 @@ class TofuStore: try: ssl._dnsname_match(name, host) break - except ssl.CertificateError: + except Exception: continue else: # If we didn't break out, none of the names were valid From 681b11b8a4d9f58fbfe49f91b38f0c1ff49cede7 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Thu, 16 Nov 2023 19:13:20 +0100 Subject: [PATCH 29/61] Include local file and cached resource handling inside nice try/except error printing. 
--- av98.py | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/av98.py b/av98.py index a9e27de..d2d3652 100755 --- a/av98.py +++ b/av98.py @@ -328,21 +328,21 @@ you'll be able to transparently follow links to Gopherspace!""") return # Use local file, use cache, or hit the network if resource is not cached - if gi.scheme == "file": - mime = self._handle_local_file(gi) - self.active_raw_file = gi.path - elif check_cache and self.options["cache"] and self.cache.check(gi.url): - mime, self.active_raw_file = self.cache.get(gi.url) - self.log["cache_hits"] += 1 - else: - try: + try: + if gi.scheme == "file": + mime = self._handle_local_file(gi) + self.active_raw_file = gi.path + elif check_cache and self.options["cache"] and self.cache.check(gi.url): + mime, self.active_raw_file = self.cache.get(gi.url) + self.log["cache_hits"] += 1 + else: gi, mime = self._fetch_over_network(gi) self.active_raw_file = self.raw_file_buffer - except UserAbortException: - return - except Exception as err: - self._print_friendly_error(err) - return + except UserAbortException: + return + except Exception as err: + self._print_friendly_error(err) + return # Render gemtext, update index if mime == "text/gemini": From 0ce09f37a60f999455ab66eb375e3d0fe1c1a1a1 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Thu, 16 Nov 2023 19:14:22 +0100 Subject: [PATCH 30/61] Gracefully handle requests for file:// URLs where the file does not exist. --- av98.py | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/av98.py b/av98.py index d2d3652..b14716b 100755 --- a/av98.py +++ b/av98.py @@ -375,18 +375,19 @@ you'll be able to transparently follow links to Gopherspace!""") elif isinstance(err, (TimeoutError, socket.timeout)): ui_out.error("""ERROR: Connection timed out! Slow internet connection? 
Use 'set timeout' to be more patient.""") + elif isinstance(err, FileNotFoundError): + ui_out.error("ERROR: Local file not found!") else: ui_out.error("ERROR: " + str(err)) ui_out.debug(traceback.format_exc()) def _handle_local_file(self, gi): - if gi.path.endswith(".gmi"): # TODO: be better about this - mime = "text/gemini" - with open(gi.path, "r") as fp: - body = fp.read() - else: - mime, noise = mimetypes.guess_type(gi.path) - body = None + if not os.path.exists(gi.path): + raise FileNotFoundError + mime, noise = mimetypes.guess_type(gi.path) + if not mime: + if gi.path.endswith(".gmi"): # TODO: be better about this + mime = "text/gemini" return mime def _fetch_over_network(self, gi, destination=None): From 71f8a3dc860460f1fa5238488c411f2e7d1a372b Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Thu, 16 Nov 2023 19:18:30 +0100 Subject: [PATCH 31/61] Gracefully handle requests for file:// URLs which point at a directory. --- av98.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/av98.py b/av98.py index b14716b..02ee865 100755 --- a/av98.py +++ b/av98.py @@ -330,6 +330,10 @@ you'll be able to transparently follow links to Gopherspace!""") # Use local file, use cache, or hit the network if resource is not cached try: if gi.scheme == "file": + if not os.path.exists(gi.path): + raise FileNotFoundError + elif os.path.isdir(gi.path): + raise IsADirectoryError mime = self._handle_local_file(gi) self.active_raw_file = gi.path elif check_cache and self.options["cache"] and self.cache.check(gi.url): @@ -377,13 +381,13 @@ you'll be able to transparently follow links to Gopherspace!""") Slow internet connection? 
Use 'set timeout' to be more patient.""") elif isinstance(err, FileNotFoundError): ui_out.error("ERROR: Local file not found!") + elif isinstance(err, IsADirectoryError): + ui_out.error("ERROR: Viewing local directories is not supported!") else: ui_out.error("ERROR: " + str(err)) ui_out.debug(traceback.format_exc()) def _handle_local_file(self, gi): - if not os.path.exists(gi.path): - raise FileNotFoundError mime, noise = mimetypes.guess_type(gi.path) if not mime: if gi.path.endswith(".gmi"): # TODO: be better about this From bd7c5c2110e5b2a6eb9249da5d81390f285f17df Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Fri, 17 Nov 2023 19:44:06 +0100 Subject: [PATCH 32/61] Log network errors a little more carefully. --- av98.py | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) diff --git a/av98.py b/av98.py index 02ee865..698a849 100755 --- a/av98.py +++ b/av98.py @@ -404,18 +404,7 @@ Slow internet connection? Use 'set timeout' to be more patient.""") continue # Send request to server - try: - status, meta, address, f = self._send_request(gi) - except Exception as err: - if isinstance(err, socket.gaierror): - self.log["dns_failures"] += 1 - elif isinstance(err, ConnectionRefusedError): - self.log["refused_connections"] += 1 - elif isinstance(err, ConnectionResetError): - self.log["reset_connections"] += 1 - elif isinstance(err, (TimeoutError, socket.timeout)): - self.log["timeouts"] += 1 - raise err + status, meta, address, f = self._send_request(gi) # Update redirect loop/maze escaping state if not status.startswith("3"): @@ -552,7 +541,12 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") self.client_certs.pop(gi.host) # Do DNS resolution - addresses = self._get_addresses(host, port) + try: + addresses = self._get_addresses(host, port) + except Exception as err: + if isinstance(err, socket.gaierror): + self.log["dns_failures"] += 1 + raise err # Prepare TLS context def _newest_supported_protocol(): @@ -604,12 +598,19 @@ Slow internet connection? Use 'set timeout' to be more patient.""") try: s.connect(address[4]) break - except OSError as e: + except Exception as e: err = e + # Log network errors + if isinstance(err, ConnectionRefusedError): + self.log["refused_connections"] += 1 + elif isinstance(err, ConnectionResetError): + self.log["reset_connections"] += 1 + elif isinstance(err, (TimeoutError, socket.timeout)): + self.log["timeouts"] += 1 else: # If we couldn't connect to *any* of the addresses, just - # bubble up the exception from the last attempt and deny - # knowledge of earlier failures. + # bubble up the exception from the last attempt for the + # sake of error reporting to the user. raise err if sys.version_info.minor >=5: From 4f354ab2915756a24a31e44bc35b027c3455efaa Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Fri, 17 Nov 2023 19:56:47 +0100 Subject: [PATCH 33/61] Nicely align setting names and values. --- av98.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/av98.py b/av98.py index 698a849..1513d97 100755 --- a/av98.py +++ b/av98.py @@ -989,15 +989,18 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") @restricted def do_set(self, line): """View or set various options.""" + # Compute some constants for pretty alignment + ljust = max((len(k) for k in self.options.keys())) + rjust = max((len(str(v)) for v in self.options.values())) if not line.strip(): # Show all current settings for option in sorted(self.options.keys()): - print("%s %s" % (option, self.options[option])) + print("%s %s" % (option.ljust(ljust+4), str(self.options[option]).rjust(rjust))) elif len(line.split()) == 1: # Show current value of one specific setting option = line.strip() if option in self.options: - print("%s %s" % (option, self.options[option])) + print("%s %s" % (option.ljust(ljust+4), str(self.options[option]).rjust(rjust))) else: print("Unrecognised option %s" % option) else: From a9b34278a8ed9d9276c315e2f2e012acd6a3bd51 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Fri, 17 Nov 2023 19:59:12 +0100 Subject: [PATCH 34/61] Don't hardcode black box spacing values. --- av98.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/av98.py b/av98.py index 1513d97..c2057b5 100755 --- a/av98.py +++ b/av98.py @@ -1445,8 +1445,10 @@ current gemini browsing session.""" lines.append(("Cache hits:", self.log["cache_hits"])) lines.append(("Redirects followed:", self.log["redirects_followed"])) # Print + ljust = max((len(k) for k,v in lines)) + rjust = max((len(str(v)) for k,v in lines)) for key, value in lines: - print(key.ljust(24) + str(value).rjust(8)) + print(key.ljust(ljust+4) + str(value).rjust(rjust)) ### The end! def do_quit(self, *args): From 607223c25aa0f7721107a5b9eedd15e4b7ccf32b Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Fri, 17 Nov 2023 20:08:48 +0100 Subject: [PATCH 35/61] Get real persnickety about nice option/blackbox alignment. 
--- av98.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/av98.py b/av98.py index c2057b5..8bb7361 100755 --- a/av98.py +++ b/av98.py @@ -992,15 +992,16 @@ Slow internet connection? Use 'set timeout' to be more patient.""") # Compute some constants for pretty alignment ljust = max((len(k) for k in self.options.keys())) rjust = max((len(str(v)) for v in self.options.values())) + gap = 48 - (ljust + rjust) if not line.strip(): # Show all current settings for option in sorted(self.options.keys()): - print("%s %s" % (option.ljust(ljust+4), str(self.options[option]).rjust(rjust))) + print(option.ljust(ljust+gap) + str(self.options[option]).rjust(rjust)) elif len(line.split()) == 1: # Show current value of one specific setting option = line.strip() if option in self.options: - print("%s %s" % (option.ljust(ljust+4), str(self.options[option]).rjust(rjust))) + print(option.ljust(ljust+gap) + str(self.options[option]).rjust(rjust)) else: print("Unrecognised option %s" % option) else: @@ -1447,8 +1448,9 @@ current gemini browsing session.""" # Print ljust = max((len(k) for k,v in lines)) rjust = max((len(str(v)) for k,v in lines)) + gap = 48 - (ljust + rjust) for key, value in lines: - print(key.ljust(ljust+4) + str(value).rjust(rjust)) + print(key.ljust(ljust+gap) + str(value).rjust(rjust)) ### The end! def do_quit(self, *args): From de7e5dc254f1f735f25c2a59d8ceaced1eb4c477 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sat, 18 Nov 2023 14:22:32 +0100 Subject: [PATCH 36/61] Factor out client certificate management into its own class/file. 
--- av98.py | 305 +++++++++---------------------------------------- clientcerts.py | 233 +++++++++++++++++++++++++++++++++++++ 2 files changed, 286 insertions(+), 252 deletions(-) create mode 100644 clientcerts.py diff --git a/av98.py b/av98.py index 8bb7361..44a5c35 100755 --- a/av98.py +++ b/av98.py @@ -19,7 +19,6 @@ import cgi import codecs import fnmatch import getpass -import glob import logging import mimetypes import os @@ -34,7 +33,6 @@ import tempfile import time import traceback import urllib.parse -import uuid import webbrowser try: @@ -44,6 +42,8 @@ except ModuleNotFoundError: from cache import Cache from tofu import TofuStore +from clientcerts import ClientCertificateManager + _VERSION = "1.0.2dev" _MAX_REDIRECTS = 5 @@ -258,13 +258,6 @@ class GeminiClient(cmd.Cmd): self.visited_hosts = set() self.waypoints = [] - self.client_certs = { - "active": None - } - self.active_cert_domains = [] - self.active_is_transient = False - self.transient_certs_created = [] - self.options = { "debug" : False, "ipv6" : True, @@ -294,6 +287,7 @@ class GeminiClient(cmd.Cmd): } self.tofu_store = TofuStore(self.config_dir) + self.client_cert_manager = ClientCertificateManager(self.config_dir) self.cache = Cache() ui_out.debug("Raw buffer: ", self.raw_file_buffer) @@ -459,7 +453,13 @@ Slow internet connection? Use 'set timeout' to be more patient.""") # Client cert elif status.startswith("6"): - self._handle_cert_request(meta, status, gi.host) + if self.restricted: + print("The server is requesting a client certificate.") + print("These are not supported in restricted mode, sorry.") + raise UserAbortException() + + if not self.client_cert_manager.handle_cert_request(meta, status, gi.host): + raise UserAbortException() continue # Invalid status @@ -509,37 +509,6 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") host, port = self.options["http_proxy"].rsplit(":",1) ui_out.debug("Using http proxy: " + self.options["http_proxy"]) - # Be careful with client certificates! - # Are we crossing a domain boundary? - if self.active_cert_domains and gi.host not in self.active_cert_domains: - if self.active_is_transient: - print("Permanently delete currently active transient certificate?") - resp = input("Y/N? ") - if resp.strip().lower() in ("y", "yes"): - print("Destroying certificate.") - self._deactivate_client_cert() - else: - print("Staying here.") - raise UserAbortException() - else: - print("PRIVACY ALERT: Deactivate client cert before connecting to a new domain?") - resp = input("Y/N? ") - if resp.strip().lower() in ("n", "no"): - print("Keeping certificate active for {}".format(gi.host)) - else: - print("Deactivating certificate.") - self._deactivate_client_cert() - - # Suggest reactivating previous certs - if not self.client_certs["active"] and gi.host in self.client_certs: - print("PRIVACY ALERT: Reactivate previously used client cert for {}?".format(gi.host)) - resp = input("Y/N? ") - if resp.strip().lower() in ("y", "yes"): - self._activate_client_cert(*self.client_certs[gi.host]) - else: - print("Remaining unidentified.") - self.client_certs.pop(gi.host) - # Do DNS resolution try: addresses = self._get_addresses(host, port) @@ -549,44 +518,9 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") raise err # Prepare TLS context - def _newest_supported_protocol(): - if sys.version_info >= (3, 10): - return ssl.PROTOCOL_TLS_CLIENT - elif sys.version_info >= (3, 6): - return ssl.PROTOCOL_TLS - else: - return ssl.PROTOCOL_TLSv1_2 - context = ssl.SSLContext(_newest_supported_protocol()) - - # Use CAs or TOFU - if self.options["tls_mode"] == "ca": - context.verify_mode = ssl.CERT_REQUIRED - context.check_hostname = True - context.load_default_certs() - else: - context.check_hostname = False - context.verify_mode = ssl.CERT_NONE - # Impose minimum TLS version - ## In 3.7 and above, this is easy... - if sys.version_info.minor >= 7: - context.minimum_version = ssl.TLSVersion.TLSv1_2 - ## Otherwise, it seems very hard... - ## The below is less strict than it ought to be, but trying to disable - ## TLS v1.1 here using ssl.OP_NO_TLSv1_1 produces unexpected failures - ## with recent versions of OpenSSL. What a mess... - else: - context.options |= ssl.OP_NO_SSLv3 - context.options |= ssl.OP_NO_SSLv2 - # Try to enforce sensible ciphers - try: - context.set_ciphers("AESGCM+ECDHE:AESGCM+DHE:CHACHA20+ECDHE:CHACHA20+DHE:!DSS:!SHA1:!MD5:@STRENGTH") - except ssl.SSLError: - # Rely on the server to only support sensible things, I guess... - pass - # Load client certificate if needed - if self.client_certs["active"]: - certfile, keyfile = self.client_certs["active"] - context.load_cert_chain(certfile, keyfile) + context = self._prepare_SSL_context(self.options["tls_mode"]) + if not self.client_cert_manager.associate_client_cert(context, gi): + raise UserAbortException() # Connect to remote host by any address possible err = None @@ -622,11 +556,6 @@ Slow internet connection? Use 'set timeout' to be more patient.""") cert = s.getpeercert(binary_form=True) self.tofu_store.validate_cert(address[4][0], host, cert) - # Remember that we showed the current cert to this domain... 
- if self.client_certs["active"]: - self.active_cert_domains.append(gi.host) - self.client_certs[gi.host] = self.client_certs["active"] - # Send request and wrap response in a file descriptor ui_out.debug("Sending %s" % gi.url) s.sendall((gi.url + CRLF).encode("UTF-8")) @@ -708,47 +637,45 @@ Slow internet connection? Use 'set timeout' to be more patient.""") return addresses - def _handle_cert_request(self, meta, status, host): - - # Don't do client cert stuff in restricted mode, as in principle - # it could be used to fill up the disk by creating a whole lot of - # certificates - if self.restricted: - print("The server is requesting a client certificate.") - print("These are not supported in restricted mode, sorry.") - raise UserAbortException() - - print("SERVER SAYS: ", meta) - # Present different messages for different 6x statuses, but - # handle them the same. - if status in ("64", "65"): - print("The server rejected your certificate because it is either expired or not yet valid.") - elif status == "63": - print("The server did not accept your certificate.") - print("You may need to e.g. coordinate with the admin to get your certificate fingerprint whitelisted.") + def _prepare_SSL_context(self, cert_validation_mode="tofu"): + # Flail against version churn + if sys.version_info >= (3, 10): + _newest_supported_protocol = ssl.PROTOCOL_TLS_CLIENT + elif sys.version_info >= (3, 6): + _newest_supported_protocol = ssl.PROTOCOL_TLS else: - print("The site {} is requesting a client certificate.".format(host)) - print("This will allow the site to recognise you across requests.") + _newest_supported_protocol = ssl.PROTOCOL_TLSv1_2 + context = ssl.SSLContext(_newest_supported_protocol) - # Give the user choices - print("What do you want to do?") - print("1. Give up.") - print("2. Generate a new transient certificate.") - print("3. Generate a new persistent certificate.") - print("4. Load a previously generated certificate.") - print("5. 
Load a certificate from an external file.") - choice = input("> ").strip() - if choice == "2": - self._generate_transient_cert_cert() - elif choice == "3": - self._generate_persistent_client_cert() - elif choice == "4": - self._choose_client_cert() - elif choice == "5": - self._load_client_cert() + # Use CAs or TOFU + if cert_validation_mode == "ca": + context.verify_mode = ssl.CERT_REQUIRED + context.check_hostname = True + context.load_default_certs() else: - print("Giving up.") - raise UserAbortException() + context.check_hostname = False + context.verify_mode = ssl.CERT_NONE + + # Impose minimum TLS version + ## In 3.7 and above, this is easy... + if sys.version_info.minor >= 7: + context.minimum_version = ssl.TLSVersion.TLSv1_2 + ## Otherwise, it seems very hard... + ## The below is less strict than it ought to be, but trying to disable + ## TLS v1.1 here using ssl.OP_NO_TLSv1_1 produces unexpected failures + ## with recent versions of OpenSSL. What a mess... + else: + context.options |= ssl.OP_NO_SSLv3 + context.options |= ssl.OP_NO_SSLv2 + + # Try to enforce sensible ciphers + try: + context.set_ciphers("AESGCM+ECDHE:AESGCM+DHE:CHACHA20+ECDHE:CHACHA20+DHE:!DSS:!SHA1:!MD5:@STRENGTH") + except ssl.SSLError: + # Rely on the server to only support sensible things, I guess... + pass + + return context def _get_handler_cmd(self, mimetype): # Now look for a handler for this mimetype @@ -848,110 +775,6 @@ Slow internet connection? Use 'set timeout' to be more patient.""") self.log["ipv6_requests"] += 1 self.log["ipv6_bytes_recvd"] += size - def _load_client_cert(self): - """ - Interactively load a TLS client certificate from the filesystem in PEM - format. 
- """ - print("Loading client certificate file, in PEM format (blank line to cancel)") - certfile = input("Certfile path: ").strip() - if not certfile: - print("Aborting.") - return - certfile = os.path.expanduser(certfile) - if not os.path.isfile(certfile): - print("Certificate file {} does not exist.".format(certfile)) - return - print("Loading private key file, in PEM format (blank line to cancel)") - keyfile = input("Keyfile path: ").strip() - if not keyfile: - print("Aborting.") - return - keyfile = os.path.expanduser(keyfile) - if not os.path.isfile(keyfile): - print("Private key file {} does not exist.".format(keyfile)) - return - self._activate_client_cert(certfile, keyfile) - - def _generate_transient_cert_cert(self): - """ - Use `openssl` command to generate a new transient client certificate - with 24 hours of validity. - """ - certdir = os.path.join(self.config_dir, "transient_certs") - name = str(uuid.uuid4()) - self._generate_client_cert(certdir, name, transient=True) - self.active_is_transient = True - self.transient_certs_created.append(name) - - def _generate_persistent_client_cert(self): - """ - Interactively use `openssl` command to generate a new persistent client - certificate with one year of validity. - """ - certdir = os.path.join(self.config_dir, "client_certs") - print("What do you want to name this new certificate?") - print("Answering `mycert` will create `{0}/mycert.crt` and `{0}/mycert.key`".format(certdir)) - name = input("> ") - if not name.strip(): - print("Aborting.") - return - self._generate_client_cert(certdir, name) - - def _generate_client_cert(self, certdir, basename, transient=False): - """ - Use `openssl` binary to generate a client certificate (which may be - transient or persistent) and save the certificate and private key to the - specified directory with the specified basename. 
- """ - if not os.path.exists(certdir): - os.makedirs(certdir) - certfile = os.path.join(certdir, basename+".crt") - keyfile = os.path.join(certdir, basename+".key") - cmd = "openssl req -x509 -newkey rsa:2048 -days {} -nodes -keyout {} -out {}".format(1 if transient else 365, keyfile, certfile) - if transient: - cmd += " -subj '/CN={}'".format(basename) - os.system(cmd) - self._activate_client_cert(certfile, keyfile) - - def _choose_client_cert(self): - """ - Interactively select a previously generated client certificate and - activate it. - """ - certdir = os.path.join(self.config_dir, "client_certs") - certs = glob.glob(os.path.join(certdir, "*.crt")) - if len(certs) == 0: - print("There are no previously generated certificates.") - return - certdir = {} - for n, cert in enumerate(certs): - certdir[str(n+1)] = (cert, os.path.splitext(cert)[0] + ".key") - print("{}. {}".format(n+1, os.path.splitext(os.path.basename(cert))[0])) - choice = input("> ").strip() - if choice in certdir: - certfile, keyfile = certdir[choice] - self._activate_client_cert(certfile, keyfile) - else: - print("What?") - - def _activate_client_cert(self, certfile, keyfile): - self.client_certs["active"] = (certfile, keyfile) - self.active_cert_domains = [] - self.prompt = self.cert_prompt - ui_out.debug("Using ID {} / {}.".format(*self.client_certs["active"])) - - def _deactivate_client_cert(self): - if self.active_is_transient: - for filename in self.client_certs["active"]: - os.remove(filename) - for domain in self.active_cert_domains: - self.client_certs.pop(domain) - self.client_certs["active"] = None - self.active_cert_domains = [] - self.prompt = self.no_cert_prompt - self.active_is_transient = False - # Cmd implementation follows def default(self, line): @@ -1047,25 +870,7 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") def do_cert(self, line): """Manage client certificates""" print("Managing client certificates") - if self.client_certs["active"]: - print("Active certificate: {}".format(self.client_certs["active"][0])) - print("1. Deactivate client certificate.") - print("2. Generate new certificate.") - print("3. Load previously generated certificate.") - print("4. Load externally created client certificate from file.") - print("Enter blank line to exit certificate manager.") - choice = input("> ").strip() - if choice == "1": - print("Deactivating client certificate.") - self._deactivate_client_cert() - elif choice == "2": - self._generate_persistent_client_cert() - elif choice == "3": - self._choose_client_cert() - elif choice == "4": - self._load_client_cert() - else: - print("Aborting.") + self.client_cert_manager.manage() @restricted def do_handler(self, line): @@ -1460,12 +1265,8 @@ current gemini browsing session.""" # Clean up after ourself os.unlink(self.raw_file_buffer) os.unlink(self.rendered_file_buffer) - - for cert in self.transient_certs_created: - for ext in (".crt", ".key"): - certfile = os.path.join(self.config_dir, "transient_certs", cert+ext) - if os.path.exists(certfile): - os.remove(certfile) + self.client_cert_manager.cleanup() + # Say goodbye print() print("Thank you for flying AV-98!") sys.exit() @@ -1522,7 +1323,7 @@ def main(): # Act on args if args.tls_cert: # If tls_key is None, python will attempt to load the key from tls_cert. 
- gc._activate_client_cert(args.tls_cert, args.tls_key) + gc.client_cert_manager._activate_client_cert(args.tls_cert, args.tls_key) if args.bookmarks: gc.cmdqueue.append("bookmarks") elif args.url: diff --git a/clientcerts.py b/clientcerts.py new file mode 100644 index 0000000..25f2903 --- /dev/null +++ b/clientcerts.py @@ -0,0 +1,233 @@ +import glob +import logging +import os +import os.path +import uuid + +ui_out = logging.getLogger("av98_logger") + +class ClientCertificateManager: + + def __init__(self, config_dir): + + self.config_dir = config_dir + self.client_certs = { + "active": None + } + self.active_cert_domains = [] + self.active_is_transient = False + self.transient_certs_created = [] + + def cleanup(self): + for cert in self.transient_certs_created: + for ext in (".crt", ".key"): + certfile = os.path.join(self.config_dir, "transient_certs", cert+ext) + if os.path.exists(certfile): + os.remove(certfile) + + def manage(self): + if self.client_certs["active"]: + print("Active certificate: {}".format(self.client_certs["active"][0])) + print("1. Deactivate client certificate.") + print("2. Generate new certificate.") + print("3. Load previously generated certificate.") + print("4. Load externally created client certificate from file.") + print("Enter blank line to exit certificate manager.") + choice = input("> ").strip() + if choice == "1": + print("Deactivating client certificate.") + self._deactivate_client_cert() + elif choice == "2": + self._generate_persistent_client_cert() + elif choice == "3": + self._choose_client_cert() + elif choice == "4": + self._load_client_cert() + else: + print("Aborting.") + + def associate_client_cert(self, context, gi): + # Be careful with client certificates! + # Are we crossing a domain boundary? + if self.client_certs["active"] and gi.host not in self.active_cert_domains: + if self.active_is_transient: + print("Permanently delete currently active transient certificate?") + resp = input("Y/N? 
") + if resp.strip().lower() in ("y", "yes"): + print("Destroying certificate.") + self._deactivate_client_cert() + else: + print("Staying here.") + return False + else: + print("PRIVACY ALERT: Deactivate client cert before connecting to a new domain?") + resp = input("Y/N? ") + if resp.strip().lower() in ("n", "no"): + print("Keeping certificate active for {}".format(gi.host)) + self.active_cert_domains.append(gi.host) + self.client_certs[gi.host] = self.client_certs["active"] + else: + print("Deactivating certificate.") + self._deactivate_client_cert() + + # Suggest reactivating previous certs + if not self.client_certs["active"] and gi.host in self.client_certs: + print("PRIVACY ALERT: Reactivate previously used client cert for {}?".format(gi.host)) + resp = input("Y/N? ") + if resp.strip().lower() in ("y", "yes"): + self._activate_client_cert(*self.client_certs[gi.host]) + else: + print("Remaining unidentified.") + self.client_certs.pop(gi.host) + + # Associate certs to context based on above + if self.client_certs["active"]: + certfile, keyfile = self.client_certs["active"] + context.load_cert_chain(certfile, keyfile) + + return True + + def handle_cert_request(self, meta, status, host): + + # Don't do client cert stuff in restricted mode, as in principle + # it could be used to fill up the disk by creating a whole lot of + # certificates + print("SERVER SAYS: ", meta) + # Present different messages for different 6x statuses, but + # handle them the same. + if status in ("64", "65"): + print("The server rejected your certificate because it is either expired or not yet valid.") + elif status == "63": + print("The server did not accept your certificate.") + print("You may need to e.g. 
coordinate with the admin to get your certificate fingerprint whitelisted.") + else: + print("The site {} is requesting a client certificate.".format(host)) + print("This will allow the site to recognise you across requests.") + + # Give the user choices + print("What do you want to do?") + print("1. Give up.") + print("2. Generate a new transient certificate.") + print("3. Generate a new persistent certificate.") + print("4. Load a previously generated certificate.") + print("5. Load a certificate from an external file.") + choice = input("> ").strip() + if choice == "2": + self._generate_transient_cert_cert() + elif choice == "3": + self._generate_persistent_client_cert() + elif choice == "4": + self._choose_client_cert() + elif choice == "5": + self._load_client_cert() + else: + print("Giving up.") + return False + + if self.client_certs["active"]: + self.active_cert_domains.append(host) + self.client_certs[host] = self.client_certs["active"] + + return True + + def _load_client_cert(self): + """ + Interactively load a TLS client certificate from the filesystem in PEM + format. + """ + print("Loading client certificate file, in PEM format (blank line to cancel)") + certfile = input("Certfile path: ").strip() + if not certfile: + print("Aborting.") + return + certfile = os.path.expanduser(certfile) + if not os.path.isfile(certfile): + print("Certificate file {} does not exist.".format(certfile)) + return + print("Loading private key file, in PEM format (blank line to cancel)") + keyfile = input("Keyfile path: ").strip() + if not keyfile: + print("Aborting.") + return + keyfile = os.path.expanduser(keyfile) + if not os.path.isfile(keyfile): + print("Private key file {} does not exist.".format(keyfile)) + return + self._activate_client_cert(certfile, keyfile) + + def _generate_transient_cert_cert(self): + """ + Use `openssl` command to generate a new transient client certificate + with 24 hours of validity. 
+ """ + certdir = os.path.join(self.config_dir, "transient_certs") + name = str(uuid.uuid4()) + self._generate_client_cert(certdir, name, transient=True) + self.active_is_transient = True + self.transient_certs_created.append(name) + + def _generate_persistent_client_cert(self): + """ + Interactively use `openssl` command to generate a new persistent client + certificate with one year of validity. + """ + certdir = os.path.join(self.config_dir, "client_certs") + print("What do you want to name this new certificate?") + print("Answering `mycert` will create `{0}/mycert.crt` and `{0}/mycert.key`".format(certdir)) + name = input("> ") + if not name.strip(): + print("Aborting.") + return + self._generate_client_cert(certdir, name) + + def _generate_client_cert(self, certdir, basename, transient=False): + """ + Use `openssl` binary to generate a client certificate (which may be + transient or persistent) and save the certificate and private key to the + specified directory with the specified basename. + """ + if not os.path.exists(certdir): + os.makedirs(certdir) + certfile = os.path.join(certdir, basename+".crt") + keyfile = os.path.join(certdir, basename+".key") + cmd = "openssl req -x509 -newkey rsa:2048 -days {} -nodes -keyout {} -out {}".format(1 if transient else 365, keyfile, certfile) + if transient: + cmd += " -subj '/CN={}'".format(basename) + os.system(cmd) + self._activate_client_cert(certfile, keyfile) + + def _choose_client_cert(self): + """ + Interactively select a previously generated client certificate and + activate it. + """ + certdir = os.path.join(self.config_dir, "client_certs") + certs = glob.glob(os.path.join(certdir, "*.crt")) + if len(certs) == 0: + print("There are no previously generated certificates.") + return + certdir = {} + for n, cert in enumerate(certs): + certdir[str(n+1)] = (cert, os.path.splitext(cert)[0] + ".key") + print("{}. 
{}".format(n+1, os.path.splitext(os.path.basename(cert))[0])) + choice = input("> ").strip() + if choice in certdir: + certfile, keyfile = certdir[choice] + self._activate_client_cert(certfile, keyfile) + else: + print("What?") + + def _activate_client_cert(self, certfile, keyfile): + self.client_certs["active"] = (certfile, keyfile) + self.active_cert_domains = [] + ui_out.debug("Using ID {} / {}.".format(*self.client_certs["active"])) + + def _deactivate_client_cert(self): + if self.active_is_transient: + for filename in self.client_certs["active"]: + os.remove(filename) + for domain in self.active_cert_domains: + self.client_certs.pop(domain) + self.client_certs["active"] = None + self.active_cert_domains = [] + self.active_is_transient = False From 247f01e3e70dd7ea6fde3fba1cc56e2c779a9e1a Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sat, 18 Nov 2023 14:23:56 +0100 Subject: [PATCH 37/61] Update bookmarks to reflect permanent redirects upon exiting. --- av98.py | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/av98.py b/av98.py index 44a5c35..5b6f8fa 100755 --- a/av98.py +++ b/av98.py @@ -775,6 +775,38 @@ Slow internet connection? Use 'set timeout' to be more patient.""") self.log["ipv6_requests"] += 1 self.log["ipv6_bytes_recvd"] += size + def _maintain_bookmarks(self): + # Nothing to do if no bookmarks exist! 
+ bm_file = os.path.join(self.config_dir, "bookmarks.gmi") + if not os.path.exists(bm_file): + return + + # Backup bookmark file + backup_file = tempfile.NamedTemporaryFile(delete=False) + backup_file.close() + backup_file = backup_file.name + shutil.copyfile(bm_file, backup_file) + + # Attempt maintenance, restore backup if anything fails + try: + with open(backup_file, "r") as fp_old, open(bm_file, "w") as fp_new: + for line in fp_old: + if not line.startswith("=>"): + fp_new.write(line) + continue + old_url = line.split()[1] + url = old_url + while url in self.permanent_redirects: + url = self.permanent_redirects[url] + if url != old_url: + ui_out.debug("Updating old bookmark url {} to {} based on permanent redirect.".format(old_url, url)) + fp_new.write(line.replace(old_url, url)) + except Exception as err: + shutil.copyfile(backup_file, bm_file) + ui_out.debug(traceback.format_exc()) + finally: + os.unlink(backup_file) + # Cmd implementation follows def default(self, line): @@ -1266,6 +1298,8 @@ current gemini browsing session.""" os.unlink(self.raw_file_buffer) os.unlink(self.rendered_file_buffer) self.client_cert_manager.cleanup() + # Apply permanent redirects to bookmarks + self._maintain_bookmarks() # Say goodbye print() print("Thank you for flying AV-98!") From 2a70985176c70f3c58c3e2c012a3f112acded7c7 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sat, 18 Nov 2023 15:59:36 +0100 Subject: [PATCH 38/61] General tidy-up of entire av98.py file. Most a matter of rearranging the order of methods to flow sensibly, as well writing or updating docstrings, getting rid of old unused return values, and fixing a few very minor defects. 
--- av98.py | 369 +++++++++++++++++++++++++++++++++----------------------- 1 file changed, 218 insertions(+), 151 deletions(-) diff --git a/av98.py b/av98.py index 5b6f8fa..b133a8c 100755 --- a/av98.py +++ b/av98.py @@ -48,7 +48,6 @@ _VERSION = "1.0.2dev" _MAX_REDIRECTS = 5 - # Command abbreviations _ABBREVS = { "a": "add", @@ -83,7 +82,6 @@ _MIME_HANDLERS = { "text/*": "cat %s", } - # monkey-patch Gemini support in urllib.parse # see https://github.com/python/cpython/blob/master/Lib/urllib/parse.py urllib.parse.uses_relative.append("gemini") @@ -264,8 +262,8 @@ class GeminiClient(cmd.Cmd): "timeout" : 10, "width" : 80, "auto_follow_redirects" : True, - "gopher_proxy" : None, "tls_mode" : "tofu", + "gopher_proxy" : None, "http_proxy": None, "cache" : False } @@ -294,11 +292,15 @@ class GeminiClient(cmd.Cmd): ui_out.debug("Rendered buffer: ", self.rendered_file_buffer) def _go_to_gi(self, gi, update_hist=True, check_cache=True): - """This method might be considered "the heart of AV-98". + """ + This method might be considered "the heart of AV-98". Everything involved in fetching a gemini resource happens here: sending the request over the network, parsing the response if its a menu, storing the response in a temporary file, choosing - and calling a handler program, and updating the history.""" + and calling a handler program, and updating the history. + Most navigation commands are just a thin wrapper around a call + to this. 
+ """ # Don't try to speak to servers running other protocols if gi.scheme in ("http", "https"): @@ -342,7 +344,7 @@ you'll be able to transparently follow links to Gopherspace!""") self._print_friendly_error(err) return - # Render gemtext, update index + # Render gemtext, updating the index if mime == "text/gemini": self._handle_gemtext(gi) self.active_rendered_file = self.rendered_file_buffer @@ -363,25 +365,10 @@ you'll be able to transparently follow links to Gopherspace!""") if update_hist: self._update_history(gi) - def _print_friendly_error(self, err): - if isinstance(err, socket.gaierror): - ui_out.error("ERROR: DNS error!") - elif isinstance(err, ConnectionRefusedError): - ui_out.error("ERROR: Connection refused!") - elif isinstance(err, ConnectionResetError): - ui_out.error("ERROR: Connection reset!") - elif isinstance(err, (TimeoutError, socket.timeout)): - ui_out.error("""ERROR: Connection timed out! -Slow internet connection? Use 'set timeout' to be more patient.""") - elif isinstance(err, FileNotFoundError): - ui_out.error("ERROR: Local file not found!") - elif isinstance(err, IsADirectoryError): - ui_out.error("ERROR: Viewing local directories is not supported!") - else: - ui_out.error("ERROR: " + str(err)) - ui_out.debug(traceback.format_exc()) - def _handle_local_file(self, gi): + """ + Guess the MIME type of a local file, to determine the best handler. + """ mime, noise = mimetypes.guess_type(gi.path) if not mime: if gi.path.endswith(".gmi"): # TODO: be better about this @@ -389,7 +376,10 @@ Slow internet connection? Use 'set timeout' to be more patient.""") return mime def _fetch_over_network(self, gi, destination=None): - + """ + Fetch the provided GeminiItem over the network and save the received + content to a file. + """ previous_redirectors = set() while True: # Obey permanent redirects @@ -483,10 +473,10 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") raise RuntimeError("Header declared unknown encoding %s" % value) # Save response body to disk - body, size = self._write_response_to_file(mime, mime_options, f, destination) + size = self._write_response_to_file(mime, mime_options, f, destination) ui_out.debug("Wrote %d byte response to %s." % (size, destination)) - # Maintain cache and log + # Maintain cache and update flight recorder if self.options["cache"]: self.cache.add(gi.url, mime, self.raw_file_buffer) self._log_visit(gi, address, size) @@ -494,8 +484,15 @@ Slow internet connection? Use 'set timeout' to be more patient.""") return gi, mime def _send_request(self, gi): - """Send a selector to a given host and port. - Returns the resolved address and binary file with the reply.""" + """ + Send a Gemini request to the appropriate host for the provided + GeminiItem. This is usually the GI's own host and port attributes, + but if it's a gopher:// or http(s):// item, a proxy might be used. + + Returns the received response header, parsed into a status code + and meta, plus a the address object that was connected to and a + file interface to the underlying network socket. + """ # Figure out which host to connect to if gi.scheme == "gemini": @@ -552,16 +549,16 @@ Slow internet connection? Use 'set timeout' to be more patient.""") ui_out.debug("Cipher is: {}.".format(s.cipher())) # Do TOFU - if self.options["tls_mode"] != "ca": + if self.options["tls_mode"] == "tofu": cert = s.getpeercert(binary_form=True) self.tofu_store.validate_cert(address[4][0], host, cert) # Send request and wrap response in a file descriptor ui_out.debug("Sending %s" % gi.url) s.sendall((gi.url + CRLF).encode("UTF-8")) - - # Read back response f = s.makefile(mode = "rb") + + # Fetch response header # Spec dictates should not exceed 1024 bytes, # so maximum valid header length is 1027 bytes. header = f.readline(1027) @@ -571,7 +568,7 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") header = header.strip() ui_out.debug("Response header: %s." % header) - # Validate header + # Validate response header status, meta = header.split(maxsplit=1) if header[2:].strip() else (header[:2], "") if len(meta) > 1024 or len(status) != 2 or not status.isnumeric(): f.close() @@ -579,46 +576,11 @@ Slow internet connection? Use 'set timeout' to be more patient.""") return status, meta, address, f - def _write_response_to_file(self, mime, mime_options, f, destination): - spinner_seq = ["|", "/", "-", "\\"] - # Read the response body over the network - body = bytearray([]) - chunk_count = 0 - while True: - chunk = f.read(100*1024) - chunk_count += 1 - if not chunk: - break - body.extend(chunk) - if chunk_count > 1: - spinner = spinner_seq[chunk_count % 4] - if chunk_count < 10: - print("{} Received {} KiB...".format(spinner, chunk_count*100), end="\r") - else: - print("{} Received {} MiB...".format(spinner, chunk_count/10.0), end="\r") - - # Save the result to a temporary file - - ## Determine file mode - if mime.startswith("text/"): - mode = "w" - encoding = mime_options.get("charset", "UTF-8") - try: - body = body.decode(encoding) - except UnicodeError: - raise RuntimeError("Could not decode response body using %s encoding declared in header!" % encoding) - else: - mode = "wb" - encoding = None - - ## Write - fp = open(destination or self.raw_file_buffer, mode=mode, encoding=encoding) - size = fp.write(body) - fp.close() - - return body, size - def _get_addresses(self, host, port): + """ + Convert a host and port into an address object suitable for + instantiating a socket. + """ # DNS lookup - will get IPv4 and IPv6 records if IPv6 is enabled if ":" in host: # This is likely a literal IPv6 address, so we can *only* ask for @@ -638,6 +600,9 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") return addresses def _prepare_SSL_context(self, cert_validation_mode="tofu"): + """ + Specify a bunch of low level SSL settings. + """ # Flail against version churn if sys.version_info >= (3, 10): _newest_supported_protocol = ssl.PROTOCOL_TLS_CLIENT @@ -677,30 +642,81 @@ Slow internet connection? Use 'set timeout' to be more patient.""") return context - def _get_handler_cmd(self, mimetype): - # Now look for a handler for this mimetype - # Consider exact matches before wildcard matches - exact_matches = [] - wildcard_matches = [] - for handled_mime, cmd_str in _MIME_HANDLERS.items(): - if "*" in handled_mime: - wildcard_matches.append((handled_mime, cmd_str)) - else: - exact_matches.append((handled_mime, cmd_str)) - for handled_mime, cmd_str in exact_matches + wildcard_matches: - if fnmatch.fnmatch(mimetype, handled_mime): + def _write_response_to_file(self, mime, mime_options, f, destination): + """ + Given a file handler representing a network socket which will yield + the response body for a successful Gemini request, and the associated + MIME information, download the response body and save it in the + specified file. text/* responses which use an encoding other than + UTF-8 will be transcoded to UTF-8 before hitting the disk. + + Returns the size in bytes of the downloaded response. 
+ """ + # Read the response body over the network + spinner_seq = ["|", "/", "-", "\\"] + body = bytearray([]) + chunk_count = 0 + while True: + chunk = f.read(100*1024) + chunk_count += 1 + if not chunk: break + body.extend(chunk) + if chunk_count > 1: + spinner = spinner_seq[chunk_count % 4] + if chunk_count < 10: + print("{} Received {} KiB...".format(spinner, chunk_count*100), end="\r") + else: + print("{} Received {} MiB...".format(spinner, chunk_count/10.0), end="\r") + print(" "*80, end="\r") # Clean up prompt space + + # Determine file mode + if mime.startswith("text/"): + mode = "w" + # Decode received bytes with response-specified encoding... + encoding = mime_options.get("charset", "UTF-8") + try: + body = body.decode(encoding) + except UnicodeError: + raise RuntimeError("Could not decode response body using %s encoding declared in header!" % encoding) + # ...but alway save to disk in UTF-8 + encoding = "UTF-8" else: - # Use "xdg-open" as a last resort. - cmd_str = "xdg-open %s" - ui_out.debug("Using handler: %s" % cmd_str) - return cmd_str + mode = "wb" + encoding = None + + # Write + fp = open(destination or self.raw_file_buffer, mode=mode, encoding=encoding) + size = fp.write(body) + fp.close() + + return size + + def _log_visit(self, gi, address, size): + """ + Update the "black box flight recorder" with details of requests and + responses. + """ + if not address: + return + self.log["requests"] += 1 + self.log["bytes_recvd"] += size + self.visited_hosts.add(address) + if address[0] == socket.AF_INET: + self.log["ipv4_requests"] += 1 + self.log["ipv4_bytes_recvd"] += size + elif address[0] == socket.AF_INET6: + self.log["ipv6_requests"] += 1 + self.log["ipv6_bytes_recvd"] += size def _handle_gemtext(self, menu_gi): - """Simultaneously parse and render a text/gemini document. - Parsing causes self.index to be populated with GeminiItems. + """ + Simultaneously parse and render a text/gemini document. 
+ Parsing causes self.index to be populated with GeminiItems + representing the links in the document. Rendering causes self.rendered_file_buffer to contain a rendered - view.""" + view of the document. + """ self.index = [] preformatted = False @@ -744,15 +760,41 @@ Slow internet connection? Use 'set timeout' to be more patient.""") self.index_index = -1 def _format_geminiitem(self, index, gi, url=False): + """ + Render a link line. + """ protocol = "" if gi.scheme == "gemini" else " %s" % gi.scheme line = "[%d%s] %s" % (index, protocol, gi.name or gi.url) if gi.name and url: line += " (%s)" % gi.url return line - def _show_lookup(self, offset=0, end=None, url=False): - for n, gi in enumerate(self.lookup[offset:end]): - print(self._format_geminiitem(n+offset+1, gi, url)) + def _get_handler_cmd(self, mimetype): + """ + Given the MIME type of a downloaded item, figure out which program to + open it with. + + Returns a string suitable for use with subprocess.call after the '%s' + has been replaced with the name of the file where the downloaded item + was saved. + """ + # Now look for a handler for this mimetype + # Consider exact matches before wildcard matches + exact_matches = [] + wildcard_matches = [] + for handled_mime, cmd_str in _MIME_HANDLERS.items(): + if "*" in handled_mime: + wildcard_matches.append((handled_mime, cmd_str)) + else: + exact_matches.append((handled_mime, cmd_str)) + for handled_mime, cmd_str in exact_matches + wildcard_matches: + if fnmatch.fnmatch(mimetype, handled_mime): + break + else: + # Use "xdg-open" as a last resort. + cmd_str = "xdg-open %s" + ui_out.debug("Using handler: %s" % cmd_str) + return cmd_str def _update_history(self, gi): # Don't duplicate @@ -762,20 +804,34 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") self.history.append(gi) self.hist_index = len(self.history) - 1 - def _log_visit(self, gi, address, size): - if not address: - return - self.log["requests"] += 1 - self.log["bytes_recvd"] += size - self.visited_hosts.add(address) - if address[0] == socket.AF_INET: - self.log["ipv4_requests"] += 1 - self.log["ipv4_bytes_recvd"] += size - elif address[0] == socket.AF_INET6: - self.log["ipv6_requests"] += 1 - self.log["ipv6_bytes_recvd"] += size + def _print_friendly_error(self, err): + if isinstance(err, socket.gaierror): + ui_out.error("ERROR: DNS error!") + elif isinstance(err, ConnectionRefusedError): + ui_out.error("ERROR: Connection refused!") + elif isinstance(err, ConnectionResetError): + ui_out.error("ERROR: Connection reset!") + elif isinstance(err, (TimeoutError, socket.timeout)): + ui_out.error("""ERROR: Connection timed out! +Slow internet connection? Use 'set timeout' to be more patient.""") + elif isinstance(err, FileNotFoundError): + ui_out.error("ERROR: Local file not found!") + elif isinstance(err, IsADirectoryError): + ui_out.error("ERROR: Viewing local directories is not supported!") + else: + ui_out.error("ERROR: " + str(err)) + ui_out.debug(traceback.format_exc()) + + def _show_lookup(self, offset=0, end=None, url=False): + for n, gi in enumerate(self.lookup[offset:end]): + print(self._format_geminiitem(n+offset+1, gi, url)) def _maintain_bookmarks(self): + """ + Update any bookmarks whose URLs we tried to fetch during the current + session and received a permanent redirect for, so they are fetched + directly at the new address in future. + """ # Nothing to do if no bookmarks exist! bm_file = os.path.join(self.config_dir, "bookmarks.gmi") if not os.path.exists(bm_file): @@ -810,6 +866,11 @@ Slow internet connection? Use 'set timeout' to be more patient.""") # Cmd implementation follows def default(self, line): + """ + This is called when none of the do_* methods match the user's + input. 
This is probably either an abbreviated command, or a numeric + index for the lookup table. + """ if line.strip() == "EOF": return self.onecmd("quit") elif line.strip() == "..": @@ -831,16 +892,19 @@ Slow internet connection? Use 'set timeout' to be more patient.""") print("What?") return + # Pick out a GeminiItemt try: gi = self.lookup[n-1] except IndexError: print ("Index too high!") return + # Go to selected item self.index_index = n self._go_to_gi(gi) ### Settings + @restricted def do_set(self, line): """View or set various options.""" @@ -898,12 +962,6 @@ Slow internet connection? Use 'set timeout' to be more patient.""") pass self.options[option] = value - @restricted - def do_cert(self, line): - """Manage client certificates""" - print("Managing client certificates") - self.client_cert_manager.manage() - @restricted def do_handler(self, line): """View or set handler commands for different MIME types.""" @@ -923,15 +981,11 @@ Slow internet connection? Use 'set timeout' to be more patient.""") if "%s" not in handler: print("Are you sure you don't want to pass the filename to the handler?") - def do_abbrevs(self, *args): - """Print all AV-98 command abbreviations.""" - header = "Command Abbreviations:" - self.stdout.write("\n{}\n".format(str(header))) - if self.ruler: - self.stdout.write("{}\n".format(str(self.ruler * len(header)))) - for k, v in _ABBREVS.items(): - self.stdout.write("{:<7} {}\n".format(k, v)) - self.stdout.write("\n") + @restricted + def do_cert(self, line): + """Manage client certificates""" + print("Managing client certificates") + self.client_cert_manager.manage() ### Stuff for getting around def do_go(self, line): @@ -961,9 +1015,15 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") """Go up one directory in the path.""" self._go_to_gi(self.gi.up()) + @needs_gi + def do_root(self, *args): + """Go to root selector of the server hosting current item.""" + self._go_to_gi(self.gi.root()) + def do_back(self, *args): """Go back to the previous gemini item.""" if not self.history or self.hist_index == 0: + print("You are already at the end of your history.") return self.hist_index -= 1 gi = self.history[self.hist_index] @@ -972,6 +1032,7 @@ Slow internet connection? Use 'set timeout' to be more patient.""") def do_forward(self, *args): """Go forward to the next gemini item.""" if not self.history or self.hist_index == len(self.history) - 1: + print("You are already at the end of your history.") return self.hist_index += 1 gi = self.history[self.hist_index] @@ -986,10 +1047,10 @@ Slow internet connection? Use 'set timeout' to be more patient.""") self.lookup = self.index return self.onecmd(str(self.index_index-1)) - @needs_gi - def do_root(self, *args): - """Go to root selector of the server hosting current item.""" - self._go_to_gi(self.gi.root()) + def do_gus(self, line): + """Submit a search query to the Gemini search engine.""" + gus = GeminiItem("gemini://geminispace.info/search") + self._go_to_gi(gus.query(line)) def do_tour(self, line): """Add index items as waypoints on a tour, which is basically a FIFO @@ -1059,10 +1120,6 @@ Think of it like marks in vi: 'mark a'='ma' and 'go a'=''a'.""" else: print("Invalid mark, must be one letter") - def do_version(self, line): - """Display version information.""" - print("AV-98 " + _VERSION) - ### Stuff that modifies the lookup table def do_ls(self, line): """List contents of current index. 
@@ -1071,11 +1128,6 @@ Use 'ls -l' to see URLs.""" self._show_lookup(url = "-l" in line) self.page_index = 0 - def do_gus(self, line): - """Submit a search query to the Gemini search engine.""" - gus = GeminiItem("gemini://geminispace.info/search") - self._go_to_gi(gus.query(line)) - def do_history(self, *args): """Display history.""" self.lookup = self.history @@ -1104,19 +1156,19 @@ Use 'ls -l' to see URLs.""" ### Stuff that does something to most recently viewed item @needs_gi def do_cat(self, *args): - """Run most recently visited item through "cat" command.""" + """Run most recently visited item through `cat` command.""" subprocess.call(shlex.split("cat %s" % self.active_rendered_file)) @needs_gi def do_less(self, *args): - """Run most recently visited item through "less" command.""" + """Run most recently visited item through `less` command.""" cmd_str = self._get_handler_cmd(self.mime) cmd_str = cmd_str % self.active_rendered_file subprocess.call("%s | less -R" % cmd_str, shell=True) @needs_gi def do_fold(self, *args): - """Run most recently visited item through "fold" command.""" + """Run most recently visited item through `fold` command.""" cmd_str = self._get_handler_cmd(self.mime) cmd_str = cmd_str % self.active_rendered_file subprocess.call("%s | fold -w 70 -s" % cmd_str, shell=True) @@ -1124,16 +1176,16 @@ Use 'ls -l' to see URLs.""" @restricted @needs_gi def do_shell(self, line): - """'cat' most recently visited item through a shell pipeline.""" + """`cat` most recently visited item through a shell pipeline.""" subprocess.call(("cat %s |" % self.active_rendered_file) + line, shell=True) @restricted @needs_gi def do_save(self, line): """Save an item to the filesystem. -'save n filename' saves menu item n to the specified filename. -'save filename' saves the last viewed item to the specified filename. -'save n' saves menu item n to an automagic filename.""" +`save n filename` saves menu item n to the specified filename. 
+`save filename` saves the last viewed item to the specified filename. +`save n` saves menu item n to an automagic filename.""" args = line.strip().split() # First things first, figure out what our arguments are @@ -1209,6 +1261,7 @@ Use 'ls -l' to see URLs.""" print(self.gi.url) ### Bookmarking stuff + @restricted @needs_gi def do_add(self, line): @@ -1242,16 +1295,6 @@ Bookmarks are stored using the 'add' command.""" else: self._go_to_gi(gi, update_hist=False) - ### Help - def do_help(self, arg): - """ALARM! Recursion detected! ALARM! Prepare to eject!""" - if arg == "!": - print("! is an alias for 'shell'") - elif arg == "?": - print("? is an alias for 'help'") - else: - cmd.Cmd.do_help(self, arg) - ### Flight recorder def do_blackbox(self, *args): """Display contents of flight recorder, showing statistics for the @@ -1289,6 +1332,30 @@ current gemini browsing session.""" for key, value in lines: print(key.ljust(ljust+gap) + str(value).rjust(rjust)) + ### Help + def do_help(self, arg): + """ALARM! Recursion detected! ALARM! Prepare to eject!""" + if arg == "!": + print("! is an alias for 'shell'") + elif arg == "?": + print("? is an alias for 'help'") + else: + cmd.Cmd.do_help(self, arg) + + def do_abbrevs(self, *args): + """Print all AV-98 command abbreviations.""" + header = "Command Abbreviations:" + self.stdout.write("\n{}\n".format(str(header))) + if self.ruler: + self.stdout.write("{}\n".format(str(self.ruler * len(header)))) + for k, v in _ABBREVS.items(): + self.stdout.write("{:<7} {}\n".format(k, v)) + self.stdout.write("\n") + + def do_version(self, line): + """Display version information.""" + print("AV-98 " + _VERSION) + ### The end! def do_quit(self, *args): """Exit AV-98.""" From 0393ae3ea3691bea00b75ed34c9cb901eb98fa66 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sat, 18 Nov 2023 17:15:17 +0100 Subject: [PATCH 39/61] Correctly track domains which a client cert has been sent to. 
--- clientcerts.py | 1 + 1 file changed, 1 insertion(+) diff --git a/clientcerts.py b/clientcerts.py index 25f2903..f886c18 100644 --- a/clientcerts.py +++ b/clientcerts.py @@ -76,6 +76,7 @@ class ClientCertificateManager: resp = input("Y/N? ") if resp.strip().lower() in ("y", "yes"): self._activate_client_cert(*self.client_certs[gi.host]) + self.active_cert_domains.append(gi.host) else: print("Remaining unidentified.") self.client_certs.pop(gi.host) From 62972c0228f04100803b0cb709fc7b39707837ce Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sat, 18 Nov 2023 19:27:42 +0100 Subject: [PATCH 40/61] Restore cert prompt functionality. --- av98.py | 4 ++++ clientcerts.py | 3 +++ 2 files changed, 7 insertions(+) diff --git a/av98.py b/av98.py index b133a8c..6808510 100755 --- a/av98.py +++ b/av98.py @@ -518,6 +518,10 @@ you'll be able to transparently follow links to Gopherspace!""") context = self._prepare_SSL_context(self.options["tls_mode"]) if not self.client_cert_manager.associate_client_cert(context, gi): raise UserAbortException() + if self.client_cert_manager.is_cert_active(): + self.prompt = self.cert_prompt + else: + self.prompt = self.no_cert_prompt # Connect to remote host by any address possible err = None diff --git a/clientcerts.py b/clientcerts.py index f886c18..f3b7965 100644 --- a/clientcerts.py +++ b/clientcerts.py @@ -88,6 +88,9 @@ class ClientCertificateManager: return True + def is_cert_active(self): + return self.client_certs["active"] != None + def handle_cert_request(self, meta, status, host): # Don't do client cert stuff in restricted mode, as in principle From 03b90fcd5e74aaf6325a621143de4e1b822a1cd1 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sat, 18 Nov 2023 19:28:15 +0100 Subject: [PATCH 41/61] Put config dir discovery/creation in own method, store computed filename of bookmarks. 
--- av98.py | 71 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 36 insertions(+), 35 deletions(-) diff --git a/av98.py b/av98.py index 6808510..645ba26 100755 --- a/av98.py +++ b/av98.py @@ -213,31 +213,6 @@ class GeminiClient(cmd.Cmd): def __init__(self, restricted=False): cmd.Cmd.__init__(self) - # Set umask so that nothing we create can be read by anybody else. - # The certificate cache and TOFU database contain "browser history" - # type sensitivie information. - os.umask(0o077) - - # Find config directory - ## Look for something pre-existing - for confdir in ("~/.av98/", "~/.config/av98/"): - confdir = os.path.expanduser(confdir) - if os.path.exists(confdir): - self.config_dir = confdir - break - ## Otherwise, make one in .config if it exists - else: - if os.path.exists(os.path.expanduser("~/.config/")): - self.config_dir = os.path.expanduser("~/.config/av98/") - else: - self.config_dir = os.path.expanduser("~/.av98/") - print("Creating config directory {}".format(self.config_dir)) - os.makedirs(self.config_dir) - - ## Claim two temporary filenames to use as buffers - self.raw_file_buffer = tempfile.NamedTemporaryFile(delete=False).name - self.rendered_file_buffer = tempfile.NamedTemporaryFile(delete=False).name - self.no_cert_prompt = "\x1b[38;5;76m" + "AV-98" + "\x1b[38;5;255m" + "> " + "\x1b[0m" self.cert_prompt = "\x1b[38;5;202m" + "AV-98" + "\x1b[38;5;255m" + "+cert> " + "\x1b[0m" self.prompt = self.no_cert_prompt @@ -284,12 +259,40 @@ class GeminiClient(cmd.Cmd): "redirects_followed": 0 } + self._init_config() + ui_out.debug("Raw buffer: ", self.raw_file_buffer) + ui_out.debug("Rendered buffer: ", self.rendered_file_buffer) self.tofu_store = TofuStore(self.config_dir) self.client_cert_manager = ClientCertificateManager(self.config_dir) self.cache = Cache() - ui_out.debug("Raw buffer: ", self.raw_file_buffer) - ui_out.debug("Rendered buffer: ", self.rendered_file_buffer) + def _init_config(self): + # Set umask so that nothing we 
create can be read by anybody else. + # The certificate cache and TOFU database contain "browser history" + # type sensitivie information. + os.umask(0o077) + + # Find or create config directory + ## Look for something pre-existing + for confdir in ("~/.av98/", "~/.config/av98/"): + confdir = os.path.expanduser(confdir) + if os.path.exists(confdir): + self.config_dir = confdir + break + ## Otherwise, make one in .config if it exists + else: + if os.path.exists(os.path.expanduser("~/.config/")): + self.config_dir = os.path.expanduser("~/.config/av98/") + else: + self.config_dir = os.path.expanduser("~/.av98/") + print("Creating config directory {}".format(self.config_dir)) + os.makedirs(self.config_dir) + + # Set some filename constants + self.bm_file = os.path.join(self.config_dir, "bookmarks.gmi") + # Claim two temporary filenames to use as buffers + self.raw_file_buffer = tempfile.NamedTemporaryFile(delete=False).name + self.rendered_file_buffer = tempfile.NamedTemporaryFile(delete=False).name def _go_to_gi(self, gi, update_hist=True, check_cache=True): """ @@ -837,19 +840,18 @@ Slow internet connection? Use 'set timeout' to be more patient.""") directly at the new address in future. """ # Nothing to do if no bookmarks exist! - bm_file = os.path.join(self.config_dir, "bookmarks.gmi") - if not os.path.exists(bm_file): + if not os.path.exists(self.bm_file): return # Backup bookmark file backup_file = tempfile.NamedTemporaryFile(delete=False) backup_file.close() backup_file = backup_file.name - shutil.copyfile(bm_file, backup_file) + shutil.copyfile(self.bm_file, backup_file) # Attempt maintenance, restore backup if anything fails try: - with open(backup_file, "r") as fp_old, open(bm_file, "w") as fp_new: + with open(backup_file, "r") as fp_old, open(self.bm_file, "w") as fp_new: for line in fp_old: if not line.startswith("=>"): fp_new.write(line) @@ -862,7 +864,7 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") ui_out.debug("Updating old bookmark url {} to {} based on permanent redirect.".format(old_url, url)) fp_new.write(line.replace(old_url, url)) except Exception as err: - shutil.copyfile(backup_file, bm_file) + shutil.copyfile(backup_file, self.bm_file) ui_out.debug(traceback.format_exc()) finally: os.unlink(backup_file) @@ -1279,15 +1281,14 @@ Optionally, specify the new name for the bookmark.""" 'bookmarks' shows all bookmarks. 'bookmarks n' navigates immediately to item n in the bookmark menu. Bookmarks are stored using the 'add' command.""" - bm_file = os.path.join(self.config_dir, "bookmarks.gmi") - if not os.path.exists(bm_file): + if not os.path.exists(self.bm_file): print("You need to 'add' some bookmarks, first!") return args = line.strip() if len(args.split()) > 1 or (args and not args.isnumeric()): print("bookmarks command takes a single integer argument!") return - gi = GeminiItem("file://" + os.path.abspath(bm_file)) + gi = GeminiItem("file://" + os.path.abspath(self.bm_file)) if args: # Semi-sneaky # Parses the bookmark file and modifies self.index so that From 73ce79310df5f5c2eb73040f5dccea22819543f6 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 10:20:54 +0100 Subject: [PATCH 42/61] Reuse a consistent function for getting Y/N type user input. 
--- av98.py | 15 +++++++-------- clientcerts.py | 20 ++++++++------------ util.py | 16 ++++++++++++++++ 3 files changed, 31 insertions(+), 20 deletions(-) create mode 100644 util.py diff --git a/av98.py b/av98.py index 645ba26..25b01db 100755 --- a/av98.py +++ b/av98.py @@ -43,6 +43,7 @@ except ModuleNotFoundError: from cache import Cache from tofu import TofuStore from clientcerts import ClientCertificateManager +import util _VERSION = "1.0.2dev" @@ -311,9 +312,7 @@ class GeminiClient(cmd.Cmd): webbrowser.open_new_tab(gi.url) return else: - print("Do you want to try to open this link with a http proxy?") - resp = input("(Y)/N ") - if resp.strip().lower() in ("n","no"): + if not util.ask_yes_no("Do you want to try to open this link with a http proxy?", True): webbrowser.open_new_tab(gi.url) return elif gi.scheme == "gopher" and not self.options.get("gopher_proxy", None): @@ -418,17 +417,17 @@ you'll be able to transparently follow links to Gopherspace!""") raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS) # Never follow cross-domain redirects without asking elif new_gi.host != gi.host: - follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url) + follow = util.ask_yes_no("Follow cross-domain redirect to %s?" % new_gi.url) # Never follow cross-protocol redirects without asking elif new_gi.scheme != gi.scheme: - follow = input("Follow cross-protocol redirect to %s? (y/n) " % new_gi.url) + follow = util.ask_yes_no("Follow cross-protocol redirect to %s?" % new_gi.url) # Don't follow *any* redirect without asking if auto-follow is off elif not self.options["auto_follow_redirects"]: - follow = input("Follow redirect to %s? (y/n) " % new_gi.url) + follow = util.ask_yes_no("Follow redirect to %s?" % new_gi.url) # Otherwise, follow away else: - follow = "yes" - if follow.strip().lower() not in ("y", "yes"): + follow = True + if not follow: raise UserAbortException() ui_out.debug("Following redirect to %s." 
% new_gi.url) ui_out.debug("This is consecutive redirect number %d." % len(previous_redirectors)) diff --git a/clientcerts.py b/clientcerts.py index f3b7965..f7c7092 100644 --- a/clientcerts.py +++ b/clientcerts.py @@ -4,6 +4,8 @@ import os import os.path import uuid +import util + ui_out = logging.getLogger("av98_logger") class ClientCertificateManager: @@ -51,30 +53,24 @@ class ClientCertificateManager: # Are we crossing a domain boundary? if self.client_certs["active"] and gi.host not in self.active_cert_domains: if self.active_is_transient: - print("Permanently delete currently active transient certificate?") - resp = input("Y/N? ") - if resp.strip().lower() in ("y", "yes"): + if util.ask_yes_no("Permanently delete currently active transient certificate?"): print("Destroying certificate.") self._deactivate_client_cert() else: print("Staying here.") return False else: - print("PRIVACY ALERT: Deactivate client cert before connecting to a new domain?") - resp = input("Y/N? ") - if resp.strip().lower() in ("n", "no"): + if util.ask_yes_no("PRIVACY ALERT: Deactivate client cert before connecting to a new domain?"): + print("Deactivating certificate.") + self._deactivate_client_cert() + else: print("Keeping certificate active for {}".format(gi.host)) self.active_cert_domains.append(gi.host) self.client_certs[gi.host] = self.client_certs["active"] - else: - print("Deactivating certificate.") - self._deactivate_client_cert() # Suggest reactivating previous certs if not self.client_certs["active"] and gi.host in self.client_certs: - print("PRIVACY ALERT: Reactivate previously used client cert for {}?".format(gi.host)) - resp = input("Y/N? 
") - if resp.strip().lower() in ("y", "yes"): + if util.ask_yes_no("PRIVACY ALERT: Reactivate previously used client cert for {}?".format(gi.host)): self._activate_client_cert(*self.client_certs[gi.host]) self.active_cert_domains.append(gi.host) else: diff --git a/util.py b/util.py new file mode 100644 index 0000000..a035c28 --- /dev/null +++ b/util.py @@ -0,0 +1,16 @@ +def ask_yes_no(prompt, default=None): + print(prompt) + if default == True: + prompt = "(Y)/N: " + elif default == False: + prompt = "Y/(N): " + else: + prompt = "Y/N: " + while True: + resp = input(prompt) + if not resp.strip() and default != None: + return efault + elif resp.strip().lower() in ("y", "yes"): + return True + elif resp.strip().lower() in ("n","no"): + return False From 9970f21e47e1d3b0d0b23900ab35d911050dbfbd Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 10:41:08 +0100 Subject: [PATCH 43/61] Move a little more clutter from av98.py into util.py. --- av98.py | 30 ++---------------------------- util.py | 26 ++++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 28 deletions(-) diff --git a/av98.py b/av98.py index 25b01db..b20bf0e 100755 --- a/av98.py +++ b/av98.py @@ -92,28 +92,6 @@ ui_out = logging.getLogger("av98_logger") ui_handler = logging.StreamHandler() ui_out.addHandler(ui_handler) -def fix_ipv6_url(url): - if not url.count(":") > 2: # Best way to detect them? - return url - # If there's a pair of []s in there, it's probably fine as is. - if "[" in url and "]" in url: - return url - # Easiest case is a raw address, no schema, no path. - # Just wrap it in square brackets and whack a slash on the end - if "/" not in url: - return "[" + url + "]/" - # Now the trickier cases... 
- if "://" in url: - schema, schemaless = url.split("://") - else: - schema, schemaless = None, url - if "/" in schemaless: - netloc, rest = schemaless.split("/",1) - schemaless = "[" + netloc + "]" + "/" + rest - if schema: - return schema + "://" + schemaless - return schemaless - standard_ports = { "gemini": 1965, "gopher": 70, @@ -124,7 +102,7 @@ class GeminiItem(): def __init__(self, url, name=""): if "://" not in url: url = "gemini://" + url - self.url = fix_ipv6_url(url) + self.url = util.fix_ipv6_url(url) self.name = name parsed = urllib.parse.urlparse(self.url) self.scheme = parsed.scheme @@ -181,10 +159,6 @@ class GeminiItem(): CRLF = '\r\n' -# Cheap and cheerful URL detector -def looks_like_url(word): - return "." in word and word.startswith("gemini://") - class UserAbortException(Exception): pass @@ -1081,7 +1055,7 @@ Current tour can be listed with `tour ls` and scrubbed with `tour clear`.""" self.waypoints = [] elif line == "*": self.waypoints.extend(self.lookup) - elif looks_like_url(line): + elif util.looks_like_url(line): self.waypoints.append(GeminiItem(line)) else: for index in line.split(): diff --git a/util.py b/util.py index a035c28..ef245e6 100644 --- a/util.py +++ b/util.py @@ -1,3 +1,7 @@ +# Cheap and cheerful URL detector +def looks_like_url(word): + return "." in word and word.startswith("gemini://") + def ask_yes_no(prompt, default=None): print(prompt) if default == True: @@ -14,3 +18,25 @@ def ask_yes_no(prompt, default=None): return True elif resp.strip().lower() in ("n","no"): return False + +def fix_ipv6_url(url): + if not url.count(":") > 2: # Best way to detect them? + return url + # If there's a pair of []s in there, it's probably fine as is. + if "[" in url and "]" in url: + return url + # Easiest case is a raw address, no schema, no path. + # Just wrap it in square brackets and whack a slash on the end + if "/" not in url: + return "[" + url + "]/" + # Now the trickier cases... 
+ if "://" in url: + schema, schemaless = url.split("://") + else: + schema, schemaless = None, url + if "/" in schemaless: + netloc, rest = schemaless.split("/",1) + schemaless = "[" + netloc + "]" + "/" + rest + if schema: + return schema + "://" + schemaless + return schemaless From 0a9846b3428daa80ba3b7c9d113302d55baccbc6 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 12:29:48 +0100 Subject: [PATCH 44/61] Correctly report downloaded file size in debug messages. Previously, the return value of fp.write() was used, however for text files this counts the number of characters written, not the number of bytes, and for non-ASCII content these differ. --- av98.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/av98.py b/av98.py index b20bf0e..470a14d 100755 --- a/av98.py +++ b/av98.py @@ -649,6 +649,7 @@ you'll be able to transparently follow links to Gopherspace!""") else: print("{} Received {} MiB...".format(spinner, chunk_count/10.0), end="\r") print(" "*80, end="\r") # Clean up prompt space + size = len(body) # Determine file mode if mime.startswith("text/"): @@ -666,9 +667,8 @@ you'll be able to transparently follow links to Gopherspace!""") encoding = None # Write - fp = open(destination or self.raw_file_buffer, mode=mode, encoding=encoding) - size = fp.write(body) - fp.close() + with open(destination or self.raw_file_buffer, mode=mode, encoding=encoding) as fp: + fp.write(body) return size From a459e49fa0a35d8f62740fcaa089ad6b950b4b42 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 14:27:47 +0100 Subject: [PATCH 45/61] Extract titles from gemtext and use them as GeminiItem.name if that's missing. 
--- av98.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/av98.py b/av98.py index 470a14d..12aa825 100755 --- a/av98.py +++ b/av98.py @@ -699,6 +699,7 @@ you'll be able to transparently follow links to Gopherspace!""") """ self.index = [] preformatted = False + title = "" with open(self.active_raw_file, "r") as fp: body = fp.read() @@ -732,6 +733,8 @@ you'll be able to transparently follow links to Gopherspace!""") elif line.startswith("#"): line = line[1:].lstrip("\t ") fp.write("\x1b[1m\x1b[4m" + line + "\x1b[0m""\n") + if not title: + title = line else: fp.write(textwrap.fill(line, self.options["width"]) + "\n") @@ -739,6 +742,12 @@ you'll be able to transparently follow links to Gopherspace!""") self.page_index = 0 self.index_index = -1 + # If the supplied GI didn't have a name (e.g. we arrived at it from a + # manually entered URL, not a link), use the title inferred from the + # first top level header + if not menu_gi.name: + menu_gi.name = title + def _format_geminiitem(self, index, gi, url=False): """ Render a link line. From a0eedac5328c8d291131f226aac687329bce2cbd Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 14:34:29 +0100 Subject: [PATCH 46/61] Include temporary buffer file name in debug output. --- av98.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/av98.py b/av98.py index 12aa825..64c8bd6 100755 --- a/av98.py +++ b/av98.py @@ -450,7 +450,7 @@ you'll be able to transparently follow links to Gopherspace!""") # Save response body to disk size = self._write_response_to_file(mime, mime_options, f, destination) - ui_out.debug("Wrote %d byte response to %s." % (size, destination)) + ui_out.debug("Wrote %d byte response to %s." 
% (size, destination or self.raw_file_buffer)) # Maintain cache and update flight recorder if self.options["cache"]: From c8c12cab860a50b49f2788f759572b30f7ba966f Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 14:35:13 +0100 Subject: [PATCH 47/61] Don't trigger a traceback in debug mode for 4x or 5x status codes. --- av98.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/av98.py b/av98.py index 64c8bd6..1c00988 100755 --- a/av98.py +++ b/av98.py @@ -807,6 +807,8 @@ Slow internet connection? Use 'set timeout' to be more patient.""") ui_out.error("ERROR: Local file not found!") elif isinstance(err, IsADirectoryError): ui_out.error("ERROR: Viewing local directories is not supported!") + elif isinstance(err, RuntimeError): # Misusing this for status 4x or 5x + ui_out.error("ERROR: " + str(err)) else: ui_out.error("ERROR: " + str(err)) ui_out.debug(traceback.format_exc()) From 4df88896a8c1233b7ffad5a09b915281842cd1d6 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 14:38:55 +0100 Subject: [PATCH 48/61] Ensure we derive a useable filename for saving items whose URL does not provide one. --- av98.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/av98.py b/av98.py index 1c00988..e382a1e 100755 --- a/av98.py +++ b/av98.py @@ -149,6 +149,28 @@ class GeminiItem(): else: return "=> {}\n".format(self.url) + def derive_filename(self, mime=None): + # Simplest option it to use the end of the URL, if there is one. + filename = os.path.basename(self.path) + if filename: + return filename + + # If there's not, try to pretty up the GeminiItem name + if self.name: + filename = self.name.lower().replace(" ","_") + # Otherwise, use something generic.
+ else: + filename = "av98_download_" + time.strftime("%Y%m%d%H%M%S") + + # Add an extension + if mime == "text/gemini": + return filename + ".gmi" + elif mime: + ext = mimetypes.guess_extension(mime) + if ext: + return filename + ext + return filename + ".file" + @classmethod def from_map_line(cls, line, origin_gi): assert line.startswith("=>") @@ -1226,9 +1248,9 @@ Use 'ls -l' to see URLs.""" gi = self.gi saving_current = True - # Derive filename from current GI's path, if one hasn't been set + # Derive a filename if one hasn't been set if not filename: - filename = os.path.basename(gi.path) + filename = gi.derive_filename(self.mime if saving_current else None) # Check for filename collisions if os.path.exists(filename): From 305f7f9f2c03b753b37a50bef591380ff35d34cf Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 14:40:26 +0100 Subject: [PATCH 49/61] Make do_quit() silent, move the farewell message to main(). --- av98.py | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/av98.py b/av98.py index e382a1e..f40cb46 100755 --- a/av98.py +++ b/av98.py @@ -256,6 +256,7 @@ class GeminiClient(cmd.Cmd): "redirects_followed": 0 } + self._stop = False self._init_config() ui_out.debug("Raw buffer: ", self.raw_file_buffer) ui_out.debug("Rendered buffer: ", self.rendered_file_buffer) @@ -877,6 +878,9 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") # Cmd implementation follows + def postcmd(self, stop, line): + return self._stop + def default(self, line): """ This is called when none of the do_* methods match the user's @@ -1378,10 +1382,8 @@ current gemini browsing session.""" self.client_cert_manager.cleanup() # Apply permanent redirects to bookmarks self._maintain_bookmarks() - # Say goodbye - print() - print("Thank you for flying AV-98!") - sys.exit() + # Exit command loop + self._stop = True do_exit = do_quit @@ -1448,12 +1450,18 @@ def main(): gc.cmdqueue.append("tour %s" % url) gc.cmdqueue.append("tour") - # Endless interpret loop + # Endless interpret loop until user quits while True: try: gc.cmdloop() + break except KeyboardInterrupt: print("") + # Say goodbye + print() + print("Thank you for flying AV-98!") + sys.exit() + if __name__ == '__main__': main() From cbbc4109764fc0c1e80609bdb1940eb61b176ee8 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 15:24:26 +0100 Subject: [PATCH 50/61] Add curl/wget style --download option. 
--- av98.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/av98.py b/av98.py index f40cb46..faa472c 100755 --- a/av98.py +++ b/av98.py @@ -1394,6 +1394,8 @@ def main(): parser = argparse.ArgumentParser(description='A command line gemini client.') parser.add_argument('--bookmarks', action='store_true', help='start with your list of bookmarks') + parser.add_argument('--dl', '--download', action='store_true', + help='download a single URL and quit') parser.add_argument('--tls-cert', metavar='FILE', help='TLS client certificate file') parser.add_argument('--tls-key', metavar='FILE', help='TLS client certificate private key file') parser.add_argument('--restricted', action="store_true", help='Disallow shell, add, and save commands') @@ -1411,6 +1413,25 @@ def main(): # Instantiate client gc = GeminiClient(args.restricted) + # Handle --download + if args.dl: + gc.onecmd("set debug True") + # Download + gi = GeminiItem(args.url[0]) + gi, mime = gc._fetch_over_network(gi) + # Parse gemtext in the hopes of getting a gi.name for the filename + if mime == "text/gemini": + gc.active_raw_file = gc.raw_file_buffer + gc._handle_gemtext(gi) + # Copy from temp file to pwd with a nice name + filename = gi.derive_filename(mime) + shutil.copyfile(gc.raw_file_buffer, filename) + size = os.path.getsize(filename) + # Notify user where the file ended up + print("Wrote %d byte %s response to %s." % (size, mime, filename)) + gc.do_quit() + sys.exit() + # Process config file rcfile = os.path.join(gc.config_dir, "av98rc") if os.path.exists(rcfile): From 203ffaea906a5830cd758d69aeef7d7808644e8f Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 15:25:29 +0100 Subject: [PATCH 51/61] Replace VF-1's flight-themed sign off with AV-98 policing-themed sign off. 
--- av98.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/av98.py b/av98.py index faa472c..e0e2ef6 100755 --- a/av98.py +++ b/av98.py @@ -1481,7 +1481,7 @@ def main(): # Say goodbye print() - print("Thank you for flying AV-98!") + print("Thank you for patrolling with AV-98!") sys.exit() if __name__ == '__main__': From 3abf44d18c582e05547a674d502dc7dcffca8fb0 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 16:23:36 +0100 Subject: [PATCH 52/61] Make the new --download option play nicely with --tls_cert and --tls_key. --- av98.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/av98.py b/av98.py index e0e2ef6..a0c3b5c 100755 --- a/av98.py +++ b/av98.py @@ -1413,6 +1413,13 @@ def main(): # Instantiate client gc = GeminiClient(args.restricted) + # Activate client certs now in case they are needed for --download below + if args.tls_cert and args.tls_key: + gc.client_cert_manager._activate_client_cert(args.tls_cert, args.tls_key) + for url in args.url: + gi = GeminiItem(url) + gc.client_cert_manager.active_cert_domains.append(gi.host) + # Handle --download if args.dl: gc.onecmd("set debug True") @@ -1455,10 +1462,7 @@ def main(): print("Restricted mode engaged!") print("Enjoy your patrol through Geminispace...") - # Act on args - if args.tls_cert: - # If tls_key is None, python will attempt to load the key from tls_cert. - gc.client_cert_manager._activate_client_cert(args.tls_cert, args.tls_key) + # Add commands to the queue based on command line arguments if args.bookmarks: gc.cmdqueue.append("bookmarks") elif args.url: From b46b15905b17b81feb74eed87cbb5b6552c96add Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 16:26:48 +0100 Subject: [PATCH 53/61] Prepare to cut 1.1.0. 
--- av98.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/av98.py b/av98.py index a0c3b5c..2a08a84 100755 --- a/av98.py +++ b/av98.py @@ -45,7 +45,7 @@ from tofu import TofuStore from clientcerts import ClientCertificateManager import util -_VERSION = "1.0.2dev" +_VERSION = "1.1.0dev" _MAX_REDIRECTS = 5 From 188cacca1ff85338edb352cd29112038eb3d3635 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 19 Nov 2023 16:49:07 +0100 Subject: [PATCH 54/61] Restore coloured debug output, like we had before the logging module was adopted. --- av98.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/av98.py b/av98.py index 2a08a84..95706f5 100755 --- a/av98.py +++ b/av98.py @@ -88,8 +88,19 @@ _MIME_HANDLERS = { urllib.parse.uses_relative.append("gemini") urllib.parse.uses_netloc.append("gemini") +# Set up logging (annoying necessity after splitting client into multiple +# .py files...was it worth it? +class AV98Formatter(logging.Formatter): + def format(self, record): + output = super().format(record) + if record.levelno == logging.DEBUG: + return "\x1b[0;32m[DEBUG] " + output + "\x1b[0m" + return "[DEBUG] " + output + else: + return output ui_out = logging.getLogger("av98_logger") ui_handler = logging.StreamHandler() +ui_handler.setFormatter(AV98Formatter()) ui_out.addHandler(ui_handler) standard_ports = { From 48c8fd95431de1e212a3f5b274e5500eab4ca206 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Wed, 22 Nov 2023 11:20:05 +0100 Subject: [PATCH 55/61] Get on board with modern Python packaging conventions. Remember when we used to make fun of Java people for sticking everything in a `src/` directory? 
--- pyproject.toml | 26 +++++++++++++++++++++++ setup.py | 23 -------------------- src/av98/__init__.py | 1 + av98.py => src/av98/av98.py | 11 +++++----- cache.py => src/av98/cache.py | 0 clientcerts.py => src/av98/clientcerts.py | 2 +- tofu.py => src/av98/tofu.py | 0 util.py => src/av98/util.py | 0 8 files changed, 34 insertions(+), 29 deletions(-) create mode 100755 pyproject.toml delete mode 100755 setup.py create mode 100644 src/av98/__init__.py rename av98.py => src/av98/av98.py (99%) rename cache.py => src/av98/cache.py (100%) rename clientcerts.py => src/av98/clientcerts.py (99%) rename tofu.py => src/av98/tofu.py (100%) rename util.py => src/av98/util.py (100%) diff --git a/pyproject.toml b/pyproject.toml new file mode 100755 index 0000000..55a3245 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,26 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" +[project] +name = "AV-98" +dynamic = ["version"] +description = "Command line Gemini client" +authors = [{name="Solderpunk", email="solderpunk@posteo.net"}] +classifiers = [ + "License :: OSI Approved :: BSD License", + "Programming Language :: Python :: 3 :: Only", + "Topic :: Communications", + "Intended Audience :: End Users/Desktop", + "Environment :: Console", + "Development Status :: 5 - Production/Stable", +] +[project.urls] +Homepage = "https://tildegit.org/solderpunk/AV-98/" +Issues = "https://tildegit.org/solderpunk/AV-98/issues" +[project.scripts] +av98 = "av98.av98:main" +[project.optional-dependencies] +tofu = ["cryptography"] +colour = ["ansiwrap"] +[tool.setuptools.dynamic] +version = {attr = "av98.__version__"} diff --git a/setup.py b/setup.py deleted file mode 100755 index 210bebd..0000000 --- a/setup.py +++ /dev/null @@ -1,23 +0,0 @@ -from setuptools import setup - -setup( - name='AV-98', - version='1.0.2dev', - description="Command line Gemini client.", - author="Solderpunk", - author_email="solderpunk@sdf.org", - 
url='https://tildegit.org/solderpunk/AV-98/', - classifiers=[ - 'License :: OSI Approved :: BSD License', - 'Programming Language :: Python :: 3 :: Only', - 'Topic :: Communications', - 'Intended Audience :: End Users/Desktop', - 'Environment :: Console', - 'Development Status :: 4 - Beta', - ], - py_modules = ["av98"], - entry_points={ - "console_scripts": ["av98=av98:main"] - }, - install_requires=[], -) diff --git a/src/av98/__init__.py b/src/av98/__init__.py new file mode 100644 index 0000000..bda1b9c --- /dev/null +++ b/src/av98/__init__.py @@ -0,0 +1 @@ +__version__ = "1.1.0dev" diff --git a/av98.py b/src/av98/av98.py similarity index 99% rename from av98.py rename to src/av98/av98.py index 95706f5..5981812 100755 --- a/av98.py +++ b/src/av98/av98.py @@ -40,10 +40,11 @@ try: except ModuleNotFoundError: import textwrap -from cache import Cache -from tofu import TofuStore -from clientcerts import ClientCertificateManager -import util +from av98 import __version__ +from av98.cache import Cache +from av98.tofu import TofuStore +from av98.clientcerts import ClientCertificateManager +import av98.util as util _VERSION = "1.1.0dev" @@ -1380,7 +1381,7 @@ current gemini browsing session.""" def do_version(self, line): """Display version information.""" - print("AV-98 " + _VERSION) + print("AV-98 " + __version__) ### The end! 
def do_quit(self, *args): diff --git a/cache.py b/src/av98/cache.py similarity index 100% rename from cache.py rename to src/av98/cache.py diff --git a/clientcerts.py b/src/av98/clientcerts.py similarity index 99% rename from clientcerts.py rename to src/av98/clientcerts.py index f7c7092..9627100 100644 --- a/clientcerts.py +++ b/src/av98/clientcerts.py @@ -4,7 +4,7 @@ import os import os.path import uuid -import util +import av98.util as util ui_out = logging.getLogger("av98_logger") diff --git a/tofu.py b/src/av98/tofu.py similarity index 100% rename from tofu.py rename to src/av98/tofu.py diff --git a/util.py b/src/av98/util.py similarity index 100% rename from util.py rename to src/av98/util.py From 4b759aec70e80258149454ffd99cceb411020f87 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sat, 25 Nov 2023 13:08:36 +0100 Subject: [PATCH 56/61] Add --output command line option for use with --download. --- src/av98/av98.py | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/av98/av98.py b/src/av98/av98.py index 5981812..1c9eb1a 100755 --- a/src/av98/av98.py +++ b/src/av98/av98.py @@ -1408,6 +1408,8 @@ def main(): help='start with your list of bookmarks') parser.add_argument('--dl', '--download', action='store_true', help='download a single URL and quit') + parser.add_argument('-o', '--output', metavar='FILE', + help='filename to save --dl URL to') parser.add_argument('--tls-cert', metavar='FILE', help='TLS client certificate file') parser.add_argument('--tls-key', metavar='FILE', help='TLS client certificate private key file') parser.add_argument('--restricted', action="store_true", help='Disallow shell, add, and save commands') @@ -1438,12 +1440,16 @@ def main(): # Download gi = GeminiItem(args.url[0]) gi, mime = gc._fetch_over_network(gi) - # Parse gemtext in the hopes of getting a gi.name for the filename - if mime == "text/gemini": - gc.active_raw_file = gc.raw_file_buffer - gc._handle_gemtext(gi) + # Decide on a filename + if 
args.output: + filename = args.output + else: + if mime == "text/gemini": + # Parse gemtext in the hopes of getting a gi.name for the filename + gc.active_raw_file = gc.raw_file_buffer + gc._handle_gemtext(gi) + filename = gi.derive_filename(mime) # Copy from temp file to pwd with a nice name - filename = gi.derive_filename(mime) shutil.copyfile(gc.raw_file_buffer, filename) size = os.path.getsize(filename) # Notify user where the file ended up From 0268cd426b7ee22ac3106245f6db4f2db66550b9 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sat, 25 Nov 2023 16:12:46 +0100 Subject: [PATCH 57/61] Further splitting up and renaming of files. Now that everything lives in src/av98/ as per the latest Python fashion, it's a problem to have a file named av98.py. So split it out into main.py (which just implements the argument parsing) and client.py. The old clientcerts.py has become certmanager.py so that tab completion for client.py is quick and easy. --- pyproject.toml | 2 +- src/av98/{clientcerts.py => certmanager.py} | 0 src/av98/{av98.py => client.py} | 115 +---------------- src/av98/main.py | 130 ++++++++++++++++++++ 4 files changed, 132 insertions(+), 115 deletions(-) rename src/av98/{clientcerts.py => certmanager.py} (100%) rename src/av98/{av98.py => client.py} (92%) create mode 100755 src/av98/main.py diff --git a/pyproject.toml b/pyproject.toml index 55a3245..69a4e49 100755 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,7 @@ classifiers = [ Homepage = "https://tildegit.org/solderpunk/AV-98/" Issues = "https://tildegit.org/solderpunk/AV-98/issues" [project.scripts] -av98 = "av98.av98:main" +av98 = "av98.main:main" [project.optional-dependencies] tofu = ["cryptography"] colour = ["ansiwrap"] diff --git a/src/av98/clientcerts.py b/src/av98/certmanager.py similarity index 100% rename from src/av98/clientcerts.py rename to src/av98/certmanager.py diff --git a/src/av98/av98.py b/src/av98/client.py similarity index 92% rename from src/av98/av98.py rename to 
src/av98/client.py index 1c9eb1a..d28f4d8 100755 --- a/src/av98/av98.py +++ b/src/av98/client.py @@ -13,7 +13,6 @@ # - rmgr # - Aleksey Ryndin -import argparse import cmd import cgi import codecs @@ -43,11 +42,9 @@ except ModuleNotFoundError: from av98 import __version__ from av98.cache import Cache from av98.tofu import TofuStore -from av98.clientcerts import ClientCertificateManager +from av98.certmanager import ClientCertificateManager import av98.util as util -_VERSION = "1.1.0dev" - _MAX_REDIRECTS = 5 # Command abbreviations @@ -1398,113 +1395,3 @@ current gemini browsing session.""" self._stop = True do_exit = do_quit - -# Main function -def main(): - - # Parse args - parser = argparse.ArgumentParser(description='A command line gemini client.') - parser.add_argument('--bookmarks', action='store_true', - help='start with your list of bookmarks') - parser.add_argument('--dl', '--download', action='store_true', - help='download a single URL and quit') - parser.add_argument('-o', '--output', metavar='FILE', - help='filename to save --dl URL to') - parser.add_argument('--tls-cert', metavar='FILE', help='TLS client certificate file') - parser.add_argument('--tls-key', metavar='FILE', help='TLS client certificate private key file') - parser.add_argument('--restricted', action="store_true", help='Disallow shell, add, and save commands') - parser.add_argument('--version', action='store_true', - help='display version information and quit') - parser.add_argument('url', metavar='URL', nargs='*', - help='start with this URL') - args = parser.parse_args() - - # Handle --version - if args.version: - print("AV-98 " + _VERSION) - sys.exit() - - # Instantiate client - gc = GeminiClient(args.restricted) - - # Activate client certs now in case they are needed for --download below - if args.tls_cert and args.tls_key: - gc.client_cert_manager._activate_client_cert(args.tls_cert, args.tls_key) - for url in args.url: - gi = GeminiItem(url) - 
gc.client_cert_manager.active_cert_domains.append(gi.host) - - # Handle --download - if args.dl: - gc.onecmd("set debug True") - # Download - gi = GeminiItem(args.url[0]) - gi, mime = gc._fetch_over_network(gi) - # Decide on a filename - if args.output: - filename = args.output - else: - if mime == "text/gemini": - # Parse gemtext in the hopes of getting a gi.name for the filename - gc.active_raw_file = gc.raw_file_buffer - gc._handle_gemtext(gi) - filename = gi.derive_filename(mime) - # Copy from temp file to pwd with a nice name - shutil.copyfile(gc.raw_file_buffer, filename) - size = os.path.getsize(filename) - # Notify user where the file ended up - print("Wrote %d byte %s response to %s." % (size, mime, filename)) - gc.do_quit() - sys.exit() - - # Process config file - rcfile = os.path.join(gc.config_dir, "av98rc") - if os.path.exists(rcfile): - print("Using config %s" % rcfile) - with open(rcfile, "r") as fp: - for line in fp: - line = line.strip() - if ((args.bookmarks or args.url) and - any((line.startswith(x) for x in ("go", "g", "tour", "t"))) - ): - if args.bookmarks: - print("Skipping rc command \"%s\" due to --bookmarks option." % line) - else: - print("Skipping rc command \"%s\" due to provided URLs." 
% line) - continue - gc.cmdqueue.append(line) - - # Say hi - print("Welcome to AV-98!") - if args.restricted: - print("Restricted mode engaged!") - print("Enjoy your patrol through Geminispace...") - - # Add commands to the queue based on command line arguments - if args.bookmarks: - gc.cmdqueue.append("bookmarks") - elif args.url: - if len(args.url) == 1: - gc.cmdqueue.append("go %s" % args.url[0]) - else: - for url in args.url: - if not url.startswith("gemini://"): - url = "gemini://" + url - gc.cmdqueue.append("tour %s" % url) - gc.cmdqueue.append("tour") - - # Endless interpret loop until user quits - while True: - try: - gc.cmdloop() - break - except KeyboardInterrupt: - print("") - - # Say goodbye - print() - print("Thank you for patrolling with AV-98!") - sys.exit() - -if __name__ == '__main__': - main() diff --git a/src/av98/main.py b/src/av98/main.py new file mode 100755 index 0000000..a5f4ae8 --- /dev/null +++ b/src/av98/main.py @@ -0,0 +1,130 @@ +#!/usr/bin/env python3 +# AV-98 Gemini client +# Dervied from VF-1 (https://github.com/solderpunk/VF-1), +# (C) 2019, 2020, 2023 Solderpunk +# With contributions from: +# - danceka +# - +# - +# - Klaus Alexander Seistrup +# - govynnus +# - Nik +# - +# - rmgr +# - Aleksey Ryndin +import argparse +import os.path +import shutil +import sys + +from av98 import __version__ +from av98.client import GeminiClient + +def main(): + + # Parse args + parser = argparse.ArgumentParser(description='A command line gemini client.') + parser.add_argument('--bookmarks', action='store_true', + help='start with your list of bookmarks') + parser.add_argument('--dl', '--download', action='store_true', + help='download a single URL and quit') + parser.add_argument('-o', '--output', metavar='FILE', + help='filename to save --dl URL to') + parser.add_argument('--tls-cert', metavar='FILE', help='TLS client certificate file') + parser.add_argument('--tls-key', metavar='FILE', help='TLS client certificate private key file') + 
parser.add_argument('--restricted', action="store_true", help='Disallow shell, add, and save commands') + parser.add_argument('--version', action='store_true', + help='display version information and quit') + parser.add_argument('url', metavar='URL', nargs='*', + help='start with this URL') + args = parser.parse_args() + + # Handle --version + if args.version: + print("AV-98 " + __version__) + sys.exit() + + # Instantiate client + gc = GeminiClient(args.restricted) + + # Activate client certs now in case they are needed for --download below + if args.tls_cert and args.tls_key: + gc.client_cert_manager._activate_client_cert(args.tls_cert, args.tls_key) + for url in args.url: + gi = GeminiItem(url) + gc.client_cert_manager.active_cert_domains.append(gi.host) + + # Handle --download + if args.dl: + gc.onecmd("set debug True") + # Download + gi = GeminiItem(args.url[0]) + gi, mime = gc._fetch_over_network(gi) + # Decide on a filename + if args.output: + filename = args.output + else: + if mime == "text/gemini": + # Parse gemtext in the hopes of getting a gi.name for the filename + gc.active_raw_file = gc.raw_file_buffer + gc._handle_gemtext(gi) + filename = gi.derive_filename(mime) + # Copy from temp file to pwd with a nice name + shutil.copyfile(gc.raw_file_buffer, filename) + size = os.path.getsize(filename) + # Notify user where the file ended up + print("Wrote %d byte %s response to %s." % (size, mime, filename)) + gc.do_quit() + sys.exit() + + # Process config file + rcfile = os.path.join(gc.config_dir, "av98rc") + if os.path.exists(rcfile): + print("Using config %s" % rcfile) + with open(rcfile, "r") as fp: + for line in fp: + line = line.strip() + if ((args.bookmarks or args.url) and + any((line.startswith(x) for x in ("go", "g", "tour", "t"))) + ): + if args.bookmarks: + print("Skipping rc command \"%s\" due to --bookmarks option." % line) + else: + print("Skipping rc command \"%s\" due to provided URLs." 
% line) + continue + gc.cmdqueue.append(line) + + # Say hi + print("Welcome to AV-98!") + if args.restricted: + print("Restricted mode engaged!") + print("Enjoy your patrol through Geminispace...") + + # Add commands to the queue based on command line arguments + if args.bookmarks: + gc.cmdqueue.append("bookmarks") + elif args.url: + if len(args.url) == 1: + gc.cmdqueue.append("go %s" % args.url[0]) + else: + for url in args.url: + if not url.startswith("gemini://"): + url = "gemini://" + url + gc.cmdqueue.append("tour %s" % url) + gc.cmdqueue.append("tour") + + # Endless interpret loop until user quits + while True: + try: + gc.cmdloop() + break + except KeyboardInterrupt: + print("") + + # Say goodbye + print() + print("Thank you for patrolling with AV-98!") + sys.exit() + +if __name__ == '__main__': + main() From 0e91b4f894d4ed59a4127c0447f365fe7328c572 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sat, 25 Nov 2023 16:16:48 +0100 Subject: [PATCH 58/61] Rename the gus command to search, and the old search to filter. Add option to set search endpoint. --- src/av98/client.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/av98/client.py b/src/av98/client.py index d28f4d8..42d15fb 100755 --- a/src/av98/client.py +++ b/src/av98/client.py @@ -67,7 +67,7 @@ _ABBREVS = { "r": "reload", "s": "save", "se": "search", - "/": "search", + "/": "filter", "t": "tour", "u": "up", } @@ -246,7 +246,8 @@ class GeminiClient(cmd.Cmd): "tls_mode" : "tofu", "gopher_proxy" : None, "http_proxy": None, - "cache" : False + "cache" : False, + "search_url" : "gemini://geminispace.info/search" } self.log = { @@ -901,7 +902,7 @@ Slow internet connection? Use 'set timeout' to be more patient.""") elif line.strip() == "..": return self.do_up() elif line.startswith("/"): - return self.do_search(line[1:]) + return self.do_filter(line[1:]) # Expand abbreviated commands first_word = line.split()[0].strip() @@ -1074,8 +1075,13 @@ Slow internet connection? 
Use 'set timeout' to be more patient.""") def do_gus(self, line): """Submit a search query to the Gemini search engine.""" - gus = GeminiItem("gemini://geminispace.info/search") - self._go_to_gi(gus.query(line)) + ui_out.warning("[WARNING] The `gus` command is deprecated! Use `search` instead.") + self.do_search(line) + + def do_search(self, line): + """Submit a search query a configured Gemini search engine.""" + gi = GeminiItem(self.options["search_url"]) + self._go_to_gi(gi.query(line)) def do_tour(self, line): """Add index items as waypoints on a tour, which is basically a FIFO @@ -1159,8 +1165,8 @@ Use 'ls -l' to see URLs.""" self._show_lookup(url=True) self.page_index = 0 - def do_search(self, searchterm): - """Search index (case insensitive).""" + def do_filter(self, searchterm): + """Filter index on names (case insensitive).""" results = [ gi for gi in self.lookup if searchterm.lower() in gi.name.lower()] if results: From e96d373eec31eee261a8efb0a43764bfe689a5ba Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 26 Nov 2023 12:12:19 +0100 Subject: [PATCH 59/61] Fix bug with default response to Y/N prompts. --- src/av98/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/av98/util.py b/src/av98/util.py index ef245e6..f411b72 100644 --- a/src/av98/util.py +++ b/src/av98/util.py @@ -13,7 +13,7 @@ def ask_yes_no(prompt, default=None): while True: resp = input(prompt) if not resp.strip() and default != None: - return efault + return default elif resp.strip().lower() in ("y", "yes"): return True elif resp.strip().lower() in ("n","no"): From 0e9953882c9279c77ca107e2de8ecb3dacc1a2a3 Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 26 Nov 2023 12:17:42 +0100 Subject: [PATCH 60/61] Don't treat filename collisions as fatal. 
--- src/av98/client.py | 6 ++---- src/av98/util.py | 8 ++++++++ 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/src/av98/client.py b/src/av98/client.py index 42d15fb..56509e6 100755 --- a/src/av98/client.py +++ b/src/av98/client.py @@ -1270,10 +1270,8 @@ Use 'ls -l' to see URLs.""" # Derive a filename if one hasn't been set if not filename: filename = gi.derive_filename(self.mime if saving_current else None) - - # Check for filename collisions - if os.path.exists(filename): - print("File %s already exists!" % filename) + filename = util.handle_filename_collisions(filename) + if not filename: return # Actually do the save operation diff --git a/src/av98/util.py b/src/av98/util.py index f411b72..43acee6 100644 --- a/src/av98/util.py +++ b/src/av98/util.py @@ -1,7 +1,15 @@ +import os.path + # Cheap and cheerful URL detector def looks_like_url(word): return "." in word and word.startswith("gemini://") +def handle_filename_collisions(filename): + while os.path.exists(filename): + print("File %s already exists!" % filename) + filename = input("Choose a new one, or leave blank to abort: ") + return filename + def ask_yes_no(prompt, default=None): print(prompt) if default == True: From 854369afade13299ad2c91eff20d32c4e6a3ce4f Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Sun, 26 Nov 2023 12:36:42 +0100 Subject: [PATCH 61/61] Add a 'user' command, analogous to 'root' but jumps to a pubnix user's capsule if the URL starts with ~username. 
--- src/av98/client.py | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/src/av98/client.py b/src/av98/client.py index 56509e6..b5c6036 100755 --- a/src/av98/client.py +++ b/src/av98/client.py @@ -122,6 +122,13 @@ class GeminiItem(): def root(self): return GeminiItem(self._derive_url("/")) + def user(self): + if not self.path.startswith("/~"): + raise ValueError("This is not a tilde URL.") + new_path = self.path.split("/")[1] + "/" + print(new_path) + return GeminiItem(self._derive_url(new_path)) + def up(self): pathbits = list(os.path.split(self.path.rstrip('/'))) # Don't try to go higher than root @@ -1046,6 +1053,14 @@ Slow internet connection? Use 'set timeout' to be more patient.""") """Go to root selector of the server hosting current item.""" self._go_to_gi(self.gi.root()) + @needs_gi + def do_user(self, *args): + """If the current URL has a leading ~user/ component, go to its root.""" + try: + self._go_to_gi(self.gi.user()) + except ValueError: + print("The current URL does not appear to start with a tilde dir.") + def do_back(self, *args): """Go back to the previous gemini item.""" if not self.history or self.hist_index == 0: