Extensively refactor the way temporary files are used.

Instead of littering /tmp with one file per download plus one per
rendered gemtext file, just reuse the same two files over and
over.  If enabled, caching creates separate copies.

I'm fairly sure this resolves Issue #35, but I'll test and close it only
after also improving the use of cache files.
This commit is contained in:
Solderpunk 2023-11-15 18:06:33 +01:00
parent 713616d556
commit 048b04bed2
2 changed files with 80 additions and 93 deletions

155
av98.py
View File

@ -235,13 +235,16 @@ class GeminiClient(cmd.Cmd):
print("Creating config directory {}".format(self.config_dir))
os.makedirs(self.config_dir)
## Claim two temporary filenames to use as buffers
self.raw_file_buffer = tempfile.NamedTemporaryFile(delete=False).name
self.rendered_file_buffer = tempfile.NamedTemporaryFile(delete=False).name
self.no_cert_prompt = "\x1b[38;5;76m" + "AV-98" + "\x1b[38;5;255m" + "> " + "\x1b[0m"
self.cert_prompt = "\x1b[38;5;202m" + "AV-98" + "\x1b[38;5;255m" + "+cert> " + "\x1b[0m"
self.prompt = self.no_cert_prompt
self.gi = None
self.history = []
self.hist_index = 0
self.idx_filename = ""
self.index = []
self.index_index = -1
self.lookup = self.index
@ -249,7 +252,8 @@ class GeminiClient(cmd.Cmd):
self.page_index = 0
self.permanent_redirects = {}
self.restricted = restricted
self.tmp_filename = ""
self.active_raw_file = ""
self.active_rendered_file = ""
self.visited_hosts = set()
self.waypoints = []
@ -290,6 +294,9 @@ class GeminiClient(cmd.Cmd):
self.tofu_store = TofuStore(self.config_dir)
self.cache = Cache()
ui_out.debug("Raw buffer: ", self.raw_file_buffer)
ui_out.debug("Rendered buffer: ", self.rendered_file_buffer)
def _go_to_gi(self, gi, update_hist=True, check_cache=True, handle=True):
"""This method might be considered "the heart of AV-98".
Everything involved in fetching a gemini resource happens here:
@ -326,13 +333,15 @@ you'll be able to transparently follow links to Gopherspace!""")
# Use local file, use cache, or hit the network if resource is not cached
if gi.scheme == "file":
mime, body, tmpfile = self._handle_local_file(gi)
mime = self._handle_local_file(gi)
self.active_raw_file = gi.path
elif check_cache and self.options["cache"] and self.cache.check(gi.url):
mime, body, tmpfile = self.cache.get(gi.url)
mime, self.active_raw_file = self.cache.get(gi.url)
self.log["cache_hits"] += 1
else:
try:
gi, mime, body, tmpfile = self._fetch_over_network(gi)
gi, mime = self._fetch_over_network(gi)
self.active_raw_file = self.raw_file_buffer
except UserAbortException:
return
except Exception as err:
@ -341,19 +350,16 @@ you'll be able to transparently follow links to Gopherspace!""")
# Render gemtext, update index
if mime == "text/gemini":
self._handle_gemtext(body, gi)
self._handle_gemtext(gi)
self.active_rendered_file = self.rendered_file_buffer
else:
self.active_rendered_file = self.active_raw_file
# Pass file to handler, unless we were asked not to
if handle:
if mime == "text/gemini":
targetfile = self.idx_filename
elif gi.scheme == "file":
targetfile = gi.path
else:
targetfile = tmpfile
cmd_str = self._get_handler_cmd(mime)
try:
subprocess.call(shlex.split(cmd_str % targetfile))
subprocess.call(shlex.split(cmd_str % self.active_rendered_file))
except FileNotFoundError:
print("Handler program %s not found!" % shlex.split(cmd_str)[0])
print("You can use the ! command to specify another handler program or pipeline.")
@ -385,7 +391,7 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
else:
mime, noise = mimetypes.guess_type(gi.path)
body = None
return mime, body, gi.path
return mime
def _fetch_over_network(self, gi, destination=None):
@ -481,15 +487,15 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
raise RuntimeError("Header declared unknown encoding %s" % value)
# Save response body to disk
body, size, filename = self._write_response_to_file(mime, mime_options, f, destination)
ui_out.debug("Wrote %d byte response to %s." % (size, filename))
body, size = self._write_response_to_file(mime, mime_options, f, destination)
ui_out.debug("Wrote %d byte response to %s." % (size, destination))
# Maintain cache and log
if self.options["cache"]:
self.cache.add(gi.url, mime, filename)
self.cache.add(gi.url, mime, self.raw_file_buffer)
self._log_visit(gi, address, size)
return gi, mime, body, filename
return gi, mime
def _send_request(self, gi):
"""Send a selector to a given host and port.
@ -654,18 +660,12 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
mode = "wb"
encoding = None
## Use a temporary file if a filename was not provided
if destination:
fp = open(destination, mode, encoding=encoding)
else:
fp = tempfile.NamedTemporaryFile(mode, encoding=encoding, delete=False)
self.tmp_filename = fp.name
## Write
fp = open(destination or self.raw_file_buffer, mode=mode, encoding=encoding)
size = fp.write(body)
fp.close()
return body, size, destination or self.tmp_filename
return body, size
def _get_addresses(self, host, port):
# DNS lookup - will get IPv4 and IPv6 records if IPv6 is enabled
@ -747,48 +747,48 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
ui_out.debug("Using handler: %s" % cmd_str)
return cmd_str
def _handle_gemtext(self, body, menu_gi):
def _handle_gemtext(self, menu_gi):
"""Simultaneously parse and render a text/gemini document.
Parsing causes self.index to be populated with GeminiItems.
Rendering causes self.idx_filename to contain a rendered view."""
Rendering causes self.rendered_file_buffer to contain a rendered
view."""
self.index = []
preformatted = False
if self.idx_filename:
os.unlink(self.idx_filename)
tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False)
self.idx_filename = tmpf.name
for line in body.splitlines():
if line.startswith("```"):
preformatted = not preformatted
elif preformatted:
tmpf.write(line + "\n")
elif line.startswith("=>"):
try:
gi = GeminiItem.from_map_line(line, menu_gi)
self.index.append(gi)
tmpf.write(self._format_geminiitem(len(self.index), gi) + "\n")
except:
ui_out.debug("Skipping possible link: %s" % line)
elif line.startswith("* "):
line = line[1:].lstrip("\t ")
tmpf.write(textwrap.fill(line, self.options["width"],
initial_indent = "", subsequent_indent=" ") + "\n")
elif line.startswith(">"):
line = line[1:].lstrip("\t ")
tmpf.write(textwrap.fill(line, self.options["width"],
initial_indent = "> ", subsequent_indent="> ") + "\n")
elif line.startswith("###"):
line = line[3:].lstrip("\t ")
tmpf.write("\x1b[4m" + line + "\x1b[0m""\n")
elif line.startswith("##"):
line = line[2:].lstrip("\t ")
tmpf.write("\x1b[1m" + line + "\x1b[0m""\n")
elif line.startswith("#"):
line = line[1:].lstrip("\t ")
tmpf.write("\x1b[1m\x1b[4m" + line + "\x1b[0m""\n")
else:
tmpf.write(textwrap.fill(line, self.options["width"]) + "\n")
tmpf.close()
with open(self.active_raw_file, "r") as fp:
body = fp.read()
with open(self.rendered_file_buffer, "w") as fp:
for line in body.splitlines():
if line.startswith("```"):
preformatted = not preformatted
elif preformatted:
fp.write(line + "\n")
elif line.startswith("=>"):
try:
gi = GeminiItem.from_map_line(line, menu_gi)
self.index.append(gi)
fp.write(self._format_geminiitem(len(self.index), gi) + "\n")
except:
ui_out.debug("Skipping possible link: %s" % line)
elif line.startswith("* "):
line = line[1:].lstrip("\t ")
fp.write(textwrap.fill(line, self.options["width"],
initial_indent = "", subsequent_indent=" ") + "\n")
elif line.startswith(">"):
line = line[1:].lstrip("\t ")
fp.write(textwrap.fill(line, self.options["width"],
initial_indent = "> ", subsequent_indent="> ") + "\n")
elif line.startswith("###"):
line = line[3:].lstrip("\t ")
fp.write("\x1b[4m" + line + "\x1b[0m""\n")
elif line.startswith("##"):
line = line[2:].lstrip("\t ")
fp.write("\x1b[1m" + line + "\x1b[0m""\n")
elif line.startswith("#"):
line = line[1:].lstrip("\t ")
fp.write("\x1b[1m\x1b[4m" + line + "\x1b[0m""\n")
else:
fp.write(textwrap.fill(line, self.options["width"]) + "\n")
self.lookup = self.index
self.page_index = 0
@ -826,14 +826,6 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
self.log["ipv6_requests"] += 1
self.log["ipv6_bytes_recvd"] += size
# Pick the on-disk file that represents the most recently visited item:
#   - text/gemini items -> self.idx_filename, the rendered view of the gemtext
#   - file:// items     -> the local file itself (self.gi.path)
#   - everything else   -> self.tmp_filename, the temporary file the response
#                          body was written to
# The checks are ordered, so a text/gemini item yields the rendered view even
# when its scheme is "file".
def _get_active_tmpfile(self):
if self.mime == "text/gemini":
return self.idx_filename
elif self.gi.scheme == "file":
return self.gi.path
else:
return self.tmp_filename
def _load_client_cert(self):
"""
Interactively load a TLS client certificate from the filesystem in PEM
@ -1250,27 +1242,27 @@ Use 'ls -l' to see URLs."""
@needs_gi
def do_cat(self, *args):
    """Run most recently visited item through "cat" command.

    The file shown is ``self.active_rendered_file``.  Any arguments typed
    after the command are ignored.
    """
    # Pass the path as its own argv element instead of formatting it into a
    # string and re-splitting it: paths containing spaces or quotes (e.g.
    # local file:// items) would otherwise be broken into several arguments.
    subprocess.call(["cat", self.active_rendered_file])
@needs_gi
def do_less(self, *args):
    """Run most recently visited item through "less" command.

    The handler command returned by ``_get_handler_cmd`` for ``self.mime``
    is run on ``self.active_rendered_file`` and its output is piped into
    ``less -R``.  Any arguments typed after the command are ignored.
    """
    cmd_str = self._get_handler_cmd(self.mime)
    # The pipeline is interpreted by a shell, so quote the filename to keep
    # spaces and shell metacharacters in the path from being interpreted.
    cmd_str = cmd_str % shlex.quote(self.active_rendered_file)
    subprocess.call("%s | less -R" % cmd_str, shell=True)
@needs_gi
def do_fold(self, *args):
    """Run most recently visited item through "fold" command.

    The handler command returned by ``_get_handler_cmd`` for ``self.mime``
    is run on ``self.active_rendered_file`` and its output is piped into
    ``fold -w 70 -s``.  Any arguments typed after the command are ignored.
    """
    cmd_str = self._get_handler_cmd(self.mime)
    # The pipeline is interpreted by a shell, so quote the filename to keep
    # spaces and shell metacharacters in the path from being interpreted.
    cmd_str = cmd_str % shlex.quote(self.active_rendered_file)
    subprocess.call("%s | fold -w 70 -s" % cmd_str, shell=True)
@restricted
@needs_gi
def do_shell(self, line):
    """'cat' most recently visited item through a shell pipeline.

    ``line`` is the user-supplied pipeline that receives the contents of
    ``self.active_rendered_file`` on standard input.  It is passed to the
    shell verbatim, which is the purpose of this command.
    """
    # Only the filename is quoted; the user's pipeline must stay unquoted so
    # the shell can interpret it.
    source = shlex.quote(self.active_rendered_file)
    subprocess.call(("cat %s |" % source) + line, shell=True)
@restricted
@needs_gi
@ -1340,10 +1332,7 @@ Use 'ls -l' to see URLs."""
# Actually do the save operation
if saving_current:
# Don't use _get_active_tmpfile() here, because we want to save the
# "source code" of menus, not the rendered view - this way AV-98
# can navigate to it later.
src = gi.path if gi.scheme == "file" else self.tmp_filename
src = gi.path if gi.scheme == "file" else self.active_raw_file
shutil.copyfile(src, filename)
else:
## Download an item that's not the current one
@ -1434,10 +1423,8 @@ current gemini browsing session."""
self.tofu_store.close()
# Clean up after ourself
self.cache.empty()
if self.tmp_filename and os.path.exists(self.tmp_filename):
os.unlink(self.tmp_filename)
if self.idx_filename and os.path.exists(self.idx_filename):
os.unlink(self.idx_filename)
os.unlink(self.raw_file_buffer)
os.unlink(self.rendered_file_buffer)
for cert in self.transient_certs_created:
for ext in (".crt", ".key"):

View File

@ -4,6 +4,8 @@ _MAX_CACHE_AGE_SECS = 180
import logging
import os
import os.path
import shutil
import tempfile
import time
ui_out = logging.getLogger("av98_logger")
@ -34,9 +36,13 @@ class Cache:
self.validatecache()
def add(self, url, mime, filename):
    """Cache a private copy of a downloaded resource.

    url is the key the entry is stored under, mime is the resource's MIME
    type and filename is the path of the file holding its content.  The
    content is copied to a new temporary file rather than referenced, so
    the caller is free to overwrite filename afterwards.

    Raises OSError if the content cannot be copied; in that case the cache
    is left unchanged.
    """
    # Copy client's buffer file to new cache file
    with tempfile.NamedTemporaryFile(delete=False) as tmpf:
        cache_path = tmpf.name
    try:
        shutil.copyfile(filename, cache_path)
    except OSError:
        # Don't leave an empty orphan behind if the copy fails
        os.unlink(cache_path)
        raise
    # Re-adding a URL replaces its entry.  Remove the superseded copy first,
    # otherwise nothing references that file any more and it is never
    # cleaned up.
    if url in self.cache:
        stale_path = self.cache[url][1]
        if os.path.exists(stale_path):
            os.unlink(stale_path)
    # Remember details
    self.cache_timestamps[url] = time.time()
    self.cache[url] = (mime, cache_path)
    if len(self.cache) > _MAX_CACHE_SIZE:
        self._trim()
    self.validatecache()
@ -60,13 +66,7 @@ class Cache:
self.validatecache()
# Return the cached entry for url.  The lookup is a plain subscript on
# self.cache, so it presumably raises KeyError for an unknown url -- callers
# appear to test with check() first; confirm.
def get(self, url):
mime, filename = self.cache[url]
# Only gemtext is read back into memory; every other type is handed back
# by path alone, with None in place of the body.
if mime.startswith("text/gemini"):
with open(filename, "r") as fp:
body = fp.read()
return mime, body, filename
else:
return mime, None, filename
# NOTE(review): as laid out here this line cannot be reached, because both
# branches above return.  It hands back the stored entry as-is, i.e. the
# (mime, filename) pair that add() stores -- a different shape from the
# 3-tuples above.  Confirm which form callers expect.
return self.cache[url]
def empty(self):
for mime, filename in self.cache.values():