Initial implementation of short-term caching.
This commit is contained in:
parent
4d652e0fef
commit
0f328141b9
221
av98.py
221
av98.py
|
@ -287,6 +287,9 @@ class GeminiClient(cmd.Cmd):
|
||||||
|
|
||||||
self._connect_to_tofu_db()
|
self._connect_to_tofu_db()
|
||||||
|
|
||||||
|
self.cache = {}
|
||||||
|
self.cache_timestamps = {}
|
||||||
|
|
||||||
def _connect_to_tofu_db(self):
|
def _connect_to_tofu_db(self):
|
||||||
|
|
||||||
db_path = os.path.join(self.config_dir, "tofu.db")
|
db_path = os.path.join(self.config_dir, "tofu.db")
|
||||||
|
@ -303,6 +306,7 @@ class GeminiClient(cmd.Cmd):
|
||||||
sending the request over the network, parsing the response if
|
sending the request over the network, parsing the response if
|
||||||
its a menu, storing the response in a temporary file, choosing
|
its a menu, storing the response in a temporary file, choosing
|
||||||
and calling a handler program, and updating the history."""
|
and calling a handler program, and updating the history."""
|
||||||
|
|
||||||
# Don't try to speak to servers running other protocols
|
# Don't try to speak to servers running other protocols
|
||||||
if gi.scheme in ("http", "https"):
|
if gi.scheme in ("http", "https"):
|
||||||
webbrowser.open_new_tab(gi.url)
|
webbrowser.open_new_tab(gi.url)
|
||||||
|
@ -316,12 +320,58 @@ you'll be able to transparently follow links to Gopherspace!""")
|
||||||
elif gi.scheme not in ("gemini", "gopher"):
|
elif gi.scheme not in ("gemini", "gopher"):
|
||||||
print("Sorry, no support for {} links.".format(gi.scheme))
|
print("Sorry, no support for {} links.".format(gi.scheme))
|
||||||
return
|
return
|
||||||
|
|
||||||
# Obey permanent redirects
|
# Obey permanent redirects
|
||||||
if gi.url in self.permanent_redirects:
|
if gi.url in self.permanent_redirects:
|
||||||
new_gi = GeminiItem(self.permanent_redirects[gi.url], name=gi.name)
|
new_gi = GeminiItem(self.permanent_redirects[gi.url], name=gi.name)
|
||||||
self._go_to_gi(new_gi)
|
self._go_to_gi(new_gi)
|
||||||
return
|
return
|
||||||
|
|
||||||
|
# Use cache, or hit the network if resource is not cached
|
||||||
|
if self._is_cached(gi.url):
|
||||||
|
mime, body, tmpfile = self._get_cached(gi.url)
|
||||||
|
else:
|
||||||
|
try:
|
||||||
|
gi, mime, body, tmpfile = self._fetch_over_network(gi)
|
||||||
|
except Exception as err:
|
||||||
|
# Print an error message
|
||||||
|
if isinstance(err, socket.gaierror):
|
||||||
|
self.log["dns_failures"] += 1
|
||||||
|
print("ERROR: DNS error!")
|
||||||
|
elif isinstance(err, ConnectionRefusedError):
|
||||||
|
self.log["refused_connections"] += 1
|
||||||
|
print("ERROR: Connection refused!")
|
||||||
|
elif isinstance(err, ConnectionResetError):
|
||||||
|
self.log["reset_connections"] += 1
|
||||||
|
print("ERROR: Connection reset!")
|
||||||
|
elif isinstance(err, (TimeoutError, socket.timeout)):
|
||||||
|
self.log["timeouts"] += 1
|
||||||
|
print("""ERROR: Connection timed out!
|
||||||
|
Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
|
else:
|
||||||
|
print("ERROR: " + str(err))
|
||||||
|
return
|
||||||
|
|
||||||
|
# Pass file to handler, unless we were asked not to
|
||||||
|
if handle:
|
||||||
|
if mime == "text/gemini":
|
||||||
|
self._handle_gemtext(body, gi)
|
||||||
|
else:
|
||||||
|
cmd_str = self._get_handler_cmd(mime)
|
||||||
|
try:
|
||||||
|
subprocess.call(shlex.split(cmd_str % tmpf.name))
|
||||||
|
except FileNotFoundError:
|
||||||
|
print("Handler program %s not found!" % shlex.split(cmd_str)[0])
|
||||||
|
print("You can use the ! command to specify another handler program or pipeline.")
|
||||||
|
|
||||||
|
# Update state
|
||||||
|
self.gi = gi
|
||||||
|
self.mime = mime
|
||||||
|
if update_hist:
|
||||||
|
self._update_history(gi)
|
||||||
|
|
||||||
|
def _fetch_over_network(self, gi):
|
||||||
|
|
||||||
# Be careful with client certificates!
|
# Be careful with client certificates!
|
||||||
# Are we crossing a domain boundary?
|
# Are we crossing a domain boundary?
|
||||||
if self.active_cert_domains and gi.host not in self.active_cert_domains:
|
if self.active_cert_domains and gi.host not in self.active_cert_domains:
|
||||||
|
@ -353,50 +403,26 @@ you'll be able to transparently follow links to Gopherspace!""")
|
||||||
print("Remaining unidentified.")
|
print("Remaining unidentified.")
|
||||||
self.client_certs.pop(gi.host)
|
self.client_certs.pop(gi.host)
|
||||||
|
|
||||||
# Do everything which touches the network in one block,
|
# Is this a local file?
|
||||||
# so we only need to catch exceptions once
|
if not gi.host:
|
||||||
try:
|
address, f = None, open(gi.path, "rb")
|
||||||
# Is this a local file?
|
else:
|
||||||
if not gi.host:
|
address, f = self._send_request(gi)
|
||||||
address, f = None, open(gi.path, "rb")
|
|
||||||
else:
|
|
||||||
address, f = self._send_request(gi)
|
|
||||||
|
|
||||||
# Spec dictates <META> should not exceed 1024 bytes,
|
# Spec dictates <META> should not exceed 1024 bytes,
|
||||||
# so maximum valid header length is 1027 bytes.
|
# so maximum valid header length is 1027 bytes.
|
||||||
header = f.readline(1027)
|
header = f.readline(1027)
|
||||||
header = header.decode("UTF-8")
|
header = header.decode("UTF-8")
|
||||||
if not header or header[-1] != '\n':
|
if not header or header[-1] != '\n':
|
||||||
raise RuntimeError("Received invalid header from server!")
|
raise RuntimeError("Received invalid header from server!")
|
||||||
header = header.strip()
|
header = header.strip()
|
||||||
self._debug("Response header: %s." % header)
|
self._debug("Response header: %s." % header)
|
||||||
|
|
||||||
# Catch network errors which may happen on initial connection
|
|
||||||
except Exception as err:
|
|
||||||
# Print an error message
|
|
||||||
if isinstance(err, socket.gaierror):
|
|
||||||
self.log["dns_failures"] += 1
|
|
||||||
print("ERROR: DNS error!")
|
|
||||||
elif isinstance(err, ConnectionRefusedError):
|
|
||||||
self.log["refused_connections"] += 1
|
|
||||||
print("ERROR: Connection refused!")
|
|
||||||
elif isinstance(err, ConnectionResetError):
|
|
||||||
self.log["reset_connections"] += 1
|
|
||||||
print("ERROR: Connection reset!")
|
|
||||||
elif isinstance(err, (TimeoutError, socket.timeout)):
|
|
||||||
self.log["timeouts"] += 1
|
|
||||||
print("""ERROR: Connection timed out!
|
|
||||||
Slow internet connection? Use 'set timeout' to be more patient.""")
|
|
||||||
else:
|
|
||||||
print("ERROR: " + str(err))
|
|
||||||
return
|
|
||||||
|
|
||||||
# Validate header
|
# Validate header
|
||||||
status, meta = header.split(maxsplit=1)
|
status, meta = header.split(maxsplit=1)
|
||||||
if len(meta) > 1024 or len(status) != 2 or not status.isnumeric():
|
if len(meta) > 1024 or len(status) != 2 or not status.isnumeric():
|
||||||
print("ERROR: Received invalid header from server!")
|
|
||||||
f.close()
|
f.close()
|
||||||
return
|
raise RuntimeError("Received invalid header from server!")
|
||||||
|
|
||||||
# Update redirect loop/maze escaping state
|
# Update redirect loop/maze escaping state
|
||||||
if not status.startswith("3"):
|
if not status.startswith("3"):
|
||||||
|
@ -410,20 +436,17 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
user_input = getpass.getpass("> ")
|
user_input = getpass.getpass("> ")
|
||||||
else:
|
else:
|
||||||
user_input = input("> ")
|
user_input = input("> ")
|
||||||
self._go_to_gi(gi.query(user_input))
|
return self._fetch_over_network(gi.query(user_input))
|
||||||
return
|
|
||||||
# Redirects
|
# Redirects
|
||||||
elif status.startswith("3"):
|
elif status.startswith("3"):
|
||||||
new_gi = GeminiItem(gi.absolutise_url(meta))
|
new_gi = GeminiItem(gi.absolutise_url(meta))
|
||||||
if new_gi.url == gi.url:
|
if new_gi.url == gi.url:
|
||||||
print("Error: URL redirects to itself!")
|
raise RuntimeError("URL redirects to itself!")
|
||||||
return
|
|
||||||
elif new_gi.url in self.previous_redirectors:
|
elif new_gi.url in self.previous_redirectors:
|
||||||
print("Error: caught in redirect loop!")
|
raise RuntimeError("Caught in redirect loop!")
|
||||||
return
|
|
||||||
elif len(self.previous_redirectors) == _MAX_REDIRECTS:
|
elif len(self.previous_redirectors) == _MAX_REDIRECTS:
|
||||||
print("Error: refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS)
|
raise RuntimeError("Refusing to follow more than %d consecutive redirects!" % _MAX_REDIRECTS)
|
||||||
return
|
|
||||||
# Never follow cross-domain redirects without asking
|
# Never follow cross-domain redirects without asking
|
||||||
elif new_gi.host != gi.host:
|
elif new_gi.host != gi.host:
|
||||||
follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url)
|
follow = input("Follow cross-domain redirect to %s? (y/n) " % new_gi.url)
|
||||||
|
@ -444,12 +467,12 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
if status == "31":
|
if status == "31":
|
||||||
# Permanent redirect
|
# Permanent redirect
|
||||||
self.permanent_redirects[gi.url] = new_gi.url
|
self.permanent_redirects[gi.url] = new_gi.url
|
||||||
self._go_to_gi(new_gi)
|
return self._fetch_over_network(new_gi)
|
||||||
return
|
|
||||||
# Errors
|
# Errors
|
||||||
elif status.startswith("4") or status.startswith("5"):
|
elif status.startswith("4") or status.startswith("5"):
|
||||||
print("Error: %s" % meta)
|
raise RuntimeError(meta)
|
||||||
return
|
|
||||||
# Client cert
|
# Client cert
|
||||||
elif status.startswith("6"):
|
elif status.startswith("6"):
|
||||||
# Don't do client cert stuff in restricted mode, as in principle
|
# Don't do client cert stuff in restricted mode, as in principle
|
||||||
|
@ -498,8 +521,7 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
|
|
||||||
# Invalid status
|
# Invalid status
|
||||||
elif not status.startswith("2"):
|
elif not status.startswith("2"):
|
||||||
print("ERROR: Server returned undefined status code %s!" % status)
|
raise RuntimeError("Server returned undefined status code %s!" % status)
|
||||||
return
|
|
||||||
|
|
||||||
# If we're here, this must be a success and there's a response body
|
# If we're here, this must be a success and there's a response body
|
||||||
assert status.startswith("2")
|
assert status.startswith("2")
|
||||||
|
@ -512,16 +534,12 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
try:
|
try:
|
||||||
codecs.lookup(mime_options["charset"])
|
codecs.lookup(mime_options["charset"])
|
||||||
except LookupError:
|
except LookupError:
|
||||||
print("Header declared unknown encoding %s" % value)
|
raise RuntimeError("Header declared unknown encoding %s" % value)
|
||||||
return
|
|
||||||
|
|
||||||
# Read the response body over the network
|
# Read the response body over the network
|
||||||
body = f.read()
|
body = f.read()
|
||||||
|
|
||||||
# Save the result in a temporary file
|
# Save the result in a temporary file
|
||||||
## Delete old file
|
|
||||||
if self.tmp_filename and os.path.exists(self.tmp_filename):
|
|
||||||
os.unlink(self.tmp_filename)
|
|
||||||
## Set file mode
|
## Set file mode
|
||||||
if mime.startswith("text/"):
|
if mime.startswith("text/"):
|
||||||
mode = "w"
|
mode = "w"
|
||||||
|
@ -529,8 +547,7 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
try:
|
try:
|
||||||
body = body.decode(encoding)
|
body = body.decode(encoding)
|
||||||
except UnicodeError:
|
except UnicodeError:
|
||||||
print("Could not decode response body using %s encoding declared in header!" % encoding)
|
raise RuntimeError("Could not decode response body using %s encoding declared in header!" % encoding)
|
||||||
return
|
|
||||||
else:
|
else:
|
||||||
mode = "wb"
|
mode = "wb"
|
||||||
encoding = None
|
encoding = None
|
||||||
|
@ -541,24 +558,11 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
self.tmp_filename = tmpf.name
|
self.tmp_filename = tmpf.name
|
||||||
self._debug("Wrote %d byte response to %s." % (size, self.tmp_filename))
|
self._debug("Wrote %d byte response to %s." % (size, self.tmp_filename))
|
||||||
|
|
||||||
# Pass file to handler, unless we were asked not to
|
# Maintain cache and log
|
||||||
if handle:
|
self._add_to_cache(gi.url, mime, tmpf.name)
|
||||||
if mime == "text/gemini":
|
|
||||||
self._handle_gemtext(body, gi)
|
|
||||||
else:
|
|
||||||
cmd_str = self._get_handler_cmd(mime)
|
|
||||||
try:
|
|
||||||
subprocess.call(shlex.split(cmd_str % tmpf.name))
|
|
||||||
except FileNotFoundError:
|
|
||||||
print("Handler program %s not found!" % shlex.split(cmd_str)[0])
|
|
||||||
print("You can use the ! command to specify another handler program or pipeline.")
|
|
||||||
|
|
||||||
# Update state
|
|
||||||
self.gi = gi
|
|
||||||
self.mime = mime
|
|
||||||
self._log_visit(gi, address, size)
|
self._log_visit(gi, address, size)
|
||||||
if update_hist:
|
|
||||||
self._update_history(gi)
|
return gi, mime, body, tmpf
|
||||||
|
|
||||||
def _send_request(self, gi):
|
def _send_request(self, gi):
|
||||||
"""Send a selector to a given host and port.
|
"""Send a selector to a given host and port.
|
||||||
|
@ -663,6 +667,69 @@ Slow internet connection? Use 'set timeout' to be more patient.""")
|
||||||
|
|
||||||
return addresses
|
return addresses
|
||||||
|
|
||||||
|
def _is_cached(self, url):
|
||||||
|
if url not in self.cache:
|
||||||
|
return False
|
||||||
|
now = time.time()
|
||||||
|
cached = self.cache_timestamps[url]
|
||||||
|
if now - cached > 180:
|
||||||
|
self._debug("Expiring old cached copy of resource.")
|
||||||
|
self._remove_from_cache(url)
|
||||||
|
return False
|
||||||
|
self._debug("Found cached copy of resource.")
|
||||||
|
return True
|
||||||
|
|
||||||
|
def _remove_from_cache(self, url):
|
||||||
|
self.cache_timestamps.pop(url)
|
||||||
|
mime, filename = self.cache.pop(url)
|
||||||
|
os.unlink(filename)
|
||||||
|
self._validate_cache()
|
||||||
|
|
||||||
|
def _add_to_cache(self, url, mime, filename):
|
||||||
|
|
||||||
|
self.cache_timestamps[url] = time.time()
|
||||||
|
self.cache[url] = (mime, filename)
|
||||||
|
if len(self.cache) > 10:
|
||||||
|
self._trim_cache()
|
||||||
|
self._validate_cache()
|
||||||
|
|
||||||
|
def _trim_cache(self):
|
||||||
|
# Order cache entries by age
|
||||||
|
lru = [(t, u) for (u, t) in self.cache_timestamps.items()]
|
||||||
|
lru.sort()
|
||||||
|
# Drop the oldest entry no matter what
|
||||||
|
_, url = lru[0]
|
||||||
|
self._debug("Dropping cached copy of {} from full cache.".format(url))
|
||||||
|
self._remove_from_cache(url)
|
||||||
|
# Drop other entries if they are older than the limit
|
||||||
|
now = time.time()
|
||||||
|
for cached, url in lru[1:]:
|
||||||
|
if now - cached > 180:
|
||||||
|
self._debug("Dropping cached copy of {} from full cache.".format(url))
|
||||||
|
self._remove_from_cache(url)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
self._validate_cache()
|
||||||
|
|
||||||
|
def _get_cached(self, url):
|
||||||
|
mime, filename = self.cache[url]
|
||||||
|
if mime.startswith("text/gemini"):
|
||||||
|
with open(filename, "r") as fp:
|
||||||
|
body = fp.read()
|
||||||
|
return mime, body, filename
|
||||||
|
else:
|
||||||
|
return mime, None, filename
|
||||||
|
|
||||||
|
def _empty_cache(self):
|
||||||
|
for mime, filename in self.cache.values():
|
||||||
|
if os.path.exists(filename):
|
||||||
|
os.unlink(filename)
|
||||||
|
|
||||||
|
def _validate_cache(self):
|
||||||
|
assert self.cache.keys() == self.cache_timestamps.keys()
|
||||||
|
for _, filename in self.cache.values():
|
||||||
|
assert os.path.isfile(filename)
|
||||||
|
|
||||||
def _validate_cert(self, address, host, cert):
|
def _validate_cert(self, address, host, cert):
|
||||||
"""
|
"""
|
||||||
Validate a TLS certificate in TOFU mode.
|
Validate a TLS certificate in TOFU mode.
|
||||||
|
@ -1483,10 +1550,12 @@ current gemini browsing session."""
|
||||||
self.db_conn.commit()
|
self.db_conn.commit()
|
||||||
self.db_conn.close()
|
self.db_conn.close()
|
||||||
# Clean up after ourself
|
# Clean up after ourself
|
||||||
|
self._empty_cache()
|
||||||
if self.tmp_filename and os.path.exists(self.tmp_filename):
|
if self.tmp_filename and os.path.exists(self.tmp_filename):
|
||||||
os.unlink(self.tmp_filename)
|
os.unlink(self.tmp_filename)
|
||||||
if self.idx_filename and os.path.exists(self.idx_filename):
|
if self.idx_filename and os.path.exists(self.idx_filename):
|
||||||
os.unlink(self.idx_filename)
|
os.unlink(self.idx_filename)
|
||||||
|
|
||||||
for cert in self.transient_certs_created:
|
for cert in self.transient_certs_created:
|
||||||
for ext in (".crt", ".key"):
|
for ext in (".crt", ".key"):
|
||||||
certfile = os.path.join(self.config_dir, "transient_certs", cert+ext)
|
certfile = os.path.join(self.config_dir, "transient_certs", cert+ext)
|
||||||
|
|
Loading…
Reference in New Issue