Factor out caching functionality into own class/file.

This commit is contained in:
Solderpunk 2023-11-12 14:49:01 +01:00
parent 87473fee1b
commit e678bca089
2 changed files with 86 additions and 72 deletions

81
av98.py
View File

@ -55,11 +55,12 @@ try:
except ModuleNotFoundError:
_HAS_CRYPTOGRAPHY = False
from cache import Cache
_VERSION = "1.0.2dev"
_MAX_REDIRECTS = 5
_MAX_CACHE_SIZE = 10
_MAX_CACHE_AGE_SECS = 180
# Command abbreviations
_ABBREVS = {
@ -299,8 +300,7 @@ class GeminiClient(cmd.Cmd):
self._connect_to_tofu_db()
self.cache = {}
self.cache_timestamps = {}
self.cache = Cache()
def _connect_to_tofu_db(self):
@ -347,8 +347,9 @@ you'll be able to transparently follow links to Gopherspace!""")
return
# Use cache, or hit the network if resource is not cached
if check_cache and self.options["cache"] and self._is_cached(gi.url):
mime, body, tmpfile = self._get_cached(gi.url)
if check_cache and self.options["cache"] and self.cache.check(gi.url):
mime, body, tmpfile = self.cache.get(gi.url)
self.log["cache_hits"] += 1
else:
try:
gi, mime, body, tmpfile = self._fetch_over_network(gi)
@ -540,7 +541,7 @@ you'll be able to transparently follow links to Gopherspace!""")
# Maintain cache and log
if self.options["cache"]:
self._add_to_cache(gi.url, mime, self.tmp_filename)
self.cache.add(gi.url, mime, self.tmp_filename)
self._log_visit(gi, address, size)
return gi, mime, body, self.tmp_filename
@ -657,70 +658,6 @@ you'll be able to transparently follow links to Gopherspace!""")
return addresses
def _is_cached(self, url):
if url not in self.cache:
return False
now = time.time()
cached = self.cache_timestamps[url]
if now - cached > _MAX_CACHE_AGE_SECS:
self._debug("Expiring old cached copy of resource.")
self._remove_from_cache(url)
return False
self._debug("Found cached copy of resource.")
return True
def _remove_from_cache(self, url):
self.cache_timestamps.pop(url)
mime, filename = self.cache.pop(url)
os.unlink(filename)
self._validate_cache()
def _add_to_cache(self, url, mime, filename):
self.cache_timestamps[url] = time.time()
self.cache[url] = (mime, filename)
if len(self.cache) > _MAX_CACHE_SIZE:
self._trim_cache()
self._validate_cache()
def _trim_cache(self):
# Order cache entries by age
lru = [(t, u) for (u, t) in self.cache_timestamps.items()]
lru.sort()
# Drop the oldest entry no matter what
_, url = lru[0]
self._debug("Dropping cached copy of {} from full cache.".format(url))
self._remove_from_cache(url)
# Drop other entries if they are older than the limit
now = time.time()
for cached, url in lru[1:]:
if now - cached > _MAX_CACHE_AGE_SECS:
self._debug("Dropping cached copy of {} from full cache.".format(url))
self._remove_from_cache(url)
else:
break
self._validate_cache()
def _get_cached(self, url):
mime, filename = self.cache[url]
self.log["cache_hits"] += 1
if mime.startswith("text/gemini"):
with open(filename, "r") as fp:
body = fp.read()
return mime, body, filename
else:
return mime, None, filename
def _empty_cache(self):
for mime, filename in self.cache.values():
if os.path.exists(filename):
os.unlink(filename)
def _validate_cache(self):
assert self.cache.keys() == self.cache_timestamps.keys()
for _, filename in self.cache.values():
assert os.path.isfile(filename)
def _handle_cert_request(self, meta, status, host):
# Don't do client cert stuff in restricted mode, as in principle
@ -1590,7 +1527,7 @@ current gemini browsing session."""
self.db_conn.commit()
self.db_conn.close()
# Clean up after ourself
self._empty_cache()
self.cache.empty()
if self.tmp_filename and os.path.exists(self.tmp_filename):
os.unlink(self.tmp_filename)
if self.idx_filename and os.path.exists(self.idx_filename):

77
cache.py Normal file
View File

@ -0,0 +1,77 @@
import os
import os.path
import time

_MAX_CACHE_SIZE = 10
_MAX_CACHE_AGE_SECS = 180


class Cache:
    """In-memory cache mapping URLs to (mime, filename) pairs on disk.

    Entries expire after _MAX_CACHE_AGE_SECS seconds, and the cache is
    trimmed once it holds more than _MAX_CACHE_SIZE entries.  The cached
    payload itself lives in files on disk; this class owns those files
    and deletes them when entries are dropped.
    """

    def __init__(self):
        self.cache = {}             # url -> (mime, filename)
        self.cache_timestamps = {}  # url -> insertion time (time.time())

    def check(self, url):
        """Return True if *url* has a fresh cached copy, expiring stale ones."""
        if url not in self.cache:
            return False
        if time.time() - self.cache_timestamps[url] > _MAX_CACHE_AGE_SECS:
            # Expire old cached copy of resource.
            self._remove(url)
            return False
        return True

    def _remove(self, url):
        """Drop *url* from the cache and delete its backing file."""
        self.cache_timestamps.pop(url)
        mime, filename = self.cache.pop(url)
        # Tolerate a backing file that was already deleted externally,
        # consistent with empty() below.
        if os.path.exists(filename):
            os.unlink(filename)
        self.validatecache()

    def add(self, url, mime, filename):
        """Record a cached copy of *url* with payload stored at *filename*."""
        self.cache_timestamps[url] = time.time()
        self.cache[url] = (mime, filename)
        if len(self.cache) > _MAX_CACHE_SIZE:
            self.trim()
        self.validatecache()

    def trim(self):
        """Shrink an over-full cache: drop the oldest entry, then any stale ones."""
        # Order cache entries by age, oldest first.
        lru = sorted((t, u) for (u, t) in self.cache_timestamps.items())
        # Drop the oldest entry no matter what.
        _, url = lru[0]
        self._remove(url)
        # Drop further entries only if they are older than the age limit.
        now = time.time()
        for cached, url in lru[1:]:
            if now - cached <= _MAX_CACHE_AGE_SECS:
                break
            self._remove(url)
        self.validatecache()

    def get(self, url):
        """Return (mime, body, filename) for a cached *url*.

        body is the file's text for text/gemini resources, else None.
        Raises KeyError if *url* is not cached; call check() first.
        """
        mime, filename = self.cache[url]
        if mime.startswith("text/gemini"):
            with open(filename, "r") as fp:
                body = fp.read()
            return mime, body, filename
        return mime, None, filename

    def empty(self):
        """Delete all cached files and reset the cache to a clean state."""
        for mime, filename in self.cache.values():
            if os.path.exists(filename):
                os.unlink(filename)
        # Bug fix: also forget the entries themselves, otherwise the cache
        # keeps referencing files that no longer exist.
        self.cache.clear()
        self.cache_timestamps.clear()
        self.validatecache()

    def validatecache(self):
        """Sanity-check internal consistency (NOTE: assert is a no-op under -O)."""
        assert self.cache.keys() == self.cache_timestamps.keys()
        for _, filename in self.cache.values():
            assert os.path.isfile(filename)