Factor out caching functionality into own class/file.

This commit is contained in:
Solderpunk 2023-11-12 14:49:01 +01:00
parent 87473fee1b
commit e678bca089
2 changed files with 86 additions and 72 deletions

81
av98.py
View File

@ -55,11 +55,12 @@ try:
except ModuleNotFoundError:
_HAS_CRYPTOGRAPHY = False
from cache import Cache
_VERSION = "1.0.2dev"
_MAX_REDIRECTS = 5
_MAX_CACHE_SIZE = 10
_MAX_CACHE_AGE_SECS = 180
# Command abbreviations
_ABBREVS = {
@ -299,8 +300,7 @@ class GeminiClient(cmd.Cmd):
self._connect_to_tofu_db()
self.cache = {}
self.cache_timestamps = {}
self.cache = Cache()
def _connect_to_tofu_db(self):
@ -347,8 +347,9 @@ you'll be able to transparently follow links to Gopherspace!""")
return
# Use cache, or hit the network if resource is not cached
if check_cache and self.options["cache"] and self._is_cached(gi.url):
mime, body, tmpfile = self._get_cached(gi.url)
if check_cache and self.options["cache"] and self.cache.check(gi.url):
mime, body, tmpfile = self.cache.get(gi.url)
self.log["cache_hits"] += 1
else:
try:
gi, mime, body, tmpfile = self._fetch_over_network(gi)
@ -540,7 +541,7 @@ you'll be able to transparently follow links to Gopherspace!""")
# Maintain cache and log
if self.options["cache"]:
self._add_to_cache(gi.url, mime, self.tmp_filename)
self.cache.add(gi.url, mime, self.tmp_filename)
self._log_visit(gi, address, size)
return gi, mime, body, self.tmp_filename
@ -657,70 +658,6 @@ you'll be able to transparently follow links to Gopherspace!""")
return addresses
def _is_cached(self, url):
if url not in self.cache:
return False
now = time.time()
cached = self.cache_timestamps[url]
if now - cached > _MAX_CACHE_AGE_SECS:
self._debug("Expiring old cached copy of resource.")
self._remove_from_cache(url)
return False
self._debug("Found cached copy of resource.")
return True
def _remove_from_cache(self, url):
self.cache_timestamps.pop(url)
mime, filename = self.cache.pop(url)
os.unlink(filename)
self._validate_cache()
def _add_to_cache(self, url, mime, filename):
self.cache_timestamps[url] = time.time()
self.cache[url] = (mime, filename)
if len(self.cache) > _MAX_CACHE_SIZE:
self._trim_cache()
self._validate_cache()
def _trim_cache(self):
# Order cache entries by age
lru = [(t, u) for (u, t) in self.cache_timestamps.items()]
lru.sort()
# Drop the oldest entry no matter what
_, url = lru[0]
self._debug("Dropping cached copy of {} from full cache.".format(url))
self._remove_from_cache(url)
# Drop other entries if they are older than the limit
now = time.time()
for cached, url in lru[1:]:
if now - cached > _MAX_CACHE_AGE_SECS:
self._debug("Dropping cached copy of {} from full cache.".format(url))
self._remove_from_cache(url)
else:
break
self._validate_cache()
def _get_cached(self, url):
mime, filename = self.cache[url]
self.log["cache_hits"] += 1
if mime.startswith("text/gemini"):
with open(filename, "r") as fp:
body = fp.read()
return mime, body, filename
else:
return mime, None, filename
def _empty_cache(self):
for mime, filename in self.cache.values():
if os.path.exists(filename):
os.unlink(filename)
def _validate_cache(self):
assert self.cache.keys() == self.cache_timestamps.keys()
for _, filename in self.cache.values():
assert os.path.isfile(filename)
def _handle_cert_request(self, meta, status, host):
# Don't do client cert stuff in restricted mode, as in principle
@ -1590,7 +1527,7 @@ current gemini browsing session."""
self.db_conn.commit()
self.db_conn.close()
# Clean up after ourself
self._empty_cache()
self.cache.empty()
if self.tmp_filename and os.path.exists(self.tmp_filename):
os.unlink(self.tmp_filename)
if self.idx_filename and os.path.exists(self.idx_filename):

77
cache.py Normal file
View File

@ -0,0 +1,77 @@
import os
import os.path
import time

_MAX_CACHE_SIZE = 10
_MAX_CACHE_AGE_SECS = 180


class Cache:
    """In-memory cache mapping URLs to (mime, filename) pairs on disk.

    Entries expire after _MAX_CACHE_AGE_SECS seconds, and the cache is
    trimmed once it holds more than _MAX_CACHE_SIZE entries.  The cached
    payload itself lives in files on disk; this class owns those files
    and deletes them when entries are dropped.
    """

    def __init__(self):
        self.cache = {}             # url -> (mime, filename)
        self.cache_timestamps = {}  # url -> insertion time (time.time())

    def check(self, url):
        """Return True if *url* has a fresh cached copy, expiring stale ones."""
        if url not in self.cache:
            return False
        if time.time() - self.cache_timestamps[url] > _MAX_CACHE_AGE_SECS:
            # Expire old cached copy of resource.
            self._remove(url)
            return False
        return True

    def _remove(self, url):
        """Drop *url* from the cache and delete its backing file."""
        self.cache_timestamps.pop(url)
        mime, filename = self.cache.pop(url)
        # Tolerate a backing file that was already deleted externally,
        # consistent with empty() below.
        if os.path.exists(filename):
            os.unlink(filename)
        self.validatecache()

    def add(self, url, mime, filename):
        """Record a cached copy of *url* with payload stored at *filename*."""
        self.cache_timestamps[url] = time.time()
        self.cache[url] = (mime, filename)
        if len(self.cache) > _MAX_CACHE_SIZE:
            self.trim()
        self.validatecache()

    def trim(self):
        """Shrink an over-full cache: drop the oldest entry, then any stale ones."""
        # Order cache entries by age, oldest first.
        lru = sorted((t, u) for (u, t) in self.cache_timestamps.items())
        # Drop the oldest entry no matter what.
        _, url = lru[0]
        self._remove(url)
        # Drop further entries only if they are older than the age limit.
        now = time.time()
        for cached, url in lru[1:]:
            if now - cached <= _MAX_CACHE_AGE_SECS:
                break
            self._remove(url)
        self.validatecache()

    def get(self, url):
        """Return (mime, body, filename) for a cached *url*.

        body is the file's text for text/gemini resources, else None.
        Raises KeyError if *url* is not cached; call check() first.
        """
        mime, filename = self.cache[url]
        if mime.startswith("text/gemini"):
            with open(filename, "r") as fp:
                body = fp.read()
            return mime, body, filename
        return mime, None, filename

    def empty(self):
        """Delete all cached files and reset the cache to a clean state."""
        for mime, filename in self.cache.values():
            if os.path.exists(filename):
                os.unlink(filename)
        # Bug fix: also forget the entries themselves, otherwise the cache
        # keeps referencing files that no longer exist.
        self.cache.clear()
        self.cache_timestamps.clear()
        self.validatecache()

    def validatecache(self):
        """Sanity-check internal consistency (NOTE: assert is a no-op under -O)."""
        assert self.cache.keys() == self.cache_timestamps.keys()
        for _, filename in self.cache.values():
            assert os.path.isfile(filename)