Factor out caching functionality into its own class/file.
This commit is contained in:
parent
87473fee1b
commit
e678bca089
81
av98.py
81
av98.py
|
@ -55,11 +55,12 @@ try:
|
|||
except ModuleNotFoundError:
|
||||
_HAS_CRYPTOGRAPHY = False
|
||||
|
||||
from cache import Cache
|
||||
|
||||
_VERSION = "1.0.2dev"
|
||||
|
||||
_MAX_REDIRECTS = 5
|
||||
_MAX_CACHE_SIZE = 10
|
||||
_MAX_CACHE_AGE_SECS = 180
|
||||
|
||||
|
||||
# Command abbreviations
|
||||
_ABBREVS = {
|
||||
|
@ -299,8 +300,7 @@ class GeminiClient(cmd.Cmd):
|
|||
|
||||
self._connect_to_tofu_db()
|
||||
|
||||
self.cache = {}
|
||||
self.cache_timestamps = {}
|
||||
self.cache = Cache()
|
||||
|
||||
def _connect_to_tofu_db(self):
|
||||
|
||||
|
@ -347,8 +347,9 @@ you'll be able to transparently follow links to Gopherspace!""")
|
|||
return
|
||||
|
||||
# Use cache, or hit the network if resource is not cached
|
||||
if check_cache and self.options["cache"] and self._is_cached(gi.url):
|
||||
mime, body, tmpfile = self._get_cached(gi.url)
|
||||
if check_cache and self.options["cache"] and self.cache.check(gi.url):
|
||||
mime, body, tmpfile = self.cache.get(gi.url)
|
||||
self.log["cache_hits"] += 1
|
||||
else:
|
||||
try:
|
||||
gi, mime, body, tmpfile = self._fetch_over_network(gi)
|
||||
|
@ -540,7 +541,7 @@ you'll be able to transparently follow links to Gopherspace!""")
|
|||
|
||||
# Maintain cache and log
|
||||
if self.options["cache"]:
|
||||
self._add_to_cache(gi.url, mime, self.tmp_filename)
|
||||
self.cache.add(gi.url, mime, self.tmp_filename)
|
||||
self._log_visit(gi, address, size)
|
||||
|
||||
return gi, mime, body, self.tmp_filename
|
||||
|
@ -657,70 +658,6 @@ you'll be able to transparently follow links to Gopherspace!""")
|
|||
|
||||
return addresses
|
||||
|
||||
def _is_cached(self, url):
|
||||
if url not in self.cache:
|
||||
return False
|
||||
now = time.time()
|
||||
cached = self.cache_timestamps[url]
|
||||
if now - cached > _MAX_CACHE_AGE_SECS:
|
||||
self._debug("Expiring old cached copy of resource.")
|
||||
self._remove_from_cache(url)
|
||||
return False
|
||||
self._debug("Found cached copy of resource.")
|
||||
return True
|
||||
|
||||
def _remove_from_cache(self, url):
|
||||
self.cache_timestamps.pop(url)
|
||||
mime, filename = self.cache.pop(url)
|
||||
os.unlink(filename)
|
||||
self._validate_cache()
|
||||
|
||||
def _add_to_cache(self, url, mime, filename):
    """Record ``filename`` as the cached copy of ``url``.

    url: the resource's URL, used as the cache key.
    mime: MIME type string stored alongside the file name.
    filename: path of the file holding the resource body.

    The entry is stamped with the current time.  If the cache grows past
    ``_MAX_CACHE_SIZE`` entries it is trimmed straight away.
    """
    # Re-adding an already cached URL (e.g. after a forced reload) used to
    # overwrite the record and leave the superseded file orphaned on disk.
    previous = self.cache.get(url)
    if previous is not None:
        old_filename = previous[1]
        if old_filename != filename and os.path.exists(old_filename):
            os.unlink(old_filename)

    self.cache_timestamps[url] = time.time()
    self.cache[url] = (mime, filename)
    if len(self.cache) > _MAX_CACHE_SIZE:
        self._trim_cache()
    self._validate_cache()
|
||||
|
||||
def _trim_cache(self):
|
||||
# Order cache entries by age
|
||||
lru = [(t, u) for (u, t) in self.cache_timestamps.items()]
|
||||
lru.sort()
|
||||
# Drop the oldest entry no matter what
|
||||
_, url = lru[0]
|
||||
self._debug("Dropping cached copy of {} from full cache.".format(url))
|
||||
self._remove_from_cache(url)
|
||||
# Drop other entries if they are older than the limit
|
||||
now = time.time()
|
||||
for cached, url in lru[1:]:
|
||||
if now - cached > _MAX_CACHE_AGE_SECS:
|
||||
self._debug("Dropping cached copy of {} from full cache.".format(url))
|
||||
self._remove_from_cache(url)
|
||||
else:
|
||||
break
|
||||
self._validate_cache()
|
||||
|
||||
def _get_cached(self, url):
|
||||
mime, filename = self.cache[url]
|
||||
self.log["cache_hits"] += 1
|
||||
if mime.startswith("text/gemini"):
|
||||
with open(filename, "r") as fp:
|
||||
body = fp.read()
|
||||
return mime, body, filename
|
||||
else:
|
||||
return mime, None, filename
|
||||
|
||||
def _empty_cache(self):
|
||||
for mime, filename in self.cache.values():
|
||||
if os.path.exists(filename):
|
||||
os.unlink(filename)
|
||||
|
||||
def _validate_cache(self):
|
||||
assert self.cache.keys() == self.cache_timestamps.keys()
|
||||
for _, filename in self.cache.values():
|
||||
assert os.path.isfile(filename)
|
||||
|
||||
def _handle_cert_request(self, meta, status, host):
|
||||
|
||||
# Don't do client cert stuff in restricted mode, as in principle
|
||||
|
@ -1590,7 +1527,7 @@ current gemini browsing session."""
|
|||
self.db_conn.commit()
|
||||
self.db_conn.close()
|
||||
# Clean up after ourself
|
||||
self._empty_cache()
|
||||
self.cache.empty()
|
||||
if self.tmp_filename and os.path.exists(self.tmp_filename):
|
||||
os.unlink(self.tmp_filename)
|
||||
if self.idx_filename and os.path.exists(self.idx_filename):
|
||||
|
|
|
@ -0,0 +1,77 @@
|
|||
# Entry-count limit: the cache is trimmed when it holds more entries than this.
_MAX_CACHE_SIZE = 10
|
||||
# Age limit in seconds (compared against time.time() differences); older
# entries are treated as expired.
_MAX_CACHE_AGE_SECS = 180
|
||||
|
||||
import os
|
||||
import os.path
|
||||
import time
|
||||
|
||||
class Cache:
    """Index of fetched resources whose bodies live in files on disk.

    Each entry maps a URL to a ``(mime, filename)`` pair and remembers the
    time it was added.  The cache owns the files it indexes: when an entry
    expires, is evicted, is replaced or the cache is emptied, the file is
    deleted from disk.
    """

    def __init__(self, max_size=None, max_age_secs=None):
        """Create an empty cache.

        max_size: number of entries allowed before ``add`` trims the
            cache; defaults to the module-level ``_MAX_CACHE_SIZE``.
        max_age_secs: age in seconds after which an entry is expired;
            defaults to the module-level ``_MAX_CACHE_AGE_SECS``.
        """
        self.max_size = _MAX_CACHE_SIZE if max_size is None else max_size
        self.max_age_secs = (
            _MAX_CACHE_AGE_SECS if max_age_secs is None else max_age_secs
        )
        # url -> (mime, filename)
        self.cache = {}
        # url -> time.time() value recorded when the entry was added
        self.cache_timestamps = {}

    @staticmethod
    def _unlink(filename):
        """Delete ``filename``, tolerating a file that is already gone."""
        try:
            os.unlink(filename)
        except FileNotFoundError:
            # Something else (e.g. a temp-dir cleaner) removed it first;
            # the goal, "file is gone", is already met.
            pass

    def check(self, url):
        """Return True if a non-expired copy of ``url`` is cached.

        An expired entry is removed (and its file deleted) as a side
        effect, and False is returned for it.
        """
        if url not in self.cache:
            return False
        age = time.time() - self.cache_timestamps[url]
        if age > self.max_age_secs:
            self._remove(url)
            return False
        return True

    def _remove(self, url):
        """Drop ``url`` from the index and delete its file.

        Raises ``KeyError`` if ``url`` is not cached.
        """
        self.cache_timestamps.pop(url)
        _mime, filename = self.cache.pop(url)
        self._unlink(filename)
        self.validatecache()

    def add(self, url, mime, filename):
        """Record ``filename`` as the cached copy of ``url``.

        url: the resource's URL, used as the cache key.
        mime: MIME type string stored with the entry.
        filename: path of the file holding the resource body.

        If ``url`` was already cached under a different file, that older
        file is deleted.  The cache is trimmed when it grows past
        ``max_size`` entries.
        """
        previous = self.cache.get(url)
        if previous is not None and previous[1] != filename:
            # Without this the superseded file would be orphaned on disk.
            self._unlink(previous[1])

        self.cache_timestamps[url] = time.time()
        self.cache[url] = (mime, filename)
        if len(self.cache) > self.max_size:
            self.trim()
        self.validatecache()

    def trim(self):
        """Evict the oldest entry, plus any further expired entries.

        Does nothing when the cache is empty.
        """
        # (timestamp, url) pairs, oldest first.
        lru = sorted((stamp, url) for url, stamp in self.cache_timestamps.items())
        if not lru:
            return

        # Drop the oldest entry no matter what.
        _, oldest = lru[0]
        self._remove(oldest)

        # Drop other entries only while they are older than the limit;
        # the list is sorted, so the first fresh entry ends the scan.
        now = time.time()
        for stamp, url in lru[1:]:
            if now - stamp <= self.max_age_secs:
                break
            self._remove(url)
        self.validatecache()

    def get(self, url):
        """Return ``(mime, body, filename)`` for the cached ``url``.

        ``body`` is the file's text when the MIME type starts with
        ``text/gemini`` and ``None`` otherwise.  Raises ``KeyError`` if
        ``url`` is not cached; call ``check`` first.
        """
        mime, filename = self.cache[url]
        if mime.startswith("text/gemini"):
            with open(filename, "r") as fp:
                body = fp.read()
            return mime, body, filename
        return mime, None, filename

    def empty(self):
        """Delete every cached file and forget all entries."""
        for _mime, filename in self.cache.values():
            self._unlink(filename)
        # Clear the index too, so it never refers to deleted files.
        self.cache.clear()
        self.cache_timestamps.clear()

    def validatecache(self):
        """Assert the internal invariants (debugging aid).

        Both dicts must hold the same URLs and every cached file name must
        refer to an existing regular file.
        """
        assert self.cache.keys() == self.cache_timestamps.keys()
        for _, filename in self.cache.values():
            assert os.path.isfile(filename)
|
||||
|
Loading…
Reference in New Issue