working on netcache

2023-07-04 23:20:39 +02:00 · 2023-07-04 23:20:39 +02:00 · 76b00b8c04
parent a6974bb404
commit 76b00b8c04
2 changed files with 145 additions and 132 deletions
--- a/netcache.py
+++ b/netcache.py
@ -1,6 +1,8 @@
 #!/bin/python
 import os
+import time
+import urllib
+import urllib.parse

+_home = os.path.expanduser('~')
 cache_home = os.environ.get('XDG_CACHE_HOME') or\
                os.path.join(_home,'.cache')
 _CACHE_PATH = os.path.join(cache_home,"offpunk/")
@ -10,8 +12,140 @@ if not os.path.exists(_CACHE_PATH):
    os.makedirs(_CACHE_PATH)


-def get_cache_path(url):
+#def get(url,max_size_download=None,timeout=None):
+
+def cache_last_modified(url):
+    """Return the last modification time of the cache entry for `url`.
+
+    Returns the mtime (seconds since the epoch) of the path computed by
+    get_cache_path(), 0 for a local URL for which no path could be
+    computed, and None (after printing an error) when there is no cache
+    at all. os.path.getmtime() raises OSError if the path does not exist.
+    """
+    path = get_cache_path(url)
+    if path:
+        return os.path.getmtime(path)
+    # This is a module-level function, so there is no `self.local` to ask:
+    # locality is derived from the URL with the same rule get_cache_path() uses.
+    scheme = urllib.parse.urlparse(url).scheme
+    if url.startswith(("/", "./")) or scheme in ("file", "mailto", "list"):
+        return 0
+    print("ERROR : NO CACHE in cache_last_modified")
+    return None

 def is_cache_valid(url,validity=0):
+    """Tell whether a usable cache entry exists for `url`.
+
+    `validity` is the acceptable age of the cache, in seconds.
+    If 0, any existing cache is considered valid
+    (use validity = 1 if you want to refresh everything).
+    Returns True or False.
+    """
+    cache = get_cache_path(url)
+    if not cache:
+        #There’s not even a cache!
+        return False
+    # Local resources (files, mailto, lists) are valid as soon as they exist,
+    # whatever their age and even when they are directories. The locality
+    # rule is the one get_cache_path() applies to the URL.
+    scheme = urllib.parse.urlparse(url).scheme
+    if url.startswith(("/", "./")) or scheme in ("file", "mailto", "list"):
+        return os.path.exists(cache)
+    # A path longer than 259 characters is reported as not cached.
+    if len(cache) > 259:
+        print("We return False because path is too long")
+        return False
+    if not os.path.exists(cache) or os.path.isdir(cache):
+        #Cache has not been built
+        return False
+    if validity <= 0:
+        return True
+    age = time.time() - cache_last_modified(url)
+    return age < validity


+
+def get_cache_path(url):
+    """Return the filesystem path used to cache the content of `url`.
+
+    Local URLs (absolute or "./" paths and the file, mailto and list
+    schemes) map to their own path. Any other URL maps to a path below
+    _CACHE_PATH built from its scheme, host, path and query.
+    Returns None when the URL has no scheme or no host.
+    """
+    #First, we parse the URL
+    parsed = urllib.parse.urlparse(url)
+    # startswith() rather than url[0], so that an empty URL does not raise.
+    if url.startswith(("/", "./")):
+        scheme = "file"
+    else:
+        scheme = parsed.scheme
+    if scheme in ("file", "mailto", "list"):
+        # Local resources are returned as they are, without any index handling.
+        # file:// is 7 char
+        if url.startswith("file://"):
+            return url[7:]
+        if scheme == "mailto":
+            return parsed.path
+        if url.startswith("list://"):
+            # NOTE(review): _DATA_DIR must exist at module level; its
+            # definition is not visible in this part of the file.
+            listdir = os.path.join(_DATA_DIR, "lists")
+            listname = url[7:].lstrip("/")
+            if not listname:
+                return listdir
+            return os.path.join(listdir, "%s.gmi" % listname)
+        return url
+    host = parsed.hostname
+    # Without a scheme or a host no cache path can be built: give the
+    # caller None instead of failing on the concatenation below.
+    if not scheme or not host:
+        return None
+    if scheme == "gopher":
+        # special gopher selector case: the path starts with "/" followed
+        # by a one-character item type, which is not part of the selector.
+        path = parsed.path[2:] if len(parsed.path) >= 2 else ""
+    else:
+        path = parsed.path
+    # we don’t add the query if path is too long because path above 260 char
+    # are not supported and crash python.
+    # Also, very long query are usually useless stuff
+    if parsed.query and len(path + parsed.query) < 258:
+        path += "/" + parsed.query
+    # Now, we have a partial path. Let’s make it full path.
+    cache_path = os.path.expanduser(_CACHE_PATH + scheme + "/" + host + path)
+    #There’s an OS limitation of 260 characters per path.
+    #We will thus cut the path enough to add the index afterward
+    cache_path = cache_path[:249]
+    # FIXME : this is a gross hack to give a name to
+    # index files. This will break if the index is not
+    # index.gmi. I don’t know how to know the real name
+    # of the file. But first, we need to ensure that the domain name
+    # finish by "/". Else, the cache will create a file, not a folder.
+    if scheme.startswith("http"):
+        index = "index.html"
+    elif scheme == "finger":
+        index = "index.txt"
+    elif scheme == "gopher":
+        index = "gophermap"
+    else:
+        index = "index.gmi"
+    if (path == "" or os.path.isdir(cache_path)) and not cache_path.endswith("/"):
+        cache_path += "/"
+    if cache_path.endswith("/"):
+        cache_path += index
+    #sometimes, the index itself is a dir
+    #like when folder/index.gmi?param has been created
+    #and we try to access folder
+    if os.path.isdir(cache_path):
+        cache_path += "/" + index
+    return cache_path
--- a/offpunk.py
+++ b/offpunk.py
@ -1,8 +1,8 @@
 #!/usr/bin/env python3
 # Offpunk Offline Gemini client
 # Derived from AV-98 by Solderpunk,
-# (C) 2021, 2022 Ploum <offpunk@ploum.eu>
-# (C) 2019, 2020 Solderpunk <solderpunk@sdf.org>
+# (C) From 2021:  Ploum <offpunk@ploum.eu>
+# (C) 2019, 2020: Solderpunk <solderpunk@sdf.org>
 # With contributions from:
 #  - danceka <hannu.hartikainen@gmail.com>
 #  - <jprjr@tilde.club>
@ -47,6 +47,7 @@ import webbrowser
 import base64
 import subprocess
 import ansirenderer
+import netcache
 from offutils import run,term_width
 try:
    import setproctitle
@ -229,105 +230,16 @@ class GeminiItem():
        self.mime = None
        self.renderer = None
        self.body = None
-        parsed = urllib.parse.urlparse(self.url)
-        if url[0] == "/" or url.startswith("./"):
-            self.scheme = "file"
-        else:
-            self.scheme = parsed.scheme
-        if self.scheme in ["file","mailto","list"]:
-            self.local = True
-            self.host = ""
-            self.port = None
-            # file:// is 7 char
-            if self.url.startswith("file://"):
-                self.path = self.url[7:]
-            elif self.scheme == "mailto":
-                self.path = parsed.path
-            elif self.url.startswith("list://"):
-                listdir = os.path.join(_DATA_DIR,"lists")
-                listname = self.url[7:].lstrip("/")
-                if listname in [""]:
-                    self.name = "My Lists"
-                    self.path = listdir
-                else:
-                    self.name = listname
-                    self.path = os.path.join(listdir, "%s.gmi"%listname)
-            else:
-                self.path = self.url
-        else:
-            self.local = False
-            # Convert unicode hostname to punycode using idna RFC3490
-            self.host = parsed.hostname #.encode("idna").decode()
-            self.port = parsed.port or standard_ports.get(self.scheme, 0)
-            # special gopher selector case
-            if self.scheme == "gopher":
-                if len(parsed.path) >= 2:
-                    itemtype = parsed.path[1]
-                    self.path = parsed.path[2:]
-                else:
-                    itemtype = "1"
-                    self.path = ""
-                if itemtype == "0":
-                    self.mime = "text/gemini"
-                elif itemtype == "1":
-                    self.mime = "text/gopher"
-                elif itemtype == "h":
-                    self.mime = "text/html"
-                elif itemtype in ("9","g","I","s"):
-                    self.mime = "binary"
-                else:
-                    self.mime = "text/gopher"
-            else:
-                self.path = parsed.path
-            if parsed.query:
-                # we don’t add the query if path is too long because path above 260 char
-                # are not supported and crash python.
-                # Also, very long query are usually useless stuff
-                if len(self.path+parsed.query) < 258:
-                    self.path += "/" + parsed.query
+        #TODO : stuff has been migrated to netcache. What are we missing here?

    def get_cache_path(self):
        # if we already have a _cache_path, we returns it.
        # Except if it became a folder! (which happens for index.html/index.gmi)
        # In that case, we need to reconstruct it
-        if self._cache_path and not os.path.isdir(self._cache_path):
-            return self._cache_path
-        elif self.local:
-            self._cache_path = self.path
-        #if not local, we create a local cache path.
-        #Warning: this cache_path might be nul in case of an invalid GI
-        elif self.scheme and self.host:
-            self._cache_path = os.path.expanduser(_CACHE_PATH + self.scheme +\
-                                                "/" + self.host + self.path)
-            #There’s an OS limitation of 260 characters per path.
-            #We will thus cut the path enough to add the index afterward
-            self._cache_path = self._cache_path[:249]
-            # FIXME : this is a gross hack to give a name to
-            # index files. This will break if the index is not
-            # index.gmi. I don’t know how to know the real name
-            # of the file. But first, we need to ensure that the domain name
-            # finish by "/". Else, the cache will create a file, not a folder.
-            if self.scheme.startswith("http"):
-                index = "index.html"
-            elif self.scheme == "finger":
-                index = "index.txt"
-            elif self.scheme == "gopher":
-                index = "gophermap"
-            else:
-                index = "index.gmi"
-            if self.path == "" or os.path.isdir(self._cache_path):
-                if not self._cache_path.endswith("/"):
-                    self._cache_path += "/"
-                if not self.url.endswith("/"):
-                    self.url += "/"
-            if self._cache_path.endswith("/"):
-                self._cache_path += index
-            #sometimes, the index itself is a dir
-            #like when folder/index.gmi?param has been created
-            #and we try to access folder
-            if os.path.isdir(self._cache_path):
-                self._cache_path += "/" + index
-        return self._cache_path
+        # TODO: ensure that the following two lines are not needed in netcache
+        #if self._cache_path and not os.path.isdir(self._cache_path):
+        #    return self._cache_path
+        # The path is computed by netcache from self.url;
+        # self._cache_path is neither read nor updated here.
+        return netcache.get_cache_path(self.url)

    def get_capsule_title(self):
            #small intelligence to try to find a good name for a capsule
@ -366,43 +278,10 @@ class GeminiItem():
        return title

    def is_cache_valid(self,validity=0):
-        # Validity is the acceptable time for
-        # a cache to be valid  (in seconds)
-        # If 0, then any cache is considered as valid
-        # (use validity = 1 if you want to refresh everything)
-        cache = self.get_cache_path()
-        if self.local:
-            return os.path.exists(cache)
-        elif cache :
-            # If path is too long, we always return True to avoid
-            # fetching it.
-            if len(cache) > 259:
-                print("We return False because path is too long")
-                return False
-            if os.path.exists(cache) and not os.path.isdir(cache):
-                if validity > 0 :
-                    last_modification = self.cache_last_modified()
-                    now = time.time()
-                    age = now - last_modification
-                    return age < validity
-                else:
-                    return True
-            else:
-                #Cache has not been build
-                return False
-        else:
-            #There’s not even a cache!
-            return False
+        # Delegates to netcache.is_cache_valid() with this item's URL
+        # and the requested validity.
+        return netcache.is_cache_valid(self.url,validity=validity)

    def cache_last_modified(self):
-        path = self.get_cache_path()
-        if path:
-            return os.path.getmtime(path)
-        elif self.local:
-            return 0
-        else:
-            print("ERROR : NO CACHE in cache_last_modified")
-            return None
+        # Delegates to netcache.cache_last_modified() with this item's URL.
+        return netcache.cache_last_modified(self.url)

    def get_body(self,as_file=False):
        if self.body and not as_file: