From ce65850987bffb1aadc3ecbf610146c354cb92f8 Mon Sep 17 00:00:00 2001
From: Lionel Dricot
Date: Thu, 20 Jul 2023 01:17:34 +0200
Subject: [PATCH] starting to adapt offpunk

---
 netcache.py | 59 +++++++++++++++++++++++++++++++++--------------------------
 offpunk.py  | 33 +++++++--------------------------
 2 files changed, 40 insertions(+), 52 deletions(-)

diff --git a/netcache.py b/netcache.py
index aad588f..46f4613 100755
--- a/netcache.py
+++ b/netcache.py
@@ -237,10 +237,10 @@ def write_body(url,body,mime=None):
         f.close()
     return cache_path
 
-def _fetch_http(url,max_length=None):
-    def set_error(item,length,max_length):
+def _fetch_http(url,max_size=None,timeout=DEFAULT_TIMEOUT,**kwargs):
+    def set_error(item,length,max_size):
         err = "Size of %s is %s Mo\n"%(item.url,length)
-        err += "Offpunk only download automatically content under %s Mo\n" %(max_length/1000000)
+        err += "Offpunk only automatically downloads content under %s Mo\n" %(max_size/1000000)
         err += "To retrieve this content anyway, type 'reload'."
         item.set_error(err)
         return item
@@ -261,7 +261,7 @@ def _fetch_http(url,max_length=None):
 #        else:
 #            parsed = parsed._replace(netloc = self.redirects[netloc])
         url = urllib.parse.urlunparse(parsed)
-    with requests.get(url,headers=header, stream=True,timeout=5) as response:
+    with requests.get(url,headers=header, stream=True,timeout=timeout) as response:
         #print("This is header for %s"%gi.url)
         #print(response.headers)
         if "content-type" in response.headers:
@@ -272,10 +272,10 @@ def _fetch_http(url,max_length=None):
             length = int(response.headers['content-length'])
         else:
             length = 0
-        if max_length and length > max_length:
+        if max_size and length > max_size:
             response.close()
-            return set_error(gi,str(length/1000000),max_length)
-        elif max_length and length == 0:
+            return set_error(gi,str(length/1000000),max_size)
+        elif max_size and length == 0:
             body = b''
             downloaded = 0
             for r in response.iter_content():
@@ -283,15 +283,15 @@ def _fetch_http(url,max_length=None):
                 #We divide max_size for streamed content
                 #in order to catch them faster
                 size = sys.getsizeof(body)
-                max = max_length/2
+                max = max_size/2
                 current = round(size*100/max,1)
                 if current > downloaded:
                     downloaded = current
                     print(" -> Receiving stream: %s%% of allowed data"%downloaded,end='\r')
-                #print("size: %s (%s\% of maxlenght)"%(size,size/max_length))
-                if size > max_length/2:
+                #print("size: %s (%s\% of max_size)"%(size,size/max_size))
+                if size > max_size/2:
                     response.close()
-                    return set_error(gi,"streaming",max_length)
+                    return set_error(gi,"streaming",max_size)
             response.close()
         else:
             body = response.content
@@ -301,7 +301,7 @@ def _fetch_http(url,max_length=None):
     cache = write_body(url,body,mime)
     return cache
 
-def _fetch_gopher(url,timeout=10):
+def _fetch_gopher(url,timeout=DEFAULT_TIMEOUT,**kwargs):
     parsed =urllib.parse.urlparse(url)
     host = parsed.hostname
     port = parsed.port or 70
@@ -360,7 +360,7 @@ def _fetch_gopher(url,timeout=10):
     cache = write_body(url,response,mime)
     return cache
 
-def _fetch_finger(url,timeout=10):
+def _fetch_finger(url,timeout=DEFAULT_TIMEOUT,**kwargs):
     parsed = urllib.parse.urlparse(url)
     host = parsed.hostname
     port = parsed.port or standard_ports["finger"]
@@ -373,7 +373,7 @@ def _fetch_finger(url,timeout=10):
     return cache
 
 # Originally copied from reference spartan client by Michael Lazar
-def _fetch_spartan(url):
+def _fetch_spartan(url,**kwargs):
     cache = None
     url_parts = urllib.parse.urlparse(url)
     host = url_parts.hostname
@@ -409,7 +409,7 @@ def _fetch_spartan(url):
         cache = _fetch_spartan(redirect_url)
     return cache
 
-def _fetch_gemini(url,options={}):
+def _fetch_gemini(url,timeout=DEFAULT_TIMEOUT,**kwargs):
     cache = None
     url_parts = urllib.parse.urlparse(url)
     host = url_parts.hostname
@@ -502,10 +502,6 @@ def _fetch_gemini(url,options={}):
     err = None
     for address in addresses:
         s = socket.socket(address[0], address[1])
-        if "timeout" in options:
-            timeout = options["timeout"]
-        else:
-            timeout = DEFAULT_TIMEOUT
         s.settimeout(timeout)
         s = context.wrap_socket(s, server_hostname = host)
         try:
@@ -636,7 +632,7 @@ def _fetch_gemini(url,options={}):
 
     return cache
 
-def fetch(url):
+def fetch(url,**kwargs):
     url = normalize_url(url)
     path=None
     if "://" in url:
@@ -644,13 +640,13 @@ def fetch(url):
         if scheme not in standard_ports:
             print("%s is not a supported protocol"%scheme)
         elif scheme in ("http","https"):
-            path=_fetch_http(url)
+            path=_fetch_http(url,**kwargs)
         elif scheme == "gopher":
-            path=_fetch_gopher(url)
+            path=_fetch_gopher(url,**kwargs)
         elif scheme == "finger":
-            path=_fetch_finger(url)
+            path=_fetch_finger(url,**kwargs)
         elif scheme == "gemini":
-            patch=_fetch_gemini(url)
+            path=_fetch_gemini(url,**kwargs)
         else:
             print("scheme %s not implemented yet")
     else:
@@ -666,6 +662,10 @@ def main():
             help="return path to the cache instead of the content of the cache")
     parser.add_argument("--offline", action="store_true",
             help="Do not attempt to download, return cached version or error")
+    parser.add_argument("--max-size", type=int,
+            help="Cancel download of items above that size (value in MB).")
+    parser.add_argument("--timeout", type=int,
+            help="Time to wait before cancelling connection (in seconds).")
     # No argument: write help
     parser.add_argument('url', metavar='URL', nargs='*',
             help='download URL and returns the content or the path to a cached version')
@@ -673,15 +673,22 @@ def main():
    # --cache-validity : do not download if cache is valid
    # --validity : returns the date of the cached version, Null if no version
    # --force-download : download and replace cache, even if valid
-   # --max-size-download : cancel download of items above that size. Returns Null.
     args = parser.parse_args()
+
+    param = {}
+    # --max-size is given in MB while netcache compares bytes; only forward
+    # the options that were explicitly set so the defaults keep applying.
+    if args.max_size:
+        param["max_size"] = args.max_size * 1000000
+    if args.timeout:
+        param["timeout"] = args.timeout
     for u in args.url:
         if args.offline:
             path = get_cache_path(u)
         else:
             print("Download URL: %s" %u)
-            path = fetch(u)
+            path = fetch(u,**param)
         if args.path:
             print(path)
         else:
diff --git a/offpunk.py b/offpunk.py
index b615524..3a58e9a 100755
--- a/offpunk.py
+++ b/offpunk.py
@@ -765,31 +765,12 @@ class GeminiClient(cmd.Cmd):
         elif not self.offline_only and not gi.local:
             try:
-                if gi.scheme in ("http", "https"):
-                    if self.support_http:
-                        if limit_size:
-                            # Let’s cap automatic downloads to 20Mo
-                            max_download = int(self.options["max_size_download"])*1000000
-                        else:
-                            max_download = None
-                        gi = self._fetch_http(gi,max_length=max_download)
-                    elif handle and not self.sync_only:
-                        if not _DO_HTTP:
-                            print("Install python3-requests to handle http requests natively")
-                        webbrowser.open_new_tab(gi.url)
-                        return
-                    else:
-                        return
-                elif gi.scheme in ("gopher"):
-                    gi = self._fetch_gopher(gi,timeout=self.options["short_timeout"])
-                elif gi.scheme in ("finger"):
-                    gi = self._fetch_finger(gi,timeout=self.options["short_timeout"])
-                elif gi.scheme in ("spartan"):
-                    gi = self._fetch_spartan(gi)
-                elif gi.scheme in ("gemini"):
-                    gi = self._fetch_over_network(gi)
-                else:
-                    return
+                params = {}
+                params["timeout"] = self.options["short_timeout"]
+                if limit_size:
+                    # As before, only automatic downloads are capped
+                    params["max_size"] = int(self.options["max_size_download"])*1000000
+                cachepath = netcache.fetch(gi.url,**params)
             except UserAbortException:
                 return
             except Exception as err:
@@ -832,7 +811,7 @@ class GeminiClient(cmd.Cmd):
             return
 
         # Pass file to handler, unless we were asked not to
-        if gi :
+        if netcache.is_cache_valid(gi.url) :
             display = handle and not self.sync_only
             #TODO: take into account _RENDER_IMAGE
             if display and self.options["download_images_first"] \
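
For illustration, a minimal sketch of how the reworked netcache API is meant
to be driven once this patch is applied. netcache.fetch() and
netcache.is_cache_valid() exist in the patched code; the URL, the 20 Mo cap
and the 10 s timeout are made-up example values, and the text-mode read
assumes the cached body is not binary:

    import netcache

    url = "gemini://example.org/"
    # max_size is compared against a byte count, so cap at 20 Mo; both
    # keyword arguments are optional and fall back to the module defaults.
    path = netcache.fetch(url, max_size=20*1000000, timeout=10)
    # fetch() returns the path to the cached copy of the resource
    if path and netcache.is_cache_valid(url):
        with open(path) as f:   # use mode "rb" if the body may be binary
            print(f.read())

This is also exactly what offpunk.py now does through **params, instead of
dispatching on gi.scheme itself: the per-protocol branching lives in
netcache.fetch() and every _fetch_* helper accepts **kwargs so that callers
can pass max_size and timeout uniformly.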