Fixed a crash when parsing hidden_urls (bug #32)

GemtextRenderer parses the text for URLs not starting with "=>" and adds them later to the list of links, to avoid having to copy/paste them with the mouse. This is a hidden feature. In this case, the word was not supposed to be a URL and included [] characters, which prevented urllib from knowing how to handle it. The fix involved refactoring the looks_like_url function out of offpunk and adding it to offutils, so that ansicat can use it to ensure a word looks like a URL before giving it to urllib.
2023-12-02 00:11:34 +01:00 · 2023-12-02 00:11:34 +01:00 · 3164658352
parent c3aff6755e
commit 3164658352
4 changed files with 46 additions and 47 deletions
--- a/3
+++ b/3
@ -5,7 +5,8 @@
 - redirections is now reflected in links and the cache (bug #28)
 - ansicat: added "--mode" option
 - ansicat: avoid a crash when urllib.parse.urljoin fails
- Fix a crash when gus is called without parameters (Von Hohenheiden)
+- offpunk: Fix a crash when gus is called without parameters (Von Hohenheiden)
+- ansicat: fixed a crash when parsing wrong hidden_url in gemini (bug #32)

 ## 2.0 - November 16th 2023
 Changes since 1.10
--- a/ansicat.py
+++ b/ansicat.py
@ -12,7 +12,7 @@ import mimetypes
 import fnmatch
 import netcache
 import offthemes
-from offutils import run,term_width,is_local,looks_like_base64
+from offutils import run,term_width,is_local,looks_like_base64, looks_like_url
 import base64
 from offutils import _DATA_DIR
 try:
@ -515,10 +515,9 @@ class AbstractRenderer():
                    ll = l.split()[0]
                    try:
                        abs_l = urllib.parse.urljoin(self.url,ll)
+                        self.links[mode].append(abs_l) 
                    except Exception as err:
                        print("Urljoin Error: Could not make an URL out of %s and %s"%(self.url,ll))
-                        abs_l = ll
-                    self.links[mode].append(abs_l) 
                for l in self.get_subscribe_links()[1:]:
                    self.links[mode].append(l[0])

@ -701,7 +700,7 @@ class GemtextRenderer(AbstractRenderer):
                if "://" in line:
                    words = line.split()
                    for w in words:
-                        if "://" in w:
+                        if "://" in w and looks_like_url(w):
                            hidden_links.append(w)
                r.add_text(line.rstrip())
        links += hidden_links
--- a/offpunk.py
+++ b/offpunk.py
@ -26,7 +26,7 @@ import netcache
 import opnk
 import ansicat
 import offthemes
-from offutils import run,term_width,is_local,mode_url,unmode_url
+from offutils import run,term_width,is_local,mode_url,unmode_url, looks_like_url
 from offutils import _CONFIG_DIR,_DATA_DIR,_CACHE_PATH
 import offblocklist
 try:
@ -77,47 +77,6 @@ _ABBREVS = {
 _MIME_HANDLERS = {
 }

-#An IPV6 URL should be put between []
-#We try to detect them has location with more than 2 ":"
-def fix_ipv6_url(url):
-    if not url or url.startswith("mailto"):
-        return url
-    if "://" in url:
-        schema, schemaless = url.split("://",maxsplit=1)
-    else:
-        schema, schemaless = None, url
-    if "/" in schemaless:
-        netloc, rest = schemaless.split("/",1)
-        if netloc.count(":") > 2 and "[" not in netloc and "]" not in netloc:
-            schemaless = "[" + netloc + "]" + "/" + rest
-    elif schemaless.count(":") > 2:
-        schemaless = "[" + schemaless + "]/"
-    if schema:
-        return schema + "://" + schemaless
-    return schemaless
-
-# Cheap and cheerful URL detector
-def looks_like_url(word):
-    try:
-        if not word.strip():
-            return False
-        url = fix_ipv6_url(word).strip()
-        parsed = urllib.parse.urlparse(url)
-        #sometimes, urllib crashed only when requesting the port
-        port = parsed.port
-        scheme = word.split("://")[0]
-        mailto = word.startswith("mailto:")
-        start = scheme in netcache.standard_ports
-        local = scheme in ["file","list"]
-        if mailto:
-            return "@" in word
-        elif not local:
-            return start and ("." in word or "localhost" in word)
-        else:
-            return "/" in word
-    except ValueError:
-        return False
-
 # GeminiClient Decorators
 def needs_gi(inner):
    def outer(self, *args, **kwargs):
--- a/offutils.py
+++ b/offutils.py
@ -14,6 +14,7 @@ import shlex
 import urllib.parse
 import urllib.parse
 import netcache_migration
+import netcache

 CACHE_VERSION = 1

@ -60,7 +61,46 @@ while current_version < CACHE_VERSION:
        f.write(str(current_version))
        f.close()

+#An IPV6 URL should be put between []
+#We try to detect them has location with more than 2 ":"
+def fix_ipv6_url(url):
+    if not url or url.startswith("mailto"):
+        return url
+    if "://" in url:
+        schema, schemaless = url.split("://",maxsplit=1)
+    else:
+        schema, schemaless = None, url
+    if "/" in schemaless:
+        netloc, rest = schemaless.split("/",1)
+        if netloc.count(":") > 2 and "[" not in netloc and "]" not in netloc:
+            schemaless = "[" + netloc + "]" + "/" + rest
+    elif schemaless.count(":") > 2:
+        schemaless = "[" + schemaless + "]/"
+    if schema:
+        return schema + "://" + schemaless
+    return schemaless

+# Cheap and cheerful URL detector
+def looks_like_url(word):
+    try:
+        if not word.strip():
+            return False
+        url = fix_ipv6_url(word).strip()
+        parsed = urllib.parse.urlparse(url)
+        #sometimes, urllib crashed only when requesting the port
+        port = parsed.port
+        scheme = word.split("://")[0]
+        mailto = word.startswith("mailto:")
+        start = scheme in netcache.standard_ports
+        local = scheme in ["file","list"]
+        if mailto:
+            return "@" in word
+        elif not local:
+            return start and ("." in word or "localhost" in word)
+        else:
+            return "/" in word
+    except ValueError:
+        return False

 ## Those two functions add/remove the mode to the
 # URLs. This is a gross hack to remember the mode