Fixed a crash when parsing hidden_urls (bug #32)

GemtextRenderer parses the text for URLs not starting with "=>" and
adds them to the list of links afterwards, to avoid having to copy/paste
them with the mouse. This is a hidden feature.

In this case, the word was not actually a URL and included [] characters,
which prevented urllib from knowing how to handle it.

The fix involved moving the looks_like_url function out of offpunk
and into offutils so that ansicat can use it to ensure a word
looks like a URL before giving it to urllib.
This commit is contained in:
Ploum 2023-12-02 00:11:34 +01:00
parent c3aff6755e
commit 3164658352
4 changed files with 46 additions and 47 deletions

View File

@ -5,7 +5,8 @@
- redirections is now reflected in links and the cache (bug #28)
- ansicat: added "--mode" option
- ansicat: avoid a crash when urllib.parse.urljoin fails
- Fix a crash when gus is called without parameters (Von Hohenheiden)
- offpunk: Fix a crash when gus is called without parameters (Von Hohenheiden)
- ansicat: fixed a crash when parsing wrong hidden_url in gemini (bug #32)
## 2.0 - November 16th 2023
Changes since 1.10

View File

@ -12,7 +12,7 @@ import mimetypes
import fnmatch
import netcache
import offthemes
from offutils import run,term_width,is_local,looks_like_base64
from offutils import run,term_width,is_local,looks_like_base64, looks_like_url
import base64
from offutils import _DATA_DIR
try:
@ -515,10 +515,9 @@ class AbstractRenderer():
ll = l.split()[0]
try:
abs_l = urllib.parse.urljoin(self.url,ll)
self.links[mode].append(abs_l)
except Exception as err:
print("Urljoin Error: Could not make an URL out of %s and %s"%(self.url,ll))
abs_l = ll
self.links[mode].append(abs_l)
for l in self.get_subscribe_links()[1:]:
self.links[mode].append(l[0])
@ -701,7 +700,7 @@ class GemtextRenderer(AbstractRenderer):
if "://" in line:
words = line.split()
for w in words:
if "://" in w:
if "://" in w and looks_like_url(w):
hidden_links.append(w)
r.add_text(line.rstrip())
links += hidden_links

View File

@ -26,7 +26,7 @@ import netcache
import opnk
import ansicat
import offthemes
from offutils import run,term_width,is_local,mode_url,unmode_url
from offutils import run,term_width,is_local,mode_url,unmode_url, looks_like_url
from offutils import _CONFIG_DIR,_DATA_DIR,_CACHE_PATH
import offblocklist
try:
@ -77,47 +77,6 @@ _ABBREVS = {
_MIME_HANDLERS = {
}
# An IPv6 address used as a URL location should be put between [].
# We try to detect such locations as the ones having more than 2 ":".
def fix_ipv6_url(url):
    """Return *url* with a bare IPv6 network location wrapped in brackets.

    The location is the part between the optional "scheme://" prefix and
    the first "/". It is treated as a bare IPv6 address when it contains
    more than two ":" and no "[" or "]" yet.

    Empty/None values and anything starting with "mailto" are returned
    untouched. When brackets are added to a URL without a path, a trailing
    "/" is appended as well.
    """
    if not url or url.startswith("mailto"):
        return url
    if "://" in url:
        schema, schemaless = url.split("://", maxsplit=1)
    else:
        schema, schemaless = None, url
    if "/" in schemaless:
        netloc, rest = schemaless.split("/", 1)
    else:
        netloc, rest = schemaless, ""
    # The bracket test applies to both the "with path" and "without path"
    # cases, so an already bracketed address is never wrapped a second time
    # (which would produce "[[...]]" and make urllib reject the URL).
    if netloc.count(":") > 2 and "[" not in netloc and "]" not in netloc:
        schemaless = "[" + netloc + "]" + "/" + rest
    if schema:
        return schema + "://" + schemaless
    return schemaless
# Cheap and cheerful URL detector
def looks_like_url(word):
    """Tell whether *word* is plausibly a URL this program can handle.

    Returns False for blank words and for anything urllib refuses to parse
    (ValueError). Otherwise:
    - "mailto:" words need an "@";
    - "file" and "list" schemes need a "/";
    - any other scheme must be listed in netcache.standard_ports and the
      word must contain a "." or "localhost".
    """
    try:
        if not word.strip():
            return False
        candidate = fix_ipv6_url(word).strip()
        # urllib sometimes only fails once the port is requested, so the
        # attribute is read on purpose even though its value is not needed.
        _ = urllib.parse.urlparse(candidate).port
        scheme = word.split("://")[0]
        known_scheme = scheme in netcache.standard_ports
        if word.startswith("mailto:"):
            return "@" in word
        if scheme in ["file", "list"]:
            return "/" in word
        return known_scheme and ("." in word or "localhost" in word)
    except ValueError:
        return False
# GeminiClient Decorators
def needs_gi(inner):
def outer(self, *args, **kwargs):

View File

@ -14,6 +14,7 @@ import shlex
import urllib.parse
import urllib.parse
import netcache_migration
import netcache
CACHE_VERSION = 1
@ -60,7 +61,46 @@ while current_version < CACHE_VERSION:
f.write(str(current_version))
f.close()
# An IPv6 address used as a URL location should be put between [].
# We try to detect such locations as the ones having more than 2 ":".
def fix_ipv6_url(url):
    """Return *url* with a bare IPv6 network location wrapped in brackets.

    The location is the part between the optional "scheme://" prefix and
    the first "/". It is treated as a bare IPv6 address when it contains
    more than two ":" and no "[" or "]" yet.

    Empty/None values and anything starting with "mailto" are returned
    untouched. When brackets are added to a URL without a path, a trailing
    "/" is appended as well.
    """
    if not url or url.startswith("mailto"):
        return url
    if "://" in url:
        schema, schemaless = url.split("://", maxsplit=1)
    else:
        schema, schemaless = None, url
    if "/" in schemaless:
        netloc, rest = schemaless.split("/", 1)
    else:
        netloc, rest = schemaless, ""
    # The bracket test applies to both the "with path" and "without path"
    # cases, so an already bracketed address is never wrapped a second time
    # (which would produce "[[...]]" and make urllib reject the URL).
    if netloc.count(":") > 2 and "[" not in netloc and "]" not in netloc:
        schemaless = "[" + netloc + "]" + "/" + rest
    if schema:
        return schema + "://" + schemaless
    return schemaless
# Cheap and cheerful URL detector
def looks_like_url(word):
    """Tell whether *word* is plausibly a URL this program can handle.

    Returns False for blank words and for anything urllib refuses to parse
    (ValueError). Otherwise:
    - "mailto:" words need an "@";
    - "file" and "list" schemes need a "/";
    - any other scheme must be listed in netcache.standard_ports and the
      word must contain a "." or "localhost".
    """
    try:
        if not word.strip():
            return False
        candidate = fix_ipv6_url(word).strip()
        # urllib sometimes only fails once the port is requested, so the
        # attribute is read on purpose even though its value is not needed.
        _ = urllib.parse.urlparse(candidate).port
        scheme = word.split("://")[0]
        known_scheme = scheme in netcache.standard_ports
        if word.startswith("mailto:"):
            return "@" in word
        if scheme in ["file", "list"]:
            return "/" in word
        return known_scheme and ("." in word or "localhost" in word)
    except ValueError:
        return False
## Those two functions add/remove the mode to the
# URLs. This is a gross hack to remember the mode