forked from solderpunk/AV-98
Fixed a crash when parsing hidden_urls (bug #32)
GemtextRenderer scans the text for URLs on lines that do not start with "=>" and appends them to the list of links afterwards, so that they can be followed without having to copy/paste them with the mouse. This is a hidden feature. In this case, the word was not actually a URL and contained [] characters, which prevented urllib from knowing how to handle it. The fix involved moving the looks_like_url function (together with its fix_ipv6_url helper) out of offpunk and into offutils, so that ansicat can use it to ensure that a word looks like a URL before giving it to urllib.
This commit is contained in:
parent
c3aff6755e
commit
3164658352
|
@ -5,7 +5,8 @@
|
|||
- redirections is now reflected in links and the cache (bug #28)
|
||||
- ansicat: added "--mode" option
|
||||
- ansicat: avoid a crash when urllib.parse.urljoin fails
|
||||
- Fix a crash when gus is called without parameters (Von Hohenheiden)
|
||||
- offpunk: Fix a crash when gus is called without parameters (Von Hohenheiden)
|
||||
- ansicat: fixed a crash when parsing wrong hidden_url in gemini (bug #32)
|
||||
|
||||
## 2.0 - November 16th 2023
|
||||
Changes since 1.10
|
||||
|
|
|
@ -12,7 +12,7 @@ import mimetypes
|
|||
import fnmatch
|
||||
import netcache
|
||||
import offthemes
|
||||
from offutils import run,term_width,is_local,looks_like_base64
|
||||
from offutils import run,term_width,is_local,looks_like_base64, looks_like_url
|
||||
import base64
|
||||
from offutils import _DATA_DIR
|
||||
try:
|
||||
|
@ -515,10 +515,9 @@ class AbstractRenderer():
|
|||
ll = l.split()[0]
|
||||
try:
|
||||
abs_l = urllib.parse.urljoin(self.url,ll)
|
||||
self.links[mode].append(abs_l)
|
||||
except Exception as err:
|
||||
print("Urljoin Error: Could not make an URL out of %s and %s"%(self.url,ll))
|
||||
abs_l = ll
|
||||
self.links[mode].append(abs_l)
|
||||
for l in self.get_subscribe_links()[1:]:
|
||||
self.links[mode].append(l[0])
|
||||
|
||||
|
@ -701,7 +700,7 @@ class GemtextRenderer(AbstractRenderer):
|
|||
if "://" in line:
|
||||
words = line.split()
|
||||
for w in words:
|
||||
if "://" in w:
|
||||
if "://" in w and looks_like_url(w):
|
||||
hidden_links.append(w)
|
||||
r.add_text(line.rstrip())
|
||||
links += hidden_links
|
||||
|
|
43
offpunk.py
43
offpunk.py
|
@ -26,7 +26,7 @@ import netcache
|
|||
import opnk
|
||||
import ansicat
|
||||
import offthemes
|
||||
from offutils import run,term_width,is_local,mode_url,unmode_url
|
||||
from offutils import run,term_width,is_local,mode_url,unmode_url, looks_like_url
|
||||
from offutils import _CONFIG_DIR,_DATA_DIR,_CACHE_PATH
|
||||
import offblocklist
|
||||
try:
|
||||
|
@ -77,47 +77,6 @@ _ABBREVS = {
|
|||
_MIME_HANDLERS = {
|
||||
}
|
||||
|
||||
#An IPV6 URL should be put between []
|
||||
#We try to detect them as locations with more than 2 ":"
|
||||
def fix_ipv6_url(url):
|
||||
if not url or url.startswith("mailto"):
|
||||
return url
|
||||
if "://" in url:
|
||||
schema, schemaless = url.split("://",maxsplit=1)
|
||||
else:
|
||||
schema, schemaless = None, url
|
||||
if "/" in schemaless:
|
||||
netloc, rest = schemaless.split("/",1)
|
||||
if netloc.count(":") > 2 and "[" not in netloc and "]" not in netloc:
|
||||
schemaless = "[" + netloc + "]" + "/" + rest
|
||||
elif schemaless.count(":") > 2:
|
||||
schemaless = "[" + schemaless + "]/"
|
||||
if schema:
|
||||
return schema + "://" + schemaless
|
||||
return schemaless
|
||||
|
||||
# Cheap and cheerful URL detector
|
||||
def looks_like_url(word):
|
||||
try:
|
||||
if not word.strip():
|
||||
return False
|
||||
url = fix_ipv6_url(word).strip()
|
||||
parsed = urllib.parse.urlparse(url)
|
||||
#sometimes, urllib crashed only when requesting the port
|
||||
port = parsed.port
|
||||
scheme = word.split("://")[0]
|
||||
mailto = word.startswith("mailto:")
|
||||
start = scheme in netcache.standard_ports
|
||||
local = scheme in ["file","list"]
|
||||
if mailto:
|
||||
return "@" in word
|
||||
elif not local:
|
||||
return start and ("." in word or "localhost" in word)
|
||||
else:
|
||||
return "/" in word
|
||||
except ValueError:
|
||||
return False
|
||||
|
||||
# GeminiClient Decorators
|
||||
def needs_gi(inner):
|
||||
def outer(self, *args, **kwargs):
|
||||
|
|
40
offutils.py
40
offutils.py
|
@ -14,6 +14,7 @@ import shlex
|
|||
import urllib.parse
|
||||
import urllib.parse
|
||||
import netcache_migration
|
||||
import netcache
|
||||
|
||||
CACHE_VERSION = 1
|
||||
|
||||
|
@ -60,7 +61,46 @@ while current_version < CACHE_VERSION:
|
|||
f.write(str(current_version))
|
||||
f.close()
|
||||
|
||||
#An IPV6 URL should be put between []
#We try to detect them as locations with more than 2 ":"
def fix_ipv6_url(url):
    """Wrap a bare IPv6 host of *url* in square brackets.

    The host part (everything between an optional ``scheme://`` prefix and
    the first ``/``) is considered a bare IPv6 address when it contains more
    than two ``:`` and no square bracket yet.  In that case it is rewritten
    as ``[host]/rest``; a trailing ``/`` is added when the URL had no path.

    Empty/``None`` values and anything starting with ``mailto`` are
    returned unchanged, as is any URL whose host does not match the
    heuristic above.

    Args:
        url: the URL (or scheme-less location) to normalise.

    Returns:
        The URL with its IPv6 host bracketed when needed, otherwise the
        URL exactly as given.
    """
    if not url or url.startswith("mailto"):
        return url
    if "://" in url:
        schema, schemaless = url.split("://", maxsplit=1)
    else:
        schema, schemaless = None, url
    # Without any "/", the whole remainder is the host and rest is empty.
    netloc, _, rest = schemaless.partition("/")
    # The bracket test must apply whether or not a path follows the host:
    # otherwise an already bracketed address without a path would be
    # wrapped a second time ("[[...]]/"), which urllib then rejects.
    already_bracketed = "[" in netloc or "]" in netloc
    if netloc.count(":") > 2 and not already_bracketed:
        schemaless = "[" + netloc + "]/" + rest
    if schema:
        return schema + "://" + schemaless
    return schemaless
|
||||
|
||||
# Cheap and cheerful URL detector
def looks_like_url(word):
    """Tell whether *word* plausibly is a URL that urllib can handle.

    This is a heuristic, not a validator:

    * an empty or whitespace-only word is never a URL;
    * a word that makes urllib raise ValueError (while parsing it or
      while reading its port) is never a URL;
    * a word starting with "mailto:" must contain an "@";
    * a word whose scheme is "file" or "list" must contain a "/";
    * any other word must use a scheme listed in netcache.standard_ports
      and contain either a "." or "localhost".

    Returns:
        True when the word passes the checks above, False otherwise.
    """
    if not word.strip():
        return False
    try:
        candidate = fix_ipv6_url(word).strip()
        # Parsing alone is not enough: sometimes urllib only fails once
        # the port is requested, so read it to trigger that failure here.
        _ = urllib.parse.urlparse(candidate).port
    except ValueError:
        return False
    if word.startswith("mailto:"):
        return "@" in word
    scheme = word.split("://")[0]
    if scheme in ("file", "list"):
        return "/" in word
    known_scheme = scheme in netcache.standard_ports
    return known_scheme and ("." in word or "localhost" in word)
|
||||
|
||||
## Those two functions add/remove the mode to the
|
||||
# URLs. This is a gross hack to remember the mode
|
||||
|
|
Loading…
Reference in New Issue