follow redirects everywhere. Should fix #28

«
This commit is contained in:
Ploum 2023-12-01 17:14:22 +01:00
parent 3862183fee
commit c3aff6755e
4 changed files with 34 additions and 25 deletions

View File

@ -1,7 +1,8 @@
# Offpunk History
## 2.1 - Unreleased
- ansicat: freshly updated gemtext/rss links are highlighted ("new_link" theme option)
- freshly updated gemtext/rss links are highlighted ("new_link" theme option)
- redirections are now reflected in links and the cache (bug #28)
- ansicat: added "--mode" option
- ansicat: avoid a crash when urllib.parse.urljoin fails
- Fix a crash when gus is called without parameters (Von Hohenheiden)

View File

@ -125,9 +125,10 @@ def is_cache_valid(url,validity=0):
#There's not even a cache!
return False
def get_cache_path(url):
def get_cache_path(url,add_index=True):
# Sometimes, cache_path became a folder! (which happens for index.html/index.gmi)
# In that case, we need to reconstruct it
# if add_index=False, we don't add that "index.gmi" at the end of the cache_path
#First, we parse the URL
if not url:
return None
@ -198,7 +199,7 @@ def get_cache_path(url):
#There's an OS limitation of 260 characters per path.
#We will thus cut the path enough to add the index afterward
cache_path = cache_path[:249]
# FIXME : this is a gross hack to give a name to
# this is a gross hack to give a name to
# index files. This will break if the index is not
# index.gmi. I dont know how to know the real name
# of the file. But first, we need to ensure that the domain name
@ -216,12 +217,12 @@ def get_cache_path(url):
cache_path += "/"
if not url.endswith("/"):
url += "/"
if cache_path.endswith("/"):
if add_index and cache_path.endswith("/"):
cache_path += index
#sometimes, the index itself is a dir
#like when folder/index.gmi?param has been created
#and we try to access folder
if os.path.isdir(cache_path):
if add_index and os.path.isdir(cache_path):
cache_path += "/" + index
else:
#URL is missing either a supported scheme or a valid host
@ -585,6 +586,7 @@ def _validate_cert(address, host, cert,accept_bad_ssl=False,automatic_choice=Non
def _fetch_gemini(url,timeout=DEFAULT_TIMEOUT,interactive=True,accept_bad_ssl_certificates=False,\
**kwargs):
cache = None
newurl = url
url_parts = urllib.parse.urlparse(url)
host = url_parts.hostname
port = url_parts.port or standard_ports["gemini"]
@ -696,7 +698,7 @@ def _fetch_gemini(url,timeout=DEFAULT_TIMEOUT,interactive=True,accept_bad_ssl_ce
newurl = url.split("?")[0]
return _fetch_gemini(newurl+"?"+user_input)
else:
return None
return None,None
# Redirects
elif status.startswith("3"):
newurl = urllib.parse.urljoin(url,meta)
@ -734,7 +736,7 @@ def _fetch_gemini(url,timeout=DEFAULT_TIMEOUT,interactive=True,accept_bad_ssl_ce
# Client cert
elif status.startswith("6"):
print("Handling certificates for status 6X are not supported by offpunk\n")
print("Please open a bug report")
print("See bug #31 for discussion about the problem")
_fetch_gemini(url)
# Invalid status
elif not status.startswith("2"):
@ -766,16 +768,21 @@ def _fetch_gemini(url,timeout=DEFAULT_TIMEOUT,interactive=True,accept_bad_ssl_ce
else:
body = fbody
cache = write_body(url,body,mime)
return cache
return cache,newurl
def fetch(url,offline=False,download_image_first=True,images_mode="readable",validity=0,**kwargs):
url = normalize_url(url)
newurl = url
path=None
print_error = "print_error" in kwargs.keys() and kwargs["print_error"]
if is_cache_valid(url,validity=validity):
path = get_cache_path(url)
#First, we check whether we have a valid cache, even if offline
#If we are offline, any cache is better than nothing
if is_cache_valid(url,validity=validity) or (offline and is_cache_valid(url,validity=0)):
path = get_cache_path(url)
#if the cache is a folder, we should add a "/" at the end of the URL
if not url.endswith("/") and os.path.isdir(get_cache_path(url,add_index=False)) :
newurl = url+"/"
elif offline and is_cache_valid(url,validity=0):
path = get_cache_path(url)
elif "://" in url and not offline:
@ -795,11 +802,11 @@ def fetch(url,offline=False,download_image_first=True,images_mode="readable",val
elif scheme == "finger":
path=_fetch_finger(url,**kwargs)
elif scheme == "gemini":
path=_fetch_gemini(url,**kwargs)
path,newurl=_fetch_gemini(url,**kwargs)
else:
print("scheme %s not implemented yet")
except UserAbortException:
return
return None, newurl
except Exception as err:
cache = set_error(url, err)
# Print an error message
@ -838,10 +845,10 @@ def fetch(url,offline=False,download_image_first=True,images_mode="readable",val
print("ERROR4: " + str(type(err)) + " : " + str(err))
#print("\n" + str(err.with_traceback(None)))
print(traceback.format_exc())
return cache
return cache, newurl
# We download images contained in the document (from full mode)
if not offline and download_image_first and images_mode:
renderer = ansicat.renderer_from_file(path,url)
renderer = ansicat.renderer_from_file(path,newurl)
if renderer:
for image in renderer.get_images(mode=images_mode):
#Image should exist, should be an url (not a data image)
@ -856,7 +863,7 @@ def fetch(url,offline=False,download_image_first=True,images_mode="readable",val
#if that ever happen
fetch(image,offline=offline,download_image_first=False,\
images_mode=None,validity=0,**kwargs)
return path
return path, newurl
def main():

View File

@ -326,7 +326,7 @@ class GeminiClient(cmd.Cmd):
params["validity"] = 60
# Use cache or mark as to_fetch if resource is not cached
if handle and not self.sync_only:
displayed = self.opencache.opnk(url,mode=mode,grep=grep,theme=self.theme,**params)
displayed, url = self.opencache.opnk(url,mode=mode,grep=grep,theme=self.theme,**params)
modedurl = mode_url(url,mode)
if not displayed:
#if we cant display, we mark to sync what is not local

19
opnk.py
View File

@ -163,7 +163,7 @@ class opencache():
else:
usecache = False
if not usecache:
renderer = ansicat.renderer_from_file(path,inpath,theme=theme)
renderer = ansicat.renderer_from_file(path,url=inpath,theme=theme)
if renderer:
self.rendererdic[inpath] = renderer
self.renderer_time[inpath] = int(time.time())
@ -180,24 +180,25 @@ class opencache():
def opnk(self,inpath,mode=None,terminal=True,grep=None,theme=None,**kwargs):
#Return True if inpath opened in Terminal
# False otherwise
# also returns the url in case it has been modified
#if terminal = False, we dont try to open in the terminal,
#we immediately fallback to xdg-open.
#netcache currently provide the path if its a file.
if not offutils.is_local(inpath):
kwargs["images_mode"] = mode
cachepath = netcache.fetch(inpath,**kwargs)
cachepath,inpath = netcache.fetch(inpath,**kwargs)
if not cachepath:
return False
return False, inpath
# the following line is for :// URLs which are local (file, list)
elif "://" in inpath:
cachepath = netcache.fetch(inpath,**kwargs)
cachepath,inpath = netcache.fetch(inpath,**kwargs)
elif inpath.startswith("mailto:"):
cachepath = inpath
elif os.path.exists(inpath):
cachepath = inpath
else:
print("%s does not exist"%inpath)
return
return False, inpath
renderer = self.get_renderer(inpath,mode=mode,theme=theme)
if renderer and mode:
renderer.set_mode(mode)
@ -212,7 +213,7 @@ class opencache():
#dont use less, we call it directly
if renderer.has_direct_display():
renderer.display(mode=mode,directdisplay=True)
return True
return True, inpath
else:
body = renderer.display(mode=mode)
#Should we use the cache? only if it is not local and theres a cache
@ -239,7 +240,7 @@ class opencache():
#We dont want to restore positions in lists
firsttime = is_local(inpath)
less_cmd(self.temp_files[key], histfile=self.less_histfile[key],cat=firsttime,grep=grep)
return True
return True, inpath
#maybe, we have no renderer. Or we want to skip it.
else:
mimetype = ansicat.get_mime(cachepath)
@ -252,7 +253,7 @@ class opencache():
else:
print("Cannot find a mail client to send mail to %s" %inpath)
print("Please install xdg-open (usually from xdg-util package)")
return
return False, inpath
else:
cmd_str = self._get_handler_cmd(mimetype)
try:
@ -260,7 +261,7 @@ class opencache():
except FileNotFoundError:
print("Handler program %s not found!" % shlex.split(cmd_str)[0])
print("You can use the ! command to specify another handler program or pipeline.")
return False
return False, inpath
#We remove the renderers from the cache and we also delete temp files
def cleanup(self):