From ed019fa04e2e930cd66c277e2060759747d35652 Mon Sep 17 00:00:00 2001 From: Lionel Dricot Date: Thu, 3 Aug 2023 16:54:29 +0200 Subject: [PATCH] nearly got rid of geminiitem --- ansirenderer.py | 56 ++++++++++++++-- offpunk.py | 169 ++++++++++-------------------------------------- offutils.py | 2 + 3 files changed, 89 insertions(+), 138 deletions(-) diff --git a/ansirenderer.py b/ansirenderer.py index f90cd0c..b45a1f2 100644 --- a/ansirenderer.py +++ b/ansirenderer.py @@ -446,13 +446,53 @@ class AbstractRenderer(): def set_mode(self,mode): self.last_mode = mode def get_links(self,mode=None): - print("mode : %s and last_mode : %s (%s)"%(mode,self.last_mode,self.url)) + # This method is used to load once the list of links in a gi + # Links can be followed, after a space, by a description/title + #TODO: remove this code + # def get_links(self,mode=None): + # links = [] + # toreturn = [] + # if self.renderer: + # if not mode: + # mode = self.renderer.last_mode + # links = self.renderer.get_links(mode=mode) + # for l in links: + # #split between link and potential name + # # check that l is non-empty + # url = None + # if l: + # splitted = l.split(maxsplit=1) + # url = self.absolutise_url(splitted[0]) + # if url and looks_like_url(url): + # if len(splitted) > 1: + # #We add a name only for Gopher items + # if url.startswith("gopher://"): + # newgi = GeminiItem(url,name=splitted[1]) + # else: + # newgi = GeminiItem(url) + # else: + # newgi = GeminiItem(url) + # toreturn.append(newgi) + # elif url and mode != "links_only" and url.startswith("data:image/"): + # imgurl,imgdata = ansirenderer.looks_like_base64(url,self.url) + # if imgurl: + # toreturn.append(GeminiItem(imgurl)) + # else: + # toreturn.append(None) + # else: + # # We must include a None item to keep the link count valid + # toreturn.append(None) + # return toreturn if not mode: mode = self.last_mode if mode not in self.links : prepared_body = self.prepare(self.body,mode=mode) results = 
self.render(prepared_body,mode=mode) if results: - self.links[mode] = results[1] + #we should absolutize all URLs here + self.links[mode] = [] + for l in results[1]: + abs_l = urllib.parse.urljoin(self.url,l.split()[0]) + self.links[mode].append(abs_l) for l in self.get_subscribe_links()[1:]: self.links[mode].append(l[0]) return self.links[mode] @@ -529,7 +569,13 @@ class AbstractRenderer(): result = self.render(prepared_body,width=width,mode=mode) if result: self.rendered_text[mode] = result[0] - self.links[mode] = result[1] + #The following is there to prepopulate self.links + #but it seems to slow down a lot the loading + #self.links[mode] = [] + #we should absolutize all URLs here + #for l in result[1]: + # abs_l = urllib.parse.urljoin(self.url,l.split()[0]) + # self.links[mode].append(abs_l) return self.rendered_text[mode] def _window_title(self,title,info=None): @@ -957,7 +1003,9 @@ class HtmlRenderer(AbstractRenderer): ty = l.get("type") if ty : if "rss" in ty or "atom" in ty or "feed" in ty: - subs.append([l.get("href"),ty,l.get("title")]) + # some rss links are relatives: we absolutise_url + sublink = urllib.parse.urljoin(self.url, l.get("href")) + subs.append([sublink,ty,l.get("title")]) return subs def get_title(self): diff --git a/offpunk.py b/offpunk.py index 70c81d2..186bc98 100755 --- a/offpunk.py +++ b/offpunk.py @@ -157,13 +157,7 @@ class GeminiItem(): def __init__(self, url, name=""): self.last_mode = None url = netcache.normalize_url(url) - findmode = url.split("##offpunk_mode=") - if len(findmode) > 1: - self.url = findmode[0] - if findmode[1] in ["full"] or findmode[1].isnumeric(): - self.last_mode = findmode[1] - else: - self.url = url + self.url = url self.url = fix_ipv6_url(self.url).strip() self.name = name self.mime = None @@ -171,57 +164,6 @@ class GeminiItem(): self.scheme = "https" self.local = False - # This method is used to load once the list of links in a gi - # Links can be followed, after a space, by a description/title - #TODO: remove this code - 
# def get_links(self,mode=None): - # links = [] - # toreturn = [] - # if self.renderer: - # if not mode: - # mode = self.renderer.last_mode - # links = self.renderer.get_links(mode=mode) - # for l in links: - # #split between link and potential name - # # check that l is non-empty - # url = None - # if l: - # splitted = l.split(maxsplit=1) - # url = self.absolutise_url(splitted[0]) - # if url and looks_like_url(url): - # if len(splitted) > 1: - # #We add a name only for Gopher items - # if url.startswith("gopher://"): - # newgi = GeminiItem(url,name=splitted[1]) - # else: - # newgi = GeminiItem(url) - # else: - # newgi = GeminiItem(url) - # toreturn.append(newgi) - # elif url and mode != "links_only" and url.startswith("data:image/"): - # imgurl,imgdata = ansirenderer.looks_like_base64(url,self.url) - # if imgurl: - # toreturn.append(GeminiItem(imgurl)) - # else: - # toreturn.append(None) - # else: - # # We must include a None item to keep the link count valid - # toreturn.append(None) - # return toreturn - - #TODO: should be in ansirenderer - def get_subscribe_links(self): - if self.renderer: - subs = self.renderer.get_subscribe_links() - abssubs = [] - # some rss links are relatives - for s in subs: - s[0] = self.absolutise_url(s[0]) - abssubs.append(s) - return abssubs - else: - return [] - def get_filename(self): filename = os.path.basename(netcache.get_cache_path(self.url)) return filename @@ -235,8 +177,6 @@ class GeminiItem(): tmpf = cache_path return tmpf - - def set_error(self,err): # If we get an error, we want to keep an existing cache # but we need to touch it or to create an empty one @@ -264,65 +204,6 @@ class GeminiItem(): cache.write("The ressource will be tentatively fetched during next sync.\n") cache.close() - - def root(self): - return GeminiItem(self._derive_url("/")) - - def up(self,level=1): - path = self.path.rstrip('/') - count = 0 - while count < level: - pathbits = list(os.path.split(path)) - # Don't try to go higher than root or in 
config - if self.local or len(pathbits) == 1 : - return self - # Get rid of bottom component - if len(pathbits) > 1: - pathbits.pop() - path = os.path.join(*pathbits) - count += 1 - if self.scheme == "gopher": - path = "/1" + path - return GeminiItem(self._derive_url(path)) - - def query(self, query): - query = urllib.parse.quote(query) - return GeminiItem(self._derive_url(query=query)) - - def _derive_url(self, path="", query=""): - """ - A thin wrapper around urlunparse which avoids inserting standard ports - into URLs just to keep things clean. - """ - if not self.port or self.port == netcache.standard_ports[self.scheme] : - host = self.host - else: - host = self.host + ":" + str(self.port) - return urllib.parse.urlunparse((self.scheme,host,path or self.path, "", query, "")) - - def absolutise_url(self, relative_url): - """ - Convert a relative URL to an absolute URL by using the URL of this - GeminiItem as a base. - """ - try: - abs_url = urllib.parse.urljoin(self.url, relative_url) - except ValueError as e: - abs_url = None - return abs_url - - #TODO: explore how to put this in ansirenderer - def url_mode(self): - url = self.url - if self.last_mode and self.last_mode != "readable": - url += "##offpunk_mode=" + self.last_mode - return url - - #what is the line to add to a list for this url ? 
- def to_map_line(self): - return "=> {} {}\n".format(self.url_mode(), self.renderer.get_page_title()) - - # Cheap and cheerful URL detector def looks_like_url(word): try: @@ -613,6 +494,8 @@ class GeminiClient(cmd.Cmd): self.page_index = 0 # Update state (external files are not added to history) self.gi = gi + if mode and mode != "readable": + url += "##offpunk_mode=" + mode self.current_url = url if update_hist and not self.sync_only: self._update_history(gi) @@ -987,7 +870,22 @@ Take an integer as argument to go up multiple times.""" level = int(args[0]) elif args[0] != "": print("Up only take integer as arguments") - self._go_to_gi(self.gi.up(level=level)) + #TODO: implement UP + path = self.path.rstrip('/') + count = 0 + while count < level: + pathbits = list(os.path.split(path)) + # Don't try to go higher than root or in config + if self.local or len(pathbits) == 1 : + return self + # Get rid of bottom component + if len(pathbits) > 1: + pathbits.pop() + path = os.path.join(*pathbits) + count += 1 + if self.scheme == "gopher": + path = "/1" + path + return GeminiItem(self._derive_url(path)) def do_back(self, *args): """Go back to the previous gemini item.""" @@ -1012,7 +910,8 @@ Take an integer as argument to go up multiple times.""" @needs_gi def do_root(self, *args): """Go to root selector of the server hosting current item.""" - self._go_to_gi(self.gi.root()) + parse = urllib.parse.urlparse(self.current_url) + self._go_to_url(urllib.parse.urlunparse((parse.scheme,parse.netloc,"/","","",""))) def do_tour(self, line): """Add index items as waypoints on a tour, which is basically a FIFO @@ -1039,7 +938,7 @@ Current tour can be listed with `tour ls` and scrubbed with `tour clear`.""" self.list_show("tour") elif line == "clear": for l in self.list_get_links("tour"): - self.list_rm_url(l.url_mode(),"tour") + self.list_rm_url(l,"tour") elif line == "*": #TODO: change to use renderer.get_links and change list_add_line for l in self.get_renderer().get_links(): @@ -1225,8 
+1124,7 @@ Use 'ls -l' to see URLs.""" def do_gus(self, line): """Submit a search query to the geminispace.info search engine.""" - gus = GeminiItem("gemini://geminispace.info/search") - self._go_to_gi(gus.query(line)) + self._go_to_url(urllib.parse.urlunparse(("gemini","geminispace.info","/search","",urllib.parse.quote(line),""))) def do_history(self, *args): """Display history.""" @@ -1266,7 +1164,7 @@ Use "view feeds" to see available feeds on this page. elif args[0] in ["normal","readable"]: self._go_to_gi(self.gi,mode="readable") elif args[0] == "feed": - subs = self.gi.get_subscribe_links() + subs = self.get_renderer().get_subscribe_links() if len(subs) > 1: self.do_go(subs[1][0]) elif "rss" in subs[0][1] or "atom" in subs[0][1]: @@ -1274,7 +1172,7 @@ Use "view feeds" to see available feeds on this page. else: print("No other feed found on %s"%self.gi.url) elif args[0] == "feeds": - subs = self.gi.get_subscribe_links() + subs = self.get_renderer().get_subscribe_links() stri = "Available views :\n" counter = 0 for s in subs: @@ -1436,7 +1334,7 @@ If no argument given, URL is added to Bookmarks.""" If a new link is found in the page during a --sync, the new link is automatically fetched and added to your next tour. To unsubscribe, remove the page from the "subscribed" list.""" - subs = self.gi.get_subscribe_links() + subs = self.get_renderer().get_subscribe_links() if len(subs) > 1: stri = "Multiple feeds have been found :\n" elif "rss" in subs[0][1] or "atom" in subs[0][1] : @@ -1463,7 +1361,6 @@ To unsubscribe, remove the page from the "subscribed" list.""" else: sublink,title = None,None if sublink: - sublink = self.gi.absolutise_url(sublink) gi = GeminiItem(sublink,name=title) list_path = self.get_list("subscribed") added = self.list_add_line("subscribed",gi=gi,verbose=False) @@ -1494,13 +1391,17 @@ Bookmarks are stored using the 'add' command.""" archives, which is a special historical list limited in size. 
It is similar to `move archives`.""" for li in self.list_lists(): if li not in ["archives", "history"]: - deleted = self.list_rm_url(self.gi.url_mode(),li) + deleted = self.list_rm_url(self.current_url,li) if deleted: print("Removed from %s"%li) self.list_add_top("archives",limit=self.options["archives_size"]) print("Archiving: %s"%self.get_renderer().get_page_title()) print("\x1b[2;34mCurrent maximum size of archives : %s\x1b[0m" %self.options["archives_size"]) + #what is the line to add to a list for this url ? + def to_map_line(self): + return "=> {} {}\n".format(self.current_url, self.get_renderer().get_page_title()) + def list_add_line(self,list,gi=None,verbose=True): list_path = self.list_path(list) if not list_path and self.list_is_system(list): @@ -1518,12 +1419,12 @@ archives, which is a special historical list limited in size. It is similar to ` l_file.close() for l in lines: sp = l.split() - if gi.url_mode() in sp: + if self.current_url in sp: if verbose: print("%s already in %s."%(gi.url,list)) return False with open(list_path,"a") as l_file: - l_file.write(gi.to_map_line()) + l_file.write(self.to_map_line()) l_file.close() if verbose: print("%s added to %s" %(gi.url,list)) @@ -1532,7 +1433,7 @@ archives, which is a special historical list limited in size. 
It is similar to ` def list_add_top(self,list,limit=0,truncate_lines=0): if not self.gi: return - stri = self.gi.to_map_line().strip("\n") + stri = self.to_map_line().strip("\n") if list == "archives": stri += ", archived on " elif list == "history": @@ -1680,7 +1581,7 @@ If current page was not in a list, this command is similar to `add LIST`.""" lists = self.list_lists() for l in lists: if l != args[0] and l not in ["archives", "history"]: - isremoved = self.list_rm_url(self.gi.url_mode(),l) + isremoved = self.list_rm_url(self.current_url,l) if isremoved: print("Removed from %s"%l) self.list_add_line(args[0]) @@ -2001,7 +1902,7 @@ Argument : duration of cache validity (in seconds).""" fetch_gitem(l,depth=depth,validity=validity,savetotour=tourchildren,count=[counter,end]) if tourandremove: if add_to_tour(l): - self.list_rm_url(l.url_mode(),list) + self.list_rm_url(l,list) self.sync_only = True lists = self.list_lists() diff --git a/offutils.py b/offutils.py index 852d481..cf85b29 100644 --- a/offutils.py +++ b/offutils.py @@ -53,3 +53,5 @@ def term_width(new_width=None): if cur < width: width = cur return width + +