nearly got rid of geminiitem

This commit is contained in:
Lionel Dricot 2023-08-03 16:54:29 +02:00
parent 91e8b7aa16
commit ed019fa04e
3 changed files with 89 additions and 138 deletions

View File

@ -446,13 +446,53 @@ class AbstractRenderer():
def set_mode(self,mode):
    """Remember ``mode`` as the last rendering mode used by this renderer."""
    self.last_mode = mode
def get_links(self,mode=None):
    """Return the list of absolute link URLs of the page for ``mode``.

    The list is computed once per mode and cached in ``self.links``.
    It contains, in order, the links reported by ``self.render()``
    (made absolute against ``self.url``) followed by the URL of every
    entry of ``self.get_subscribe_links()`` except the first one.

    mode -- rendering mode; defaults to ``self.last_mode`` when falsy.

    Returns an empty list (without caching it) when rendering yields
    nothing.
    """
    if not mode:
        mode = self.last_mode
    if mode not in self.links:
        prepared_body = self.prepare(self.body,mode=mode)
        results = self.render(prepared_body,mode=mode)
        if results:
            # Build the list locally so a failure never leaves a
            # half-filled list in the cache.
            absolute_links = []
            for link in results[1]:
                # Links can be followed, after a space, by a description/title:
                # only the first word is the URL itself.
                parts = link.split(maxsplit=1) if link else []
                # An empty link still gets an entry so that the link count
                # stays valid; an empty reference resolves to the page itself.
                target = parts[0] if parts else ""
                try:
                    resolved = urllib.parse.urljoin(self.url,target)
                except ValueError:
                    # Malformed URL (e.g. broken IPv6 literal): keep it as
                    # written rather than aborting the whole listing.
                    resolved = target
                absolute_links.append(resolved)
            for sub in self.get_subscribe_links()[1:]:
                absolute_links.append(sub[0])
            self.links[mode] = absolute_links
    return self.links.get(mode, [])
@ -529,7 +569,13 @@ class AbstractRenderer():
result = self.render(prepared_body,width=width,mode=mode) result = self.render(prepared_body,width=width,mode=mode)
if result: if result:
self.rendered_text[mode] = result[0] self.rendered_text[mode] = result[0]
self.links[mode] = result[1] #The following is there to prepopulate self.links
#but it seems to slow loading down a lot
#self.links[mode] = []
#we should absolutize all URLs here
#for l in result[1]:
# abs_l = urllib.parse.urljoin(self.url,l.split()[0])
# self.links[mode].append(abs_l)
return self.rendered_text[mode] return self.rendered_text[mode]
def _window_title(self,title,info=None): def _window_title(self,title,info=None):
@ -957,7 +1003,9 @@ class HtmlRenderer(AbstractRenderer):
ty = l.get("type") ty = l.get("type")
if ty : if ty :
if "rss" in ty or "atom" in ty or "feed" in ty: if "rss" in ty or "atom" in ty or "feed" in ty:
subs.append([l.get("href"),ty,l.get("title")]) # some rss links are relatives: we absolutise_url
sublink = urllib.parse.urljoin(self.url, l.get("href"))
subs.append([sublink,ty.l.get("title")])
return subs return subs
def get_title(self): def get_title(self):

View File

@ -157,13 +157,6 @@ class GeminiItem():
def __init__(self, url, name=""): def __init__(self, url, name=""):
self.last_mode = None self.last_mode = None
url = netcache.normalize_url(url) url = netcache.normalize_url(url)
findmode = url.split("##offpunk_mode=")
if len(findmode) > 1:
self.url = findmode[0]
if findmode[1] in ["full"] or findmode[1].isnumeric():
self.last_mode = findmode[1]
else:
self.url = url
self.url = fix_ipv6_url(self.url).strip() self.url = fix_ipv6_url(self.url).strip()
self.name = name self.name = name
self.mime = None self.mime = None
@ -171,57 +164,6 @@ class GeminiItem():
self.scheme = "https" self.scheme = "https"
self.local = False self.local = False
# This method is used to load once the list of links in a gi
# Links can be followed, after a space, by a description/title
#TODO: remove this code
# def get_links(self,mode=None):
# links = []
# toreturn = []
# if self.renderer:
# if not mode:
# mode = self.renderer.last_mode
# links = self.renderer.get_links(mode=mode)
# for l in links:
# #split between link and potential name
# # check that l is non-empty
# url = None
# if l:
# splitted = l.split(maxsplit=1)
# url = self.absolutise_url(splitted[0])
# if url and looks_like_url(url):
# if len(splitted) > 1:
# #We add a name only for Gopher items
# if url.startswith("gopher://"):
# newgi = GeminiItem(url,name=splitted[1])
# else:
# newgi = GeminiItem(url)
# else:
# newgi = GeminiItem(url)
# toreturn.append(newgi)
# elif url and mode != "links_only" and url.startswith("data:image/"):
# imgurl,imgdata = ansirenderer.looks_like_base64(url,self.url)
# if imgurl:
# toreturn.append(GeminiItem(imgurl))
# else:
# toreturn.append(None)
# else:
# # We must include a None item to keep the link count valid
# toreturn.append(None)
# return toreturn
#TODO: should be in ansirenderer
def get_subscribe_links(self):
if self.renderer:
subs = self.renderer.get_subscribe_links()
abssubs = []
# some rss links are relatives
for s in subs:
s[0] = self.absolutise_url(s[0])
abssubs.append(s)
return abssubs
else:
return []
def get_filename(self):
    """Return the base name of the cache path associated with ``self.url``."""
    cache_path = netcache.get_cache_path(self.url)
    return os.path.basename(cache_path)
@ -235,8 +177,6 @@ class GeminiItem():
tmpf = cache_path tmpf = cache_path
return tmpf return tmpf
def set_error(self,err): def set_error(self,err):
# If we get an error, we want to keep an existing cache # If we get an error, we want to keep an existing cache
# but we need to touch it or to create an empty one # but we need to touch it or to create an empty one
@ -264,65 +204,6 @@ class GeminiItem():
cache.write("The ressource will be tentatively fetched during next sync.\n") cache.write("The ressource will be tentatively fetched during next sync.\n")
cache.close() cache.close()
def root(self):
return GeminiItem(self._derive_url("/"))
def up(self,level=1):
path = self.path.rstrip('/')
count = 0
while count < level:
pathbits = list(os.path.split(path))
# Don't try to go higher than root or in config
if self.local or len(pathbits) == 1 :
return self
# Get rid of bottom component
if len(pathbits) > 1:
pathbits.pop()
path = os.path.join(*pathbits)
count += 1
if self.scheme == "gopher":
path = "/1" + path
return GeminiItem(self._derive_url(path))
def query(self, query):
query = urllib.parse.quote(query)
return GeminiItem(self._derive_url(query=query))
def _derive_url(self, path="", query=""):
"""
A thin wrapper around urlunparse which avoids inserting standard ports
into URLs just to keep things clean.
"""
if not self.port or self.port == netcache.standard_ports[self.scheme] :
host = self.host
else:
host = self.host + ":" + str(self.port)
return urllib.parse.urlunparse((self.scheme,host,path or self.path, "", query, ""))
def absolutise_url(self, relative_url):
"""
Convert a relative URL to an absolute URL by using the URL of this
GeminiItem as a base.
"""
try:
abs_url = urllib.parse.urljoin(self.url, relative_url)
except ValueError as e:
abs_url = None
return abs_url
#TODO: explore how to put this in ansirenderer
def url_mode(self):
url = self.url
if self.last_mode and self.last_mode != "readable":
url += "##offpunk_mode=" + self.last_mode
return url
#what is the line to add to a list for this url?
def to_map_line(self):
return "=> {} {}\n".format(self.url_mode(), self.renderer.get_page_title())
# Cheap and cheerful URL detector # Cheap and cheerful URL detector
def looks_like_url(word): def looks_like_url(word):
try: try:
@ -613,6 +494,8 @@ class GeminiClient(cmd.Cmd):
self.page_index = 0 self.page_index = 0
# Update state (external files are not added to history) # Update state (external files are not added to history)
self.gi = gi self.gi = gi
if mode and mode != "readable":
url += "##offpunk_mode=" + mode
self.current_url = url self.current_url = url
if update_hist and not self.sync_only: if update_hist and not self.sync_only:
self._update_history(gi) self._update_history(gi)
@ -987,7 +870,22 @@ Take an integer as argument to go up multiple times."""
level = int(args[0]) level = int(args[0])
elif args[0] != "": elif args[0] != "":
print("Up only take integer as arguments") print("Up only take integer as arguments")
self._go_to_gi(self.gi.up(level=level)) #TODO: implement UP
path = self.path.rstrip('/')
count = 0
while count < level:
pathbits = list(os.path.split(path))
# Don't try to go higher than root or in config
if self.local or len(pathbits) == 1 :
return self
# Get rid of bottom component
if len(pathbits) > 1:
pathbits.pop()
path = os.path.join(*pathbits)
count += 1
if self.scheme == "gopher":
path = "/1" + path
return GeminiItem(self._derive_url(path))
def do_back(self, *args): def do_back(self, *args):
"""Go back to the previous gemini item.""" """Go back to the previous gemini item."""
@ -1012,7 +910,8 @@ Take an integer as argument to go up multiple times."""
@needs_gi
def do_root(self, *args):
    """Go to root selector of the server hosting current item."""
    # The client keeps the URL of the displayed page in self.current_url.
    # Any "##offpunk_mode=" suffix ends up in the fragment, which is dropped.
    parse = urllib.parse.urlparse(self.current_url)
    # urlunparse() takes ONE six-item sequence
    # (scheme, netloc, path, params, query, fragment), not six arguments.
    root_url = urllib.parse.urlunparse((parse.scheme,parse.netloc,"/","","",""))
    self._go_to_url(root_url)
def do_tour(self, line): def do_tour(self, line):
"""Add index items as waypoints on a tour, which is basically a FIFO """Add index items as waypoints on a tour, which is basically a FIFO
@ -1039,7 +938,7 @@ Current tour can be listed with `tour ls` and scrubbed with `tour clear`."""
self.list_show("tour") self.list_show("tour")
elif line == "clear": elif line == "clear":
for l in self.list_get_links("tour"): for l in self.list_get_links("tour"):
self.list_rm_url(l.url_mode(),"tour") self.list_rm_url(l,"tour")
elif line == "*": elif line == "*":
#TODO: change to use renderer.get_links and change list_add_line #TODO: change to use renderer.get_links and change list_add_line
for l in self.get_renderer().get_links(): for l in self.get_renderer().get_links():
@ -1225,8 +1124,7 @@ Use 'ls -l' to see URLs."""
def do_gus(self, line):
    """Submit a search query to the geminispace.info search engine."""
    # The query must be percent-encoded before being placed in the URL.
    query = urllib.parse.quote(line)
    # urlunparse() takes ONE six-item sequence
    # (scheme, netloc, path, params, query, fragment), not six arguments.
    search_url = urllib.parse.urlunparse(("gemini","geminispace.info","/search","",query,""))
    self._go_to_url(search_url)
def do_history(self, *args): def do_history(self, *args):
"""Display history.""" """Display history."""
@ -1266,7 +1164,7 @@ Use "view feeds" to see available feeds on this page.
elif args[0] in ["normal","readable"]: elif args[0] in ["normal","readable"]:
self._go_to_gi(self.gi,mode="readable") self._go_to_gi(self.gi,mode="readable")
elif args[0] == "feed": elif args[0] == "feed":
subs = self.gi.get_subscribe_links() subs = self.get_renderer().get_subscribe_links()
if len(subs) > 1: if len(subs) > 1:
self.do_go(subs[1][0]) self.do_go(subs[1][0])
elif "rss" in subs[0][1] or "atom" in subs[0][1]: elif "rss" in subs[0][1] or "atom" in subs[0][1]:
@ -1274,7 +1172,7 @@ Use "view feeds" to see available feeds on this page.
else: else:
print("No other feed found on %s"%self.gi.url) print("No other feed found on %s"%self.gi.url)
elif args[0] == "feeds": elif args[0] == "feeds":
subs = self.gi.get_subscribe_links() subs = self.get_renderer().get_subscribe_links()
stri = "Available views :\n" stri = "Available views :\n"
counter = 0 counter = 0
for s in subs: for s in subs:
@ -1436,7 +1334,7 @@ If no argument given, URL is added to Bookmarks."""
If a new link is found in the page during a --sync, the new link is automatically If a new link is found in the page during a --sync, the new link is automatically
fetched and added to your next tour. fetched and added to your next tour.
To unsubscribe, remove the page from the "subscribed" list.""" To unsubscribe, remove the page from the "subscribed" list."""
subs = self.gi.get_subscribe_links() subs = self.get_renderer().get_subscribe_links()
if len(subs) > 1: if len(subs) > 1:
stri = "Multiple feeds have been found :\n" stri = "Multiple feeds have been found :\n"
elif "rss" in subs[0][1] or "atom" in subs[0][1] : elif "rss" in subs[0][1] or "atom" in subs[0][1] :
@ -1463,7 +1361,6 @@ To unsubscribe, remove the page from the "subscribed" list."""
else: else:
sublink,title = None,None sublink,title = None,None
if sublink: if sublink:
sublink = self.gi.absolutise_url(sublink)
gi = GeminiItem(sublink,name=title) gi = GeminiItem(sublink,name=title)
list_path = self.get_list("subscribed") list_path = self.get_list("subscribed")
added = self.list_add_line("subscribed",gi=gi,verbose=False) added = self.list_add_line("subscribed",gi=gi,verbose=False)
@ -1494,13 +1391,17 @@ Bookmarks are stored using the 'add' command."""
archives, which is a special historical list limited in size. It is similar to `move archives`.""" archives, which is a special historical list limited in size. It is similar to `move archives`."""
for li in self.list_lists(): for li in self.list_lists():
if li not in ["archives", "history"]: if li not in ["archives", "history"]:
deleted = self.list_rm_url(self.gi.url_mode(),li) deleted = self.list_rm_url(self.current_url,li)
if deleted: if deleted:
print("Removed from %s"%li) print("Removed from %s"%li)
self.list_add_top("archives",limit=self.options["archives_size"]) self.list_add_top("archives",limit=self.options["archives_size"])
print("Archiving:%s"%self.get_renderer().get_page_title()) print("Archiving:%s"%self.get_renderer().get_page_title())
print("\x1b[2;34mCurrent maximum size of archives : %s\x1b[0m" %self.options["archives_size"]) print("\x1b[2;34mCurrent maximum size of archives : %s\x1b[0m" %self.options["archives_size"])
#what is the line to add to a list for this url?
def to_map_line(self):
    """Return the "=> URL TITLE" line (newline-terminated) for the current page.

    The URL is ``self.current_url`` and the title comes from the current
    renderer's ``get_page_title()``.
    """
    url = self.current_url
    page_title = self.get_renderer().get_page_title()
    return "=> {} {}\n".format(url, page_title)
def list_add_line(self,list,gi=None,verbose=True): def list_add_line(self,list,gi=None,verbose=True):
list_path = self.list_path(list) list_path = self.list_path(list)
if not list_path and self.list_is_system(list): if not list_path and self.list_is_system(list):
@ -1518,12 +1419,12 @@ archives, which is a special historical list limited in size. It is similar to `
l_file.close() l_file.close()
for l in lines: for l in lines:
sp = l.split() sp = l.split()
if gi.url_mode() in sp: if self.current_url in sp:
if verbose: if verbose:
print("%s already in %s."%(gi.url,list)) print("%s already in %s."%(gi.url,list))
return False return False
with open(list_path,"a") as l_file: with open(list_path,"a") as l_file:
l_file.write(gi.to_map_line()) l_file.write(self.to_map_line())
l_file.close() l_file.close()
if verbose: if verbose:
print("%s added to %s" %(gi.url,list)) print("%s added to %s" %(gi.url,list))
@ -1532,7 +1433,7 @@ archives, which is a special historical list limited in size. It is similar to `
def list_add_top(self,list,limit=0,truncate_lines=0): def list_add_top(self,list,limit=0,truncate_lines=0):
if not self.gi: if not self.gi:
return return
stri = self.gi.to_map_line().strip("\n") stri = self.to_map_line().strip("\n")
if list == "archives": if list == "archives":
stri += ", archived on " stri += ", archived on "
elif list == "history": elif list == "history":
@ -1680,7 +1581,7 @@ If current page was not in a list, this command is similar to `add LIST`."""
lists = self.list_lists() lists = self.list_lists()
for l in lists: for l in lists:
if l != args[0] and l not in ["archives", "history"]: if l != args[0] and l not in ["archives", "history"]:
isremoved = self.list_rm_url(self.gi.url_mode(),l) isremoved = self.list_rm_url(self.current_url,l)
if isremoved: if isremoved:
print("Removed from %s"%l) print("Removed from %s"%l)
self.list_add_line(args[0]) self.list_add_line(args[0])
@ -2001,7 +1902,7 @@ Argument : duration of cache validity (in seconds)."""
fetch_gitem(l,depth=depth,validity=validity,savetotour=tourchildren,count=[counter,end]) fetch_gitem(l,depth=depth,validity=validity,savetotour=tourchildren,count=[counter,end])
if tourandremove: if tourandremove:
if add_to_tour(l): if add_to_tour(l):
self.list_rm_url(l.url_mode(),list) self.list_rm_url(l,list)
self.sync_only = True self.sync_only = True
lists = self.list_lists() lists = self.list_lists()

View File

@ -53,3 +53,5 @@ def term_width(new_width=None):
if cur < width: if cur < width:
width = cur width = cur
return width return width