diff --git a/ansirenderer.py b/ansirenderer.py new file mode 100644 index 0000000..2ebdef3 --- /dev/null +++ b/ansirenderer.py @@ -0,0 +1,931 @@ +#!/bin/python +import os + + +# First, we define the different content->text renderers, outside of the rest +# (They could later be factorized in other files or replaced) +class AbstractRenderer(): + def __init__(self,content,url,center=True): + self.url = url + self.body = str(content) + #there’s one rendered text and one links table per mode + self.rendered_text = {} + self.links = {} + self.images = {} + self.title = None + self.validity = True + self.temp_file = {} + self.less_histfile = {} + self.center = center + + #This class hold an internal representation of the HTML text + class representation: + def __init__(self,width,title=None,center=True): + self.title=title + self.center = center + self.final_text = "" + self.opened = [] + self.width = width + self.last_line = "" + self.last_line_colors = {} + self.last_line_center = False + self.new_paragraph = True + self.i_indent = "" + self.s_indent = "" + self.r_indent = "" + self.current_indent = "" + self.disabled_indents = None + # each color is an [open,close] pair code + self.colors = { + "bold" : ["1","22"], + "faint" : ["2","22"], + "italic" : ["3","23"], + "underline": ["4","24"], + "red" : ["31","39"], + "yellow" : ["33","39"], + "blue" : ["34","39"], + } + + def _insert(self,color,open=True): + if open: o = 0 + else: o = 1 + pos = len(self.last_line) + #we remember the position where to insert color codes + if not pos in self.last_line_colors: + self.last_line_colors[pos] = [] + #Two inverse code cancel each other + if [color,int(not o)] in self.last_line_colors[pos]: + self.last_line_colors[pos].remove([color,int(not o)]) + else: + self.last_line_colors[pos].append([color,o])#+color+str(o)) + + # Take self.last line and add ANSI codes to it before adding it to + # self.final_text. + def _endline(self): + if len(self.last_line.strip()) > 0: + for c in self.opened: + self._insert(c,open=False) + nextline = "" + added_char = 0 + #we insert the color code at the saved positions + while len (self.last_line_colors) > 0: + pos,colors = self.last_line_colors.popitem() + #popitem itterates LIFO. + #So we go, backward, to the pos (starting at the end of last_line) + nextline = self.last_line[pos:] + nextline + ansicol = "\x1b[" + for c,o in colors: + ansicol += self.colors[c][o] + ";" + ansicol = ansicol[:-1]+"m" + nextline = ansicol + nextline + added_char += len(ansicol) + self.last_line = self.last_line[:pos] + nextline = self.last_line + nextline + if self.last_line_center: + #we have to care about the ansi char while centering + width = term_width() + added_char + nextline = nextline.strip().center(width) + self.last_line_center = False + else: + #should we lstrip the nextline in the addition ? + nextline = self.current_indent + nextline.lstrip() + self.r_indent + self.current_indent = self.s_indent + self.final_text += nextline + self.last_line = "" + self.final_text += "\n" + for c in self.opened: + self._insert(c,open=True) + else: + self.last_line = "" + + + def center_line(self): + self.last_line_center = True + + def open_color(self,color): + if color in self.colors and color not in self.opened: + self._insert(color,open=True) + self.opened.append(color) + def close_color(self,color): + if color in self.colors and color in self.opened: + self._insert(color,open=False) + self.opened.remove(color) + def close_all(self): + if len(self.colors) > 0: + self.last_line += "\x1b[0m" + self.opened.clear() + + def startindent(self,indent,sub=None,reverse=None): + self._endline() + self.i_indent = indent + self.current_indent = indent + if sub: + self.s_indent = sub + else: + self.s_indent = indent + if reverse: + self.r_indent = reverse + else: + self.r_indent = "" + + + def endindent(self): + self._endline() + self.i_indent = "" + self.s_indent = "" + self.r_indent = "" + self.current_indent = "" + + def _disable_indents(self): + self.disabled_indents = [] + self.disabled_indents.append(self.current_indent) + self.disabled_indents.append(self.i_indent) + self.disabled_indents.append(self.s_indent) + self.disabled_indents.append(self.r_indent) + self.endindent() + + def _enable_indents(self): + if self.disabled_indents: + self.current_indent = self.disabled_indents[0] + self.i_indent = self.disabled_indents[1] + self.s_indent = self.disabled_indents[2] + self.r_indent = self.disabled_indents[3] + self.disabled_indents = None + + def newline(self): + self._endline() + + #A new paragraph implies 2 newlines (1 blank line between paragraphs) + #But it is only used if didn’t already started one to avoid plenty + #of blank lines. force=True allows to bypass that limit. + #new_paragraph becomes false as soon as text is entered into it + def newparagraph(self,force=False): + if force or not self.new_paragraph: + self._endline() + self.final_text += "\n" + self.new_paragraph = True + + def add_space(self): + if len(self.last_line) > 0 and self.last_line[-1] != " ": + self.last_line += " " + + def _title_first(self,intext=None): + if self.title: + if not self.title == intext: + self._disable_indents() + self.open_color("blue") + self.open_color("bold") + self.open_color("underline") + self.add_text(self.title) + self.close_all() + self.newparagraph() + self._enable_indents() + self.title = None + + # Beware, blocks are not wrapped nor indented and left untouched! + # They are mostly useful for pictures and preformatted text. + def add_block(self,intext): + # If necessary, we add the title before a block + self._title_first() + # we don’t want to indent blocks + self._endline() + self._disable_indents() + self.final_text += self.current_indent + intext + self.new_paragraph = False + self._endline() + self._enable_indents() + + def add_text(self,intext): + self._title_first(intext=intext) + lines = [] + last = (self.last_line + intext) + self.last_line = "" + # With the following, we basically cancel adding only spaces + # on an empty line + if len(last.strip()) > 0: + self.new_paragraph = False + else: + last = last.strip() + if len(last) > self.width: + width = self.width - len(self.current_indent) - len(self.r_indent) + spaces_left = len(last) - len(last.lstrip()) + spaces_right = len(last) - len(last.rstrip()) + lines = textwrap.wrap(last,width,drop_whitespace=True) + self.last_line += spaces_left*" " + while len(lines) > 1: + l = lines.pop(0) + self.last_line += l + self._endline() + if len(lines) == 1: + li = lines[0] + self.last_line += li + spaces_right*" " + else: + self.last_line = last + + def get_final(self): + self.close_all() + self._endline() + #if no content, we still add the title + self._title_first() + lines = self.final_text.splitlines() + lines2 = [] + termspace = shutil.get_terminal_size()[0] + #Following code instert blanck spaces to center the content + if self.center and termspace > term_width(): + margin = int((termspace - term_width())//2) + else: + margin = 0 + for l in lines : + lines2.append(margin*" "+l) + return "\n".join(lines2) + + def get_subscribe_links(self): + return [[self.url,self.get_mime(),self.get_title()]] + def is_valid(self): + return self.validity + def get_links(self,mode="links_only"): + if mode not in self.links : + prepared_body = self.prepare(self.body,mode=mode) + results = self.render(prepared_body,mode=mode) + if results: + self.links[mode] = results[1] + for l in self.get_subscribe_links()[1:]: + self.links[mode].append(l[0]) + return self.links[mode] + def get_title(self): + return "Abstract title" + + # This function return a list of URL which should be downloaded + # before displaying the page (images in HTML pages, typically) + def get_images(self,mode="readable"): + if not mode in self.images: + self.get_body(mode=mode) + # we also invalidate the body that was done without images + self.rendered_text.pop(mode) + if mode in self.images: + return self.images[mode] + else: + return [] + #This function will give gemtext to the gemtext renderer + def prepare(self,body,mode=None): + return body + + def get_body(self,width=None,mode="readable"): + if not width: + width = term_width() + if mode not in self.rendered_text: + prepared_body = self.prepare(self.body,mode=mode) + result = self.render(prepared_body,width=width,mode=mode) + if result: + self.rendered_text[mode] = result[0] + self.links[mode] = result[1] + return self.rendered_text[mode] + + def _window_title(self,title,info=None): + title_r = self.representation(term_width()) + title_r.open_color("red") + title_r.open_color("bold") + title_r.add_text(title) + title_r.close_color("bold") + if info: + title_r.add_text(" (%s)"%info) + title_r.close_color("red") + return title_r.get_final() + + def display(self,mode="readable",window_title="",window_info=None,grep=None): + if not mode: mode = "readable" + wtitle = self._window_title(window_title,info=window_info) + body = wtitle + "\n" + self.get_body(mode=mode) + if not body: + return False + # We actually put the body in a tmpfile before giving it to less + if mode not in self.temp_file: + tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) + self.temp_file[mode] = tmpf.name + tmpf.write(body) + tmpf.close() + if mode not in self.less_histfile: + firsttime = True + tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) + self.less_histfile[mode] = tmpf.name + else: + firsttime = False + less_cmd(self.temp_file[mode], histfile=self.less_histfile[mode],cat=firsttime,grep=grep) + return True + + def get_temp_file(self,mode="readable"): + if mode in self.temp_file: + return self.temp_file[mode] + else: + return None + + # An instance of AbstractRenderer should have a self.render(body,width,mode) method. + # 3 modes are used : readable (by default), full and links_only (the fastest, when + # rendered content is not used, only the links are needed) + # The prepare() function is called before the rendering. It is useful if + # your renderer output in a format suitable for another existing renderer (such as gemtext) + +# Gemtext Rendering Engine +class GemtextRenderer(AbstractRenderer): + def get_mime(self): + return "text/gemini" + def get_title(self): + if self.title: + return self.title + elif self.body: + lines = self.body.splitlines() + for line in lines: + if line.startswith("#"): + self.title = line.strip("#").strip() + return self.title + if len(lines) > 0: + # If not title found, we take the first 50 char + # of the first line + title_line = lines[0].strip() + if len(title_line) > 50: + title_line = title_line[:49] + "…" + self.title = title_line + return self.title + else: + self.title = "Empty Page" + return self.title + else: + return "Unknown Gopher Page" + + #render_gemtext + def render(self,gemtext, width=None,mode=None): + if not width: + width = term_width() + r = self.representation(width) + links = [] + hidden_links = [] + preformatted = False + def format_link(url,index,name=None): + if "://" in url: + protocol,adress = url.split("://",maxsplit=1) + protocol = " %s" %protocol + else: + adress = url + protocol = "" + if "gemini" in protocol or "list" in protocol: + protocol = "" + if not name: + name = adress + line = "[%d%s] %s" % (index, protocol, name) + return line + for line in gemtext.splitlines(): + r.newline() + if line.startswith("```"): + preformatted = not preformatted + elif preformatted: + # infinite line to not wrap preformated + r.add_block(line+"\n") + elif len(line.strip()) == 0: + r.newparagraph(force=True) + elif line.startswith("=>"): + strippedline = line[2:].strip() + if strippedline: + links.append(strippedline) + splitted = strippedline.split(maxsplit=1) + url = splitted[0] + name = None + if len(splitted) > 1: + name = splitted[1] + link = format_link(url,len(links),name=name) + #r.open_color("blue") + #r.open_color("faint") + #r.open_color("underline") + startpos = link.find("] ") + 2 + r.startindent("",sub=startpos*" ") + r.add_text(link) + r.endindent() + #r.close_all() + elif line.startswith("* "): + line = line[1:].lstrip("\t ") + r.startindent("• ",sub=" ") + r.add_text(line) + r.endindent() + elif line.startswith(">"): + line = line[1:].lstrip("\t ") + r.startindent("> ") + r.add_text(line) + r.endindent() + elif line.startswith("###"): + line = line[3:].lstrip("\t ") + r.open_color("blue") + r.add_text(line) + r.close_color("blue") + elif line.startswith("##"): + line = line[2:].lstrip("\t ") + r.open_color("blue") + r.add_text(line) + r.close_color("blue") + elif line.startswith("#"): + line = line[1:].lstrip("\t ") + if not self.title: + self.title = line + r.open_color("bold") + r.open_color("blue") + r.open_color("underline") + r.add_text(line) + r.close_color("underline") + r.close_color("bold") + r.close_color("blue") + else: + if "://" in line: + words = line.split() + for w in words: + if "://" in w: + hidden_links.append(w) + r.add_text(line.rstrip()) + links += hidden_links + return r.get_final(), links + +class GopherRenderer(AbstractRenderer): + def get_mime(self): + return "text/gopher" + def get_title(self): + if not self.title: + self.title = "" + if self.body: + firstline = self.body.splitlines()[0] + firstline = firstline.split("\t")[0] + if firstline.startswith("i"): + firstline = firstline[1:] + self.title = firstline + return self.title + + #menu_or_text + def render(self,body,width=None,mode=None): + if not width: + width = term_width() + try: + render,links = self._render_goph(body,width=width,mode=mode) + except Exception as err: + print("Error rendering Gopher ",err) + r = self.representation(width) + r.add_block(body) + render = r.get_final() + links = [] + return render,links + + def _render_goph(self,body,width=None,mode=None): + if not width: + width = term_width() + # This was copied straight from Agena (then later adapted) + links = [] + r = self.representation(width) + for line in self.body.split("\n"): + r.newline() + if line.startswith("i"): + towrap = line[1:].split("\t")[0] + if len(towrap.strip()) > 0: + r.add_text(towrap) + else: + r.newparagraph() + elif not line.strip() in [".",""]: + parts = line.split("\t") + parts[-1] = parts[-1].strip() + if parts[-1] == "+": + parts = parts[:-1] + if len(parts) == 4: + name,path,host,port = parts + itemtype = name[0] + name = name[1:] + if port == "70": + port = "" + else: + port = ":%s"%port + if itemtype == "h" and path.startswith("URL:"): + url = path[4:] + else: + url = "gopher://%s%s/%s%s" %(host,port,itemtype,path) + url = url.replace(" ","%20") + linkline = url + " " + name + links.append(linkline) + towrap = "[%s] "%len(links)+ name + r.add_text(towrap) + else: + r.add_text(line) + return r.get_final(),links + + +class FolderRenderer(GemtextRenderer): + def __init__(self,content,url,center=True,datadir=None): + GemtextRenderer.__init__(self,content,url,center) + self.datadir = datadir + + def get_mime(self): + return "Directory" + def prepare(self,body,mode=None): + def get_first_line(l): + path = os.path.join(listdir,l+".gmi") + with open(path) as f: + first_line = f.readline().strip() + f.close() + if first_line.startswith("#"): + return first_line + else: + return None + def write_list(l): + body = "" + for li in l: + path = "list:///%s"%li + gi = GeminiItem(path) + size = len(gi.get_links()) + body += "=> %s %s (%s items)\n" %(str(path),li,size) + return body + listdir = os.path.join(self.datadir,"lists") + if self.url != listdir: + return "This is folder %s" %self.url + else: + self.title = "My lists" + lists = [] + if os.path.exists(listdir): + listfiles = os.listdir(listdir) + if len(listfiles) > 0: + for l in listfiles: + #removing the .gmi at the end of the name + lists.append(l[:-4]) + if len(lists) > 0: + body = "" + my_lists = [] + system_lists = [] + subscriptions = [] + frozen = [] + lists.sort() + for l in lists: + if l in ["history","to_fetch","archives","tour"]: + system_lists.append(l) + else: + first_line = get_first_line(l) + if first_line and "#subscribed" in first_line: + subscriptions.append(l) + elif first_line and "#frozen" in first_line: + frozen.append(l) + else: + my_lists.append(l) + if len(my_lists) > 0: + body+= "\n## Bookmarks Lists (updated during sync)\n" + body += write_list(my_lists) + if len(subscriptions) > 0: + body +="\n## Subscriptions (new links in those are added to tour)\n" + body += write_list(subscriptions) + if len(frozen) > 0: + body +="\n## Frozen (fetched but never updated)\n" + body += write_list(frozen) + if len(system_lists) > 0: + body +="\n## System Lists\n" + body += write_list(system_lists) + return body + +class FeedRenderer(GemtextRenderer): + def get_mime(self): + return "application/rss+xml" + def is_valid(self): + if _DO_FEED: + parsed = feedparser.parse(self.body) + else: + return False + if parsed.bozo: + return False + else: + #If no content, then fallback to HTML + return len(parsed.entries) > 0 + + def get_title(self): + if not self.title: + self.get_body() + return self.title + + def prepare(self,content,mode="readable",width=None): + if not width: + width = term_width() + self.title = "RSS/Atom feed" + page = "" + if _DO_FEED: + parsed = feedparser.parse(content) + else: + page += "Please install python-feedparser to handle RSS/Atom feeds\n" + self.validity = False + return page + if parsed.bozo: + page += "Invalid RSS feed\n\n" + page += str(parsed.bozo_exception) + self.validity = False + else: + if "title" in parsed.feed: + t = parsed.feed.title + else: + t = "Unknown" + self.title = "%s (XML feed)" %t + title = "# %s"%self.title + page += title + "\n" + if "updated" in parsed.feed: + page += "Last updated on %s\n\n" %parsed.feed.updated + if "subtitle" in parsed.feed: + page += parsed.feed.subtitle + "\n" + if "link" in parsed.feed: + page += "=> %s\n" %parsed.feed.link + page += "\n## Entries\n" + if len(parsed.entries) < 1: + self.validity = False + for i in parsed.entries: + line = "=> %s " %i.link + if "published" in i: + pub_date = time.strftime("%Y-%m-%d",i.published_parsed) + line += pub_date + " : " + if "title" in i: + line += "%s" %(i.title) + if "author" in i: + line += " (by %s)"%i.author + page += line + "\n" + if mode == "full": + if "summary" in i: + html = HtmlRenderer(i.summary,self.url,center=False) + rendered = html.get_body(width=None,mode="full") + page += "\n" + page += rendered + page += "\n------------\n\n" + return page + +class ImageRenderer(AbstractRenderer): + def get_mime(self): + return "image/*" + def is_valid(self): + if _RENDER_IMAGE: + return True + else: + return False + def get_links(self,mode=None): + return [] + def get_title(self): + return "Picture file" + def render(self,img,width=None,mode=None): + #with inline, we use symbols to be rendered with less. + #else we use the best possible renderer. + if mode == "links_only": + return "", [] + if not width: + width = term_width() + spaces = 0 + else: + spaces = int((term_width() - width)//2) + ansi_img = inline_image(img,width) + #Now centering the image + lines = ansi_img.splitlines() + new_img = "" + for l in lines: + new_img += spaces*" " + l + "\n" + return new_img, [] + def display(self,mode=None,window_title=None,window_info=None,grep=None): + if window_title: + print(self._window_title(window_title,info=window_info)) + terminal_image(self.body) + return True + +class HtmlRenderer(AbstractRenderer): + def get_mime(self): + return "text/html" + def is_valid(self): + if not _DO_HTML: + print("HTML document detected. Please install python-bs4 and python-readability.") + return _DO_HTML and self.validity + def get_subscribe_links(self): + subs = [[self.url,self.get_mime(),self.get_title()]] + soup = BeautifulSoup(self.body, 'html.parser') + links = soup.find_all("link",rel="alternate",recursive=True) + for l in links: + ty = l.get("type") + if ty : + if "rss" in ty or "atom" in ty or "feed" in ty: + subs.append([l.get("href"),ty,l.get("title")]) + return subs + + def get_title(self): + if self.title: + return self.title + elif self.body: + if _HAS_READABILITY: + try: + readable = Document(self.body) + self.title = readable.short_title() + return self.title + except Exception as err: + pass + soup = BeautifulSoup(self.body,"html.parser") + self.title = str(soup.title.string) + else: + return "" + + # Our own HTML engine (crazy, isn’t it?) + # Return [rendered_body, list_of_links] + # mode is either links_only, readable or full + def render(self,body,mode="readable",width=None,add_title=True): + if not width: + width = term_width() + if not _DO_HTML: + print("HTML document detected. Please install python-bs4 and python-readability.") + return + # This method recursively parse the HTML + r = self.representation(width,title=self.get_title(),center=self.center) + links = [] + # You know how bad html is when you realize that space sometimes meaningful, somtimes not. + # CR are not meaniningful. Except that, somethimes, they should be interpreted as spaces. + # HTML is real crap. At least the one people are generating. + + def render_image(src,width=40,mode=None): + ansi_img = "" + imgurl,imgdata = looks_like_base64(src,self.url) + if _RENDER_IMAGE and mode != "links_only" and imgurl: + try: + #4 followings line are there to translate the URL into cache path + g = GeminiItem(imgurl) + img = g.get_cache_path() + if imgdata: + with open(img,"wb") as cached: + cached.write(base64.b64decode(imgdata)) + cached.close() + if g.is_cache_valid(): + renderer = ImageRenderer(img,imgurl) + # Image are 40px wide except if terminal is smaller + if width > 40: + size = 40 + else: + size = width + ansi_img = "\n" + renderer.get_body(width=size,mode="inline") + except Exception as err: + #we sometimes encounter really bad formatted files or URL + ansi_img = textwrap.fill("[BAD IMG] %s - %s"%(err,src),width) + "\n" + return ansi_img + def sanitize_string(string): + #never start with a "\n" + #string = string.lstrip("\n") + string = string.replace("\r","").replace("\n", " ").replace("\t"," ") + endspace = string.endswith(" ") or string.endswith("\xa0") + startspace = string.startswith(" ") or string.startswith("\xa0") + toreturn = string.replace("\n", " ").replace("\t"," ").strip() + while " " in toreturn: + toreturn = toreturn.replace(" "," ") + toreturn = html.unescape(toreturn) + if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"): + toreturn += " " + if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"): + toreturn = " " + toreturn + return toreturn + def recursive_render(element,indent="",preformatted=False): + if element.name == "blockquote": + r.newparagraph() + r.startindent(" ",reverse=" ") + for child in element.children: + r.open_color("italic") + recursive_render(child,indent="\t") + r.close_color("italic") + r.endindent() + elif element.name in ["div","p"]: + r.newparagraph() + for child in element.children: + recursive_render(child,indent=indent) + r.newparagraph() + elif element.name in ["span"]: + r.add_space() + for child in element.children: + recursive_render(child,indent=indent) + r.add_space() + elif element.name in ["h1","h2","h3","h4","h5","h6"]: + r.open_color("blue") + if element.name in ["h1"]: + r.open_color("bold") + r.open_color("underline") + elif element.name in ["h2"]: + r.open_color("bold") + elif element.name in ["h5","h6"]: + r.open_color("faint") + for child in element.children: + r.newparagraph() + recursive_render(child) + r.newparagraph() + r.close_all() + elif element.name in ["code","tt"]: + for child in element.children: + recursive_render(child,indent=indent,preformatted=True) + elif element.name in ["pre"]: + r.newparagraph() + r.add_block(element.text) + r.newparagraph() + elif element.name in ["li"]: + r.startindent(" • ",sub=" ") + for child in element.children: + recursive_render(child,indent=indent) + r.endindent() + elif element.name in ["tr"]: + r.startindent("|",reverse="|") + for child in element.children: + recursive_render(child,indent=indent) + r.endindent() + elif element.name in ["td","th"]: + r.add_text("| ") + for child in element.children: + recursive_render(child) + r.add_text(" |") + # italics + elif element.name in ["em","i"]: + r.open_color("italic") + for child in element.children: + recursive_render(child,indent=indent,preformatted=preformatted) + r.close_color("italic") + #bold + elif element.name in ["b","strong"]: + r.open_color("bold") + for child in element.children: + recursive_render(child,indent=indent,preformatted=preformatted) + r.close_color("bold") + elif element.name == "a": + link = element.get('href') + # support for images nested in links + if link: + text = "" + imgtext = "" + #we display images first in a link + for child in element.children: + if child.name == "img": + recursive_render(child) + imgtext = "[IMG LINK %s]" + links.append(link+" "+text) + link_id = str(len(links)) + r.open_color("blue") + r.open_color("faint") + for child in element.children: + if child.name != "img": + recursive_render(child,preformatted=preformatted) + if imgtext != "": + r.center_line() + r.add_text(imgtext%link_id) + else: + r.add_text(" [%s]"%link_id) + r.close_color("blue") + r.close_color("faint") + else: + #No real link found + for child in element.children: + recursive_render(child,preformatted=preformatted) + elif element.name == "img": + src = element.get("src") + text = "" + ansi_img = render_image(src,width=width,mode=mode) + alt = element.get("alt") + if alt: + alt = sanitize_string(alt) + text += "[IMG] %s"%alt + else: + text += "[IMG]" + if src: + links.append(src+" "+text) + if not mode in self.images: + self.images[mode] = [] + abs_url = urllib.parse.urljoin(self.url, src) + self.images[mode].append(abs_url) + link_id = " [%s]"%(len(links)) + r.add_block(ansi_img) + r.open_color("faint") + r.open_color("yellow") + r.center_line() + r.add_text(text + link_id) + r.close_color("faint") + r.close_color("yellow") + r.newline() + elif element.name == "br": + r.newline() + elif element.name not in ["script","style","template"] and type(element) != Comment: + if element.string: + if preformatted : + r.open_color("faint") + r.add_text(element.string) + r.close_color("faint") + else: + s = sanitize_string(element.string) + if len(s.strip()) > 0: + r.add_text(s) + else: + for child in element.children: + recursive_render(child,indent=indent) + # the real render_html hearth + if mode == "full": + summary = body + elif _HAS_READABILITY: + try: + readable = Document(body) + summary = readable.summary() + except Exception as err: + summary = body + else: + summary = body + soup = BeautifulSoup(summary, 'html.parser') + #soup = BeautifulSoup(summary, 'html5lib') + if soup : + if soup.body : + recursive_render(soup.body) + else: + recursive_render(soup) + return r.get_final(),links + diff --git a/offpunk.py b/offpunk.py index 4bd14ed..c5f9772 100755 --- a/offpunk.py +++ b/offpunk.py @@ -48,6 +48,7 @@ import webbrowser import html import base64 import subprocess +import ansirenderer # In terms of arguments, this can take an input file/string to be passed to # stdin, a parameter to do (well-escaped) "%" replacement on the command, a @@ -394,941 +395,17 @@ standard_ports = { "https" : 443, "spartan": 300, } - -# First, we define the different content->text renderers, outside of the rest -# (They could later be factorized in other files or replaced) -class AbstractRenderer(): - def __init__(self,content,url,center=True): - self.url = url - self.body = str(content) - #there’s one rendered text and one links table per mode - self.rendered_text = {} - self.links = {} - self.images = {} - self.title = None - self.validity = True - self.temp_file = {} - self.less_histfile = {} - self.center = center - - #This class hold an internal representation of the HTML text - class representation: - def __init__(self,width,title=None,center=True): - self.title=title - self.center = center - self.final_text = "" - self.opened = [] - self.width = width - self.last_line = "" - self.last_line_colors = {} - self.last_line_center = False - self.new_paragraph = True - self.i_indent = "" - self.s_indent = "" - self.r_indent = "" - self.current_indent = "" - self.disabled_indents = None - # each color is an [open,close] pair code - self.colors = { - "bold" : ["1","22"], - "faint" : ["2","22"], - "italic" : ["3","23"], - "underline": ["4","24"], - "red" : ["31","39"], - "yellow" : ["33","39"], - "blue" : ["34","39"], - } - - def _insert(self,color,open=True): - if open: o = 0 - else: o = 1 - pos = len(self.last_line) - #we remember the position where to insert color codes - if not pos in self.last_line_colors: - self.last_line_colors[pos] = [] - #Two inverse code cancel each other - if [color,int(not o)] in self.last_line_colors[pos]: - self.last_line_colors[pos].remove([color,int(not o)]) - else: - self.last_line_colors[pos].append([color,o])#+color+str(o)) - - # Take self.last line and add ANSI codes to it before adding it to - # self.final_text. - def _endline(self): - if len(self.last_line.strip()) > 0: - for c in self.opened: - self._insert(c,open=False) - nextline = "" - added_char = 0 - #we insert the color code at the saved positions - while len (self.last_line_colors) > 0: - pos,colors = self.last_line_colors.popitem() - #popitem itterates LIFO. - #So we go, backward, to the pos (starting at the end of last_line) - nextline = self.last_line[pos:] + nextline - ansicol = "\x1b[" - for c,o in colors: - ansicol += self.colors[c][o] + ";" - ansicol = ansicol[:-1]+"m" - nextline = ansicol + nextline - added_char += len(ansicol) - self.last_line = self.last_line[:pos] - nextline = self.last_line + nextline - if self.last_line_center: - #we have to care about the ansi char while centering - width = term_width() + added_char - nextline = nextline.strip().center(width) - self.last_line_center = False - else: - #should we lstrip the nextline in the addition ? - nextline = self.current_indent + nextline.lstrip() + self.r_indent - self.current_indent = self.s_indent - self.final_text += nextline - self.last_line = "" - self.final_text += "\n" - for c in self.opened: - self._insert(c,open=True) - else: - self.last_line = "" - - - def center_line(self): - self.last_line_center = True - - def open_color(self,color): - if color in self.colors and color not in self.opened: - self._insert(color,open=True) - self.opened.append(color) - def close_color(self,color): - if color in self.colors and color in self.opened: - self._insert(color,open=False) - self.opened.remove(color) - def close_all(self): - if len(self.colors) > 0: - self.last_line += "\x1b[0m" - self.opened.clear() - - def startindent(self,indent,sub=None,reverse=None): - self._endline() - self.i_indent = indent - self.current_indent = indent - if sub: - self.s_indent = sub - else: - self.s_indent = indent - if reverse: - self.r_indent = reverse - else: - self.r_indent = "" - - - def endindent(self): - self._endline() - self.i_indent = "" - self.s_indent = "" - self.r_indent = "" - self.current_indent = "" - - def _disable_indents(self): - self.disabled_indents = [] - self.disabled_indents.append(self.current_indent) - self.disabled_indents.append(self.i_indent) - self.disabled_indents.append(self.s_indent) - self.disabled_indents.append(self.r_indent) - self.endindent() - - def _enable_indents(self): - if self.disabled_indents: - self.current_indent = self.disabled_indents[0] - self.i_indent = self.disabled_indents[1] - self.s_indent = self.disabled_indents[2] - self.r_indent = self.disabled_indents[3] - self.disabled_indents = None - - def newline(self): - self._endline() - - #A new paragraph implies 2 newlines (1 blank line between paragraphs) - #But it is only used if didn’t already started one to avoid plenty - #of blank lines. force=True allows to bypass that limit. - #new_paragraph becomes false as soon as text is entered into it - def newparagraph(self,force=False): - if force or not self.new_paragraph: - self._endline() - self.final_text += "\n" - self.new_paragraph = True - - def add_space(self): - if len(self.last_line) > 0 and self.last_line[-1] != " ": - self.last_line += " " - - def _title_first(self,intext=None): - if self.title: - if not self.title == intext: - self._disable_indents() - self.open_color("blue") - self.open_color("bold") - self.open_color("underline") - self.add_text(self.title) - self.close_all() - self.newparagraph() - self._enable_indents() - self.title = None - - # Beware, blocks are not wrapped nor indented and left untouched! - # They are mostly useful for pictures and preformatted text. - def add_block(self,intext): - # If necessary, we add the title before a block - self._title_first() - # we don’t want to indent blocks - self._endline() - self._disable_indents() - self.final_text += self.current_indent + intext - self.new_paragraph = False - self._endline() - self._enable_indents() - - def add_text(self,intext): - self._title_first(intext=intext) - lines = [] - last = (self.last_line + intext) - self.last_line = "" - # With the following, we basically cancel adding only spaces - # on an empty line - if len(last.strip()) > 0: - self.new_paragraph = False - else: - last = last.strip() - if len(last) > self.width: - width = self.width - len(self.current_indent) - len(self.r_indent) - spaces_left = len(last) - len(last.lstrip()) - spaces_right = len(last) - len(last.rstrip()) - lines = textwrap.wrap(last,width,drop_whitespace=True) - self.last_line += spaces_left*" " - while len(lines) > 1: - l = lines.pop(0) - self.last_line += l - self._endline() - if len(lines) == 1: - li = lines[0] - self.last_line += li + spaces_right*" " - else: - self.last_line = last - - def get_final(self): - self.close_all() - self._endline() - #if no content, we still add the title - self._title_first() - lines = self.final_text.splitlines() - lines2 = [] - termspace = shutil.get_terminal_size()[0] - #Following code instert blanck spaces to center the content - if self.center and termspace > term_width(): - margin = int((termspace - term_width())//2) - else: - margin = 0 - for l in lines : - lines2.append(margin*" "+l) - return "\n".join(lines2) - - def get_subscribe_links(self): - return [[self.url,self.get_mime(),self.get_title()]] - def is_valid(self): - return self.validity - def get_links(self,mode="links_only"): - if mode not in self.links : - prepared_body = self.prepare(self.body,mode=mode) - results = self.render(prepared_body,mode=mode) - if results: - self.links[mode] = results[1] - for l in self.get_subscribe_links()[1:]: - self.links[mode].append(l[0]) - return self.links[mode] - def get_title(self): - return "Abstract title" - - # This function return a list of URL which should be downloaded - # before displaying the page (images in HTML pages, typically) - def get_images(self,mode="readable"): - if not mode in self.images: - self.get_body(mode=mode) - # we also invalidate the body that was done without images - self.rendered_text.pop(mode) - if mode in self.images: - return self.images[mode] - else: - return [] - #This function will give gemtext to the gemtext renderer - def prepare(self,body,mode=None): - return body - - def get_body(self,width=None,mode="readable"): - if not width: - width = term_width() - if mode not in self.rendered_text: - prepared_body = self.prepare(self.body,mode=mode) - result = self.render(prepared_body,width=width,mode=mode) - if result: - self.rendered_text[mode] = result[0] - self.links[mode] = result[1] - return self.rendered_text[mode] - - def _window_title(self,title,info=None): - title_r = self.representation(term_width()) - title_r.open_color("red") - title_r.open_color("bold") - title_r.add_text(title) - title_r.close_color("bold") - if info: - title_r.add_text(" (%s)"%info) - title_r.close_color("red") - return title_r.get_final() - - def display(self,mode="readable",window_title="",window_info=None,grep=None): - if not mode: mode = "readable" - wtitle = self._window_title(window_title,info=window_info) - body = wtitle + "\n" + self.get_body(mode=mode) - if not body: - return False - # We actually put the body in a tmpfile before giving it to less - if mode not in self.temp_file: - tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) - self.temp_file[mode] = tmpf.name - tmpf.write(body) - tmpf.close() - if mode not in self.less_histfile: - firsttime = True - tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) - self.less_histfile[mode] = tmpf.name - else: - firsttime = False - less_cmd(self.temp_file[mode], histfile=self.less_histfile[mode],cat=firsttime,grep=grep) - return True - - def get_temp_file(self,mode="readable"): - if mode in self.temp_file: - return self.temp_file[mode] - else: - return None - - # An instance of AbstractRenderer should have a self.render(body,width,mode) method. - # 3 modes are used : readable (by default), full and links_only (the fastest, when - # rendered content is not used, only the links are needed) - # The prepare() function is called before the rendering. It is useful if - # your renderer output in a format suitable for another existing renderer (such as gemtext) - -# Gemtext Rendering Engine -class GemtextRenderer(AbstractRenderer): - def get_mime(self): - return "text/gemini" - def get_title(self): - if self.title: - return self.title - elif self.body: - lines = self.body.splitlines() - for line in lines: - if line.startswith("#"): - self.title = line.strip("#").strip() - return self.title - if len(lines) > 0: - # If not title found, we take the first 50 char - # of the first line - title_line = lines[0].strip() - if len(title_line) > 50: - title_line = title_line[:49] + "…" - self.title = title_line - return self.title - else: - self.title = "Empty Page" - return self.title - else: - return "Unknown Gopher Page" - - #render_gemtext - def render(self,gemtext, width=None,mode=None): - if not width: - width = term_width() - r = self.representation(width) - links = [] - hidden_links = [] - preformatted = False - def format_link(url,index,name=None): - if "://" in url: - protocol,adress = url.split("://",maxsplit=1) - protocol = " %s" %protocol - else: - adress = url - protocol = "" - if "gemini" in protocol or "list" in protocol: - protocol = "" - if not name: - name = adress - line = "[%d%s] %s" % (index, protocol, name) - return line - for line in gemtext.splitlines(): - r.newline() - if line.startswith("```"): - preformatted = not preformatted - elif preformatted: - # infinite line to not wrap preformated - r.add_block(line+"\n") - elif len(line.strip()) == 0: - r.newparagraph(force=True) - elif line.startswith("=>"): - strippedline = line[2:].strip() - if strippedline: - links.append(strippedline) - splitted = strippedline.split(maxsplit=1) - url = splitted[0] - name = None - if len(splitted) > 1: - name = splitted[1] - link = format_link(url,len(links),name=name) - #r.open_color("blue") - #r.open_color("faint") - #r.open_color("underline") - startpos = link.find("] ") + 2 - r.startindent("",sub=startpos*" ") - r.add_text(link) - r.endindent() - #r.close_all() - elif line.startswith("* "): - line = line[1:].lstrip("\t ") - r.startindent("• ",sub=" ") - r.add_text(line) - r.endindent() - elif line.startswith(">"): - line = line[1:].lstrip("\t ") - r.startindent("> ") - r.add_text(line) - r.endindent() - elif line.startswith("###"): - line = line[3:].lstrip("\t ") - r.open_color("blue") - r.add_text(line) - r.close_color("blue") - elif line.startswith("##"): - line = line[2:].lstrip("\t ") - r.open_color("blue") - r.add_text(line) - r.close_color("blue") - elif line.startswith("#"): - line = line[1:].lstrip("\t ") - if not self.title: - self.title = line - r.open_color("bold") - r.open_color("blue") - r.open_color("underline") - r.add_text(line) - r.close_color("underline") - r.close_color("bold") - r.close_color("blue") - else: - if "://" in line: - words = line.split() - for w in words: - if "://" in w: - hidden_links.append(w) - r.add_text(line.rstrip()) - links += hidden_links - return r.get_final(), links - -class GopherRenderer(AbstractRenderer): - def get_mime(self): - return "text/gopher" - def get_title(self): - if not self.title: - self.title = "" - if self.body: - firstline = self.body.splitlines()[0] - firstline = firstline.split("\t")[0] - if firstline.startswith("i"): - firstline = firstline[1:] - self.title = firstline - return self.title - - #menu_or_text - def render(self,body,width=None,mode=None): - if not width: - width = term_width() - try: - render,links = self._render_goph(body,width=width,mode=mode) - except Exception as err: - print("Error rendering Gopher ",err) - r = self.representation(width) - r.add_block(body) - render = r.get_final() - links = [] - return render,links - - def _render_goph(self,body,width=None,mode=None): - if not width: - width = term_width() - # This was copied straight from Agena (then later adapted) - links = [] - r = self.representation(width) - for line in self.body.split("\n"): - r.newline() - if line.startswith("i"): - towrap = line[1:].split("\t")[0] - if len(towrap.strip()) > 0: - r.add_text(towrap) - else: - r.newparagraph() - elif not line.strip() in [".",""]: - parts = line.split("\t") - parts[-1] = parts[-1].strip() - if parts[-1] == "+": - parts = parts[:-1] - if len(parts) == 4: - name,path,host,port = parts - itemtype = name[0] - name = name[1:] - if port == "70": - port = "" - else: - port = ":%s"%port - if itemtype == "h" and path.startswith("URL:"): - url = path[4:] - else: - url = "gopher://%s%s/%s%s" %(host,port,itemtype,path) - url = url.replace(" ","%20") - linkline = url + " " + name - links.append(linkline) - towrap = "[%s] "%len(links)+ name - r.add_text(towrap) - else: - r.add_text(line) - return r.get_final(),links - - -class FolderRenderer(GemtextRenderer): - def get_mime(self): - return "Directory" - def prepare(self,body,mode=None): - def get_first_line(l): - path = os.path.join(listdir,l+".gmi") - with open(path) as f: - first_line = f.readline().strip() - f.close() - if first_line.startswith("#"): - return first_line - else: - return None - def write_list(l): - body = "" - for li in l: - path = "list:///%s"%li - gi = GeminiItem(path) - size = len(gi.get_links()) - body += "=> %s %s (%s items)\n" %(str(path),li,size) - return body - listdir = os.path.join(_DATA_DIR,"lists") - if self.url != listdir: - return "This is folder %s" %self.url - else: - self.title = "My lists" - lists = [] - if os.path.exists(listdir): - listfiles = os.listdir(listdir) - if len(listfiles) > 0: - for l in listfiles: - #removing the .gmi at the end of the name - lists.append(l[:-4]) - if len(lists) > 0: - body = "" - my_lists = [] - system_lists = [] - subscriptions = [] - frozen = [] - lists.sort() - for l in lists: - if l in ["history","to_fetch","archives","tour"]: - system_lists.append(l) - else: - first_line = get_first_line(l) - if first_line and "#subscribed" in first_line: - subscriptions.append(l) - elif first_line and "#frozen" in first_line: - frozen.append(l) - else: - my_lists.append(l) - if len(my_lists) > 0: - body+= "\n## Bookmarks Lists (updated during sync)\n" - body += write_list(my_lists) - if len(subscriptions) > 0: - body +="\n## Subscriptions (new links in those are added to tour)\n" - body += write_list(subscriptions) - if len(frozen) > 0: - body +="\n## Frozen (fetched but never updated)\n" - body += write_list(frozen) - if len(system_lists) > 0: - body +="\n## System Lists\n" - body += write_list(system_lists) - return body - -class FeedRenderer(GemtextRenderer): - def get_mime(self): - return "application/rss+xml" - def is_valid(self): - if _DO_FEED: - parsed = feedparser.parse(self.body) - else: - return False - if parsed.bozo: - return False - else: - #If no content, then fallback to HTML - return len(parsed.entries) > 0 - - def get_title(self): - if not self.title: - self.get_body() - return self.title - - def prepare(self,content,mode="readable",width=None): - if not width: - width = term_width() - self.title = "RSS/Atom feed" - page = "" - if _DO_FEED: - parsed = feedparser.parse(content) - else: - page += "Please install python-feedparser to handle RSS/Atom feeds\n" - self.validity = False - return page - if parsed.bozo: - page += "Invalid RSS feed\n\n" - page += str(parsed.bozo_exception) - self.validity = False - else: - if "title" in parsed.feed: - t = parsed.feed.title - else: - t = "Unknown" - self.title = "%s (XML feed)" %t - title = "# %s"%self.title - page += title + "\n" - if "updated" in parsed.feed: - page += "Last updated on %s\n\n" %parsed.feed.updated - if "subtitle" in parsed.feed: - page += parsed.feed.subtitle + "\n" - if "link" in parsed.feed: - page += "=> %s\n" %parsed.feed.link - page += "\n## Entries\n" - if len(parsed.entries) < 1: - self.validity = False - for i in parsed.entries: - line = "=> %s " %i.link - if "published" in i: - pub_date = time.strftime("%Y-%m-%d",i.published_parsed) - line += pub_date + " : " - if "title" in i: - line += "%s" %(i.title) - if "author" in i: - line += " (by %s)"%i.author - page += line + "\n" - if mode == "full": - if "summary" in i: - html = HtmlRenderer(i.summary,self.url,center=False) - rendered = html.get_body(width=None,mode="full") - page += "\n" - page += rendered - page += "\n------------\n\n" - return page - -class ImageRenderer(AbstractRenderer): - def get_mime(self): - return "image/*" - def is_valid(self): - if _RENDER_IMAGE: - return True - else: - return False - def get_links(self,mode=None): - return [] - def get_title(self): - return "Picture file" - def render(self,img,width=None,mode=None): - #with inline, we use symbols to be rendered with less. - #else we use the best possible renderer. - if mode == "links_only": - return "", [] - if not width: - width = term_width() - spaces = 0 - else: - spaces = int((term_width() - width)//2) - ansi_img = inline_image(img,width) - #Now centering the image - lines = ansi_img.splitlines() - new_img = "" - for l in lines: - new_img += spaces*" " + l + "\n" - return new_img, [] - def display(self,mode=None,window_title=None,window_info=None,grep=None): - if window_title: - print(self._window_title(window_title,info=window_info)) - terminal_image(self.body) - return True - -class HtmlRenderer(AbstractRenderer): - def get_mime(self): - return "text/html" - def is_valid(self): - if not _DO_HTML: - print("HTML document detected. Please install python-bs4 and python-readability.") - return _DO_HTML and self.validity - def get_subscribe_links(self): - subs = [[self.url,self.get_mime(),self.get_title()]] - soup = BeautifulSoup(self.body, 'html.parser') - links = soup.find_all("link",rel="alternate",recursive=True) - for l in links: - ty = l.get("type") - if ty : - if "rss" in ty or "atom" in ty or "feed" in ty: - subs.append([l.get("href"),ty,l.get("title")]) - return subs - - def get_title(self): - if self.title: - return self.title - elif self.body: - if _HAS_READABILITY: - try: - readable = Document(self.body) - self.title = readable.short_title() - return self.title - except Exception as err: - pass - soup = BeautifulSoup(self.body,"html.parser") - self.title = str(soup.title.string) - else: - return "" - - # Our own HTML engine (crazy, isn’t it?) - # Return [rendered_body, list_of_links] - # mode is either links_only, readable or full - def render(self,body,mode="readable",width=None,add_title=True): - if not width: - width = term_width() - if not _DO_HTML: - print("HTML document detected. Please install python-bs4 and python-readability.") - return - # This method recursively parse the HTML - r = self.representation(width,title=self.get_title(),center=self.center) - links = [] - # You know how bad html is when you realize that space sometimes meaningful, somtimes not. - # CR are not meaniningful. Except that, somethimes, they should be interpreted as spaces. - # HTML is real crap. At least the one people are generating. - - def render_image(src,width=40,mode=None): - ansi_img = "" - imgurl,imgdata = looks_like_base64(src,self.url) - if _RENDER_IMAGE and mode != "links_only" and imgurl: - try: - #4 followings line are there to translate the URL into cache path - g = GeminiItem(imgurl) - img = g.get_cache_path() - if imgdata: - with open(img,"wb") as cached: - cached.write(base64.b64decode(imgdata)) - cached.close() - if g.is_cache_valid(): - renderer = ImageRenderer(img,imgurl) - # Image are 40px wide except if terminal is smaller - if width > 40: - size = 40 - else: - size = width - ansi_img = "\n" + renderer.get_body(width=size,mode="inline") - except Exception as err: - #we sometimes encounter really bad formatted files or URL - ansi_img = textwrap.fill("[BAD IMG] %s - %s"%(err,src),width) + "\n" - return ansi_img - def sanitize_string(string): - #never start with a "\n" - #string = string.lstrip("\n") - string = string.replace("\r","").replace("\n", " ").replace("\t"," ") - endspace = string.endswith(" ") or string.endswith("\xa0") - startspace = string.startswith(" ") or string.startswith("\xa0") - toreturn = string.replace("\n", " ").replace("\t"," ").strip() - while " " in toreturn: - toreturn = toreturn.replace(" "," ") - toreturn = html.unescape(toreturn) - if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"): - toreturn += " " - if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"): - toreturn = " " + toreturn - return toreturn - def recursive_render(element,indent="",preformatted=False): - if element.name == "blockquote": - r.newparagraph() - r.startindent(" ",reverse=" ") - for child in element.children: - r.open_color("italic") - recursive_render(child,indent="\t") - r.close_color("italic") - r.endindent() - elif element.name in ["div","p"]: - r.newparagraph() - for child in element.children: - recursive_render(child,indent=indent) - r.newparagraph() - elif element.name in ["span"]: - r.add_space() - for child in element.children: - recursive_render(child,indent=indent) - r.add_space() - elif element.name in ["h1","h2","h3","h4","h5","h6"]: - r.open_color("blue") - if element.name in ["h1"]: - r.open_color("bold") - r.open_color("underline") - elif element.name in ["h2"]: - r.open_color("bold") - elif element.name in ["h5","h6"]: - r.open_color("faint") - for child in element.children: - r.newparagraph() - recursive_render(child) - r.newparagraph() - r.close_all() - elif element.name in ["code","tt"]: - for child in element.children: - recursive_render(child,indent=indent,preformatted=True) - elif element.name in ["pre"]: - r.newparagraph() - r.add_block(element.text) - r.newparagraph() - elif element.name in ["li"]: - r.startindent(" • ",sub=" ") - for child in element.children: - recursive_render(child,indent=indent) - r.endindent() - elif element.name in ["tr"]: - r.startindent("|",reverse="|") - for child in element.children: - recursive_render(child,indent=indent) - r.endindent() - elif element.name in ["td","th"]: - r.add_text("| ") - for child in element.children: - recursive_render(child) - r.add_text(" |") - # italics - elif element.name in ["em","i"]: - r.open_color("italic") - for child in element.children: - recursive_render(child,indent=indent,preformatted=preformatted) - r.close_color("italic") - #bold - elif element.name in ["b","strong"]: - r.open_color("bold") - for child in element.children: - recursive_render(child,indent=indent,preformatted=preformatted) - r.close_color("bold") - elif element.name == "a": - link = element.get('href') - # support for images nested in links - if link: - text = "" - imgtext = "" - #we display images first in a link - for child in element.children: - if child.name == "img": - recursive_render(child) - imgtext = "[IMG LINK %s]" - links.append(link+" "+text) - link_id = str(len(links)) - r.open_color("blue") - r.open_color("faint") - for child in element.children: - if child.name != "img": - recursive_render(child,preformatted=preformatted) - if imgtext != "": - r.center_line() - r.add_text(imgtext%link_id) - else: - r.add_text(" [%s]"%link_id) - r.close_color("blue") - r.close_color("faint") - else: - #No real link found - for child in element.children: - recursive_render(child,preformatted=preformatted) - elif element.name == "img": - src = element.get("src") - text = "" - ansi_img = render_image(src,width=width,mode=mode) - alt = element.get("alt") - if alt: - alt = sanitize_string(alt) - text += "[IMG] %s"%alt - else: - text += "[IMG]" - if src: - links.append(src+" "+text) - if not mode in self.images: - self.images[mode] = [] - abs_url = urllib.parse.urljoin(self.url, src) - self.images[mode].append(abs_url) - link_id = " [%s]"%(len(links)) - r.add_block(ansi_img) - r.open_color("faint") - r.open_color("yellow") - r.center_line() - r.add_text(text + link_id) - r.close_color("faint") - r.close_color("yellow") - r.newline() - elif element.name == "br": - r.newline() - elif element.name not in ["script","style","template"] and type(element) != Comment: - if element.string: - if preformatted : - r.open_color("faint") - r.add_text(element.string) - r.close_color("faint") - else: - s = sanitize_string(element.string) - if len(s.strip()) > 0: - r.add_text(s) - else: - for child in element.children: - recursive_render(child,indent=indent) - # the real render_html hearth - if mode == "full": - summary = body - elif _HAS_READABILITY: - try: - readable = Document(body) - summary = readable.summary() - except Exception as err: - summary = body - else: - summary = body - soup = BeautifulSoup(summary, 'html.parser') - #soup = BeautifulSoup(summary, 'html5lib') - if soup : - if soup.body : - recursive_render(soup.body) - else: - recursive_render(soup) - return r.get_final(),links - # Mapping mimetypes with renderers # (any content with a mimetype text/* not listed here will be rendered with as GemText) _FORMAT_RENDERERS = { - "text/gemini": GemtextRenderer, - "text/html" : HtmlRenderer, - "text/xml" : FeedRenderer, - "application/xml" : FeedRenderer, - "application/rss+xml" : FeedRenderer, - "application/atom+xml" : FeedRenderer, - "text/gopher": GopherRenderer, - "image/*": ImageRenderer + "text/gemini": ansirenderer.GemtextRenderer, + "text/html" : ansirenderer.HtmlRenderer, + "text/xml" : ansirenderer.FeedRenderer, + "application/xml" : ansirenderer.FeedRenderer, + "application/rss+xml" : ansirenderer.FeedRenderer, + "application/atom+xml" : ansirenderer.FeedRenderer, + "text/gopher": ansirenderer.GopherRenderer, + "image/*": ansirenderer.ImageRenderer } # Offpunk is organized as follow: # - a GeminiClient instance which handles the browsing of GeminiItems (= pages). @@ -1630,7 +707,7 @@ class GeminiItem(): def _set_renderer(self,mime=None): if self.local and os.path.isdir(self.get_cache_path()): - self.renderer = FolderRenderer("",self.get_cache_path()) + self.renderer = ansirenderer.FolderRenderer("",self.get_cache_path(),datadir=_DATA_DIR) return if not mime: mime = self.get_mime()