experiment

2023-07-03 11:43:06 +02:00 · 2023-07-03 11:43:06 +02:00 · 86a67d47fa
parent 1a40ad786f
commit 86a67d47fa
2 changed files with 941 additions and 933 deletions
--- a/ansirenderer.py
+++ b/ansirenderer.py
@ -0,0 +1,931 @@
 #!/bin/python
 import os
 # First, we define the different content->text renderers, outside of the rest
 # (They could later be factorized in other files or replaced)
 class AbstractRenderer():
    def __init__(self,content,url,center=True):
        self.url = url
        self.body = str(content)
        #there’s one rendered text and one links table per mode
        self.rendered_text = {}
        self.links = {}
        self.images = {}
        self.title = None
        self.validity = True
        self.temp_file = {}
        self.less_histfile = {}
        self.center = center
    #This class holds an internal representation of the HTML text
    class representation:
        def __init__(self,width,title=None,center=True):
            self.title=title
            self.center = center
            self.final_text = ""
            self.opened = []
            self.width = width
            self.last_line = ""
            self.last_line_colors = {}
            self.last_line_center = False
            self.new_paragraph = True
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""
            self.disabled_indents = None
            # each color is an [open,close] pair code
            self.colors = {
                            "bold"   : ["1","22"],
                            "faint"  : ["2","22"],
                            "italic" : ["3","23"],
                            "underline": ["4","24"],
                            "red"    : ["31","39"],
                            "yellow" : ["33","39"],
                            "blue"   : ["34","39"],
                       }
        def _insert(self,color,open=True):
            if open: o = 0
            else: o = 1
            pos = len(self.last_line)
            #we remember the position where to insert color codes
            if not pos in self.last_line_colors:
                self.last_line_colors[pos] = []
            #Two inverse code cancel each other
            if [color,int(not o)] in self.last_line_colors[pos]:
                self.last_line_colors[pos].remove([color,int(not o)])
            else:
                self.last_line_colors[pos].append([color,o])#+color+str(o))
        # Take self.last line and add ANSI codes to it before adding it to
        # self.final_text.
        def _endline(self):
            if len(self.last_line.strip()) > 0:
                for c in self.opened:
                    self._insert(c,open=False)
                nextline = ""
                added_char = 0
                #we insert the color code at the saved positions
                while len (self.last_line_colors) > 0:
                    pos,colors = self.last_line_colors.popitem()
                    #popitem itterates LIFO.
                    #So we go, backward, to the pos (starting at the end of last_line)
                    nextline = self.last_line[pos:] + nextline
                    ansicol = "\x1b["
                    for c,o in colors:
                        ansicol += self.colors[c][o] + ";"
                    ansicol = ansicol[:-1]+"m"
                    nextline = ansicol + nextline
                    added_char += len(ansicol)
                    self.last_line = self.last_line[:pos]
                nextline = self.last_line + nextline
                if self.last_line_center:
                    #we have to care about the ansi char while centering
                    width = term_width() + added_char
                    nextline = nextline.strip().center(width)
                    self.last_line_center = False
                else:
                    #should we lstrip the nextline in the addition ?
                    nextline = self.current_indent + nextline.lstrip() + self.r_indent
                    self.current_indent = self.s_indent
                self.final_text += nextline
                self.last_line = ""
                self.final_text += "\n"
                for c in self.opened:
                    self._insert(c,open=True)
            else:
                self.last_line = ""
        def center_line(self):
            self.last_line_center = True
        def open_color(self,color):
            if color in self.colors and color not in self.opened:
                self._insert(color,open=True)
                self.opened.append(color)
        def close_color(self,color):
            if color in self.colors and color in self.opened:
                self._insert(color,open=False)
                self.opened.remove(color)
        def close_all(self):
            if len(self.colors) > 0:
                self.last_line += "\x1b[0m"
                self.opened.clear()
        def startindent(self,indent,sub=None,reverse=None):
            self._endline()
            self.i_indent = indent
            self.current_indent = indent
            if sub:
                self.s_indent = sub
            else:
                self.s_indent = indent
            if reverse:
                self.r_indent = reverse
            else:
                self.r_indent = ""
        def endindent(self):
            self._endline()
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""
        def _disable_indents(self):
            self.disabled_indents = []
            self.disabled_indents.append(self.current_indent)
            self.disabled_indents.append(self.i_indent)
            self.disabled_indents.append(self.s_indent)
            self.disabled_indents.append(self.r_indent)
            self.endindent()
        def _enable_indents(self):
            if self.disabled_indents:
                self.current_indent = self.disabled_indents[0]
                self.i_indent = self.disabled_indents[1]
                self.s_indent = self.disabled_indents[2]
                self.r_indent = self.disabled_indents[3]
            self.disabled_indents = None
        def newline(self):
            self._endline()
        #A new paragraph implies 2 newlines (1 blank line between paragraphs).
        #It is only added if we have not already started one, to avoid plenty
        #of blank lines. force=True allows bypassing that limit.
        #new_paragraph becomes False as soon as text is entered into it.
        def newparagraph(self,force=False):
            if force or not self.new_paragraph:
                self._endline()
                self.final_text += "\n"
                self.new_paragraph = True
        def add_space(self):
            if len(self.last_line) > 0 and self.last_line[-1] != " ":
                self.last_line += " "
        def _title_first(self,intext=None):
            if self.title:
                if not self.title == intext:
                    self._disable_indents()
                    self.open_color("blue")
                    self.open_color("bold")
                    self.open_color("underline")
                    self.add_text(self.title)
                    self.close_all()
                    self.newparagraph()
                    self._enable_indents()
                self.title = None
        # Beware, blocks are not wrapped nor indented and left untouched!
        # They are mostly useful for pictures and preformatted text.
        def add_block(self,intext):
            # If necessary, we add the title before a block
            self._title_first()
            # we don’t want to indent blocks
            self._endline()
            self._disable_indents()
            self.final_text += self.current_indent + intext
            self.new_paragraph = False
            self._endline()
            self._enable_indents()
        def add_text(self,intext):
            self._title_first(intext=intext)
            lines = []
            last = (self.last_line + intext)
            self.last_line = ""
            # With the following, we basically cancel adding only spaces
            # on an empty line
            if len(last.strip()) > 0:
                self.new_paragraph = False
            else:
                last = last.strip()
            if len(last) > self.width:
                width = self.width - len(self.current_indent) - len(self.r_indent)
                spaces_left = len(last) - len(last.lstrip())
                spaces_right = len(last) - len(last.rstrip())
                lines = textwrap.wrap(last,width,drop_whitespace=True)
                self.last_line += spaces_left*" "
                while len(lines) > 1:
                    l = lines.pop(0)
                    self.last_line += l
                    self._endline()
                if len(lines) == 1:
                    li = lines[0]
                    self.last_line += li + spaces_right*" "
            else:
                self.last_line = last
        def get_final(self):
            """Close everything and return the whole text, centered in the terminal when requested."""
            self.close_all()
            self._endline()
            #if no content, we still add the title
            self._title_first()
            termspace = shutil.get_terminal_size()[0]
            #The following code inserts blank spaces to center the content
            if self.center and termspace > term_width():
                margin = int((termspace - term_width())//2)
            else:
                margin = 0
            padding = margin*" "
            return "\n".join(padding + l for l in self.final_text.splitlines())
    def get_subscribe_links(self):
        return [[self.url,self.get_mime(),self.get_title()]]
    def is_valid(self):
        return self.validity
    def get_links(self,mode="links_only"):
        if mode not in self.links :
            prepared_body = self.prepare(self.body,mode=mode)
            results = self.render(prepared_body,mode=mode)
            if results:
                self.links[mode] = results[1]
                for l in self.get_subscribe_links()[1:]:
                    self.links[mode].append(l[0])
        return self.links[mode]
    def get_title(self):
        """Return the placeholder title of the abstract renderer."""
        return "Abstract title"
    # This function returns a list of URLs which should be downloaded
    # before displaying the page (images in HTML pages, typically)
    def get_images(self,mode="readable"):
        if not mode in self.images:
            self.get_body(mode=mode)
            # we also invalidate the body that was done without images
            self.rendered_text.pop(mode)
        if mode in self.images:
            return self.images[mode]
        else:
            return []
    #This function will give gemtext to the gemtext renderer
    def prepare(self,body,mode=None):
        """Hook called before render(); the default implementation returns `body` unchanged."""
        return body
    def get_body(self,width=None,mode="readable"):
        if not width:
            width = term_width()
        if mode not in self.rendered_text:
            prepared_body = self.prepare(self.body,mode=mode)
            result = self.render(prepared_body,width=width,mode=mode)
            if result:
                self.rendered_text[mode] = result[0]
                self.links[mode] = result[1]
        return self.rendered_text[mode]
    def _window_title(self,title,info=None):
        """Render the window title: `title` in bold red, then the optional `info` in red between parentheses."""
        title_r = self.representation(term_width())
        for color in ("red","bold"):
            title_r.open_color(color)
        title_r.add_text(title)
        title_r.close_color("bold")
        if info:
            suffix = "   (%s)"%info
            title_r.add_text(suffix)
        title_r.close_color("red")
        return title_r.get_final()
    def display(self,mode="readable",window_title="",window_info=None,grep=None):
        """Show the rendered document through less_cmd.

        Returns False when there is no rendered body to show, True once the
        pager has been called. The text and the less history are stored in
        per-mode temporary files which are created only once.
        """
        if not mode: mode = "readable"
        # The emptiness test is done on the rendered body alone: once the
        # window title is prepended, the text can never be empty.
        rendered = self.get_body(mode=mode)
        if not rendered:
            return False
        wtitle = self._window_title(window_title,info=window_info)
        body = wtitle + "\n" + rendered
        # We actually put the body in a tmpfile before giving it to less
        if mode not in self.temp_file:
            with tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) as tmpf:
                tmpf.write(body)
            self.temp_file[mode] = tmpf.name
        if mode not in self.less_histfile:
            firsttime = True
            # Only the name is needed: the handle is closed right away
            with tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) as histf:
                self.less_histfile[mode] = histf.name
        else:
            firsttime = False
        less_cmd(self.temp_file[mode], histfile=self.less_histfile[mode],cat=firsttime,grep=grep)
        return True
    def get_temp_file(self,mode="readable"):
        if mode in self.temp_file:
            return self.temp_file[mode]
        else:
            return None
    # An instance of AbstractRenderer should have a self.render(body,width,mode) method.
    # 3 modes are used: readable (by default), full and links_only (the fastest, when
    # the rendered content is not used and only the links are needed).
    # The prepare() function is called before the rendering. It is useful if
    # your renderer outputs a format suitable for another existing renderer (such as gemtext).
 # Gemtext Rendering Engine
 class GemtextRenderer(AbstractRenderer):
    def get_mime(self):
        """Return the MIME type handled by this renderer."""
        return "text/gemini"
    def get_title(self):
        if self.title:
            return self.title
        elif self.body:
            lines = self.body.splitlines()
            for line in lines:
                if line.startswith("#"):
                    self.title = line.strip("#").strip()
                    return self.title
            if len(lines) > 0:
                # If not title found, we take the first 50 char
                # of the first line
                title_line = lines[0].strip()
                if len(title_line) > 50:
                    title_line = title_line[:49] + "…"
                self.title = title_line
                return self.title
            else:
                self.title = "Empty Page"
                return self.title
        else:
            return "Unknown Gopher Page"
    #render_gemtext
    def render(self,gemtext, width=None,mode=None):
        """Render gemtext line by line and return (rendered_text, links).

        `links` holds the content of the "=>" lines first, followed by the
        words containing "://" found in plain text lines.
        """
        width = width or term_width()
        r = self.representation(width)
        links = []
        hidden_links = []
        preformatted = False
        def format_link(url,index,name=None):
            # Builds "[index protocol] name"; the protocol is hidden for
            # gemini and list links, the address replaces a missing name
            if "://" in url:
                protocol,adress = url.split("://",maxsplit=1)
                protocol = " %s" %protocol
            else:
                protocol,adress = "",url
            if "gemini" in protocol or "list" in protocol:
                protocol = ""
            return "[%d%s] %s" % (index, protocol, name or adress)
        def add_indented(text,indent,sub=None):
            r.startindent(indent,sub=sub)
            r.add_text(text)
            r.endindent()
        for line in gemtext.splitlines():
            r.newline()
            if line.startswith("```"):
                preformatted = not preformatted
            elif preformatted:
                # preformatted lines are added as blocks so they are never wrapped
                r.add_block(line+"\n")
            elif not line.strip():
                r.newparagraph(force=True)
            elif line.startswith("=>"):
                target = line[2:].strip()
                if not target:
                    continue
                links.append(target)
                pieces = target.split(maxsplit=1)
                name = pieces[1] if len(pieces) > 1 else None
                link = format_link(pieces[0],len(links),name=name)
                # wrapped lines are aligned with the text after "[n] "
                startpos = link.find("] ") + 2
                add_indented(link,"",sub=startpos*" ")
            elif line.startswith("* "):
                add_indented(line[1:].lstrip("\t "),"• ",sub="  ")
            elif line.startswith(">"):
                add_indented(line[1:].lstrip("\t "),"> ")
            elif line.startswith("##"):
                # "##" and "###" headings are rendered the same way
                marker = 3 if line.startswith("###") else 2
                r.open_color("blue")
                r.add_text(line[marker:].lstrip("\t "))
                r.close_color("blue")
            elif line.startswith("#"):
                heading = line[1:].lstrip("\t ")
                if not self.title:
                    self.title = heading
                for color in ("bold","blue","underline"):
                    r.open_color(color)
                r.add_text(heading)
                for color in ("underline","bold","blue"):
                    r.close_color(color)
            else:
                hidden_links.extend(w for w in line.split() if "://" in w)
                r.add_text(line.rstrip())
        links += hidden_links
        return r.get_final(), links
 class GopherRenderer(AbstractRenderer):
    def get_mime(self):
        """Return the MIME type handled by this renderer."""
        return "text/gopher"
    def get_title(self):
        if not self.title:
            self.title = ""
            if self.body:
                firstline = self.body.splitlines()[0]
                firstline = firstline.split("\t")[0]
                if firstline.startswith("i"):
                    firstline = firstline[1:]
                self.title = firstline
        return self.title
    #menu_or_text
    def render(self,body,width=None,mode=None):
        """Render `body` as a Gopher menu; on any error, show it as a raw block without links."""
        width = width or term_width()
        try:
            render,links = self._render_goph(body,width=width,mode=mode)
        except Exception as err:
            # Best effort: a menu we cannot parse is displayed untouched
            print("Error rendering Gopher ",err)
            fallback = self.representation(width)
            fallback.add_block(body)
            return fallback.get_final(),[]
        return render,links
    def _render_goph(self,body,width=None,mode=None):
        """Render the Gopher menu `body` and return (rendered_text, links).

        Lines starting with "i" are information text. Other lines made of 4
        tab-separated fields (name, path, host, port) become numbered links,
        stored as "url name". Any other line is added as plain text.
        """
        if not width:
            width = term_width()
        # This was copied straight from Agena (then later adapted)
        links = []
        r = self.representation(width)
        # We render the body we were given, not self.body
        for line in body.split("\n"):
            r.newline()
            if line.startswith("i"):
                towrap = line[1:].split("\t")[0]
                if len(towrap.strip()) > 0:
                    r.add_text(towrap)
                else:
                    r.newparagraph()
            elif not line.strip() in [".",""]:
                parts = line.split("\t")
                parts[-1] = parts[-1].strip()
                if parts[-1] == "+":
                    parts = parts[:-1]
                # The first field holds the item type and the name: when it
                # is empty this is not a menu entry, and the line is shown
                # as plain text instead of raising an IndexError.
                if len(parts) == 4 and parts[0]:
                    name,path,host,port = parts
                    itemtype = name[0]
                    name = name[1:]
                    # the port is not written in the URL when it is "70"
                    if port == "70":
                        port = ""
                    else:
                        port = ":%s"%port
                    if itemtype == "h" and path.startswith("URL:"):
                        url = path[4:]
                    else:
                        url = "gopher://%s%s/%s%s" %(host,port,itemtype,path)
                    url = url.replace(" ","%20")
                    linkline = url + " " + name
                    links.append(linkline)
                    towrap = "[%s] "%len(links)+ name
                    r.add_text(towrap)
                else:
                    r.add_text(line)
        return r.get_final(),links
 class FolderRenderer(GemtextRenderer):
    def __init__(self,content,url,center=True,datadir=None):
        GemtextRenderer.__init__(self,content,url,center)
        self.datadir = datadir
    def get_mime(self):
        """Return the pseudo MIME type used for folders."""
        return "Directory"
    def prepare(self,body,mode=None):
        def get_first_line(l):
            path = os.path.join(listdir,l+".gmi")
            with open(path) as f:
                first_line = f.readline().strip()
                f.close()
            if first_line.startswith("#"):
                return first_line
            else:
                return None
        def write_list(l):
            body = ""
            for li in l:
                path = "list:///%s"%li
                gi = GeminiItem(path)
                size = len(gi.get_links())
                body += "=> %s %s (%s items)\n" %(str(path),li,size)
            return body
        listdir = os.path.join(self.datadir,"lists")
        if self.url != listdir:
            return "This is folder %s" %self.url
        else:
            self.title = "My lists"
            lists = []
            if os.path.exists(listdir):
                listfiles = os.listdir(listdir)
                if len(listfiles) > 0:
                    for l in listfiles:
                        #removing the .gmi at the end of the name
                        lists.append(l[:-4])
            if len(lists) > 0:
                body = ""
                my_lists = []
                system_lists = []
                subscriptions = []
                frozen = []
                lists.sort()
                for l in lists:
                    if l in ["history","to_fetch","archives","tour"]:
                        system_lists.append(l)
                    else:
                        first_line = get_first_line(l)
                        if first_line and "#subscribed" in first_line:
                            subscriptions.append(l)
                        elif first_line and "#frozen" in first_line:
                            frozen.append(l)
                        else:
                            my_lists.append(l)
                if len(my_lists) > 0:
                    body+= "\n## Bookmarks Lists (updated during sync)\n"
                    body += write_list(my_lists)
                if len(subscriptions) > 0:
                    body +="\n## Subscriptions (new links in those are added to tour)\n"
                    body += write_list(subscriptions)
                if len(frozen) > 0:
                    body +="\n## Frozen (fetched but never updated)\n"
                    body += write_list(frozen)
                if len(system_lists) > 0:
                    body +="\n## System Lists\n"
                    body += write_list(system_lists)
                return body
 class FeedRenderer(GemtextRenderer):
    def get_mime(self):
        """Return the MIME type handled by this renderer."""
        return "application/rss+xml"
    def is_valid(self):
        """Return True only when feed support is on and the body parses as a feed with entries."""
        if not _DO_FEED:
            return False
        parsed = feedparser.parse(self.body)
        if parsed.bozo:
            return False
        #If no content, then fallback to HTML
        return len(parsed.entries) > 0
    def get_title(self):
        if not self.title:
            self.get_body()
        return self.title
    def prepare(self,content,mode="readable",width=None):
        """Turn an RSS/Atom feed into gemtext, one link line per entry.

        In "full" mode, the summary of each entry is rendered as HTML and
        added after its link. self.title is set, and self.validity is set
        to False when the feed cannot be handled or has no entry.
        `width` is accepted for compatibility but not used.
        """
        self.title = "RSS/Atom feed"
        if not _DO_FEED:
            self.validity = False
            return "Please install python-feedparser to handle RSS/Atom feeds\n"
        parsed = feedparser.parse(content)
        if parsed.bozo:
            self.validity = False
            return "Invalid RSS feed\n\n" + str(parsed.bozo_exception)
        if "title" in parsed.feed:
            t = parsed.feed.title
        else:
            t = "Unknown"
        self.title = "%s (XML feed)" %t
        page = "# %s"%self.title + "\n"
        if "updated" in parsed.feed:
            page += "Last updated on %s\n\n" %parsed.feed.updated
        if "subtitle" in parsed.feed:
            page += parsed.feed.subtitle + "\n"
        if "link" in parsed.feed:
            page += "=> %s\n" %parsed.feed.link
        page += "\n## Entries\n"
        if len(parsed.entries) < 1:
            self.validity = False
        for i in parsed.entries:
            # An entry without link is written as plain text
            if "link" in i:
                line = "=> %s " %i.link
            else:
                line = ""
            # The parsed date may be missing or None even when a
            # "published" text exists: the date is then left out
            if "published_parsed" in i and i.published_parsed:
                pub_date = time.strftime("%Y-%m-%d",i.published_parsed)
                line += pub_date + " : "
            if "title" in i:
                line += "%s" %(i.title)
            if "author" in i:
                line += " (by %s)"%i.author
            page += line + "\n"
            if mode == "full" and "summary" in i:
                summary_renderer = HtmlRenderer(i.summary,self.url,center=False)
                rendered = summary_renderer.get_body(width=None,mode="full")
                page += "\n"
                page += rendered
                page += "\n------------\n\n"
        return page
 class ImageRenderer(AbstractRenderer):
    def get_mime(self):
        """Return the MIME pattern handled by this renderer."""
        return "image/*"
    def is_valid(self):
        """Return True when image rendering is enabled (_RENDER_IMAGE), False otherwise."""
        return bool(_RENDER_IMAGE)
    def get_links(self,mode=None):
        """Return an empty list: a picture has no links."""
        return []
    def get_title(self):
        """Return the generic title used for pictures."""
        return "Picture file"
    def render(self,img,width=None,mode=None):
        #with inline, we use symbols to be rendered with less.
        #else we use the best possible renderer.
        if mode == "links_only":
            return "", []
        if not width:
            width = term_width()
            spaces = 0
        else:
            spaces = int((term_width() - width)//2)
        ansi_img = inline_image(img,width)
        #Now centering the image
        lines = ansi_img.splitlines()
        new_img = ""
        for l in lines:
            new_img += spaces*" " + l + "\n"
        return new_img, []
    def display(self,mode=None,window_title=None,window_info=None,grep=None):
        """Print the window title, if any, then hand self.body to terminal_image; always returns True."""
        if window_title:
            header = self._window_title(window_title,info=window_info)
            print(header)
        terminal_image(self.body)
        return True
 class HtmlRenderer(AbstractRenderer):
    def get_mime(self):
        """Return the MIME type handled by this renderer."""
        return "text/html"
    def is_valid(self):
        """Return the validity flag when HTML support is on; otherwise print a hint and return _DO_HTML."""
        if _DO_HTML:
            return self.validity
        print("HTML document detected. Please install python-bs4 and python-readability.")
        return _DO_HTML
    def get_subscribe_links(self):
        """Return [url, type, title] entries: the page itself, then its alternate links looking like feeds."""
        subs = [[self.url,self.get_mime(),self.get_title()]]
        soup = BeautifulSoup(self.body, 'html.parser')
        for l in soup.find_all("link",rel="alternate",recursive=True):
            ty = l.get("type")
            # only the types mentioning rss, atom or feed are kept
            if ty and any(kind in ty for kind in ("rss","atom","feed")):
                subs.append([l.get("href"),ty,l.get("title")])
        return subs
    def get_title(self):
        if self.title:
            return self.title
        elif self.body:
            if _HAS_READABILITY:
                try:
                    readable = Document(self.body)
                    self.title = readable.short_title()
                    return self.title
                except Exception as err:
                    pass
            soup = BeautifulSoup(self.body,"html.parser")
            self.title = str(soup.title.string)
        else:
            return ""
    # Our own HTML engine (crazy, isn’t it?)
    # Return [rendered_body, list_of_links]
    # mode is either links_only, readable or full
    def render(self,body,mode="readable",width=None,add_title=True):
        if not width:
            width = term_width()
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
            return
        # This method recursively parse the HTML
        r = self.representation(width,title=self.get_title(),center=self.center)
        links = []
        # You know how bad HTML is when you realize that spaces are sometimes meaningful, sometimes not.
        # CRs are not meaningful. Except that, sometimes, they should be interpreted as spaces.
        # HTML is real crap. At least the one people are generating.
        def render_image(src,width=40,mode=None):
            ansi_img = ""
            imgurl,imgdata = looks_like_base64(src,self.url)
            if _RENDER_IMAGE and mode != "links_only" and imgurl:
                try:
                    #4 followings line are there to translate the URL into cache path
                    g = GeminiItem(imgurl)
                    img = g.get_cache_path()
                    if imgdata:
                        with open(img,"wb") as cached:
                            cached.write(base64.b64decode(imgdata))
                            cached.close()
                    if g.is_cache_valid():
                        renderer = ImageRenderer(img,imgurl)
                        # Image are 40px wide except if terminal is smaller
                        if width > 40:
                            size = 40
                        else:
                            size = width
                        ansi_img = "\n" + renderer.get_body(width=size,mode="inline")
                except Exception as err:
                    #we sometimes encounter really bad formatted files or URL
                    ansi_img = textwrap.fill("[BAD IMG] %s - %s"%(err,src),width) + "\n"
            return ansi_img
        def sanitize_string(string):
            #never start with a "\n"
            #string = string.lstrip("\n")
            string = string.replace("\r","").replace("\n", " ").replace("\t"," ")
            endspace = string.endswith(" ") or string.endswith("\xa0")
            startspace = string.startswith(" ") or string.startswith("\xa0")
            toreturn = string.replace("\n", " ").replace("\t"," ").strip()
            while "  " in toreturn:
                toreturn = toreturn.replace("  "," ")
            toreturn = html.unescape(toreturn)
            if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"):
                toreturn += " "
            if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"):
                toreturn = " " + toreturn
            return toreturn
        def recursive_render(element,indent="",preformatted=False):
            """Render one parsed HTML node and its descendants into `r`.

            The function dispatches on element.name; the last branch handles
            text nodes and every tag without a dedicated rule.
            Link targets and image sources met on the way are appended to the
            enclosing `links` list; the bracketed numbers written in the text
            are 1-based positions in that list.

            element: a node of the parsed tree (tag or string).
            indent: only forwarded to recursive calls; nothing in this
                function reads its value.
            preformatted: when true, text nodes are added verbatim in "faint"
                instead of going through sanitize_string().
            """
            if element.name == "blockquote":
                # quotes are indented on both sides and rendered in italic
                r.newparagraph()
                r.startindent("   ",reverse="     ")
                for child in element.children:
                    r.open_color("italic")
                    recursive_render(child,indent="\t")
                    r.close_color("italic")
                r.endindent()
            elif element.name in ["div","p"]:
                # block containers: a paragraph break before and after
                r.newparagraph()
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.newparagraph()
            elif element.name in ["span"]:
                # inline container: make sure it is separated by a space
                r.add_space()
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.add_space()
            elif element.name in ["h1","h2","h3","h4","h5","h6"]:
                # headings are blue; the level only changes the extra attributes
                r.open_color("blue")
                if element.name in ["h1"]:
                    r.open_color("bold")
                    r.open_color("underline")
                elif element.name in ["h2"]:
                    r.open_color("bold")
                elif element.name in ["h5","h6"]:
                    r.open_color("faint")
                # NOTE(review): close_all() runs inside the loop, so the heading
                # colors are dropped after the first child and every child gets
                # its own paragraph — confirm this is intended.
                for child in element.children:
                    r.newparagraph()
                    recursive_render(child)
                    r.newparagraph()
                    r.close_all()
            elif element.name in ["code","tt"]:
                # children are rendered with preformatted=True
                for child in element.children:
                   recursive_render(child,indent=indent,preformatted=True)
            elif element.name in ["pre"]:
                # the text of <pre> is added as a single block, between two
                # paragraph breaks
                r.newparagraph()
                r.add_block(element.text)
                r.newparagraph()
            elif element.name in ["li"]:
                # bullet on the first line, plain indent on the following ones
                r.startindent(" • ",sub="   ")
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.endindent()
            elif element.name in ["tr"]:
                # a table row is a line surrounded by "|"
                r.startindent("|",reverse="|")
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.endindent()
            elif element.name in ["td","th"]:
                r.add_text("| ")
                for child in element.children:
                    recursive_render(child)
                r.add_text(" |")
            # italics
            elif element.name in ["em","i"]:
                r.open_color("italic")
                for child in element.children:
                    recursive_render(child,indent=indent,preformatted=preformatted)
                r.close_color("italic")
            #bold
            elif element.name in ["b","strong"]:
                r.open_color("bold")
                for child in element.children:
                    recursive_render(child,indent=indent,preformatted=preformatted)
                r.close_color("bold")
            elif element.name == "a":
                link = element.get('href')
                # support for images nested in links
                if link:
                    text = ""
                    imgtext = ""
                    #we display images first in a link
                    for child in element.children:
                        if child.name == "img":
                            recursive_render(child)
                            imgtext = "[IMG LINK %s]"
                    # NOTE(review): `text` is never filled, so the entry stored
                    # in links is the href followed by a single space.
                    links.append(link+" "+text)
                    link_id = str(len(links))
                    r.open_color("blue")
                    r.open_color("faint")
                    # second pass: everything in the link that is not an image
                    for child in element.children:
                        if child.name != "img":
                            recursive_render(child,preformatted=preformatted)
                    if imgtext != "":
                        r.center_line()
                        r.add_text(imgtext%link_id)
                    else:
                        r.add_text(" [%s]"%link_id)
                    r.close_color("blue")
                    r.close_color("faint")
                else:
                    #No real link found
                    for child in element.children:
                        recursive_render(child,preformatted=preformatted)
            elif element.name == "img":
                src = element.get("src")
                text = ""
                # render_image is called before src is checked, so it also
                # receives a missing src
                ansi_img = render_image(src,width=width,mode=mode)
                alt = element.get("alt")
                if alt:
                    alt = sanitize_string(alt)
                    text += "[IMG] %s"%alt
                else:
                    text += "[IMG]"
                # an image without src produces no output at all
                if src:
                    links.append(src+" "+text)
                    if not mode in self.images:
                        self.images[mode] = []
                    # the absolute URL of the image is remembered per mode
                    abs_url = urllib.parse.urljoin(self.url, src)
                    self.images[mode].append(abs_url)
                    link_id = " [%s]"%(len(links))
                    r.add_block(ansi_img)
                    r.open_color("faint")
                    r.open_color("yellow")
                    r.center_line()
                    r.add_text(text + link_id)
                    r.close_color("faint")
                    r.close_color("yellow")
                    r.newline()
            elif element.name == "br":
                r.newline()
            # script, style, template and comments are dropped entirely
            elif element.name not in ["script","style","template"] and type(element) != Comment:
                if element.string:
                    if preformatted :
                        r.open_color("faint")
                        r.add_text(element.string)
                        r.close_color("faint")
                    else:
                        s = sanitize_string(element.string)
                        # whitespace-only strings are not added
                        if len(s.strip()) > 0:
                            r.add_text(s)
                else:
                    # NOTE(review): preformatted is not forwarded here, so it is
                    # lost below unknown tags — confirm this is intended.
                    for child in element.children:
                        recursive_render(child,indent=indent)
        # the real render_html hearth
        if mode == "full":
            summary = body
        elif _HAS_READABILITY:
            try:
                readable = Document(body)
                summary = readable.summary()
            except Exception as err:
                summary = body
        else:
            summary = body
        soup = BeautifulSoup(summary, 'html.parser')
        #soup = BeautifulSoup(summary, 'html5lib')
        if soup :
            if soup.body :
                recursive_render(soup.body)
            else:
                recursive_render(soup)
        return r.get_final(),links
--- a/offpunk.py
+++ b/offpunk.py
@ -48,6 +48,7 @@ import webbrowser
 import html
 import base64
 import subprocess
 import ansirenderer
 # In terms of arguments, this can take an input file/string to be passed to
 # stdin, a parameter to do (well-escaped) "%" replacement on the command, a
@ -394,941 +395,17 @@ standard_ports = {
        "https"  : 443,
        "spartan": 300,
 }
 # First, we define the different content->text renderers, outside of the rest
 # (They could later be factorized in other files or replaced)
 class AbstractRenderer():
    """Common base of the content->text renderers.

    A subclass has to provide render(body,width,mode), which returns a
    (rendered_text, links) pair, and get_mime(). Rendered text, links and
    images are cached per mode in dictionaries keyed by the mode name.
    """
    def __init__(self,content,url,center=True):
        """Keep the raw content (converted with str()) and prepare the caches.

        content: the document to render.
        url: address of the document.
        center: forwarded by subclasses to the representation, which uses it
            to decide whether the final text gets a left margin.
        """
        self.url = url
        self.body = str(content)
        #there’s one rendered text and one links table per mode
        self.rendered_text = {}
        self.links = {}
        self.images = {}
        self.title = None
        self.validity = True
        self.temp_file = {}
        self.less_histfile = {}
        self.center = center
    #This class holds an internal representation of the rendered text
    class representation:
        """Text buffer handling wrapping, indentation and ANSI colors.

        Text is accumulated in self.last_line; finished lines are moved to
        self.final_text by _endline(), which is also where the color codes
        are inserted.
        """
        def __init__(self,width,title=None,center=True):
            """width: maximum line length; title: optional title written
            before the first content; center: see get_final()."""
            self.title=title
            self.center = center
            self.final_text = ""
            self.opened = []
            self.width = width
            self.last_line = ""
            self.last_line_colors = {}
            self.last_line_center = False
            self.new_paragraph = True
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""
            self.disabled_indents = None
            # each color is an [open,close] pair code
            self.colors = {
                            "bold"   : ["1","22"],
                            "faint"  : ["2","22"],
                            "italic" : ["3","23"],
                            "underline": ["4","24"],
                            "red"    : ["31","39"],
                            "yellow" : ["33","39"],
                            "blue"   : ["34","39"],
                       }
        def _insert(self,color,open=True):
            """Record that `color` opens (or closes) at the current end of
            self.last_line."""
            o = 0 if open else 1
            pos = len(self.last_line)
            #we remember the position where to insert color codes
            if pos not in self.last_line_colors:
                self.last_line_colors[pos] = []
            #Two inverse codes at the same position cancel each other
            if [color,int(not o)] in self.last_line_colors[pos]:
                self.last_line_colors[pos].remove([color,int(not o)])
            else:
                self.last_line_colors[pos].append([color,o])
        # Take self.last_line and add ANSI codes to it before adding it to
        # self.final_text.
        def _endline(self):
            """Flush self.last_line into self.final_text.

            A line containing only whitespace is discarded. Colors that are
            still opened are closed at the end of the line and opened again
            at the start of the next one.
            """
            if len(self.last_line.strip()) > 0:
                for c in self.opened:
                    self._insert(c,open=False)
                nextline = ""
                added_char = 0
                #we insert the color code at the saved positions
                while len (self.last_line_colors) > 0:
                    pos,colors = self.last_line_colors.popitem()
                    #popitem iterates LIFO.
                    #So we go, backward, to the pos (starting at the end of last_line)
                    nextline = self.last_line[pos:] + nextline
                    # The list is empty when an open and a close cancelled
                    # each other at this position: emitting a code for it
                    # would produce the malformed sequence "\x1bm".
                    if colors:
                        ansicol = "\x1b[" + ";".join(self.colors[c][o] for c,o in colors) + "m"
                        nextline = ansicol + nextline
                        added_char += len(ansicol)
                    self.last_line = self.last_line[:pos]
                nextline = self.last_line + nextline
                if self.last_line_center:
                    #we have to care about the ansi char while centering
                    width = term_width() + added_char
                    nextline = nextline.strip().center(width)
                    self.last_line_center = False
                else:
                    #should we lstrip the nextline in the addition ?
                    nextline = self.current_indent + nextline.lstrip() + self.r_indent
                    # after the first line of an indented block, the
                    # "sub" indent is used
                    self.current_indent = self.s_indent
                self.final_text += nextline
                self.last_line = ""
                self.final_text += "\n"
                for c in self.opened:
                    self._insert(c,open=True)
            else:
                self.last_line = ""
        def center_line(self):
            """Ask for the current line to be centered when it is flushed."""
            self.last_line_center = True
        def open_color(self,color):
            """Start `color`; unknown or already opened colors are ignored."""
            if color in self.colors and color not in self.opened:
                self._insert(color,open=True)
                self.opened.append(color)
        def close_color(self,color):
            """Stop `color`; unknown or not opened colors are ignored."""
            if color in self.colors and color in self.opened:
                self._insert(color,open=False)
                self.opened.remove(color)
        def close_all(self):
            """Append an ANSI reset to the current line and forget every
            opened color."""
            # NOTE(review): the test is on self.colors (the palette, never
            # empty), so the reset is always written. self.opened was
            # possibly meant; kept as is because it changes the output.
            if len(self.colors) > 0:
                self.last_line += "\x1b[0m"
                self.opened.clear()
        def startindent(self,indent,sub=None,reverse=None):
            """Flush the current line and start an indented block.

            indent: prefix of the first line.
            sub: prefix of the following lines (defaults to indent).
            reverse: suffix appended to each line (defaults to nothing).
            """
            self._endline()
            self.i_indent = indent
            self.current_indent = indent
            if sub:
                self.s_indent = sub
            else:
                self.s_indent = indent
            if reverse:
                self.r_indent = reverse
            else:
                self.r_indent = ""
        def endindent(self):
            """Flush the current line and remove every indent."""
            self._endline()
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""
        def _disable_indents(self):
            """Save the current indents and remove them (see _enable_indents)."""
            self.disabled_indents = []
            self.disabled_indents.append(self.current_indent)
            self.disabled_indents.append(self.i_indent)
            self.disabled_indents.append(self.s_indent)
            self.disabled_indents.append(self.r_indent)
            self.endindent()
        def _enable_indents(self):
            """Restore the indents saved by _disable_indents, if any."""
            if self.disabled_indents:
                self.current_indent = self.disabled_indents[0]
                self.i_indent = self.disabled_indents[1]
                self.s_indent = self.disabled_indents[2]
                self.r_indent = self.disabled_indents[3]
            self.disabled_indents = None
        def newline(self):
            """Flush the current line."""
            self._endline()
        #A new paragraph implies 2 newlines (1 blank line between paragraphs)
        #But it is only done if one was not already started, to avoid plenty
        #of blank lines. force=True allows to bypass that limit.
        #new_paragraph becomes false as soon as text is entered into it
        def newparagraph(self,force=False):
            """Flush the current line and add a blank line."""
            if force or not self.new_paragraph:
                self._endline()
                self.final_text += "\n"
                self.new_paragraph = True
        def add_space(self):
            """Add a space unless the line is empty or already ends with one."""
            if len(self.last_line) > 0 and self.last_line[-1] != " ":
                self.last_line += " "
        def _title_first(self,intext=None):
            """Write the pending title, once, before any other content.

            The title is not written when it is equal to `intext` (the text
            about to be added), to avoid displaying it twice.
            """
            if self.title:
                if not self.title == intext:
                    self._disable_indents()
                    self.open_color("blue")
                    self.open_color("bold")
                    self.open_color("underline")
                    self.add_text(self.title)
                    self.close_all()
                    self.newparagraph()
                    self._enable_indents()
                self.title = None
        # Beware, blocks are not wrapped nor indented and left untouched!
        # They are mostly useful for pictures and preformatted text.
        def add_block(self,intext):
            """Append `intext` directly to the final text."""
            # If necessary, we add the title before a block
            self._title_first()
            # we don’t want to indent blocks
            self._endline()
            self._disable_indents()
            self.final_text += self.current_indent + intext
            self.new_paragraph = False
            self._endline()
            self._enable_indents()
        def add_text(self,intext):
            """Append `intext` to the current line, wrapping it when the
            line becomes longer than self.width."""
            self._title_first(intext=intext)
            last = (self.last_line + intext)
            self.last_line = ""
            # With the following, we basically cancel adding only spaces
            # on an empty line
            if len(last.strip()) > 0:
                self.new_paragraph = False
            else:
                last = last.strip()
            if len(last) > self.width:
                width = self.width - len(self.current_indent) - len(self.r_indent)
                # textwrap drops the surrounding whitespace: it is counted
                # here to be put back around the wrapped text
                spaces_left = len(last) - len(last.lstrip())
                spaces_right = len(last) - len(last.rstrip())
                lines = textwrap.wrap(last,width,drop_whitespace=True)
                self.last_line += spaces_left*" "
                # every line but the last one is complete and flushed
                while len(lines) > 1:
                    l = lines.pop(0)
                    self.last_line += l
                    self._endline()
                if len(lines) == 1:
                    li = lines[0]
                    self.last_line += li + spaces_right*" "
            else:
                self.last_line = last
        def get_final(self):
            """Flush what is pending and return the whole rendered text.

            When self.center is true and the terminal is wider than
            term_width(), every line is shifted right by half the difference.
            """
            self.close_all()
            self._endline()
            #if no content, we still add the title
            self._title_first()
            lines = self.final_text.splitlines()
            lines2 = []
            termspace = shutil.get_terminal_size()[0]
            #Following code inserts blank spaces to center the content
            if self.center and termspace > term_width():
                margin = int((termspace - term_width())//2)
            else:
                margin = 0
            for l in lines :
                lines2.append(margin*" "+l)
            return "\n".join(lines2)
    def get_subscribe_links(self):
        """Return a list of [url, mime, title] entries; here only the page
        itself."""
        return [[self.url,self.get_mime(),self.get_title()]]
    def is_valid(self):
        """Return the validity flag of this renderer."""
        return self.validity
    def get_links(self,mode="links_only"):
        """Return the links of the document for `mode`, rendering it if
        that mode has no links table yet.

        An empty list is returned when render() produces nothing.
        """
        if mode not in self.links :
            prepared_body = self.prepare(self.body,mode=mode)
            results = self.render(prepared_body,mode=mode)
            if results:
                self.links[mode] = results[1]
                # the first subscribe link is the page itself
                for l in self.get_subscribe_links()[1:]:
                    self.links[mode].append(l[0])
        # render() may return nothing: nothing is cached in that case
        return self.links.get(mode,[])
    def get_title(self):
        """Return the title of the document (placeholder in this class)."""
        return "Abstract title"
    # This function returns a list of URLs which should be downloaded
    # before displaying the page (images in HTML pages, typically)
    def get_images(self,mode="readable"):
        """Return the image URLs recorded for `mode`, or an empty list."""
        if not mode in self.images:
            self.get_body(mode=mode)
            # we also invalidate the body that was done without images
            # (there is none if the rendering produced nothing)
            self.rendered_text.pop(mode,None)
        if mode in self.images:
            return self.images[mode]
        else:
            return []
    #This function will give gemtext to the gemtext renderer
    def prepare(self,body,mode=None):
        """Hook called before render(); returns `body` unchanged here."""
        return body
    def get_body(self,width=None,mode="readable"):
        """Return the rendered text for `mode`, rendering it if needed.

        width defaults to term_width(). An empty string is returned when
        render() produces nothing.
        """
        if not width:
            width = term_width()
        if mode not in self.rendered_text:
            prepared_body = self.prepare(self.body,mode=mode)
            result = self.render(prepared_body,width=width,mode=mode)
            if result:
                self.rendered_text[mode] = result[0]
                self.links[mode] = result[1]
        # render() may return nothing: nothing is cached in that case
        return self.rendered_text.get(mode,"")
    def _window_title(self,title,info=None):
        """Return `title` in bold red, followed by `info` in parentheses."""
        title_r = self.representation(term_width())
        title_r.open_color("red")
        title_r.open_color("bold")
        title_r.add_text(title)
        title_r.close_color("bold")
        if info:
            title_r.add_text("   (%s)"%info)
        title_r.close_color("red")
        return title_r.get_final()
    def display(self,mode="readable",window_title="",window_info=None,grep=None):
        """Write the rendered page to a temporary file and hand it to
        less_cmd(). Returns True once done."""
        if not mode: mode = "readable"
        wtitle = self._window_title(window_title,info=window_info)
        body = wtitle + "\n" + self.get_body(mode=mode)
        if not body:
            return False
        # We actually put the body in a tmpfile before giving it to less
        if mode not in self.temp_file:
            with tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) as tmpf:
                self.temp_file[mode] = tmpf.name
                tmpf.write(body)
        firsttime = mode not in self.less_histfile
        if firsttime:
            # only the name of the file is needed: the handle is closed
            # immediately instead of being left open
            with tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) as tmpf:
                self.less_histfile[mode] = tmpf.name
        less_cmd(self.temp_file[mode], histfile=self.less_histfile[mode],cat=firsttime,grep=grep)
        return True
    def get_temp_file(self,mode="readable"):
        """Return the path of the temporary file written by display() for
        `mode`, or None."""
        if mode in self.temp_file:
            return self.temp_file[mode]
        else:
            return None
    # An instance of AbstractRenderer should have a self.render(body,width,mode) method.
    # 3 modes are used : readable (by default), full and links_only (the fastest, when
    # rendered content is not used, only the links are needed)
    # The prepare() function is called before the rendering. It is useful if
    # your renderer outputs in a format suitable for another existing renderer (such as gemtext)
 # Gemtext Rendering Engine
 class GemtextRenderer(AbstractRenderer):
    """Renderer for gemtext documents."""
    def get_mime(self):
        return "text/gemini"
    def get_title(self):
        """Return the title of the page and remember it in self.title.

        The title is the first line starting with "#"; without any, the
        first line of the body, cut to 50 characters.
        """
        if self.title:
            return self.title
        elif self.body:
            lines = self.body.splitlines()
            for line in lines:
                if line.startswith("#"):
                    self.title = line.strip("#").strip()
                    return self.title
            if len(lines) > 0:
                # If no title is found, we take the first 50 chars
                # of the first line
                title_line = lines[0].strip()
                if len(title_line) > 50:
                    title_line = title_line[:49] + "…"
                self.title = title_line
                return self.title
            else:
                self.title = "Empty Page"
                return self.title
        else:
            # This is the gemtext renderer: the message used to say "Gopher"
            return "Unknown Gemtext Page"
    #render_gemtext
    def render(self,gemtext, width=None,mode=None):
        """Render `gemtext` and return (rendered_text, links).

        links contains the "=>" lines first (in order of appearance, which
        is the number displayed in front of each of them), then the words
        of plain text lines containing "://".
        """
        if not width:
            width = term_width()
        r = self.representation(width)
        links = []
        hidden_links = []
        preformatted = False
        def format_link(url,index,name=None):
            # Build the displayed line "[index protocol] name". The protocol
            # is not shown for gemini and list URLs, and the address is used
            # as name when the link has none.
            if "://" in url:
                protocol,adress = url.split("://",maxsplit=1)
                protocol = " %s" %protocol
            else:
                adress = url
                protocol = ""
            if "gemini" in protocol or "list" in protocol:
                protocol = ""
            if not name:
                name = adress
            line = "[%d%s] %s" % (index, protocol, name)
            return line
        for line in gemtext.splitlines():
            r.newline()
            if line.startswith("```"):
                # toggle line: it is not displayed itself
                preformatted = not preformatted
            elif preformatted:
                # added as a block so that preformatted text is not wrapped
                r.add_block(line+"\n")
            elif len(line.strip()) == 0:
                r.newparagraph(force=True)
            elif line.startswith("=>"):
                strippedline = line[2:].strip()
                if strippedline:
                    links.append(strippedline)
                    splitted = strippedline.split(maxsplit=1)
                    url = splitted[0]
                    name = None
                    if len(splitted) > 1:
                        name = splitted[1]
                    link = format_link(url,len(links),name=name)
                    # wrapped lines are aligned with the start of the name
                    startpos = link.find("] ") + 2
                    r.startindent("",sub=startpos*" ")
                    r.add_text(link)
                    r.endindent()
            elif line.startswith("* "):
                line = line[1:].lstrip("\t ")
                r.startindent("• ",sub="  ")
                r.add_text(line)
                r.endindent()
            elif line.startswith(">"):
                line = line[1:].lstrip("\t ")
                r.startindent("> ")
                r.add_text(line)
                r.endindent()
            elif line.startswith("###"):
                line = line[3:].lstrip("\t ")
                r.open_color("blue")
                r.add_text(line)
                r.close_color("blue")
            elif line.startswith("##"):
                line = line[2:].lstrip("\t ")
                r.open_color("blue")
                r.add_text(line)
                r.close_color("blue")
            elif line.startswith("#"):
                line = line[1:].lstrip("\t ")
                # the first level-1 heading becomes the title if none is known
                if not self.title:
                    self.title = line
                r.open_color("bold")
                r.open_color("blue")
                r.open_color("underline")
                r.add_text(line)
                r.close_color("underline")
                r.close_color("bold")
                r.close_color("blue")
            else:
                # URLs written in plain text are collected but not numbered
                if "://" in line:
                    words = line.split()
                    for w in words:
                        if "://" in w:
                            hidden_links.append(w)
                r.add_text(line.rstrip())
        links += hidden_links
        return r.get_final(), links
 class GopherRenderer(AbstractRenderer):
    """Renderer for gopher menus, with a fallback for plain text."""
    def get_mime(self):
        return "text/gopher"
    def get_title(self):
        """Return the first field of the first line of the body, without
        its leading "i" if any. The result is kept in self.title."""
        if not self.title:
            self.title = ""
            if self.body:
                firstline = self.body.splitlines()[0]
                firstline = firstline.split("\t")[0]
                if firstline.startswith("i"):
                    firstline = firstline[1:]
                self.title = firstline
        return self.title
    #menu_or_text
    def render(self,body,width=None,mode=None):
        """Return (rendered_text, links) for `body`.

        If the menu rendering raises, the error is printed and the body is
        displayed as a raw block, without links.
        """
        if not width:
            width = term_width()
        try:
            render,links = self._render_goph(body,width=width,mode=mode)
        except Exception as err:
            print("Error rendering Gopher ",err)
            r = self.representation(width)
            r.add_block(body)
            render = r.get_final()
            links = []
        return render,links
    def _render_goph(self,body,width=None,mode=None):
        """Render `body` line by line as a gopher menu.

        Lines starting with "i" are displayed as text. Other lines made of
        four tab-separated fields (name, path, host, port) become numbered
        links. Anything else is displayed as it is.
        """
        if not width:
            width = term_width()
        # This was copied straight from Agena (then later adapted)
        links = []
        r = self.representation(width)
        # The body received as argument is rendered (self.body was used
        # before, which ignored what render() was given).
        for line in body.split("\n"):
            r.newline()
            if line.startswith("i"):
                towrap = line[1:].split("\t")[0]
                if len(towrap.strip()) > 0:
                    r.add_text(towrap)
                else:
                    r.newparagraph()
            elif not line.strip() in [".",""]:
                parts = line.split("\t")
                parts[-1] = parts[-1].strip()
                if parts[-1] == "+":
                    parts = parts[:-1]
                # The first field must hold at least the item type. Without
                # that check, an empty one raised an IndexError which sent
                # the whole page to the raw fallback of render().
                if len(parts) == 4 and parts[0]:
                    name,path,host,port = parts
                    itemtype = name[0]
                    name = name[1:]
                    # port 70 is not written in the URL
                    if port == "70":
                        port = ""
                    else:
                        port = ":%s"%port
                    if itemtype == "h" and path.startswith("URL:"):
                        url = path[4:]
                    else:
                        url = "gopher://%s%s/%s%s" %(host,port,itemtype,path)
                    url = url.replace(" ","%20")
                    linkline = url + " " + name
                    links.append(linkline)
                    towrap = "[%s] "%len(links)+ name
                    r.add_text(towrap)
                else:
                    r.add_text(line)
        return r.get_final(),links
 class FolderRenderer(GemtextRenderer):
    """Renderer for folders; the "lists" folder of _DATA_DIR is displayed
    as a gemtext page linking to each list."""
    def get_mime(self):
        return "Directory"
    def prepare(self,body,mode=None):
        """Return the gemtext to render for this folder.

        For any folder other than the lists folder, this is a single line
        naming it. For the lists folder, lists are grouped in sections
        according to their name or to the tag found on their first line.
        A string is always returned, empty when there is no list.
        """
        def get_first_line(l):
            # first line of the list file, only if it starts with "#"
            path = os.path.join(listdir,l+".gmi")
            with open(path) as f:
                first_line = f.readline().strip()
            if first_line.startswith("#"):
                return first_line
            else:
                return None
        def write_list(l):
            # one gemtext link per list, with the number of links it holds
            body = ""
            for li in l:
                path = "list:///%s"%li
                gi = GeminiItem(path)
                size = len(gi.get_links())
                body += "=> %s %s (%s items)\n" %(str(path),li,size)
            return body
        listdir = os.path.join(_DATA_DIR,"lists")
        if self.url != listdir:
            return "This is folder %s" %self.url
        self.title = "My lists"
        lists = []
        if os.path.exists(listdir):
            for l in os.listdir(listdir):
                # Only .gmi files are lists. Other entries used to get
                # their last 4 characters removed, then failed to open.
                if l.endswith(".gmi"):
                    #removing the .gmi at the end of the name
                    lists.append(l[:-4])
        # body is returned even without any list: returning None made
        # the gemtext rendering fail
        body = ""
        my_lists = []
        system_lists = []
        subscriptions = []
        frozen = []
        lists.sort()
        for l in lists:
            if l in ["history","to_fetch","archives","tour"]:
                system_lists.append(l)
            else:
                first_line = get_first_line(l)
                if first_line and "#subscribed" in first_line:
                    subscriptions.append(l)
                elif first_line and "#frozen" in first_line:
                    frozen.append(l)
                else:
                    my_lists.append(l)
        if len(my_lists) > 0:
            body+= "\n## Bookmarks Lists (updated during sync)\n"
            body += write_list(my_lists)
        if len(subscriptions) > 0:
            body +="\n## Subscriptions (new links in those are added to tour)\n"
            body += write_list(subscriptions)
        if len(frozen) > 0:
            body +="\n## Frozen (fetched but never updated)\n"
            body += write_list(frozen)
        if len(system_lists) > 0:
            body +="\n## System Lists\n"
            body += write_list(system_lists)
        return body
 class FeedRenderer(GemtextRenderer):
    """Renderer for RSS/Atom feeds: the feed is converted to gemtext by
    prepare() and rendered by GemtextRenderer."""
    def get_mime(self):
        return "application/rss+xml"
    def is_valid(self):
        """A feed is valid when feedparser is enabled, parses the body
        without error and finds at least one entry."""
        if not _DO_FEED:
            return False
        parsed = feedparser.parse(self.body)
        if parsed.bozo:
            return False
        #If no content, then fallback to HTML
        return len(parsed.entries) > 0
    def get_title(self):
        """Return the title, rendering the body first when it is not known
        yet (prepare() is what sets it)."""
        if not self.title:
            self.get_body()
        return self.title
    def prepare(self,content,mode="readable",width=None):
        """Convert the feed `content` to gemtext.

        Every entry becomes a link line. In "full" mode, the summary of
        each entry is rendered as HTML and added below its link.
        self.title is set, and self.validity is cleared when the feed can
        not be used.
        """
        if not width:
            width = term_width()
        self.title = "RSS/Atom feed"
        page = ""
        if _DO_FEED:
            parsed = feedparser.parse(content)
        else:
            page += "Please install python-feedparser to handle RSS/Atom feeds\n"
            self.validity = False
            return page
        if parsed.bozo:
            page += "Invalid RSS feed\n\n"
            page += str(parsed.bozo_exception)
            self.validity = False
        else:
            if "title" in parsed.feed:
                t = parsed.feed.title
            else:
                t = "Unknown"
            self.title = "%s (XML feed)" %t
            title = "# %s"%self.title
            page += title + "\n"
            if "updated" in parsed.feed:
                page += "Last updated on %s\n\n" %parsed.feed.updated
            if "subtitle" in parsed.feed:
                page += parsed.feed.subtitle + "\n"
            if "link" in parsed.feed:
                page += "=> %s\n" %parsed.feed.link
            page += "\n## Entries\n"
            if len(parsed.entries) < 1:
                self.validity = False
            for i in parsed.entries:
                line = "=> %s " %i.link
                # The parsed date may be missing or None when the date of
                # the entry can not be understood: strftime would fail.
                if "published" in i and i.get("published_parsed"):
                    pub_date = time.strftime("%Y-%m-%d",i.published_parsed)
                    line += pub_date + " : "
                if "title" in i:
                    line += "%s" %(i.title)
                if "author" in i:
                    line += " (by %s)"%i.author
                page += line + "\n"
                if mode == "full":
                    if "summary" in i:
                        # not named "html", which is a module imported by
                        # this file
                        summary_renderer = HtmlRenderer(i.summary,self.url,center=False)
                        rendered = summary_renderer.get_body(width=None,mode="full")
                        page += "\n"
                        page += rendered
                        page += "\n------------\n\n"
        return page
 class ImageRenderer(AbstractRenderer):
    """Renderer for picture files."""
    def get_mime(self):
        return "image/*"
    def is_valid(self):
        """Pictures are valid only when image rendering is enabled."""
        return bool(_RENDER_IMAGE)
    def get_links(self,mode=None):
        """A picture has no links."""
        return []
    def get_title(self):
        return "Picture file"
    def render(self,img,width=None,mode=None):
        """Return (text, links) for the picture; links is always empty.

        Nothing is rendered in "links_only" mode. With an explicit width,
        each line is shifted right by half of what remains of term_width().
        """
        #with inline, we use symbols to be rendered with less.
        #else we use the best possible renderer.
        if mode == "links_only":
            return "", []
        if width:
            margin = int((term_width() - width)//2)
        else:
            width = term_width()
            margin = 0
        picture = inline_image(img,width)
        #Now centering the image
        shifted = [margin*" " + row + "\n" for row in picture.splitlines()]
        return "".join(shifted), []
    def display(self,mode=None,window_title=None,window_info=None,grep=None):
        """Print the window title, if any, then show the picture with
        terminal_image(). Always returns True."""
        if window_title:
            header = self._window_title(window_title,info=window_info)
            print(header)
        terminal_image(self.body)
        return True
 class HtmlRenderer(AbstractRenderer):
    def get_mime(self):
        return "text/html"
    def is_valid(self):
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
        return _DO_HTML and self.validity
    def get_subscribe_links(self):
        """Return the list of [url, mimetype, title] entries one can subscribe to.

        The first entry is the page itself. It is followed by one entry per
        <link rel="alternate"> tag whose type attribute mentions "rss",
        "atom" or "feed".
        """
        feeds = [[self.url,self.get_mime(),self.get_title()]]
        parsed = BeautifulSoup(self.body, 'html.parser')
        for tag in parsed.find_all("link",rel="alternate",recursive=True):
            kind = tag.get("type")
            # Tags without a type cannot be recognised as feeds.
            if not kind:
                continue
            if any(word in kind for word in ("rss","atom","feed")):
                feeds.append([tag.get("href"),kind,tag.get("title")])
        return feeds
    def get_title(self):
        """Return the title of the HTML page, caching it in self.title.

        Lookup order:
          1. the cached self.title, when it is already set and non-empty;
          2. Document(self.body).short_title(), when _HAS_READABILITY is set;
          3. the text of the <title> tag found by BeautifulSoup.

        Returns "" when the body is empty or when no usable <title> exists.
        The fallback in step 3 used to store the title without returning it
        (so the caller got None) and crashed on pages without a <title> tag.
        """
        if self.title:
            return self.title
        if not self.body:
            return ""
        if _HAS_READABILITY:
            try:
                self.title = Document(self.body).short_title()
                return self.title
            except Exception:
                # Best effort: whatever goes wrong in the readability pass,
                # fall back to reading the <title> tag below.
                pass
        soup = BeautifulSoup(self.body,"html.parser")
        title_tag = soup.title
        # soup.title is None when the page has no <title>; .string is None
        # when the tag is empty or contains more than one child node.
        if title_tag is not None and title_tag.string is not None:
            self.title = str(title_tag.string)
            return self.title
        return ""
    # Our own HTML engine (crazy, isn’t it?)
    # Return [rendered_body, list_of_links]
    # mode is either links_only, readable or full
    def render(self,body,mode="readable",width=None,add_title=True):
        """Convert an HTML document into text for the terminal.

        body: HTML source to render.
        mode: "links_only", "readable" or "full". "full" parses body as is;
              any other mode first reduces it with Document(body).summary()
              when _HAS_READABILITY is set, falling back to the raw body if
              that call raises.
        width: width handed to the representation; term_width() when falsy.
        add_title: accepted but not read anywhere in this method.

        Returns (r.get_final(), links). Each entry of links is a string made
        of the target followed by a space and a label; the bracketed numbers
        added to the text are 1-based positions in that list. Absolute image
        URLs are also appended to self.images[mode].
        When _DO_HTML is false, a message is printed and None is returned
        instead of a pair.
        """
        if not width:
            width = term_width()
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
            # NOTE(review): bare return, so the caller gets None, not (text, links).
            return
        # This method recursively parses the HTML
        # r accumulates the output; links collects the numbered targets.
        r = self.representation(width,title=self.get_title(),center=self.center)
        links = []
        # You know how bad html is when you realize that space is sometimes meaningful, sometimes not.
        # CR are not meaningful. Except that, sometimes, they should be interpreted as spaces.
        # HTML is real crap. At least the one people are generating.
        def render_image(src,width=40,mode=None):
            """Return the text to insert for the image src, or "" if none.

            Nothing is rendered when _RENDER_IMAGE is false, when mode is
            "links_only" or when looks_like_base64() gives no image URL.
            Any exception is turned into a wrapped "[BAD IMG]" message.
            """
            ansi_img = ""
            # presumably imgdata is the base64 payload of an inline image and
            # is falsy for a regular URL; verify against looks_like_base64
            imgurl,imgdata = looks_like_base64(src,self.url)
            if _RENDER_IMAGE and mode != "links_only" and imgurl:
                try:
                    #4 following lines are there to translate the URL into a cache path
                    g = GeminiItem(imgurl)
                    img = g.get_cache_path()
                    if imgdata:
                        # Inline image: decode it and write it at the cache path.
                        with open(img,"wb") as cached:
                            cached.write(base64.b64decode(imgdata))
                            cached.close()
                    if g.is_cache_valid():
                        renderer = ImageRenderer(img,imgurl)
                        # Images are 40 wide except if the available width is smaller
                        if width > 40:
                            size = 40
                        else:
                            size = width
                        ansi_img = "\n" + renderer.get_body(width=size,mode="inline")
                except Exception as err:
                    #we sometimes encounter really bad formatted files or URL
                    ansi_img = textwrap.fill("[BAD IMG] %s - %s"%(err,src),width) + "\n"
            return ansi_img
        def sanitize_string(string):
            """Normalise the whitespace of a piece of HTML text.

            CR are dropped, LF and tabs become spaces, the result is stripped
            and runs of spaces are collapsed before HTML entities are
            unescaped. If the input started (or ended) with a space or a
            non-breaking space, one space is put back on that side unless
            the result already has one there.
            """
            #never start with a "\n"
            #string = string.lstrip("\n")
            string = string.replace("\r","").replace("\n", " ").replace("\t"," ")
            # Computed after the replacement above, so a leading or trailing
            # newline or tab counts as a space here.
            endspace = string.endswith(" ") or string.endswith("\xa0")
            startspace = string.startswith(" ") or string.startswith("\xa0")
            toreturn = string.replace("\n", " ").replace("\t"," ").strip()
            while "  " in toreturn:
                toreturn = toreturn.replace("  "," ")
            toreturn = html.unescape(toreturn)
            if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"):
                toreturn += " "
            if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"):
                toreturn = " " + toreturn
            return toreturn
        def recursive_render(element,indent="",preformatted=False):
            """Feed element and its descendants to r, dispatching on the tag name.

            indent is only forwarded to recursive calls; nothing in this
            function reads it. preformatted makes text nodes be added as is,
            in faint, instead of going through sanitize_string().
            """
            if element.name == "blockquote":
                r.newparagraph()
                r.startindent("   ",reverse="     ")
                # italic is opened and closed around each child separately
                for child in element.children:
                    r.open_color("italic")
                    recursive_render(child,indent="\t")
                    r.close_color("italic")
                r.endindent()
            elif element.name in ["div","p"]:
                r.newparagraph()
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.newparagraph()
            elif element.name in ["span"]:
                r.add_space()
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.add_space()
            elif element.name in ["h1","h2","h3","h4","h5","h6"]:
                # All headings are blue; h1 is bold+underline, h2 bold, h5/h6 faint.
                r.open_color("blue")
                if element.name in ["h1"]:
                    r.open_color("bold")
                    r.open_color("underline")
                elif element.name in ["h2"]:
                    r.open_color("bold")
                elif element.name in ["h5","h6"]:
                    r.open_color("faint")
                # NOTE(review): close_all() runs inside the loop, i.e. after
                # every child and not once after the heading; confirm intended.
                for child in element.children:
                    r.newparagraph()
                    recursive_render(child)
                    r.newparagraph()
                    r.close_all()
            elif element.name in ["code","tt"]:
                for child in element.children:
                   recursive_render(child,indent=indent,preformatted=True)
            elif element.name in ["pre"]:
                # The text of the whole subtree is added as one block.
                r.newparagraph()
                r.add_block(element.text)
                r.newparagraph()
            elif element.name in ["li"]:
                r.startindent(" • ",sub="   ")
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.endindent()
            elif element.name in ["tr"]:
                r.startindent("|",reverse="|")
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.endindent()
            elif element.name in ["td","th"]:
                r.add_text("| ")
                for child in element.children:
                    recursive_render(child)
                r.add_text(" |")
            # italics
            elif element.name in ["em","i"]:
                r.open_color("italic")
                for child in element.children:
                    recursive_render(child,indent=indent,preformatted=preformatted)
                r.close_color("italic")
            #bold
            elif element.name in ["b","strong"]:
                r.open_color("bold")
                for child in element.children:
                    recursive_render(child,indent=indent,preformatted=preformatted)
                r.close_color("bold")
            elif element.name == "a":
                link = element.get('href')
                # support for images nested in links
                if link:
                    text = ""
                    imgtext = ""
                    #we display images first in a link
                    for child in element.children:
                        if child.name == "img":
                            recursive_render(child)
                            imgtext = "[IMG LINK %s]"
                    # text is still "" here, so the stored entry is link + " ".
                    # Nested images were appended to links before this one.
                    links.append(link+" "+text)
                    link_id = str(len(links))
                    r.open_color("blue")
                    r.open_color("faint")
                    # Second pass: everything in the link that is not an image.
                    for child in element.children:
                        if child.name != "img":
                            recursive_render(child,preformatted=preformatted)
                    if imgtext != "":
                        r.center_line()
                        r.add_text(imgtext%link_id)
                    else:
                        r.add_text(" [%s]"%link_id)
                    r.close_color("blue")
                    r.close_color("faint")
                else:
                    #No real link found
                    for child in element.children:
                        recursive_render(child,preformatted=preformatted)
            elif element.name == "img":
                src = element.get("src")
                text = ""
                # width and mode are the ones received by render() itself.
                # NOTE(review): called before the "if src" check below, so
                # src may be None here; confirm looks_like_base64 handles it.
                ansi_img = render_image(src,width=width,mode=mode)
                alt = element.get("alt")
                if alt:
                    alt = sanitize_string(alt)
                    text += "[IMG] %s"%alt
                else:
                    text += "[IMG]"
                if src:
                    links.append(src+" "+text)
                    if not mode in self.images:
                        self.images[mode] = []
                    # The absolute URL of the picture is recorded per mode.
                    abs_url = urllib.parse.urljoin(self.url, src)
                    self.images[mode].append(abs_url)
                    link_id = " [%s]"%(len(links))
                    r.add_block(ansi_img)
                    r.open_color("faint")
                    r.open_color("yellow")
                    r.center_line()
                    r.add_text(text + link_id)
                    r.close_color("faint")
                    r.close_color("yellow")
                    r.newline()
            elif element.name == "br":
                r.newline()
            # Everything else except script/style/template tags and comments:
            # text nodes and any tag not handled above.
            elif element.name not in ["script","style","template"] and type(element) != Comment:
                if element.string:
                    if preformatted :
                        r.open_color("faint")
                        r.add_text(element.string)
                        r.close_color("faint")
                    else:
                        s = sanitize_string(element.string)
                        # Whitespace-only strings are dropped.
                        if len(s.strip()) > 0:
                            r.add_text(s)
                else:
                    for child in element.children:
                        recursive_render(child,indent=indent)
        # the real render_html heart
        # Choose what to parse: the raw body in "full" mode, otherwise the
        # summary from Document() when available and working.
        if mode == "full":
            summary = body
        elif _HAS_READABILITY:
            try:
                readable = Document(body)
                summary = readable.summary()
            except Exception as err:
                summary = body
        else:
            summary = body
        soup = BeautifulSoup(summary, 'html.parser')
        #soup = BeautifulSoup(summary, 'html5lib')
        if soup :
            # Start from <body> when there is one, else from the document root.
            if soup.body :
                recursive_render(soup.body)
            else:
                recursive_render(soup)
        return r.get_final(),links
 # Mapping mimetypes with renderers
 # (any content with a mimetype text/* not listed here will be rendered as GemText)
 _FORMAT_RENDERERS = {
-    "text/gemini":  GemtextRenderer,
+    "text/gemini":  ansirenderer.GemtextRenderer,
-    "text/html" :   HtmlRenderer,
+    "text/html" :   ansirenderer.HtmlRenderer,
-    "text/xml" : FeedRenderer,
+    "text/xml" : ansirenderer.FeedRenderer,
-    "application/xml" : FeedRenderer,
+    "application/xml" : ansirenderer.FeedRenderer,
-    "application/rss+xml" : FeedRenderer,
+    "application/rss+xml" : ansirenderer.FeedRenderer,
-    "application/atom+xml" : FeedRenderer,
+    "application/atom+xml" : ansirenderer.FeedRenderer,
-    "text/gopher": GopherRenderer,
+    "text/gopher": ansirenderer.GopherRenderer,
-    "image/*": ImageRenderer
+    "image/*": ansirenderer.ImageRenderer
 }
 # Offpunk is organized as follows:
 # - a GeminiClient instance which handles the browsing of GeminiItems (= pages).
@ -1630,7 +707,7 @@ class GeminiItem():
    def _set_renderer(self,mime=None):
        if self.local and os.path.isdir(self.get_cache_path()):
-            self.renderer = FolderRenderer("",self.get_cache_path())
+            self.renderer = ansirenderer.FolderRenderer("",self.get_cache_path(),datadir=_DATA_DIR)
            return
        if not mime:
            mime = self.get_mime()