offpunk/ansirenderer.py

#!/bin/python
import os
import sys
import shutil
import tempfile
import subprocess
import textwrap
import time
import html
import urllib
import argparse
import mimetypes
import fnmatch
import netcache
from offutils import run,term_width
try:
    from readability import Document
    _HAS_READABILITY = True
except ModuleNotFoundError:
    _HAS_READABILITY = False

try:
    from bs4 import BeautifulSoup
    from bs4 import Comment
    _HAS_SOUP = True
except ModuleNotFoundError:
    _HAS_SOUP = False

_DO_HTML = _HAS_SOUP #and _HAS_READABILITY
if _DO_HTML and not _HAS_READABILITY:
    print("To improve your web experience (less cruft in webpages),")
    print("please install python3-readability or readability-lxml")

try:
    import feedparser
    _DO_FEED = True
except ModuleNotFoundError:
    _DO_FEED = False


less_version = 0
if not shutil.which("less"):
    print("Please install the pager \"less\" to run Offpunk.")
    print("If you wish to use another pager, send me an email !")
    print("(I’m really curious to hear about people not having \"less\" on their system.)")
    sys.exit()
output = run("less --version")
# We get less Version (which is the only integer on the first line)
words = output.split("\n")[0].split()
less_version = 0
for w in words:
    if w.isdigit():
        less_version = int(w)
# restoring position only works for version of less > 572
if less_version >= 572:
    _LESS_RESTORE_POSITION = True
else:
    _LESS_RESTORE_POSITION = False
#_DEFAULT_LESS = "less -EXFRfM -PMurl\ lines\ \%lt-\%lb/\%L\ \%Pb\%$ %s"
# -E : quit when reaching end of file (to behave like "cat")
# -F : quit if content fits the screen (behave like "cat")
# -X : does not clear the screen
# -R : interpret ANSI colors correctly
# -f : suppress warning for some contents
# -M : long prompt (to have info about where you are in the file)
# -W : hilite the new first line after a page skip (space)
# -i : ignore case in search
# -S : do not wrap long lines. Wrapping is done by offpunk, longlines
# are there on purpose (surch in asciiart)
#--incsearch : incremental search starting rev581
if less_version >= 581:
    less_base = "less --incsearch --save-marks -~ -XRfMWiS"
elif less_version >= 572:
    less_base = "less --save-marks -XRfMWiS"
else:
    less_base = "less -XRfMWiS"
_DEFAULT_LESS = less_base + " \"+''\" %s"
_DEFAULT_CAT = less_base + " -EF %s"
def less_cmd(file, histfile=None,cat=False,grep=None):
    if histfile:
        env = {"LESSHISTFILE": histfile}
    else:
        env = {}
    if cat:
        cmd_str = _DEFAULT_CAT
    elif grep:
        grep_cmd = _GREP
        #case insensitive for lowercase search
        if grep.islower():
            grep_cmd += " -i"
        cmd_str = _DEFAULT_CAT + "|" + grep_cmd + " %s"%grep
    else:
        cmd_str = _DEFAULT_LESS
    run(cmd_str, parameter=file, direct_output=True, env=env)

try:
    from PIL import Image
    _HAS_PIL = True
except ModuleNotFoundError:
    _HAS_PIL = False
_HAS_TIMG = shutil.which('timg')
_HAS_CHAFA = shutil.which('chafa')
_NEW_CHAFA = False
_NEW_TIMG = False
_RENDER_IMAGE = False

# All this code to know if we render image inline or not
if _HAS_CHAFA:
    # starting with 1.10, chafa can return only one frame
    # which allows us to drop dependancy for PIL
    output = run("chafa --version")
    # output is "Chafa version M.m.p"
    # check for m < 1.10
    try:
        chafa_major, chafa_minor, _ = output.split("\n")[0].split(" ")[-1].split(".")
        if int(chafa_major) >= 1 and int(chafa_minor) >= 10:
            _NEW_CHAFA = True
    except:
        pass
if _NEW_CHAFA :
    _RENDER_IMAGE = True
if _HAS_TIMG :
    try:
        output = run("timg --version")
    except subprocess.CalledProcessError:
        output = False
    # We don’t deal with timg before 1.3.2 (looping options)
    if output and output[5:10] > "1.3.2":
        _NEW_TIMG = True
        _RENDER_IMAGE = True
elif _HAS_CHAFA and _HAS_PIL:
    _RENDER_IMAGE = True
if not _RENDER_IMAGE:
    print("To render images inline, you need either chafa or timg.")
    if not _NEW_CHAFA and not _NEW_TIMG:
        print("Before Chafa 1.10, you also need python-pil")


# This method return the image URL or invent it if it’s a base64 inline image
# It returns [url,image_data] where image_data is None for normal image
def looks_like_base64(src,baseurl):
    imgdata = None
    imgname = src
    if src and src.startswith("data:image/"):
        if ";base64," in src:
            splitted = src.split(";base64,")
            extension = splitted[0].strip("data:image/")[:3]
            imgdata = splitted[1]
            imgname = imgdata[:20] + "." + extension
            imgurl = urllib.parse.urljoin(baseurl, imgname)
        else:
            #We can’t handle other data:image such as svg for now
            imgurl = None
    else:
        imgurl = urllib.parse.urljoin(baseurl, imgname)
    return imgurl,imgdata

#return ANSI text that can be show by less
def inline_image(img_file,width):
    #Chafa is faster than timg inline. Let use that one by default
    inline = None
    ansi_img = ""
    #We avoid errors by not trying to render non-image files
    if shutil.which("file"):
        mime = run("file -b --mime-type %s", parameter=img_file).strip()
        if not "image" in mime:
            return ansi_img
    if _HAS_CHAFA:
        if _HAS_PIL and not _NEW_CHAFA:
            # this code is a hack to remove frames from animated gif
            img_obj = Image.open(img_file)
            if hasattr(img_obj,"n_frames") and img_obj.n_frames > 1:
                # we remove all frames but the first one
                img_obj.save(img_file,format="gif",save_all=False)
            inline = "chafa --bg white -s %s -f symbols"
        elif _NEW_CHAFA:
            inline = "chafa --bg white -t 1 -s %s -f symbols --animate=off"
    if not inline and _NEW_TIMG:
        inline = "timg --frames=1 -p q -g %sx1000"
    if inline:
        cmd = inline%width + " %s"
        try:
            ansi_img = run(cmd, parameter=img_file)
        except Exception as err:
            ansi_img = "***image failed : %s***\n" %err
    return ansi_img

def terminal_image(img_file):
    #Render by timg is better than old chafa.
    # it is also centered
    cmd = None
    if _NEW_TIMG:
        cmd = "timg --loops=1 -C"
    elif _HAS_CHAFA:
        cmd = "chafa -d 0 --bg white -t 1 -w 1"
    if cmd:
        cmd = cmd + " %s"
        run(cmd, parameter=img_file, direct_output=True)


# First, we define the different content->text renderers, outside of the rest
# (They could later be factorized in other files or replaced)
class AbstractRenderer():
    def __init__(self,content,url,center=True):
        self.url = url
        self.body = str(content)
        #there’s one rendered text and one links table per mode
        self.rendered_text = {}
        self.links = {}
        self.images = {}
        self.title = None
        self.validity = True
        self.temp_files = {}
        self.less_histfile = {}
        self.center = center
        self.last_mode = "readable"

    #This class hold an internal representation of the HTML text
    class representation:
        def __init__(self,width,title=None,center=True):
            self.title=title
            self.center = center
            self.final_text = ""
            self.opened = []
            self.width = width
            self.last_line = ""
            self.last_line_colors = {}
            self.last_line_center = False
            self.new_paragraph = True
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""
            self.disabled_indents = None
            # each color is an [open,close] pair code
            self.colors = {
                            "bold"   : ["1","22"],
                            "faint"  : ["2","22"],
                            "italic" : ["3","23"],
                            "underline": ["4","24"],
                            "red"    : ["31","39"],
                            "yellow" : ["33","39"],
                            "blue"   : ["34","39"],
                       }

        def _insert(self,color,open=True):
            if open: o = 0
            else: o = 1
            pos = len(self.last_line)
            #we remember the position where to insert color codes
            if not pos in self.last_line_colors:
                self.last_line_colors[pos] = []
            #Two inverse code cancel each other
            if [color,int(not o)] in self.last_line_colors[pos]:
                self.last_line_colors[pos].remove([color,int(not o)])
            else:
                self.last_line_colors[pos].append([color,o])#+color+str(o))

        # Take self.last line and add ANSI codes to it before adding it to
        # self.final_text.
        def _endline(self):
            if len(self.last_line.strip()) > 0:
                for c in self.opened:
                    self._insert(c,open=False)
                nextline = ""
                added_char = 0
                #we insert the color code at the saved positions
                while len (self.last_line_colors) > 0:
                    pos,colors = self.last_line_colors.popitem()
                    #popitem itterates LIFO.
                    #So we go, backward, to the pos (starting at the end of last_line)
                    nextline = self.last_line[pos:] + nextline
                    ansicol = "\x1b["
                    for c,o in colors:
                        ansicol += self.colors[c][o] + ";"
                    ansicol = ansicol[:-1]+"m"
                    nextline = ansicol + nextline
                    added_char += len(ansicol)
                    self.last_line = self.last_line[:pos]
                nextline = self.last_line + nextline
                if self.last_line_center:
                    #we have to care about the ansi char while centering
                    width = term_width() + added_char
                    nextline = nextline.strip().center(width)
                    self.last_line_center = False
                else:
                    #should we lstrip the nextline in the addition ?
                    nextline = self.current_indent + nextline.lstrip() + self.r_indent
                    self.current_indent = self.s_indent
                self.final_text += nextline
                self.last_line = ""
                self.final_text += "\n"
                for c in self.opened:
                    self._insert(c,open=True)
            else:
                self.last_line = ""


        def center_line(self):
            self.last_line_center = True

        def open_color(self,color):
            if color in self.colors and color not in self.opened:
                self._insert(color,open=True)
                self.opened.append(color)
        def close_color(self,color):
            if color in self.colors and color in self.opened:
                self._insert(color,open=False)
                self.opened.remove(color)
        def close_all(self):
            if len(self.colors) > 0:
                self.last_line += "\x1b[0m"
                self.opened.clear()

        def startindent(self,indent,sub=None,reverse=None):
            self._endline()
            self.i_indent = indent
            self.current_indent = indent
            if sub:
                self.s_indent = sub
            else:
                self.s_indent = indent
            if reverse:
                self.r_indent = reverse
            else:
                self.r_indent = ""


        def endindent(self):
            self._endline()
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""

        def _disable_indents(self):
            self.disabled_indents = []
            self.disabled_indents.append(self.current_indent)
            self.disabled_indents.append(self.i_indent)
            self.disabled_indents.append(self.s_indent)
            self.disabled_indents.append(self.r_indent)
            self.endindent()

        def _enable_indents(self):
            if self.disabled_indents:
                self.current_indent = self.disabled_indents[0]
                self.i_indent = self.disabled_indents[1]
                self.s_indent = self.disabled_indents[2]
                self.r_indent = self.disabled_indents[3]
            self.disabled_indents = None

        def newline(self):
            self._endline()

        #A new paragraph implies 2 newlines (1 blank line between paragraphs)
        #But it is only used if didn’t already started one to avoid plenty
        #of blank lines. force=True allows to bypass that limit.
        #new_paragraph becomes false as soon as text is entered into it
        def newparagraph(self,force=False):
            if force or not self.new_paragraph:
                self._endline()
                self.final_text += "\n"
                self.new_paragraph = True

        def add_space(self):
            if len(self.last_line) > 0 and self.last_line[-1] != " ":
                self.last_line += " "

        def _title_first(self,intext=None):
            if self.title:
                if not self.title == intext:
                    self._disable_indents()
                    self.open_color("blue")
                    self.open_color("bold")
                    self.open_color("underline")
                    self.add_text(self.title)
                    self.close_all()
                    self.newparagraph()
                    self._enable_indents()
                self.title = None

        # Beware, blocks are not wrapped nor indented and left untouched!
        # They are mostly useful for pictures and preformatted text.
        def add_block(self,intext):
            # If necessary, we add the title before a block
            self._title_first()
            # we don’t want to indent blocks
            self._endline()
            self._disable_indents()
            self.final_text += self.current_indent + intext
            self.new_paragraph = False
            self._endline()
            self._enable_indents()

        def add_text(self,intext):
            self._title_first(intext=intext)
            lines = []
            last = (self.last_line + intext)
            self.last_line = ""
            # With the following, we basically cancel adding only spaces
            # on an empty line
            if len(last.strip()) > 0:
                self.new_paragraph = False
            else:
                last = last.strip()
            if len(last) > self.width:
                width = self.width - len(self.current_indent) - len(self.r_indent)
                spaces_left = len(last) - len(last.lstrip())
                spaces_right = len(last) - len(last.rstrip())
                lines = textwrap.wrap(last,width,drop_whitespace=True)
                self.last_line += spaces_left*" "
                while len(lines) > 1:
                    l = lines.pop(0)
                    self.last_line += l
                    self._endline()
                if len(lines) == 1:
                    li = lines[0]
                    self.last_line += li + spaces_right*" "
            else:
                self.last_line = last

        def get_final(self):
            self.close_all()
            self._endline()
            #if no content, we still add the title
            self._title_first()
            lines = self.final_text.splitlines()
            lines2 = []
            termspace = shutil.get_terminal_size()[0]
            #Following code instert blanck spaces to center the content
            if self.center and termspace > term_width():
                margin = int((termspace - term_width())//2)
            else:
                margin = 0
            for l in lines :
                lines2.append(margin*" "+l)
            return "\n".join(lines2)

    def get_subscribe_links(self):
        return [[self.url,self.get_mime(),self.get_title()]]
    def is_valid(self):
        return self.validity
    def is_local(self):
        #TODO with self.url
        return False
    def set_mode(self,mode):
        self.last_mode = mode
    def get_links(self,mode=None):
    # This method is used to load once the list of links in a gi
    # Links can be followed, after a space, by a description/title
    #TODO: remove this code
   # def get_links(self,mode=None):
   #     links = []
   #     toreturn = []
   #     if self.renderer:
   #         if not mode:
   #             mode = self.renderer.last_mode
   #         links = self.renderer.get_links(mode=mode)
   #     for l in links:
   #         #split between link and potential name
   #         # check that l is non-empty
   #         url = None
   #         if l:
   #             splitted = l.split(maxsplit=1)
   #             url = self.absolutise_url(splitted[0])
   #         if url and looks_like_url(url):
   #             if len(splitted) > 1:
   #                 #We add a name only for Gopher items
   #                 if url.startswith("gopher://"):
   #                     newgi = GeminiItem(url,name=splitted[1])
   #                 else:
   #                     newgi = GeminiItem(url)
   #             else:
   #                 newgi = GeminiItem(url)
   #             toreturn.append(newgi)
   #         elif url and mode != "links_only" and url.startswith("data:image/"):
   #             imgurl,imgdata = ansirenderer.looks_like_base64(url,self.url)
   #             if imgurl:
   #                 toreturn.append(GeminiItem(imgurl))
   #             else:
   #                 toreturn.append(None)
   #         else:
   #             # We must include a None item to keep the link count valid
   #             toreturn.append(None)
   #     return toreturn
        if not mode: mode = self.last_mode
        if mode not in self.links :
            prepared_body = self.prepare(self.body,mode=mode)
            results = self.render(prepared_body,mode=mode)
            if results:
                #we should absolutize all URLs here
                self.links[mode] = []
                for l in results[1]:
                    abs_l = urllib.parse.urljoin(self.url,l.split()[0])
                    self.links[mode].append(abs_l)
                for l in self.get_subscribe_links()[1:]:
                    self.links[mode].append(l[0])
        return self.links[mode]
    def get_link(self,nb):
        links = self.get_links()
        if len(links) < nb:
            print("Index too high! No link %s for %s" %(nb,self.url))
            return None
        else:
            return links[nb-1]

    #get_title is about the "content title", so the title in the page itself
    def get_title(self):
        return "Abstract title"

    def get_page_title(self):
        title = self.get_title()
        if not title or len(title) == 0:
            title = self.get_url_title()
        else:
            title += " (%s)" %self.get_url_title()
        return title

    #this function is about creating a title derived from the URL
    def get_url_title(self):
        #small intelligence to try to find a good name for a capsule
        #we try to find eithe ~username or /users/username
        #else we fallback to hostname
        #TODO: handle local name
       # if self.local:
       #     if self.name != "":
       #         red_title = self.name
       #     else:
       #         red_title = self.path
       # else:
       #TODO: handle host and path separation
        red_title = "TODO:host" #self.host
        path = self.url
        if "user" in path:
            i = 0
            splitted = path.split("/")
            while i < (len(splitted)-1):
                if splitted[i].startswith("user"):
                    red_title = splitted[i+1]
                i += 1
        if "~" in path:
            for pp in path.split("/"):
                if pp.startswith("~"):
                    red_title = pp[1:]
        return red_title

    # This function return a list of URL which should be downloaded
    # before displaying the page (images in HTML pages, typically)
    def get_images(self,mode=None):
        if not mode: mode = self.last_mode
        if not mode in self.images:
            self.get_body(mode=mode)
            # we also invalidate the body that was done without images
            self.rendered_text.pop(mode)
        if mode in self.images:
            return self.images[mode]
        else:
            return []
    #This function will give gemtext to the gemtext renderer
    def prepare(self,body,mode=None):
        return body

    def get_body(self,width=None,mode=None):
        if not mode: mode = self.last_mode
        if not width:
            width = term_width()
        if mode not in self.rendered_text:
            prepared_body = self.prepare(self.body,mode=mode)
            result = self.render(prepared_body,width=width,mode=mode)
            if result:
                self.rendered_text[mode] = result[0]
                #The following is there to prepoulate self.links
                #but it seems to slow down a lot the loading
                #self.links[mode] = []
                #we should absolutize all URLs here
                #for l in result[1]:
                #    abs_l = urllib.parse.urljoin(self.url,l.split()[0])
                #    self.links[mode].append(abs_l)
        return self.rendered_text[mode]

    def _window_title(self,title,info=None):
        title_r = self.representation(term_width())
        title_r.open_color("red")
        title_r.open_color("bold")
        title_r.add_text(title)
        title_r.close_color("bold")
        if info:
            title_r.add_text("   (%s)"%info)
        title_r.close_color("red")
        return title_r.get_final()

    def display(self,mode=None,window_title="",window_info=None,grep=None):
        if mode: self.last_mode = mode
        else: mode = self.last_mode
        wtitle = self._window_title(window_title,info=window_info)
        body = wtitle + "\n" + self.get_body(mode=mode)
        if not body:
            return False
        # We actually put the body in a tmpfile before giving it to less
        if mode not in self.temp_files:
            tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False)
            self.temp_files[mode] = tmpf.name
            tmpf.write(body)
            tmpf.close()
        if mode not in self.less_histfile:
            firsttime = True
            tmpf = tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False)
            self.less_histfile[mode] = tmpf.name
        else:
            firsttime = False
        less_cmd(self.temp_files[mode], histfile=self.less_histfile[mode],cat=firsttime,grep=grep)
        return True

    def get_temp_file(self,mode=None):
        if not mode: mode = self.last_mode
        if mode in self.temp_files:
            return self.temp_files[mode]
        else:
            return None

    # An instance of AbstractRenderer should have a self.render(body,width,mode) method.
    # 3 modes are used : readable (by default), full and links_only (the fastest, when
    # rendered content is not used, only the links are needed)
    # The prepare() function is called before the rendering. It is useful if
    # your renderer output in a format suitable for another existing renderer (such as gemtext)

# Gemtext Rendering Engine
class GemtextRenderer(AbstractRenderer):
    def get_mime(self):
        return "text/gemini"
    def get_title(self):
        if self.title:
            return self.title
        elif self.body:
            lines = self.body.splitlines()
            for line in lines:
                if line.startswith("#"):
                    self.title = line.strip("#").strip()
                    return self.title
            if len(lines) > 0:
                # If not title found, we take the first 50 char
                # of the first line
                title_line = lines[0].strip()
                if len(title_line) > 50:
                    title_line = title_line[:49] + "…"
                self.title = title_line
                return self.title
            else:
                self.title = "Empty Page"
                return self.title
        else:
            return "Unknown Gopher Page"

    #render_gemtext
    def render(self,gemtext, width=None,mode=None):
        if not width:
            width = term_width()
        r = self.representation(width)
        links = []
        hidden_links = []
        preformatted = False
        def format_link(url,index,name=None):
            if "://" in url:
                protocol,adress = url.split("://",maxsplit=1)
                protocol = " %s" %protocol
            else:
                adress = url
                protocol = ""
            if "gemini" in protocol or "list" in protocol:
                protocol = ""
            if not name:
                name = adress
            line = "[%d%s] %s" % (index, protocol, name)
            return line
        for line in gemtext.splitlines():
            r.newline()
            if line.startswith("```"):
                preformatted = not preformatted
            elif preformatted:
                # infinite line to not wrap preformated
                r.add_block(line+"\n")
            elif len(line.strip()) == 0:
                r.newparagraph(force=True)
            elif line.startswith("=>"):
                strippedline = line[2:].strip()
                if strippedline:
                    links.append(strippedline)
                    splitted = strippedline.split(maxsplit=1)
                    url = splitted[0]
                    name = None
                    if len(splitted) > 1:
                        name = splitted[1]
                    link = format_link(url,len(links),name=name)
                    #r.open_color("blue")
                    #r.open_color("faint")
                    #r.open_color("underline")
                    startpos = link.find("] ") + 2
                    r.startindent("",sub=startpos*" ")
                    r.add_text(link)
                    r.endindent()
                    #r.close_all()
            elif line.startswith("* "):
                line = line[1:].lstrip("\t ")
                r.startindent("• ",sub="  ")
                r.add_text(line)
                r.endindent()
            elif line.startswith(">"):
                line = line[1:].lstrip("\t ")
                r.startindent("> ")
                r.add_text(line)
                r.endindent()
            elif line.startswith("###"):
                line = line[3:].lstrip("\t ")
                r.open_color("blue")
                r.add_text(line)
                r.close_color("blue")
            elif line.startswith("##"):
                line = line[2:].lstrip("\t ")
                r.open_color("blue")
                r.add_text(line)
                r.close_color("blue")
            elif line.startswith("#"):
                line = line[1:].lstrip("\t ")
                if not self.title:
                    self.title = line
                r.open_color("bold")
                r.open_color("blue")
                r.open_color("underline")
                r.add_text(line)
                r.close_color("underline")
                r.close_color("bold")
                r.close_color("blue")
            else:
                if "://" in line:
                    words = line.split()
                    for w in words:
                        if "://" in w:
                            hidden_links.append(w)
                r.add_text(line.rstrip())
        links += hidden_links
        return r.get_final(), links

class GopherRenderer(AbstractRenderer):
    def get_mime(self):
        return "text/gopher"
    def get_title(self):
        if not self.title:
            self.title = ""
            if self.body:
                firstline = self.body.splitlines()[0]
                firstline = firstline.split("\t")[0]
                if firstline.startswith("i"):
                    firstline = firstline[1:]
                self.title = firstline
        return self.title

    #menu_or_text
    def render(self,body,width=None,mode=None):
        if not width:
            width = term_width()
        try:
            render,links = self._render_goph(body,width=width,mode=mode)
        except Exception as err:
            print("Error rendering Gopher ",err)
            r = self.representation(width)
            r.add_block(body)
            render = r.get_final()
            links = []
        return render,links

    def _render_goph(self,body,width=None,mode=None):
        if not width:
            width = term_width()
        # This was copied straight from Agena (then later adapted)
        links = []
        r = self.representation(width)
        for line in self.body.split("\n"):
            r.newline()
            if line.startswith("i"):
                towrap = line[1:].split("\t")[0]
                if len(towrap.strip()) > 0:
                    r.add_text(towrap)
                else:
                    r.newparagraph()
            elif not line.strip() in [".",""]:
                parts = line.split("\t")
                parts[-1] = parts[-1].strip()
                if parts[-1] == "+":
                    parts = parts[:-1]
                if len(parts) == 4:
                    name,path,host,port = parts
                    itemtype = name[0]
                    name = name[1:]
                    if port == "70":
                        port = ""
                    else:
                        port = ":%s"%port
                    if itemtype == "h" and path.startswith("URL:"):
                        url = path[4:]
                    else:
                        url = "gopher://%s%s/%s%s" %(host,port,itemtype,path)
                    url = url.replace(" ","%20")
                    linkline = url + " " + name
                    links.append(linkline)
                    towrap = "[%s] "%len(links)+ name
                    r.add_text(towrap)
                else:
                    r.add_text(line)
        return r.get_final(),links


class FolderRenderer(GemtextRenderer):
    #it was initialized with:
    #self.renderer = FolderRenderer("",self.get_cache_path(),datadir=_DATA_DIR)
    def __init__(self,content,url,center=True,datadir=None):
        GemtextRenderer.__init__(self,content,url,center)
        self.datadir = datadir

    def get_mime(self):
        return "Directory"
    def prepare(self,body,mode=None):
        def get_first_line(l):
            path = os.path.join(listdir,l+".gmi")
            with open(path) as f:
                first_line = f.readline().strip()
                f.close()
            if first_line.startswith("#"):
                return first_line
            else:
                return None
        def write_list(l):
            body = ""
            for li in l:
                path = "list:///%s"%li
                #TODO : size of lists
                #gi = GeminiItem(path)
                #size = len(gi.get_links())
                size = "TODO"
                body += "=> %s %s (%s items)\n" %(str(path),li,size)
            return body
        listdir = os.path.join(self.datadir,"lists")
        if self.url != listdir:
            return "This is folder %s" %self.url
        else:
            self.title = "My lists"
            lists = []
            if os.path.exists(listdir):
                listfiles = os.listdir(listdir)
                if len(listfiles) > 0:
                    for l in listfiles:
                        #removing the .gmi at the end of the name
                        lists.append(l[:-4])
            if len(lists) > 0:
                body = ""
                my_lists = []
                system_lists = []
                subscriptions = []
                frozen = []
                lists.sort()
                for l in lists:
                    if l in ["history","to_fetch","archives","tour"]:
                        system_lists.append(l)
                    else:
                        first_line = get_first_line(l)
                        if first_line and "#subscribed" in first_line:
                            subscriptions.append(l)
                        elif first_line and "#frozen" in first_line:
                            frozen.append(l)
                        else:
                            my_lists.append(l)
                if len(my_lists) > 0:
                    body+= "\n## Bookmarks Lists (updated during sync)\n"
                    body += write_list(my_lists)
                if len(subscriptions) > 0:
                    body +="\n## Subscriptions (new links in those are added to tour)\n"
                    body += write_list(subscriptions)
                if len(frozen) > 0:
                    body +="\n## Frozen (fetched but never updated)\n"
                    body += write_list(frozen)
                if len(system_lists) > 0:
                    body +="\n## System Lists\n"
                    body += write_list(system_lists)
                return body

class FeedRenderer(GemtextRenderer):
    def get_mime(self):
        return "application/rss+xml"
    def is_valid(self):
        if _DO_FEED:
            parsed = feedparser.parse(self.body)
        else:
            return False
        if parsed.bozo:
            return False
        else:
            #If no content, then fallback to HTML
            return len(parsed.entries) > 0

    def get_title(self):
        if not self.title:
            self.get_body()
        return self.title

    def prepare(self,content,mode=None,width=None):
        if not mode: mode = self.last_mode
        if not width:
            width = term_width()
        self.title = "RSS/Atom feed"
        page = ""
        if _DO_FEED:
            parsed = feedparser.parse(content)
        else:
            page += "Please install python-feedparser to handle RSS/Atom feeds\n"
            self.validity = False
            return page
        if parsed.bozo:
            page += "Invalid RSS feed\n\n"
            page += str(parsed.bozo_exception)
            self.validity = False
        else:
            if "title" in parsed.feed:
                t = parsed.feed.title
            else:
                t = "Unknown"
            self.title = "%s (XML feed)" %t
            title = "# %s"%self.title
            page += title + "\n"
            if "updated" in parsed.feed:
                page += "Last updated on %s\n\n" %parsed.feed.updated
            if "subtitle" in parsed.feed:
                page += parsed.feed.subtitle + "\n"
            if "link" in parsed.feed:
                page += "=> %s\n" %parsed.feed.link
            page += "\n## Entries\n"
            if len(parsed.entries) < 1:
                self.validity = False
            for i in parsed.entries:
                line = "=> %s " %i.link
                if "published" in i:
                    pub_date = time.strftime("%Y-%m-%d",i.published_parsed)
                    line += pub_date + " : "
                if "title" in i:
                    line += "%s" %(i.title)
                if "author" in i:
                    line += " (by %s)"%i.author
                page += line + "\n"
                if mode == "full":
                    if "summary" in i:
                        html = HtmlRenderer(i.summary,self.url,center=False)
                        rendered = html.get_body(width=None,mode="full")
                        page += "\n"
                        page += rendered
                        page += "\n------------\n\n"
        return page

class ImageRenderer(AbstractRenderer):
    def get_mime(self):
        return "image/*"
    def is_valid(self):
        if _RENDER_IMAGE:
            return True
        else:
            return False
    def get_links(self,mode=None):
        return []
    def get_title(self):
        return "Picture file"
    def render(self,img,width=None,mode=None):
        #with inline, we use symbols to be rendered with less.
        #else we use the best possible renderer.
        if mode == "links_only":
            return "", []
        if not width:
            width = term_width()
            spaces = 0
        else:
            spaces = int((term_width() - width)//2)
        ansi_img = inline_image(img,width)
        #Now centering the image
        lines = ansi_img.splitlines()
        new_img = ""
        for l in lines:
            new_img += spaces*" " + l + "\n"
        return new_img, []
    def display(self,mode=None,window_title=None,window_info=None,grep=None):
        if window_title:
            print(self._window_title(window_title,info=window_info))
        terminal_image(self.body)
        return True

class HtmlRenderer(AbstractRenderer):
    def get_mime(self):
        return "text/html"
    def is_valid(self):
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
        return _DO_HTML and self.validity
    def get_subscribe_links(self):
        subs = [[self.url,self.get_mime(),self.get_title()]]
        soup = BeautifulSoup(self.body, 'html.parser')
        links = soup.find_all("link",rel="alternate",recursive=True)
        for l in links:
            ty = l.get("type")
            if ty :
                if "rss" in ty or "atom" in ty or "feed" in ty:
                    # some rss links are relatives: we absolutise_url
                    sublink = urllib.parse.urljoin(self.url, l.get("href"))
                    subs.append([sublink,ty,l.get("title")])
        return subs

    def get_title(self):
        if self.title:
            return self.title
        elif self.body:
            if _HAS_READABILITY:
                try:
                    readable = Document(self.body)
                    self.title = readable.short_title()
                    return self.title
                except Exception as err:
                    pass
            soup = BeautifulSoup(self.body,"html.parser")
            self.title = str(soup.title.string)
        else:
            return ""

    # Our own HTML engine (crazy, isn’t it?)
    # Return [rendered_body, list_of_links]
    # mode is either links_only, readable or full
    def render(self,body,mode=None,width=None,add_title=True):
        if not mode: mode = self.last_mode
        if not width:
            width = term_width()
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
            return
        # This method recursively parse the HTML
        r = self.representation(width,title=self.get_title(),center=self.center)
        links = []
        # You know how bad html is when you realize that space sometimes meaningful, somtimes not.
        # CR are not meaniningful. Except that, somethimes, they should be interpreted as spaces.
        # HTML is real crap. At least the one people are generating.

        def render_image(src,width=40,mode=None):
            ansi_img = ""
            imgurl,imgdata = looks_like_base64(src,self.url)
            if _RENDER_IMAGE and mode != "links_only" and imgurl:
                try:
                    #4 followings line are there to translate the URL into cache path
                    img = netcache.get_cache_path(imgurl)
                    if imgdata:
                        with open(img,"wb") as cached:
                            cached.write(base64.b64decode(imgdata))
                            cached.close()
                    if netcache.is_cache_valid(img):
                        renderer = ImageRenderer(img,imgurl)
                        # Image are 40px wide except if terminal is smaller
                        if width > 40:
                            size = 40
                        else:
                            size = width
                        ansi_img = "\n" + renderer.get_body(width=size,mode="inline")
                except Exception as err:
                    #we sometimes encounter really bad formatted files or URL
                    ansi_img = textwrap.fill("[BAD IMG] %s - %s"%(err,src),width) + "\n"
            return ansi_img
        def sanitize_string(string):
            #never start with a "\n"
            #string = string.lstrip("\n")
            string = string.replace("\r","").replace("\n", " ").replace("\t"," ")
            endspace = string.endswith(" ") or string.endswith("\xa0")
            startspace = string.startswith(" ") or string.startswith("\xa0")
            toreturn = string.replace("\n", " ").replace("\t"," ").strip()
            while "  " in toreturn:
                toreturn = toreturn.replace("  "," ")
            toreturn = html.unescape(toreturn)
            if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"):
                toreturn += " "
            if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"):
                toreturn = " " + toreturn
            return toreturn
        def recursive_render(element,indent="",preformatted=False):
            if element.name == "blockquote":
                r.newparagraph()
                r.startindent("   ",reverse="     ")
                for child in element.children:
                    r.open_color("italic")
                    recursive_render(child,indent="\t")
                    r.close_color("italic")
                r.endindent()
            elif element.name in ["div","p"]:
                r.newparagraph()
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.newparagraph()
            elif element.name in ["span"]:
                r.add_space()
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.add_space()
            elif element.name in ["h1","h2","h3","h4","h5","h6"]:
                r.open_color("blue")
                if element.name in ["h1"]:
                    r.open_color("bold")
                    r.open_color("underline")
                elif element.name in ["h2"]:
                    r.open_color("bold")
                elif element.name in ["h5","h6"]:
                    r.open_color("faint")
                for child in element.children:
                    r.newparagraph()
                    recursive_render(child)
                    r.newparagraph()
                    r.close_all()
            elif element.name in ["code","tt"]:
                for child in element.children:
                   recursive_render(child,indent=indent,preformatted=True)
            elif element.name in ["pre"]:
                r.newparagraph()
                r.add_block(element.text)
                r.newparagraph()
            elif element.name in ["li"]:
                r.startindent(" • ",sub="   ")
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.endindent()
            elif element.name in ["tr"]:
                r.startindent("|",reverse="|")
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.endindent()
            elif element.name in ["td","th"]:
                r.add_text("| ")
                for child in element.children:
                    recursive_render(child)
                r.add_text(" |")
            # italics
            elif element.name in ["em","i"]:
                r.open_color("italic")
                for child in element.children:
                    recursive_render(child,indent=indent,preformatted=preformatted)
                r.close_color("italic")
            #bold
            elif element.name in ["b","strong"]:
                r.open_color("bold")
                for child in element.children:
                    recursive_render(child,indent=indent,preformatted=preformatted)
                r.close_color("bold")
            elif element.name == "a":
                link = element.get('href')
                # support for images nested in links
                if link:
                    text = ""
                    imgtext = ""
                    #we display images first in a link
                    for child in element.children:
                        if child.name == "img":
                            recursive_render(child)
                            imgtext = "[IMG LINK %s]"
                    links.append(link+" "+text)
                    link_id = str(len(links))
                    r.open_color("blue")
                    r.open_color("faint")
                    for child in element.children:
                        if child.name != "img":
                            recursive_render(child,preformatted=preformatted)
                    if imgtext != "":
                        r.center_line()
                        r.add_text(imgtext%link_id)
                    else:
                        r.add_text(" [%s]"%link_id)
                    r.close_color("blue")
                    r.close_color("faint")
                else:
                    #No real link found
                    for child in element.children:
                        recursive_render(child,preformatted=preformatted)
            elif element.name == "img":
                src = element.get("src")
                text = ""
                ansi_img = render_image(src,width=width,mode=mode)
                alt = element.get("alt")
                if alt:
                    alt = sanitize_string(alt)
                    text += "[IMG] %s"%alt
                else:
                    text += "[IMG]"
                if src:
                    links.append(src+" "+text)
                    if not mode in self.images:
                        self.images[mode] = []
                    abs_url = urllib.parse.urljoin(self.url, src)
                    self.images[mode].append(abs_url)
                    link_id = " [%s]"%(len(links))
                    r.add_block(ansi_img)
                    r.open_color("faint")
                    r.open_color("yellow")
                    r.center_line()
                    r.add_text(text + link_id)
                    r.close_color("faint")
                    r.close_color("yellow")
                    r.newline()
            elif element.name == "br":
                r.newline()
            elif element.name not in ["script","style","template"] and type(element) != Comment:
                if element.string:
                    if preformatted :
                        r.open_color("faint")
                        r.add_text(element.string)
                        r.close_color("faint")
                    else:
                        s = sanitize_string(element.string)
                        if len(s.strip()) > 0:
                            r.add_text(s)
                else:
                    for child in element.children:
                        recursive_render(child,indent=indent)
        # the real render_html hearth
        if mode == "full":
            summary = body
        elif _HAS_READABILITY:
            try:
                readable = Document(body)
                summary = readable.summary()
            except Exception as err:
                summary = body
        else:
            summary = body
        soup = BeautifulSoup(summary, 'html.parser')
        #soup = BeautifulSoup(summary, 'html5lib')
        if soup :
            if soup.body :
                recursive_render(soup.body)
            else:
                recursive_render(soup)
        return r.get_final(),links


# Mapping mimetypes with renderers
# (any content with a mimetype text/* not listed here will be rendered with as GemText)
_FORMAT_RENDERERS = {
    "text/gemini":  GemtextRenderer,
    "text/html" :   HtmlRenderer,
    "text/xml" : FeedRenderer,
    "application/xml" : FeedRenderer,
    "application/rss+xml" : FeedRenderer,
    "application/atom+xml" : FeedRenderer,
    "text/gopher": GopherRenderer,
    "image/*": ImageRenderer
}
def get_mime(path):
    #Beware, this one is really a shaddy ad-hoc function
    if path.startswith("mailto:"):
        mime = "mailto"
    elif os.path.isdir(path):
        mime = "Local Folder"
    elif path.endswith(".gmi"):
        mime = "text/gemini"
    elif shutil.which("file") :
        mime = run("file -b --mime-type %s", parameter=path).strip()
        mime2,encoding = mimetypes.guess_type(path,strict=False)
        #If we hesitate between html and xml, takes the xml one
        #because the FeedRendered fallback to HtmlRenderer
        if mime2 and mime != mime2 and "html" in mime and "xml" in mime2:
            mime = "text/xml"
        # If it’s a xml file, consider it as such, regardless of what file thinks
        elif path.endswith(".xml"):
            mime = "text/xml"
        #Some xml/html document are considered as octet-stream
        if mime == "application/octet-stream":
            mime = "text/xml"
    else:
        mime,encoding = mimetypes.guess_type(path,strict=False)
    #gmi Mimetype is not recognized yet
    if not mime and not shutil.which("file") :
        print("Cannot guess the mime type of the file. Please install \"file\".")
        print("(and send me an email, I’m curious of systems without \"file\" installed!")
    if mime.startswith("text") and mime not in _FORMAT_RENDERERS:
        if mime2 and mime2 in _FORMAT_RENDERERS:
            mime = mime2
        else:
            #by default, we consider it’s gemini except for html
            mime = "text/gemini"
    return mime

def renderer_from_file(path,url=None):
    mime = get_mime(path)
    if not url:
        url = path
    if os.path.exists(path):
        if mime.startswith("text/"):
            with open(path) as f:
                print("DEBUG: opening %s"%path)
                content = f.read()
                f.close()
        else:
            content = path
        return set_renderer(content,url,mime)
    else:
        return None

def set_renderer(content,url,mime):
    renderer = None
    if mime == "Local Folder":
        renderer = FolderRenderer("",url,datadir=_DATA_DIR)
        return renderer
    mime_to_use = []
    for m in _FORMAT_RENDERERS:
        if fnmatch.fnmatch(mime, m):
            mime_to_use.append(m)
    if len(mime_to_use) > 0:
        current_mime = mime_to_use[0]
        func = _FORMAT_RENDERERS[current_mime]
        if current_mime.startswith("text"):
            renderer = func(content,url)
            # We double check if the renderer is correct.
            # If not, we fallback to html
            # (this is currently only for XHTML, often being
            # mislabelled as xml thus RSS feeds)
            if not renderer.is_valid():
                func = _FORMAT_RENDERERS["text/html"]
                #print("Set (fallback)RENDERER to html instead of %s"%mime)
                renderer = func(content,url)
        else:
            #TODO: check this code and then remove one if.
            #we don’t parse text, we give the file to the renderer
            renderer = func(content,url)
            if not renderer.is_valid():
                renderer = None
    return renderer


def render(input,path=None,format="auto",mime=None,url=None):
    if format == "gemtext":
        r = GemtextRenderer(input,url)
    elif format == "html":
        r = HtmlRenderer(input,url)
    elif format == "feed":
        r = FeedRenderer(input,url)
    elif format == "gopher":
        r = GopherRenderer(input,url)
    elif format == "image":
        r = ImageRenderer(input,url)
    elif format == "folder":
        r = FolderRenderer(input,url)
    else:
        if not mime and path:
            r= renderer_from_file(path,url)
        else:
            r = set_renderer(input,url,mime)
        print("DEBUG: renderer is %s"%r)
    if r:
        r.display()
    else:
        print("Could not render %s"%input)


def main():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument("--format", choices=["auto","gemtext","html","feed","gopher","image","folder"],
                        help="Renderer to use. Available: auto, gemtext, html, feed, gopher, image, folder")
    parser.add_argument("--mime", help="Mime of the content to parse")
    ## The argument needs to be a path to a file. If none, then stdin is used which allows
    ## to pipe text directly into ansirenderer
    parser.add_argument("--url",metavar="URL", nargs="*",
                        help="Original URL of the content")
    parser.add_argument("content",metavar="INPUT", nargs="*", type=argparse.FileType("r"),
                         default=sys.stdin, help="Path to the text to render (default to stdin)")
    args = parser.parse_args()
    # Detect if we are running interactively or in a pipe
    if sys.stdin.isatty():
        #we are interactive, not in stdin, we can have multiple files as input
        for f in args.content:
            path = os.path.abspath(f.name)
            try:
                content = f.read()
            except UnicodeDecodeError:
                content = f
            render(content,path=path,format=args.format,url=args.url,mime=args.mime)
    else:
        #we are in stdin
        if not args.format and not args.mime:
            print("Format or mime should be specified when running with stdin")
        else:
            render(args.content.read(),path=None,format=args.format,url=args.url,mime=args.mime)

if __name__ == '__main__':
    main()