experiment

This commit is contained in:
Lionel Dricot 2023-07-03 11:43:06 +02:00
parent 1a40ad786f
commit 86a67d47fa
2 changed files with 941 additions and 933 deletions

931
ansirenderer.py Normal file
View File

@ -0,0 +1,931 @@
#!/bin/python
import base64
import html
import os
import shutil
import tempfile
import textwrap
import time
import urllib.parse
# First, we define the different content->text renderers, outside of the rest
# (They could later be factorized in other files or replaced)
class AbstractRenderer():
    """Base class of the content -> ANSI text renderers.

    A concrete renderer is expected to provide ``get_mime()`` and
    ``render(body, width=..., mode=...)``; ``render`` returns a
    ``(rendered_text, links)`` pair, or a falsy value when nothing could be
    rendered.  Rendered text, links and image URLs are cached per mode.
    """

    def __init__(self, content, url, center=True):
        self.url = url
        self.body = str(content)
        # There is one rendered text and one links table per mode.
        self.rendered_text = {}
        self.links = {}
        self.images = {}
        self.title = None
        self.validity = True
        # Per-mode paths of the temporary files handed to less.
        self.temp_file = {}
        self.less_histfile = {}
        self.center = center

    class representation:
        """Internal representation of the text being rendered.

        Text is accumulated in ``last_line``; colour changes are remembered
        as positions inside that line and only turned into ANSI escape codes
        when the line is flushed to ``final_text`` by ``_endline()``.
        """

        def __init__(self, width, title=None, center=True):
            self.title = title
            self.center = center
            self.final_text = ""
            # Colours currently opened, in opening order.
            self.opened = []
            self.width = width
            self.last_line = ""
            # Maps a position in last_line to a list of [color, 0|1] entries
            # (0 = opening code, 1 = closing code).
            self.last_line_colors = {}
            self.last_line_center = False
            self.new_paragraph = True
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""
            self.disabled_indents = None
            # Each color is an [open, close] pair of ANSI codes.
            self.colors = {
                "bold": ["1", "22"],
                "faint": ["2", "22"],
                "italic": ["3", "23"],
                "underline": ["4", "24"],
                "red": ["31", "39"],
                "yellow": ["33", "39"],
                "blue": ["34", "39"],
            }

        def _insert(self, color, open=True):
            """Remember that `color` opens/closes at the current line position."""
            o = 0 if open else 1
            pos = len(self.last_line)
            entries = self.last_line_colors.setdefault(pos, [])
            inverse = [color, int(not o)]
            # Two inverse codes at the same position cancel each other.
            if inverse in entries:
                entries.remove(inverse)
            else:
                entries.append([color, o])

        def _endline(self):
            """Flush last_line, with its ANSI codes, into final_text.

            A line containing only whitespace is simply discarded.
            """
            if len(self.last_line.strip()) == 0:
                self.last_line = ""
                return
            # Opened colours are closed at the end of the line and reopened
            # at the start of the next one.
            for c in self.opened:
                self._insert(c, open=False)
            nextline = ""
            added_char = 0
            # popitem() iterates LIFO, so we walk backward from the end of
            # last_line, inserting the codes at the saved positions.
            while len(self.last_line_colors) > 0:
                pos, colors = self.last_line_colors.popitem()
                nextline = self.last_line[pos:] + nextline
                self.last_line = self.last_line[:pos]
                # A position whose codes all cancelled out keeps an empty
                # list: emit nothing instead of a malformed escape sequence.
                if colors:
                    ansicol = "\x1b[" + ";".join(self.colors[c][o] for c, o in colors) + "m"
                    nextline = ansicol + nextline
                    added_char += len(ansicol)
            nextline = self.last_line + nextline
            if self.last_line_center:
                # The (invisible) ANSI chars must be accounted for while centering.
                width = term_width() + added_char
                nextline = nextline.strip().center(width)
                self.last_line_center = False
            else:
                nextline = self.current_indent + nextline.lstrip() + self.r_indent
                self.current_indent = self.s_indent
            self.final_text += nextline
            self.last_line = ""
            self.final_text += "\n"
            for c in self.opened:
                self._insert(c, open=True)

        def center_line(self):
            """Ask for the current line to be centered when it is flushed."""
            self.last_line_center = True

        def open_color(self, color):
            """Open `color` unless it is unknown or already opened."""
            if color in self.colors and color not in self.opened:
                self._insert(color, open=True)
                self.opened.append(color)

        def close_color(self, color):
            """Close `color` if it is currently opened."""
            if color in self.colors and color in self.opened:
                self._insert(color, open=False)
                self.opened.remove(color)

        def close_all(self):
            """Append an ANSI reset to the line and forget every opened colour."""
            # NOTE(review): the test is on the colour table, not on
            # self.opened, so the reset code is always emitted.
            if len(self.colors) > 0:
                self.last_line += "\x1b[0m"
                self.opened.clear()

        def startindent(self, indent, sub=None, reverse=None):
            """Start an indented section.

            `indent` prefixes the first line, `sub` the following ones
            (defaults to `indent`) and `reverse` is appended to every line.
            """
            self._endline()
            self.i_indent = indent
            self.current_indent = indent
            self.s_indent = sub if sub else indent
            self.r_indent = reverse if reverse else ""

        def endindent(self):
            """Flush the current line and drop every indent."""
            self._endline()
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""

        def _disable_indents(self):
            """Save the current indents, then reset them (flushes the line)."""
            self.disabled_indents = [
                self.current_indent,
                self.i_indent,
                self.s_indent,
                self.r_indent,
            ]
            self.endindent()

        def _enable_indents(self):
            """Restore the indents saved by _disable_indents(), if any."""
            if self.disabled_indents:
                (self.current_indent,
                 self.i_indent,
                 self.s_indent,
                 self.r_indent) = self.disabled_indents
            self.disabled_indents = None

        def newline(self):
            """Flush the current line."""
            self._endline()

        def newparagraph(self, force=False):
            """Start a new paragraph (one blank line between paragraphs).

            Nothing happens if a paragraph was already started and no text
            was entered since, to avoid piling up blank lines; force=True
            bypasses that limit.
            """
            if force or not self.new_paragraph:
                self._endline()
                self.final_text += "\n"
                self.new_paragraph = True

        def add_space(self):
            """Append one space unless the line is empty or already ends with one."""
            if len(self.last_line) > 0 and self.last_line[-1] != " ":
                self.last_line += " "

        def _title_first(self, intext=None):
            """Emit the pending title, unless `intext` is the title itself.

            The title is emitted at most once: it is cleared afterwards.
            """
            if self.title:
                if not self.title == intext:
                    self._disable_indents()
                    self.open_color("blue")
                    self.open_color("bold")
                    self.open_color("underline")
                    self.add_text(self.title)
                    self.close_all()
                    self.newparagraph()
                    self._enable_indents()
                self.title = None

        def add_block(self, intext):
            """Add `intext` verbatim to the output.

            Beware, blocks are not wrapped nor indented and left untouched!
            They are mostly useful for pictures and preformatted text.
            """
            # If necessary, the title is added before the block.
            self._title_first()
            # Blocks are not indented.
            self._endline()
            self._disable_indents()
            self.final_text += self.current_indent + intext
            self.new_paragraph = False
            self._endline()
            self._enable_indents()

        def add_text(self, intext):
            """Append `intext` to the current line, wrapping at self.width."""
            self._title_first(intext=intext)
            last = self.last_line + intext
            self.last_line = ""
            # Adding only spaces on an empty line is basically cancelled.
            if len(last.strip()) > 0:
                self.new_paragraph = False
            else:
                last = last.strip()
            if len(last) <= self.width:
                self.last_line = last
                return
            width = self.width - len(self.current_indent) - len(self.r_indent)
            spaces_left = len(last) - len(last.lstrip())
            spaces_right = len(last) - len(last.rstrip())
            lines = textwrap.wrap(last, width, drop_whitespace=True)
            self.last_line += spaces_left * " "
            # Every wrapped line but the last one is complete and flushed.
            while len(lines) > 1:
                self.last_line += lines.pop(0)
                self._endline()
            if len(lines) == 1:
                self.last_line += lines[0] + spaces_right * " "

        def get_final(self):
            """Flush everything and return the rendered text.

            When centering is enabled and the terminal is wider than
            term_width(), every line is prefixed with a left margin.
            """
            self.close_all()
            self._endline()
            # If there is no content, the title is still added.
            self._title_first()
            termspace = shutil.get_terminal_size()[0]
            if self.center and termspace > term_width():
                margin = int((termspace - term_width()) // 2)
            else:
                margin = 0
            prefix = margin * " "
            return "\n".join(prefix + l for l in self.final_text.splitlines())

    def get_subscribe_links(self):
        """Return a list of [url, mime, title] entries; the page itself comes first."""
        return [[self.url, self.get_mime(), self.get_title()]]

    def is_valid(self):
        return self.validity

    def get_links(self, mode="links_only"):
        """Return the links found while rendering in `mode`.

        An empty list is returned when the content could not be rendered.
        """
        if mode not in self.links:
            prepared_body = self.prepare(self.body, mode=mode)
            results = self.render(prepared_body, mode=mode)
            if results:
                self.links[mode] = results[1]
                for l in self.get_subscribe_links()[1:]:
                    self.links[mode].append(l[0])
        return self.links.get(mode, [])

    def get_title(self):
        return "Abstract title"

    def get_images(self, mode="readable"):
        """Return the list of URLs which should be downloaded before
        displaying the page (images in HTML pages, typically)."""
        if mode not in self.images:
            self.get_body(mode=mode)
            # The body that was rendered without images is invalidated.
            self.rendered_text.pop(mode, None)
        return self.images.get(mode, [])

    def prepare(self, body, mode=None):
        """Hook called before rendering; the base class returns `body` unchanged."""
        return body

    def get_body(self, width=None, mode="readable"):
        """Return the text rendered in `mode` (cached per mode).

        An empty string is returned when render() produced nothing.
        """
        if not width:
            width = term_width()
        if mode not in self.rendered_text:
            prepared_body = self.prepare(self.body, mode=mode)
            result = self.render(prepared_body, width=width, mode=mode)
            if result:
                self.rendered_text[mode] = result[0]
                self.links[mode] = result[1]
        return self.rendered_text.get(mode, "")

    def _window_title(self, title, info=None):
        """Render `title` in bold red, followed by ` (info)` in red if given."""
        title_r = self.representation(term_width())
        title_r.open_color("red")
        title_r.open_color("bold")
        title_r.add_text(title)
        title_r.close_color("bold")
        if info:
            title_r.add_text(" (%s)" % info)
        title_r.close_color("red")
        return title_r.get_final()

    def display(self, mode="readable", window_title="", window_info=None, grep=None):
        """Show the rendered body through less_cmd().

        Returns False when there is nothing to display, True otherwise.
        """
        if not mode:
            mode = "readable"
        wtitle = self._window_title(window_title, info=window_info)
        rendered = self.get_body(mode=mode)
        if not rendered:
            return False
        body = wtitle + "\n" + rendered
        # The body is actually put in a tmpfile before giving it to less.
        if mode not in self.temp_file:
            with tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) as tmpf:
                self.temp_file[mode] = tmpf.name
                tmpf.write(body)
        firsttime = mode not in self.less_histfile
        if firsttime:
            # Only the path is needed: close the handle right away.
            with tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) as histf:
                self.less_histfile[mode] = histf.name
        less_cmd(self.temp_file[mode], histfile=self.less_histfile[mode], cat=firsttime, grep=grep)
        return True

    def get_temp_file(self, mode="readable"):
        """Return the path of the temp file created by display() for `mode`, or None."""
        return self.temp_file.get(mode)
# A concrete subclass of AbstractRenderer must provide a self.render(body,width,mode) method.
# Three modes are used: readable (the default), full and links_only (the fastest, used when
# the rendered content is not needed, only the links).
# The prepare() function is called before the rendering. It is useful when
# your renderer outputs a format suitable for another existing renderer (such as gemtext).
# Gemtext Rendering Engine
class GemtextRenderer(AbstractRenderer):
    """Renderer for gemtext documents."""

    def get_mime(self):
        return "text/gemini"

    def get_title(self):
        """Return the page title and cache it in self.title.

        The first line starting with "#" is used; failing that, the first
        line of the body, cut to 50 characters.
        """
        if self.title:
            return self.title
        if not self.body:
            # NOTE(review): this message mentions Gopher although this is the
            # gemtext renderer; the string is kept unchanged.
            return "Unknown Gopher Page"
        lines = self.body.splitlines()
        for line in lines:
            if line.startswith("#"):
                self.title = line.strip("#").strip()
                return self.title
        if len(lines) > 0:
            # If no title is found, we take the first 50 chars of the first
            # line: 49 characters plus an ellipsis marking the cut.
            title_line = lines[0].strip()
            if len(title_line) > 50:
                title_line = title_line[:49] + "\u2026"
            self.title = title_line
        else:
            self.title = "Empty Page"
        return self.title

    def render(self, gemtext, width=None, mode=None):
        """Render `gemtext` and return a (rendered_text, links) pair.

        `links` holds the "=>" link lines first (URL, optionally followed by
        the link name), then every word containing "://" found in plain text.
        """
        if not width:
            width = term_width()
        r = self.representation(width)
        links = []
        hidden_links = []
        preformatted = False

        def format_link(url, index, name=None):
            """Build the "[index protocol] name" line shown for a link."""
            if "://" in url:
                protocol, adress = url.split("://", maxsplit=1)
                protocol = " %s" % protocol
            else:
                adress = url
                protocol = ""
            # The protocol is not displayed for gemini and list URLs.
            if "gemini" in protocol or "list" in protocol:
                protocol = ""
            if not name:
                name = adress
            return "[%d%s] %s" % (index, protocol, name)

        for line in gemtext.splitlines():
            r.newline()
            if line.startswith("```"):
                preformatted = not preformatted
            elif preformatted:
                # Blocks are never wrapped, so preformatted text is kept as is.
                r.add_block(line + "\n")
            elif len(line.strip()) == 0:
                r.newparagraph(force=True)
            elif line.startswith("=>"):
                strippedline = line[2:].strip()
                if strippedline:
                    links.append(strippedline)
                    splitted = strippedline.split(maxsplit=1)
                    url = splitted[0]
                    name = splitted[1] if len(splitted) > 1 else None
                    link = format_link(url, len(links), name=name)
                    # Wrapped lines are aligned with the text after "[n] ".
                    startpos = link.find("] ") + 2
                    r.startindent("", sub=startpos * " ")
                    r.add_text(link)
                    r.endindent()
            elif line.startswith("* "):
                # The "*" marker is dropped from the text and rendered as a
                # bullet; wrapped lines are aligned under the item text.
                line = line[1:].lstrip("\t ")
                r.startindent("\u2022 ", sub="  ")
                r.add_text(line)
                r.endindent()
            elif line.startswith(">"):
                line = line[1:].lstrip("\t ")
                r.startindent("> ")
                r.add_text(line)
                r.endindent()
            elif line.startswith("###"):
                line = line[3:].lstrip("\t ")
                r.open_color("blue")
                r.add_text(line)
                r.close_color("blue")
            elif line.startswith("##"):
                line = line[2:].lstrip("\t ")
                r.open_color("blue")
                r.add_text(line)
                r.close_color("blue")
            elif line.startswith("#"):
                line = line[1:].lstrip("\t ")
                # The first top-level heading becomes the page title.
                if not self.title:
                    self.title = line
                r.open_color("bold")
                r.open_color("blue")
                r.open_color("underline")
                r.add_text(line)
                r.close_color("underline")
                r.close_color("bold")
                r.close_color("blue")
            else:
                # URLs written in plain text are collected as hidden links.
                if "://" in line:
                    hidden_links.extend(w for w in line.split() if "://" in w)
                r.add_text(line.rstrip())
        links += hidden_links
        return r.get_final(), links
class GopherRenderer(AbstractRenderer):
    """Renderer for Gopher menus and Gopher text."""

    def get_mime(self):
        return "text/gopher"

    def get_title(self):
        """Return the first field of the first line of the body as title.

        A leading "i" (info line marker) is removed.  The result is cached
        in self.title; it is "" when there is no body.
        """
        if not self.title:
            self.title = ""
            if self.body:
                firstline = self.body.splitlines()[0]
                firstline = firstline.split("\t")[0]
                if firstline.startswith("i"):
                    firstline = firstline[1:]
                self.title = firstline
        return self.title

    def render(self, body, width=None, mode=None):
        """Render `body` as a Gopher menu and return (rendered_text, links).

        If the menu rendering fails for any reason, the error is printed and
        the body is shown untouched, with no links.
        """
        if not width:
            width = term_width()
        try:
            render, links = self._render_goph(body, width=width, mode=mode)
        except Exception as err:
            print("Error rendering Gopher ", err)
            r = self.representation(width)
            r.add_block(body)
            render = r.get_final()
            links = []
        return render, links

    def _render_goph(self, body, width=None, mode=None):
        """Render a Gopher menu; return (rendered_text, links).

        Each link is stored as "url name".
        """
        if not width:
            width = term_width()
        # This was copied straight from Agena (then later adapted)
        links = []
        r = self.representation(width)
        # Render the body we were given (the output of prepare()), not
        # self.body.
        for line in body.split("\n"):
            r.newline()
            if line.startswith("i"):
                # Info line: only the first tab-separated field is displayed.
                towrap = line[1:].split("\t")[0]
                if len(towrap.strip()) > 0:
                    r.add_text(towrap)
                else:
                    r.newparagraph()
            elif line.strip() not in [".", ""]:
                parts = line.split("\t")
                parts[-1] = parts[-1].strip()
                # A trailing "+" field is ignored.
                if parts[-1] == "+":
                    parts = parts[:-1]
                if len(parts) == 4:
                    name, path, host, port = parts
                    itemtype = name[0]
                    name = name[1:]
                    # Port 70 is left out of the URL.
                    port = "" if port == "70" else ":%s" % port
                    if itemtype == "h" and path.startswith("URL:"):
                        url = path[4:]
                    else:
                        url = "gopher://%s%s/%s%s" % (host, port, itemtype, path)
                    url = url.replace(" ", "%20")
                    links.append(url + " " + name)
                    r.add_text("[%s] " % len(links) + name)
                else:
                    r.add_text(line)
        return r.get_final(), links
class FolderRenderer(GemtextRenderer):
    """Renderer for local folders; the "lists" folder of `datadir` is shown
    as a gemtext index of the user's lists."""

    def __init__(self, content, url, center=True, datadir=None):
        GemtextRenderer.__init__(self, content, url, center)
        self.datadir = datadir

    def get_mime(self):
        return "Directory"

    def prepare(self, body, mode=None):
        """Return the gemtext to render for this folder.

        For any folder other than <datadir>/lists, a one-line description is
        returned.  For the lists folder, the ".gmi" files it contains are
        grouped by kind, each list being a link followed by its item count.
        When the folder holds no list, `body` is returned unchanged so that
        the caller always gets a string.
        """
        listdir = os.path.join(self.datadir, "lists")

        def get_first_line(l):
            """Return the first line of list `l` if it starts with "#", else None."""
            path = os.path.join(listdir, l + ".gmi")
            with open(path) as f:
                first_line = f.readline().strip()
            if first_line.startswith("#"):
                return first_line
            return None

        def write_list(l):
            """Return one gemtext link line per list name in `l`."""
            entries = []
            for li in l:
                path = "list:///%s" % li
                gi = GeminiItem(path)
                size = len(gi.get_links())
                entries.append("=> %s %s (%s items)\n" % (str(path), li, size))
            return "".join(entries)

        if self.url != listdir:
            return "This is folder %s" % self.url
        self.title = "My lists"
        lists = []
        if os.path.exists(listdir):
            # Only ".gmi" files are lists; the extension is removed from the
            # name.  Other files are ignored instead of having their last
            # four characters chopped off.
            lists = sorted(f[:-4] for f in os.listdir(listdir) if f.endswith(".gmi"))
        if not lists:
            return body
        my_lists = []
        system_lists = []
        subscriptions = []
        frozen = []
        for l in lists:
            if l in ["history", "to_fetch", "archives", "tour"]:
                system_lists.append(l)
                continue
            first_line = get_first_line(l)
            if first_line and "#subscribed" in first_line:
                subscriptions.append(l)
            elif first_line and "#frozen" in first_line:
                frozen.append(l)
            else:
                my_lists.append(l)
        body = ""
        if len(my_lists) > 0:
            body += "\n## Bookmarks Lists (updated during sync)\n"
            body += write_list(my_lists)
        if len(subscriptions) > 0:
            body += "\n## Subscriptions (new links in those are added to tour)\n"
            body += write_list(subscriptions)
        if len(frozen) > 0:
            body += "\n## Frozen (fetched but never updated)\n"
            body += write_list(frozen)
        if len(system_lists) > 0:
            body += "\n## System Lists\n"
            body += write_list(system_lists)
        return body
class FeedRenderer(GemtextRenderer):
    """Renderer for RSS/Atom feeds: the feed is converted to gemtext by
    prepare() and rendered by the gemtext renderer."""

    def get_mime(self):
        return "application/rss+xml"

    def is_valid(self):
        """Return True if the body parses as a feed with at least one entry."""
        if not _DO_FEED:
            return False
        parsed = feedparser.parse(self.body)
        if parsed.bozo:
            return False
        # If there is no content, then fallback to HTML
        return len(parsed.entries) > 0

    def get_title(self):
        if not self.title:
            # The title is set as a side effect of prepare().
            self.get_body()
        return self.title

    def prepare(self, content, mode="readable", width=None):
        """Convert the feed `content` to gemtext.

        Sets self.title, and sets self.validity to False when feedparser is
        missing, the feed is invalid or it has no entry.  In "full" mode the
        summary of each entry is rendered below its link.
        """
        if not width:
            width = term_width()
        self.title = "RSS/Atom feed"
        page = ""
        if not _DO_FEED:
            page += "Please install python-feedparser to handle RSS/Atom feeds\n"
            self.validity = False
            return page
        parsed = feedparser.parse(content)
        if parsed.bozo:
            page += "Invalid RSS feed\n\n"
            page += str(parsed.bozo_exception)
            self.validity = False
            return page
        if "title" in parsed.feed:
            t = parsed.feed.title
        else:
            t = "Unknown"
        self.title = "%s (XML feed)" % t
        page += "# %s" % self.title + "\n"
        if "updated" in parsed.feed:
            page += "Last updated on %s\n\n" % parsed.feed.updated
        if "subtitle" in parsed.feed:
            page += parsed.feed.subtitle + "\n"
        if "link" in parsed.feed:
            page += "=> %s\n" % parsed.feed.link
        page += "\n## Entries\n"
        if len(parsed.entries) < 1:
            self.validity = False
        for i in parsed.entries:
            line = "=> %s " % i.link
            # A date that could not be parsed leaves published_parsed unset
            # or None: the date is then left out instead of crashing.
            if "published" in i and i.get("published_parsed"):
                pub_date = time.strftime("%Y-%m-%d", i.published_parsed)
                line += pub_date + " : "
            if "title" in i:
                line += "%s" % (i.title)
            if "author" in i:
                line += " (by %s)" % i.author
            page += line + "\n"
            if mode == "full" and "summary" in i:
                summary_renderer = HtmlRenderer(i.summary, self.url, center=False)
                rendered = summary_renderer.get_body(width=None, mode="full")
                page += "\n"
                page += rendered
                page += "\n------------\n\n"
        return page
class ImageRenderer(AbstractRenderer):
    """Renderer for picture files; self.body is handed to the image helpers."""

    def get_mime(self):
        return "image/*"

    def is_valid(self):
        """Images are only valid when image rendering is available."""
        return True if _RENDER_IMAGE else False

    def get_links(self, mode=None):
        # A picture has no links.
        return []

    def get_title(self):
        return "Picture file"

    def render(self, img, width=None, mode=None):
        """Return (ansi_picture, []) for `img`, rendered with inline_image().

        With an explicit `width`, the picture is padded on the left so that
        it is centered within term_width().
        """
        # With inline, we use symbols to be rendered with less.
        # Else we use the best possible renderer.
        if mode == "links_only":
            return "", []
        if width:
            padding = int((term_width() - width) // 2)
        else:
            width = term_width()
            padding = 0
        picture = inline_image(img, width)
        # Now centering the image
        centered = "".join(padding * " " + row + "\n" for row in picture.splitlines())
        return centered, []

    def display(self, mode=None, window_title=None, window_info=None, grep=None):
        """Print the window title, if any, then show the picture with
        terminal_image(). Always returns True."""
        if window_title:
            print(self._window_title(window_title, info=window_info))
        terminal_image(self.body)
        return True
class HtmlRenderer(AbstractRenderer):
    """Renderer for HTML documents."""

    def get_mime(self):
        return "text/html"

    def is_valid(self):
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
        return _DO_HTML and self.validity

    def get_subscribe_links(self):
        """Return [url, mime, title] entries: the page itself, then every
        <link rel="alternate"> whose type mentions rss, atom or feed."""
        subs = [[self.url, self.get_mime(), self.get_title()]]
        soup = BeautifulSoup(self.body, 'html.parser')
        for l in soup.find_all("link", rel="alternate", recursive=True):
            ty = l.get("type")
            if ty and ("rss" in ty or "atom" in ty or "feed" in ty):
                subs.append([l.get("href"), ty, l.get("title")])
        return subs

    def get_title(self):
        """Return the page title, "" when none can be found.

        The readability short title is preferred when available; the
        <title> tag is the fallback.  A title that was found is cached in
        self.title.
        """
        if self.title:
            return self.title
        if not self.body:
            return ""
        if _HAS_READABILITY:
            try:
                readable = Document(self.body)
                self.title = readable.short_title()
                return self.title
            except Exception:
                # Best effort: fall back to the <title> tag below.
                pass
        soup = BeautifulSoup(self.body, "html.parser")
        # A page may have no <title> tag, or an empty one.
        if soup.title and soup.title.string:
            self.title = str(soup.title.string)
            return self.title
        return ""

    # Our own HTML engine (crazy, isn't it?)
    def render(self, body, mode="readable", width=None, add_title=True):
        """Render the HTML `body`; return (rendered_text, links).

        `mode` is either links_only, readable or full.  In full mode the
        whole document is rendered; otherwise the readability summary is
        used when available.  Returns None when HTML support is missing.
        """
        if not width:
            width = term_width()
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
            return
        r = self.representation(width, title=self.get_title(), center=self.center)
        links = []

        # You know how bad html is when you realize that space is sometimes
        # meaningful, sometimes not. CR are not meaningful. Except that,
        # sometimes, they should be interpreted as spaces.
        def render_image(src, width=40, mode=None):
            """Return the ANSI rendering of image `src`, "" if unavailable."""
            ansi_img = ""
            imgurl, imgdata = looks_like_base64(src, self.url)
            if _RENDER_IMAGE and mode != "links_only" and imgurl:
                try:
                    # The URL is translated into its cache path.
                    g = GeminiItem(imgurl)
                    img = g.get_cache_path()
                    if imgdata:
                        with open(img, "wb") as cached:
                            cached.write(base64.b64decode(imgdata))
                    if g.is_cache_valid():
                        renderer = ImageRenderer(img, imgurl)
                        # Images are 40 wide except if the terminal is smaller.
                        size = min(width, 40)
                        ansi_img = "\n" + renderer.get_body(width=size, mode="inline")
                except Exception as err:
                    # We sometimes encounter really badly formatted files or URLs.
                    ansi_img = textwrap.fill("[BAD IMG] %s - %s" % (err, src), width) + "\n"
            return ansi_img

        def sanitize_string(string):
            """Collapse the whitespace of `string` and unescape HTML entities.

            One leading and one trailing space are kept when the original
            text started or ended with whitespace.
            """
            string = string.replace("\r", "").replace("\n", " ").replace("\t", " ")
            endspace = string.endswith(" ") or string.endswith("\xa0")
            startspace = string.startswith(" ") or string.startswith("\xa0")
            toreturn = string.strip()
            # Runs of spaces are reduced to a single space.
            while "  " in toreturn:
                toreturn = toreturn.replace("  ", " ")
            toreturn = html.unescape(toreturn)
            if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"):
                toreturn += " "
            if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"):
                toreturn = " " + toreturn
            return toreturn

        def recursive_render(element, indent="", preformatted=False):
            """Render `element` and, recursively, its children into `r`."""
            if element.name == "blockquote":
                r.newparagraph()
                r.startindent(" ", reverse=" ")
                for child in element.children:
                    r.open_color("italic")
                    recursive_render(child, indent="\t")
                    r.close_color("italic")
                r.endindent()
            elif element.name in ["div", "p"]:
                r.newparagraph()
                for child in element.children:
                    recursive_render(child, indent=indent)
                r.newparagraph()
            elif element.name in ["span"]:
                r.add_space()
                for child in element.children:
                    recursive_render(child, indent=indent)
                r.add_space()
            elif element.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
                r.open_color("blue")
                if element.name in ["h1"]:
                    r.open_color("bold")
                    r.open_color("underline")
                elif element.name in ["h2"]:
                    r.open_color("bold")
                elif element.name in ["h5", "h6"]:
                    r.open_color("faint")
                for child in element.children:
                    r.newparagraph()
                    recursive_render(child)
                    r.newparagraph()
                # Closed after the children so that an empty heading does
                # not leave its colours opened.
                r.close_all()
            elif element.name in ["code", "tt"]:
                for child in element.children:
                    recursive_render(child, indent=indent, preformatted=True)
            elif element.name in ["pre"]:
                r.newparagraph()
                r.add_block(element.text)
                r.newparagraph()
            elif element.name in ["li"]:
                # List items start with a bullet; wrapped lines are aligned
                # under the item text.
                r.startindent("\u2022 ", sub="  ")
                for child in element.children:
                    recursive_render(child, indent=indent)
                r.endindent()
            elif element.name in ["tr"]:
                r.startindent("|", reverse="|")
                for child in element.children:
                    recursive_render(child, indent=indent)
                r.endindent()
            elif element.name in ["td", "th"]:
                r.add_text("| ")
                for child in element.children:
                    recursive_render(child)
                r.add_text(" |")
            # italics
            elif element.name in ["em", "i"]:
                r.open_color("italic")
                for child in element.children:
                    recursive_render(child, indent=indent, preformatted=preformatted)
                r.close_color("italic")
            # bold
            elif element.name in ["b", "strong"]:
                r.open_color("bold")
                for child in element.children:
                    recursive_render(child, indent=indent, preformatted=preformatted)
                r.close_color("bold")
            elif element.name == "a":
                link = element.get('href')
                if link:
                    # Support for images nested in links: images are
                    # displayed first.
                    imgtext = ""
                    for child in element.children:
                        if child.name == "img":
                            recursive_render(child)
                            imgtext = "[IMG LINK %s]"
                    # Links are stored as "url name"; no name is known here.
                    links.append(link + " ")
                    link_id = str(len(links))
                    r.open_color("blue")
                    r.open_color("faint")
                    for child in element.children:
                        if child.name != "img":
                            recursive_render(child, preformatted=preformatted)
                    if imgtext != "":
                        r.center_line()
                        r.add_text(imgtext % link_id)
                    else:
                        r.add_text(" [%s]" % link_id)
                    r.close_color("blue")
                    r.close_color("faint")
                else:
                    # No real link found
                    for child in element.children:
                        recursive_render(child, preformatted=preformatted)
            elif element.name == "img":
                src = element.get("src")
                alt = element.get("alt")
                if alt:
                    text = "[IMG] %s" % sanitize_string(alt)
                else:
                    text = "[IMG]"
                # An image without a source cannot be fetched nor linked.
                if src:
                    ansi_img = render_image(src, width=width, mode=mode)
                    links.append(src + " " + text)
                    if mode not in self.images:
                        self.images[mode] = []
                    abs_url = urllib.parse.urljoin(self.url, src)
                    self.images[mode].append(abs_url)
                    link_id = " [%s]" % (len(links))
                    r.add_block(ansi_img)
                    r.open_color("faint")
                    r.open_color("yellow")
                    r.center_line()
                    r.add_text(text + link_id)
                    r.close_color("faint")
                    r.close_color("yellow")
                    r.newline()
            elif element.name == "br":
                r.newline()
            elif element.name not in ["script", "style", "template"] and not isinstance(element, Comment):
                if element.string:
                    if preformatted:
                        r.open_color("faint")
                        r.add_text(element.string)
                        r.close_color("faint")
                    else:
                        s = sanitize_string(element.string)
                        if len(s.strip()) > 0:
                            r.add_text(s)
                else:
                    for child in element.children:
                        recursive_render(child, indent=indent)

        # The real render_html heart
        if mode == "full":
            summary = body
        elif _HAS_READABILITY:
            try:
                readable = Document(body)
                summary = readable.summary()
            except Exception:
                # Readability failed: render the whole document instead.
                summary = body
        else:
            summary = body
        soup = BeautifulSoup(summary, 'html.parser')
        if soup:
            if soup.body:
                recursive_render(soup.body)
            else:
                recursive_render(soup)
        return r.get_final(), links

View File

@ -48,6 +48,7 @@ import webbrowser
import html
import base64
import subprocess
import ansirenderer
# In terms of arguments, this can take an input file/string to be passed to
# stdin, a parameter to do (well-escaped) "%" replacement on the command, a
@ -394,941 +395,17 @@ standard_ports = {
"https" : 443,
"spartan": 300,
}
# First, we define the different content->text renderers, outside of the rest
# (They could later be factorized in other files or replaced)
class AbstractRenderer():
def __init__(self,content,url,center=True):
self.url = url
self.body = str(content)
#theres one rendered text and one links table per mode
self.rendered_text = {}
self.links = {}
self.images = {}
self.title = None
self.validity = True
self.temp_file = {}
self.less_histfile = {}
self.center = center
#This class hold an internal representation of the HTML text
class representation:
def __init__(self,width,title=None,center=True):
self.title=title
self.center = center
self.final_text = ""
self.opened = []
self.width = width
self.last_line = ""
self.last_line_colors = {}
self.last_line_center = False
self.new_paragraph = True
self.i_indent = ""
self.s_indent = ""
self.r_indent = ""
self.current_indent = ""
self.disabled_indents = None
# each color is an [open,close] pair code
self.colors = {
"bold" : ["1","22"],
"faint" : ["2","22"],
"italic" : ["3","23"],
"underline": ["4","24"],
"red" : ["31","39"],
"yellow" : ["33","39"],
"blue" : ["34","39"],
}
def _insert(self,color,open=True):
if open: o = 0
else: o = 1
pos = len(self.last_line)
#we remember the position where to insert color codes
if not pos in self.last_line_colors:
self.last_line_colors[pos] = []
#Two inverse code cancel each other
if [color,int(not o)] in self.last_line_colors[pos]:
self.last_line_colors[pos].remove([color,int(not o)])
else:
self.last_line_colors[pos].append([color,o])#+color+str(o))
# Take self.last line and add ANSI codes to it before adding it to
# self.final_text.
def _endline(self):
if len(self.last_line.strip()) > 0:
for c in self.opened:
self._insert(c,open=False)
nextline = ""
added_char = 0
#we insert the color code at the saved positions
while len (self.last_line_colors) > 0:
pos,colors = self.last_line_colors.popitem()
#popitem itterates LIFO.
#So we go, backward, to the pos (starting at the end of last_line)
nextline = self.last_line[pos:] + nextline
ansicol = "\x1b["
for c,o in colors:
ansicol += self.colors[c][o] + ";"
ansicol = ansicol[:-1]+"m"
nextline = ansicol + nextline
added_char += len(ansicol)
self.last_line = self.last_line[:pos]
nextline = self.last_line + nextline
if self.last_line_center:
#we have to care about the ansi char while centering
width = term_width() + added_char
nextline = nextline.strip().center(width)
self.last_line_center = False
else:
#should we lstrip the nextline in the addition ?
nextline = self.current_indent + nextline.lstrip() + self.r_indent
self.current_indent = self.s_indent
self.final_text += nextline
self.last_line = ""
self.final_text += "\n"
for c in self.opened:
self._insert(c,open=True)
else:
self.last_line = ""
def center_line(self):
self.last_line_center = True
def open_color(self,color):
if color in self.colors and color not in self.opened:
self._insert(color,open=True)
self.opened.append(color)
def close_color(self,color):
if color in self.colors and color in self.opened:
self._insert(color,open=False)
self.opened.remove(color)
def close_all(self):
if len(self.colors) > 0:
self.last_line += "\x1b[0m"
self.opened.clear()
def startindent(self,indent,sub=None,reverse=None):
self._endline()
self.i_indent = indent
self.current_indent = indent
if sub:
self.s_indent = sub
else:
self.s_indent = indent
if reverse:
self.r_indent = reverse
else:
self.r_indent = ""
def endindent(self):
self._endline()
self.i_indent = ""
self.s_indent = ""
self.r_indent = ""
self.current_indent = ""
def _disable_indents(self):
self.disabled_indents = []
self.disabled_indents.append(self.current_indent)
self.disabled_indents.append(self.i_indent)
self.disabled_indents.append(self.s_indent)
self.disabled_indents.append(self.r_indent)
self.endindent()
def _enable_indents(self):
if self.disabled_indents:
self.current_indent = self.disabled_indents[0]
self.i_indent = self.disabled_indents[1]
self.s_indent = self.disabled_indents[2]
self.r_indent = self.disabled_indents[3]
self.disabled_indents = None
def newline(self):
self._endline()
#A new paragraph implies 2 newlines (1 blank line between paragraphs)
#But it is only used if didnt already started one to avoid plenty
#of blank lines. force=True allows to bypass that limit.
#new_paragraph becomes false as soon as text is entered into it
def newparagraph(self,force=False):
if force or not self.new_paragraph:
self._endline()
self.final_text += "\n"
self.new_paragraph = True
def add_space(self):
if len(self.last_line) > 0 and self.last_line[-1] != " ":
self.last_line += " "
def _title_first(self,intext=None):
if self.title:
if not self.title == intext:
self._disable_indents()
self.open_color("blue")
self.open_color("bold")
self.open_color("underline")
self.add_text(self.title)
self.close_all()
self.newparagraph()
self._enable_indents()
self.title = None
# Beware, blocks are not wrapped nor indented and left untouched!
# They are mostly useful for pictures and preformatted text.
def add_block(self,intext):
# If necessary, we add the title before a block
self._title_first()
# we dont want to indent blocks
self._endline()
self._disable_indents()
self.final_text += self.current_indent + intext
self.new_paragraph = False
self._endline()
self._enable_indents()
def add_text(self,intext):
self._title_first(intext=intext)
lines = []
last = (self.last_line + intext)
self.last_line = ""
# With the following, we basically cancel adding only spaces
# on an empty line
if len(last.strip()) > 0:
self.new_paragraph = False
else:
last = last.strip()
if len(last) > self.width:
width = self.width - len(self.current_indent) - len(self.r_indent)
spaces_left = len(last) - len(last.lstrip())
spaces_right = len(last) - len(last.rstrip())
lines = textwrap.wrap(last,width,drop_whitespace=True)
self.last_line += spaces_left*" "
while len(lines) > 1:
l = lines.pop(0)
self.last_line += l
self._endline()
if len(lines) == 1:
li = lines[0]
self.last_line += li + spaces_right*" "
else:
self.last_line = last
def get_final(self):
self.close_all()
self._endline()
#if no content, we still add the title
self._title_first()
lines = self.final_text.splitlines()
lines2 = []
termspace = shutil.get_terminal_size()[0]
#Following code instert blanck spaces to center the content
if self.center and termspace > term_width():
margin = int((termspace - term_width())//2)
else:
margin = 0
for l in lines :
lines2.append(margin*" "+l)
return "\n".join(lines2)
def get_subscribe_links(self):
return [[self.url,self.get_mime(),self.get_title()]]
def is_valid(self):
return self.validity
def get_links(self, mode="links_only"):
    """Return the list of links found in the content for *mode*.

    The content is prepared and rendered on first use and the resulting
    links are cached per mode. URLs of the subscribe links other than the
    page itself are appended to the rendered links. An empty list is
    returned when rendering produced nothing (instead of raising
    ``KeyError``).
    """
    if mode not in self.links:
        prepared_body = self.prepare(self.body, mode=mode)
        results = self.render(prepared_body, mode=mode)
        if results:
            self.links[mode] = results[1]
            # The first subscribe link is the page itself: skip it.
            for sub in self.get_subscribe_links()[1:]:
                # A subscribe link may have no URL at all; a None entry
                # in the list would be unusable.
                if sub[0]:
                    self.links[mode].append(sub[0])
    return self.links.get(mode, [])
def get_title(self):
    """Return a placeholder title."""
    return "Abstract title"
# This function return a list of URL which should be downloaded
# before displaying the page (images in HTML pages, typically)
def get_images(self, mode="readable"):
    """Return the image URLs recorded for *mode* (possibly empty).

    When nothing is recorded yet, the body is rendered once so the
    renderer can fill ``self.images``; the text rendered that way is then
    dropped from the cache because it was produced without the images.
    """
    if mode not in self.images:
        self.get_body(mode=mode)
        # Invalidate the body rendered without images. Nothing may have
        # been cached if rendering failed, so a missing entry is fine.
        self.rendered_text.pop(mode, None)
    return self.images.get(mode, [])
#This function will give gemtext to the gemtext renderer
def prepare(self, body, mode=None):
    """Hook called before rendering; the base version returns *body* as is."""
    return body
def get_body(self, width=None, mode="readable"):
    """Return the rendered text for *mode*, rendering it on first use.

    *width* defaults to ``term_width()``. The rendered text and the links
    are cached per mode. An empty string is returned when the renderer
    produced nothing (instead of raising ``KeyError``).
    """
    if not width:
        width = term_width()
    if mode not in self.rendered_text:
        prepared_body = self.prepare(self.body, mode=mode)
        result = self.render(prepared_body, width=width, mode=mode)
        if result:
            self.rendered_text[mode] = result[0]
            self.links[mode] = result[1]
    return self.rendered_text.get(mode, "")
def _window_title(self, title, info=None):
    """Return the rendered window banner.

    *title* is written in bold red; when *info* is given, " (info)" is
    appended in red only.
    """
    banner = self.representation(term_width())
    banner.open_color("red")
    banner.open_color("bold")
    banner.add_text(title)
    banner.close_color("bold")
    if info:
        banner.add_text(" (%s)" % info)
    banner.close_color("red")
    return banner.get_final()
def display(self, mode="readable", window_title="", window_info=None, grep=None):
    """Show the rendered content through ``less_cmd``.

    The window title and the body for *mode* are written once to a
    temporary file that is reused on later calls. A second temporary file
    per mode is created and passed to ``less_cmd`` as ``histfile``. On the
    first display of a mode, ``less_cmd`` is called with ``cat=True``.

    Returns True once the content has been handed to ``less_cmd``.
    """
    if not mode:
        mode = "readable"
    wtitle = self._window_title(window_title, info=window_info)
    body = wtitle + "\n" + self.get_body(mode=mode)
    if not body:
        return False
    # The body is put in a temp file before being given to less.
    if mode not in self.temp_file:
        with tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) as tmpf:
            tmpf.write(body)
        # Registered only once the file has been fully written.
        self.temp_file[mode] = tmpf.name
    firsttime = mode not in self.less_histfile
    if firsttime:
        # Only the name is needed: close the handle right away so it is
        # not leaked.
        with tempfile.NamedTemporaryFile("w", encoding="UTF-8", delete=False) as histf:
            self.less_histfile[mode] = histf.name
    less_cmd(self.temp_file[mode], histfile=self.less_histfile[mode], cat=firsttime, grep=grep)
    return True
def get_temp_file(self, mode="readable"):
    """Return the temp file path recorded for *mode*, or None if there is none."""
    return self.temp_file.get(mode)
# A subclass of AbstractRenderer must provide a self.render(body,width,mode) method.
# Three modes are used: readable (the default), full and links_only (the fastest: the
# rendered content is not used, only the links are needed).
# The prepare() function is called before the rendering. It is useful if
# your renderer outputs a format suitable for another existing renderer (such as gemtext).
# Gemtext Rendering Engine
class GemtextRenderer(AbstractRenderer):
    """Renderer for gemtext: produces ANSI text and the list of links."""

    def get_mime(self):
        """Return the MIME type handled by this renderer."""
        return "text/gemini"

    def get_title(self):
        """Return (and cache) the title of the page.

        The title is the first line starting with "#", stripped of its
        "#" characters. Without such a line, the first line is used,
        shortened to 50 characters (49 plus an ellipsis) when longer.
        """
        if self.title:
            return self.title
        if not self.body:
            return "Unknown Gopher Page"
        lines = self.body.splitlines()
        for line in lines:
            if line.startswith("#"):
                self.title = line.strip("#").strip()
                return self.title
        if lines:
            # No heading found: fall back on the start of the first line.
            title_line = lines[0].strip()
            if len(title_line) > 50:
                # The ellipsis marks the truncation (an empty string was
                # appended before, which marked nothing).
                title_line = title_line[:49] + "…"
            self.title = title_line
        else:
            self.title = "Empty Page"
        return self.title

    def render(self, gemtext, width=None, mode=None):
        """Render *gemtext* and return ``(rendered_text, links)``.

        *links* holds first the targets of the "=>" lines (in order, with
        their optional name), then every word containing "://" found in
        plain text lines. *width* defaults to ``term_width()``.
        """
        if not width:
            width = term_width()
        r = self.representation(width)
        links = []
        hidden_links = []
        preformatted = False

        def format_link(url, index, name=None):
            # Build "[index protocol] name"; the protocol is hidden for
            # gemini and list URLs and the address is the default name.
            if "://" in url:
                protocol, adress = url.split("://", maxsplit=1)
                protocol = " %s" % protocol
            else:
                adress = url
                protocol = ""
            if "gemini" in protocol or "list" in protocol:
                protocol = ""
            if not name:
                name = adress
            return "[%d%s] %s" % (index, protocol, name)

        for line in gemtext.splitlines():
            r.newline()
            if line.startswith("```"):
                preformatted = not preformatted
            elif preformatted:
                # Blocks are not wrapped: preformatted text stays as is.
                r.add_block(line + "\n")
            elif not line.strip():
                r.newparagraph(force=True)
            elif line.startswith("=>"):
                target = line[2:].strip()
                if target:
                    links.append(target)
                    pieces = target.split(maxsplit=1)
                    name = pieces[1] if len(pieces) > 1 else None
                    link = format_link(pieces[0], len(links), name=name)
                    # Wrapped lines are aligned under the link name.
                    startpos = link.find("] ") + 2
                    r.startindent("", sub=startpos * " ")
                    r.add_text(link)
                    r.endindent()
            elif line.startswith("* "):
                r.startindent("", sub=" ")
                r.add_text(line[1:].lstrip("\t "))
                r.endindent()
            elif line.startswith(">"):
                r.startindent("> ")
                r.add_text(line[1:].lstrip("\t "))
                r.endindent()
            elif line.startswith("##"):
                # "##" and "###" headings share the same plain blue style.
                level = 3 if line.startswith("###") else 2
                r.open_color("blue")
                r.add_text(line[level:].lstrip("\t "))
                r.close_color("blue")
            elif line.startswith("#"):
                heading = line[1:].lstrip("\t ")
                if not self.title:
                    self.title = heading
                r.open_color("bold")
                r.open_color("blue")
                r.open_color("underline")
                r.add_text(heading)
                r.close_color("underline")
                r.close_color("bold")
                r.close_color("blue")
            else:
                if "://" in line:
                    # URLs written in plain text are collected as links.
                    hidden_links.extend(w for w in line.split() if "://" in w)
                r.add_text(line.rstrip())
        links += hidden_links
        return r.get_final(), links
class GopherRenderer(AbstractRenderer):
    """Renderer for Gopher menus, with a raw-text fallback."""

    def get_mime(self):
        """Return the MIME type handled by this renderer."""
        return "text/gopher"

    def get_title(self):
        """Return (and cache) the title: the first field of the first line.

        A leading "i" (the item type of the line) is removed. The title is
        an empty string when there is no body.
        """
        if not self.title:
            self.title = ""
            if self.body:
                firstline = self.body.splitlines()[0].split("\t")[0]
                if firstline.startswith("i"):
                    firstline = firstline[1:]
                self.title = firstline
        return self.title

    def render(self, body, width=None, mode=None):
        """Render *body* and return ``(rendered_text, links)``.

        When the menu rendering raises, the error is printed and *body*
        is shown as an unformatted block with no links.
        """
        if not width:
            width = term_width()
        try:
            render, links = self._render_goph(body, width=width, mode=mode)
        except Exception as err:
            print("Error rendering Gopher ", err)
            r = self.representation(width)
            r.add_block(body)
            render = r.get_final()
            links = []
        return render, links

    def _render_goph(self, body, width=None, mode=None):
        """Render the Gopher menu *body* and return ``(text, links)``.

        Lines starting with "i" are shown as text. Other lines with four
        tab-separated fields (name, path, host, port) become numbered
        links; anything else is shown as is.
        """
        if not width:
            width = term_width()
        # This was copied straight from Agena (then later adapted)
        links = []
        r = self.representation(width)
        # Render the body that was given (self.body was used before and
        # the argument ignored).
        for line in body.split("\n"):
            r.newline()
            if line.startswith("i"):
                towrap = line[1:].split("\t")[0]
                if towrap.strip():
                    r.add_text(towrap)
                else:
                    r.newparagraph()
            elif line.strip() not in [".", ""]:
                parts = line.split("\t")
                parts[-1] = parts[-1].strip()
                if parts[-1] == "+":
                    parts = parts[:-1]
                if len(parts) == 4:
                    name, path, host, port = parts
                    itemtype = name[0]
                    name = name[1:]
                    # The port is left out of the URL when it is "70".
                    port = "" if port == "70" else ":%s" % port
                    if itemtype == "h" and path.startswith("URL:"):
                        url = path[4:]
                    else:
                        url = "gopher://%s%s/%s%s" % (host, port, itemtype, path)
                    url = url.replace(" ", "%20")
                    links.append(url + " " + name)
                    r.add_text("[%s] " % len(links) + name)
                else:
                    r.add_text(line)
        return r.get_final(), links
class FolderRenderer(GemtextRenderer):
    """Renderer for local folders; lists the user's lists as gemtext."""

    def get_mime(self):
        """Return the pseudo MIME type used for folders."""
        return "Directory"

    def prepare(self, body, mode=None):
        """Return the gemtext to render for this folder.

        For any folder other than the "lists" folder of ``_DATA_DIR`` this
        is a one-line message. For the lists folder, every ``.gmi`` file
        becomes a link with its number of items, grouped in sections:
        bookmarks, subscriptions ("#subscribed" in the first line), frozen
        ("#frozen" in the first line) and system lists.
        """
        listdir = os.path.join(_DATA_DIR, "lists")

        def get_first_line(name):
            # First line of the list file, only when it is a heading.
            path = os.path.join(listdir, name + ".gmi")
            with open(path) as f:
                first_line = f.readline().strip()
            return first_line if first_line.startswith("#") else None

        def write_list(names):
            # One "=>" link line per list, with its number of links.
            entries = []
            for name in names:
                path = "list:///%s" % name
                size = len(GeminiItem(path).get_links())
                entries.append("=> %s %s (%s items)\n" % (str(path), name, size))
            return "".join(entries)

        if self.url != listdir:
            return "This is folder %s" % self.url
        self.title = "My lists"
        lists = []
        if os.path.exists(listdir):
            # Only .gmi files are lists. The suffix was blindly cut from
            # every entry before, giving bogus names for other files.
            lists = sorted(f[:-4] for f in os.listdir(listdir) if f.endswith(".gmi"))
        if lists:
            my_lists = []
            system_lists = []
            subscriptions = []
            frozen = []
            for name in lists:
                if name in ["history", "to_fetch", "archives", "tour"]:
                    system_lists.append(name)
                    continue
                first_line = get_first_line(name)
                if first_line and "#subscribed" in first_line:
                    subscriptions.append(name)
                elif first_line and "#frozen" in first_line:
                    frozen.append(name)
                else:
                    my_lists.append(name)
            sections = [
                ("\n## Bookmarks Lists (updated during sync)\n", my_lists),
                ("\n## Subscriptions (new links in those are added to tour)\n", subscriptions),
                ("\n## Frozen (fetched but never updated)\n", frozen),
                ("\n## System Lists\n", system_lists),
            ]
            body = "".join(
                header + write_list(names) for header, names in sections if names
            )
        return body
class FeedRenderer(GemtextRenderer):
    """Renderer for RSS/Atom feeds: the feed is converted to gemtext."""

    def get_mime(self):
        """Return the MIME type reported for feeds."""
        return "application/rss+xml"

    def is_valid(self):
        """Return True when the body parses as a feed with entries.

        False is returned when feed support is disabled, when the parser
        flags the feed as malformed (``bozo``) or when it has no entry.
        """
        if not _DO_FEED:
            return False
        parsed = feedparser.parse(self.body)
        if parsed.bozo:
            return False
        # If no content, then fallback to HTML
        return len(parsed.entries) > 0

    def get_title(self):
        """Return the title, rendering the body first if it is not set yet."""
        if not self.title:
            self.get_body()
        return self.title

    def prepare(self, content, mode="readable", width=None):
        """Convert the feed *content* to gemtext and return it.

        The title is set from the feed and ``self.validity`` is cleared
        when the feed cannot be used (no feed support, malformed feed or
        no entry). In "full" mode the summary of each entry is rendered
        through ``HtmlRenderer`` and inserted after its link. *width* is
        accepted for compatibility and is not used.
        """
        self.title = "RSS/Atom feed"
        if not _DO_FEED:
            self.validity = False
            return "Please install python-feedparser to handle RSS/Atom feeds\n"
        parsed = feedparser.parse(content)
        if parsed.bozo:
            self.validity = False
            return "Invalid RSS feed\n\n" + str(parsed.bozo_exception)
        if "title" in parsed.feed:
            t = parsed.feed.title
        else:
            t = "Unknown"
        self.title = "%s (XML feed)" % t
        page = "# %s" % self.title + "\n"
        if "updated" in parsed.feed:
            page += "Last updated on %s\n\n" % parsed.feed.updated
        if "subtitle" in parsed.feed:
            page += parsed.feed.subtitle + "\n"
        if "link" in parsed.feed:
            page += "=> %s\n" % parsed.feed.link
        page += "\n## Entries\n"
        if len(parsed.entries) < 1:
            self.validity = False
        for entry in parsed.entries:
            # An entry without a link is shown as plain text instead of
            # raising AttributeError.
            link = entry.get("link")
            line = "=> %s " % link if link else ""
            # The parsed date may be None or missing when the date string
            # could not be parsed; strftime() would then raise.
            published = entry.get("published_parsed")
            if "published" in entry and published:
                line += time.strftime("%Y-%m-%d", published) + " : "
            if "title" in entry:
                line += "%s" % (entry.title)
            if "author" in entry:
                line += " (by %s)" % entry.author
            page += line + "\n"
            if mode == "full" and "summary" in entry:
                summary_renderer = HtmlRenderer(entry.summary, self.url, center=False)
                rendered = summary_renderer.get_body(width=None, mode="full")
                page += "\n"
                page += rendered
                page += "\n------------\n\n"
        return page
class ImageRenderer(AbstractRenderer):
    """Renderer for pictures, inline (as text) or directly in the terminal."""

    def get_mime(self):
        """Return the MIME pattern handled by this renderer."""
        return "image/*"

    def is_valid(self):
        """Return True when image rendering is enabled."""
        return bool(_RENDER_IMAGE)

    def get_links(self, mode=None):
        """Return an empty list."""
        return []

    def get_title(self):
        """Return the generic title used for pictures."""
        return "Picture file"

    def render(self, img, width=None, mode=None):
        """Return ``(text, [])`` where text is *img* rendered inline.

        Nothing is rendered in "links_only" mode. With an explicit
        *width*, each line is shifted right by half of the remaining
        terminal width; without it the full ``term_width()`` is used.
        """
        # With inline, symbols are used so the result can be shown by less.
        if mode == "links_only":
            return "", []
        if width:
            spaces = int((term_width() - width) // 2)
        else:
            width = term_width()
            spaces = 0
        ansi_img = inline_image(img, width)
        # Now centering the image
        left_pad = spaces * " "
        new_img = "".join(left_pad + row + "\n" for row in ansi_img.splitlines())
        return new_img, []

    def display(self, mode=None, window_title=None, window_info=None, grep=None):
        """Print the window title (if any) and show the image; return True."""
        if window_title:
            print(self._window_title(window_title, info=window_info))
        terminal_image(self.body)
        return True
class HtmlRenderer(AbstractRenderer):
    """Renderer for HTML pages: produces ANSI text and the list of links."""

    def get_mime(self):
        """Return the MIME type handled by this renderer."""
        return "text/html"

    def is_valid(self):
        """Return True when HTML support is enabled and the content is valid.

        A hint about the missing packages is printed when HTML support is
        disabled.
        """
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
        return _DO_HTML and self.validity

    def get_subscribe_links(self):
        """Return the page and its feeds as a list of [url, mime, title].

        The page itself comes first, followed by every
        ``<link rel="alternate">`` whose type mentions rss, atom or feed.
        """
        subs = [[self.url, self.get_mime(), self.get_title()]]
        soup = BeautifulSoup(self.body, 'html.parser')
        for tag in soup.find_all("link", rel="alternate", recursive=True):
            ty = tag.get("type")
            if ty and ("rss" in ty or "atom" in ty or "feed" in ty):
                subs.append([tag.get("href"), ty, tag.get("title")])
        return subs

    def get_title(self):
        """Return (and cache) the title of the page.

        The short title computed by readability is used when available,
        otherwise the content of the ``<title>`` tag. An empty string is
        returned when there is no body or no ``<title>`` tag.
        """
        if self.title:
            return self.title
        if not self.body:
            return ""
        if _HAS_READABILITY:
            try:
                readable = Document(self.body)
                self.title = readable.short_title()
                return self.title
            except Exception:
                # Best effort: fall back on the <title> tag below.
                pass
        soup = BeautifulSoup(self.body, "html.parser")
        # A page may have no <title> at all: do not dereference None.
        if soup.title and soup.title.string:
            self.title = str(soup.title.string)
        else:
            self.title = ""
        # The title was computed but never returned before.
        return self.title

    # Our own HTML engine (crazy, isn't it?)
    # Return [rendered_body, list_of_links]
    # mode is either links_only, readable or full
    def render(self, body, mode="readable", width=None, add_title=True):
        """Render the HTML *body* and return ``(rendered_text, links)``.

        In "full" mode the whole document is rendered; otherwise the
        readability summary is used when available. *width* defaults to
        ``term_width()``. Nothing is returned (None) when HTML support is
        disabled. Image URLs are recorded in ``self.images[mode]``.
        """
        if not width:
            width = term_width()
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
            return
        r = self.representation(width, title=self.get_title(), center=self.center)
        links = []

        # In HTML, whitespace is sometimes meaningful and sometimes not.
        # Carriage returns are not meaningful, except when they should be
        # read as spaces.
        def render_image(src, width=40, mode=None):
            # Return the inline rendering of the image at src, or "" when
            # images are disabled or the image is not in the cache.
            ansi_img = ""
            imgurl, imgdata = looks_like_base64(src, self.url)
            if _RENDER_IMAGE and mode != "links_only" and imgurl:
                try:
                    # Translate the URL into its cache path.
                    g = GeminiItem(imgurl)
                    img = g.get_cache_path()
                    if imgdata:
                        with open(img, "wb") as cached:
                            cached.write(base64.b64decode(imgdata))
                    if g.is_cache_valid():
                        renderer = ImageRenderer(img, imgurl)
                        # Images are 40 wide unless the terminal is smaller.
                        size = min(width, 40)
                        ansi_img = "\n" + renderer.get_body(width=size, mode="inline")
                except Exception as err:
                    # Badly formatted files or URLs are sometimes met.
                    ansi_img = textwrap.fill("[BAD IMG] %s - %s" % (err, src), width) + "\n"
            return ansi_img

        def sanitize_string(string):
            # Turn newlines and tabs into spaces, collapse runs of spaces
            # and unescape entities, while keeping one leading/trailing
            # space when the original had one.
            string = string.replace("\r", "").replace("\n", " ").replace("\t", " ")
            endspace = string.endswith(" ") or string.endswith("\xa0")
            startspace = string.startswith(" ") or string.startswith("\xa0")
            toreturn = string.strip()
            # Look for a double space: testing for a single space never
            # terminates once the text contains one.
            while "  " in toreturn:
                toreturn = toreturn.replace("  ", " ")
            toreturn = html.unescape(toreturn)
            if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"):
                toreturn += " "
            if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"):
                toreturn = " " + toreturn
            return toreturn

        def recursive_render(element, indent="", preformatted=False):
            # Render element and, recursively, its children into r.
            if element.name == "blockquote":
                r.newparagraph()
                r.startindent(" ", reverse=" ")
                for child in element.children:
                    r.open_color("italic")
                    recursive_render(child, indent="\t")
                    r.close_color("italic")
                r.endindent()
            elif element.name in ["div", "p"]:
                r.newparagraph()
                for child in element.children:
                    recursive_render(child, indent=indent)
                r.newparagraph()
            elif element.name in ["span"]:
                r.add_space()
                for child in element.children:
                    recursive_render(child, indent=indent)
                r.add_space()
            elif element.name in ["h1", "h2", "h3", "h4", "h5", "h6"]:
                r.open_color("blue")
                if element.name in ["h1"]:
                    r.open_color("bold")
                    r.open_color("underline")
                elif element.name in ["h2"]:
                    r.open_color("bold")
                elif element.name in ["h5", "h6"]:
                    r.open_color("faint")
                for child in element.children:
                    r.newparagraph()
                    recursive_render(child)
                    r.newparagraph()
                # Closed once every child is rendered, so that all of
                # them get the heading style.
                r.close_all()
            elif element.name in ["code", "tt"]:
                for child in element.children:
                    recursive_render(child, indent=indent, preformatted=True)
            elif element.name in ["pre"]:
                r.newparagraph()
                r.add_block(element.text)
                r.newparagraph()
            elif element.name in ["li"]:
                r.startindent("", sub=" ")
                for child in element.children:
                    recursive_render(child, indent=indent)
                r.endindent()
            elif element.name in ["tr"]:
                r.startindent("|", reverse="|")
                for child in element.children:
                    recursive_render(child, indent=indent)
                r.endindent()
            elif element.name in ["td", "th"]:
                r.add_text("| ")
                for child in element.children:
                    recursive_render(child)
                r.add_text(" |")
            # italics
            elif element.name in ["em", "i"]:
                r.open_color("italic")
                for child in element.children:
                    recursive_render(child, indent=indent, preformatted=preformatted)
                r.close_color("italic")
            # bold
            elif element.name in ["b", "strong"]:
                r.open_color("bold")
                for child in element.children:
                    recursive_render(child, indent=indent, preformatted=preformatted)
                r.close_color("bold")
            elif element.name == "a":
                link = element.get('href')
                # support for images nested in links
                if link:
                    text = ""
                    imgtext = ""
                    # Images are displayed first in a link.
                    for child in element.children:
                        if child.name == "img":
                            recursive_render(child)
                            imgtext = "[IMG LINK %s]"
                    links.append(link + " " + text)
                    link_id = str(len(links))
                    r.open_color("blue")
                    r.open_color("faint")
                    for child in element.children:
                        if child.name != "img":
                            recursive_render(child, preformatted=preformatted)
                    if imgtext != "":
                        r.center_line()
                        r.add_text(imgtext % link_id)
                    else:
                        r.add_text(" [%s]" % link_id)
                    r.close_color("blue")
                    r.close_color("faint")
                else:
                    # No real link found
                    for child in element.children:
                        recursive_render(child, preformatted=preformatted)
            elif element.name == "img":
                src = element.get("src")
                text = ""
                ansi_img = render_image(src, width=width, mode=mode)
                alt = element.get("alt")
                if alt:
                    alt = sanitize_string(alt)
                    text += "[IMG] %s" % alt
                else:
                    text += "[IMG]"
                if src:
                    links.append(src + " " + text)
                    if mode not in self.images:
                        self.images[mode] = []
                    abs_url = urllib.parse.urljoin(self.url, src)
                    self.images[mode].append(abs_url)
                    link_id = " [%s]" % (len(links))
                    r.add_block(ansi_img)
                    r.open_color("faint")
                    r.open_color("yellow")
                    r.center_line()
                    r.add_text(text + link_id)
                    r.close_color("faint")
                    r.close_color("yellow")
                    r.newline()
            elif element.name == "br":
                r.newline()
            elif element.name not in ["script", "style", "template"] and type(element) != Comment:
                if element.string:
                    if preformatted:
                        r.open_color("faint")
                        r.add_text(element.string)
                        r.close_color("faint")
                    else:
                        s = sanitize_string(element.string)
                        if len(s.strip()) > 0:
                            r.add_text(s)
                else:
                    for child in element.children:
                        recursive_render(child, indent=indent)

        # The real heart of the HTML rendering.
        summary = body
        if mode != "full" and _HAS_READABILITY:
            try:
                summary = Document(body).summary()
            except Exception:
                # Best effort: render the whole document instead.
                summary = body
        soup = BeautifulSoup(summary, 'html.parser')
        if soup:
            if soup.body:
                recursive_render(soup.body)
            else:
                recursive_render(soup)
        return r.get_final(), links
# Mapping mimetypes with renderers
# (any content with a mimetype text/* not listed here will be rendered with as GemText)
# Each MIME type is listed once. Two versions of this table (bare class names
# and `ansirenderer.`-qualified names) were merged into a single literal,
# which is a syntax error (missing comma) with duplicated keys.
# NOTE(review): the classes defined above in this file are used here; switch
# to the `ansirenderer.`-qualified names if they are meant to come from that
# module instead - confirm.
_FORMAT_RENDERERS = {
    "text/gemini": GemtextRenderer,
    "text/html": HtmlRenderer,
    "text/xml": FeedRenderer,
    "application/xml": FeedRenderer,
    "application/rss+xml": FeedRenderer,
    "application/atom+xml": FeedRenderer,
    "text/gopher": GopherRenderer,
    "image/*": ImageRenderer,
}
# Offpunk is organized as follows:
# - a GeminiClient instance which handles the browsing of GeminiItems (= pages).
@ -1630,7 +707,7 @@ class GeminiItem():
def _set_renderer(self,mime=None):
if self.local and os.path.isdir(self.get_cache_path()):
self.renderer = FolderRenderer("",self.get_cache_path())
self.renderer = ansirenderer.FolderRenderer("",self.get_cache_path(),datadir=_DATA_DIR)
return
if not mime:
mime = self.get_mime()