first step to transform renderers into their own objects

This commit is contained in:
Lionel Dricot 2022-01-29 15:35:28 +01:00
parent 69db426ab8
commit f2944d35af
1 changed files with 211 additions and 180 deletions

View File

@ -187,187 +187,218 @@ standard_ports = {
# First, we define the gemtext and html renderers, outside of the rest
# (They could later be factored out into other files or replaced)
# Gemtext Rendering Engine
# this method takes the original gemtext and returns
# [rendered_text,links_table]
def render_gemtext(gemtext, width=80):
    """Render a gemtext document as terminal text.

    gemtext -- the raw gemtext source, as a single string
    width   -- maximum width, in columns, of a wrapped line

    Returns a (rendered_text, links) tuple. rendered_text is the document
    wrapped to ``width`` with ANSI escape codes on headings; links holds the
    content of every non-empty "=>" line (url, optionally followed by a
    label), in order of appearance.
    """
    reset = "\x1b[0m"
    # Heading markers, longest first so that "###" is never taken for "#".
    heading_styles = (
        ("###", "\x1b[34m\x1b[2m"),
        ("##", "\x1b[34m"),
        ("#", "\x1b[1;34m\x1b[4m"),
    )

    def wrap_line(text, color=None, i_indent="", s_indent=""):
        # Wrap first and colour afterwards, so that the escape codes are
        # never counted towards the line width.
        pieces = textwrap.wrap(text, width, initial_indent=i_indent,
                               subsequent_indent=s_indent)
        if color:
            pieces = [color + piece + reset for piece in pieces]
        return "".join(piece + "\n" for piece in pieces if piece.strip() != "")

    def format_link(url, index, name=None):
        # Builds "[index protocol] label"; the protocol is hidden for gemini
        # links and the label defaults to the address without its scheme.
        scheme, separator, address = url.partition("://")
        if separator:
            protocol = " %s" % scheme
        else:
            address, protocol = url, ""
        if "gemini" in protocol:
            protocol = ""
        return "[%d%s] %s" % (index, protocol, name or address)

    links = []
    chunks = []
    in_preformatted = False
    for raw in gemtext.splitlines():
        if raw.startswith("```"):
            # The fence line itself is never printed.
            in_preformatted = not in_preformatted
            continue
        if in_preformatted:
            chunks.append(raw + "\n")
            continue
        if raw.startswith("=>"):
            target = raw[2:].strip()
            if target:
                links.append(target)
                url, *label = target.split(maxsplit=1)
                entry = format_link(url, len(links),
                                    name=label[0] if label else None)
                # Continuation lines are aligned under the link label.
                hanging = (entry.find("] ") + 2) * " "
                chunks.append(wrap_line(entry, s_indent=hanging))
            continue
        if raw.startswith("* "):
            item = raw[1:].lstrip("\t ")
            chunks.append(textwrap.fill(item, width, initial_indent="",
                                        subsequent_indent=" ") + "\n")
            continue
        if raw.startswith(">"):
            quote = raw[1:].lstrip("\t ")
            chunks.append(textwrap.fill(quote, width, initial_indent="> ",
                                        subsequent_indent="> ") + "\n")
            continue
        for marker, style in heading_styles:
            if raw.startswith(marker):
                title = raw[len(marker):].lstrip("\t ")
                chunks.append(wrap_line(title, color=style))
                break
        else:
            # Plain text; an empty line still produces one newline.
            chunks.append(wrap_line(raw).rstrip() + "\n")
    return "".join(chunks), links
# Gemtext Rendering Engine
# this method takes the original gemtext and returns
# [rendered_text,links_table]
class GemtextRenderer():
    """Lazily render a gemtext document for display in a terminal.

    The document is rendered at most once, on the first call to get_body()
    or get_links(); both results are then kept on the instance.
    """

    def __init__(self,content):
        self.body = content
        # Both are filled by the same rendering pass; None = not rendered yet.
        self.rendered_text = None
        self.links = None

    def _render(self):
        # Single rendering pass shared by get_body() and get_links().
        self.rendered_text, self.links = self.render_gemtext(self.body)

    def get_body(self):
        """Return the rendered text, rendering the document on first use."""
        if self.rendered_text is None:
            self._render()
        return self.rendered_text

    def get_links(self):
        """Return the list of links, rendering the document on first use."""
        if self.links is None:
            self._render()
        return self.links

    def render_gemtext(self,gemtext, width=80):
        """Render ``gemtext`` and return a (rendered_text, links) tuple.

        gemtext -- the raw gemtext source, as a single string
        width   -- maximum width, in columns, of a wrapped line

        links holds the content of every non-empty "=>" line (url,
        optionally followed by a label), in order of appearance.
        """
        links = []
        preformatted = False
        rendered = []

        # This local function wraps a line to ``width`` and then applies the
        # ANSI code given as "color" to each resulting line, closing it with
        # a reset code. Colouring after wrapping keeps the escape codes out
        # of the width computation.
        def wrap_line(line,color=None,i_indent="",s_indent=""):
            wrapped = textwrap.wrap(line,width,initial_indent=i_indent,
                                    subsequent_indent=s_indent)
            final = []
            for piece in wrapped:
                if color:
                    piece = color + piece + "\x1b[0m"
                if piece.strip() != "":
                    final.append(piece + "\n")
            return "".join(final)

        # Builds "[index protocol] label"; the protocol is hidden for gemini
        # links and the label defaults to the address without its scheme.
        def format_link(url,index,name=None):
            if "://" in url:
                protocol,adress = url.split("://",maxsplit=1)
                protocol = " %s" %protocol
            else:
                adress = url
                protocol = ""
            if "gemini" in protocol:
                protocol = ""
            if not name:
                name = adress
            return "[%d%s] %s" % (index, protocol, name)

        for line in gemtext.splitlines():
            if line.startswith("```"):
                # The fence line itself is never printed.
                preformatted = not preformatted
            elif preformatted:
                rendered.append(line + "\n")
            elif line.startswith("=>"):
                strippedline = line[2:].strip()
                if strippedline:
                    links.append(strippedline)
                    splitted = strippedline.split(maxsplit=1)
                    url = splitted[0]
                    name = splitted[1] if len(splitted) > 1 else None
                    link = format_link(url,len(links),name=name)
                    # Continuation lines are aligned under the link label.
                    startpos = link.find("] ") + 2
                    rendered.append(wrap_line(link,s_indent=startpos*" "))
            elif line.startswith("* "):
                line = line[1:].lstrip("\t ")
                rendered.append(textwrap.fill(line, width, initial_indent="",
                                              subsequent_indent=" ") + "\n")
            elif line.startswith(">"):
                line = line[1:].lstrip("\t ")
                rendered.append(textwrap.fill(line, width, initial_indent="> ",
                                              subsequent_indent="> ") + "\n")
            elif line.startswith("###"):
                line = line[3:].lstrip("\t ")
                rendered.append(wrap_line(line, color="\x1b[34m\x1b[2m"))
            elif line.startswith("##"):
                line = line[2:].lstrip("\t ")
                rendered.append(wrap_line(line, color="\x1b[34m"))
            elif line.startswith("#"):
                line = line[1:].lstrip("\t ")
                rendered.append(wrap_line(line, color="\x1b[1;34m\x1b[4m"))
            else:
                # Plain text; an empty line still produces one newline.
                rendered.append(wrap_line(line).rstrip() + "\n")
        return "".join(rendered), links
class HtmlRenderer():
    """Lazily render an HTML document as wrapped, ANSI-coloured terminal text.

    The document is rendered at most once, on the first call to get_body()
    or get_links(); both results are then kept on the instance.
    """

    def __init__(self,content):
        self.body = content
        # Both are filled by the same rendering pass; None = not rendered yet.
        self.rendered_text = None
        self.links = None

    def _render(self):
        # render_html() returns None (after printing a hint) when _DO_HTML is
        # false. Unpacking that None into two names raised a TypeError, so
        # fall back to an empty page without links instead.
        result = self.render_html(self.body)
        if result is None:
            result = ("", [])
        self.rendered_text, self.links = result

    def get_body(self):
        """Return the rendered text, rendering the document on first use."""
        if self.rendered_text is None:
            self._render()
        return self.rendered_text

    def get_links(self):
        """Return the list of links, rendering the document on first use."""
        if self.links is None:
            self._render()
        return self.links

    # Our own HTML engine (crazy, isn't it?)
    def render_html(self,body,width=80):
        """Render ``body`` and return a (rendered_body, list_of_links) tuple.

        body  -- the HTML source
        width -- maximum width, in columns, of a wrapped line

        Each entry of list_of_links is "href text" for one <a> element that
        has an href. Returns None, after printing an installation hint, when
        _DO_HTML is false.
        """
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
            return
        r_body = ""
        links = []

        # This function recursively walks the parsed HTML. Block structure is
        # encoded in the returned string for the wrapping pass below: "\n\n"
        # separates paragraphs, a leading "\t" marks quoted text and " * "
        # marks a list item.
        def recursive_render(element,indent=""):
            rendered_body = ""
            if element.name == "blockquote":
                for child in element.children:
                    rendered_body += recursive_render(child,indent="\t").rstrip("\t")
            elif element.name == "div":
                rendered_body += "\n"
                for child in element.children:
                    rendered_body += recursive_render(child,indent=indent)
            elif element.name in ["h1","h2","h3","h4","h5","h6"]:
                line = element.get_text()
                if element.name in ["h1","h2"]:
                    rendered_body += "\n"+"\x1b[1;34m\x1b[4m" + line + "\x1b[0m"+"\n"
                elif element.name in ["h3","h4"]:
                    rendered_body += "\n" + "\x1b[34m" + line + "\x1b[0m" + "\n"
                else:
                    rendered_body += "\n" + "\x1b[34m\x1b[2m" + line + "\x1b[0m" + "\n"
            elif element.name == "pre":
                rendered_body += "\n"
                for child in element.children:
                    rendered_body += recursive_render(child,indent=indent)
                rendered_body += "\n\n"
            elif element.name == "li":
                line = ""
                for child in element.children:
                    line += recursive_render(child,indent=indent).strip("\n")
                rendered_body += " * " + line.strip() + "\n"
            elif element.name in ["code","em","b","i"]:
                # we don't do anything with this markup right now. Maybe later?
                for child in element.children:
                    rendered_body += recursive_render(child,indent=indent).strip("\n")
            elif element.name == "p":
                temp_str = ""
                for child in element.children:
                    temp_str += recursive_render(child,indent=indent)
                rendered_body = temp_str + "\n\n"
            elif element.name == "a":
                text = element.get_text().strip()
                link = element.get('href')
                if link:
                    # The number shown next to the text is the 1-based
                    # position of the link in ``links``.
                    links.append(link+" "+text)
                    link_id = " [%s] "%(len(links))
                    rendered_body = "\x1b[34m\x1b[2m " + text + link_id + "\x1b[0m"
                else:
                    # No real link found
                    rendered_body = text
            elif element.name == "br":
                rendered_body = "\n"
            elif element.string:
                # Element whose content is a single string
                rendered_body = element.string.strip("\n").strip("\t")
            else:
                # Any other element: render its children in order
                for child in element.children:
                    rendered_body += recursive_render(child,indent=indent)
            return indent + rendered_body

        # the real render_html heart
        # NOTE(review): Document and BeautifulSoup are presumably the
        # python-readability and python-bs4 objects named in the message
        # above -- confirm against the file's imports.
        readable = Document(body)
        title = readable.short_title()
        summary = readable.summary()
        r_body += "\x1b[1;34m\x1b[4m" + title + "\x1b[0m\n"
        soup = BeautifulSoup(summary, 'html.parser')
        rendered_body = ""
        if soup and soup.body :
            for el in soup.body.contents:
                rendered_body += recursive_render(el)
            paragraphs = rendered_body.split("\n\n")
            for par in paragraphs:
                lines = par.splitlines()
                for line in lines:
                    if line.startswith("\t"):
                        # quoted text: indent every line
                        i_indent = " "
                        s_indent = i_indent
                        line = line.strip("\t")
                    elif line.startswith(" * "):
                        i_indent = "" # we keep the initial bullet
                        s_indent = " "
                    else:
                        i_indent = ""
                        s_indent = i_indent
                    if line.strip() != "":
                        wrapped = textwrap.fill(line,width,initial_indent=i_indent,
                                                subsequent_indent=s_indent)
                        wrapped += "\n"
                    else:
                        wrapped = ""
                    r_body += wrapped
                # blank line between paragraphs
                r_body += "\n"
        return r_body,links
# Mapping mimetypes with renderers
# (any content with a mimetype text/* not listed here will be rendered with render_gemtext)
# Each value is a renderer class: it is instantiated with the raw body, and
# the rendered text and links are then read with get_body() / get_links().
_FORMAT_RENDERERS = {
    "text/gemini": GemtextRenderer,
    "text/html" : HtmlRenderer,
    "text/xml" : HtmlRenderer,
}
# Offpunk is organized as follows:
# - a GeminiClient instance which handles the browsing of GeminiItems (= pages).
@ -560,12 +591,12 @@ class GeminiItem():
if not self.renderer:
mime = self.get_mime()
if mime in _FORMAT_RENDERERS:
self.renderer = _FORMAT_RENDERERS[mime]
func = _FORMAT_RENDERERS[mime]
self.renderer = func(self.get_body())
if self.renderer:
body = self.get_body()
r_body, links = self.renderer(body)
self.__make_links(links)
to_return = self._make_terminal_title() + r_body
body = self.renderer.get_body()
self.__make_links(self.renderer.get_links())
to_return = self._make_terminal_title() + body
return to_return
else:
return None