offpunk/ansicat.py

1416 lines
53 KiB
Python
Raw Normal View History

#!/usr/bin/env python3
2023-07-03 09:43:06 +00:00
import os
import sys
2023-07-03 21:48:55 +00:00
import shutil
import subprocess
import textwrap
import time
import html
2023-07-04 11:55:01 +00:00
import urllib
2023-07-18 10:33:30 +00:00
import argparse
import mimetypes
import fnmatch
2023-07-30 21:12:01 +00:00
import netcache
import offthemes
2023-08-12 22:07:07 +00:00
from offutils import run,term_width,is_local,looks_like_base64
import base64
2023-08-13 13:20:01 +00:00
from offutils import _DATA_DIR
2023-07-03 21:48:55 +00:00
# Optional dependencies. Each one is probed once at import time and the
# result is recorded in a module-level flag that the renderers consult.
try:
    from readability import Document
    _HAS_READABILITY = True
except ModuleNotFoundError:
    _HAS_READABILITY = False

try:
    from bs4 import BeautifulSoup
    from bs4 import Comment
    import bs4
    _HAS_SOUP = True
except ModuleNotFoundError:
    _HAS_SOUP = False

if _HAS_SOUP:
    # Recent bs4 versions warn when XML is parsed with the HTML parser.
    # Instead of parsing bs4.__version__ (which can fail on unusual version
    # strings), we simply try the import: older releases do not provide the
    # warning class. This is purely cosmetic, so any failure is ignored.
    try:
        from bs4 import XMLParsedAsHTMLWarning
        import warnings
        warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
    except Exception:
        pass

_DO_HTML = _HAS_SOUP #and _HAS_READABILITY
if _DO_HTML and not _HAS_READABILITY:
    print("To improve your web experience (less cruft in webpages),")
    print("please install python3-readability or readability-lxml")

try:
    import feedparser
    _DO_FEED = True
except ModuleNotFoundError:
    _DO_FEED = False

try:
    from PIL import Image
    _HAS_PIL = True
except ModuleNotFoundError:
    _HAS_PIL = False
def _version_tuple(text):
    """Return the first dotted version found on the first line of *text*.

    The result is a tuple of ints, e.g. "Chafa version 1.12.4" -> (1, 12, 4).
    An empty tuple is returned when no version-like token is found, so the
    result can always be compared with another tuple.
    """
    if not text:
        return ()
    lines = text.splitlines()
    if not lines:
        return ()
    for token in lines[0].split():
        if token[0] not in "0123456789":
            continue
        numbers = []
        for piece in token.split("."):
            digits = ""
            for ch in piece:
                if ch not in "0123456789":
                    break
                digits += ch
            if not digits:
                break
            numbers.append(int(digits))
        return tuple(numbers)
    return ()


_HAS_TIMG = shutil.which('timg')
_HAS_CHAFA = shutil.which('chafa')
_NEW_CHAFA = False
_NEW_TIMG = False
_RENDER_IMAGE = False

# All this code to know if we render image inline or not
if _HAS_CHAFA:
    # starting with 1.10, chafa can return only one frame
    # which allows us to drop dependency for PIL
    try:
        # output is "Chafa version M.m.p"
        output = run("chafa --version")
    except (subprocess.CalledProcessError, OSError):
        output = ""
    # Versions are compared as tuples so that 2.0 counts as newer than 1.10
    if _version_tuple(output) >= (1, 10):
        _NEW_CHAFA = True
if _NEW_CHAFA:
    _RENDER_IMAGE = True
if _HAS_TIMG:
    try:
        output = run("timg --version")
    except subprocess.CalledProcessError:
        output = False
    # Only timg strictly newer than 1.3.2 is accepted (looping options).
    # Numeric comparison avoids "1.10" being sorted before "1.3".
    if output and _version_tuple(output) > (1, 3, 2):
        _NEW_TIMG = True
        _RENDER_IMAGE = True
elif _HAS_CHAFA and _HAS_PIL:
    _RENDER_IMAGE = True
if not _RENDER_IMAGE:
    print("To render images inline, you need either chafa or timg.")
    if not _NEW_CHAFA and not _NEW_TIMG:
        print("Before Chafa 1.10, you also need python-pil")
2023-07-03 09:43:06 +00:00
# Return ANSI text that can be shown by less
def inline_image(img_file, width):
    """Return *img_file* rendered as ANSI text, *width* columns wide.

    An empty string is returned when the file does not exist, when the
    "file" utility reports a non-image mime type, or when no suitable
    renderer (chafa or timg) is available. If the rendering command fails,
    a short error marker is returned instead of raising.
    """
    # We don't even try displaying pictures that are not there
    if not os.path.exists(img_file):
        return ""
    # We avoid errors by not trying to render non-image files
    if shutil.which("file"):
        mime = run("file -b --mime-type %s", parameter=img_file).strip()
        if "image" not in mime:
            return ""
    # Chafa is faster than timg inline. Let's use that one by default
    inline = None
    if _HAS_CHAFA:
        if _HAS_PIL and not _NEW_CHAFA:
            # this code is a hack to remove frames from animated gif.
            # The context manager makes sure the image handle is released.
            with Image.open(img_file) as img_obj:
                if getattr(img_obj, "n_frames", 1) > 1:
                    # we remove all frames but the first one
                    img_obj.save(img_file, format="gif", save_all=False)
            inline = "chafa --bg white -s %s -f symbols"
        elif _NEW_CHAFA:
            inline = "chafa --bg white -t 1 -s %s -f symbols --animate=off"
    if not inline and _NEW_TIMG:
        inline = "timg --frames=1 -p q -g %sx1000"
    ansi_img = ""
    if inline:
        cmd = inline % width + " %s"
        try:
            ansi_img = run(cmd, parameter=img_file)
        except Exception as err:
            ansi_img = "***image failed : %s***\n" % err
    return ansi_img
def terminal_image(img_file):
    """Display *img_file* directly in the terminal with the best tool found.

    Nothing is done when neither chafa nor a recent timg is available.
    """
    # Render by timg is better than old chafa; it is also centered.
    if _NEW_CHAFA:
        base = "chafa -C on -d 0 --bg white -t 1 -w 1"
    elif _NEW_TIMG:
        base = "timg --loops=1 -C"
    elif _HAS_CHAFA:
        base = "chafa -d 0 --bg white -t 1 -w 1"
    else:
        return
    run(base + " %s", parameter=img_file, direct_output=True)
2023-07-03 09:43:06 +00:00
# First, we define the different content->text renderers, outside of the rest
# (They could later be factorized in other files or replaced)
class AbstractRenderer():
    """Base class of all content->text renderers.

    A concrete renderer must provide render(body, width, mode, startlinks)
    returning a (rendered_text, links) pair, and get_mime(). Rendered text
    and links are cached per mode in self.rendered_text / self.links.
    """

    def __init__(self, content, url, center=True):
        self.url = url
        self.body = str(content)
        # there's one rendered text and one links table per mode
        self.rendered_text = {}
        self.links = {}
        self.images = {}
        self.title = None
        self.validity = True
        self.temp_files = {}
        self.center = center
        self.last_mode = "readable"
        # NOTE(review): this is the module-level default dict itself, so
        # set_theme() updates it in place for every renderer. Looks like it
        # may be relied upon by sub-renderers - confirm before copying it.
        self.theme = offthemes.default

    def display(self, mode=None, directdisplay=False):
        """Return the titled body, or print it and return True if
        *directdisplay* is set. In "source" mode the raw body is used."""
        wtitle = self.get_formatted_title()
        if mode == "source":
            body = self.body
        else:
            body = wtitle + "\n" + self.get_body(mode=mode)
        if directdisplay:
            print(body)
            return True
        return body

    def has_direct_display(self):
        return False

    def set_theme(self, theme):
        if theme:
            self.theme.update(theme)

    def get_theme(self):
        return self.theme

    # This class holds an internal representation of the HTML text
    class representation:
        """Line-oriented text buffer handling wrapping, indents and colors."""

        def __init__(self, width, title=None, center=True, theme=None):
            self.title = title
            self.center = center
            self.final_text = ""
            self.opened = []
            self.width = width
            self.last_line = ""
            # maps a position in last_line to a list of [color, 0|1] codes
            self.last_line_colors = {}
            self.last_line_center = False
            self.new_paragraph = True
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""
            self.disabled_indents = None
            # each color is an [open,close] pair code
            self.theme = theme if theme is not None else {}
            self.colors = offthemes.colors

        def _insert(self, color, open=True):
            """Remember that *color* is opened/closed at the current position."""
            o = 0 if open else 1
            pos = len(self.last_line)
            # we remember the position where to insert color codes
            if pos not in self.last_line_colors:
                self.last_line_colors[pos] = []
            # Two inverse codes cancel each other
            inverse = [color, int(not o)]
            if inverse in self.last_line_colors[pos]:
                self.last_line_colors[pos].remove(inverse)
            else:
                self.last_line_colors[pos].append([color, o])

        # Take self.last_line and add ANSI codes to it before adding it to
        # self.final_text.
        def _endline(self):
            if len(self.last_line.strip()) > 0:
                for c in self.opened:
                    self._insert(c, open=False)
                nextline = ""
                added_char = 0
                # we insert the color code at the saved positions
                while len(self.last_line_colors) > 0:
                    pos, colors = self.last_line_colors.popitem()
                    # popitem iterates LIFO.
                    # So we go, backward, to the pos (starting at the end of last_line)
                    nextline = self.last_line[pos:] + nextline
                    # A position whose codes cancelled each other has an
                    # empty list: emitting anything there would produce a
                    # malformed escape sequence, so we skip it.
                    if colors:
                        ansicol = "\x1b["
                        for c, o in colors:
                            ansicol += self.colors[c][o] + ";"
                        ansicol = ansicol[:-1] + "m"
                        nextline = ansicol + nextline
                        added_char += len(ansicol)
                    self.last_line = self.last_line[:pos]
                nextline = self.last_line + nextline
                if self.last_line_center:
                    # we have to care about the ansi char while centering
                    width = term_width() + added_char
                    nextline = nextline.strip().center(width)
                    self.last_line_center = False
                else:
                    nextline = self.current_indent + nextline.lstrip() + self.r_indent
                    self.current_indent = self.s_indent
                self.final_text += nextline
                self.last_line = ""
                self.final_text += "\n"
                # colors still opened are re-opened at the start of next line
                for c in self.opened:
                    self._insert(c, open=True)
            else:
                self.last_line = ""

        def center_line(self):
            self.last_line_center = True

        def open_theme(self, element):
            """Open every color of theme *element*; return False if the
            element is not part of the theme."""
            if element in self.theme:
                for c in self.theme[element]:
                    self.open_color(c)
                return True
            return False

        def close_theme(self, element):
            if element in self.theme:
                for c in self.theme[element]:
                    self.close_color(c)

        def open_color(self, color):
            if color in self.colors and color not in self.opened:
                self._insert(color, open=True)
                self.opened.append(color)

        def close_color(self, color):
            if color in self.colors and color in self.opened:
                self._insert(color, open=False)
                self.opened.remove(color)

        def close_all(self):
            if len(self.colors) > 0:
                self.last_line += "\x1b[0m"
                self.opened.clear()

        def startindent(self, indent, sub=None, reverse=None):
            """Start an indented section: *indent* prefixes the first line,
            *sub* the following ones and *reverse* is appended to each line."""
            self._endline()
            self.i_indent = indent
            self.current_indent = indent
            self.s_indent = sub if sub else indent
            self.r_indent = reverse if reverse else ""

        def endindent(self):
            self._endline()
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""

        def _disable_indents(self):
            self.disabled_indents = [
                self.current_indent,
                self.i_indent,
                self.s_indent,
                self.r_indent,
            ]
            self.endindent()

        def _enable_indents(self):
            if self.disabled_indents:
                self.current_indent = self.disabled_indents[0]
                self.i_indent = self.disabled_indents[1]
                self.s_indent = self.disabled_indents[2]
                self.r_indent = self.disabled_indents[3]
            self.disabled_indents = None

        def newline(self):
            self._endline()

        # A new paragraph implies 2 newlines (1 blank line between paragraphs)
        # But it is only used if we didn't already start one, to avoid plenty
        # of blank lines. force=True allows to bypass that limit.
        # new_paragraph becomes false as soon as text is entered into it
        def newparagraph(self, force=False):
            if force or not self.new_paragraph:
                self._endline()
                self.final_text += "\n"
                self.new_paragraph = True

        def add_space(self):
            if len(self.last_line) > 0 and self.last_line[-1] != " ":
                self.last_line += " "

        def _title_first(self, intext=None):
            """Emit the pending title once, unless *intext* is the title."""
            if self.title:
                if not self.title == intext:
                    self._disable_indents()
                    self.open_theme("title")
                    self.add_text(self.title)
                    self.close_all()
                    self.newparagraph()
                    self._enable_indents()
                self.title = None

        # Beware, blocks are not wrapped nor indented and left untouched!
        # They are mostly useful for pictures and preformatted text.
        def add_block(self, intext):
            # If necessary, we add the title before a block
            self._title_first()
            # we don't want to indent blocks
            self._endline()
            self._disable_indents()
            self.final_text += self.current_indent + intext
            self.new_paragraph = False
            self._endline()
            self._enable_indents()

        def add_text(self, intext):
            """Append *intext* to the current line, wrapping at self.width."""
            self._title_first(intext=intext)
            lines = []
            last = (self.last_line + intext)
            self.last_line = ""
            # With the following, we basically cancel adding only spaces
            # on an empty line
            if len(last.strip()) > 0:
                self.new_paragraph = False
            else:
                last = last.strip()
            if len(last) > self.width:
                width = self.width - len(self.current_indent) - len(self.r_indent)
                spaces_left = len(last) - len(last.lstrip())
                spaces_right = len(last) - len(last.rstrip())
                lines = textwrap.wrap(last, width, drop_whitespace=True)
                self.last_line += spaces_left * " "
                while len(lines) > 1:
                    l = lines.pop(0)
                    self.last_line += l
                    self._endline()
                if len(lines) == 1:
                    li = lines[0]
                    self.last_line += li + spaces_right * " "
            else:
                self.last_line = last

        def get_final(self):
            """Flush pending content and return the whole rendered text."""
            self.close_all()
            self._endline()
            # if no content, we still add the title
            self._title_first()
            lines = self.final_text.splitlines()
            termspace = shutil.get_terminal_size()[0]
            # Following code inserts blank spaces to center the content
            if self.center and termspace > term_width():
                margin = int((termspace - term_width()) // 2)
            else:
                margin = 0
            return "\n".join([margin * " " + l for l in lines])

    def get_subscribe_links(self):
        return [[self.url, self.get_mime(), self.get_title()]]

    def is_valid(self):
        return self.validity

    def set_mode(self, mode):
        self.last_mode = mode

    def get_mode(self):
        return self.last_mode

    def get_link(self, nb):
        """Return link number *nb* (1-based), or None if it does not exist."""
        links = self.get_links()
        if nb < 1:
            # Without this check, 0 or negative values would silently
            # return a link counted from the end of the list.
            print("Invalid link index %s for %s" % (nb, self.url))
            return None
        if len(links) < nb:
            print("Index too high! No link %s for %s" % (nb, self.url))
            return None
        return links[nb - 1]

    # get_title is about the "content title", so the title in the page itself
    def get_title(self):
        return "Abstract title"

    def get_page_title(self):
        title = self.get_title()
        if not title or len(title) == 0:
            title = self.get_url_title()
        else:
            title += " (%s)" % self.get_url_title()
        return title

    def get_formatted_title(self):
        """Return the themed window title with link count and origin info."""
        title = self.get_url_title()
        nbr = len(self.get_links())
        if is_local(self.url):
            title += " (%s items)" % nbr
            str_last = "local file"
        else:
            str_last = "last accessed on %s"\
                       % time.ctime(netcache.cache_last_modified(self.url))
            title += " (%s links)" % nbr
        return self._window_title(title, info=str_last)

    # this function is about creating a title derived from the URL
    def get_url_title(self):
        # small intelligence to try to find a good name for a capsule
        # we try to find either ~username or /users/username
        # else we fallback to hostname
        if not self.url:
            return ""
        if is_local(self.url):
            splitpath = self.url.split("/")
            filename = splitpath[-1]
            return filename
        path = self.url
        parsed = urllib.parse.urlparse(self.url)
        # hostname is None for URLs without a network location; callers
        # concatenate the result with strings, so fall back to "".
        red_title = parsed.hostname or ""
        if "user" in path:
            i = 0
            splitted = path.split("/")
            while i < (len(splitted) - 1):
                if splitted[i].startswith("user"):
                    red_title = splitted[i + 1]
                i += 1
        if "~" in path:
            for pp in path.split("/"):
                if pp.startswith("~"):
                    red_title = pp[1:]
        return red_title

    # This function return a list of URL which should be downloaded
    # before displaying the page (images in HTML pages, typically)
    def get_images(self, mode=None):
        if not mode:
            mode = self.last_mode
        if mode not in self.images:
            self.get_body(mode=mode)
            # we also invalidate the body that was done without images
            self.rendered_text.pop(mode, None)
        if mode in self.images:
            return self.images[mode]
        return []

    # This function will give gemtext to the gemtext renderer
    def prepare(self, body, mode=None):
        return [[body, None]]

    def _build_body_and_links(self, mode, width=None):
        """Render every prepared body and fill the caches for *mode*."""
        if not width:
            width = term_width()
        prepared_bodies = self.prepare(self.body, mode=mode)
        self.rendered_text[mode] = ""
        self.links[mode] = []
        for b in prepared_bodies:
            results = None
            size = len(self.links[mode])
            if b[1] in _FORMAT_RENDERERS:
                r = _FORMAT_RENDERERS[b[1]](b[0], self.url, center=self.center)
                results = r.render(b[0], width=width, mode=mode, startlinks=size)
            else:
                results = self.render(b[0], width=width, mode=mode, startlinks=size)
            if results:
                self.rendered_text[mode] += results[0] + "\n"
                # we should absolutize all URLs here
                for l in results[1]:
                    abs_l = urllib.parse.urljoin(self.url, l.split()[0])
                    self.links[mode].append(abs_l)
        for l in self.get_subscribe_links()[1:]:
            self.links[mode].append(l[0])

    def get_body(self, width=None, mode=None):
        if not mode:
            mode = self.last_mode
        if mode not in self.rendered_text:
            self._build_body_and_links(mode, width)
        return self.rendered_text[mode]

    def get_links(self, mode=None):
        if not mode:
            mode = self.last_mode
        if mode not in self.links:
            self._build_body_and_links(mode)
        return self.links[mode]

    def _window_title(self, title, info=None):
        title_r = self.representation(term_width(), theme=self.theme)
        title_r.open_theme("window_title")
        title_r.add_text(title)
        title_r.close_theme("window_title")
        if info:
            title_r.open_theme("window_subtitle")
            title_r.add_text(" (%s)" % info)
            title_r.close_theme("window_subtitle")
        return title_r.get_final()
# A subclass of AbstractRenderer should have a self.render(body,width,mode) method.
# 3 modes are used: readable (by default), full and links_only (the fastest, when
# the rendered content is not used and only the links are needed).
# The prepare() function is called before the rendering. It is useful if
# your renderer outputs a format suitable for another existing renderer (such as gemtext).
# The prepare() function outputs a list of pairs. Each pair is [output text, format] where
# format should be in _FORMAT_RENDERERS. If None, the current renderer is used.
2023-07-03 09:43:06 +00:00
class PlaintextRenderer(AbstractRenderer):
    """Renderer for plain text: the body is returned untouched, no links."""

    def get_mime(self):
        return "text/plain"

    def get_title(self):
        """Return the cached title or derive one from the first line."""
        if self.title:
            return self.title
        if not self.body:
            return "(unknown)"
        lines = self.body.splitlines()
        if lines:
            # If no title is found, we take the first 50 chars
            # of the first line
            title_line = lines[0].strip()
            if len(title_line) > 50:
                # 49 chars + an ellipsis marking the truncation = 50 chars
                title_line = title_line[:49] + "\u2026"
            self.title = title_line
        else:
            self.title = "Empty Page"
        return self.title

    def render(self, gemtext, width=None, mode=None, startlinks=0):
        return gemtext, []
2023-07-03 09:43:06 +00:00
# Gemtext Rendering Engine
# Gemtext Rendering Engine
class GemtextRenderer(AbstractRenderer):
    """Renderer for text/gemini documents."""

    def get_mime(self):
        return "text/gemini"

    def get_title(self):
        """Return the cached title, else the first heading, else a title
        derived from the first line of the body."""
        if self.title:
            return self.title
        if not self.body:
            return "(unknown)"
        lines = self.body.splitlines()
        for line in lines:
            if line.startswith("#"):
                self.title = line.strip("#").strip()
                return self.title
        if lines:
            # If no title is found, we take the first 50 chars
            # of the first line
            title_line = lines[0].strip()
            if len(title_line) > 50:
                # 49 chars + an ellipsis marking the truncation = 50 chars
                title_line = title_line[:49] + "\u2026"
            self.title = title_line
        else:
            self.title = "Empty Page"
        return self.title

    # render_gemtext
    def render(self, gemtext, width=None, mode=None, startlinks=0):
        """Render *gemtext* and return (rendered_text, links).

        Link numbering starts at startlinks + 1. URLs found in plain text
        lines are appended after the explicit "=>" links.
        """
        if not width:
            width = term_width()
        r = self.representation(width, theme=self.theme)
        links = []
        hidden_links = []
        preformatted = False

        def format_link(url, index, name=None):
            if "://" in url:
                protocol, adress = url.split("://", maxsplit=1)
                protocol = " %s" % protocol
            else:
                adress = url
                protocol = ""
            # the protocol is only displayed when it is not the usual one
            if "gemini" in protocol or "list" in protocol:
                protocol = ""
            if not name:
                name = adress
            return "[%d%s] %s" % (index, protocol, name)

        for line in gemtext.splitlines():
            r.newline()
            if line.startswith("```"):
                preformatted = not preformatted
                if preformatted:
                    r.open_theme("preformatted")
                else:
                    r.close_theme("preformatted")
            elif preformatted:
                # blocks are never wrapped, which is what we want here
                r.add_block(line + "\n")
            elif len(line.strip()) == 0:
                r.newparagraph(force=True)
            elif line.startswith("=>"):
                strippedline = line[2:].strip()
                if strippedline:
                    links.append(strippedline)
                    splitted = strippedline.split(maxsplit=1)
                    url = splitted[0]
                    name = None
                    if len(splitted) > 1:
                        name = splitted[1]
                    link = format_link(url, len(links) + startlinks, name=name)
                    if r.open_theme("oneline_link"):
                        theme = "oneline_link"
                    else:
                        theme = "link"
                        r.open_theme("link")
                    # wrapped lines are aligned after the "[n] " prefix
                    startpos = link.find("] ") + 2
                    r.startindent("", sub=startpos * " ")
                    r.add_text(link)
                    r.close_theme(theme)
                    r.endindent()
            elif line.startswith("* "):
                line = line[1:].lstrip("\t ")
                # list items get a bullet, wrapped lines align with the text
                r.startindent("\u2022 ", sub="  ")
                r.add_text(line)
                r.endindent()
            elif line.startswith(">"):
                line = line[1:].lstrip("\t ")
                r.startindent("> ")
                r.open_theme("blockquote")
                r.add_text(line)
                r.close_theme("blockquote")
                r.endindent()
            elif line.startswith("###"):
                line = line[3:].lstrip("\t ")
                # themes without "subsubtitle" fall back to "subtitle"
                if r.open_theme("subsubtitle"):
                    theme = "subsubtitle"
                else:
                    r.open_theme("subtitle")
                    theme = "subtitle"
                r.add_text(line)
                r.close_theme(theme)
            elif line.startswith("##"):
                line = line[2:].lstrip("\t ")
                r.open_theme("subtitle")
                r.add_text(line)
                r.close_theme("subtitle")
            elif line.startswith("#"):
                line = line[1:].lstrip("\t ")
                if not self.title:
                    self.title = line
                r.open_theme("title")
                r.add_text(line)
                r.close_theme("title")
            else:
                if "://" in line:
                    for w in line.split():
                        if "://" in w:
                            hidden_links.append(w)
                r.add_text(line.rstrip())
        links += hidden_links
        return r.get_final(), links
class GopherRenderer(AbstractRenderer):
    """Renderer for gopher menus, with a raw-text fallback."""

    def get_mime(self):
        return "text/gopher"

    def get_title(self):
        """Return the display string of the first line of the body."""
        if not self.title:
            self.title = ""
            if self.body:
                firstline = self.body.splitlines()[0]
                firstline = firstline.split("\t")[0]
                if firstline.startswith("i"):
                    firstline = firstline[1:]
                self.title = firstline
        return self.title

    # menu_or_text
    def render(self, body, width=None, mode=None, startlinks=0):
        """Render *body* as a gopher menu; if that fails, print the error
        and return the body as an unformatted block without links."""
        if not width:
            width = term_width()
        try:
            render, links = self._render_goph(body, width=width, mode=mode,
                                              startlinks=startlinks)
        except Exception as err:
            print("Error rendering Gopher ", err)
            r = self.representation(width, theme=self.theme)
            r.add_block(body)
            render = r.get_final()
            links = []
        return render, links

    def _render_goph(self, body, width=None, mode=None, startlinks=0):
        """Return (rendered_text, links) for the gopher menu in *body*."""
        if not width:
            width = term_width()
        # This was copied straight from Agena (then later adapted)
        links = []
        r = self.representation(width, theme=self.theme)
        # We render the body we were given (not self.body), so the method
        # honours its argument like the render() of the other renderers.
        for line in body.split("\n"):
            r.newline()
            if line.startswith("i"):
                # informational line: only the display string is shown
                towrap = line[1:].split("\t")[0]
                if len(towrap.strip()) > 0:
                    r.add_text(towrap)
                else:
                    r.newparagraph()
            elif line.strip() not in [".", ""]:
                parts = line.split("\t")
                parts[-1] = parts[-1].strip()
                if parts[-1] == "+":
                    parts = parts[:-1]
                # parts[0] holds the item type followed by the name; an
                # empty one cannot be a menu entry and is shown as text.
                if len(parts) == 4 and parts[0]:
                    name, path, host, port = parts
                    itemtype = name[0]
                    name = name[1:]
                    if port == "70":
                        port = ""
                    else:
                        port = ":%s" % port
                    if itemtype == "h" and path.startswith("URL:"):
                        url = path[4:]
                    else:
                        url = "gopher://%s%s/%s%s" % (host, port, itemtype, path)
                    url = url.replace(" ", "%20")
                    linkline = url + " " + name
                    links.append(linkline)
                    number = len(links) + startlinks
                    towrap = "[%s] " % str(number) + name
                    r.add_text(towrap)
                else:
                    r.add_text(line)
        return r.get_final(), links
class FolderRenderer(GemtextRenderer):
    """Renderer presenting the user's lists directory as a gemtext page."""

    # it was initialized with:
    # self.renderer = FolderRenderer("",self.get_cache_path(),datadir=_DATA_DIR)
    def __init__(self, content, url, center=True, datadir=None):
        GemtextRenderer.__init__(self, content, url, center)
        self.datadir = datadir

    def get_mime(self):
        return "Directory"

    def prepare(self, body, mode=None):
        """Build a gemtext index of the lists found in <datadir>/lists.

        Returns [[gemtext, None]]. When no list is found, *body* is
        returned unchanged.
        """
        def get_first_line(l):
            # Return the first line of list *l* if it is a heading, else None
            path = os.path.join(listdir, l + ".gmi")
            with open(path) as f:
                first_line = f.readline().strip()
            if first_line.startswith("#"):
                return first_line
            return None

        def write_list(l):
            # One link line per list, with its number of items
            body = ""
            for li in l:
                path = "list:///%s" % li
                r = renderer_from_file(netcache.get_cache_path(path))
                size = len(r.get_links())
                body += "=> %s %s (%s items)\n" % (str(path), li, size)
            return body

        listdir = os.path.join(self.datadir, "lists")
        self.title = "My lists"
        lists = []
        if os.path.exists(listdir):
            for l in os.listdir(listdir):
                # Only .gmi files are lists. Other files would get a mangled
                # name and fail when we try to read their first line.
                if l.endswith(".gmi"):
                    # removing the .gmi at the end of the name
                    lists.append(l[:-4])
        if len(lists) > 0:
            body = ""
            my_lists = []
            system_lists = []
            subscriptions = []
            frozen = []
            lists.sort()
            for l in lists:
                if l in ["history", "to_fetch", "archives", "tour"]:
                    system_lists.append(l)
                else:
                    first_line = get_first_line(l)
                    if first_line and "#subscribed" in first_line:
                        subscriptions.append(l)
                    elif first_line and "#frozen" in first_line:
                        frozen.append(l)
                    else:
                        my_lists.append(l)
            if len(my_lists) > 0:
                body += "\n## Bookmarks Lists (updated during sync)\n"
                body += write_list(my_lists)
            if len(subscriptions) > 0:
                body += "\n## Subscriptions (new links in those are added to tour)\n"
                body += write_list(subscriptions)
            if len(frozen) > 0:
                body += "\n## Frozen (fetched but never updated)\n"
                body += write_list(frozen)
            if len(system_lists) > 0:
                body += "\n## System Lists\n"
                body += write_list(system_lists)
        return [[body, None]]
2023-07-03 09:43:06 +00:00
class FeedRenderer(GemtextRenderer):
    """Renderer converting RSS/Atom feeds to gemtext (needs feedparser)."""

    def get_mime(self):
        return "application/rss+xml"

    def is_valid(self):
        """A feed is valid if feedparser is available, parsing succeeded
        and there is at least one entry."""
        if not _DO_FEED:
            return False
        parsed = feedparser.parse(self.body)
        if parsed.bozo:
            return False
        # If no content, then fallback to HTML
        return len(parsed.entries) > 0

    def get_title(self):
        if not self.title:
            # the title is set as a side effect of prepare()
            self.get_body()
        return self.title

    def prepare(self, content, mode=None, width=None):
        """Convert the feed in *content* to a list of [text, format] pairs.

        In "full" mode each entry is followed by its summary, rendered as
        text/html. In other modes entries are listed one per line.
        *width* is accepted for compatibility and is not used.
        """
        if not mode:
            mode = self.last_mode
        self.title = "RSS/Atom feed"
        if not _DO_FEED:
            self.validity = False
            # The caller expects a list of [text, format] pairs
            return [["Please install python-feedparser to handle RSS/Atom feeds\n", None]]
        parsed = feedparser.parse(content)
        if parsed.bozo:
            self.validity = False
            page = "Invalid RSS feed\n\n" + str(parsed.bozo_exception)
            # return the error so that it can actually be displayed
            return [[page, None]]

        toreturn = []
        if "title" in parsed.feed:
            t = parsed.feed.title
        else:
            t = "Unknown"
        self.title = "%s (XML feed)" % t
        page = "# %s" % self.title + "\n"
        if "updated" in parsed.feed:
            page += "Last updated on %s\n\n" % parsed.feed.updated
        if "subtitle" in parsed.feed:
            page += parsed.feed.subtitle + "\n"
        if "link" in parsed.feed:
            page += "=> %s\n" % parsed.feed.link
        page += "\n## Entries\n"
        toreturn.append([page, None])
        if len(parsed.entries) < 1:
            self.validity = False
        postslist = ""
        for i in parsed.entries:
            if "link" in i:
                line = "=> %s " % i.link
            elif "links" in i and len(i.links) > 0:
                # take the first link which has a non-empty href
                link = None
                for candidate in i.links:
                    link = candidate.get("href")
                    if link:
                        break
                if link:
                    line = "=> %s " % link
                else:
                    line = "* "
            else:
                line = "* "
            if "published" in i:
                # sometimes fails so protect it
                try:
                    pub_date = time.strftime("%Y-%m-%d", i.published_parsed)
                    line += pub_date + " : "
                except Exception:
                    pass
            if "title" in i:
                line += "%s" % (i.title)
            if "author" in i:
                line += " (by %s)" % i.author
            if mode == "full":
                toreturn.append([line, None])
                if "summary" in i:
                    toreturn.append([i.summary, "text/html"])
                toreturn.append(["------------", None])
            else:
                postslist += line + "\n"
        # If each post is appended to toreturn, a \n is inserted
        # between each item of the list. Hence this single block.
        if mode != "full":
            toreturn.append([postslist, None])
        return toreturn
2023-07-03 09:43:06 +00:00
class ImageRenderer(AbstractRenderer):
    """Renderer for picture files; the body is the path of the image."""

    def get_mime(self):
        return "image/*"

    def is_valid(self):
        return bool(_RENDER_IMAGE)

    def get_links(self, mode=None):
        # a picture never contains links
        return []

    def get_title(self):
        return "Picture file"

    def render(self, img, width=None, mode=None, startlinks=0):
        """Return (ansi_text, []) for the image file *img*."""
        # with inline, we use symbols to be rendered with less.
        # else we use the best possible renderer.
        if mode in ("full_links_only", "links_only"):
            return "", []
        if width:
            spaces = int((term_width() - width) // 2)
        else:
            width = term_width()
            spaces = 0
        # Now centering the image
        padding = spaces * " "
        rows = inline_image(img, width).splitlines()
        new_img = "".join([padding + row + "\n" for row in rows])
        return new_img, []

    def has_direct_display(self):
        return _RENDER_IMAGE

    def display(self, mode=None, directdisplay=False):
        """Return the titled inline rendering, or, if *directdisplay* is
        set, print the title, show the image in the terminal and return
        True."""
        wtitle = self.get_formatted_title()
        if directdisplay:
            print(self._window_title(wtitle))
            terminal_image(self.body)
            return True
        return wtitle + "\n" + self.get_body(mode=mode)
2023-07-03 09:43:06 +00:00
class HtmlRenderer(AbstractRenderer):
def get_mime(self):
    """Return the mime type handled by this renderer."""
    mime = "text/html"
    return mime
def is_valid(self):
    """Tell whether HTML can be rendered and the content is valid.

    A hint is printed when HTML support is not available.
    """
    if _DO_HTML:
        return self.validity
    print("HTML document detected. Please install python-bs4 and python-readability.")
    return _DO_HTML
def get_subscribe_links(self):
    """Return [url, mime, title] triples: the page itself first, then
    every <link rel="alternate"> whose type mentions rss, atom or feed."""
    subs = [[self.url, self.get_mime(), self.get_title()]]
    soup = BeautifulSoup(self.body, 'html.parser')
    for tag in soup.find_all("link", rel="alternate", recursive=True):
        feed_type = tag.get("type")
        if not feed_type:
            continue
        if any(word in feed_type for word in ("rss", "atom", "feed")):
            # some rss links are relative: we make them absolute
            target = urllib.parse.urljoin(self.url, tag.get("href"))
            subs.append([target, feed_type, tag.get("title")])
    return subs
def get_title(self):
    """Return the title of the HTML page, "" if none can be found.

    The title is computed with readability when available, falling back
    to the <title> element, and is cached in self.title.
    """
    if self.title:
        return self.title
    if not self.body:
        return ""
    if _HAS_READABILITY:
        try:
            readable = Document(self.body)
            self.title = readable.short_title()
            return self.title
        except Exception:
            # readability failed: fall back to the <title> element
            pass
    soup = BeautifulSoup(self.body, "html.parser")
    # a page may have no <title>, or an empty one
    if soup.title and soup.title.string:
        self.title = str(soup.title.string)
    # always return a string, including on the fallback path
    return self.title or ""
# Our own HTML engine (crazy, isnt it?)
# Return [rendered_body, list_of_links]
# mode is either links_only, readable or full
def render(self,body,mode=None,width=None,add_title=True,startlinks=0):
if not mode: mode = self.last_mode
2023-07-03 09:43:06 +00:00
if not width:
width = term_width()
if not _DO_HTML:
print("HTML document detected. Please install python-bs4 and python-readability.")
return
# This method recursively parse the HTML
r = self.representation(width,title=self.get_title(),center=self.center,theme=self.theme)
2023-07-03 09:43:06 +00:00
links = []
# You know how bad html is when you realize that space sometimes meaningful, somtimes not.
# CR are not meaniningful. Except that, somethimes, they should be interpreted as spaces.
# HTMLis real crap. At least the one people are generating.
def render_image(src,width=40,mode=None):
ansi_img = ""
imgurl,imgdata = looks_like_base64(src,self.url)
if _RENDER_IMAGE and mode not in ["full_links_only","links_only"] and imgurl:
2023-07-03 09:43:06 +00:00
try:
#4 followings line are there to translate the URL into cache path
2023-07-30 21:12:01 +00:00
img = netcache.get_cache_path(imgurl)
2023-07-03 09:43:06 +00:00
if imgdata:
os.makedirs(os.path.dirname(img), exist_ok=True)
2023-07-03 09:43:06 +00:00
with open(img,"wb") as cached:
cached.write(base64.b64decode(imgdata))
cached.close()
2023-07-30 21:12:01 +00:00
if netcache.is_cache_valid(img):
2023-07-03 09:43:06 +00:00
renderer = ImageRenderer(img,imgurl)
# Image are 40px wide except if terminal is smaller
if width > 40:
size = 40
else:
size = width
ansi_img = "\n" + renderer.get_body(width=size,mode="inline")
except Exception as err:
#we sometimes encounter really bad formatted files or URL
ansi_img = textwrap.fill("[BAD IMG] %s - %s"%(err,src),width) + "\n"
return ansi_img
def sanitize_string(string):
#never start with a "\n"
#string = string.lstrip("\n")
string = string.replace("\r","").replace("\n", " ").replace("\t"," ")
endspace = string.endswith(" ") or string.endswith("\xa0")
startspace = string.startswith(" ") or string.startswith("\xa0")
toreturn = string.replace("\n", " ").replace("\t"," ").strip()
while " " in toreturn:
toreturn = toreturn.replace(" "," ")
toreturn = html.unescape(toreturn)
if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"):
toreturn += " "
if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"):
toreturn = " " + toreturn
return toreturn
def recursive_render(element,indent="",preformatted=False):
# Render one parsed HTML node into the renderer object `r`, recursing into
# its children. The branch taken depends on element.name.
#
# element: the node to render (the code reads .name, .children, .string,
#          .text and .get(), so presumably a BeautifulSoup node - confirm).
# indent: only forwarded to the recursive calls; it is never read to
#         produce output in this function.
# preformatted: when true, text nodes are emitted verbatim under the
#         "preformatted" theme instead of going through sanitize_string.
#
# r, links, startlinks, width, mode, self and render_image are not defined
# in this function; they come from the enclosing scope.
# NOTE(review): the bare timestamp lines interleaved below look like
# blame-view residue rather than Python, and the indentation is missing in
# this copy - confirm against the real file before relying on the nesting.
if element.name == "blockquote":
r.newparagraph()
r.startindent(" ",reverse=" ")
for child in element.children:
2023-08-30 15:02:54 +00:00
r.open_theme("blockquote")
2023-07-03 09:43:06 +00:00
recursive_render(child,indent="\t")
2023-08-30 15:02:54 +00:00
r.close_theme("blockquote")
2023-07-03 09:43:06 +00:00
r.endindent()
elif element.name in ["div","p"]:
# block containers: their content is isolated in its own paragraph
r.newparagraph()
for child in element.children:
recursive_render(child,indent=indent)
r.newparagraph()
elif element.name in ["span"]:
# inline container: a space is requested on each side of the content
r.add_space()
for child in element.children:
recursive_render(child,indent=indent)
r.add_space()
elif element.name in ["h1","h2","h3","h4","h5","h6"]:
# headings: h1 uses the "title" theme, h2/h3 use "subtitle"
if element.name in ["h1"]:
2023-08-30 15:02:54 +00:00
r.open_theme("title")
elif element.name in ["h2","h3"]:
r.open_theme("subtitle")
elif element.name in ["h4","h5","h6"]:
# presumably falls back to "subtitle" when open_theme reports that
# "subsubtitle" is unavailable - depends on the lost nesting, confirm
if not r.open_theme("subsubtitle"):
r.open_theme("subtitle")
2023-10-07 21:54:32 +00:00
r.newparagraph()
2023-07-03 09:43:06 +00:00
# children of a heading are rendered without indent/preformatted
for child in element.children:
recursive_render(child)
2023-10-07 21:54:32 +00:00
#r.close_all()
2023-10-07 21:45:01 +00:00
r.close_all()
2023-10-07 21:54:32 +00:00
r.newparagraph()
2023-07-03 09:43:06 +00:00
elif element.name in ["code","tt"]:
# inline code: children are rendered in preformatted mode
for child in element.children:
recursive_render(child,indent=indent,preformatted=True)
elif element.name in ["pre"]:
# <pre>: element.text is added as one block, children are not visited
r.newparagraph()
r.add_block(element.text)
r.newparagraph()
elif element.name in ["li"]:
r.startindent("",sub=" ")
for child in element.children:
recursive_render(child,indent=indent)
r.endindent()
elif element.name in ["tr"]:
r.startindent("|",reverse="|")
for child in element.children:
recursive_render(child,indent=indent)
r.endindent()
elif element.name in ["td","th"]:
# table cells are wrapped between "| " and " |"
r.add_text("| ")
for child in element.children:
recursive_render(child)
r.add_text(" |")
# italics
elif element.name in ["em","i"]:
r.open_color("italic")
for child in element.children:
recursive_render(child,indent=indent,preformatted=preformatted)
r.close_color("italic")
#bold
elif element.name in ["b","strong"]:
r.open_color("bold")
for child in element.children:
recursive_render(child,indent=indent,preformatted=preformatted)
r.close_color("bold")
elif element.name == "a":
link = element.get('href')
# support for images nested in links
if link:
text = ""
imgtext = ""
#we display images first in a link
for child in element.children:
if child.name == "img":
recursive_render(child)
imgtext = "[IMG LINK %s]"
# text is never filled in above, so the stored entry is the href
# followed by a single space
links.append(link+" "+text)
# the link number is its position in links, offset by startlinks
link_id = str(len(links)+startlinks)
2023-08-30 15:02:54 +00:00
r.open_theme("link")
2023-07-03 09:43:06 +00:00
# second pass: everything that is not an image
for child in element.children:
if child.name != "img":
recursive_render(child,preformatted=preformatted)
if imgtext != "":
r.center_line()
r.add_text(imgtext%link_id)
else:
r.add_text(" [%s]"%link_id)
2023-08-30 15:02:54 +00:00
r.close_theme("link")
2023-07-03 09:43:06 +00:00
else:
#No real link found
for child in element.children:
recursive_render(child,preformatted=preformatted)
elif element.name == "img":
src = element.get("src")
text = ""
# render_image is called before src is checked for emptiness
ansi_img = render_image(src,width=width,mode=mode)
alt = element.get("alt")
if alt:
alt = sanitize_string(alt)
text += "[IMG] %s"%alt
else:
text += "[IMG]"
if src:
# the image URL is also recorded per mode in self.images
if not mode in self.images:
self.images[mode] = []
abs_url,data = looks_like_base64(src,self.url)
#if abs_url is None, it means we do not support
#the image (such as svg+xml). So we hide it.
if abs_url:
links.append(abs_url+" "+text)
self.images[mode].append(abs_url)
link_id = " [%s]"%(len(links)+startlinks)
r.add_block(ansi_img)
r.open_theme("image_link")
r.center_line()
r.add_text(text + link_id)
r.close_theme("image_link")
r.newline()
2023-07-03 09:43:06 +00:00
elif element.name == "br":
r.newline()
# anything else, except script/style/template elements and comments
elif element.name not in ["script","style","template"] and type(element) != Comment:
if element.string:
if preformatted :
2023-08-30 15:02:54 +00:00
r.open_theme("preformatted")
2023-07-03 09:43:06 +00:00
r.add_text(element.string)
2023-08-30 15:02:54 +00:00
r.close_theme("preformatted")
2023-07-03 09:43:06 +00:00
else:
# whitespace-only strings are dropped after sanitizing
s = sanitize_string(element.string)
if len(s.strip()) > 0:
r.add_text(s)
else:
# no single string available: descend into the children
for child in element.children:
recursive_render(child,indent=indent)
# the real render_html heart
if mode in ["full","full_links_only"]:
2023-07-03 09:43:06 +00:00
summary = body
elif _HAS_READABILITY:
try:
readable = Document(body)
summary = readable.summary()
except Exception as err:
summary = body
else:
summary = body
soup = BeautifulSoup(summary, 'html.parser')
#soup = BeautifulSoup(summary, 'html5lib')
if soup :
if soup.body :
recursive_render(soup.body)
else:
recursive_render(soup)
return r.get_final(),links
# Mapping of mimetypes to renderers
# (any content with a text/* mimetype not listed here will be rendered as Gemtext)
# Keys may be fnmatch patterns (e.g. "image/*"); set_renderer uses the first
# matching key in insertion order, so the order of the entries matters.
_FORMAT_RENDERERS = {
"text/gemini": GemtextRenderer,
"text/html" : HtmlRenderer,
"text/xml" : FeedRenderer,
"text/plain" : PlaintextRenderer,
"application/xml" : FeedRenderer,
"application/rss+xml" : FeedRenderer,
"application/atom+xml" : FeedRenderer,
"text/gopher": GopherRenderer,
"image/*": ImageRenderer,
"application/javascript": HtmlRenderer,
}
def get_mime(path,url=None):
    """Guess the mimetype used to choose a renderer for *path*.

    path: local path of the content (or a "mailto:" address).
    url: optional original URL; a "gopher://" URL takes precedence and the
         type is then derived from the gopher item type found in the URL.

    Returns a mimetype string, one of the pseudo types "mailto",
    "Local Folder" or "binary", or None when *path* is empty or when no
    type could be guessed.
    """
    #Beware, this one is really a shady ad-hoc function
    if not path:
        return None
    # Filename based second opinion. Only the file(1) branch fills it in,
    # but it is read further down, so it must always be bound.
    mime2 = None
    if url and url.startswith("gopher://"):
        #special case for gopher
        #code copy/pasted from netcache
        parsed = urllib.parse.urlparse(url)
        if len(parsed.path) >= 2:
            itemtype = parsed.path[1]
            path = parsed.path[2:]
        else:
            itemtype = "1"
            path = ""
        if itemtype == "0":
            mime = "text/gemini"
        elif itemtype == "h":
            mime = "text/html"
        elif itemtype in ("9","g","I","s",";"):
            mime = "binary"
        else:
            # item type "1" and every unknown type are gopher menus
            mime = "text/gopher"
    elif path.startswith("mailto:"):
        mime = "mailto"
    elif os.path.isdir(path):
        mime = "Local Folder"
    elif path.endswith(".gmi"):
        mime = "text/gemini"
    elif path.endswith("gophermap"):
        mime = "text/gopher"
    elif shutil.which("file") :
        mime = run("file -b --mime-type %s", parameter=path).strip()
        mime2,encoding = mimetypes.guess_type(path,strict=False)
        #If we hesitate between html and xml, take the xml one
        #because the FeedRenderer falls back to HtmlRenderer
        if mime2 and mime != mime2 and "html" in mime and "xml" in mime2:
            mime = "text/xml"
        # If it is a xml file, consider it as such, regardless of what file thinks
        elif path.endswith(".xml"):
            mime = "text/xml"
        #Some xml/html documents are considered as octet-stream
        if mime == "application/octet-stream":
            mime = "text/xml"
    else:
        mime,encoding = mimetypes.guess_type(path,strict=False)
    #gmi Mimetype is not recognized yet
    if not mime and not shutil.which("file") :
        print("Cannot guess the mime type of the file. Please install \"file\".")
    # mimetypes.guess_type returns None for unknown types: do not
    # dereference it, simply hand the absence of a type back to the caller.
    if mime and mime.startswith("text") and mime not in _FORMAT_RENDERERS:
        if mime2 and mime2 in _FORMAT_RENDERERS:
            mime = mime2
        else:
            #by default, we consider it is gemini except for html
            mime = "text/gemini"
    #file does not recognise gemtext. It should be the default renderer.
    #the only case where it does not make sense is if the file is .txt
    if mime == "text/plain" and not path.endswith(".txt"):
        mime = "text/gemini"
    return mime
def renderer_from_file(path,url=None,theme=None):
    """Build the renderer suited to the file stored at *path*.

    path: local path of the content.
    url: original URL of the content; defaults to *path*.
    theme: optional theme forwarded to set_renderer.

    Returns the renderer produced by set_renderer, or None when *path* is
    empty, does not exist, or when its mimetype cannot be determined.
    """
    # Nothing to render: bail out before spending time on mime detection
    # (which may spawn an external process) for a file that is not there.
    if not path or not os.path.exists(path):
        return None
    mime = get_mime(path,url=url)
    if not mime:
        return None
    if not url:
        url = path
    if mime.startswith("text/") or mime in _FORMAT_RENDERERS:
        # textual content is handed over as a string; undecodable bytes
        # are silently dropped
        with open(path,errors="ignore") as f:
            content = f.read()
    else:
        # other renderers receive the path and read the file themselves
        content = path
    return set_renderer(content,url,mime,theme=theme)
def set_renderer(content,url,mime,theme=None):
    """Instantiate the renderer registered for *mime*.

    content: text to render, or whatever the selected renderer expects.
    url: URL (or path) the content comes from.
    mime: mimetype of the content, or the pseudo type "Local Folder".
    theme: optional theme applied to the renderer that is returned.

    Returns a renderer, or None when no registered pattern matches *mime*
    or when a non-text renderer rejects the content.
    """
    # Folders have their own renderer and never go through the table.
    if mime == "Local Folder":
        folder = FolderRenderer("",url,datadir=_DATA_DIR)
        if theme:
            folder.set_theme(theme)
        return folder
    candidates = [pattern for pattern in _FORMAT_RENDERERS
                  if fnmatch.fnmatch(mime, pattern)]
    if not candidates:
        return None
    # The first registered pattern that matches wins.
    chosen = candidates[0]
    renderer = _FORMAT_RENDERERS[chosen](content,url)
    if not renderer.is_valid():
        if chosen.startswith("text"):
            # We double check if the renderer is correct.
            # If not, we fall back to html
            # (this is currently only for XHTML, often being
            # mislabelled as xml thus RSS feeds)
            renderer = _FORMAT_RENDERERS["text/html"](content,url)
        else:
            #we do not parse text, we gave the file to the renderer
            #and it could not handle it
            renderer = None
    if renderer and theme:
        renderer.set_theme(theme)
    return renderer
def render(input,path=None,format="auto",mime=None,url=None):
    """Render *input* to the terminal with the renderer chosen by *format*.

    input: the content to render (text, or what the renderer expects).
    path: path of the file the content comes from, used for autodetection.
    format: explicit renderer name; any other value (e.g. "auto" or None)
            triggers autodetection from *path* or *mime*.
    mime: mimetype of the content, used when there is no path to inspect.
    url: original URL of the content, either a string or a list of
         strings as produced by argparse, in which case the first one is
         used.

    Prints the rendered content, or a "Could not render" message when no
    renderer could be found. Returns None.
    """
    if not url:
        url = ""
    elif not isinstance(url, str):
        # argparse hands over a list; a plain string is used as is
        # instead of being truncated to its first character
        url = url[0]
    if format == "gemtext":
        r = GemtextRenderer(input,url)
    elif format == "html":
        r = HtmlRenderer(input,url)
    elif format == "feed":
        r = FeedRenderer(input,url)
    elif format == "gopher":
        r = GopherRenderer(input,url)
    elif format == "image":
        r = ImageRenderer(input,url)
    elif format == "folder":
        r = FolderRenderer(input,url)
    elif format in ["plaintext","text"]:
        r = PlaintextRenderer(input,url)
    elif not mime and path:
        r = renderer_from_file(path,url)
    elif mime:
        r = set_renderer(input,url,mime)
    else:
        # neither a format, a mime nor a path: there is nothing to
        # autodetect from, report it instead of matching against None
        r = None
    if r:
        r.display(directdisplay=True)
    else:
        print("Could not render %s"%input)
def main():
    """Command-line entry point of ansicat.

    Parses the arguments, then renders every file given as INPUT. When no
    file is given, the content is read from standard input, which requires
    --format or --mime since nothing can be autodetected from a pipe.
    """
    descri = ("ansicat is a terminal rendering tool that will render multiple formats (HTML, "
              "Gemtext, RSS, Gophermap, Image) into ANSI text and colors.\n"
              "When used on a file, ansicat will try to autodetect the format. When used with "
              "standad input, the format must be manually specified.\n"
              "If the content contains links, the original URL of the content can be specified "
              "in order to correctly modify relatives links.")
    parser = argparse.ArgumentParser(prog="ansicat",description=descri)
    parser.add_argument("--format", choices=["auto","gemtext","html","feed","gopher","image","folder","text","plaintext"],
            help="Renderer to use. Available: auto, gemtext, html, feed, gopher, image, folder, plaintext")
    parser.add_argument("--mime", help="Mime of the content to parse")
    ## The argument needs to be a path to a file. If none, then stdin is used which allows
    ## to pipe text directly into ansirenderer
    parser.add_argument("--url",metavar="URL", nargs="*",
            help="Original URL of the content")
    parser.add_argument("content",metavar="INPUT", nargs="*", type=argparse.FileType("r"),
            default=sys.stdin, help="Path to the text to render (default to stdin)")
    args = parser.parse_args()
    if isinstance(args.content,list):
        # Files were given on the command line: render them whether or not
        # stdin is a terminal (args.content is a list here, not a stream,
        # so it must never be read as one).
        for f in args.content:
            path = os.path.abspath(f.name)
            try:
                content = f.read()
            except UnicodeDecodeError:
                # not text: hand over the file object itself
                content = f
            render(content,path=path,format=args.format,url=args.url,mime=args.mime)
    elif sys.stdin.isatty():
        # interactive and no file: there is nothing to read
        print("Ansicat needs at least one file as an argument")
    elif not args.format and not args.mime:
        #we are in stdin
        print("Format or mime should be specified when running with stdin")
    else:
        render(args.content.read(),path=None,format=args.format,url=args.url,mime=args.mime)
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()