2023-08-29 20:24:34 +00:00
|
|
|
|
#!/usr/bin/env python3
|
2023-07-03 09:43:06 +00:00
|
|
|
|
import os
|
2023-07-18 15:40:36 +00:00
|
|
|
|
import sys
|
2023-07-03 21:48:55 +00:00
|
|
|
|
import shutil
|
|
|
|
|
import subprocess
|
|
|
|
|
import textwrap
|
|
|
|
|
import time
|
|
|
|
|
import html
|
2023-07-04 11:55:01 +00:00
|
|
|
|
import urllib
|
2023-07-18 10:33:30 +00:00
|
|
|
|
import argparse
|
2023-07-20 23:03:40 +00:00
|
|
|
|
import mimetypes
|
2023-07-21 09:52:39 +00:00
|
|
|
|
import fnmatch
|
2023-07-30 21:12:01 +00:00
|
|
|
|
import netcache
|
2023-08-30 22:27:54 +00:00
|
|
|
|
import offthemes
|
2023-08-12 22:07:07 +00:00
|
|
|
|
from offutils import run,term_width,is_local,looks_like_base64
|
2023-09-13 19:07:32 +00:00
|
|
|
|
import base64
|
2023-08-13 13:20:01 +00:00
|
|
|
|
from offutils import _DATA_DIR
|
2023-07-03 21:48:55 +00:00
|
|
|
|
# Optional third-party dependencies.
# Each import is attempted once when the module is loaded and a flag records
# whether the corresponding feature can be used.
try:
    from readability import Document
    _HAS_READABILITY = True
except ModuleNotFoundError:
    _HAS_READABILITY = False

try:
    from bs4 import BeautifulSoup
    from bs4 import Comment
    # If bs4 version >= 4.11, we need to silence some xml warnings
    import bs4
    version = bs4.__version__.split(".")
    try:
        # Tuple comparison: any major > 4, or 4.x with x >= 11.
        recent = (int(version[0]), int(version[1])) >= (4, 11)
    except (ValueError, IndexError):
        # An unexpected version string must not prevent the module from
        # loading: silencing the warning is purely cosmetic.
        recent = False
    if recent:
        # As this is only for silencing some warnings, we fail
        # silently. We don't really care.
        try:
            from bs4 import XMLParsedAsHTMLWarning
            import warnings
            warnings.filterwarnings("ignore", category=XMLParsedAsHTMLWarning)
        except Exception:
            # Narrower than a bare except: KeyboardInterrupt and SystemExit
            # are no longer swallowed here.
            pass
    _HAS_SOUP = True
except ModuleNotFoundError:
    _HAS_SOUP = False

_DO_HTML = _HAS_SOUP #and _HAS_READABILITY
if _DO_HTML and not _HAS_READABILITY:
    print("To improve your web experience (less cruft in webpages),")
    print("please install python3-readability or readability-lxml")

try:
    import feedparser
    _DO_FEED = True
except ModuleNotFoundError:
    _DO_FEED = False

try:
    from PIL import Image
    _HAS_PIL = True
except ModuleNotFoundError:
    _HAS_PIL = False
|
|
|
|
|
_HAS_TIMG = shutil.which('timg')
_HAS_CHAFA = shutil.which('chafa')
_NEW_CHAFA = False
_NEW_TIMG = False
_RENDER_IMAGE = False


def _parse_tool_version(text):
    """Return the first dotted version number in *text* as a tuple of ints.

    "Chafa version 1.12.4" gives (1, 12, 4). An empty tuple is returned when
    *text* is empty/false or holds no dotted number; as () compares lower
    than any non-empty tuple, such output is treated as "too old".
    """
    import re
    if not text:
        return ()
    found = re.search(r"\d+(?:\.\d+)+", text)
    if found is None:
        return ()
    return tuple(int(part) for part in found.group(0).split("."))


# All this code to know if we render image inline or not
if _HAS_CHAFA:
    # starting with 1.10, chafa can return only one frame
    # which allows us to drop the dependency on PIL
    output = run("chafa --version")
    # The first line of the output is "Chafa version M.m.p".
    # Versions are compared as integer tuples so that 2.0 counts as newer
    # than 1.10 (comparing major and minor separately rejected it).
    try:
        chafa_version = _parse_tool_version(output.split("\n")[0])
    except (AttributeError, TypeError):
        # run() returned something that is not text: assume an old chafa.
        chafa_version = ()
    if chafa_version >= (1, 10):
        _NEW_CHAFA = True
if _NEW_CHAFA:
    _RENDER_IMAGE = True
if _HAS_TIMG:
    try:
        output = run("timg --version")
    except subprocess.CalledProcessError:
        output = False
    # We don't deal with timg before 1.3.2 (looping options).
    # The comparison stays strict, as it was with the former string
    # comparison, but is now numeric: "1.10" no longer sorts before "1.3".
    try:
        timg_version = _parse_tool_version(output)
    except TypeError:
        timg_version = ()
    if timg_version > (1, 3, 2):
        _NEW_TIMG = True
        _RENDER_IMAGE = True
elif _HAS_CHAFA and _HAS_PIL:
    _RENDER_IMAGE = True
if not _RENDER_IMAGE:
    print("To render images inline, you need either chafa or timg.")
    if not _NEW_CHAFA and not _NEW_TIMG:
        print("Before Chafa 1.10, you also need python-pil")
|
|
|
|
|
|
|
|
|
|
|
2023-07-03 09:43:06 +00:00
|
|
|
|
|
2023-07-04 11:59:16 +00:00
|
|
|
|
# Return ANSI text that can be shown by less
def inline_image(img_file,width):
    """Render *img_file* as ANSI text *width* columns wide.

    Returns "" when the file does not exist, when file(1) is available and
    does not report an image mime type, or when no suitable chafa/timg was
    detected. If the rendering command raises, the returned text is an
    error marker containing the exception.
    """
    # Pictures that are not on disk are not even attempted.
    if not os.path.exists(img_file):
        return ""
    rendered = ""
    # Errors are avoided by not trying to render non-image files.
    if shutil.which("file"):
        detected = run("file -b --mime-type %s", parameter=img_file).strip()
        if "image" not in detected:
            return rendered
    # Chafa is faster than timg inline, so it is tried first.
    template = None
    if _HAS_CHAFA:
        if _HAS_PIL and not _NEW_CHAFA:
            # Hack for old chafa: an animated picture is overwritten on disk
            # with a version that keeps only its first frame.
            picture = Image.open(img_file)
            if hasattr(picture,"n_frames") and picture.n_frames > 1:
                picture.save(img_file,format="gif",save_all=False)
            template = "chafa --bg white -s %s -f symbols"
        elif _NEW_CHAFA:
            template = "chafa --bg white -t 1 -s %s -f symbols --animate=off"
    if template is None and _NEW_TIMG:
        template = "timg --frames=1 -p q -g %sx1000"
    if not template:
        return rendered
    # The width is substituted now; the file name is left to run().
    command = template % width + " %s"
    try:
        rendered = run(command, parameter=img_file)
    except Exception as err:
        rendered = "***image failed : %s***\n" %err
    return rendered
|
|
|
|
|
|
|
|
|
|
def terminal_image(img_file):
    """Display *img_file* directly in the terminal.

    A recent chafa is preferred, then a recent timg, then an old chafa.
    Nothing happens when none of them was detected. Always returns None.
    """
    # Render by timg is better than old chafa (it is also centered), hence
    # the order of the tests below.
    if _NEW_CHAFA:
        base = "chafa -C on -d 0 --bg white -t 1 -w 1"
    elif _NEW_TIMG:
        base = "timg --loops=1 -C"
    elif _HAS_CHAFA:
        base = "chafa -d 0 --bg white -t 1 -w 1"
    else:
        return
    run(base + " %s", parameter=img_file, direct_output=True)
|
|
|
|
|
|
|
|
|
|
|
2023-07-03 09:43:06 +00:00
|
|
|
|
# First, we define the different content->text renderers, outside of the rest
# (They could later be factorized in other files or replaced)
class AbstractRenderer():
    """Base class of the content->text renderers.

    A renderer keeps, for each display mode, the rendered text, the list of
    absolute link URLs and the list of images. Two methods are called here
    but not defined in this base class, so concrete renderers must provide
    them: ``render(body,width=...,mode=...,startlinks=...)``, returning a
    ``(text, links)`` pair, and ``get_mime()``.
    """
    def __init__(self,content,url,center=True):
        self.url = url
        self.body = str(content)
        # there is one rendered text and one links table per mode
        self.rendered_text = {}
        self.links = {}
        self.images = {}
        self.title = None
        self.validity = True
        self.temp_files = {}
        self.center = center
        self.last_mode = "readable"
        # Private copy: set_theme() updates this dict in place and must not
        # modify the module-wide offthemes.default shared by all renderers.
        self.theme = dict(offthemes.default)

    def display(self,mode=None,directdisplay=False):
        """Return the page as text, or print it when *directdisplay* is true.

        In "source" mode the raw body is used; otherwise the formatted
        window title is put above the rendered body. Returns True after
        printing, else the text itself.
        """
        wtitle = self.get_formatted_title()
        if mode == "source":
            body = self.body
        else:
            body = wtitle + "\n" + self.get_body(mode=mode)
        if directdisplay:
            print(body)
            return True
        else:
            return body

    def has_direct_display(self):
        return False

    def set_theme(self,theme):
        """Merge the entries of *theme* (if any) into this renderer's theme."""
        if theme:
            self.theme.update(theme)

    def get_theme(self):
        return self.theme

    #This class holds an internal representation of the HTML text
    class representation:
        """Accumulator producing wrapped, indented and coloured text.

        Text is gathered in ``last_line`` until the line is ended; the
        finished lines are appended to ``final_text``. Colour changes are
        remembered by character position and only turned into ANSI codes
        when the line is ended.
        """
        def __init__(self,width,title=None,center=True,theme=None):
            self.title=title
            self.center = center
            self.final_text = ""
            self.opened = []
            self.width = width
            self.last_line = ""
            self.last_line_colors = {}
            self.last_line_center = False
            self.new_paragraph = True
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""
            self.disabled_indents = None
            # A None default replaces the former shared mutable {} default.
            self.theme = {} if theme is None else theme
            # each color is an [open,close] pair code
            self.colors = offthemes.colors

        def _insert(self,color,open=True):
            """Remember an open (or close) marker for *color* at the current
            end of ``last_line``."""
            if open: o = 0
            else: o = 1
            pos = len(self.last_line)
            #we remember the position where to insert color codes
            if not pos in self.last_line_colors:
                self.last_line_colors[pos] = []
            #Two inverse codes at the same position cancel each other
            if [color,int(not o)] in self.last_line_colors[pos]:
                self.last_line_colors[pos].remove([color,int(not o)])
            else:
                self.last_line_colors[pos].append([color,o])

        # Take self.last_line and add ANSI codes to it before adding it to
        # self.final_text.
        def _endline(self):
            if len(self.last_line.strip()) > 0:
                # Colours still opened are closed at the end of the line...
                for c in self.opened:
                    self._insert(c,open=False)
                nextline = ""
                added_char = 0
                #we insert the color code at the saved positions
                while len(self.last_line_colors) > 0:
                    pos,colors = self.last_line_colors.popitem()
                    #popitem iterates LIFO.
                    #So we go, backward, to the pos (starting at the end of last_line)
                    nextline = self.last_line[pos:] + nextline
                    # A position whose markers all cancelled each other holds
                    # an empty list: emit nothing instead of a broken escape.
                    if colors:
                        codes = ";".join(self.colors[c][o] for c,o in colors)
                        ansicol = "\x1b[" + codes + "m"
                        nextline = ansicol + nextline
                        added_char += len(ansicol)
                    self.last_line = self.last_line[:pos]
                nextline = self.last_line + nextline
                if self.last_line_center:
                    #we have to care about the ansi char while centering
                    width = term_width() + added_char
                    nextline = nextline.strip().center(width)
                    self.last_line_center = False
                else:
                    #should we lstrip the nextline in the addition ?
                    nextline = self.current_indent + nextline.lstrip() + self.r_indent
                    self.current_indent = self.s_indent
                self.final_text += nextline
                self.last_line = ""
                self.final_text += "\n"
                # ...and opened again at the start of the next one.
                for c in self.opened:
                    self._insert(c,open=True)
            else:
                self.last_line = ""

        def center_line(self):
            self.last_line_center = True

        def open_theme(self,element):
            """Open every colour the theme gives to *element*.

            Returns True when the theme has an entry for *element*, False
            otherwise (nothing is opened then).
            """
            if element in self.theme:
                colors = self.theme[element]
                for c in colors:
                    self.open_color(c)
                return True
            else:
                return False

        def close_theme(self,element):
            """Close every colour the theme gives to *element*, if any."""
            if element in self.theme:
                colors = self.theme[element]
                for c in colors:
                    self.close_color(c)

        def open_color(self,color):
            if color in self.colors and color not in self.opened:
                self._insert(color,open=True)
                self.opened.append(color)

        def close_color(self,color):
            if color in self.colors and color in self.opened:
                self._insert(color,open=False)
                self.opened.remove(color)

        def close_all(self):
            """Append an ANSI reset to the current line and forget all
            opened colours."""
            if len(self.colors) > 0:
                self.last_line += "\x1b[0m"
                self.opened.clear()

        def startindent(self,indent,sub=None,reverse=None):
            """End the current line and set the indents of the next ones.

            *indent* prefixes the first line, *sub* (default: *indent*) the
            following ones, *reverse* (default: "") is appended to lines.
            """
            self._endline()
            self.i_indent = indent
            self.current_indent = indent
            if sub:
                self.s_indent = sub
            else:
                self.s_indent = indent
            if reverse:
                self.r_indent = reverse
            else:
                self.r_indent = ""

        def endindent(self):
            self._endline()
            self.i_indent = ""
            self.s_indent = ""
            self.r_indent = ""
            self.current_indent = ""

        def _disable_indents(self):
            # The four indents are saved in a fixed order, read back by
            # _enable_indents(), then reset through endindent().
            self.disabled_indents = []
            self.disabled_indents.append(self.current_indent)
            self.disabled_indents.append(self.i_indent)
            self.disabled_indents.append(self.s_indent)
            self.disabled_indents.append(self.r_indent)
            self.endindent()

        def _enable_indents(self):
            if self.disabled_indents:
                self.current_indent = self.disabled_indents[0]
                self.i_indent = self.disabled_indents[1]
                self.s_indent = self.disabled_indents[2]
                self.r_indent = self.disabled_indents[3]
            self.disabled_indents = None

        def newline(self):
            self._endline()

        #A new paragraph implies 2 newlines (1 blank line between paragraphs)
        #But it is only used if we didn't already start one, to avoid plenty
        #of blank lines. force=True allows to bypass that limit.
        #new_paragraph becomes false as soon as text is entered into it
        def newparagraph(self,force=False):
            if force or not self.new_paragraph:
                self._endline()
                self.final_text += "\n"
                self.new_paragraph = True

        def add_space(self):
            if len(self.last_line) > 0 and self.last_line[-1] != " ":
                self.last_line += " "

        def _title_first(self,intext=None):
            """Output the pending title, once, before any other content.

            The title is not written when *intext* is the title itself; in
            both cases it is forgotten afterwards.
            """
            if self.title:
                if not self.title == intext:
                    self._disable_indents()
                    self.open_theme("title")
                    self.add_text(self.title)
                    self.close_all()
                    self.newparagraph()
                    self._enable_indents()
                self.title = None

        # Beware, blocks are not wrapped nor indented and left untouched!
        # They are mostly useful for pictures and preformatted text.
        def add_block(self,intext):
            # If necessary, we add the title before a block
            self._title_first()
            # we don't want to indent blocks
            self._endline()
            self._disable_indents()
            self.final_text += self.current_indent + intext
            self.new_paragraph = False
            self._endline()
            self._enable_indents()

        def add_text(self,intext):
            """Append *intext* to the current line, wrapping when the line
            becomes longer than the width."""
            self._title_first(intext=intext)
            lines = []
            last = (self.last_line + intext)
            self.last_line = ""
            # With the following, we basically cancel adding only spaces
            # on an empty line
            if len(last.strip()) > 0:
                self.new_paragraph = False
            else:
                last = last.strip()
            if len(last) > self.width:
                width = self.width - len(self.current_indent) - len(self.r_indent)
                spaces_left = len(last) - len(last.lstrip())
                spaces_right = len(last) - len(last.rstrip())
                lines = textwrap.wrap(last,width,drop_whitespace=True)
                self.last_line += spaces_left*" "
                # Every wrapped line but the last is complete: flush it.
                while len(lines) > 1:
                    l = lines.pop(0)
                    self.last_line += l
                    self._endline()
                # The last one stays open so that more text can follow.
                if len(lines) == 1:
                    li = lines[0]
                    self.last_line += li + spaces_right*" "
            else:
                self.last_line = last

        def get_final(self):
            """Flush pending text and return everything accumulated, with a
            left margin when centering is enabled and the terminal is wider
            than term_width()."""
            self.close_all()
            self._endline()
            #if no content, we still add the title
            self._title_first()
            lines = self.final_text.splitlines()
            lines2 = []
            termspace = shutil.get_terminal_size()[0]
            #Following code inserts blank spaces to center the content
            if self.center and termspace > term_width():
                margin = int((termspace - term_width())//2)
            else:
                margin = 0
            for l in lines :
                lines2.append(margin*" "+l)
            return "\n".join(lines2)

    def get_subscribe_links(self):
        return [[self.url,self.get_mime(),self.get_title()]]

    def is_valid(self):
        return self.validity

    def set_mode(self,mode):
        self.last_mode = mode

    def get_mode(self):
        return self.last_mode

    def get_link(self,nb):
        """Return link number *nb* (numbering starts at 1), or None.

        A message is printed and None returned when *nb* is out of range.
        Numbers below 1 are rejected too: they used to be taken as negative
        list indexes and silently returned a link from the end.
        """
        links = self.get_links()
        if nb < 1:
            print("Index too low! No link %s for %s" %(nb,self.url))
            return None
        if len(links) < nb:
            print("Index too high! No link %s for %s" %(nb,self.url))
            return None
        return links[nb-1]

    #get_title is about the "content title", so the title in the page itself
    def get_title(self):
        return "Abstract title"

    def get_page_title(self):
        """Return "<content title> (<url title>)", or only the URL title
        when there is no content title."""
        title = self.get_title()
        if not title or len(title) == 0:
            title = self.get_url_title()
        else:
            title += " (%s)" %self.get_url_title()
        return title

    def get_formatted_title(self):
        """Return the themed window title: URL title, number of links and,
        for non-local URLs, the time given by netcache.cache_last_modified."""
        title = self.get_url_title()
        nbr = len(self.get_links())
        if is_local(self.url):
            title += " (%s items)"%nbr
            str_last = "local file"
        else:
            last_access = time.ctime(netcache.cache_last_modified(self.url))
            str_last = "last accessed on %s" %last_access
            title += " (%s links)"%nbr
        return self._window_title(title,info=str_last)

    #this function is about creating a title derived from the URL
    def get_url_title(self):
        #small intelligence to try to find a good name for a capsule
        #we try to find either ~username or /users/username
        #else we fallback to hostname
        if not self.url: return ""
        if is_local(self.url):
            splitpath = self.url.split("/")
            filename = splitpath[-1]
            return filename
        path = self.url
        parsed = urllib.parse.urlparse(self.url)
        red_title = parsed.hostname
        if "user" in path:
            i = 0
            splitted = path.split("/")
            # the component following a "user..." component wins
            while i < (len(splitted)-1):
                if splitted[i].startswith("user"):
                    red_title = splitted[i+1]
                i += 1
        if "~" in path:
            for pp in path.split("/"):
                if pp.startswith("~"):
                    red_title = pp[1:]
        return red_title

    # This function returns a list of URLs which should be downloaded
    # before displaying the page (images in HTML pages, typically)
    def get_images(self,mode=None):
        if not mode: mode = self.last_mode
        if not mode in self.images:
            self.get_body(mode=mode)
            # we also invalidate the body that was done without images
            # (a default avoids a KeyError when nothing was stored)
            self.rendered_text.pop(mode,None)
        if mode in self.images:
            return self.images[mode]
        else:
            return []

    #This function will give gemtext to the gemtext renderer
    def prepare(self,body,mode=None):
        return [[body,None]]

    def _build_body_and_links(self,mode,width=None):
        """Render the body for *mode* and fill rendered_text[mode] and
        links[mode].

        Each [text, format] pair returned by prepare() is rendered by the
        renderer registered for that format in _FORMAT_RENDERERS, or by this
        renderer when the format is not registered there.
        """
        if not width:
            width = term_width()
        prepared_bodies = self.prepare(self.body,mode=mode)
        self.rendered_text[mode] = ""
        self.links[mode] = []
        for b in prepared_bodies:
            results = None
            # links already collected: numbering continues after them
            size = len(self.links[mode])
            if b[1] in _FORMAT_RENDERERS:
                r = _FORMAT_RENDERERS[b[1]](b[0],self.url,center=self.center)
                # Themes are per-renderer copies, so the delegate is given
                # this renderer's theme explicitly.
                r.set_theme(self.theme)
                results = r.render(b[0],width=width,mode=mode,startlinks=size)
            else:
                results = self.render(b[0],width=width,mode=mode,startlinks=size)
            if results:
                self.rendered_text[mode] += results[0] + "\n"
                #we should absolutize all URLs here
                for l in results[1]:
                    # a link entry is "url [name]": only the url is kept
                    abs_l = urllib.parse.urljoin(self.url,l.split()[0])
                    self.links[mode].append(abs_l)
        for l in self.get_subscribe_links()[1:]:
            self.links[mode].append(l[0])

    def get_body(self,width=None,mode=None):
        if not mode: mode = self.last_mode
        if mode not in self.rendered_text:
            self._build_body_and_links(mode,width)
        return self.rendered_text[mode]

    def get_links(self,mode=None):
        if not mode: mode = self.last_mode
        if mode not in self.links :
            self._build_body_and_links(mode)
        return self.links[mode]

    def _window_title(self,title,info=None):
        """Return *title*, followed by *info* in parentheses when given,
        formatted with the window_title/window_subtitle theme entries."""
        title_r = self.representation(term_width(),theme=self.theme)
        title_r.open_theme("window_title")
        title_r.add_text(title)
        title_r.close_theme("window_title")
        if info:
            title_r.open_theme("window_subtitle")
            title_r.add_text(" (%s)"%info)
            title_r.close_theme("window_subtitle")
        return title_r.get_final()
|
|
|
|
|
|
|
|
|
|
# Every concrete AbstractRenderer must provide a self.render(body,width,mode) method.
# Three modes are used: readable (the default), full, and links_only (the fastest,
# for when only the links are needed and the rendered content is not used).
# The prepare() function is called before rendering. It is useful when your
# renderer outputs a format suitable for another existing renderer (such as gemtext).
# The prepare() function returns a list of pairs. Each pair is [output text, format],
# where format should be a key of _FORMAT_RENDERERS. If it is None, the current renderer is used.
|
2023-07-03 09:43:06 +00:00
|
|
|
|
|
2023-10-07 21:30:09 +00:00
|
|
|
|
class PlaintextRenderer(AbstractRenderer):
    """Renderer for plain text: the body is passed through unchanged and
    yields no links."""

    def get_mime(self):
        return "text/plain"

    def get_title(self):
        """Return the cached title, else one derived from the first line.

        The first line is stripped and cut to 49 characters plus an
        ellipsis when longer than 50. Without any body, "(unknown)" is
        returned and nothing is cached.
        """
        if self.title:
            return self.title
        if not self.body:
            return "(unknown)"
        body_lines = self.body.splitlines()
        if not body_lines:
            self.title = "Empty Page"
            return self.title
        # No explicit title exists in plain text: use the first line.
        candidate = body_lines[0].strip()
        if len(candidate) > 50:
            candidate = candidate[:49] + "…"
        self.title = candidate
        return self.title

    def render(self,gemtext, width=None,mode=None,startlinks=0):
        """Return the text as is, together with an empty list of links."""
        return gemtext, []
|
|
|
|
|
|
2023-07-03 09:43:06 +00:00
|
|
|
|
# Gemtext Rendering Engine
class GemtextRenderer(AbstractRenderer):
    """Renderer for text/gemini content."""

    def get_mime(self):
        return "text/gemini"

    def get_title(self):
        """Return the cached title, else the first heading of the body.

        Lines between ``` toggles are preformatted text, so a "#" found
        there is not a heading (render() ignores it as well). Only the
        leading "#" characters are removed: "# Learning C#" keeps its
        final "#". Without heading, the first line (cut to 49 characters
        plus an ellipsis when longer than 50) is used. Without any body,
        "(unknown)" is returned and nothing is cached.
        """
        if self.title:
            return self.title
        elif self.body:
            lines = self.body.splitlines()
            preformatted = False
            for line in lines:
                if line.startswith("```"):
                    preformatted = not preformatted
                elif not preformatted and line.startswith("#"):
                    self.title = line.lstrip("#").strip()
                    return self.title
            if len(lines) > 0:
                # If no title was found, we take the first 50 chars
                # of the first line
                title_line = lines[0].strip()
                if len(title_line) > 50:
                    title_line = title_line[:49] + "…"
                self.title = title_line
                return self.title
            else:
                self.title = "Empty Page"
                return self.title
        else:
            return "(unknown)"

    #render_gemtext
    def render(self,gemtext, width=None,mode=None,startlinks=0):
        """Render *gemtext* and return a ``(text, links)`` pair.

        *links* holds the content of the "=>" lines (URL, optionally
        followed by a name) in order, followed by every word containing
        "://" found in ordinary text lines. Displayed link numbers start at
        ``startlinks + 1``. The first level-1 heading becomes the title
        when none is set yet.
        """
        if not width:
            width = term_width()
        r = self.representation(width,theme=self.theme)
        links = []
        hidden_links = []
        preformatted = False

        def format_link(url,index,name=None):
            # "[index protocol] name"; the protocol is omitted for gemini
            # and list URLs, and the address stands in for a missing name.
            if "://" in url:
                protocol,adress = url.split("://",maxsplit=1)
                protocol = " %s" %protocol
            else:
                adress = url
                protocol = ""
            if "gemini" in protocol or "list" in protocol:
                protocol = ""
            if not name:
                name = adress
            line = "[%d%s] %s" % (index, protocol, name)
            return line

        for line in gemtext.splitlines():
            r.newline()
            if line.startswith("```"):
                preformatted = not preformatted
                if preformatted:
                    r.open_theme("preformatted")
                else:
                    r.close_theme("preformatted")
            elif preformatted:
                # added as a block so that preformatted text is not wrapped
                r.add_block(line+"\n")
            elif len(line.strip()) == 0:
                r.newparagraph(force=True)
            elif line.startswith("=>"):
                strippedline = line[2:].strip()
                if strippedline:
                    links.append(strippedline)
                    splitted = strippedline.split(maxsplit=1)
                    url = splitted[0]
                    name = None
                    if len(splitted) > 1:
                        name = splitted[1]
                    link = format_link(url,len(links)+startlinks,name=name)
                    # "oneline_link" is preferred when the theme defines it
                    if r.open_theme("oneline_link"):
                        theme = "oneline_link"
                    else:
                        theme = "link"
                        r.open_theme("link")
                    # wrapped lines are aligned after the "[n] " prefix
                    startpos = link.find("] ") + 2
                    r.startindent("",sub=startpos*" ")
                    r.add_text(link)
                    r.close_theme(theme)
                    r.endindent()
            elif line.startswith("* "):
                line = line[1:].lstrip("\t ")
                r.startindent("• ",sub=" ")
                r.add_text(line)
                r.endindent()
            elif line.startswith(">"):
                line = line[1:].lstrip("\t ")
                r.startindent("> ")
                r.open_theme("blockquote")
                r.add_text(line)
                r.close_theme("blockquote")
                r.endindent()
            elif line.startswith("###"):
                line = line[3:].lstrip("\t ")
                # falls back on the "subtitle" theme entry
                if r.open_theme("subsubtitle"):
                    theme = "subsubtitle"
                else:
                    r.open_theme("subtitle")
                    theme = "subtitle"
                r.add_text(line)
                r.close_theme(theme)
            elif line.startswith("##"):
                line = line[2:].lstrip("\t ")
                r.open_theme("subtitle")
                r.add_text(line)
                r.close_theme("subtitle")
            elif line.startswith("#"):
                line = line[1:].lstrip("\t ")
                if not self.title:
                    self.title = line
                r.open_theme("title")
                r.add_text(line)
                r.close_theme("title")
            else:
                if "://" in line:
                    words = line.split()
                    for w in words:
                        if "://" in w:
                            hidden_links.append(w)
                r.add_text(line.rstrip())
        links += hidden_links
        return r.get_final(), links
|
|
|
|
|
|
|
|
|
|
class GopherRenderer(AbstractRenderer):
    """Renderer for gopher menus (falling back on raw display)."""

    def get_mime(self):
        return "text/gopher"

    def get_title(self):
        """Return the cached title, else the first field of the first line
        of the body, without its leading "i" if any ("" without body)."""
        if not self.title:
            self.title = ""
            if self.body:
                firstline = self.body.splitlines()[0]
                firstline = firstline.split("\t")[0]
                if firstline.startswith("i"):
                    firstline = firstline[1:]
                self.title = firstline
        return self.title

    #menu_or_text
    def render(self,body,width=None,mode=None,startlinks=0):
        """Render *body* as a gopher menu and return ``(text, links)``.

        If the menu rendering raises, the error is printed and *body* is
        returned as an unprocessed block with no links.
        """
        if not width:
            width = term_width()
        try:
            render,links = self._render_goph(body,width=width,mode=mode,startlinks=startlinks)
        except Exception as err:
            print("Error rendering Gopher ",err)
            r = self.representation(width,theme=self.theme)
            r.add_block(body)
            render = r.get_final()
            links = []
        return render,links

    def _render_goph(self,body,width=None,mode=None,startlinks=0):
        """Render the menu lines of *body* and return ``(text, links)``.

        Lines starting with "i" are shown as text. Other non-empty lines
        made of four tab-separated fields (type+name, path, host, port)
        become numbered links, numbered from ``startlinks + 1``; each entry
        of *links* is "url name". Any other line is shown unchanged.
        """
        if not width:
            width = term_width()
        # This was copied straight from Agena (then later adapted)
        links = []
        r = self.representation(width,theme=self.theme)
        # The text received as argument is rendered (self.body used to be
        # read here, which ignored the parameter).
        for line in body.split("\n"):
            r.newline()
            if line.startswith("i"):
                towrap = line[1:].split("\t")[0]
                if len(towrap.strip()) > 0:
                    r.add_text(towrap)
                else:
                    r.newparagraph()
            elif not line.strip() in [".",""]:
                parts = line.split("\t")
                parts[-1] = parts[-1].strip()
                # a trailing "+" field is dropped
                if parts[-1] == "+":
                    parts = parts[:-1]
                # An empty first field has no item type: such a line is
                # displayed as text instead of raising IndexError, which
                # discarded the rendering of the whole menu.
                if len(parts) == 4 and parts[0]:
                    name,path,host,port = parts
                    itemtype = name[0]
                    name = name[1:]
                    # port 70 is left out of the URL
                    if port == "70":
                        port = ""
                    else:
                        port = ":%s"%port
                    if itemtype == "h" and path.startswith("URL:"):
                        url = path[4:]
                    else:
                        url = "gopher://%s%s/%s%s" %(host,port,itemtype,path)
                    url = url.replace(" ","%20")
                    linkline = url + " " + name
                    links.append(linkline)
                    number = len(links) + startlinks
                    towrap = "[%s] "%str(number)+ name
                    r.add_text(towrap)
                else:
                    r.add_text(line)
        return r.get_final(),links
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
class FolderRenderer(GemtextRenderer):
|
2023-07-20 23:03:40 +00:00
|
|
|
|
#it was initialized with:
|
|
|
|
|
#self.renderer = FolderRenderer("",self.get_cache_path(),datadir=_DATA_DIR)
|
2023-07-03 09:43:06 +00:00
|
|
|
|
def __init__(self,content,url,center=True,datadir=None):
    """Initialise as a GemtextRenderer and remember *datadir*, the
    directory under which prepare() looks for the "lists" folder."""
    GemtextRenderer.__init__(self,content,url,center=center)
    self.datadir = datadir
|
|
|
|
|
|
|
|
|
|
def get_mime(self):
    """Return the pseudo mime type identifying a folder listing."""
    return "Directory"
|
|
|
|
|
def prepare(self,body,mode=None):
|
|
|
|
|
def get_first_line(l):
|
|
|
|
|
path = os.path.join(listdir,l+".gmi")
|
|
|
|
|
with open(path) as f:
|
|
|
|
|
first_line = f.readline().strip()
|
|
|
|
|
f.close()
|
|
|
|
|
if first_line.startswith("#"):
|
|
|
|
|
return first_line
|
|
|
|
|
else:
|
|
|
|
|
return None
|
|
|
|
|
def write_list(l):
|
|
|
|
|
body = ""
|
|
|
|
|
for li in l:
|
|
|
|
|
path = "list:///%s"%li
|
2023-08-22 13:43:17 +00:00
|
|
|
|
r = renderer_from_file(netcache.get_cache_path(path))
|
|
|
|
|
size = len(r.get_links())
|
2023-07-03 09:43:06 +00:00
|
|
|
|
body += "=> %s %s (%s items)\n" %(str(path),li,size)
|
|
|
|
|
return body
|
|
|
|
|
listdir = os.path.join(self.datadir,"lists")
|
2023-08-12 10:22:06 +00:00
|
|
|
|
self.title = "My lists"
|
|
|
|
|
lists = []
|
|
|
|
|
if os.path.exists(listdir):
|
|
|
|
|
listfiles = os.listdir(listdir)
|
|
|
|
|
if len(listfiles) > 0:
|
|
|
|
|
for l in listfiles:
|
|
|
|
|
#removing the .gmi at the end of the name
|
|
|
|
|
lists.append(l[:-4])
|
|
|
|
|
if len(lists) > 0:
|
|
|
|
|
body = ""
|
|
|
|
|
my_lists = []
|
|
|
|
|
system_lists = []
|
|
|
|
|
subscriptions = []
|
|
|
|
|
frozen = []
|
|
|
|
|
lists.sort()
|
|
|
|
|
for l in lists:
|
|
|
|
|
if l in ["history","to_fetch","archives","tour"]:
|
|
|
|
|
system_lists.append(l)
|
|
|
|
|
else:
|
|
|
|
|
first_line = get_first_line(l)
|
|
|
|
|
if first_line and "#subscribed" in first_line:
|
|
|
|
|
subscriptions.append(l)
|
|
|
|
|
elif first_line and "#frozen" in first_line:
|
|
|
|
|
frozen.append(l)
|
2023-07-03 09:43:06 +00:00
|
|
|
|
else:
|
2023-08-12 10:22:06 +00:00
|
|
|
|
my_lists.append(l)
|
|
|
|
|
if len(my_lists) > 0:
|
|
|
|
|
body+= "\n## Bookmarks Lists (updated during sync)\n"
|
|
|
|
|
body += write_list(my_lists)
|
|
|
|
|
if len(subscriptions) > 0:
|
|
|
|
|
body +="\n## Subscriptions (new links in those are added to tour)\n"
|
|
|
|
|
body += write_list(subscriptions)
|
|
|
|
|
if len(frozen) > 0:
|
|
|
|
|
body +="\n## Frozen (fetched but never updated)\n"
|
|
|
|
|
body += write_list(frozen)
|
|
|
|
|
if len(system_lists) > 0:
|
|
|
|
|
body +="\n## System Lists\n"
|
|
|
|
|
body += write_list(system_lists)
|
2023-08-27 11:11:34 +00:00
|
|
|
|
return [[body,None]]
|
2023-07-03 09:43:06 +00:00
|
|
|
|
|
|
|
|
|
class FeedRenderer(GemtextRenderer):
    """Renderer converting an RSS/Atom feed to gemtext with feedparser."""
    def get_mime(self):
        return "application/rss+xml"

    def is_valid(self):
        """Return True when feedparser is enabled and self.body parses
        without error into at least one entry."""
        if not _DO_FEED:
            return False
        parsed = feedparser.parse(self.body)
        if parsed.bozo:
            return False
        #If no content, then fallback to HTML
        return len(parsed.entries) > 0

    def get_title(self):
        # The title is set as a side effect of preparing the body
        if not self.title:
            self.get_body()
        return self.title

    def prepare(self,content,mode=None,width=None):
        """Convert the feed into a list of [text, mime] chunks.

        content: raw feed, given to feedparser.parse().
        mode: "full" emits each entry followed by its HTML summary;
              any other value emits a single block listing the entries.
              Defaults to self.last_mode.
        width: accepted for interface compatibility; not used here.

        Chunks whose mime is None are gemtext, entry summaries are tagged
        "text/html". self.validity is set to False when the feed cannot be
        handled or has no entry.
        """
        if not mode: mode = self.last_mode
        self.title = "RSS/Atom feed"
        toreturn = []
        page = ""
        if not _DO_FEED:
            page += "Please install python-feedparser to handle RSS/Atom feeds\n"
            self.validity = False
            # Always return the [[text, mime]] shape produced by every other path
            return [[page,None]]
        parsed = feedparser.parse(content)
        if parsed.bozo:
            page += "Invalid RSS feed\n\n"
            page += str(parsed.bozo_exception)
            self.validity = False
            # Hand the error message back instead of dropping it
            toreturn.append([page,None])
            return toreturn
        if "title" in parsed.feed:
            t = parsed.feed.title
        else:
            t = "Unknown"
        self.title = "%s (XML feed)" %t
        title = "# %s"%self.title
        page += title + "\n"
        if "updated" in parsed.feed:
            page += "Last updated on %s\n\n" %parsed.feed.updated
        if "subtitle" in parsed.feed:
            page += parsed.feed.subtitle + "\n"
        if "link" in parsed.feed:
            page += "=> %s\n" %parsed.feed.link
        page += "\n## Entries\n"
        toreturn.append([page,None])
        if len(parsed.entries) < 1:
            self.validity = False
        postslist = ""
        for i in parsed.entries:
            if "link" in i:
                line = "=> %s " %i.link
            elif "links" in i and len(i.links) > 0:
                # Take the first link that actually carries an href
                link = None
                for candidate in i.links:
                    link = getattr(candidate,"href",None)
                    if link:
                        break
                if link:
                    line = "=> %s "%link
                else:
                    line = "* "
            else:
                line = "* "
            if "published" in i:
                #sometimes fails so protect it
                try:
                    pub_date = time.strftime("%Y-%m-%d",i.published_parsed)
                    line += pub_date + " : "
                except Exception:
                    # best effort: the entry is simply listed without a date
                    pass
            if "title" in i:
                line += "%s" %(i.title)
            if "author" in i:
                line += " (by %s)"%i.author
            if mode == "full":
                toreturn.append([line,None])
                if "summary" in i:
                    toreturn.append([i.summary,"text/html"])
                    toreturn.append(["------------",None])
            else:
                postslist += line + "\n"
        #If each post were appended to toreturn, a \n would be inserted
        #between each item of the list. I don’t like it. Hence this hack
        if mode != "full":
            toreturn.append([postslist,None])
        return toreturn
|
2023-07-03 09:43:06 +00:00
|
|
|
|
|
|
|
|
|
class ImageRenderer(AbstractRenderer):
    """Renderer for picture files; self.body is what gets passed to terminal_image()."""
    def get_mime(self):
        return "image/*"

    def is_valid(self):
        # Usable only when image rendering is enabled
        return bool(_RENDER_IMAGE)

    def get_links(self,mode=None):
        # A picture contains no link
        return []

    def get_title(self):
        return "Picture file"

    def render(self,img,width=None,mode=None,startlinks=0):
        """Return (text, links) for the picture img; links is always empty.

        Nothing is drawn in the links-only modes. When a width is given,
        each line is padded on the left by half of the remaining terminal
        width; without a width the full terminal width is used, unpadded.
        """
        #with inline, we use symbols to be rendered with less.
        #else we use the best possible renderer.
        if mode in ("full_links_only","links_only"):
            return "", []
        if width:
            margin = int((term_width() - width)//2)
        else:
            width = term_width()
            margin = 0
        picture = inline_image(img,width)
        #Now centering the image
        pad = margin*" "
        centered = "".join(pad + row + "\n" for row in picture.splitlines())
        return centered, []

    def has_direct_display(self):
        return _RENDER_IMAGE

    def display(self,mode=None,directdisplay=False):
        """Return the titled body, or, with directdisplay, print the window
        title, draw the picture through terminal_image() and return True."""
        wtitle = self.get_formatted_title()
        if directdisplay:
            print(self._window_title(wtitle))
            terminal_image(self.body)
            return True
        return wtitle + "\n" + self.get_body(mode=mode)
|
2023-07-03 09:43:06 +00:00
|
|
|
|
|
|
|
|
|
class HtmlRenderer(AbstractRenderer):
    """Renderer for HTML documents, based on BeautifulSoup and, when
    available, readability."""
    def get_mime(self):
        return "text/html"

    def is_valid(self):
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
        return _DO_HTML and self.validity

    def get_subscribe_links(self):
        """Return a list of [url, mime, title]: the page itself followed by
        every <link rel="alternate"> whose type mentions rss, atom or feed."""
        subs = [[self.url,self.get_mime(),self.get_title()]]
        soup = BeautifulSoup(self.body, 'html.parser')
        links = soup.find_all("link",rel="alternate",recursive=True)
        for l in links:
            ty = l.get("type")
            if ty and ("rss" in ty or "atom" in ty or "feed" in ty):
                # some rss links are relatives: we absolutise_url
                sublink = urllib.parse.urljoin(self.url, l.get("href"))
                subs.append([sublink,ty,l.get("title")])
        return subs

    def get_title(self):
        """Return the document title as a string ("" when there is none).

        readability's short_title() is tried first; on failure, or when
        readability is missing, the <title> element is used.
        """
        if self.title:
            return self.title
        if not self.body:
            return ""
        if _HAS_READABILITY:
            try:
                readable = Document(self.body)
                self.title = readable.short_title()
                return self.title
            except Exception:
                # fall back to the <title> element below
                pass
        soup = BeautifulSoup(self.body,"html.parser")
        # A document may have no <title>, or a <title> without a single string
        if soup.title and soup.title.string is not None:
            self.title = str(soup.title.string)
        else:
            self.title = ""
        return self.title

    # Our own HTML engine (crazy, isn’t it?)
    # Return [rendered_body, list_of_links]
    # mode is either links_only, readable or full
    def render(self,body,mode=None,width=None,add_title=True,startlinks=0):
        """Render the HTML in body.

        mode: "full" and "full_links_only" render the whole document; any
              other mode goes through readability first when available.
              Defaults to self.last_mode.
        width: rendering width, defaults to term_width().
        add_title: not used by this method.
        startlinks: offset added to the displayed link numbers.

        Returns (rendered_text, links), each link being "<url> <text>".
        Returns None when HTML support is disabled.
        """
        if not mode: mode = self.last_mode
        if not width:
            width = term_width()
        if not _DO_HTML:
            print("HTML document detected. Please install python-bs4 and python-readability.")
            return
        # This method recursively parse the HTML
        r = self.representation(width,title=self.get_title(),center=self.center,theme=self.theme)
        links = []
        # You know how bad html is when you realize that space is sometimes meaningful, sometimes not.
        # CR are not meaningful. Except that, sometimes, they should be interpreted as spaces.
        # HTML is real crap. At least the one people are generating.

        def render_image(src,width=40,mode=None):
            # Return the inline rendering of image src, or "" when images are
            # disabled, the mode only wants links, or the source is unsupported.
            ansi_img = ""
            imgurl,imgdata = looks_like_base64(src,self.url)
            if _RENDER_IMAGE and mode not in ["full_links_only","links_only"] and imgurl:
                try:
                    #translate the URL into its cache path
                    img = netcache.get_cache_path(imgurl)
                    if imgdata:
                        # Inline base64 data is written to the cache first
                        os.makedirs(os.path.dirname(img), exist_ok=True)
                        with open(img,"wb") as cached:
                            cached.write(base64.b64decode(imgdata))
                    if netcache.is_cache_valid(img):
                        renderer = ImageRenderer(img,imgurl)
                        # Image are 40px wide except if terminal is smaller
                        size = 40 if width > 40 else width
                        ansi_img = "\n" + renderer.get_body(width=size,mode="inline")
                except Exception as err:
                    #we sometimes encounter really bad formatted files or URL
                    ansi_img = textwrap.fill("[BAD IMG] %s - %s"%(err,src),width) + "\n"
            return ansi_img

        def sanitize_string(string):
            # Turn CR/LF/tabs into spaces, collapse runs of spaces and
            # unescape HTML entities, keeping at most one leading and one
            # trailing space when the original text had one.
            string = string.replace("\r","").replace("\n", " ").replace("\t"," ")
            endspace = string.endswith(" ") or string.endswith("\xa0")
            startspace = string.startswith(" ") or string.startswith("\xa0")
            toreturn = string.strip()
            # Collapse doubled spaces; the loop ends once none is left
            while "  " in toreturn:
                toreturn = toreturn.replace("  "," ")
            toreturn = html.unescape(toreturn)
            if endspace and not toreturn.endswith(" ") and not toreturn.endswith("\xa0"):
                toreturn += " "
            if startspace and not toreturn.startswith(" ") and not toreturn.startswith("\xa0"):
                toreturn = " " + toreturn
            return toreturn

        def recursive_render(element,indent="",preformatted=False):
            # Walk the element tree, feeding the representation r and the
            # links list of the enclosing render() call.
            if element.name == "blockquote":
                r.newparagraph()
                r.startindent(" ",reverse=" ")
                for child in element.children:
                    r.open_theme("blockquote")
                    recursive_render(child,indent="\t")
                    r.close_theme("blockquote")
                r.endindent()
            elif element.name in ["div","p"]:
                r.newparagraph()
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.newparagraph()
            elif element.name in ["span"]:
                r.add_space()
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.add_space()
            elif element.name in ["h1","h2","h3","h4","h5","h6"]:
                if element.name in ["h1"]:
                    r.open_theme("title")
                elif element.name in ["h2","h3"]:
                    r.open_theme("subtitle")
                elif element.name in ["h4","h5","h6"]:
                    # use "subtitle" when open_theme("subsubtitle") returns a false value
                    if not r.open_theme("subsubtitle"):
                        r.open_theme("subtitle")
                r.newparagraph()
                for child in element.children:
                    recursive_render(child)
                r.close_all()
                r.newparagraph()
            elif element.name in ["code","tt"]:
                for child in element.children:
                    recursive_render(child,indent=indent,preformatted=True)
            elif element.name in ["pre"]:
                r.newparagraph()
                r.add_block(element.text)
                r.newparagraph()
            elif element.name in ["li"]:
                r.startindent(" • ",sub=" ")
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.endindent()
            elif element.name in ["tr"]:
                r.startindent("|",reverse="|")
                for child in element.children:
                    recursive_render(child,indent=indent)
                r.endindent()
            elif element.name in ["td","th"]:
                r.add_text("| ")
                for child in element.children:
                    recursive_render(child)
                r.add_text(" |")
            # italics
            elif element.name in ["em","i"]:
                r.open_color("italic")
                for child in element.children:
                    recursive_render(child,indent=indent,preformatted=preformatted)
                r.close_color("italic")
            #bold
            elif element.name in ["b","strong"]:
                r.open_color("bold")
                for child in element.children:
                    recursive_render(child,indent=indent,preformatted=preformatted)
                r.close_color("bold")
            elif element.name == "a":
                link = element.get('href')
                # support for images nested in links
                if link:
                    text = ""
                    imgtext = ""
                    #we display images first in a link
                    for child in element.children:
                        if child.name == "img":
                            recursive_render(child)
                            imgtext = "[IMG LINK %s]"
                    # registered after the nested images, which add their own links
                    links.append(link+" "+text)
                    link_id = str(len(links)+startlinks)
                    r.open_theme("link")
                    for child in element.children:
                        if child.name != "img":
                            recursive_render(child,preformatted=preformatted)
                    if imgtext != "":
                        r.center_line()
                        r.add_text(imgtext%link_id)
                    else:
                        r.add_text(" [%s]"%link_id)
                    r.close_theme("link")
                else:
                    #No real link found
                    for child in element.children:
                        recursive_render(child,preformatted=preformatted)
            elif element.name == "img":
                src = element.get("src")
                text = ""
                ansi_img = render_image(src,width=width,mode=mode)
                alt = element.get("alt")
                if alt:
                    alt = sanitize_string(alt)
                    text += "[IMG] %s"%alt
                else:
                    text += "[IMG]"
                if src:
                    if mode not in self.images:
                        self.images[mode] = []
                    abs_url,_ = looks_like_base64(src,self.url)
                    #if abs_url is None, it means we don’t support
                    #the image (such as svg+xml). So we hide it.
                    if abs_url:
                        links.append(abs_url+" "+text)
                        self.images[mode].append(abs_url)
                        link_id = " [%s]"%(len(links)+startlinks)
                        r.add_block(ansi_img)
                        r.open_theme("image_link")
                        r.center_line()
                        r.add_text(text + link_id)
                        r.close_theme("image_link")
                        r.newline()
            elif element.name == "br":
                r.newline()
            elif element.name not in ["script","style","template"] and not isinstance(element,Comment):
                if element.string:
                    if preformatted :
                        r.open_theme("preformatted")
                        r.add_text(element.string)
                        r.close_theme("preformatted")
                    else:
                        s = sanitize_string(element.string)
                        if len(s.strip()) > 0:
                            r.add_text(s)
                else:
                    for child in element.children:
                        recursive_render(child,indent=indent)

        # the real render_html heart
        if mode in ["full","full_links_only"]:
            summary = body
        elif _HAS_READABILITY:
            try:
                readable = Document(body)
                summary = readable.summary()
            except Exception:
                # readability failed: render the whole document instead
                summary = body
        else:
            summary = body
        soup = BeautifulSoup(summary, 'html.parser')
        #soup = BeautifulSoup(summary, 'html5lib')
        if soup :
            if soup.body :
                recursive_render(soup.body)
            else:
                recursive_render(soup)
        return r.get_final(),links
|
|
|
|
|
|
2023-07-20 23:03:40 +00:00
|
|
|
|
|
|
|
|
|
# Mapping mimetypes with renderers
# (any content with a mimetype text/* not listed here will be rendered as GemText)
# set_renderer() matches a mimetype against these keys with fnmatch (hence
# the "image/*" pattern) and uses the first key that matches, in the order
# written below.
_FORMAT_RENDERERS = {
    "text/gemini": GemtextRenderer,
    "text/html" : HtmlRenderer,
    "text/xml" : FeedRenderer,
    "text/plain" : PlaintextRenderer,
    "application/xml" : FeedRenderer,
    "application/rss+xml" : FeedRenderer,
    "application/atom+xml" : FeedRenderer,
    "text/gopher": GopherRenderer,
    "image/*": ImageRenderer,
    "application/javascript": HtmlRenderer,
}
|
2023-09-11 08:22:49 +00:00
|
|
|
|
def get_mime(path,url=None):
    """Guess the mimetype of the file at path.

    path: local path of the content (or a "mailto:" address).
    url: original URL; for gopher:// URLs the type is taken from the item
         type found in the URL instead of the file.

    Returns a mimetype string, one of the pseudo types "mailto",
    "Local Folder" or "binary", or None when path is empty or nothing
    could be guessed.
    """
    #Beware, this one is really a shaddy ad-hoc function
    if not path:
        return None
    # Second opinion from mimetypes; only the "file" branch fills it
    mime2 = None
    if url and url.startswith("gopher://"):
        #special case for gopher
        #code copy/pasted from netcache
        parsed = urllib.parse.urlparse(url)
        if len(parsed.path) >= 2:
            itemtype = parsed.path[1]
            path = parsed.path[2:]
        else:
            itemtype = "1"
            path = ""
        if itemtype == "0":
            mime = "text/gemini"
        elif itemtype == "1":
            mime = "text/gopher"
        elif itemtype == "h":
            mime = "text/html"
        elif itemtype in ("9","g","I","s",";"):
            mime = "binary"
        else:
            mime = "text/gopher"
    elif path.startswith("mailto:"):
        mime = "mailto"
    elif os.path.isdir(path):
        mime = "Local Folder"
    elif path.endswith(".gmi"):
        mime = "text/gemini"
    elif path.endswith("gophermap"):
        mime = "text/gopher"
    elif shutil.which("file") :
        mime = run("file -b --mime-type %s", parameter=path).strip()
        mime2,encoding = mimetypes.guess_type(path,strict=False)
        #If we hesitate between html and xml, takes the xml one
        #because the FeedRendered fallback to HtmlRenderer
        if mime2 and mime != mime2 and "html" in mime and "xml" in mime2:
            mime = "text/xml"
        # If it’s a xml file, consider it as such, regardless of what file thinks
        elif path.endswith(".xml"):
            mime = "text/xml"
        #Some xml/html document are considered as octet-stream
        if mime == "application/octet-stream":
            mime = "text/xml"
    else:
        mime,encoding = mimetypes.guess_type(path,strict=False)
    #gmi Mimetype is not recognized yet
    if not mime and not shutil.which("file") :
        print("Cannot guess the mime type of the file. Please install \"file\".")
    # mime may still be None here (nothing guessed): leave it untouched
    if mime and mime.startswith("text") and mime not in _FORMAT_RENDERERS:
        if mime2 and mime2 in _FORMAT_RENDERERS:
            mime = mime2
        else:
            #by default, we consider it’s gemini except for html
            mime = "text/gemini"
    #file doesn’t recognise gemtext. It should be the default renderer.
    #the only case were it doesn’t make sense is if the file is .txt
    if mime == "text/plain" and not path.endswith(".txt"):
        mime = "text/gemini"
    return mime
|
|
|
|
|
|
2023-08-30 22:27:54 +00:00
|
|
|
|
def renderer_from_file(path,url=None,theme=None):
    """Build the renderer suited to the file at path.

    path: local file (or folder) to render.
    url: URL the content comes from; defaults to path.
    theme: optional theme forwarded to set_renderer().

    Returns the renderer, or None when path is empty, does not exist, or
    its mimetype cannot be determined.
    """
    if not path:
        return None
    mime = get_mime(path,url=url)
    if not url:
        url = path
    # get_mime() may return None: nothing can be rendered in that case
    if not mime or not os.path.exists(path):
        return None
    if mime.startswith("text/") or mime in _FORMAT_RENDERERS:
        # undecodable bytes are dropped rather than aborting the rendering
        with open(path,errors="ignore") as f:
            content = f.read()
    else:
        # other content is handed over as a path, not as text
        content = path
    return set_renderer(content,url,mime,theme=theme)
|
2023-07-21 12:22:09 +00:00
|
|
|
|
|
2023-08-30 22:27:54 +00:00
|
|
|
|
def set_renderer(content,url,mime,theme=None):
    """Instantiate the renderer registered for mime in _FORMAT_RENDERERS.

    content: text to render, or whatever the selected renderer expects.
    url: URL of the content.
    mime: mimetype, matched with fnmatch against the registered keys;
          "Local Folder" selects FolderRenderer.
    theme: optional theme applied to the renderer.

    Returns the renderer, or None when mime is empty, no renderer matches,
    or a non-text renderer reports itself invalid.
    """
    # Without a mimetype there is nothing to match against
    if not mime:
        return None
    if mime == "Local Folder":
        renderer = FolderRenderer("",url,datadir=_DATA_DIR)
        if theme:
            renderer.set_theme(theme)
        return renderer
    renderer = None
    # First registered pattern matching the mimetype, if any
    current_mime = next((m for m in _FORMAT_RENDERERS if fnmatch.fnmatch(mime, m)),None)
    if current_mime is not None:
        renderer = _FORMAT_RENDERERS[current_mime](content,url)
        if not renderer.is_valid():
            if current_mime.startswith("text"):
                # We double check if the renderer is correct.
                # If not, we fallback to html
                # (this is currently only for XHTML, often being
                # mislabelled as xml thus RSS feeds)
                renderer = _FORMAT_RENDERERS["text/html"](content,url)
            else:
                renderer = None
    if renderer and theme:
        renderer.set_theme(theme)
    return renderer
|
|
|
|
|
|
2023-07-21 09:52:39 +00:00
|
|
|
|
def render(input,path=None,format="auto",mime=None,url=None):
    """Render input on the terminal.

    input: content to render.
    path: path of the file the content comes from, used for autodetection.
    format: name of the renderer to use; any value not listed below
            (including "auto" and None) triggers autodetection from
            mime, then from path.
    mime: mimetype of the content.
    url: original URL, either a string or a list whose first item is used
         (as produced by the --url option).

    Prints "Could not render …" when no renderer can be found.
    """
    if not url:
        url = ""
    elif isinstance(url,(list,tuple)):
        url = url[0]
    # a plain string is kept as is
    if format == "gemtext":
        r = GemtextRenderer(input,url)
    elif format == "html":
        r = HtmlRenderer(input,url)
    elif format == "feed":
        r = FeedRenderer(input,url)
    elif format == "gopher":
        r = GopherRenderer(input,url)
    elif format == "image":
        r = ImageRenderer(input,url)
    elif format == "folder":
        # FolderRenderer joins datadir with "lists" to locate the lists
        r = FolderRenderer(input,url,datadir=_DATA_DIR)
    elif format in ["plaintext","text"]:
        r = PlaintextRenderer(input,url)
    elif mime:
        r = set_renderer(input,url,mime)
    elif path:
        r = renderer_from_file(path,url)
    else:
        # neither a mimetype nor a file to guess from
        r = None
    if r:
        r.display(directdisplay=True)
    else:
        print("Could not render %s"%input)
|
2023-07-18 15:40:36 +00:00
|
|
|
|
|
|
|
|
|
|
2023-07-18 10:33:30 +00:00
|
|
|
|
def main():
    """Command line entry point of ansicat.

    Files given as arguments are rendered one after the other. Without
    file argument, the content is read from standard input, in which case
    --format or --mime is required.
    """
    descri = ("ansicat is a terminal rendering tool that will render multiple formats (HTML, "
              "Gemtext, RSS, Gophermap, Image) into ANSI text and colors.\n"
              "When used on a file, ansicat will try to autodetect the format. When used with "
              "standad input, the format must be manually specified.\n"
              "If the content contains links, the original URL of the content can be specified "
              "in order to correctly modify relatives links.")
    parser = argparse.ArgumentParser(prog="ansicat",description=descri)
    parser.add_argument("--format", choices=["auto","gemtext","html","feed","gopher","image","folder","text","plaintext"],
                        help="Renderer to use. Available: auto, gemtext, html, feed, gopher, image, folder, plaintext")
    parser.add_argument("--mime", help="Mime of the content to parse")
    ## The argument needs to be a path to a file. If none, then stdin is used which allows
    ## to pipe text directly into ansirenderer
    parser.add_argument("--url",metavar="URL", nargs="*",
                        help="Original URL of the content")
    parser.add_argument("content",metavar="INPUT", nargs="*", type=argparse.FileType("r"),
                        default=sys.stdin, help="Path to the text to render (default to stdin)")
    args = parser.parse_args()
    if isinstance(args.content,list):
        # Files were given: render them, whether or not stdin is a terminal
        for f in args.content:
            path = os.path.abspath(f.name)
            try:
                content = f.read()
            except UnicodeDecodeError:
                # not decodable as text: pass the file object along,
                # render() can still autodetect from the path
                content = f
            render(content,path=path,format=args.format,url=args.url,mime=args.mime)
    elif sys.stdin.isatty():
        # interactive and no file: nothing to read
        print("Ansicat needs at least one file as an argument")
    elif not args.format and not args.mime:
        #we are in stdin
        print("Format or mime should be specified when running with stdin")
    else:
        render(args.content.read(),path=None,format=args.format,url=args.url,mime=args.mime)
|
2023-07-18 10:33:30 +00:00
|
|
|
|
|
|
|
|
|
# Run the command line interface only when executed as a script
if __name__ == '__main__':
    main()
|