gempher/utils.py

123 lines
3.8 KiB
Python

import string, gem2html, html2text, re, os.path
import urllib.parse as urlparse
# Utility function to parse a MIME type
def parse_mime(mimetype):
    """Split a MIME type string into its type, subtype and parameters.

    Args:
        mimetype: A string such as ``text/gemini; charset=utf-8; lang=en``.
            Leading and trailing whitespace is ignored.

    Returns:
        A tuple ``([type, subtype], params)``. ``params`` maps each parameter
        name to its value. A parameter written without ``=`` maps to
        ``None``. Quoted values lose their surrounding double quotes and
        have backslash escapes resolved. Empty parameter names (for example
        from a trailing ``;``) are not stored.
    """
    mimetype = mimetype.strip()
    length = len(mimetype)

    # type is everything before the first slash (the whole string if there is none)
    slash = mimetype.find("/")
    if slash == -1:
        slash = length
    maintype = mimetype[:slash]

    # subtype is everything after the slash and before the semicolon (if the latter exists)
    semicolon = mimetype.find(";", slash + 1)
    if semicolon == -1:
        semicolon = length
    subtype = mimetype[slash + 1:semicolon]

    params = dict()
    index = semicolon + 1
    while index < length:
        # skip whitespace
        while index < length and mimetype[index] in string.whitespace:
            index += 1

        # the parameter name is everything before the = or ;
        name_start = index
        while index < length and mimetype[index] not in "=;":
            index += 1
        name = mimetype[name_start:index]

        # if the string is over or there isn't an equals sign, there's no param value
        if index >= length or mimetype[index] == ";":
            index += 1
            # an empty name here is just a stray separator, not a parameter
            if name:
                params[name] = None
            continue

        # otherwise, grab the param value
        index += 1  # step over the "="
        if index < length and mimetype[index] == '"':
            # quoted value: may contain ";" and backslash-escaped characters
            index += 1
            pieces = []
            while True:
                run_start = index
                while index < length and mimetype[index] not in '\\"':
                    index += 1
                pieces.append(mimetype[run_start:index])
                if index >= length:
                    break  # unterminated quote: keep what we have
                c = mimetype[index]
                index += 1
                if c == '"':
                    break  # closing quote
                # c is a backslash: take the next character literally
                if index >= length:
                    pieces.append(c)
                    break
                pieces.append(mimetype[index])
                index += 1
            value = "".join(pieces)
            # anything between the closing quote and the next ; is ignored
            end = mimetype.find(";", index)
        else:
            # unquoted value: everything up to the next ; (possibly empty,
            # e.g. when "=" is the last character of the string)
            end = mimetype.find(";", index)
            value = mimetype[index:length if end == -1 else end]
        if end == -1:
            end = length
        index = end + 1  # step over the ";" separator

        if name:
            params[name] = value
    return [maintype, subtype], params
# Shared pattern for a "[link text](link url)" link; group 1 is the text, group 2 the url
_LINK_PATTERN = r"\[([^\]]+)\]\(([^)]+)\)"
# A link anchored to both the start and the end of a line
LINK_LINE = re.compile("^" + _LINK_PATTERN + "$", flags=re.MULTILINE)
# A link appearing anywhere in the text
LINK_INLINE = re.compile(_LINK_PATTERN)
# Gopher item type chosen from a link's file extension (anything else is "1")
_EXT_LINKTYPES = {
    ".gif": "g",  # g line - GIF image
    ".png": "I",  # I line - non-GIF image
    ".jpg": "I",
    ".jpeg": "I",
    ".tiff": "I",
    ".bmp": ":",  # : line - BMP image
    ".mp3": "<",  # < line - audio file
    ".flac": "<",
    ".aac": "<",
}


# Build the gophermap line for one link
def _gopher_menu_line(linkname, linkurl, baseurl, basehost, baseport, gemtexthost):
    """Return the tab-separated gophermap entry for a single link.

    Args:
        linkname: Display text of the link.
        linkurl: Link target as written in the document (may be relative).
        baseurl: URL that ``linkurl`` is joined against.
        basehost: Host written into entries for gemini and external links.
        baseport: Port written into entries for gemini and external links.
        gemtexthost: Gemini hostname whose links keep their path as selector.

    Returns:
        A string ``<type><name>\\t<selector>\\t<host>\\t<port>``.
    """
    # NOTE(review): urljoin only resolves relative references for schemes in
    # urllib.parse.uses_relative, which does not include "gemini" by default;
    # confirm that it is registered elsewhere in the project.
    absoluteurl = urlparse.urljoin(baseurl, linkurl)
    ext = os.path.splitext(absoluteurl)[1]
    linktype = _EXT_LINKTYPES.get(ext, "1")
    parsed = urlparse.urlparse(absoluteurl)
    host, port = basehost, baseport
    if parsed.scheme == "gopher":
        # gopher links point straight at their own server
        selector = parsed.path
        host = parsed.hostname
        port = parsed.port or 70
    elif parsed.scheme == "gemini":
        if parsed.hostname == gemtexthost:
            selector = parsed.path
        else:
            selector = f"/x/{parsed.scheme}/{parsed.netloc}{parsed.path}"
    else:
        linktype = "h"  # force H link type for external links
        selector = "URL:" + absoluteurl
    return f"{linktype}{linkname}\t{selector}\t{host}\t{port}"


# Convert gemtext to gopher
def gemtext2gopher(gemtext,baseurl="gemini://tilde.team",basehost="tilde.team",baseport=70,gemtexthost="tilde.team"):
    """Convert a gemtext document into gophermap text.

    The gemtext is rendered to HTML with gem2html, flattened to plain text
    with html2text, and then rewritten line by line: a line consisting of a
    single ``[text](url)`` link becomes a gopher menu entry, any remaining
    inline link is reduced to its text, and every line without a tab is
    wrapped as an informational ``i`` line.

    Args:
        gemtext: The gemtext source to convert.
        baseurl: URL that relative links are joined against.
        basehost: Host written into entries for gemini and external links.
        baseport: Port written into entries for gemini and external links.
        gemtexthost: Gemini hostname whose links keep their path as the
            selector; other gemini hosts get a
            ``/x/<scheme>/<netloc><path>`` selector instead.

    Returns:
        The gophermap as a single string with lines joined by ``"\\n"``.
    """
    # step 1, convert gemtext to html
    html = gem2html.gem2html(gemtext)
    # step 2, convert html to plaintext
    h = html2text.HTML2Text()
    h.use_automatic_links = False # only use [link text](link url) format, even if link text and link url are one and the same
    h.feed(html)
    text = h.finish()
    h.close()
    # step 3, convert plaintext to gophermap
    # Substitute in place so only whole-line links become menu entries;
    # finditer yields Match objects, so the groups are pulled out explicitly.
    text = LINK_LINE.sub(
        lambda m: _gopher_menu_line(
            m.group(1), m.group(2), baseurl, basehost, baseport, gemtexthost
        ),
        text,
    )
    # links left inside running text cannot be menu entries; keep their text only
    text = LINK_INLINE.sub(lambda m: m.group(1), text)
    # now apply dummy i-lines
    out = [
        line if "\t" in line else f"i{line}\t.\tnull.host\t70"
        for line in text.splitlines()
    ]
    return "\n".join(out)