Initial commit

This commit is contained in:
Robert Miles 2021-08-16 01:30:42 +00:00
commit 47fbcfa13a
5 changed files with 393 additions and 0 deletions

21
LICENSE Normal file
View File

@ -0,0 +1,21 @@
The MIT License (MIT)
Copyright (c) 2021 Robert 'khuxkm' Miles, https://khuxkm.tilde.team <khuxkm@tilde.team>
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

3
README.md Normal file
View File

@ -0,0 +1,3 @@
# gempher
Host gemini content via gopher. Requires html2text.

76
gem2html.py Normal file
View File

@ -0,0 +1,76 @@
import random, functools, os
from html import escape
def _rand_n():
    """Return a random 32-bit unsigned integer built from 4 bytes of os.urandom."""
    return int.from_bytes(os.urandom(4), "big")


# Digits used to render ids in base 36.
ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
# Every id handed out by rand_id() so far; used to guarantee uniqueness.
USED_IDS = set()


def rand_id():
    """Return a fresh base-36 identifier that has not been returned before.

    The id is the base-36 rendering of a random 32-bit number. Each id that is
    returned is recorded in USED_IDS, and candidates already present there are
    discarded, so two calls never yield the same value.
    """
    base = len(ALPHABET)
    while True:
        n = _rand_n()
        digits = []
        # Emit at least one digit so that n == 0 becomes "0", not "".
        while True:
            n, index = divmod(n, base)
            digits.append(ALPHABET[index])
            if n == 0:
                break
        candidate = "".join(reversed(digits))
        if candidate not in USED_IDS:
            USED_IDS.add(candidate)
            return candidate
def gem2html(content, link_callback=lambda url, text: (url, text)):
    """Render a gemtext document as an HTML body fragment.

    Parameters:
        content: the gemtext source as one string.
        link_callback: called as link_callback(url, text) for every link line
            that has a URL; it must return the (url, text) pair to render.
            The default leaves both unchanged.

    Returns:
        A string that starts with "<body>" followed by a newline and ends
        with "</body>". All document text is HTML-escaped.

    Line types handled: preformatted toggles (```), headings (#, ##, ###),
    list items ("* "), links ("=>"), quotes (">") and plain text lines.
    Consecutive list items share one <ul>. A preformatted block that is still
    open at the end of the document is closed so the markup stays balanced.
    """
    out = ["<body>\n"]
    in_pre = False      # inside a ``` block
    in_figure = False   # the open ``` block carries alt text (wrapped in <figure>)
    in_list = False     # a <ul> is currently open

    for line in content.splitlines():
        if in_pre:
            if line.startswith("```"):
                in_pre = False
                out.append("</pre>\n")
                if in_figure:
                    out.append("</figure>\n")
                    in_figure = False
            else:
                out.append(escape(line) + "\n")
            continue

        is_item = line.startswith("* ")
        if in_list and not is_item:
            # Any non-item line ends the current run of list items.
            out.append("</ul>\n")
            in_list = False

        if line.startswith("```"):
            alt = line[3:]
            if alt:
                # Alt text becomes a visually hidden caption labelling the figure.
                cap_id = rand_id()
                out.append(
                    "<figure role='img' aria-labelledby='{0}'><figcaption id='{0}' style='clip: rect(0 0 0 0); clip-path: inset(50%); height: 1px; overflow: hidden; position: absolute; white-space: nowrap; width: 1px;'>{1}</figcaption>\n".format(
                        cap_id, escape(alt)
                    )
                )
                in_figure = True
            in_pre = True
            out.append("<pre>\n")
        elif line.startswith("#"):
            if line.startswith("###"):
                out.append("<h3>{}</h3>".format(escape(line[3:].strip())))
            elif line.startswith("##"):
                out.append("<h2>{}</h2>".format(escape(line[2:].strip())))
            else:
                out.append("<h1>{}</h1>".format(escape(line[1:].strip())))
        elif is_item:
            if not in_list:
                out.append("<ul>\n")
                in_list = True
            out.append("<li>{}</li>\n".format(escape(line[1:].strip())))
        elif line.startswith("=>"):
            # Whitespace after "=>" is optional, so drop the marker before
            # splitting into URL and optional link text.
            parts = line[2:].split(None, 1)
            if not parts:
                # No link content at all: render the literal marker.
                out.append("<p>{}</p>".format(escape("=>")))
                continue
            url = parts[0]
            text = parts[1] if len(parts) > 1 else url
            # Let the caller rewrite the link before it is rendered.
            url, text = link_callback(url, text)
            out.append("<p><a href='{}'>{}</a></p>".format(escape(url), escape(text)))
        elif line.startswith(">"):
            # The leading ">" is markup, not part of the quoted text.
            out.append("<blockquote><p>{}</p></blockquote>".format(escape(line[1:].strip())))
        elif line:
            out.append("<p>{}</p>".format(escape(line)))
        else:
            out.append("<p><br></p>")

    if in_list:
        out.append("</ul>\n")
    if in_pre:
        # Unterminated preformatted block: close it anyway.
        out.append("</pre>\n")
        if in_figure:
            out.append("</figure>\n")
    out.append("</body>")
    return "".join(out)

171
gempher.py Normal file
View File

@ -0,0 +1,171 @@
import argparse
import configparser
import enum
import socket
import socketserver
import ssl
import threading
import time
from urllib import parse as urlparse

import utils
# Monkeypatch urllib.parse so that it treats gemini:// like other hierarchical
# schemes: registering the scheme in uses_relative/uses_netloc lets
# urljoin() resolve relative references against gemini URLs.
urlparse.uses_relative.append('gemini')
urlparse.uses_netloc.append('gemini')
# Import utils (which relies on the monkeypatched urllib.parse). The lists
# patched above live on the shared urllib.parse module object, so the patch is
# visible to utils whenever its functions are called.
import utils
# Set up the TLS context used for outgoing Gemini requests. Certificate and
# hostname verification are both switched off (presumably because Gemini
# servers commonly present self-signed certificates -- confirm).
ctx = ssl.create_default_context()
# check_hostname has to be disabled first: the ssl module refuses
# verify_mode=CERT_NONE while hostname checking is still enabled.
ctx.check_hostname=False
ctx.verify_mode=ssl.CERT_NONE
class ReturnCode(enum.IntEnum):
    """Outcome of a Gemini request performed by GeminiRequestThread.

    Values greater than ERROR denote failures that did not come from a
    well-formed server error response.
    """

    # The request succeeded (or was a redirect rendered as a link).
    SUCCESS = 0
    # The server answered with a failure status; the header is kept as retval.
    ERROR = enum.auto()
    # The response could not be parsed as a Gemini response.
    INVALID_RESPONSE = enum.auto()
    # The socket timed out while talking to the server.
    SOCKET_TIMEOUT = enum.auto()
    # The response was valid but of a kind this proxy cannot handle.
    # Declared after the pre-existing members so their values stay unchanged.
    UNSUPPORTED = enum.auto()
    # Anything else that went wrong.
    UNKNOWN_ERROR = 9999
class Config:
    """Server settings read from an INI file, with optional overrides.

    Options read from the file:
        [gopher] port      -- port of this gopher server (default 70)
        [gopher] hostname  -- hostname of this gopher server
        [gemini] hostname  -- hostname of the proxied gemini server
        [server] type      -- socketserver class name (default ThreadingTCPServer)

    Keys in ``overrides`` ("port", "hostname", "self_hostname",
    "server_type") take precedence over the file.
    """

    def __init__(self, filename=None, overrides=None):
        """Load ``filename`` (if given) and remember ``overrides`` (a dict or None)."""
        self._conf = configparser.ConfigParser()
        if filename is not None:
            self._conf.read(filename)
        # A fresh dict per instance avoids sharing one mutable default.
        self._overrides = {} if overrides is None else overrides

    @property
    def port(self):
        """Port this gopher server is reachable on."""
        if "port" in self._overrides:
            return self._overrides["port"]
        # fallback is keyword-only in configparser; it also covers a missing section.
        return self._conf.getint("gopher", "port", fallback=70)

    @property
    def hostname(self):
        """Hostname of the gemini server being proxied (KeyError if unset)."""
        if "hostname" in self._overrides:
            return self._overrides["hostname"]
        return self._conf["gemini"]["hostname"]

    @property
    def self_hostname(self):
        """Hostname of this gopher server (KeyError if unset)."""
        if "self_hostname" in self._overrides:
            return self._overrides["self_hostname"]
        return self._conf["gopher"]["hostname"]

    @property
    def server_cls(self):
        """The socketserver class to instantiate, looked up by name."""
        if "server_type" in self._overrides:
            name = self._overrides["server_type"]
        else:
            name = self._conf.get("server", "type", fallback="ThreadingTCPServer")
        return getattr(socketserver, name)
class GeminiRequestThread(threading.Thread):
    """Worker thread that performs a single Gemini request.

    After the thread has finished:
        rc      -- a ReturnCode describing the outcome.
        retval  -- [meta, body_bytes] on success (a redirect is turned into a
                   one-link gemtext document); the response header line when
                   the server reported a status this proxy does not render;
                   otherwise None.

    Setting ``killswitch`` makes the read loop stop before its next read.
    """

    def __init__(self, requrl):
        """Prepare a request for ``requrl`` (a parsed URL with hostname/port)."""
        # Thread.__init__ must run, otherwise start() refuses to launch.
        super().__init__()
        self.killswitch = threading.Event()
        self.requrl = requrl
        self.rc = None
        self.retval = None

    def run(self):
        """Fetch the URL and record the outcome in ``rc`` / ``retval``."""
        try:
            raw = self._fetch()
            if raw is None:
                # The killswitch fired while the server was still sending.
                self.rc = ReturnCode.SOCKET_TIMEOUT
            else:
                self._parse_response(raw)
        except socket.timeout:
            self.rc = ReturnCode.SOCKET_TIMEOUT
        except Exception:
            self.rc = ReturnCode.UNKNOWN_ERROR

    def _fetch(self):
        """Return the raw response bytes, or None if the killswitch was set."""
        requrl = self.requrl
        request = (urlparse.urlunparse(requrl) + "\r\n").encode("utf-8")
        chunks = []
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            s.settimeout(5)
            # The wrapped socket is closed by its own context manager.
            with ctx.wrap_socket(s, server_hostname=requrl.hostname) as ss:
                ss.connect((requrl.hostname, requrl.port or 1965))
                ss.sendall(request)
                while True:
                    if self.killswitch.is_set():
                        return None
                    data = ss.recv(1024)
                    if not data:
                        break
                    chunks.append(data)
        return b"".join(chunks)

    def _parse_response(self, raw):
        """Split ``raw`` into header and body and set ``rc`` / ``retval``."""
        header, sep, body = raw.partition(b"\r\n")
        if not sep:
            # No header terminator at all.
            self.rc = ReturnCode.INVALID_RESPONSE
            return
        try:
            header = header.decode("utf-8")
        except UnicodeDecodeError:
            self.rc = ReturnCode.INVALID_RESPONSE
            return
        parts = header.split(None, 1)
        if not parts or parts[0][0] not in "123456":
            self.rc = ReturnCode.INVALID_RESPONSE
            return
        kind = parts[0][0]
        meta = parts[1] if len(parts) > 1 else ""
        self.retval = header
        if kind in "16":
            # Status classes 1x and 6x are not handled by this proxy.
            self.rc = ReturnCode.UNSUPPORTED
            return
        if kind in "45":
            # Failure reported by the server; retval keeps the header line.
            self.rc = ReturnCode.ERROR
            return
        if kind == "3":
            # Present a redirect as a tiny gemtext page linking to the target.
            body = ("=> " + meta + " Redirect target").encode("utf-8")
            meta = "text/gemini"
        self.rc = ReturnCode.SUCCESS
        self.retval = [meta, body]
class Gempher(socketserver.StreamRequestHandler):
    """Gopher request handler that answers selectors with Gemini content.

    The class attributes CONFIG (a Config) and PARSED_URL (the parsed base
    gemini URL) must be provided by a subclass; create_server() builds one.
    """

    def handle(self):
        """Read one gopher request line and write the response."""
        self.gplus = False
        try:
            req = self.rfile.readline().strip().decode("ascii")
        except UnicodeDecodeError:
            self.send_error("Invalid selector")
            return
        query = None
        if "\t" in req:
            req, query = req.split("\t", 1)
            if query.startswith("+"):
                # A "+" after the tab marks a Gopher+ request, not a search.
                self.gplus = True
                query = None
        requrl = self.PARSED_URL._replace(path=req, query=query or "")

        if requrl.path.startswith("/x/"):
            # /x/<scheme>/<netloc>/<path> addresses a resource on another host.
            nurl = requrl.path[3:].split("/", 2)
            if len(nurl) < 2 or not nurl[0] or not nurl[1]:
                self.send_error("Invalid selector")
                return
            rest = nurl[2] if len(nurl) > 2 else ""
            requrl = requrl._replace(scheme=nurl[0], netloc=nurl[1], path="/" + rest)

        if requrl.scheme == "gopher":
            itemtype = "1"
            path = requrl.path
            # A leading "/<type>/" component carries the gopher item type.
            if len(path) > 2 and path[1] in "0123456789gI:;<dhs" and path[2] == "/":
                itemtype = path[1]
                requrl = requrl._replace(path=path[2:])
            port = requrl.port or 70
            self.send_response((f"{itemtype}Click here to follow through\t{requrl.path}\t{requrl.hostname}\t{port}\r\n").encode())
            return

        if requrl.scheme != "gemini":
            hn = self.CONFIG.self_hostname
            port = self.CONFIG.port
            ru = urlparse.urlunparse(requrl)
            self.send_response((f"hClick here to follow through\tURL:{ru}\t{hn}\t{port}\r\n").encode())
            # Nothing to fetch over Gemini for a foreign scheme.
            return

        t = GeminiRequestThread(requrl)
        t.start()
        # Wait up to 5 seconds without spinning. A silent server trips the
        # socket timeout by itself; one that is still sending is told to stop.
        t.join(5)
        if t.is_alive():
            t.killswitch.set()
            t.join()

        rc = t.rc
        if rc == ReturnCode.SUCCESS:  # success/redirect
            meta, body = t.retval
            if meta.startswith("text/gemini"):
                _mimetype, params = utils.parse_mime(meta)
                # "charset" is the MIME parameter name; "encoding" is still
                # honoured because earlier versions looked that key up.
                charset = params.get("charset") or params.get("encoding") or "utf-8"
                self.send_gemini(body, charset, requrl)
            else:
                self.send_response(body)
        elif rc == ReturnCode.ERROR:  # error provided by the server
            self.send_error(t.retval)
        elif rc == ReturnCode.INVALID_RESPONSE:  # error caused by the server
            self.send_error("Server returned invalid response")
        elif rc == ReturnCode.UNSUPPORTED:
            self.send_error("Server returned valid response that we could not handle")
        elif rc == ReturnCode.SOCKET_TIMEOUT:
            self.send_error("Server timed out", 2)
        else:  # any other unspecified error
            self.send_error("Unknown error occurred", 2)

    def send_response(self, resp, error=None):
        """Write ``resp`` to the client.

        resp: payload as bytes, or str (encoded as UTF-8 before sending).
        error: None for a normal response, otherwise an error code. Errors
            are sent as a gopher "3" line, or with a "--<code>" header for
            Gopher+ requests.
        """
        if isinstance(resp, str):
            # wfile is a binary stream, so text has to be encoded first.
            resp = resp.encode("utf-8")
        if error is None:
            if self.gplus:
                # Gopher+ data is announced with its length in bytes.
                self.wfile.write((f"+{len(resp)!s}\r\n").encode("ascii"))
            self.wfile.write(resp)
            return
        if self.gplus:
            self.wfile.write((f"--{error!s}\r\n").encode("ascii"))
            self.wfile.write(resp)
        else:
            self.wfile.write(b"3")
            self.wfile.write(resp)
            self.wfile.write(b"\t.\tnull.host\t70")
        self.wfile.write(b"\r\n")

    def send_error(self, err, code=1):
        """Send the error message ``err`` with the given error code."""
        self.send_response(err, code)

    def send_gemini(self, body, encoding, requrl):
        """Decode a gemtext ``body`` and send it converted to a gopher menu."""
        try:
            body = body.decode(encoding)
        except (LookupError, UnicodeDecodeError):
            # Unknown charset name or bytes that do not match it.
            self.send_error("Server returned invalid response")
            return
        # run it through the gemtext->html->text gauntlet and send it
        self.send_response(utils.gemtext2gopher(body, urlparse.urlunparse(requrl), self.CONFIG.self_hostname, self.CONFIG.port, self.CONFIG.hostname))
def create_server(server_address, config_fn, overrides=None, server=None):
    """Build a gopher server that proxies to the configured gemini host.

    Parameters:
        server_address: address passed to the server class constructor.
        config_fn: path of the configuration file handed to Config.
        overrides: optional dict of settings that take precedence over the file.
        server: server class to instantiate; defaults to Config.server_cls.

    Returns:
        The server instance. Its ``shutdown`` is replaced by a version that
        only raises the stop flag, and a ``join`` method is added that waits
        until the serve loop has reported that it is shut down.
    """
    # A new dict per call instead of one mutable default shared by all calls.
    conf = Config(config_fn, {} if overrides is None else overrides)
    if server is None:
        server = conf.server_cls
    # Per-server handler subclass carrying the configuration as class attributes.
    handler = type(
        "Gempher",
        (Gempher,),
        {"CONFIG": conf, "PARSED_URL": urlparse.urlparse("gemini://" + conf.hostname)},
    )
    ret = server(server_address, handler)

    # NOTE(review): the two attributes below are name-mangled privates of
    # socketserver.BaseServer (an implementation detail) -- confirm they exist
    # on the Python versions this is deployed on.
    def _request_shutdown():
        ret._BaseServer__shutdown_request = True

    ret.shutdown = _request_shutdown

    def _join():
        ret._BaseServer__is_shut_down.wait()

    ret.join = _join
    return ret

122
utils.py Normal file
View File

@ -0,0 +1,122 @@
import string, gem2html, html2text, re, os.path
import urllib.parse as urlparse
# Utility function to parse a MIME type
def parse_mime(mimetype):
    """Parse a MIME type string such as 'text/gemini; charset=utf-8'.

    Parameters:
        mimetype: the MIME type, optionally followed by ';'-separated
            parameters. Parameter values may be double-quoted, with
            backslash escapes inside the quotes.

    Returns:
        A tuple ([type, subtype], params). ``params`` maps each parameter
        name to its value, or to None when the parameter has no '=' part.
        Parameters with an empty name are ignored.
    """
    mimetype = mimetype.strip()
    # type is everything before the first "/"
    maintype, _, rest = mimetype.partition("/")
    # subtype is everything up to the first ";" (if there is one)
    subtype, _, rest = rest.partition(";")
    maintype = maintype.strip()
    subtype = subtype.strip()

    params = {}
    index = 0
    end = len(rest)
    while index < end:
        # skip whitespace before the parameter name
        while index < end and rest[index] in string.whitespace:
            index += 1
        # the parameter name is everything before the "=" or ";"
        start = index
        while index < end and rest[index] not in "=;":
            index += 1
        name = rest[start:index].strip()
        if index >= end or rest[index] == ";":
            # no "=": the parameter has no value
            index += 1
            if name:
                params[name] = None
            continue
        index += 1  # step over the "="
        if index < end and rest[index] == '"':
            index += 1
            chars = []
            while index < end:
                c = rest[index]
                index += 1
                if c == '"':
                    break
                if c == "\\" and index < end:
                    # a backslash makes the next character literal
                    c = rest[index]
                    index += 1
                chars.append(c)
            value = "".join(chars)
            # ignore anything between the closing quote and the next ";"
            while index < end and rest[index] != ";":
                index += 1
        else:
            start = index
            while index < end and rest[index] != ";":
                index += 1
            value = rest[start:index].strip()
        # Step over the ";" separator so it is not read as an empty parameter.
        index += 1
        if name:
            params[name] = value
    return [maintype, subtype], params
# Matches a "[text](url)" link that spans from a line start to a line end
# (MULTILINE makes ^ and $ match at line boundaries). Group 1 is the link
# text, group 2 the URL.
LINK_LINE = re.compile(r"^\[([^\]]+)\]\(([^)]+)\)$",re.MULTILINE)
# Matches the same "[text](url)" syntax anywhere in the text.
LINK_INLINE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
def _gopher_link_line(linkname, linkurl, baseurl, basehost, baseport, gemtexthost):
    """Return the gopher menu line for one link (no line terminator)."""
    absoluteurl = urlparse.urljoin(baseurl, linkurl)
    parsed = urlparse.urlparse(absoluteurl)
    # Pick the item type from the extension of the URL path only, so the host
    # name and query string cannot be mistaken for a file extension.
    ext = os.path.splitext(parsed.path)[1].lower()
    linktype = "1"
    if ext == ".gif":  # g line - GIF image
        linktype = "g"
    elif ext in (".png", ".jpg", ".jpeg", ".tiff"):  # I line - non-GIF image
        linktype = "I"
    elif ext == ".bmp":  # : line - BMP image
        linktype = ":"
    elif ext in (".mp3", ".flac", ".aac"):  # < line - audio file
        linktype = "<"

    if parsed.scheme == "gopher":
        # gopher links point straight at their own server
        selector = parsed.path
        host = parsed.hostname
        port = parsed.port or 70
    else:
        # everything else is served through this proxy
        host = basehost
        port = baseport
        if parsed.scheme == "gemini":
            if parsed.hostname == gemtexthost:
                selector = parsed.path
            else:
                selector = f"/x/{parsed.scheme}/{parsed.netloc}{parsed.path}"
        else:
            linktype = "h"  # force H link type for external links
            selector = "URL:" + absoluteurl
    return f"{linktype}{linkname}\t{selector}\t{host}\t{port}"


# Convert gemtext to gopher
def gemtext2gopher(gemtext,baseurl="gemini://tilde.team",basehost="tilde.team",baseport=70,gemtexthost="tilde.team"):
    """Convert a gemtext document into a gopher menu.

    Parameters:
        gemtext: the gemtext source as a string.
        baseurl: URL of the document, used to resolve relative links.
        basehost, baseport: host and port of this gopher proxy, used for
            every link that is served through it.
        gemtexthost: hostname of the proxied gemini server; links to it keep
            their plain path as selector, other gemini hosts go via "/x/".

    Returns:
        The menu as a string in which every line ends with CRLF. Links that
        stand alone on a line become menu entries; every other line becomes
        an informational "i" line.
    """
    # step 1, convert gemtext to html
    html = gem2html.gem2html(gemtext)
    # step 2, convert html to plaintext
    h = html2text.HTML2Text()
    h.use_automatic_links = False # only use [link text](link url) format, even if link text and link url are one and the same
    h.feed(html)
    text = h.finish()
    h.close()
    # step 3, convert plaintext to gophermap: links on their own line become menu entries
    text = LINK_LINE.sub(
        lambda m: _gopher_link_line(m.group(1), m.group(2), baseurl, basehost, baseport, gemtexthost),
        text,
    )
    # any link that is left sits inside other text; keep only its link text
    text = LINK_INLINE.sub(lambda m: m.group(1), text)
    # now apply dummy i-lines to everything that is not already a menu entry
    out = []
    for line in text.splitlines():
        if "\t" not in line:
            out.append(f"i{line}\t.\tnull.host\t70")
        else:
            out.append(line)
    return "".join(line + "\r\n" for line in out)