Initial commit
This commit is contained in:
commit
47fbcfa13a
|
@ -0,0 +1,21 @@
|
|||
The MIT License (MIT)
|
||||
|
||||
Copyright (c) 2021 Robert 'khuxkm' Miles, https://khuxkm.tilde.team <khuxkm@tilde.team>
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in
|
||||
all copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
||||
THE SOFTWARE.
|
|
@ -0,0 +1,3 @@
|
|||
# gempher
|
||||
|
||||
Host gemini content via gopher. Requires html2text.
|
|
@ -0,0 +1,76 @@
|
|||
import random, functools, os
|
||||
from html import escape
|
||||
|
||||
_rand_n = lambda: functools.reduce(lambda x, y: (x<<8)+y,os.urandom(4))
|
||||
|
||||
ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
|
||||
USED_IDS = set()
|
||||
def rand_id():
|
||||
n = _rand_n()
|
||||
id = ""
|
||||
while n>0:
|
||||
n, index = divmod(n,len(ALPHABET))
|
||||
id = ALPHABET[index]+id
|
||||
if id in USED_IDS: return rand_id()
|
||||
return id
|
||||
|
||||
def gem2html(content, link_callback=lambda url, text: (url, text)):
    """Convert a gemtext document into an HTML ``<body>`` fragment.

    Args:
        content: The gemtext source as a string.
        link_callback: Called as ``link_callback(url, text)`` for every link
            line; must return the ``(url, text)`` pair to render.  The
            default returns both unchanged.

    Returns:
        A string starting with ``"<body>\\n"`` and ending with ``"</body>"``.
        All text taken from the document is HTML-escaped.
    """
    out = ["<body>\n"]
    in_pre = False      # inside a ``` preformatted block
    in_figure = False   # that block had alt text and is wrapped in <figure>
    in_list = False     # a <ul> is currently open

    for line in content.splitlines():
        if in_pre:
            if line.startswith("```"):
                out.append("</pre>\n")
                in_pre = False
                if in_figure:
                    out.append("</figure>\n")
                    in_figure = False
            else:
                out.append(escape(line) + "\n")
            continue

        # Consecutive "* " lines share one <ul>; anything else closes it.
        is_item = line.startswith("* ")
        if in_list and not is_item:
            out.append("</ul>\n")
            in_list = False

        if line.startswith("```"):
            alt_text = line[3:]
            if alt_text:
                # The alt text becomes a visually hidden caption of the block.
                cap_id = rand_id()
                out.append("<figure role='img' aria-captionedby='{0}'><figcaption id='{0}' style='clip: rect(0 0 0 0); clip-path: inset(50%); height: 1px; overflow: hidden; position: absolute; white-space: nowrap; width: 1px;'>{1}</figcaption>\n".format(cap_id, escape(alt_text)))
                in_figure = True
            in_pre = True
            out.append("<pre>\n")
        elif line.startswith("###"):
            out.append("<h3>{}</h3>".format(escape(line[3:].strip())))
        elif line.startswith("##"):
            out.append("<h2>{}</h2>".format(escape(line[2:].strip())))
        elif line.startswith("#"):
            out.append("<h1>{}</h1>".format(escape(line[1:].strip())))
        elif is_item:
            if not in_list:
                out.append("<ul>\n")
                in_list = True
            out.append("<li>{}</li>\n".format(escape(line[1:].strip())))
        elif line.startswith("=>"):
            # "=>" may be followed directly by the URL, with or without
            # whitespace; the optional label is everything after the URL.
            fields = line[2:].split(None, 1)
            if not fields:
                # no link content at all: render a literal "=>"
                out.append("<p>{}</p>".format(escape("=>")))
                continue
            url = fields[0]
            text = fields[1] if len(fields) > 1 else url
            url, text = link_callback(url, text)
            out.append("<p><a href='{}'>{}</a></p>".format(escape(url), escape(text)))
        elif line.startswith(">"):
            # drop the ">" marker itself; <blockquote> already conveys it
            out.append("<blockquote><p>{}</p></blockquote>".format(escape(line[1:].strip())))
        elif line:
            out.append("<p>{}</p>".format(escape(line)))
        else:
            out.append("<p><br></p>")

    # Close whatever the document left open.
    if in_list:
        out.append("</ul>\n")
    if in_pre:
        out.append("</pre>\n")
        if in_figure:
            out.append("</figure>\n")
    out.append("</body>")
    return "".join(out)
|
|
@ -0,0 +1,171 @@
|
|||
import argparse
import configparser
import enum
import socket
import socketserver
import ssl
import threading
import time
from urllib import parse as urlparse

# monkeypatch urllib.parse to understand gemini URLs
urlparse.uses_relative.append('gemini')
urlparse.uses_netloc.append('gemini')

# now import the utils (which will use the monkeypatched urllib.parse)
import utils
|
||||
def _unverified_client_context():
    """Build the TLS client context used for outgoing Gemini requests.

    Starts from ``ssl.create_default_context()`` and then turns off both
    hostname checking and certificate verification, so any server
    certificate is accepted.
    """
    # NOTE(review): presumably verification is disabled because Gemini
    # servers commonly present self-signed certificates -- confirm.
    context = ssl.create_default_context()
    # check_hostname must be cleared before verify_mode can become CERT_NONE
    context.check_hostname = False
    context.verify_mode = ssl.CERT_NONE
    return context


# shared TLS context for every outgoing connection
ctx = _unverified_client_context()
|
||||
|
||||
class ReturnCode(enum.IntEnum):
    """Outcome of one proxied Gemini request."""

    # a response body (or a redirect notice) is available
    SUCCESS = 0
    # the Gemini server answered with a failure status
    ERROR = enum.auto()
    # the answer could not be parsed as a Gemini response
    INVALID_RESPONSE = enum.auto()
    # the connection timed out
    SOCKET_TIMEOUT = enum.auto()
    # the answer was well-formed but has a status this proxy cannot serve
    UNSUPPORTED = enum.auto()
    # anything else that went wrong
    UNKNOWN_ERROR = 9999
|
||||
|
||||
class Config:
    """Server settings read from an ini file, with optional overrides.

    Values found in ``overrides`` win over the file.  The file is read with
    ``configparser`` and uses the sections ``[gopher]`` (``port``,
    ``hostname``), ``[gemini]`` (``hostname``) and ``[server]`` (``type``).
    """

    def __init__(self, filename=None, overrides=None):
        """Load ``filename`` (if given) and remember ``overrides``.

        Args:
            filename: Path of the ini file, or None to read no file.
            overrides: Mapping consulted before the file; recognised keys
                are ``port``, ``hostname``, ``self_hostname`` and
                ``server_type``.  None means no overrides.
        """
        self._conf = configparser.ConfigParser()
        if filename is not None:
            self._conf.read(filename)
        # a fresh dict per instance instead of one shared default object
        self._overrides = {} if overrides is None else overrides

    @property
    def port(self):
        """Port of the gopher side; defaults to 70 when not configured."""
        if "port" in self._overrides:
            return self._overrides["port"]
        # fallback is keyword-only in configparser
        return self._conf.getint("gopher", "port", fallback=70)

    @property
    def hostname(self):
        """Hostname from the ``[gemini]`` section (KeyError if missing)."""
        if "hostname" in self._overrides:
            return self._overrides["hostname"]
        return self._conf["gemini"]["hostname"]

    @property
    def self_hostname(self):
        """Hostname from the ``[gopher]`` section (KeyError if missing)."""
        if "self_hostname" in self._overrides:
            return self._overrides["self_hostname"]
        return self._conf["gopher"]["hostname"]

    @property
    def server_cls(self):
        """The ``socketserver`` class to use; ThreadingTCPServer by default.

        Raises:
            AttributeError: if the configured name is not in ``socketserver``.
        """
        name = self._conf.get("server", "type", fallback="ThreadingTCPServer")
        if "server_type" in self._overrides:
            name = self._overrides["server_type"]
        return getattr(socketserver, name)
|
||||
|
||||
class GeminiRequestThread(threading.Thread):
    """Fetch one Gemini URL in a background thread.

    After the thread has finished, ``rc`` holds a ReturnCode and ``retval``
    holds the result:

    * ``ReturnCode.SUCCESS``: ``retval`` is ``[meta, body_bytes]``.  For a
      redirect the body is a one-line gemtext document linking to the
      target and ``meta`` is ``"text/gemini"``.
    * ``ReturnCode.ERROR`` / ``ReturnCode.UNSUPPORTED``: ``retval`` is the
      raw response header line.
    * any other code: ``retval`` is not meaningful.

    Setting ``killswitch`` makes the read loop stop before its next read.
    """

    def __init__(self, requrl):
        """Prepare a request for ``requrl`` (a parsed URL tuple)."""
        # Thread.__init__ must run, otherwise start() refuses to work.
        super().__init__()
        self.killswitch = threading.Event()
        self.requrl = requrl
        self.rc = None
        self.retval = None

    def run(self):
        """Perform the request and translate failures into a ReturnCode."""
        try:
            self._interpret(self._fetch())
        except UnicodeDecodeError:
            self.rc = ReturnCode.INVALID_RESPONSE
        except socket.timeout:
            self.rc = ReturnCode.SOCKET_TIMEOUT
        except Exception:
            # top-level boundary of the thread: never let it die silently
            self.rc = ReturnCode.UNKNOWN_ERROR

    def _fetch(self):
        """Send the request line and return everything the server sent."""
        requrl = self.requrl
        address = (requrl.hostname, requrl.port or 1965)
        request = (urlparse.urlunparse(requrl) + "\r\n").encode("utf-8")
        # create_connection handles IPv4 and IPv6; the 5 second timeout is
        # inherited by the TLS wrapper.
        with socket.create_connection(address, timeout=5) as raw:
            with ctx.wrap_socket(raw, server_hostname=requrl.hostname) as conn:
                conn.sendall(request)
                chunks = []
                while not self.killswitch.is_set():
                    data = conn.recv(1024)
                    if not data:
                        break
                    chunks.append(data)
        return b"".join(chunks)

    def _interpret(self, resp):
        """Split ``resp`` into header and body and set ``rc``/``retval``."""
        header, sep, body = resp.partition(b"\r\n")
        if not sep:
            self.rc = ReturnCode.INVALID_RESPONSE
            return
        header = header.decode("utf-8")
        fields = header.split(None, 1)
        if not fields or fields[0][0] not in "123456":
            self.rc = ReturnCode.INVALID_RESPONSE
            return
        kind = fields[0][0]
        meta = fields[1] if len(fields) > 1 else ""
        self.retval = header
        if kind in "16":
            # 1x asks for input, 6x for a client certificate
            self.rc = ReturnCode.UNSUPPORTED
            return
        if kind in "45":
            self.rc = ReturnCode.ERROR
            return
        if kind == "3":
            if not meta:
                # a redirect without a target cannot be followed
                self.rc = ReturnCode.INVALID_RESPONSE
                return
            body = ("=> " + meta + " Redirect target").encode("utf-8")
            meta = "text/gemini"
        elif not meta:
            # NOTE(review): an empty MIME type on success is treated as
            # gemtext here -- confirm this matches the protocol default.
            meta = "text/gemini"
        self.retval = [meta, body]
        self.rc = ReturnCode.SUCCESS
|
||||
|
||||
class Gempher(socketserver.StreamRequestHandler):
    """Gopher request handler that answers by fetching Gemini content.

    The concrete handler class is expected to provide two class attributes:
    ``CONFIG`` (with ``port``, ``hostname`` and ``self_hostname``) and
    ``PARSED_URL`` (the parsed base Gemini URL the selector is applied to).
    """

    # gopher item types recognised as the first path segment of a gopher URL
    _GOPHER_ITEM_TYPES = "0123456789gI:;<dhs"

    # rc name -> (message, error code) for failures with a fixed message
    _FAILURES = {
        "INVALID_RESPONSE": ("Server returned invalid response", 1),
        "UNSUPPORTED": ("Server returned valid response that we could not handle", 1),
        "SOCKET_TIMEOUT": ("Server timed out", 2),
    }

    def handle(self):
        """Read one selector line and write the matching response."""
        self.gplus = False
        req = self.rfile.readline().strip().decode("utf-8", errors="replace")
        query = ""
        if "\t" in req:
            req, query = req.split("\t", 1)
            if query.startswith("+"):
                # a leading "+" marks a Gopher+ request, not a search string
                self.gplus = True
                query = ""
        requrl = self.PARSED_URL._replace(path=req, query=query)

        if requrl.path.startswith("/x/"):
            # /x/<scheme>/<netloc>/<path> addresses some other server
            parts = requrl.path[3:].split("/", 2)
            if len(parts) < 2 or not parts[0] or not parts[1]:
                self.send_error("Malformed proxy selector")
                return
            rest = parts[2] if len(parts) > 2 else ""
            requrl = requrl._replace(scheme=parts[0], netloc=parts[1], path="/" + rest)

        if requrl.scheme == "gopher":
            # answer with a menu entry pointing straight at the gopher server
            itemtype = "1"
            path = requrl.path
            if len(path) > 2 and path[1] in self._GOPHER_ITEM_TYPES and path[2] == "/":
                itemtype = path[1]
                requrl = requrl._replace(path=path[2:])
            port = requrl.port or 70
            self.send_response((f"{itemtype}Click here to follow through\t{requrl.path}\t{requrl.hostname}\t{port}\r\n").encode())
            return

        if requrl.scheme != "gemini":
            # any other scheme becomes an "h" item using the URL: convention
            hn = self.CONFIG.self_hostname
            port = self.CONFIG.port
            ru = urlparse.urlunparse(requrl)
            self.send_response((f"hClick here to follow through\tURL:{ru}\t{hn}\t{port}\r\n").encode())
            # nothing to fetch over Gemini for a non-gemini URL
            return

        t = GeminiRequestThread(requrl)
        t.start()
        # If the server hasn't sent anything in 5 seconds the socket times
        # out by itself; if it is *still* sending after 5 seconds, the
        # killswitch ends the read loop.
        t.join(5)
        if t.is_alive():
            t.killswitch.set()
            t.join()

        rc = t.rc
        if rc == ReturnCode.SUCCESS:  # success/redirect
            if t.retval[0].startswith("text/gemini"):
                mimetype, params = utils.parse_mime(t.retval[0])
                # "charset" is the MIME parameter name; "encoding" is still
                # honoured for backward compatibility
                encoding = params.get("charset") or params.get("encoding") or "utf-8"
                self.send_gemini(t.retval[1], encoding, requrl)
            else:
                self.send_response(t.retval[1])
        elif rc == ReturnCode.ERROR:  # error provided by the server
            self.send_error(t.retval)
        else:
            message, code = self._FAILURES.get(
                getattr(rc, "name", None), ("Unknown error occurred", 2)
            )
            self.send_error(message, code)

    def send_response(self, resp, error=None):
        """Write ``resp`` to the client, framed for plain gopher or Gopher+.

        Args:
            resp: Payload as bytes, or a str that is encoded as UTF-8.
            error: None for a normal response, otherwise the error code.
        """
        if isinstance(resp, str):
            resp = resp.encode("utf-8")
        if self.gplus:
            if error is None:
                header = f"+{len(resp)!s}\r\n"
            else:
                header = f"--{error!s}\r\n"
            self.wfile.write(header.encode("ascii"))
        elif error is not None:
            # plain gopher reports errors as a type "3" menu line
            self.wfile.write(b"3")
        self.wfile.write(resp)
        if error is not None:
            if not self.gplus:
                self.wfile.write(b"\t.\tnull.host\t70")
            self.wfile.write(b"\r\n")

    def send_error(self, err, code=1):
        """Send ``err`` as an error response with the given ``code``."""
        self.send_response(err, code)

    def send_gemini(self, body, encoding, requrl):
        """Decode a gemtext ``body`` and send it converted for gopher."""
        try:
            text = body.decode(encoding)
        except (LookupError, UnicodeDecodeError):
            self.send_error("Server returned a document we could not decode")
            return
        # run it through the gemtext->html->text gauntlet and send it
        self.send_response(utils.gemtext2gopher(text, urlparse.urlunparse(requrl), self.CONFIG.self_hostname, self.CONFIG.port, self.CONFIG.hostname))
|
||||
|
||||
def create_server(server_address, config_fn, overrides=None, server=None):
    """Build a ready-to-run gopher server instance.

    Args:
        server_address: Address passed to the server class.
        config_fn: Path of the ini file handed to Config (may be None).
        overrides: Mapping of settings that win over the file; None means
            no overrides.
        server: Server class to instantiate; defaults to the class named
            by the configuration.

    Returns:
        The server instance.  Its ``shutdown`` attribute is replaced by a
        function that only sets the shutdown flag and returns at once, and
        a ``join`` function is added that waits until the serving loop has
        stopped.
    """
    conf = Config(config_fn, {} if overrides is None else overrides)
    if server is None:
        server = conf.server_cls
    # Per-server handler subclass carrying the configuration as class
    # attributes, since socketserver instantiates handlers itself.
    handler = type("Gempher", (Gempher,), {"CONFIG": conf, "PARSED_URL": urlparse.urlparse("gemini://" + conf.hostname)})
    ret = server(server_address, handler)

    # NOTE(review): the two helpers below reach into name-mangled private
    # attributes of socketserver.BaseServer -- confirm they still exist on
    # the Python versions this is deployed on.
    def __shutdown():
        ret._BaseServer__shutdown_request = True

    ret.shutdown = __shutdown

    def __join():
        ret._BaseServer__is_shut_down.wait()

    ret.join = __join
    return ret
|
|
@ -0,0 +1,122 @@
|
|||
import string, gem2html, html2text, re, os.path
|
||||
import urllib.parse as urlparse
|
||||
# Utility function to parse a MIME type
|
||||
def _read_quoted(text, pos):
    """Read a quoted-string body starting just after its opening quote.

    A backslash makes the following character literal.  Returns the decoded
    value and the index just past the closing quote (or the end of ``text``
    when the quote is never closed).
    """
    chars = []
    end = len(text)
    while pos < end:
        c = text[pos]
        pos += 1
        if c == '"':
            break
        if c == "\\":
            if pos >= end:
                # dangling backslash at the very end: keep it literally
                chars.append(c)
                break
            c = text[pos]
            pos += 1
        chars.append(c)
    return "".join(chars), pos


def parse_mime(mimetype):
    """Split a MIME type string into its type, subtype and parameters.

    Args:
        mimetype: e.g. ``'text/gemini; charset=utf-8; lang=en'``.

    Returns:
        A tuple ``([type, subtype], params)``.  ``params`` maps lower-cased
        parameter names to their values; a parameter without ``=`` maps to
        None.  Quoted values are unquoted and their backslash escapes are
        resolved; unquoted values are stripped of surrounding whitespace.
    """
    essence, _, rest = mimetype.strip().partition(";")
    maintype, _, subtype = essence.partition("/")
    params = dict()
    pos, end = 0, len(rest)
    while pos < end:
        # the parameter name is everything before the next "=" or ";"
        stop = pos
        while stop < end and rest[stop] not in "=;":
            stop += 1
        name = rest[pos:stop].strip().lower()
        if stop >= end or rest[stop] == ";":
            # no "=": a bare flag (empty names are just stray semicolons)
            if name:
                params[name] = None
            pos = stop + 1
            continue
        pos = stop + 1  # step over "="
        if pos < end and rest[pos] == '"':
            value, pos = _read_quoted(rest, pos + 1)
            # ignore anything between the closing quote and the next ";"
            while pos < end and rest[pos] != ";":
                pos += 1
        else:
            stop = pos
            while stop < end and rest[stop] != ";":
                stop += 1
            value = rest[pos:stop].strip()
            pos = stop
        pos += 1  # step over ";"
        if name:
            params[name] = value
    return [maintype.strip(), subtype.strip()], params
|
||||
|
||||
# A markdown-style link "[text](url)": group 1 is the text, group 2 the url.
_MD_LINK = r"\[([^\]]+)\]\(([^)]+)\)"

# a link that makes up an entire line of the text
LINK_LINE = re.compile("^" + _MD_LINK + "$", flags=re.MULTILINE)

# a link anywhere inside a line
LINK_INLINE = re.compile(_MD_LINK)
|
||||
|
||||
# Convert gemtext to gopher
|
||||
def _gopher_menu_line(linkname, linkurl, baseurl, basehost, baseport, gemtexthost):
    """Build the gopher menu line for one link found in the rendered text."""
    absoluteurl = urlparse.urljoin(baseurl, linkurl)
    parsed = urlparse.urlparse(absoluteurl)
    # Guess the item type from the extension of the path only, so that the
    # host name or a query string is never mistaken for a file extension.
    ext = os.path.splitext(parsed.path)[1].lower()
    if ext == ".gif":  # g line - GIF image
        linktype = "g"
    elif ext in (".png", ".jpg", ".jpeg", ".tiff"):  # I line - non-GIF image
        linktype = "I"
    elif ext == ".bmp":  # : line - BMP image
        linktype = ":"
    elif ext in (".mp3", ".flac", ".aac"):  # < line - audio file
        linktype = "<"
    else:
        linktype = "1"

    host, port = basehost, baseport
    if parsed.scheme == "gopher":
        selector = parsed.path
        # a gopher URL path may start with /<itemtype>/ before the selector
        if len(selector) > 2 and selector[1] in "0123456789gI:;<dhs" and selector[2] == "/":
            linktype = selector[1]
            selector = selector[2:]
        host = parsed.hostname
        port = parsed.port or 70
    elif parsed.scheme == "gemini":
        if parsed.hostname == gemtexthost:
            # content of the proxied host is served under its own path
            selector = parsed.path
        else:
            # other gemini hosts are reached through the /x/ prefix
            selector = f"/x/{parsed.scheme}/{parsed.netloc}{parsed.path}"
    else:
        linktype = "h"  # force H link type for external links
        selector = "URL:" + absoluteurl
    # a tab inside the display string would break the menu line's fields
    display = linkname.replace("\t", " ")
    return f"{linktype}{display}\t{selector}\t{host}\t{port}"


# Convert gemtext to gopher
def gemtext2gopher(gemtext, baseurl="gemini://tilde.team", basehost="tilde.team", baseport=70, gemtexthost="tilde.team"):
    """Convert a gemtext document into a gopher menu.

    Args:
        gemtext: The gemtext source as a string.
        baseurl: URL of the document, used to resolve relative links.
        basehost: Host name of this gopher server, used in generated links.
        baseport: Port of this gopher server, used in generated links.
        gemtexthost: Gemini host whose content is served under plain
            selectors; other gemini hosts get a ``/x/`` selector.

    Returns:
        The menu as a string in which every line ends with CRLF.  Lines
        that consist of a single link become menu entries; every other
        line becomes an informational ``i`` line.
    """
    # NOTE(review): resolving relative links with urljoin presumably needs
    # "gemini" registered in urllib.parse.uses_relative -- confirm that the
    # importing module has done so.
    # step 1, convert gemtext to html
    html = gem2html.gem2html(gemtext)
    # step 2, convert html to plaintext
    h = html2text.HTML2Text()
    h.use_automatic_links = False  # only use [link text](link url) format, even if link text and link url are one and the same
    h.feed(html)
    text = h.finish()
    h.close()
    # step 3, convert plaintext to gophermap
    text = LINK_LINE.sub(
        lambda m: _gopher_menu_line(m.group(1), m.group(2), baseurl, basehost, baseport, gemtexthost),
        text,
    )
    # links inside a longer line cannot become menu entries; keep their text
    text = LINK_INLINE.sub(lambda m: m.group(1), text)
    # now apply dummy i-lines
    out = []
    for line in text.splitlines():
        if "\t" in line:
            out.append(line)
        else:
            out.append(f"i{line}\t.\tnull.host\t70")
    return "".join(line + "\r\n" for line in out)
|
Loading…
Reference in New Issue