Initial commit

2021-08-16 01:30:42 +00:00 · 2021-08-16 01:30:42 +00:00 · 47fbcfa13a
commit 47fbcfa13a
5 changed files with 393 additions and 0 deletions
--- a/21
+++ b/21
@ -0,0 +1,21 @@
+The MIT License (MIT)
+
+Copyright (c) 2021 Robert 'khuxkm' Miles, https://khuxkm.tilde.team <khuxkm@tilde.team>
+
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+
+The above copyright notice and this permission notice shall be included in
+all copies or substantial portions of the Software.
+
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+THE SOFTWARE.
--- a/README.md
+++ b/README.md
@ -0,0 +1,3 @@
+# gempher
+
+Host gemini content via gopher. Requires html2text.
--- a/gem2html.py
+++ b/gem2html.py
@ -0,0 +1,76 @@
+import random, functools, os
+from html import escape
+
+def _rand_n():
+	"""Return a random unsigned 32-bit integer read from the OS entropy source."""
+	# big-endian fold of four random bytes
+	return int.from_bytes(os.urandom(4),"big")
+
+ALPHABET = "0123456789abcdefghijklmnopqrstuvwxyz"
+# every id handed out by rand_id so far; consulted to keep ids unique within this process
+USED_IDS = set()
+def rand_id():
+	"""Return a random base-36 identifier that rand_id has not returned before.
+
+	The identifier is the base-36 spelling (digits taken from ALPHABET) of a random
+	32-bit number. It is recorded in USED_IDS before being returned, which is what
+	makes the duplicate check effective.
+	"""
+	base = len(ALPHABET)
+	while True:
+		n = _rand_n()
+		digits = ""
+		while n>0:
+			n, index = divmod(n,base)
+			digits = ALPHABET[index]+digits
+		# a draw of 0 produces no digits at all and an empty id is useless, so draw again
+		if digits and digits not in USED_IDS:
+			USED_IDS.add(digits)
+			return digits
+
+def gem2html(content,link_callback=lambda url, text: (url, text)):
+	"""Render a gemtext document as an HTML body fragment.
+
+	content is the gemtext source as a string. link_callback(url, text) is called
+	once for every link line and returns the (url, text) pair that is actually
+	rendered, so a caller can rewrite targets or labels. All document text is passed
+	through html.escape before it is emitted.
+
+	Returns a string that starts with an opening <body> tag and ends with </body>.
+	"""
+	out = "<body>\n"
+	pre = False
+	pre_alt = False
+	for line in content.splitlines():
+		if pre:
+			if line[:3]=="```":
+				pre=False
+				out+="</pre>\n"
+				if pre_alt:
+					out+="</figure>\n"
+					pre_alt=False
+			else:
+				out+=escape(line)+"\n"
+		elif line[:3]=="```":
+			if len(line)>3:
+				# text after the opening fence is alt text; emit it as a visually hidden caption
+				# NOTE(review): aria-captionedby does not look like a standard ARIA attribute (aria-labelledby?) -- confirm
+				cap_id = rand_id()
+				out+="<figure role='img' aria-captionedby='{0}'><figcaption id='{0}' style='clip: rect(0 0 0 0); clip-path: inset(50%); height: 1px; overflow: hidden; position: absolute; white-space: nowrap; width: 1px;'>{1}</figcaption>\n".format(cap_id,escape(line[3:]))
+				pre_alt = True
+			pre = True
+			out+="<pre>\n"
+		elif line.startswith("#"):
+			# one, two or three leading # select h1..h3; any further # stay in the heading text
+			level = 3 if line[:3]=="###" else 2 if line[:2]=="##" else 1
+			out+="<h{0}>{1}</h{0}>".format(level,escape(line[level:].strip()))
+		elif line.startswith("* "):
+			item = "<li>{}</li>\n</ul>\n".format(escape(line[1:].strip()))
+			if out.endswith("</ul>\n"):
+				# combine consecutive unordered list items into one unordered list
+				out = out[:-len("</ul>\n")]+item
+			else:
+				out += "<ul>\n"+item
+		elif line.startswith("=>"):
+			# whitespace between the => marker and the URL is optional, so drop the marker before splitting
+			parts = line[2:].split(None,1)
+			if not parts:
+				# no link content at all
+				# just put a literal => in there
+				out+="<p>{}</p>".format(escape("=>"))
+				continue
+			url = parts[0]
+			# a link without a label is displayed as its own URL
+			text = parts[1] if len(parts)>1 else url
+			# now comes the fun part, use the link callback to mutilate these
+			url, text = link_callback(url, text)
+			# and now render
+			out+="<p><a href='{}'>{}</a></p>".format(escape(url),escape(text))
+		elif line.startswith(">"):
+			# the > only marks the line as a quote; the blockquote element already conveys that
+			out+="<blockquote><p>{}</p></blockquote>".format(escape(line[1:].strip()))
+		elif line: # any other line is a text line
+			out+="<p>{}</p>".format(escape(line))
+		else:
+			out+="<p><br></p>"
+	if pre:
+		# the document ended inside a preformatted block; close it so the markup stays balanced
+		out+="</pre>\n"
+		if pre_alt:
+			out+="</figure>\n"
+	out+="</body>"
+	return out
--- a/gempher.py
+++ b/gempher.py
@ -0,0 +1,171 @@
+import configparser, argparse, socketserver, ssl, threading, enum, time, utils
+from urllib import parse as urlparse
+# monkeypatch urllib.parse to understand gemini URLs by adding the scheme to its
+# uses_relative and uses_netloc lists
+urlparse.uses_relative.append('gemini')
+urlparse.uses_netloc.append('gemini')
+# now import the utils (which will use the monkeypatched urllib.parse)
+# NOTE(review): the first import statement of this file also names utils, so the module is
+# presumably already loaded by the time this line runs -- confirm whether this import is still needed
+import utils
+# set up the ssl context used for outgoing gemini connections
+# hostname checking and certificate verification are both switched off, so any server certificate is accepted
+# check_hostname has to be cleared first: the ssl module rejects CERT_NONE while it is still enabled
+ctx = ssl.create_default_context()
+ctx.check_hostname=False
+ctx.verify_mode=ssl.CERT_NONE
+
+class ReturnCode(enum.IntEnum):
+	"""Outcome of one proxied gemini request, as stored in GeminiRequestThread.rc."""
+	# the response was fetched (or was a redirect that got turned into a link)
+	SUCCESS = 0
+	# only the enum module itself is imported, so auto has to be reached through it
+	ERROR = enum.auto()
+	INVALID_RESPONSE = enum.auto()
+	SOCKET_TIMEOUT = enum.auto()
+	# used by the request thread and the handler for well-formed responses that cannot be proxied;
+	# added after the existing members so their numeric values stay the same
+	UNSUPPORTED = enum.auto()
+	UNKNOWN_ERROR = 9999
+
+class Config:
+	"""Server settings read from an INI file, with optional per-key overrides.
+
+	filename is handed to configparser (None skips reading a file). overrides is a
+	mapping that may contain "port", "hostname", "self_hostname" and "server_type";
+	a key present there wins over the file.
+	"""
+	def __init__(self,filename=None,overrides=None):
+		self._conf = configparser.ConfigParser()
+		if filename is not None: self._conf.read(filename)
+		# a fresh dict per instance instead of one default dict shared by every Config
+		self._overrides = dict() if overrides is None else overrides
+	@property
+	def port(self):
+		"""Port the gopher server listens on: [gopher] port, 70 when unset."""
+		if "port" in self._overrides:
+			return self._overrides["port"]
+		# fallback is keyword-only in configparser; passed positionally it raises TypeError
+		return self._conf.getint("gopher","port",fallback=70)
+	@property
+	def hostname(self):
+		"""Hostname of the gemini server: [gemini] hostname. Raises KeyError when unset."""
+		if "hostname" in self._overrides:
+			return self._overrides["hostname"]
+		return self._conf["gemini"]["hostname"]
+	@property
+	def self_hostname(self):
+		"""Hostname of this gopher server: [gopher] hostname. Raises KeyError when unset."""
+		if "self_hostname" in self._overrides:
+			return self._overrides["self_hostname"]
+		return self._conf["gopher"]["hostname"]
+	@property
+	def server_cls(self):
+		"""socketserver class named by [server] type, ThreadingTCPServer when unset."""
+		if "server_type" in self._overrides:
+			name = self._overrides["server_type"]
+		else:
+			name = self._conf.get("server","type",fallback="ThreadingTCPServer")
+		return getattr(socketserver,name)
+
+class GeminiRequestThread(threading.Thread):
+	"""Fetch one gemini URL on a background thread.
+
+	requrl is a parsed URL (it must offer hostname and port and be accepted by
+	urlunparse). After the thread finishes, rc holds a ReturnCode and retval holds:
+	  - [meta, body_bytes] when rc is SUCCESS (a redirect becomes a one-line gemtext link),
+	  - the raw response header when the server answered with a status that is not proxied,
+	  - None otherwise.
+	Setting killswitch asks the read loop to stop after the chunk it is receiving.
+	"""
+	def __init__(self,requrl):
+		# without Thread.__init__ the later start() call raises RuntimeError
+		super().__init__()
+		self.killswitch = threading.Event()
+		self.requrl = requrl
+		self.rc = None
+		self.retval = None
+	def run(self):
+		# imported here because the import line at the top of the module does not pull in socket
+		import socket
+		requrl = self.requrl
+		try:
+			chunks = []
+			with socket.socket(socket.AF_INET,socket.SOCK_STREAM) as s:
+				s.settimeout(5)
+				# closing the TLS wrapper explicitly; the plain socket is detached by wrap_socket
+				with ctx.wrap_socket(s,server_hostname=requrl.hostname) as ss:
+					ss.connect((requrl.hostname,requrl.port or 1965))
+					ss.sendall((urlparse.urlunparse(requrl)+"\r\n").encode("utf-8"))
+					while (data:=ss.recv(1024)):
+						chunks.append(data)
+						if self.killswitch.is_set():
+							# the caller gave up on a server that keeps sending
+							self.rc = ReturnCode.SOCKET_TIMEOUT
+							return
+			header, sep, resp = b"".join(chunks).partition(b"\r\n")
+			if not sep:
+				self.rc = ReturnCode.INVALID_RESPONSE
+				return
+			header = header.decode("utf-8")
+			fields = header.split(None,1)
+			status = fields[0] if fields else ""
+			meta = fields[1] if len(fields)>1 else ""
+			if not status or status[0] not in '123456':
+				self.rc = ReturnCode.INVALID_RESPONSE
+				return
+			self.retval = header
+			if status[0] not in '2345':
+				# 1x (input wanted) and 6x (client certificate wanted) cannot be proxied
+				self.rc = ReturnCode.UNSUPPORTED
+				return
+			if status[0] not in '23':
+				# 4x and 5x are failures reported by the server
+				self.rc = ReturnCode.ERROR
+				return
+			if status[0]=='3':
+				# present a redirect as a gemtext page holding a single link to the target
+				resp = ('=> '+meta+' Redirect target').encode('utf-8')
+				meta = 'text/gemini'
+			self.retval = [meta,resp]
+			self.rc = ReturnCode.SUCCESS
+		except UnicodeDecodeError:
+			self.rc = ReturnCode.INVALID_RESPONSE
+		except socket.timeout:
+			self.rc = ReturnCode.SOCKET_TIMEOUT
+		except Exception:
+			# best effort: any other failure is reported to the caller as an unknown error
+			self.rc = ReturnCode.UNKNOWN_ERROR
+
+class Gempher(socketserver.StreamRequestHandler):
+	"""Gopher request handler that answers selectors with content fetched over gemini.
+
+	create_server() subclasses this with two class attributes: CONFIG (a Config) and
+	PARSED_URL (the parsed gemini:// URL of the configured gemini host).
+	"""
+	def handle(self):
+		"""Read one gopher request line and write the response to self.wfile."""
+		self.gplus = False
+		# only strip the line terminator: a bare strip() would also eat the tab that follows an empty selector
+		req = self.rfile.readline().rstrip(b"\r\n").decode("utf-8","replace")
+		query = None
+		if "\t" in req:
+			req, query = req.split("\t",1)
+		# a query beginning with + marks a Gopher+ request; there may be no query at all
+		if query and query[0]=="+":
+			self.gplus=True
+			query=None
+		requrl = self.PARSED_URL._replace(path=req,query=query or "")
+		if requrl.path.startswith("/x/"):
+			# /x/<scheme>/<netloc>/<path> addresses a URL on some other host
+			nurl = requrl.path[3:].split("/",2)
+			if len(nurl)<2 or not nurl[0] or not nurl[1]:
+				self.send_error("Malformed proxy selector")
+				return
+			rest = nurl[2] if len(nurl)>2 else ""
+			requrl = requrl._replace(scheme=nurl[0],netloc=nurl[1],path="/"+rest)
+		if requrl.scheme=="gopher":
+			itemtype = "1"
+			path = requrl.path
+			# a leading /<type>/ segment carries the gopher item type
+			if len(path)>2 and path[1] in "0123456789gI:;<dhs" and path[2]=="/":
+				itemtype = path[1]
+				requrl = requrl._replace(path=path[2:])
+			port = requrl.port or 70
+			self.send_response((f"{itemtype}Click here to follow through\t{requrl.path}\t{requrl.hostname}\t{port}\r\n").encode())
+			return
+		if requrl.scheme!="gemini":
+			hn = self.CONFIG.self_hostname
+			port = self.CONFIG.port
+			ru = urlparse.urlunparse(requrl)
+			self.send_response((f"hClick here to follow through\tURL:{ru}\t{hn}\t{port}\r\n").encode())
+			# nothing to fetch for a foreign scheme
+			return
+		t = GeminiRequestThread(requrl)
+		t.start()
+		# if the server hasn't sent anything in 5 seconds, a socket timeout will occur
+		# if the server is *still* sending things 5 seconds later, setting the killswitch will terminate the read loop
+		t.join(5)
+		if t.is_alive():
+			t.killswitch.set()
+			t.join()
+		if t.rc==ReturnCode.SUCCESS: # success/redirect
+			if t.retval[0].startswith("text/gemini"):
+				mimetype, params = utils.parse_mime(t.retval[0])
+				# the text encoding travels in the charset parameter; it may be absent or valueless
+				self.send_gemini(t.retval[1],params.get("charset") or "utf-8",requrl)
+			else:
+				self.send_response(t.retval[1])
+		elif t.rc==ReturnCode.ERROR: # error provided by the server
+			self.send_error(t.retval)
+		elif t.rc==ReturnCode.INVALID_RESPONSE: # error caused by the server
+			self.send_error("Server returned invalid response")
+		elif t.rc==ReturnCode.SOCKET_TIMEOUT:
+			self.send_error("Server timed out",2)
+		elif t.rc is None or t.rc==ReturnCode.UNKNOWN_ERROR:
+			self.send_error("Unknown error occurred",2)
+		elif t.rc==ReturnCode.UNSUPPORTED:
+			self.send_error("Server returned valid response that we could not handle")
+		else: # any other unspecified error
+			self.send_error("Unknown error occurred",2)
+	def send_response(self,resp,error=None):
+		"""Write resp (bytes, or str which is encoded as UTF-8) to the client.
+
+		With error set, the payload is framed as an error: a Gopher+ "--<code>" header
+		for Gopher+ clients, otherwise a type-3 menu line pointing at a dummy host.
+		"""
+		if isinstance(resp,str):
+			# wfile is a binary stream
+			resp = resp.encode("utf-8")
+		if self.gplus:
+			if error is None:
+				l = len(resp)
+				self.wfile.write((f"+{l!s}\r\n").encode("ascii"))
+			else:
+				self.wfile.write((f"--{error!s}\r\n").encode("ascii"))
+		elif error is not None:
+			self.wfile.write(b"3")
+		self.wfile.write(resp)
+		if error is not None:
+			if not self.gplus:
+				self.wfile.write(b"\t.\tnull.host\t70")
+			self.wfile.write(b"\r\n")
+	def send_error(self,err,code=1):
+		"""Send the message err as an error response carrying the given error code."""
+		self.send_response(err,code)
+	def send_gemini(self,body,encoding,requrl):
+		"""Decode a text/gemini body, convert it to a gophermap and send it."""
+		try:
+			body = body.decode(encoding,"replace")
+		except LookupError:
+			# the server named an encoding this Python does not know
+			body = body.decode("utf-8","replace")
+		# run it through the gemtext->html->text gauntlet and send it
+		self.send_response(utils.gemtext2gopher(body,urlparse.urlunparse(requrl),self.CONFIG.self_hostname,self.CONFIG.port,self.CONFIG.hostname))
+
+def create_server(server_address, config_fn, overrides=None, server=None):
+	"""Build a socketserver instance that serves Gempher requests.
+
+	server_address is passed straight to the server class. config_fn is the config
+	file name (or None) and overrides the optional override mapping, both handed to
+	Config. server is the server class to instantiate; when None the class named in
+	the configuration is used.
+
+	The returned server has shutdown() replaced by a non-blocking variant and gains a
+	join() method that waits for the serving loop to finish.
+	"""
+	# a fresh dict per call rather than one default dict shared by every call
+	conf = Config(config_fn,dict() if overrides is None else overrides)
+	if server is None: server = conf.server_cls
+	# per-server handler subclass carrying the configuration and the parsed gemini base URL
+	handler = type("Gempher",(Gempher,),{"CONFIG":conf,"PARSED_URL":urlparse.urlparse("gemini://"+conf.hostname)})
+	ret = server(server_address,handler)
+	# NOTE(review): the two helpers below reach into name-mangled private attributes of
+	# socketserver.BaseServer, which ties them to that class's implementation -- confirm on upgrade
+	def __shutdown():
+		# only raise the stop flag; unlike the stock shutdown() this does not wait for the loop to exit
+		ret._BaseServer__shutdown_request=True
+	ret.shutdown = __shutdown
+	def __join():
+		# block until the server's is-shut-down event is set
+		ret._BaseServer__is_shut_down.wait()
+	ret.join = __join
+	return ret
--- a/utils.py
+++ b/utils.py
@ -0,0 +1,122 @@
+import string, gem2html, html2text, re, os.path
+import urllib.parse as urlparse
+# Utility function to parse a MIME type
+def parse_mime(mimetype):
+	"""Split a MIME type string into its type, subtype and parameters.
+
+	Returns ([type, subtype], params). params maps each parameter name to its value
+	as a string, or to None for a parameter written without an = sign. A value in
+	double quotes may contain ; and backslash-escaped characters; the quotes and
+	the escaping backslashes are removed.
+	"""
+	# everything before the first ; names the type, the remainder holds the parameters
+	essence, _, rest = mimetype.strip().partition(";")
+	# type is everything before the /, subtype everything after it
+	maintype, _, subtype = essence.partition("/")
+	maintype = maintype.strip()
+	subtype = subtype.strip()
+	params = dict()
+	index = 0
+	end = len(rest)
+	while index<end:
+		# skip whitespace and the separator left over from the previous parameter
+		while index<end and (rest[index] in string.whitespace or rest[index]==";"):
+			index+=1
+		# the parameter name is everything before the = or ;
+		start = index
+		while index<end and rest[index] not in "=;":
+			index+=1
+		paramName = rest[start:index].strip()
+		# if the string is over or there isn't an equals sign, there's no param value
+		if index>=end or rest[index]==";":
+			if paramName: params[paramName]=None
+			continue
+		# otherwise, grab the param value
+		index+=1
+		if index<end and rest[index]=='"':
+			index+=1
+			paramValue = ""
+			while index<end and rest[index]!='"':
+				# a backslash makes the next character literal; a trailing one is kept as is
+				if rest[index]=="\\" and index+1<end:
+					index+=1
+				paramValue+=rest[index]
+				index+=1
+			# step over the closing quote, then skip until next ;
+			index+=1
+			while index<end and rest[index]!=";": index+=1
+		else:
+			start = index
+			while index<end and rest[index]!=";":
+				index+=1
+			paramValue = rest[start:index].strip()
+		if paramName: params[paramName]=paramValue
+	return [maintype, subtype], params
+
+# a "[text](url)" link that makes up an entire line (MULTILINE anchors ^ and $ to each line);
+# group 1 is the link text, group 2 the url
+LINK_LINE = re.compile(r"^\[([^\]]+)\]\(([^)]+)\)$",re.MULTILINE)
+# the same "[text](url)" shape anywhere inside a line, with the same two groups
+LINK_INLINE = re.compile(r"\[([^\]]+)\]\(([^)]+)\)")
+
+# Convert gemtext to gopher
+def _gopher_link_line(linkname,linkurl,baseurl,basehost,baseport,gemtexthost):
+	"""Return the gophermap line for one link labelled linkname and pointing at linkurl."""
+	absoluteurl = urlparse.urljoin(baseurl,linkurl)
+	parsed = urlparse.urlparse(absoluteurl)
+	# judge the file type from the path alone so a query string cannot hide the extension
+	ext = os.path.splitext(parsed.path)[1].lower()
+	linktype = "1"
+	if ext==".gif": # g line - GIF image
+		linktype = "g"
+	elif ext in (".png",".jpg",".jpeg",".tiff"): # I line - non-GIF image
+		linktype = "I"
+	elif ext==".bmp": # : line - BMP image
+		linktype = ":"
+	elif ext in (".mp3",".flac",".aac"): # < line - audio file
+		linktype = "<"
+	host = basehost
+	port = baseport
+	if parsed.scheme=="gopher":
+		# a real gopher link points straight at its own server
+		selector = parsed.path
+		host = parsed.hostname
+		port = parsed.port or 70
+	elif parsed.scheme=="gemini":
+		if parsed.hostname==gemtexthost:
+			selector = parsed.path
+		else:
+			# other gemini hosts are reached through the /x/ proxy selector
+			selector = f"/x/{parsed.scheme}/{parsed.netloc}{parsed.path}"
+	else:
+		linktype = "h" # force H link type for external links
+		selector = "URL:"+absoluteurl
+	return f"{linktype}{linkname}\t{selector}\t{host}\t{port}"
+
+def gemtext2gopher(gemtext,baseurl="gemini://tilde.team",basehost="tilde.team",baseport=70,gemtexthost="tilde.team"):
+	"""Convert a gemtext document into a gophermap.
+
+	gemtext is the document text. baseurl is the URL it was fetched from; relative
+	links are resolved against it. basehost and baseport are the host and port of
+	this gopher server, used for every link that has to come back through it.
+	gemtexthost is the gemini host whose paths are served directly as selectors.
+
+	Returns the gophermap as a string with one CRLF-terminated line per entry.
+	NOTE(review): resolving relative links against a gemini:// baseurl presumably
+	relies on "gemini" being registered in urllib.parse.uses_relative -- confirm.
+	"""
+	# step 1, convert gemtext to html
+	html = gem2html.gem2html(gemtext)
+	# step 2, convert html to plaintext
+	h = html2text.HTML2Text()
+	h.use_automatic_links = False # only use [link text](link url) format, even if link text and link url are one and the same
+	h.feed(html)
+	text = h.finish()
+	h.close()
+	# step 3, convert plaintext to gophermap
+	def link_line(match):
+		linkname, linkurl = match.groups()
+		return _gopher_link_line(linkname,linkurl,baseurl,basehost,baseport,gemtexthost)
+	# a link that has a line to itself becomes a menu entry
+	text = LINK_LINE.sub(link_line,text)
+	# a link sharing its line with other text cannot be a menu entry, so only its label is kept
+	text = LINK_INLINE.sub(lambda m: m.group(1),text)
+	# now apply dummy i-lines
+	out = []
+	for line in text.splitlines():
+		if "\t" in line:
+			out.append(line)
+		else:
+			out.append(f"i{line}\t.\tnull.host\t70")
+	return "".join(line+"\r\n" for line in out)