From 469335d73a982a130c7ee32ec993b4e281f8125c Mon Sep 17 00:00:00 2001
From: khuxkm fbexl <khuxkm@ttm.sh>
Date: Sun, 15 Nov 2020 03:38:24 +0000
Subject: [PATCH] Initial code commit

---
 .gitignore | 148 ++++++++++++++++++++++++++++++++++++++++
 README.md  |  44 ++++++++++++
 config.py  |  31 +++++++++
 gemini.py  |  51 ++++++++++++++
 index.gmi  |  33 +++++++++
 molniya.py |  41 +++++++++++
 next.cgi   |   4 ++
 prev.cgi   |   4 ++
 rand.cgi   |   4 ++
 zenit.py   | 195 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 10 files changed, 555 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 README.md
 create mode 100644 config.py
 create mode 100644 gemini.py
 create mode 100644 index.gmi
 create mode 100644 molniya.py
 create mode 100755 next.cgi
 create mode 100755 prev.cgi
 create mode 100755 rand.cgi
 create mode 100644 zenit.py

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..1cbddf0
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,148 @@
+
+# Created by https://www.toptal.com/developers/gitignore/api/python
+# Edit at https://www.toptal.com/developers/gitignore?templates=python
+
+### Python ###
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+pip-wheel-metadata/
+share/python-wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+*.py,cover
+.hypothesis/
+.pytest_cache/
+pytestdebug.log
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+db.sqlite3-journal
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+doc/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# pipenv
+#   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+#   However, in case of collaboration, if having platform-specific dependencies or dependencies
+#   having no cross-platform support, pipenv may install dependencies that don't work, or not
+#   install all needed dependencies.
+#Pipfile.lock
+
+# PEP 582; used by e.g. github.com/David-OConnor/pyflow
+__pypackages__/
+
+# Celery stuff
+celerybeat-schedule
+celerybeat.pid
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+pythonenv*
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# Pyre type checker
+.pyre/
+
+# pytype static type analyzer
+.pytype/
+
+# profiling data
+.prof
+
+# End of https://www.toptal.com/developers/gitignore/api/python
+
+# orbit.json file (contains the URLs in the orbit)
+orbit.json
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..3024318
--- /dev/null
+++ b/README.md
@@ -0,0 +1,44 @@
+# molniya - Gemini orbit software
+
+An orbit is like a webring but for gemini. This code was originally written to service LEO[1], the first (to my knowledge) orbit ever made.
+
+A Molniya orbit is a type of satellite orbit designed to provide communications and remote sensing coverage over high latitudes (thanks Wikipedia[2]). It was invented by the Russians for their spy satellites. I feel like it's a fitting name.
+
+[1]: gemini://tilde.team/~khuxkm/leo/
+[2]: https://en.wikipedia.org/wiki/Molniya_orbit
+
+## How to use Molniya
+
+In order to use Molniya, do the following:
+
+1. Clone this repository into a place on your Gemini root directory. (If you're using a userdir for this, that's fine too.)
+
+2. Modify `config.py`. Namely, update `MAIN_PAGE` to be the link to the index.gmi file, and update `REQUIRED_LINKS` to be the links to `next.cgi`, `prev.cgi`, and `rand.cgi`, wherever they are accessible via Gemini. `BACKLINKS` and `determine_capsule` shouldn't need to be changed (unless GUS is down or you want to change how Zenit decides what makes up a capsule).
+
+3. Modify `index.gmi` to link to your files instead of mine.
+
+4. Get people to link to your main page and one of `next.cgi`, `prev.cgi`, and `rand.cgi`.
+
+5. Set a cronjob to run Zenit every now and again. (If I knew how often GUS indexed, I'd give a specific frequency, but I don't, so I won't.)
+
+## What is the orbit.json file?
+
+The `orbit.json` file contains all of the URLs in the orbit. It's how Molniya and Zenit know what URLs are there, and in what order.
+
+## You keep mentioning Zenit. What is it?
+
+Zenit is the Molniya indexer. It uses GUS's backlinks feature to get a list of pages that link to the orbit, and then checks them for having a link to allow people to continue to traverse the webring.
+
+## Can you add links to orbit.json manually?
+
+Of course you can! Just make sure to keep `orbit.json` valid JSON, or it'll break the Molniya library and the entire ring will break.
+
+## Why does Molniya want the next and previous links to contain the URL of the page as a query?
+
+Because Gemini lacks referrers (a good choice), there's no way to tell where a client came from just by studying the request. As such, it needs some way of indicating where in the orbit the client is.
+
+`rand.cgi` lacks this requirement because it just gets a random page anyways. That being said, it's best if the client also puts their URL in the `rand.cgi` link, just so that users selecting the random link option aren't sent back to the very site they just came from.
+
+## Why does Molniya abuse redirects to send the user on to the next page? Why not have a landing page?
+
+Because I don't feel like implementing a landing page to just have the user click off of it. The redirect *should* replace the orbit link in the client's history, so the user shouldn't be adversely affected by this.
diff --git a/config.py b/config.py
new file mode 100644
index 0000000..9301374
--- /dev/null
+++ b/config.py
@@ -0,0 +1,31 @@
+# ------------------------------------------------------------------------------
+# The configuration for the Molniya orbit software.
+# Implemented as a Python library to allow for determine_capsule logic.
+# ------------------------------------------------------------------------------
+
+# The main page of the orbit.
+# Seeds the orbit when orbit.json can't be loaded yet; Zenit also asks BACKLINKS which pages link here to find new members.
+MAIN_PAGE = "gemini://tilde.team/~khuxkm/leo/"
+
+# The backlinks base page.
+# Zenit requests "<BACKLINKS>?<percent-encoded MAIN_PAGE>" and expects a text/gemini list of pages linking to it.
+# In case GUS ever goes down, is replaced, or changes URL, this will allow for easy fixing of Zenit.
+BACKLINKS = "gemini://gus.guru/backlinks"
+
+# Determine a "capsule".
+# Used to prevent one person from flooding the orbit with pages.
+# One page is allowed in the list per "capsule".
+# This function is passed a urllib.parse.ParseResult object, and returns a string that identifies which capsule the URL belongs to.
+def determine_capsule(parsed):
+	# A userdir path ("/~name/...") makes its own capsule; every other URL is identified by netloc alone.
+	if not parsed.path.startswith("/~"):
+		return parsed.netloc
+	return parsed.netloc+"/"+parsed.path.split("/")[1]
+
+# Required links.
+# A page must contain a link that starts with one of these (so a trailing "?<url>" query is fine) to be included in the orbit.
+REQUIRED_LINKS = [
+	"gemini://tilde.team/~khuxkm/leo/next.cgi",
+	"gemini://tilde.team/~khuxkm/leo/prev.cgi",
+	"gemini://tilde.team/~khuxkm/leo/rand.cgi"
+]
diff --git a/gemini.py b/gemini.py
new file mode 100644
index 0000000..a657dad
--- /dev/null
+++ b/gemini.py
@@ -0,0 +1,51 @@
+import os,subprocess
+from urllib.parse import urljoin
+
+def start_response(meta,code="20"): # print the response header "<code> <meta>", terminated by CRLF
+	print("{} {}".format(code,meta),end="\r\n")
+
+def get_input(): # the QUERY_STRING environment variable exactly as set, or "" when it is unset
+	return os.getenv("QUERY_STRING","")
+
+# utility func
+def has_input(): # True when get_input() returns a non-empty string
+	return get_input()!=""
+
+def link(text,url): # print a link line "=> <url> <text>"; the label is the first argument but comes last in the output
+	print("=>",url,text)
+
+def header(text,level=1): # print a heading line: level "#" characters, one space, then text
+	print("".join(["#"*level," ",text]))
+
+# named for convenience; in practice just use header func with different levels
+def h1(text): # level-1 heading ("# text")
+	header(text,level=1)
+
+def h2(text): # level-2 heading ("## text")
+	header(text,level=2)
+
+def h3(text): # level-3 heading ("### text")
+	header(text,level=3)
+
+def text(text=""): # print one line of plain text; called with no argument it prints an empty line
+	print(text,end="\n")
+
+def figlet(tex,**opts):
+	"""Run /usr/bin/figlet on tex and print its output between two ``` lines.
+
+	Each keyword becomes a "-<name> <value>" option pair; with no keywords, "-f standard" is used."""
+	flags = opts or {"f":"standard"}
+	ct = ["/usr/bin/figlet"]
+	for name,value in flags.items():
+		ct += ["-"+name,value]
+	text("```")
+	command_out(ct+[tex])
+	text("```")
+
+def command_lines(ct): # run the argv list ct and yield its stdout split on "\n" (check_output raises on a non-zero exit)
+	# Decode as UTF-8, a superset of ASCII, so output containing non-ASCII characters no longer raises UnicodeDecodeError.
+	yield from subprocess.check_output(ct).decode("utf-8").split("\n")
+
+def command_out(ct,f=text): # run command ct and hand each line of its output to f (text() prints it)
+	# when the output ends with a newline, the final line passed to f is an empty string
+	for out_line in command_lines(ct): f(out_line)
diff --git a/index.gmi b/index.gmi
new file mode 100644
index 0000000..b40c41c
--- /dev/null
+++ b/index.gmi
@@ -0,0 +1,33 @@
+#LEO
+
+LEO is an orbit, a term used here to mean "a webring but for Gemini instead of the web". The name is both a play on Low Earth Orbit, as well as the constellation Leo, the Lion.
+
+##How to join LEO
+
+To join LEO, here's what you need to do:
+
+1. Link to `gemini://tilde.team/~khuxkm/leo/` on your page. This is what will get the indexer to notice your page.
+2. Make sure your page is indexed by GUS (gemini://gus.guru/). GUS's backlink page is what drives the indexer.
+3. Also include a link to one of the following:
+
+```
+Next Page - gemini://tilde.team/~khuxkm/leo/next.cgi?<your url>
+Previous Page - gemini://tilde.team/~khuxkm/leo/prev.cgi?<your url>
+Random Page - gemini://tilde.team/~khuxkm/leo/rand.cgi
+```
+
+If your page links to this page and at least one (1) of the links above, your page will be added to the orbit the next time the indexer runs.
+
+Also, only one page can be added to the orbit per capsule (capsule being defined as the host:port, as well as optional userdir). This is to prevent any one person from flooding LEO with their own pages.
+
+##How do I start my own orbit?
+
+To start your own orbit, download Molniya at the link below. Place it in a gemini root, and modify config.py as directed in Molniya's README.
+
+=> https://tildegit.org/khuxkm/molniya Molniya (will take you to HTTP land, tildegit.org)
+
+##Can I go to the next page now?
+
+Yep!
+
+=> next.cgi Beam me up, Scotty!
diff --git a/molniya.py b/molniya.py
new file mode 100644
index 0000000..85252e7
--- /dev/null
+++ b/molniya.py
@@ -0,0 +1,41 @@
+"""The utility library for the Molniya CGI pages.
+
+Handles loading the orbit, as well as next/previous/random links."""
+import json, random, os, urllib.parse
+# stolen from AV-98
+urllib.parse.uses_relative.append("gemini")
+urllib.parse.uses_netloc.append("gemini")
+
+# The URL of the main page of the orbit, taken from config.py so the README's "edit config.py" step covers the CGI pages too
+from config import MAIN_PAGE
+
+# Load the orbit; fall back to just the main page when orbit.json is missing, unreadable or malformed
+URLS = [MAIN_PAGE]
+try:
+	with open("orbit.json") as f:
+		URLS = json.load(f)["urls"] or [MAIN_PAGE] # an empty list would make the modulo in next/prev divide by zero
+except Exception: # still deliberately best-effort, but no longer swallows SystemExit/KeyboardInterrupt
+	pass
+
+# Where the visitor came from: the percent-decoded query string, when it names a page in the orbit
+CURRENT_URL = urllib.parse.unquote(os.environ.get("QUERY_STRING",MAIN_PAGE))
+CURRENT_URL_PARSED = urllib.parse.urlparse(CURRENT_URL)
+if CURRENT_URL not in URLS:
+	CURRENT_URL = MAIN_PAGE # unknown or missing origin: behave as if the visitor came from the main page
+try:
+	CURRENT_URL_INDEX = URLS.index(CURRENT_URL)
+except ValueError:
+	# the main page itself isn't in the list either; give up and just start at 0
+	CURRENT_URL_INDEX = 0
+
+def next_url(): # the URL after the current one, wrapping around; MAIN_PAGE when the orbit list is empty
+	return URLS[(CURRENT_URL_INDEX+1)%len(URLS)] if URLS else MAIN_PAGE
+
+def prev_url(): # the URL before the current one, wrapping around; MAIN_PAGE when the orbit list is empty
+	return URLS[(CURRENT_URL_INDEX-1)%len(URLS)] if URLS else MAIN_PAGE
+
+def rand_url():
+	"""Return a random orbit URL, avoiding the visitor's current page and the main page whenever possible."""
+	# Filter instead of retrying: the old retry loop never terminated when the orbit held no other URL.
+	candidates = [url for url in URLS if url not in (CURRENT_URL,MAIN_PAGE)]
+	return random.choice(candidates or URLS or [MAIN_PAGE])
diff --git a/next.cgi b/next.cgi
new file mode 100755
index 0000000..560704e
--- /dev/null
+++ b/next.cgi
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+import molniya, gemini
+
+gemini.start_response(molniya.next_url(),"30") # status 30 with the next orbit URL as meta, i.e. a redirect
diff --git a/prev.cgi b/prev.cgi
new file mode 100755
index 0000000..560704e
--- /dev/null
+++ b/prev.cgi
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+import molniya, gemini
+
+gemini.start_response(molniya.prev_url(),"30") # redirect to the previous page (was next_url(), copied from next.cgi)
diff --git a/rand.cgi b/rand.cgi
new file mode 100755
index 0000000..560704e
--- /dev/null
+++ b/rand.cgi
@@ -0,0 +1,4 @@
+#!/usr/bin/env python3
+import molniya, gemini
+
+gemini.start_response(molniya.rand_url(),"30") # redirect to a random page (was next_url(), copied from next.cgi)
diff --git a/zenit.py b/zenit.py
new file mode 100644
index 0000000..cc9b66f
--- /dev/null
+++ b/zenit.py
@@ -0,0 +1,195 @@
+"""Zenit - the Molniya indexer.
+
+Zenit was a series of military photoreconnaissance satellites launched by the Soviet Union between 1961 and 1994. In keeping with the Soviet spy satellite theme, I chose this name for the indexer."""
+import json, urllib.parse, traceback, sys, ssl, socket, string
+from config import *
+# stolen from AV-98
+urllib.parse.uses_relative.append("gemini")
+urllib.parse.uses_netloc.append("gemini")
+
+# Load URL list (stays at just the main page when orbit.json can't be opened or has no "urls" key)
+URLS = [MAIN_PAGE]
+try:
+	with open("orbit.json") as orbit_file:
+		URLS = json.load(orbit_file)["urls"]
+except OSError as err: # IOError is an alias of OSError; we can be a bit more outgoing about our errors here
+	print(f"Error loading orbit.json: {err!r}")
+	print("Continuing on anyways with a list containing only the URL of the main page.")
+except KeyError:
+	print("Malformed orbit.json: no urls list")
+	print("Continuing on anyways with a list containing only the URL of the main page.")
+except: # anything else (invalid JSON, for instance) is treated as fatal
+	print("Error loading orbit.json (not IOError or KeyError):")
+	traceback.print_exc()
+	print("Exiting.")
+	sys.exit(1)
+
+# Utility function to parse a MIME type
+def parse_mime(mimetype):
+	"""Split a MIME type string into ([type, subtype], params).
+
+	params maps each parameter name to its value, or to None when the
+	parameter was given without "=value". Names are lowercased (they are
+	case-insensitive, and grab_content looks up "charset"); quoted values
+	lose their quotes and have backslash escapes resolved; names and
+	unquoted values are stripped of surrounding whitespace.
+
+	Example: 'text/gemini; charset=utf-8; lang=en' gives
+	(["text", "gemini"], {"charset": "utf-8", "lang": "en"}).
+
+	Unlike the earlier character-by-character scanner, this version does not
+	raise IndexError on a trailing "name=" and does not add a bogus "" key
+	for every parameter after the first.
+	"""
+	mimetype = mimetype.strip()
+	# type is everything before the first /; subtype is everything between it and the first ; after it
+	maintype, _, remainder = mimetype.partition("/")
+	subtype, _, rest = remainder.partition(";")
+	# rest is whatever follows that semicolon (empty when there are no parameters)
+	params = dict()
+	index = 0
+	while index<len(rest):
+		# the parameter name is everything before the = or ;
+		start = index
+		while index<len(rest) and rest[index] not in "=;":
+			index+=1
+		name = rest[start:index].strip().lower()
+		# if the string is over or there isn't an equals sign, there's no param value
+		if index>=len(rest) or rest[index]==";":
+			index+=1
+			# an empty name here is just a stray or trailing semicolon, not a parameter
+			if name: params[name]=None
+			continue
+		# otherwise, step over the = and grab the param value
+		index+=1
+		if index<len(rest) and rest[index]=='"':
+			index+=1
+			# quoted value: collect characters up to the closing quote
+			chars = []
+			while index<len(rest) and rest[index]!='"':
+				# a backslash makes the next character literal; a lone trailing backslash is kept
+				if rest[index]=="\\" and index+1<len(rest):
+					index+=1
+				chars.append(rest[index])
+				index+=1
+			value = "".join(chars)
+			# skip the closing quote and anything else before the next ;
+			while index<len(rest) and rest[index]!=";":
+				index+=1
+		else:
+			start = index
+			while index<len(rest) and rest[index]!=";":
+				index+=1
+			value = rest[start:index].strip()
+		# step over the ; so the next pass starts on a parameter name
+		index+=1
+		if name: params[name]=value
+	return [maintype, subtype], params
+
+# Utility function to grab content from a URL
+# Context setup courtesy of my own half-baked spartan client
+def grab_content(url,redirect_num=0):
+	"""Fetch url over Gemini and return a (body, meta) pair.
+
+	2x: body is the content (a str for text/* types, raw bytes otherwise) and meta is the MIME type.
+	3x: the redirect is followed, relative targets included, for at most 5 hops.
+	Anything else, malformed replies included: body is the header line and meta is "text/plain".
+	Certificates are not verified. Network errors propagate; an overlong meta raises ValueError."""
+	if redirect_num>=5:
+		return "Too many redirects!","text/plain"
+	parsed = urllib.parse.urlparse(url)
+	# the TLS context is built once and cached as the module global "ctx"
+	if "ctx" not in globals():
+		ctx = ssl.create_default_context()
+		ctx.check_hostname = False
+		ctx.verify_mode = ssl.CERT_NONE
+		globals()["ctx"]=ctx
+	ctx = globals()["ctx"]
+	# manage the wrapped socket too, so the TLS socket is closed instead of being left to the garbage collector
+	with socket.socket(socket.AF_INET,socket.SOCK_STREAM) as s, ctx.wrap_socket(s,server_hostname=parsed.hostname) as ss:
+		ss.connect((parsed.hostname,parsed.port or 1965))
+		# sendall, because send() is allowed to transmit only part of the request
+		ss.sendall((url.strip()+"\r\n").encode("UTF-8"))
+		out = b""
+		while (data:=ss.recv(2048)):
+			out+=data
+	# tolerate replies with no CRLF or no meta instead of failing to unpack them
+	header, _, content = out.partition(b"\r\n")
+	header = header.decode("utf-8")
+	status, meta = (header.split(None,1)+["",""])[:2]
+	if len(meta)>=1024: # same limit as the old assert, but still enforced under python -O
+		raise ValueError(f"meta is too long ({len(meta)} characters)")
+	if status.startswith("2"):
+		types, params = parse_mime(meta)
+		if types[0]!="text":
+			# if it's not a text result, just return the content
+			return content, meta
+		# assume UTF-8 unless a charset parameter with a value says otherwise
+		return content.decode(params.get("charset") or "utf-8"), meta
+	if status.startswith("3"):
+		# resolve against the current URL so relative redirect targets work too
+		return grab_content(urllib.parse.urljoin(url,meta),redirect_num+1)
+	# 1x wants input, 6x wants a client cert, 4x/5x are errors: nothing we can act on,
+	# so hand back the header itself as text/plain
+	return header, "text/plain"
+
+CAPSULES_IN_ORBIT = {determine_capsule(urllib.parse.urlparse(url)) for url in URLS} # capsules that already have a page in the orbit
+modified_orbit = False
+
+backlinks_url = "?".join([BACKLINKS, urllib.parse.quote(MAIN_PAGE)])
+response, mime = grab_content(backlinks_url)
+if not mime.startswith("text/gemini"): # Backlinks page should return a text/gemini doc; an assert would vanish under python -O
+	sys.exit(f"Backlinks URL returned a response that wasn't text/gemini! ({response},{mime})")
+
+# Collect link targets from the first unbroken run of "=>" lines; everything after that run is ignored.
+links = []
+in_link_run = False
+for line in response.splitlines():
+	if not line.startswith("=>"):
+		if in_link_run:
+			# the run is over; ignore the rest of the lines
+			break
+		continue
+	in_link_run = True
+	# Drop the marker before splitting: whitespace after "=>" is optional in gemtext, and a
+	# bare "=>" has no target at all (indexing parts[1] used to raise IndexError on it).
+	target = line[2:].split(None,1)
+	if target:
+		links.append(target[0])
+
+# filter out just the new links, dropping repeats so one page can't be queued twice
+links = [link for link in dict.fromkeys(links) if link not in URLS]
+
+print("Found {} new link{} to index{}".format(len(links),"" if len(links)==1 else "s","..." if len(links)>0 else "."))
+
+for link in links:
+	# Does its capsule already have representation in the orbit (including links added earlier in this run)?
+	capsule = determine_capsule(urllib.parse.urlparse(link))
+	if capsule in CAPSULES_IN_ORBIT:
+		print(f"Skipping {link} (capsule already in orbit)...")
+		continue
+	# Does it link to any of the required links? (explicit checks below: python -O strips asserts, admitting every link)
+	try:
+		response, mime = grab_content(link)
+	except Exception as e:
+		# one unreachable or misbehaving capsule shouldn't abort the whole indexing run
+		print(f"Skipping {link} (couldn't fetch it: {e!r})...")
+		continue
+	if not mime.startswith("text/gemini"):
+		print(f"Skipping {link} ({mime} response isn't text/gemini and therefore can't link back)...")
+		continue
+	# whitespace after "=>" is optional and a bare "=>" has no target, so drop the marker and split leniently
+	targets = [line[2:].split(None,1) for line in response.splitlines() if line.startswith("=>")]
+	if not any(t and t[0].startswith(tuple(REQUIRED_LINKS)) for t in targets):
+		print(f"Skipping {link} (doesn't link back to orbit)...")
+		continue
+	# If we haven't continue'd by now, the link meets all of the criteria
+	print(f"Adding {link} to the orbit...")
+	URLS.append(link)
+	CAPSULES_IN_ORBIT.add(capsule) # enforce one page per capsule within a single run too
+	modified_orbit = True
+
+if modified_orbit: # orbit.json is only rewritten when this run added at least one link
+	print("Saving modified orbit...")
+	with open("orbit.json","w") as orbit_file:
+		json.dump({"urls":URLS},orbit_file)