commit 469335d73a982a130c7ee32ec993b4e281f8125c Author: khuxkm fbexl Date: Sun Nov 15 03:38:24 2020 +0000 Initial code commit diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..1cbddf0 --- /dev/null +++ b/.gitignore @@ -0,0 +1,148 @@ + +# Created by https://www.toptal.com/developers/gitignore/api/python +# Edit at https://www.toptal.com/developers/gitignore?templates=python + +### Python ### +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +pip-wheel-metadata/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +pytestdebug.log + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ +doc/_build/ + +# PyBuilder +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +.python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# PEP 582; used by e.g. 
github.com/David-OConnor/pyflow +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ +pythonenv* + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# profiling data +.prof + +# End of https://www.toptal.com/developers/gitignore/api/python + +# orbit.json file (contains the URLs in the orbit) +orbit.json diff --git a/README.md b/README.md new file mode 100644 index 0000000..3024318 --- /dev/null +++ b/README.md @@ -0,0 +1,44 @@ +# molniya - Gemini orbit software + +An orbit is like a webring but for gemini. This code was originally written to service LEO[1], the first (to my knowledge) orbit ever made. + +A Molniya orbit is a type of satellite orbit designed to provide communications and remote sensing coverage over high latitudes (thanks Wikipedia[2]). It was invented by the Russians for their spy satellites. I feel like it's a fitting name. + +[1]: gemini://tilde.team/~khuxkm/leo/ +[2]: https://en.wikipedia.org/wiki/Molniya_orbit + +## How to use Molniya + +In order to use Molniya, do the following: + +1. Clone this repository into a place on your Gemini root directory. (If you're using a userdir for this, that's fine too.) + +2. Modify `config.py`. Namely, update `MAIN_PAGE` to be the link to the index.gmi file, and update `REQUIRED_LINKS` to be the links to `next.cgi`, `prev.cgi`, and `rand.cgi`, wherever they are accessible via Gemini. `BACKLINKS` and `determine_capsule` shouldn't need to be changed (unless GUS is down or you want to change how Zenit decides what makes up a capsule). + +3. Modify `index.gmi` to link to your files instead of mine. + +4. 
Get people to link to your main page and one of `next.cgi`, `prev.cgi`, and `rand.cgi`. + +5. Set a cronjob to run Zenit every now and again. (If I knew how often GUS indexed, I'd give a specific frequency, but I don't, so I won't.) + +## What is the orbit.json file? + +The `orbit.json` file contains all of the URLs in the orbit. It's how Molniya and Zenit know what URLs are there, and in what order. + +## You keep mentioning Zenit. What is it? + +Zenit is the Molniya indexer. It uses GUS's backlinks feature to get a list of pages that link to the orbit, and then checks them for having a link to allow people to continue to traverse the webring. + +## Can you add links to orbit.json manually? + +Of course you can! Just make sure to keep `orbit.json` valid JSON, or it'll break the Molniya library and the entire ring will break. + +## Why does Molniya want the next and previous links to contain the URL of the page as a query? + +Because Gemini lacks referrers (a good choice), there's no way to tell where a client came from just by studying the request. As such, it needs some way of indicating where in the orbit the client is. + +`rand.cgi` lacks this requirement because it just gets a random page anyways. That being said, it's best if the client also puts their URL in the `rand.cgi` link, just so that users selecting the random link option aren't sent back to the very site they just came from. + +## Why does Molniya abuse redirects to send the user on to the next page? Why not have a landing page? + +Because I don't feel like implementing a landing page to just have the user click off of it. The redirect *should* replace the orbit link in the client's history, so the user shouldn't be adversely affected by this. diff --git a/config.py b/config.py new file mode 100644 index 0000000..9301374 --- /dev/null +++ b/config.py @@ -0,0 +1,31 @@ +# ------------------------------------------------------------------------------ +# The configuration for the Molniya orbit software. 
# Implemented as a Python library to allow for determine_capsule logic.
# ------------------------------------------------------------------------------

# The main page of the orbit.
# Is used to seed the orbit when it's first created, as well as to find new pages to include in the orbit.
MAIN_PAGE = "gemini://tilde.team/~khuxkm/leo/"

# The backlinks base page.
# Takes a URL as a query and returns a list of pages that backlink to it.
# In case GUS ever goes down, is replaced, or changes URL, this will allow for easy fixing of Zenit.
BACKLINKS = "gemini://gus.guru/backlinks"

# Determine a "capsule".
# Used to prevent one person from flooding the orbit with pages.
# One page is allowed in the list per "capsule".
def determine_capsule(parsed):
    """Return the string identifying the capsule a URL belongs to.

    ``parsed`` is a ``urllib.parse.ParseResult``. The identifier is the
    network location; when the path starts with ``/~`` the first path
    segment (the userdir) is appended so each userdir counts as its own
    capsule.
    """
    host = parsed.netloc
    if not parsed.path.startswith("/~"):
        return host
    # path looks like "/~user/..."; element 0 of the split is the empty
    # string before the leading slash, element 1 is "~user"
    userdir = parsed.path.split("/")[1]
    return f"{host}/{userdir}"

# Required links.
# A site must have one of these links to be included in the orbit.
"""Small helpers for writing Gemini CGI scripts.

Every output helper writes to standard output, which is what a CGI script's
server reads the response from.
"""
import os,subprocess
from urllib.parse import urljoin


def start_response(meta, code="20"):
    """Print the response header line ``<code> <meta>`` terminated by CRLF.

    ``code`` defaults to "20"; the orbit's CGI scripts pass "30" together
    with the redirect target as ``meta``.
    """
    print(f"{code} {meta}", end="\r\n")


def get_input():
    """Return the raw ``QUERY_STRING`` environment variable, or "" if unset."""
    return os.environ.get("QUERY_STRING", "")


# utility func
def has_input():
    """Return True when the request carried a non-empty query string."""
    return bool(get_input())


def link(text, url):
    """Print a gemtext link line pointing at ``url`` labelled ``text``."""
    print(f"=> {url} {text}")


def header(text, level=1):
    """Print a gemtext heading made of ``level`` '#' characters and ``text``."""
    print("#" * level + " " + text)


# named for convenience; in practice just use header func with different levels
def h1(text):
    """Print a level-1 heading."""
    header(text, 1)


def h2(text):
    """Print a level-2 heading."""
    header(text, 2)


def h3(text):
    """Print a level-3 heading."""
    header(text, 3)


def text(text=""):
    """Print one line of plain text (an empty line by default)."""
    print(text)


def figlet(tex, **opts):
    """Print ``tex`` rendered by ``/usr/bin/figlet`` inside a preformatted block.

    Each keyword argument ``k=v`` is passed to figlet as ``-k v``; with no
    keyword arguments ``-f standard`` is used. Values are converted with
    ``str`` so that e.g. ``w=80`` works (subprocess only accepts strings).

    Raises whatever ``subprocess.check_output`` raises, e.g.
    ``FileNotFoundError`` if figlet is missing or ``CalledProcessError`` on a
    non-zero exit.
    """
    if not opts:
        opts = {"f": "standard"}
    ct = ["/usr/bin/figlet"]
    for flag, value in opts.items():
        ct.append("-" + flag)
        ct.append(str(value))
    ct.append(tex)
    text("```")
    command_out(ct)
    text("```")


def command_lines(ct):
    """Run the command ``ct`` (an argv list) and yield its stdout line by line.

    Output is decoded as UTF-8 with undecodable bytes replaced, so non-ASCII
    output no longer aborts the script with ``UnicodeDecodeError``. As before,
    the output is split on "\\n", so output ending in a newline yields a final
    empty string.
    """
    output = subprocess.check_output(ct).decode("utf-8", errors="replace")
    yield from output.split("\n")


def command_out(ct, f=text):
    """Run the command ``ct`` and call ``f`` on each line of its output."""
    for l in command_lines(ct):
        f(l)
"""The utility library for the Molniya CGI pages.

Handles loading the orbit, as well as next/previous/random links."""
import json, random, os, urllib.parse
# stolen from AV-98
urllib.parse.uses_relative.append("gemini")
urllib.parse.uses_netloc.append("gemini")

# The URL of the main page of the orbit
MAIN_PAGE = "gemini://tilde.team/~khuxkm/leo/"

# Start from an orbit containing only the main page and replace it with the
# list stored in orbit.json when that file can be read.
URLS = [MAIN_PAGE]
try:
    with open("orbit.json") as f:
        _loaded_urls = json.load(f)["urls"]
except (OSError, ValueError, KeyError, TypeError):
    # Deliberately best-effort: a missing, unreadable or malformed orbit.json
    # (json.JSONDecodeError is a ValueError) leaves the one-page orbit in place.
    pass
else:
    # An empty or non-list "urls" value would make the modulo arithmetic in
    # next_url/prev_url fail, so it is ignored like any other malformed file.
    if isinstance(_loaded_urls, list) and _loaded_urls:
        URLS = _loaded_urls

# The page the client says it came from (the percent-decoded query string).
CURRENT_URL = urllib.parse.unquote(os.environ.get("QUERY_STRING",MAIN_PAGE))
CURRENT_URL_PARSED = urllib.parse.urlparse(CURRENT_URL)
if CURRENT_URL not in URLS:
    # Unknown origin: pretend the client came from the main page.
    CURRENT_URL = MAIN_PAGE
try:
    CURRENT_URL_INDEX = URLS.index(CURRENT_URL)
except ValueError:
    # give up and just start at 0
    CURRENT_URL_INDEX = 0


def next_url():
    """Return the URL after the current one, wrapping around at the end."""
    return URLS[(CURRENT_URL_INDEX+1)%len(URLS)]


def prev_url():
    """Return the URL before the current one, wrapping around at the start."""
    return URLS[(CURRENT_URL_INDEX-1)%len(URLS)]


def rand_url():
    """Return a random orbit URL, avoiding the current page and the main page.

    The previous implementation re-drew until it found such a URL and so
    looped forever when none existed (for example a fresh orbit that only
    contains the main page). Now the restrictions are relaxed step by step:
    first any page other than the current one is accepted, and as a last
    resort any page in the orbit.
    """
    candidates = [u for u in URLS if u != CURRENT_URL and u != MAIN_PAGE]
    if not candidates:
        candidates = [u for u in URLS if u != CURRENT_URL] or URLS
    return random.choice(candidates)
before the / + while index=len(mimetype): return [type,subtype], dict() + params = dict() + while index=len(mimetype) or mimetype[index]==";": + index+=1 + params[paramName]=None + continue + # otherwise, grab the param value + index+=1 + paramValue = "" + if mimetype[index]=='"': + index+=1 + while True: + while index=len(mimetype): break + c = mimetype[index] + index+=1 + if c=="\\": + if index>=len(mimetype): + paramValue+=c + break + paramValue+=mimetype[index] + index+=1 + else: + break + # skip until next ; + while index=5: + return "Too many redirects!","text/plain" + parsed = urllib.parse.urlparse(url) + if "ctx" not in globals(): + ctx = ssl.create_default_context() + ctx.check_hostname = False + ctx.verify_mode = ssl.CERT_NONE + globals()["ctx"]=ctx + else: + ctx = globals()["ctx"] + with socket.socket(socket.AF_INET,socket.SOCK_STREAM) as s: + ss = ctx.wrap_socket(s,server_hostname=parsed.hostname) + ss.connect((parsed.hostname,parsed.port or 1965)) + ss.send((url.strip()+"\r\n").encode("UTF-8")) + out = b"" + while (data:=ss.recv(2048)): + out+=data + header, content = out.split(b"\r\n",1) + status, meta = header.decode("utf-8").split(None,1) + assert len(meta)<1024 + if status[0]=="2": + types, params = parse_mime(meta) + if types[0]=="text": + # assume UTF-8 + charset = "utf-8" + # ...but if another charset is given accept it + if "charset" in params: + charset = params["charset"] + # decode and return + return content.decode(charset), meta + else: + # if it's not a text result, just return the content + return content, meta + elif status[0]=="3": + # if it's a redirect, then let's follow it + return grab_content(meta,redirect_num+1) + else: + # Either: + # 1x - it wants an input, which we have no agency to give + # 6x - it wants a client cert, which we have no agency to give + # 4x or 5x - there's an error + # Return the header with a mimetype of text/plain. If this were a real library I might throw an error here, but this is just to make Zenit work. 
# ------------------------------------------------------------------------------
# Indexing run: ask the backlinks service which pages link to the main page,
# then add every new page that also links to one of REQUIRED_LINKS, allowing
# at most one page per capsule.
# ------------------------------------------------------------------------------

def _link_target(line):
    """Return the URL of a gemtext link line, or None if the line has none.

    Handles "=> url label" as well as "=>url label" (no whitespace after the
    arrow), and a bare "=>" with nothing after it.
    """
    if not line.startswith("=>"):
        return None
    fields = line[2:].split(None, 1)
    return fields[0] if fields else None


def _first_link_run(document):
    """Return the URLs of the first uninterrupted run of link lines.

    Lines before the first link line are skipped; collection stops at the
    first non-link line after the run has started.
    """
    targets = []
    in_run = False
    for line in document.splitlines():
        if line.startswith("=>"):
            in_run = True
            target = _link_target(line)
            if target is not None:
                targets.append(target)
        elif in_run:
            break
    return targets


def _links_back(document):
    """Return True if any link in the document starts with a required link."""
    required = tuple(REQUIRED_LINKS)
    for line in document.splitlines():
        target = _link_target(line)
        if target is not None and target.startswith(required):
            return True
    return False


CAPSULES_IN_ORBIT = {determine_capsule(urllib.parse.urlparse(url)) for url in URLS}
modified_orbit = False

backlinks_url = "?".join([BACKLINKS, urllib.parse.quote(MAIN_PAGE)])
response, mime = grab_content(backlinks_url)
# Backlinks page should return a text/gemini doc.
# Raised explicitly rather than asserted so the check survives `python -O`.
if not mime.startswith("text/gemini"):
    raise RuntimeError(f"Backlinks URL returned a response that wasn't text/gemini! ({response},{mime})")

# filter out just the new links (dict.fromkeys drops duplicates, keeps order)
known_urls = set(URLS)
links = list(dict.fromkeys(l for l in _first_link_run(response) if l not in known_urls))

print("Found {} new link{} to index{}".format(len(links),"" if len(links)==1 else "s","..." if len(links)>0 else "."))

for link in links:
    # Things to consider for a new link:
    # Does its capsule already have representation in the orbit?
    capsule = determine_capsule(urllib.parse.urlparse(link))
    if capsule in CAPSULES_IN_ORBIT:
        # skip
        print(f"Skipping {link} (capsule already in orbit)...")
        continue
    # Does it link to any of the required links?
    try:
        response, mime = grab_content(link)
    except Exception as e:
        # One unreachable or misbehaving page must not abort the whole run
        # (and lose the pages already accepted), so report it and move on.
        print(f"Skipping {link} (error fetching page: {e!r})...")
        continue
    if not mime.startswith("text/gemini"):
        print(f"Skipping {link} ({mime} response isn't text/gemini and therefore can't link back)...")
        continue
    if not _links_back(response):
        print(f"Skipping {link} (doesn't link back to orbit)...")
        continue
    # If we haven't continue'd by now, the link meets all of the criteria
    print(f"Adding {link} to the orbit...")
    URLS.append(link)
    # Record the capsule immediately so a second new page from the same
    # capsule found in this same run is rejected too.
    CAPSULES_IN_ORBIT.add(capsule)
    modified_orbit = True

if modified_orbit:
    print("Saving modified orbit...")
    with open("orbit.json","w") as f:
        json.dump(dict(urls=URLS),f)