2020-03-16 18:45:04 +00:00
|
|
|
import argparse
|
2020-03-16 17:42:58 +00:00
|
|
|
import datetime
|
|
|
|
import glob
|
2020-03-16 21:18:37 +00:00
|
|
|
import os
|
2020-03-16 17:42:58 +00:00
|
|
|
import os.path
|
2020-03-16 21:44:33 +00:00
|
|
|
import stat
|
2020-03-16 19:29:52 +00:00
|
|
|
import urllib.parse
|
2020-03-16 17:42:58 +00:00
|
|
|
|
|
|
|
from feedgen.feed import FeedGenerator
|
|
|
|
|
2020-03-17 12:09:51 +00:00
|
|
|
def is_world_readable(filename):
    """Return a truthy value if *filename* is readable by "other" users.

    The result is the S_IROTH bit of the file's permission mode (non-zero
    when world-readable, 0 otherwise).
    """
    mode = os.stat(filename).st_mode
    return mode & stat.S_IROTH
|
|
|
|
|
|
|
|
def get_feed_title():
    """Return a title for the feed.

    Looks for a world-readable index page (index.gmi / index.gem /
    index.gemini) in the current directory and uses its first heading as
    the title.  Falls back to a generic default when no usable index page
    exists or the index page has no heading.
    """
    default = "Just another Gemini feed"
    for extension in ("gmi", "gem", "gemini"):
        filename = "index.{}".format(extension)
        # Fix: removed a stray debug print(filename) that wrote every
        # candidate index filename to stdout on each run.
        if os.path.exists(filename) and is_world_readable(filename):
            return extract_first_heading(filename, default)
    return default
|
|
|
|
|
2020-03-16 17:54:49 +00:00
|
|
|
def find_files():
    """Collect the Gemini content files to turn into feed entries.

    Globs the current directory for each recognised gemtext extension,
    skipping the index page for that extension, and keeps only files that
    are world-readable (i.e. actually served).
    """
    candidates = []
    for ext in ("gmi", "gem", "gemini"):
        index_page = "index.{}".format(ext)
        candidates.extend(
            name for name in glob.glob("*.{}".format(ext))
            if name != index_page
        )
    return [name for name in candidates if is_world_readable(name)]
|
|
|
|
|
|
|
|
def extract_first_heading(filename, default=""):
    """Return the text of the first gemtext heading in *filename*.

    A heading is any line starting with one or more ``#`` characters; the
    leading hashes and surrounding whitespace are stripped.  Returns
    *default* when the file contains no heading line.
    """
    with open(filename) as fp:
        for line in fp:
            if line.startswith("#"):
                # Fix: the previous character-by-character loop
                # (`while line[0] == "#": line = line[1:]`) raised
                # IndexError on a final line of only "#" characters with
                # no trailing newline; lstrip handles that safely.
                return line.lstrip("#").strip()
    return default
|
2020-03-16 17:42:58 +00:00
|
|
|
|
2020-03-16 17:54:49 +00:00
|
|
|
def populate_entry_from_file(filename, base_url, entry):
    """Fill in a feedgen entry from one gemtext file.

    Sets the entry's GUID and alternate link to the file's URL under
    *base_url*, its updated time from the file's ctime (as UTC), and its
    title from the file's first heading (falling back to the filename).
    """
    entry_url = urljoin(base_url, filename)
    entry.guid(entry_url)
    entry.link(href=entry_url, rel="alternate")
    # NOTE(review): getctime is metadata-change time on Unix, not content
    # modification time — presumably intentional; confirm before changing.
    ctime = os.path.getctime(filename)
    entry.updated(datetime.datetime.fromtimestamp(ctime, tz=datetime.timezone.utc))
    entry.title(extract_first_heading(filename, filename))
|
|
|
|
|
2020-03-16 19:29:52 +00:00
|
|
|
def urljoin(base, url):
    """Join *url* onto *base*, producing a gemini:// URL.

    urllib.parse.urljoin only resolves relative references for schemes it
    knows to be hierarchical, so the base is temporarily relabelled as
    https for the join and the result relabelled back to gemini.
    """
    https_base = urllib.parse.urlunsplit(
        urllib.parse.urlsplit(base)._replace(scheme="https")
    )
    resolved = urllib.parse.urljoin(https_base, url)
    return urllib.parse.urlunsplit(
        urllib.parse.urlsplit(resolved)._replace(scheme="gemini")
    )
|
|
|
|
|
2020-03-16 18:45:04 +00:00
|
|
|
def main():
    """Generate an Atom feed for the gemtext files in the current directory.

    Parses command-line options, normalises the base URL to gemini://,
    builds a feedgen Atom feed with one entry per world-readable content
    file, and writes it to the output file (atom.xml by default).
    """

    # Get default title from index page, if there is one.  This runs
    # before argument parsing because the result is used as the default
    # for the --title option.
    feed_title = get_feed_title()

    # Parse arguments
    parser = argparse.ArgumentParser(description='Generate an Atom feed for Gemini content.')
    parser.add_argument('-a', '--author', dest='author', type=str,
                        help="feed author's name")
    parser.add_argument('-b', '--base', dest='base_url', type=str,
                        required=True, help='base URL for feed and entries')
    parser.add_argument('-e', '--email', dest='email', type=str,
                        help="feed author's email address")
    parser.add_argument('-o', '--output', dest='output', type=str,
                        default="atom.xml", help='output filename')
    parser.add_argument('-s', '--subtitle', dest='subtitle', type=str,
                        help='feed subtitle')
    parser.add_argument('-t', '--title', dest='title', type=str,
                        default=feed_title, help='feed title')
    args = parser.parse_args()

    # Normalise base URL: force the gemini:// scheme whatever the user typed.
    base_url = urllib.parse.urlsplit(args.base_url)
    if not base_url.netloc and base_url.path:
        # Handle a naked domain, which urlsplit will interpret as a local path
        base_url = base_url._replace(netloc=base_url.path, path="")
    base_url = base_url._replace(scheme="gemini")
    args.base_url = urllib.parse.urlunsplit(base_url)

    # Setup feed
    feed = FeedGenerator()
    feed.id(args.base_url)
    feed.title(args.title)
    if args.subtitle:
        feed.subtitle(args.subtitle)
    # Author name and email are optional, individually and together;
    # feedgen is only told about an author when at least one was given.
    author = {}
    if args.author:
        author["name"] = args.author
    if args.email:
        author["email"] = args.email
    if author:
        feed.author(author)
    feed.link(href=args.base_url, rel='alternate')
    # The feed's self link points at the output file under the base URL.
    feed.link(href=urljoin(args.base_url, args.output), rel='self')

    # Add one entry per .gmi file, tracking the newest entry timestamp so
    # the feed-level updated time can be set afterwards.
    files = find_files()
    latest_update = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)
    for filename in files:
        entry = feed.add_entry()
        populate_entry_from_file(filename, args.base_url, entry)
        print("Adding {} with title '{}'...".format(filename, entry.title()))
        if entry.updated() > latest_update:
            latest_update = entry.updated()
    feed.updated(latest_update)

    # Write file
    feed.atom_file(args.output, pretty=True)
    print("Wrote Atom feed to {}.".format(args.output))
|
2020-03-16 17:54:49 +00:00
|
|
|
|
|
|
|
# Script entry point: run the generator when invoked directly.
if __name__ == "__main__":
    main()
|