gemfeed/gemfeed.py
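"""Generate an Atom feed for Gemini content.

Scans the current directory for world-readable .gmi, .gem and .gemini files
and writes one Atom entry per file to an output file (atom.xml by default).
"""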

import argparse
import datetime
import glob
import os
import os.path
import stat
import urllib.parse
from feedgen.feed import FeedGenerator


def is_world_readable(filename):
    """Return a truthy value if others have read permission on the file."""
    st = os.stat(filename)
    return st.st_mode & stat.S_IROTH


def get_feed_title():
    """Return the first heading of a world-readable index file, or a default."""
    default = "Just another Gemini feed"
    for extension in ("gmi", "gem", "gemini"):
        filename = "index.{}".format(extension)
        if os.path.exists(filename) and is_world_readable(filename):
            return extract_first_heading(filename, default)
    return default


def find_files():
    """Return world-readable Gemini files in the current directory, minus index pages."""
    files = []
    for extension in ("gmi", "gem", "gemini"):
        files.extend(glob.glob("*.{}".format(extension)))
        index = "index.{}".format(extension)
        if index in files:
            files.remove(index)
    return [f for f in files if is_world_readable(f)]
def extract_first_heading(filename, default=""):
with open(filename) as fp:
for line in fp:
if line.startswith("#"):
while line[0] == "#":
line = line[1:]
return line.strip()
return default


def populate_entry_from_file(filename, base_url, entry):
    """Set the guid, link, updated and title attributes of a feed entry from a file."""
    url = urljoin(base_url, filename)
    entry.guid(url)
    entry.link(href=url, rel="alternate")
    # Note: on Unix, getctime() is the inode change time, not the creation time
    updated = os.path.getctime(filename)
    updated = datetime.datetime.fromtimestamp(updated, tz=datetime.timezone.utc)
    entry.updated(updated)
    title = extract_first_heading(filename, filename)
    entry.title(title)


def urljoin(base, url):
    """Join a base URL and a relative URL, keeping the gemini:// scheme.

    urllib.parse does not treat gemini as a hierarchical scheme, so the join
    is performed under https and the scheme is swapped back afterwards.
    """
    base = urllib.parse.urlsplit(base)
    base = base._replace(scheme="https")
    base = urllib.parse.urlunsplit(base)
    joined = urllib.parse.urljoin(base, url)
    joined = urllib.parse.urlsplit(joined)
    joined = joined._replace(scheme="gemini")
    return urllib.parse.urlunsplit(joined)
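
# Example (hypothetical values):
#   urljoin("gemini://example.com/gemlog/", "post.gmi")
#   => "gemini://example.com/gemlog/post.gmi"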


def main():
    # Get default title from index page, if there is one
    feed_title = get_feed_title()

    # Parse arguments
    parser = argparse.ArgumentParser(description='Generate an Atom feed for Gemini content.')
    parser.add_argument('-a', '--author', dest='author', type=str,
                        help="feed author's name")
    parser.add_argument('-b', '--base', dest='base_url', type=str,
                        required=True, help='base URL for feed and entries')
    parser.add_argument('-e', '--email', dest='email', type=str,
                        help="feed author's email address")
    parser.add_argument('-o', '--output', dest='output', type=str,
                        default="atom.xml", help='output filename')
    parser.add_argument('-s', '--subtitle', dest='subtitle', type=str,
                        help='feed subtitle')
    parser.add_argument('-t', '--title', dest='title', type=str,
                        default=feed_title, help='feed title')
    args = parser.parse_args()
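
    # A typical run (hypothetical values) looks like:
    #   python3 gemfeed.py -b gemini://example.com/gemlog/ -t "My Gemlog"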

    # Normalise base URL
    base_url = urllib.parse.urlsplit(args.base_url)
    if not base_url.netloc and base_url.path:
        # Handle a naked domain, which urlsplit will interpret as a local path
        base_url = base_url._replace(netloc=base_url.path, path="")
    base_url = base_url._replace(scheme="gemini")
    args.base_url = urllib.parse.urlunsplit(base_url)

    # Set up feed
    feed = FeedGenerator()
    feed.id(args.base_url)
    feed.title(args.title)
    if args.subtitle:
        feed.subtitle(args.subtitle)
    author = {}
    if args.author:
        author["name"] = args.author
    if args.email:
        author["email"] = args.email
    if author:
        feed.author(author)
    feed.link(href=args.base_url, rel='alternate')
    # Self link points at where the generated feed will live under the base URL
    feed.link(href=urljoin(args.base_url, args.output), rel='self')

    # Add one entry per Gemini file
    files = find_files()
    latest_update = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)
    for filename in files:
        entry = feed.add_entry()
        populate_entry_from_file(filename, args.base_url, entry)
        print("Adding {} with title '{}'...".format(filename, entry.title()))
        # Track the most recent entry timestamp for the feed-level updated element
        if entry.updated() > latest_update:
            latest_update = entry.updated()
    feed.updated(latest_update)

    # Write file
    feed.atom_file(args.output, pretty=True)
    print("Wrote Atom feed to {}.".format(args.output))


if __name__ == "__main__":
    main()