gemfeed/gemfeed.py
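"""Generate an Atom feed for Gemini content.

Scans the current directory for world-readable .gmi, .gem and .gemini files
and writes one Atom entry per file to an output file (atom.xml by default).
"""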

import argparse
import datetime
import glob
import os
import os.path
import stat
import urllib.parse
from feedgen.feed import FeedGenerator


def is_world_readable(filename):
    """Return a truthy value if others have read permission on the file."""
    st = os.stat(filename)
    return st.st_mode & stat.S_IROTH


def get_feed_title():
    """Return the first heading of a world-readable index file, or a default."""
    default = "Just another Gemini feed"
    for extension in ("gmi", "gem", "gemini"):
        filename = "index.{}".format(extension)
        if os.path.exists(filename) and is_world_readable(filename):
            return extract_first_heading(filename, default)
    return default


def find_files():
    """Return world-readable Gemini files in the current directory, minus index pages."""
    files = []
    for extension in ("gmi", "gem", "gemini"):
        files.extend(glob.glob("*.{}".format(extension)))
        index = "index.{}".format(extension)
        if index in files:
            files.remove(index)
    return [f for f in files if is_world_readable(f)]
def extract_first_heading(filename, default=""):
with open(filename) as fp:
for line in fp:
if line.startswith("#"):
while line[0] == "#":
line = line[1:]
return line.strip()
return default


def populate_entry_from_file(filename, base_url, entry):
    """Set the guid, link, updated and title attributes of a feed entry from a file."""
    url = urljoin(base_url, filename)
    entry.guid(url)
    entry.link(href=url, rel="alternate")
    # Note: on Unix, getctime() is the inode change time, not the creation time
    updated = os.path.getctime(filename)
    updated = datetime.datetime.fromtimestamp(updated, tz=datetime.timezone.utc)
    entry.updated(updated)
    title = extract_first_heading(filename, filename)
    entry.title(title)


def urljoin(base, url):
    """Join a base URL and a relative URL, keeping the gemini:// scheme.

    urllib.parse does not treat gemini as a hierarchical scheme, so the join
    is performed under https and the scheme is swapped back afterwards.
    """
    base = urllib.parse.urlsplit(base)
    base = base._replace(scheme="https")
    base = urllib.parse.urlunsplit(base)
    joined = urllib.parse.urljoin(base, url)
    joined = urllib.parse.urlsplit(joined)
    joined = joined._replace(scheme="gemini")
    return urllib.parse.urlunsplit(joined)
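
# Example (hypothetical values):
#   urljoin("gemini://example.com/gemlog/", "post.gmi")
#   => "gemini://example.com/gemlog/post.gmi"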


def main():
    # Get default title from index page, if there is one
    feed_title = get_feed_title()

    # Parse arguments
    parser = argparse.ArgumentParser(description='Generate an Atom feed for Gemini content.')
    parser.add_argument('-a', '--author', dest='author', type=str,
                        help="feed author's name")
    parser.add_argument('-b', '--base', dest='base_url', type=str,
                        required=True, help='base URL for feed and entries')
    parser.add_argument('-e', '--email', dest='email', type=str,
                        help="feed author's email address")
    parser.add_argument('-o', '--output', dest='output', type=str,
                        default="atom.xml", help='output filename')
    parser.add_argument('-s', '--subtitle', dest='subtitle', type=str,
                        help='feed subtitle')
    parser.add_argument('-t', '--title', dest='title', type=str,
                        default=feed_title, help='feed title')
    args = parser.parse_args()
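
    # A typical run (hypothetical values) looks like:
    #   python3 gemfeed.py -b gemini://example.com/gemlog/ -t "My Gemlog"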

    # Normalise base URL
    base_url = urllib.parse.urlsplit(args.base_url)
    if not base_url.netloc and base_url.path:
        # Handle a naked domain, which urlsplit will interpret as a local path
        base_url = base_url._replace(netloc=base_url.path, path="")
    base_url = base_url._replace(scheme="gemini")
    args.base_url = urllib.parse.urlunsplit(base_url)

    # Set up feed
    feed = FeedGenerator()
    feed.id(args.base_url)
    feed.title(args.title)
    if args.subtitle:
        feed.subtitle(args.subtitle)
    author = {}
    if args.author:
        author["name"] = args.author
    if args.email:
        author["email"] = args.email
    if author:
        feed.author(author)
    feed.link(href=args.base_url, rel='alternate')
    # Self link points at where the generated feed will live under the base URL
    feed.link(href=urljoin(args.base_url, args.output), rel='self')

    # Add one entry per Gemini file
    files = find_files()
    latest_update = datetime.datetime.fromtimestamp(0, tz=datetime.timezone.utc)
    for filename in files:
        entry = feed.add_entry()
        populate_entry_from_file(filename, args.base_url, entry)
        print("Adding {} with title '{}'...".format(filename, entry.title()))
        # Track the most recent entry timestamp for the feed-level updated element
        if entry.updated() > latest_update:
            latest_update = entry.updated()
    feed.updated(latest_update)

    # Write file
    feed.atom_file(args.output, pretty=True)
    print("Wrote Atom feed to {}.".format(args.output))


if __name__ == "__main__":
    main()