Better URL handling.

This commit is contained in:
Solderpunk 2020-03-16 20:29:52 +01:00
parent 113d75f1db
commit af6caf2369
1 changed files with 19 additions and 7 deletions

View File

@ -2,6 +2,7 @@ import argparse
import datetime
import glob
import os.path
import urllib.parse
from feedgen.feed import FeedGenerator
@ -15,7 +16,7 @@ def find_files():
return files
def populate_entry_from_file(filename, base_url, entry):
url = base_url + filename
url = urljoin(base_url, filename)
entry.guid(url)
entry.link(href=url, rel="alternate")
updated = os.path.getctime(filename)
@ -32,6 +33,15 @@ def populate_entry_from_file(filename, base_url, entry):
title = filename
entry.title(title)
def urljoin(base, url):
    """Resolve *url* against *base* and return the result as a gemini URL.

    The join itself is performed with the base's scheme temporarily set to
    "https"; the scheme of the joined result is then set to "gemini".
    NOTE(review): the scheme swap is presumably needed because
    urllib.parse.urljoin only resolves relative references for schemes it
    recognises -- confirm against the urllib.parse documentation.

    Whatever scheme *base* (or the joined result) carried is overwritten,
    so the returned URL always uses the "gemini" scheme.
    """
    # Rebuild the base with a scheme the standard joiner will work with.
    https_base = urllib.parse.urlunsplit(
        urllib.parse.urlsplit(base)._replace(scheme="https")
    )
    resolved = urllib.parse.urlsplit(urllib.parse.urljoin(https_base, url))
    # Put the gemini scheme on the final URL.
    return urllib.parse.urlunsplit(resolved._replace(scheme="gemini"))
def main():
# Parse arguments
@ -50,11 +60,13 @@ def main():
default="A Gemini feed", help='feed title')
args = parser.parse_args()
# Normalise base URL, quick and dirty
if not args.base_url.startswith("gemini://"):
args.base_url = "gemini://" + args.base_url
if not args.base_url.endswith("/"):
args.base_url = args.base_url + "/"
# Normalise base URL
base_url = urllib.parse.urlsplit(args.base_url)
if not base_url.netloc and base_url.path:
# Handle a naked domain, which urlsplit will interpret as a local path
base_url = base_url._replace(netloc=base_url.path, path="")
base_url = base_url._replace(scheme="gemini")
args.base_url = urllib.parse.urlunsplit(base_url)
# Setup feed
feed = FeedGenerator()
@ -70,7 +82,7 @@ def main():
if author:
feed.author(author)
feed.link(href=args.base_url, rel='alternate')
feed.link(href=args.base_url + args.output, rel='self')
feed.link(href=urljoin(args.base_url, args.output), rel='self')
# Add one entry per .gmi file
files = find_files()