From af6caf2369c6da40924e12207cd70a55e90d12ec Mon Sep 17 00:00:00 2001 From: Solderpunk Date: Mon, 16 Mar 2020 20:29:52 +0100 Subject: [PATCH] Better URL handling. --- gemfeed.py | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/gemfeed.py b/gemfeed.py index 5b1e68c..0cbe6a4 100644 --- a/gemfeed.py +++ b/gemfeed.py @@ -2,6 +2,7 @@ import argparse import datetime import glob import os.path +import urllib.parse from feedgen.feed import FeedGenerator @@ -15,7 +16,7 @@ def find_files(): return files def populate_entry_from_file(filename, base_url, entry): - url = base_url + filename + url = urljoin(base_url, filename) entry.guid(url) entry.link(href=url, rel="alternate") updated = os.path.getctime(filename) @@ -32,6 +33,15 @@ def populate_entry_from_file(filename, base_url, entry): title = filename entry.title(title) +def urljoin(base, url): + base = urllib.parse.urlsplit(base) + base = base._replace(scheme="https") + base = urllib.parse.urlunsplit(base) + joined = urllib.parse.urljoin(base, url) + joined = urllib.parse.urlsplit(joined) + joined = joined._replace(scheme="gemini") + return urllib.parse.urlunsplit(joined) + def main(): # Parse arguments @@ -50,11 +60,13 @@ def main(): default="A Gemini feed", help='feed title') args = parser.parse_args() - # Normalise base URL, quick and dirty - if not args.base_url.startswith("gemini://"): - args.base_url = "gemini://" + args.base_url - if not args.base_url.endswith("/"): - args.base_url = args.base_url + "/" + # Normalise base URL + base_url = urllib.parse.urlsplit(args.base_url) + if not base_url.netloc and base_url.path: + # Handle a naked domain, which urlsplit will interpret as a local path + base_url = base_url._replace(netloc=base_url.path, path="") + base_url = base_url._replace(scheme="gemini") + args.base_url = urllib.parse.urlunsplit(base_url) # Setup feed feed = FeedGenerator() @@ -70,7 +82,7 @@ def main(): if author: feed.author(author) 
feed.link(href=args.base_url, rel='alternate') - feed.link(href=args.base_url + args.output, rel='self') + feed.link(href=urljoin(args.base_url, args.output), rel='self') # Add one entry per .gmi file files = find_files()