diff --git a/bin/build_nhc_opml b/bin/build_nhc_opml index 296779d..fb636fd 100755 --- a/bin/build_nhc_opml +++ b/bin/build_nhc_opml @@ -1,35 +1,5 @@ #!/bin/sh -e -node -e 'const baseUrl = "https://www.nhc.noaa.gov/aboutrss.shtml"; -require("node:https").get(baseUrl, res => { - const chunks = []; - res.on("data", chunk => chunks.push(chunk)); - res.on("end", () => { - process.stdout.write(JSON.stringify({ - opml: { - "@version": "2.0", - head: { - title: "National Hurricane Center and Central Pacific Hurricane Center RSS feeds", - dateModified: new Date().toUTCString(), - ownerName: "lucidiot", - ownerEmail: "lucidiot@envs.net", - ownerId: "https://tilde.town/~lucidiot/contact.html", - docs: "http://dev.opml.org/spec2.html" - }, - body: { - outline: Array.from( - Buffer.concat(chunks).toString("latin1").matchAll(/]*href="[^"]+\.xml"[^>]*>\s*]*src="[^"]*gifs\/xml.gif"[^>]*>\s*<\/a>\s*(?[^<]+)]*href="(?[^"]+\.xml)"[^>]*>/gi), - ({ groups: { name, url } }) => { - const text = name.replace(/[:\s]*$/g, "").replace(/&#(\d+);/, (_, code) => String.fromCharCode(parseInt(code, 10))).replace(/&(amp|gt|lt|apos|quot);/, (_, name) => ({ amp: "&", gt: ">", lt: "<", apos: "'"'"'", quot: "\"" }[name])); - return { - "@type": "rss", - "@text": text, - "@xmlUrl": new URL(url, baseUrl).href, - "@language": ["Español", "Atlantico", "Cartera"].some(word => text.includes(word)) ? "es" : "en-us" - } - }, - ) - } - } - }), "utf-8"); - }); -})' | oq -i json -o xml . +# The page is encoded as latin1 but xidel does not convert it by itself +curl --silent --fail -H 'User-Agent: RSRSSS/1.0 (+https://envs.net/~lucidiot/rsrsss/feed.xml)' https://www.nhc.noaa.gov/aboutrss.shtml | + iconv -f ISO-8859-1 -t UTF-8 | + xidel --silent - --extract-kind=xquery3 --extract-file="$(dirname "$(CDPATH= cd -- "$(dirname -- "$0")" && pwd -P)")/xquery/nhc_opml.xqy" --output-format=xml diff --git a/xquery/nhc_opml.xqy b/xquery/nhc_opml.xqy new file mode 100644 index 0000000..ade799d --- /dev/null +++ b/xquery/nhc_opml.xqy @@ -0,0 +1,31 @@ +xquery version "3.0" encoding "utf-8"; + + + National Hurricane Center and Central Pacific Hurricane Center RSS feeds + {format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")} + lucidiot + lucidiot@envs.net + https://tilde.town/~lucidiot/contact.html + http://dev.opml.org/spec2.html + + + { + (: + Each feed is listed as a link with an RSS icon inside it, + followed by a text node representing the feed name, + followed by another link with the URL as its text. + Start by picking the link with the image in it… + :) + for $feed in //a[ends-with(@href, ".xml")][./img] + (: Find the text node that follows the image to get the feed name :) + let $text := substring-before($feed/following-sibling::text()[1], ":") + return + (: The only way to determine the feed's language without fetching the feeds themselves is to look for Spanish keywords. :) + } + +