Use a common function to build OPML structures

This commit is contained in:
~lucidiot 2023-11-20 10:47:15 +01:00
parent 755b4777c9
commit 9424b590ca
12 changed files with 246 additions and 311 deletions

View File

@ -4,7 +4,7 @@ all: css/sprites.css $(patsubst xquery/opml/%.xqy,opml/%.opml,$(wildcard xquery/
USER_AGENT:="RSRSSS/1.0 (+https://envs.net/~lucidiot/rsrsss/feed.xml)"
export XIDEL_OPTIONS:=--silent --user-agent=$(USER_AGENT) --wait=1 --trace-stack
opml/%.opml: xquery/opml/%.xqy
opml/%.opml: xquery/opml/%.xqy xquery/functions.xqy
mkdir -p -ma=x,u=rwx $(dir $@)
xidel --extract-kind=xquery3 --extract-file="$<" --output-format=xml > $@
chmod a=r,u=rw $@

20
xquery/functions.xqy Normal file
View File

@ -0,0 +1,20 @@
xquery version "3.0" encoding "utf-8";
module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions";
(:
    Builds a complete OPML 2.0 document around a sequence of outlines.

    $title    : text placed in head/title.
    $outlines : elements copied as-is into body; the declared type
                element(outline)* only admits elements named "outline".

    Returns the opml root element. The head carries fixed owner and docs
    metadata plus a dateModified value computed when the function runs.
:)
declare function rsrsss:make-opml($title as xs:string, $outlines as element(outline)*) as element(opml) {
    (: Move the current instant to a zero offset so the literal "GMT" suffix in the picture below is accurate. :)
    let $utc-now := adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S"))
    (: Abbreviated day name, day, abbreviated month name, year, then the time of day. :)
    let $modified := format-dateTime($utc-now, "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")
    return
        <opml version="2.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="https://envs.net/~lucidiot/rsrsss/xsd/opml.xsd">
            <head>
                <title>{$title}</title>
                <dateModified>{$modified}</dateModified>
                <ownerName>lucidiot</ownerName>
                <ownerEmail>lucidiot@envs.net</ownerEmail>
                <ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
                <docs>http://dev.opml.org/spec2.html</docs>
            </head>
            <body>
                {$outlines}
            </body>
        </opml>
};

View File

@ -1,13 +1,8 @@
<opml version="2.0">
<head>
<title>Alert-Hub.org CAP alert feeds</title>
<dateModified>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</dateModified>
<ownerName>lucidiot</ownerName>
<ownerEmail>lucidiot@envs.net</ownerEmail>
<ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
import module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions" at "../functions.xqy";
rsrsss:make-opml(
"Alert-Hub.org CAP alert feeds",
(
<outline text="Sources">
{
json(fn:unparsed-text("https://alert-hub-sources.s3.amazonaws.com/json"))/sources/source[capAlertFeedStatus != "bypassed"]
@ -19,7 +14,7 @@
htmlUrl="{registerUrl}"
/>
}
</outline>
</outline>,
<outline text="Generated feeds">
{
json(fn:unparsed-text("https://alert-hub-subscriptions.s3.amazonaws.com/json"))//subscriptions/subscription
@ -34,5 +29,5 @@
/>
}
</outline>
</body>
</opml>
)
)

View File

@ -1,28 +1,19 @@
<opml version="2.0">
<head>
<title>Chicago Transit Authority RSS feeds</title>
<dateModified>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</dateModified>
<ownerName>lucidiot</ownerName>
<ownerEmail>lucidiot@envs.net</ownerEmail>
<ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
import module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions" at "../functions.xqy";
rsrsss:make-opml(
"Chicago Transit Authority RSS feeds",
(: Open the CTA RSS feeds page and grab the list items. :)
doc('https://www.transitchicago.com/rss/default.aspx')//ul[@class="rsslist"]/li
! <outline text="{substring-after(a/normalize-space(text()), 'All ')}">
{
(: Open the CTA RSS feeds page and grab the list items. :)
doc('https://www.transitchicago.com/rss/default.aspx')//ul[@class="rsslist"]/li
! <outline text="{substring-after(a/normalize-space(text()), 'All ')}">
{
(., ul/li)
! <outline
type="rss"
version="RSS"
text="{a/normalize-space(text())}"
xmlUrl="{resolve-uri(a/@href, 'https://www.transitchicago.com/rss/default.aspx')}"
language="en-us"
/>
}
</outline>
(., ul/li)
! <outline
type="rss"
version="RSS"
text="{a/normalize-space(text())}"
xmlUrl="{resolve-uri(a/@href, 'https://www.transitchicago.com/rss/default.aspx')}"
language="en-us"
/>
}
</body>
</opml>
</outline>
)

View File

@ -1,60 +1,50 @@
doc("https://feeds.enviroflash.info/")//*[contains(@class, "formrow")]//tr/
<opml version="2.0">
<head>
<title>EnviroFlash RSS feeds</title>
<dateModified>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</dateModified>
<ownerName>lucidiot</ownerName>
<ownerEmail>lucidiot@envs.net</ownerEmail>
<ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
{
.//a[ends-with(@href, ".xml")]
! <outline
type="rss"
version="Atom"
text="{text()}"
xmlUrl="{resolve-uri(@href)}"
language="en-us"
/>
}
{
for $stateCode in .//option/@value[.!="0"]
return x:parse-html(pxp:json(x:request(x:form(
<form action="https://feeds.enviroflash.info/ajax/feed_ajax.cfm" method="post">
<input name="method" value="getForecastCities" />
<input name="sStateId" value="{$stateCode}" />
</form>
))/raw)/cityBody) ! (
(: Ignore states where there are no feeds listed at all :)
if (exists(//tr[td[not(@colspan)][@id='name']])) then
<outline text="{$stateCode}">
{
//tr[td[not(@colspan)][@id='name']]
! <outline text="{td[@id='name']/text()}">
{
.//a[ends-with(@href, ".xml")]
! (
(: CAP feeds are not exactly feeds, since they only contain one alert at once and are not supported by feedreaders :)
if (starts-with(@href, "cap/")) then
<outline type="link" text="CAP feed" url="{resolve-uri(@href, 'https://feeds.enviroflash.info/')}" />
else
<outline
type="rss"
version="{upper-case(substring-before(@href, '/'))}"
text="{substring-after(./img/(@alt,@title)[1], 'View ')}"
xmlUrl="{resolve-uri(@href, 'https://feeds.enviroflash.info/')}"
language="en-us"
/>
)
}
</outline>
}
</outline>
else ()
)
}
</body>
</opml>
import module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions" at "../functions.xqy";
rsrsss:make-opml(
    "EnviroFlash RSS feeds",
    (:
        Start from the table row(s) inside the "formrow" block of the landing page.
        The simple map operator (!) is used rather than a path step (/): every
        outline built below is a freshly constructed node, and "/" would sort such
        nodes in an implementation-dependent order, whereas "!" keeps them in the
        order in which they were produced.
    :)
    doc("https://feeds.enviroflash.info/")//*[contains(@class, "formrow")]//tr
    ! (
        (: Feeds linked directly from the row. The base URI is given explicitly,
           like every other resolve-uri call in this script, so the result does
           not depend on the processor's static base URI. :)
        .//a[ends-with(@href, ".xml")]
        ! <outline
            type="rss"
            version="Atom"
            text="{text()}"
            xmlUrl="{resolve-uri(@href, 'https://feeds.enviroflash.info/')}"
            language="en-us"
        />,
        (: One POST request per option value other than "0"; the "cityBody" member
           of the JSON response is parsed as HTML. :)
        for $stateCode in .//option/@value[.!="0"]
        return x:parse-html(pxp:json(x:request(x:form(
            <form action="https://feeds.enviroflash.info/ajax/feed_ajax.cfm" method="post">
                <input name="method" value="getForecastCities" />
                <input name="sStateId" value="{$stateCode}" />
            </form>
        ))/raw)/cityBody) ! (
            (: Ignore states where there are no feeds listed at all :)
            if (exists(//tr[td[not(@colspan)][@id='name']])) then
                <outline text="{$stateCode}">
                {
                    (: One nested outline per row that has a "name" cell without a colspan. :)
                    //tr[td[not(@colspan)][@id='name']]
                    ! <outline text="{td[@id='name']/text()}">
                    {
                        .//a[ends-with(@href, ".xml")]
                        ! (
                            (: CAP feeds are not exactly feeds, since they only contain one alert at once and are not supported by feedreaders :)
                            if (starts-with(@href, "cap/")) then
                                <outline type="link" text="CAP feed" url="{resolve-uri(@href, 'https://feeds.enviroflash.info/')}" />
                            else
                                (: The first path segment of the link is upper-cased and used as the feed version. :)
                                <outline
                                    type="rss"
                                    version="{upper-case(substring-before(@href, '/'))}"
                                    text="{substring-after(./img/(@alt,@title)[1], 'View ')}"
                                    xmlUrl="{resolve-uri(@href, 'https://feeds.enviroflash.info/')}"
                                    language="en-us"
                                />
                        )
                    }
                    </outline>
                }
                </outline>
            else ()
        )
    )
)

View File

@ -1,36 +1,27 @@
xquery version "3.0" encoding "utf-8";
<opml version="2.0">
<head>
<title>RSRSSS curated feeds</title>
<dateModified>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</dateModified>
<ownerName>lucidiot</ownerName>
<ownerEmail>lucidiot@envs.net</ownerEmail>
<ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
{
for $item in /rss/channel/item[category[@domain='https://envs.net/~lucidiot/rsrsss/' and (text()='Feed' or text()='OPML')] and link]
return element outline {
attribute type {
if (exists($item/category[@domain='https://envs.net/~lucidiot/rsrsss/' and (text()='OPML')]))
then 'include'
else 'rss'
},
attribute text {$item/title},
attribute title {$item/title},
attribute xmlUrl {$item/link},
if (exists($item/description)) then attribute description {normalize-space($item/description)} else (),
if (exists($item/pubDate)) then attribute created {$item/pubDate} else (),
let $categories := string-join(
$item/category[@domain='https://envs.net/~lucidiot/rsrsss/' and (text()!='OPML' and text()!='Feed')]/text()
! concat('/', .),
','
)
return if (string-length($categories) > 0)
then attribute category {$categories}
else ()
}
}
</body>
</opml>
import module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions" at "../functions.xqy";
rsrsss:make-opml(
    "RSRSSS curated feeds",
    (:
        Each RSS item of the context document that has a link and carries a
        'Feed' or 'OPML' category in the RSRSSS domain becomes one outline.
    :)
    let $domain := 'https://envs.net/~lucidiot/rsrsss/'
    for $entry in /rss/channel/item[category[@domain=$domain and (text()='Feed' or text()='OPML')] and link]
    (: Items tagged 'OPML' are emitted as inclusions, everything else as a plain feed. :)
    let $kind :=
        if (exists($entry/category[@domain=$domain and (text()='OPML')]))
        then 'include'
        else 'rss'
    (: The remaining categories of the same domain, each prefixed with a slash and joined with commas. :)
    let $tags := string-join(
        $entry/category[@domain=$domain and (text()!='OPML' and text()!='Feed')]/text()
        ! concat('/', .),
        ','
    )
    return element outline {
        attribute type {$kind},
        attribute text {$entry/title},
        attribute title {$entry/title},
        attribute xmlUrl {$entry/link},
        (: Optional attributes are only emitted when their source element exists. :)
        if (exists($entry/description))
            then attribute description {normalize-space($entry/description)}
            else (),
        if (exists($entry/pubDate))
            then attribute created {$entry/pubDate}
            else (),
        if (string-length($tags) > 0)
            then attribute category {$tags}
            else ()
    }
)

View File

@ -1,26 +1,17 @@
<opml version="2.0">
<head>
<title>MeteoAlarm feeds</title>
<dateModified>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</dateModified>
<ownerName>lucidiot</ownerName>
<ownerEmail>lucidiot@envs.net</ownerEmail>
<ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
import module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions" at "../functions.xqy";
rsrsss:make-opml(
"MeteoAlarm feeds",
doc('https://feeds.meteoalarm.org/')//h2
! <outline text="{text()}">
{
doc('https://feeds.meteoalarm.org/')//h2
! <outline text="{text()}">
{
./following-sibling::ul[1]/li/a/@href
! <outline
type="rss"
xmlUrl="{.}"
text="{if (contains(., 'atom')) then 'Atom + CAP' else 'RSS'}"
version="{if (contains(., 'atom')) then 'ATOM' else 'RSS'}"
/>
}
</outline>
./following-sibling::ul[1]/li/a/@href
! <outline
type="rss"
xmlUrl="{.}"
text="{if (contains(., 'atom')) then 'Atom + CAP' else 'RSS'}"
version="{if (contains(., 'atom')) then 'ATOM' else 'RSS'}"
/>
}
</body>
</opml>
</outline>
)

View File

@ -1,14 +1,8 @@
xquery version "3.0" encoding "utf-8";
<opml version="2.0">
<head>
<title>National Data Buoy Center RSS feeds</title>
<dateModified>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</dateModified>
<ownerName>lucidiot</ownerName>
<ownerEmail>lucidiot@envs.net</ownerEmail>
<ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
import module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions" at "../../functions.xqy";
rsrsss:make-opml(
"National Data Buoy Center RSS feeds",
<outline
type="rss"
version="RSS"
@ -26,6 +20,4 @@ xquery version "3.0" encoding "utf-8";
language="en-us"
/>
}
</body>
</opml>
)

View File

@ -1,37 +1,28 @@
xquery version "3.0" encoding "utf-8";
(:
We use unparsed-text(url, encoding) instead of doc(url)
because xidel does not support changing the encoding dynamically
depending on the HTML content, so it would always use UTF-8
:)
x:parse-html(unparsed-text("https://www.nhc.noaa.gov/aboutrss.shtml", "iso-8859-1"))/
<opml version="2.0">
<head>
<title>National Hurricane Center and Central Pacific Hurricane Center RSS feeds</title>
<dateModified>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</dateModified>
<ownerName>lucidiot</ownerName>
<ownerEmail>lucidiot@envs.net</ownerEmail>
<ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
{
(:
Each feed is listed as a link with an RSS icon inside it,
followed by a text node representing the feed name,
followed by another link with the URL as its text.
Start by picking the link with the image in it…
:)
for $feed in //a[ends-with(@href, ".xml")][./img]
(: Find the text node that follows the image to get the feed name :)
let $text := substring-before($feed/following-sibling::text()[1], ":")
return <outline
type="rss"
text="{$text}"
xmlUrl="{resolve-uri($feed/@href, 'https://www.nhc.noaa.gov/aboutrss.shtml')}"
language="{if (some $word in ('Español', 'Atlantico', 'Cartera') satisfies contains($text, $word)) then 'es' else 'en-us'}"
/>
(: The only way to determine the feed's language without fetching the feeds themselves is to look for Spanish keywords. :)
}
</body>
</opml>
import module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions" at "../../functions.xqy";
rsrsss:make-opml(
    "National Hurricane Center and Central Pacific Hurricane Center RSS feeds",
    (:
        We use unparsed-text(url, encoding) instead of doc(url)
        because xidel does not support changing the encoding dynamically
        depending on the HTML content, so it would always use UTF-8
    :)
    x:parse-html(unparsed-text("https://www.nhc.noaa.gov/aboutrss.shtml", "iso-8859-1"))
    (:
        The FLWOR expression is wrapped in parentheses because a bare "for" is not
        a valid step after "/" in the XQuery grammar. The simple map operator (!)
        makes the parsed page the context item and keeps the outlines in the order
        in which they are produced.
    :)
    ! (
        (:
            Each feed is listed as a link with an RSS icon inside it,
            followed by a text node representing the feed name,
            followed by another link with the URL as its text.
            Start by picking the link with the image in it…
        :)
        for $feed in //a[ends-with(@href, ".xml")][./img]
        (: Find the text node that follows the image to get the feed name :)
        let $text := substring-before($feed/following-sibling::text()[1], ":")
        return <outline
            type="rss"
            text="{$text}"
            xmlUrl="{resolve-uri($feed/@href, 'https://www.nhc.noaa.gov/aboutrss.shtml')}"
            language="{if (some $word in ('Español', 'Atlantico', 'Cartera') satisfies contains($text, $word)) then 'es' else 'en-us'}"
        />
        (: The only way to determine the feed's language without fetching the feeds themselves is to look for Spanish keywords. :)
    )
)

View File

@ -1,30 +1,13 @@
<opml version="2.0">
<head>
<title>UK National Highways RSS feeds</title>
<dateModified>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</dateModified>
<ownerName>lucidiot</ownerName>
<ownerEmail>lucidiot@envs.net</ownerEmail>
<ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
{
doc('https://nationalhighways.co.uk/travel-updates/traffic-information-rss-feeds/')//div[contains(@class, "container--section")]
! (
.//div[contains(@class, "accordion-card")]
! <outline text="{./header/a/normalize-space(text())}">
{
.//li/a[ends-with(@href, ".xml")]
! <outline
type="rss"
version="RSS"
text="{normalize-space(text())}"
xmlUrl="{resolve-uri(@href)}"
language="en-gb"
/>
}
</outline>,
.//div/h2/a
import module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions" at "../functions.xqy";
rsrsss:make-opml(
"UK National Highways RSS feeds",
doc('https://nationalhighways.co.uk/travel-updates/traffic-information-rss-feeds/')//div[contains(@class, "container--section")]
! (
.//div[contains(@class, "accordion-card")]
! <outline text="{./header/a/normalize-space(text())}">
{
.//li/a[ends-with(@href, ".xml")]
! <outline
type="rss"
version="RSS"
@ -32,8 +15,15 @@
xmlUrl="{resolve-uri(@href)}"
language="en-gb"
/>
)
}
</body>
</opml>
}
</outline>,
.//div/h2/a
! <outline
type="rss"
version="RSS"
text="{normalize-space(text())}"
xmlUrl="{resolve-uri(@href)}"
language="en-gb"
/>
)
)

View File

@ -1,50 +1,42 @@
<opml version="2.0">
<head>
<title>USGS Earthquake Hazards Program feeds</title>
<dateModified>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</dateModified>
<ownerName>lucidiot</ownerName>
<ownerEmail>lucidiot@envs.net</ownerEmail>
<ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
<outline text="Atom">
{
doc('https://earthquake.usgs.gov/earthquakes/feed/v1.0/atom.php')//div[h2[text() = 'Feeds']]/h3
! <outline text="{normalize-space(text())}">
{
./following-sibling::ul//a
! <outline
type="rss"
version="ATOM"
text="{normalize-space(text())}"
xmlUrl="{resolve-uri(@href)}"
language="en-us"
/>
}
</outline>
}
</outline>
<outline text="KML">
{
doc('https://earthquake.usgs.gov/earthquakes/feed/v1.0/kml.php')//div[h2[text() = 'Automatic Feeds']]/h2
! <outline text="{normalize-space(text())}">
{
./following-sibling::h3
! <outline text="{normalize-space(text())}">
{
./following-sibling::ul//a
! <outline
type="link"
text="{normalize-space(text())}"
url="{resolve-uri(@href)}"
/>
}
</outline>
}
</outline>
}
</outline>
</body>
</opml>
import module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions" at "../../functions.xqy";
rsrsss:make-opml(
    "USGS Earthquake Hazards Program feeds",
    (:
        The two top-level outlines are separate expressions, so they are combined
        into a single sequence argument with parentheses and a comma. Two adjacent
        element constructors without a separator do not parse as a sequence.
    :)
    (
        <outline text="Atom">
        {
            (: One outline per h3 heading of the "Feeds" block on the Atom page. :)
            doc('https://earthquake.usgs.gov/earthquakes/feed/v1.0/atom.php')//div[h2[text() = 'Feeds']]/h3
            ! <outline text="{normalize-space(text())}">
            {
                (: NOTE(review): following-sibling::ul selects every later sibling list, not only the first one; confirm this is intended. :)
                ./following-sibling::ul//a
                ! <outline
                    type="rss"
                    version="ATOM"
                    text="{normalize-space(text())}"
                    xmlUrl="{resolve-uri(@href)}"
                    language="en-us"
                />
            }
            </outline>
        }
        </outline>,
        <outline text="KML">
        {
            (: One outline per h2 heading of the "Automatic Feeds" block on the KML page, with nested outlines for the h3 headings that follow it. :)
            doc('https://earthquake.usgs.gov/earthquakes/feed/v1.0/kml.php')//div[h2[text() = 'Automatic Feeds']]/h2
            ! <outline text="{normalize-space(text())}">
            {
                ./following-sibling::h3
                ! <outline text="{normalize-space(text())}">
                {
                    (: KML documents are listed as plain links rather than as feeds. :)
                    ./following-sibling::ul//a
                    ! <outline
                        type="link"
                        text="{normalize-space(text())}"
                        url="{resolve-uri(@href)}"
                    />
                }
                </outline>
            }
            </outline>
        }
        </outline>
    )
)

View File

@ -1,3 +1,5 @@
import module namespace rsrsss = "https://envs.net/~lucidiot/rsrsss/rsrsss-functions" at "../functions.xqy";
declare function local:get-feeds($url as xs:anyURI, $language as xs:string) as node()* {
doc(resolve-uri('./w/index.php?title=Special:SiteMatrix', $url))//link[@rel = 'alternate'][@type = 'application/rss+xml' or @type = 'application/atom+xml']
! element outline {
@ -13,32 +15,22 @@ declare function local:get-feeds($url as xs:anyURI, $language as xs:string) as n
}
};
doc('https://commons.wikimedia.org/w/api.php?action=sitematrix&amp;format=xml')//sitematrix
! <opml version="2.0">
<head>
<title>Wikimedia RSS feeds</title>
<dateModified>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</dateModified>
<ownerName>lucidiot</ownerName>
<ownerEmail>lucidiot@envs.net</ownerEmail>
<ownerId>https://tilde.town/~lucidiot/contact.html</ownerId>
<docs>http://dev.opml.org/spec2.html</docs>
</head>
<body>
{
for $language in ./language
return <outline text="{$language/@localname}">
{
$language/site/site[not(@private)]
! local:get-feeds(@url, $language/@code)
}
</outline>
}
rsrsss:make-opml(
"Wikimedia RSS feeds",
doc('https://commons.wikimedia.org/w/api.php?action=sitematrix&amp;format=xml')//sitematrix
! (
for $language in ./language
return <outline text="{$language/@localname}">
{
$language/site/site[not(@private)]
! local:get-feeds(@url, $language/@code)
}
</outline>,
<outline text="Specials">
{
./specials/special[not(@private)]
! local:get-feeds(@url, @lang)
}
</outline>
</body>
</opml>
)
)