Rewrite ATSB feed generator, closes #119
continuous-integration/drone/push Build is passing Details

This commit is contained in:
~lucidiot 2024-02-24 19:47:38 +00:00
parent d4317056f2
commit 5ef1805fd8
Signed by: lucidiot
GPG Key ID: 3358C1CA6906FB8D
2 changed files with 61 additions and 6 deletions

View File

@ -298,12 +298,7 @@
<frequency>100 reports/year</frequency>
<url>https://www.atsb.gov.au/</url>
<!-- Definition of the ATSB RSS feed (id "atsb-rss"); its result is written to atsb.xml.
     NOTE(review): this span is a diff hunk rendered without +/- markers. The hunk
     header (12 old lines, 7 new lines) suggests that the curl, pup, jq and json2xml
     steps are the removed pipeline and that the xquery step replaces them; confirm
     against the actual file before relying on this listing. -->
<feed format="rss" lang="English" id="atsb-rss">
<curl>
<url>https://www.atsb.gov.au/publications/safety-investigation-reports/?s=1&amp;sort=OccurrenceReleaseDate&amp;sortAscending=descending&amp;investigationStatus=Completed,Discontinued&amp;printAll=true</url>
</curl>
<pup>table.selectable_grid tr:not(.header)</pup>
<jq path="atsb.jq" />
<json2xml />
<xquery path="atsb.xqy" user-agent="a/1" />
<output>atsb.xml</output>
</feed>
</source>

60
xquery/atsb.xqy Normal file
View File

@ -0,0 +1,60 @@
(:
  ATSB feed generator.

  Builds an RSS 2.0 document describing Australian Transport Safety Bureau
  investigation reports. For each investigation status id listed below, the
  matching listing page on www.atsb.gov.au is requested and every row of its
  "views-table" results table is turned into one RSS item.

  x:request and x:parse-html are not standard XQuery functions.
  NOTE(review): presumably these are Xidel extension functions; confirm
  against the runner that evaluates this file.

  The continuation lines of the multi-line xsi:schemaLocation attribute and
  of the "headers" string are deliberately left unindented: any leading
  whitespace there would become part of the emitted value.
:)
<rss
  version="2.0"
  xmlns:admin="http://webns.net/mvcb/"
  xmlns:atom="http://www.w3.org/2005/Atom"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
  xmlns:webfeeds="http://webfeeds.org/rss/1.0"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:noNamespaceSchemaLocation="xsd/rss.xsd"
  xsi:schemaLocation="
http://purl.org/rss/1.0/modules/syndication/ xsd/syndication.xsd
http://webfeeds.org/rss/1.0 xsd/webfeeds.xsd
"
>
  <channel>
    <title>ATSB</title>
    <description>Australian Transport Safety Bureau accident reports</description>
    <link>https://www.atsb.gov.au/marine-investigation-reports?field_mode_of_transport_target_id=All</link>
    <language>en-au</language>
    <pubDate>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</pubDate>
    <webMaster>lucidiot@envs.net (lucidiot)</webMaster>
    <docs>http://www.rssboard.org/rss-specification</docs>
    <ttl>1440</ttl>
    <admin:errorReportsTo rdf:resource="mailto:lucidiot@envs.net" />
    <admin:generatorAgent rdf:resource="https://tildegit.org/lucidiot/itsb/" />
    <atom:link href="https://tilde.town/~lucidiot/itsb/feeds/atsb.xml" rel="self" type="application/rss+xml" />
    <dc:format>application/rss+xml</dc:format>
    <sy:updatePeriod>daily</sy:updatePeriod>
    <sy:updateFrequency>1</sy:updateFrequency>
    <sy:updateBase>1990-01-01T15:00+00:00</sy:updateBase>
    <webfeeds:partial>true</webfeeds:partial>
    <webfeeds:deprecated>false</webfeeds:deprecated>
    {
      (: One listing page is requested per investigation status id. The
         context item "." inside concat() is the current status id. :)
      (
        163, (: Completed :)
        168 (: Discontinued :)
      ) ! x:parse-html(x:request({
        "url": concat("https://www.atsb.gov.au/marine-investigation-reports?field_investigation_status_target_id=", ., "&amp;field_mode_of_transport_target_id=All"),
        "headers": "Accept-Encoding: lol
Accept-Language: zz,
Cache-Control: lol"
      })/raw)//table[contains(@class, "views-table")]/tbody/tr
      ! <item>
          <title>{./td[2]/a/text()}</title>
          <link>{
            (: An explicit base (the listing page URL) is given so that relative
               hrefs resolve identically regardless of the static base URI of
               whichever processor runs this query. :)
            fn:resolve-uri(./td[2]/a/@href, "https://www.atsb.gov.au/marine-investigation-reports")
          }</link>
          <guid>{fn:resolve-uri(./td[2]/a/@href, "https://www.atsb.gov.au/marine-investigation-reports")}</guid>
          <description>{./td[1]/a/text()}</description>
          <pubDate>{
            (: The parentheses matter: "(.//time/@datetime)[1]" keeps only the
               first attribute of the row, whereas ".//time/@datetime[1]" would
               keep one attribute per time element and break xs:dateTime() on
               rows with several of them. The value is then shifted to UTC so
               that the literal "GMT" suffix in the picture string is truthful,
               matching how the channel pubDate is produced. :)
            format-dateTime(
              adjust-dateTime-to-timezone(
                xs:dateTime((.//time/@datetime)[1]),
                xs:dayTimeDuration("PT0S")
              ),
              "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT"
            )
          }</pubDate>
          <category domain="https://www.atsb.gov.au/">{./td[4]/text()}</category>
        </item>
    }
  </channel>
</rss>