Compare commits
3 Commits
cded417589
...
5ef1805fd8
Author | SHA1 | Date |
---|---|---|
~lucidiot | 5ef1805fd8 | |
~lucidiot | d4317056f2 | |
~lucidiot | f447ee9fc1 |
18
itsb.xml
18
itsb.xml
|
@ -298,12 +298,7 @@
|
|||
<frequency>100 reports/year</frequency>
|
||||
<url>https://www.atsb.gov.au/</url>
|
||||
<feed format="rss" lang="English" id="atsb-rss">
|
||||
<curl>
|
||||
<url>https://www.atsb.gov.au/publications/safety-investigation-reports/?s=1&sort=OccurrenceReleaseDate&sortAscending=descending&investigationStatus=Completed,Discontinued&printAll=true</url>
|
||||
</curl>
|
||||
<pup>table.selectable_grid tr:not(.header)</pup>
|
||||
<jq path="atsb.jq" />
|
||||
<json2xml />
|
||||
<xquery path="atsb.xqy" user-agent="a/1" />
|
||||
<output>atsb.xml</output>
|
||||
</feed>
|
||||
</source>
|
||||
|
@ -908,18 +903,13 @@
|
|||
<region>Namibia</region>
|
||||
<type>Aviation</type>
|
||||
<frequency>3-5 reports/year</frequency>
|
||||
<url>https://mwt.gov.na/directorate-of-aircraft-accident-and-incident-investigations</url>
|
||||
<url>https://mwt.gov.na/web/mwt/aviation</url>
|
||||
<feed format="rss" lang="English" id="daai-rss">
|
||||
<curl>
|
||||
<!-- The SSL verification fails on tilde.town for some reason, but it works properly in web browsers. -->
|
||||
<url verify-ssl="false">https://mwt.gov.na/published-daai-report</url>
|
||||
<url verify-ssl="false">https://mwt.gov.na/web/mwt/completed-investigations</url>
|
||||
</curl>
|
||||
<!--
|
||||
We would need both :not(:first-child) and :not(:nth-child(2)) to
|
||||
properly remove all the header rows since there are two of them,
|
||||
but pup does not allow that so the jq script will ignore the first row it gets.
|
||||
-->
|
||||
<pup>#our-content tr:not(:first-child)</pup>
|
||||
<pup>.journal-content-article tr</pup>
|
||||
<jq path="daai.jq" />
|
||||
<json2xml />
|
||||
<output>daai.xml</output>
|
||||
|
|
82
itsb.xsd
82
itsb.xsd
|
@ -216,6 +216,87 @@
|
|||
</xs:simpleContent>
|
||||
</xs:complexType>
|
||||
|
||||
<!-- Allowed values for the `output-format` attribute of an XQuery action. -->
<xs:simpleType name="XidelOutputFormat">
  <xs:annotation>
    <xs:documentation>
      Output format of a `xidel` command.
    </xs:documentation>
  </xs:annotation>
  <xs:restriction base="xs:string">
    <!-- Document-style outputs. -->
    <xs:enumeration value="xml">
      <xs:annotation>
        <xs:documentation>
          XML document.
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="html">
      <xs:annotation>
        <xs:documentation>
          XHTML document.
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <!-- Human-readable outputs, bare or wrapped in another structure. -->
    <xs:enumeration value="adhoc">
      <xs:annotation>
        <xs:documentation>
          Human-readable representation. This will output the `text()` content of XML nodes, and JSON structures are output as they are, with indentation.
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="xml-wrapped">
      <xs:annotation>
        <xs:documentation>
          Human-readable representation, embedded within an XML structure.
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
    <xs:enumeration value="json-wrapped">
      <xs:annotation>
        <xs:documentation>
          Human-readable representation, embedded within a JSON structure.
        </xs:documentation>
      </xs:annotation>
    </xs:enumeration>
  </xs:restriction>
</xs:simpleType>
|
||||
|
||||
<!--
  Attributes of the `xquery` command element. Only `path` is mandatory; the
  other attributes fall back to the defaults declared here (attribute use is
  "optional" unless stated otherwise, so it is not repeated).
-->
<xs:complexType name="XQueryAction">
  <xs:annotation>
    <xs:documentation>
      Run an XQuery script.
    </xs:documentation>
  </xs:annotation>
  <xs:attribute name="path" type="xs:string" use="required">
    <xs:annotation>
      <xs:documentation>
        Path to the XQuery script relative to the project's xquery/ directory.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
  <xs:attribute name="timeout" type="xs:nonNegativeInteger" default="60">
    <xs:annotation>
      <xs:documentation>
        Maximum execution time for the script, in seconds. Set to 0 to disable.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
  <xs:attribute name="user-agent" type="xs:string" default="itsb/1.0 (+https://tilde.town/~lucidiot/itsb/)">
    <xs:annotation>
      <xs:documentation>
        User-Agent header to send along with any HTTP requests.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
  <xs:attribute name="output-format" type="XidelOutputFormat" default="xml">
    <xs:annotation>
      <xs:documentation>
        Format to use to output the script's results.
      </xs:documentation>
    </xs:annotation>
  </xs:attribute>
</xs:complexType>
|
||||
|
||||
<xs:group name="Command">
|
||||
<xs:choice>
|
||||
<xs:element name="curl" type="CurlCommand" />
|
||||
|
@ -244,6 +325,7 @@
|
|||
</xs:unique>
|
||||
</xs:element>
|
||||
<xs:element name="shell" type="ShellCommand" />
|
||||
<xs:element name="xquery" type="XQueryAction" />
|
||||
</xs:choice>
|
||||
</xs:group>
|
||||
|
||||
|
|
|
@ -16,7 +16,9 @@ import "./helpers" as helpers;
|
|||
"ttl": 1440,
|
||||
"generator": "ITSB",
|
||||
"item": [
|
||||
.[1:][].children
|
||||
.[].children
|
||||
# Only include rows that contain hyperlinks, to skip blank rows or headers
|
||||
| select(try .[].children[].tag == "a")
|
||||
| {
|
||||
"title": "\(.[1].children[0].text) - \(.[2].children[0].text) \(.[3].children[0].children[0].text) - \(.[-1].children[-1].text)",
|
||||
"link": (.[3].children[0].children[0].href | helpers::urlresolve("https://mwt.gov.na/published-daai-report")),
|
||||
|
|
|
@ -0,0 +1,60 @@
|
|||
(:
  Builds an RSS 2.0 feed of ATSB investigation reports.

  For each investigation status ID listed below, the script requests the
  report listing page, parses the returned HTML, and emits one <item> per
  row of the table whose class contains "views-table".

  Row layout relied upon by the item constructor:
    td[1]/a  : text used as the item description
    td[2]/a  : text used as the title, href used as link and guid
    td[4]    : text used as the category
    .//time  : datetime attribute used as the publication date
:)
<rss
  version="2.0"
  xmlns:admin="http://webns.net/mvcb/"
  xmlns:atom="http://www.w3.org/2005/Atom"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#"
  xmlns:sy="http://purl.org/rss/1.0/modules/syndication/"
  xmlns:webfeeds="http://webfeeds.org/rss/1.0"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
  xsi:noNamespaceSchemaLocation="xsd/rss.xsd"
  xsi:schemaLocation="
    http://purl.org/rss/1.0/modules/syndication/ xsd/syndication.xsd
    http://webfeeds.org/rss/1.0 xsd/webfeeds.xsd
  "
>
  <channel>
    <title>ATSB</title>
    <description>Australian Transport Safety Bureau accident reports</description>
    <link>https://www.atsb.gov.au/marine-investigation-reports?field_mode_of_transport_target_id=All</link>
    <language>en-au</language>
    <pubDate>{format-dateTime(adjust-dateTime-to-timezone(current-dateTime(), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</pubDate>
    <webMaster>lucidiot@envs.net (lucidiot)</webMaster>
    <docs>http://www.rssboard.org/rss-specification</docs>
    <ttl>1440</ttl>

    <admin:errorReportsTo rdf:resource="mailto:lucidiot@envs.net" />
    <admin:generatorAgent rdf:resource="https://tildegit.org/lucidiot/itsb/" />

    <atom:link href="https://tilde.town/~lucidiot/itsb/feeds/atsb.xml" rel="self" type="application/rss+xml" />

    <dc:format>application/rss+xml</dc:format>

    <sy:updatePeriod>daily</sy:updatePeriod>
    <sy:updateFrequency>1</sy:updateFrequency>
    <sy:updateBase>1990-01-01T15:00+00:00</sy:updateBase>

    <webfeeds:partial>true</webfeeds:partial>
    <webfeeds:deprecated>false</webfeeds:deprecated>

    {
      (: One listing request per investigation status ID. :)
      (
        163, (: Completed :)
        168 (: Discontinued :)
      ) ! x:parse-html(x:request({
        (: The ampersand is written as an entity reference, as XQuery string literals require. :)
        "url": concat("https://www.atsb.gov.au/marine-investigation-reports?field_investigation_status_target_id=", ., "&amp;field_mode_of_transport_target_id=All"),
        (:
          NOTE(review): these header values are not valid tokens and look like
          deliberate placeholders; confirm the intent before changing them.
          The continuation lines start at column 0 so that no leading
          whitespace ends up inside the header string.
        :)
        "headers": "Accept-Encoding: lol
Accept-Language: zz,
Cache-Control: lol"
      })/raw)//table[contains(@class, "views-table")]/tbody/tr
      ! <item>
        <title>{./td[2]/a/text()}</title>
        {
          (:
            Resolve against an explicit base: the one-argument form depends
            on the static base URI, which is not guaranteed to be the site
            URL for a document produced by x:parse-html.
          :)
          ()
        }
        <link>{fn:resolve-uri(./td[2]/a/@href, "https://www.atsb.gov.au/")}</link>
        <guid>{fn:resolve-uri(./td[2]/a/@href, "https://www.atsb.gov.au/")}</guid>
        <description>{./td[1]/a/text()}</description>
        {
          (:
            The picture string ends with a literal "GMT", so the timestamp is
            first shifted to UTC, exactly as the channel pubDate does.
            (.//time/@datetime)[1] takes the first attribute of the whole row;
            the previous @datetime[1] form selected one attribute per <time>
            element and would make the cast fail on rows with several.
          :)
          ()
        }
        <pubDate>{format-dateTime(adjust-dateTime-to-timezone(xs:dateTime((.//time/@datetime)[1]), xs:dayTimeDuration("PT0S")), "[FNn,*-3], [D01] [MNn,*-3] [Y0001] [H01]:[m01]:[s01] GMT")}</pubDate>
        <category domain="https://www.atsb.gov.au/">{./td[4]/text()}</category>
      </item>
    }
  </channel>
</rss>
|
|
@ -95,6 +95,15 @@ if ! command -v json2xml >/dev/null 2>&1; then
|
|||
fi
|
||||
|
||||
]]></xsl:text>
|
||||
<xsl:if test="//itsb:xquery">
|
||||
<xsl:text><![CDATA[
|
||||
if ! command -v xidel >/dev/null 2>&1; then
|
||||
echo "xidel is not installed or available in \$PATH." >&2
|
||||
echo "See <https://www.videlibri.de/xidel.html#downloads> for installation instructions." >&2
|
||||
exit 1
|
||||
fi
|
||||
]]></xsl:text>
|
||||
</xsl:if>
|
||||
<xsl:apply-templates
|
||||
select="//itsb:link[@verify-ssl = 'false' or @verify-ssl = '0'][generate-id() = generate-id(key('ssl-hosts', substring-before(substring-after(text(), 'https://'), '/'))[1])]"
|
||||
mode="check"
|
||||
|
@ -128,7 +137,7 @@ rm "$DIR/.itsb-feedgen"]]></xsl:text>
|
|||
</xsl:text>
|
||||
</xsl:if>
|
||||
|
||||
<xsl:for-each select="./itsb:curl|./itsb:jq|./itsb:pup|./itsb:json2xml|./itsb:xml2json|./itsb:shell|./itsb:output">
|
||||
<xsl:for-each select="./itsb:curl|./itsb:jq|./itsb:pup|./itsb:json2xml|./itsb:xml2json|./itsb:shell|./itsb:xquery|./itsb:output">
|
||||
<xsl:apply-templates select="." />
|
||||
<xsl:if test="not(position()=last())">
|
||||
<xsl:text> \
|
||||
|
@ -263,6 +272,45 @@ rm "$DIR/.itsb-feedgen"]]></xsl:text>
|
|||
<xsl:value-of select="text()" />
|
||||
</xsl:template>
|
||||
|
||||
<!--
  Emits the shell command for an itsb:xquery element:

    timeout '<seconds>' xidel ... (user agent) (output format) (script path)

  Attributes that are absent fall back to the same defaults the template has
  always used: timeout 60, user agent "itsb/1.0 (+https://tilde.town/~lucidiot/itsb/)",
  output format "xml".

  Every attribute-derived value is passed through the `escape` template and
  wrapped in single quotes, as was already done for the user agent and the
  script path. The timeout and output format used to be inserted unquoted,
  so a value containing shell metacharacters would have been interpreted by
  the shell. NOTE(review): `escape` is defined outside this template; it is
  assumed to make text safe inside a single-quoted shell word, as its existing
  call sites here imply.
-->
<xsl:template match="itsb:xquery">
  <xsl:text>timeout '</xsl:text>
  <xsl:call-template name="escape">
    <xsl:with-param name="text">
      <xsl:choose>
        <xsl:when test="@timeout">
          <!-- Trim surrounding whitespace, which had no effect while the value was unquoted. -->
          <xsl:value-of select="normalize-space(@timeout)" />
        </xsl:when>
        <xsl:otherwise>
          <xsl:text>60</xsl:text>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:with-param>
  </xsl:call-template>
  <xsl:text>' xidel --silent --trace-stack --wait=1 --user-agent='</xsl:text>
  <xsl:call-template name="escape">
    <xsl:with-param name="text">
      <xsl:choose>
        <xsl:when test="@user-agent">
          <xsl:value-of select="@user-agent" />
        </xsl:when>
        <xsl:otherwise>
          <xsl:text>itsb/1.0 (+https://tilde.town/~lucidiot/itsb/)</xsl:text>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:with-param>
  </xsl:call-template>
  <xsl:text>' --output-format='</xsl:text>
  <xsl:call-template name="escape">
    <xsl:with-param name="text">
      <xsl:choose>
        <xsl:when test="@output-format">
          <xsl:value-of select="@output-format" />
        </xsl:when>
        <xsl:otherwise>
          <xsl:text>xml</xsl:text>
        </xsl:otherwise>
      </xsl:choose>
    </xsl:with-param>
  </xsl:call-template>
  <xsl:text>' --extract-kind=xquery3 --extract-file='xquery/</xsl:text>
  <xsl:call-template name="escape">
    <xsl:with-param name="text" select="@path" />
  </xsl:call-template>
  <xsl:text>'</xsl:text>
</xsl:template>
|
||||
|
||||
<xsl:template match="itsb:output">
|
||||
<xsl:text>> $DIR/feeds/</xsl:text>
|
||||
<xsl:value-of select="text()" />
|
||||
|
|
Loading…
Reference in New Issue