Rewrite DAAI feed, close #87

This commit is contained in:
Erwan Rouchet 2021-09-28 09:06:49 +02:00
parent aa26c36bc9
commit eb431dc9bf
No known key found for this signature in database
GPG Key ID: FF629EE969FFE294
2 changed files with 22 additions and 11 deletions

View File

@ -820,9 +820,14 @@
<frequency>3-5 reports/year</frequency> <frequency>3-5 reports/year</frequency>
<feed format="rss" lang="English" id="daai-rss"> <feed format="rss" lang="English" id="daai-rss">
<curl> <curl>
<url verify-ssl="false">https://mwt.gov.na/published-daai-report/-/document_library/PVB0Gp2amxBo/view/1344073?_com_liferay_document_library_web_portlet_DLPortlet_INSTANCE_PVB0Gp2amxBo_displayStyle=list</url> <url>https://mwt.gov.na/published-daai-report</url>
</curl> </curl>
<pup>tr[data-title]</pup> <!--
We would need both :not(:first-child) and :not(:nth-child(2)) to
properly remove all the header rows, since there are two of them,
but pup does not allow that, so the jq script ignores the first row it gets.
-->
<pup>#our-content tr:not(:first-child)</pup>
<jq path="daai.jq" /> <jq path="daai.jq" />
<json2xml /> <json2xml />
<output>daai.xml</output> <output>daai.xml</output>

View File

@ -9,22 +9,28 @@ import "./helpers" as helpers;
"channel": { "channel": {
"title": "DAAI", "title": "DAAI",
"description": "Namibian Directorate of Aircraft Accidents and Investigation reports", "description": "Namibian Directorate of Aircraft Accidents and Investigation reports",
"link": "https://mwt.gov.na/web/mwt/published-reports", "link": "https://mwt.gov.na/published-daai-report",
"language": "en-na", "language": "en-na",
"pubDate": (now | strftime("%a, %d %b %Y %T %z")), "pubDate": (now | strftime("%a, %d %b %Y %T %z")),
"docs": "https://www.rssboard.org/rss-specification", "docs": "https://www.rssboard.org/rss-specification",
"ttl": 1440, "ttl": 1440,
"generator": "ITSB", "generator": "ITSB",
"item": [ "item": [
.[] .[1:][].children
| { | {
"title": .["data-title"], "title": "\(.[1].children[0].text) - \(.[2].children[0].text) \(.[3].children[0].children[0].text) - \(.[-1].children[-1].text)",
"link": (.children[0].children[0].href | sub("\\?.*$"; "")), "link": (.[3].children[0].children[0].href | helpers::urlresolve("https://mwt.gov.na/published-daai-report")),
"pubDate": (.children[3].children[0].onmouseover | match("&#39;(.*)&#39;").captures[0].string | strptime("%e/%m/%y %I:%M %p") | mktime | strftime("%a, %d %b %Y %T %z")), "category": [
"guid": { [.[1].children[0].text, .[-1].children[-1].text][]
"@isPermaLink": "true", | {
"#text": (.children[0].children[0].href | sub("\\?.*$"; "")) "@domain": "https://mwt.gov.na",
} "#text": .
}
]
}
| .guid = {
"@isPermaLink": "true",
"#text": .link
} }
] ]
} }