IoM Ship Registry custom feed, close #50

This commit is contained in:
Lucidiot 2021-02-07 17:20:47 +01:00
parent 28f2175950
commit d771003eea
Signed by: lucidiot
GPG Key ID: 3358C1CA6906FB8D
3 changed files with 86 additions and 0 deletions

View File

@ -26,6 +26,15 @@ if ! command -v json2xml >/dev/null 2>&1; then
PATH="$DIR/bin:$PATH"
fi
# Build the Isle of Man Ship Registry feed: fetch the casualty reports page,
# extract the report paragraphs with pup, convert them to RSS-shaped JSON with
# jq/iom.jq, then serialise to XML. The result is written to a temporary .new
# file and only moved over the live feed when the pipeline reports success.
# No `exit` here: the script must carry on to build the remaining feeds below.
log Building IoM Ship Registry feed to $DIR/feeds/iom.xml
curl -s https://www.iomshipregistry.com/forms-reports/casualty-reports/ \
  | pup '.rte + .accordion p json{}' \
  | jq -L "$DIR/jq" -f "$DIR/jq/iom.jq" \
  | json2xml > "$DIR/feeds/iom.xml.new" \
  && mv "$DIR/feeds/iom.xml.new" "$DIR/feeds/iom.xml"
log Building ATSB feed to $DIR/feeds/atsb.xml
curl -s 'https://www.atsb.gov.au/publications/safety-investigation-reports/?s=1&sort=OccurrenceReleaseDate&sortAscending=descending&investigationStatus=Completed,Discontinued&printAll=true' \
| pup 'table.selectable_grid tr:not(.header) json{}' \

View File

@ -844,6 +844,10 @@
<h3>Directorate of Aviation Accidents and Investigation</h3>
<table>
<tbody>
<tr>
<td><strong>Country</strong></td>
<td>Namibia</td>
</tr>
<tr>
<td><strong>Language</strong></td>
<td>English</td>
@ -864,6 +868,34 @@
</tr>
</tbody>
</table>
<h3>Isle of Man Ship Registry</h3>
<table>
<tbody>
<tr>
<td><strong>Country</strong></td>
<td>United Kingdom (Isle of Man)</td>
</tr>
<tr>
<td><strong>Language</strong></td>
<td>English</td>
</tr>
<tr>
<td><strong>Type</strong></td>
<td>Marine</td>
</tr>
<tr>
<td><strong>Frequency</strong></td>
<td>0-2 reports/year</td>
</tr>
<tr>
<td><strong>Feed</strong></td>
<td>
<a href="feeds/iom.xml" target="_blank"><img src="img/rss.gif" alt="RSS" /></a>
</td>
</tr>
</tbody>
</table>
</div>
</body>
</html>

45
jq/iom.jq Normal file
View File

@ -0,0 +1,45 @@
# Isle of Man Ship Registry feed generator
# Expects pup JSON output holding <p> elements that each contain an <a> tag, outputs xmltodict-compatible JSON
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.
import "./helpers" as helpers;
# Build an RSS 2.0 document from the array of paragraph elements given as input.
# Keys starting with "@" are XML attributes and "#text" is element text.
# Each paragraph is expected to have an <a> child carrying `title` and `href`,
# and may mention the report year between brackets somewhere in its text.
"https://www.iomshipregistry.com/forms-reports/casualty-reports/" as $base
| {
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "IoM Ship Registry reports",
      "description": "Isle of Man Ship Registry casualty reports",
      "link": $base,
      "language": "en-im",
      "pubDate": (now | strftime("%a, %d %b %Y %T %z")),
      "docs": "https://www.rssboard.org/rss-specification",
      "ttl": 1440,
      "generator": "ITSB",
      "item": [
        .[]
        | . as $item
        # Use the first direct <a> child as the report link. `[]?` tolerates a
        # paragraph without a `children` array instead of aborting the whole
        # feed, and paragraphs with no <a> child are skipped since there is
        # nothing to link to.
        | [.children[]? | select(.tag == "a")][0]
        | select(. != null)
        # Resolve the (possibly relative) href once; it is both link and GUID.
        | (.href | helpers::urlresolve($base)) as $link
        | {
          "title": .title,
          "link": $link,
          "guid": {
            "@isPermaLink": "true",
            "#text": $link
          }
        }
        # Optional pubDate. The lookup is wrapped in an array so that a failure
        # yields null rather than empty: `.pubDate = <empty>` produces no output
        # at all, which would silently drop the item from the feed.
        | [
            try (
              # Get the entire paragraph as a string
              [$item | .. | .text? | strings]
              | add
              # Look for the year between brackets
              | [scan("\\([0-9]+\\)")][0]
              | strptime("(%Y)")
              # Add 1 day because it assumes YYYY-00-00, which is Dec 31st of the previous year
              | mktime + 86400
              | strftime("%a, %d %b %Y %T %z")
            )
          ][0] as $pubDate
        # Only emit <pubDate> when a year was actually found.
        | if $pubDate == null then . else .pubDate = $pubDate end
      ]
    }
  }
}