Add TTSB feeds, close #8

This commit is contained in:
Lucidiot 2020-07-25 23:06:58 +02:00
parent 62af7da7ae
commit b08f260fa9
Signed by: lucidiot
GPG Key ID: 3358C1CA6906FB8D
4 changed files with 107 additions and 0 deletions

View File

@ -78,6 +78,28 @@ curl -s 'https://en.havarikommissionen.dk/railway-archive/' \
| json2xml -s -ns 'http://search.yahoo.com/mrss/' media > $DIR/feeds/aibd/rail.xml.new \
&& mv $DIR/feeds/aibd/rail.xml.new $DIR/feeds/aibd/rail.xml
# Build the English TTSB aviation feed.
# Pipeline: fetch the report list page, extract the table rows as JSON with pup,
# turn them into an RSS-shaped JSON document with jq/ttsb.jq, then convert that
# to XML with json2xml. The result is written to a temporary .new file and only
# moved over the published feed when the pipeline reports success, so a failed
# conversion does not replace the existing feed.
# All $DIR expansions are quoted so a path containing spaces or glob characters
# is not split or expanded by the shell.
log Building TTSB Aviation English feed to "$DIR/feeds/ttsb/en/aviation.xml"
mkdir -p "$DIR/feeds/ttsb/en"
curl -s 'https://www.ttsb.gov.tw/english/16051/16052/16053/16058/Lpsimplelist?PageSize=1000' \
    | pup '#LP-ContentPage .TableList table tbody tr json{}' \
    | jq -L "$DIR/jq" -f "$DIR/jq/ttsb.jq" \
        --arg title 'TTSB Aviation' \
        --arg lang 'en-TW' \
        --arg link 'https://www.ttsb.gov.tw/english/16051/16052/16053/16058/Lpsimplelist?PageSize=1000' \
    | json2xml > "$DIR/feeds/ttsb/en/aviation.xml.new" \
    && mv "$DIR/feeds/ttsb/en/aviation.xml.new" "$DIR/feeds/ttsb/en/aviation.xml"
# Build the Traditional Chinese TTSB aviation feed.
# Same pipeline as the English feed, pointed at the Chinese report list:
# curl fetches the page, pup extracts the table rows as JSON, jq/ttsb.jq shapes
# them into an RSS document and json2xml converts it to XML. The output lands in
# a temporary .new file that is moved into place only when the pipeline reports
# success.
# All $DIR expansions are quoted so a path containing spaces or glob characters
# is not split or expanded by the shell.
log Building TTSB Aviation Chinese feed to "$DIR/feeds/ttsb/zh/aviation.xml"
mkdir -p "$DIR/feeds/ttsb/zh"
curl -s 'https://www.ttsb.gov.tw/1133/1154/1155/1159/Lpsimplelist?PageSize=1000' \
    | pup '#LP-ContentPage .TableList table tbody tr json{}' \
    | jq -L "$DIR/jq" -f "$DIR/jq/ttsb.jq" \
        --arg title '國家運輸安全調查委員會' \
        --arg lang 'zh-Hant-TW' \
        --arg link 'https://www.ttsb.gov.tw/1133/1154/1155/1159/Lpsimplelist?PageSize=1000' \
    | json2xml > "$DIR/feeds/ttsb/zh/aviation.xml.new" \
    && mv "$DIR/feeds/ttsb/zh/aviation.xml.new" "$DIR/feeds/ttsb/zh/aviation.xml"
log Building BFU English feed to $DIR/feeds/bfu/en.xml
mkdir -p $DIR/feeds/bfu
curl -s 'https://www.bfu-web.de/EN/Publications/Investigation%20Report/reports_node.html?cms_gts=238148_list%253DdateOfIssue_dt%252Bdesc' \

BIN
img/rss-chinese.gif Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 208 B

View File

@ -658,6 +658,42 @@
</tr>
</tbody>
</table>
<!-- Agency entry: Taiwan Transportation Safety Board.
     Metadata rows (country, language, types, frequency) followed by a nested
     table listing the available feeds; currently only the aviation feed, in
     Chinese and English. -->
<h3>Taiwan Transportation Safety Board</h3>
<table>
<tbody>
<tr>
<td><strong>Country</strong></td>
<td>Taiwan</td>
</tr>
<tr>
<td><strong>Language</strong></td>
<td>Traditional Chinese, English</td>
</tr>
<tr>
<td><strong>Types</strong></td>
<td>Aviation, rail, marine, highway</td>
</tr>
<tr>
<td><strong>Frequency</strong></td>
<td>10 reports/year</td>
</tr>
<tr>
<td><strong>Feeds</strong></td>
<td>
<table>
<tr>
<td>Aviation</td>
<td>
<a href="feeds/ttsb/zh/aviation.xml" target="_blank"><img src="img/rss-chinese.gif" alt="Chinese RSS" /></a>
<a href="feeds/ttsb/en/aviation.xml" target="_blank"><img src="img/rss-english.gif" alt="English RSS" /></a>
</td>
</tr>
</table>
</td>
</tr>
</tbody>
</table>
</div>
</body>
</html>

49
jq/ttsb.jq Normal file
View File

@ -0,0 +1,49 @@
# Taiwan Transportation Safety Board Aviation feed generator
# Required arguments:
#   $title: Feed title (also reused as the channel description)
#   $lang:  Feed language
#   $link:  Feed link (also passed to helpers::urlresolve for each item link)
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
# Each input row is read positionally through its child cells:
#   .children[1]   publication date, as text parsed with "%Y-%m-%d"
#   .children[2]   title and link of the report
#   .children[3:]  remaining cells, rendered into the description
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.
import "./helpers" as helpers;
{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": $title,
      "description": $title,
      "link": $link,
      "language": $lang,
      "pubDate": (now | strftime("%a, %d %b %Y %T %z")),
      "docs": "https://cyber.harvard.edu/rss/rss.html",
      # RSS 2.0 expresses <ttl> in minutes: one day is 1440 minutes.
      # (86400 is one day in seconds and would mean 60 days here.)
      "ttl": 1440,
      "generator": "ITSB",
      "item": [.[] | {
        "title": .children[2].children[0].title,
        # One "<column header>: <value>" line per remaining cell. The value is
        # the cell's direct text, else the text of its first grandchild, else
        # an em dash when neither is present.
        "description": [
          .children[3:][]
          | (
            .["data-th"] + ": " + (
              .children[0].text
              // .children[0].children[0].text
              // "—"
            )
          )
        ] | join("\n"),
        # NOTE(review): urlresolve is defined in ./helpers; presumably it
        # resolves the href against $link - confirm against the helper.
        "link": (.children[2].children[0].children[0].href | helpers::urlresolve($link)),
        "pubDate": (
          .children[1].children[0].text
          | strptime("%Y-%m-%d")
          # strptime returns a [year, month, day, …] array
          # The TTSB pages sometimes use the Republic of China (Minguo) calendar,
          # whose year 1 is 1912. We therefore add 1911 years to any parsed year
          # that is 1998 or earlier, 1999 being the year of their earliest
          # investigation.
          | .[0] |= if . <= 1998 then (. + 1911) else . end
          # Going through mktime converts the corrected array to a timestamp
          # before formatting, rather than formatting the patched array directly.
          | mktime
          | strftime("%a, %d %b %Y %T %z")
        )
      }]
    }
  }
}