Generate JTSB feeds

This commit is contained in:
Lucidiot 2019-12-14 15:26:17 +01:00
parent 4d0d0ab417
commit fd5e71199d
Signed by: lucidiot
GPG Key ID: 3358C1CA6906FB8D
8 changed files with 295 additions and 1 deletions

View File

@ -4,6 +4,7 @@ echo Building to $DIR/feeds
# Create the output directory; the path is quoted so a $DIR containing
# whitespace is passed to mkdir as a single argument.
mkdir -p "$DIR/feeds"
# When json2xml cannot be found through the current PATH, prepend $DIR/bin
# so the rest of the script looks there first.
if ! command -v json2xml >/dev/null 2>&1; then
    echo "Adding $DIR/bin to PATH"
    PATH="$DIR/bin:$PATH"
fi
@ -20,3 +21,47 @@ curl -s 'https://www.taic.org.nz/inquiries?order=field_publication_date&sort=des
| jq -f $DIR/jq/taic.jq \
| json2xml > $DIR/feeds/taic.xml.new \
&& mv $DIR/feeds/taic.xml.new $DIR/feeds/taic.xml
# pup selectors used on the JTSB report listings: every table row except the
# first one, dumped as JSON.
JTSB_EN_ROWS='table.kankokuiken-en tr:not(:first-child) json{}'
JTSB_JP_ROWS='table.jiko-information tr:not(:first-child) json{}'

# build_jtsb_feed LABEL LANG NAME URL SELECTOR
#
# Downloads URL, extracts the rows matching SELECTOR with pup, converts them
# with $DIR/jq/jtsb/LANG/NAME.jq and writes the XML produced by json2xml to
# $DIR/feeds/jtsb/LANG/NAME.xml.
#
# The feed is first written to a ".new" file and only moved over the real
# feed when the pipeline reports success, so a failed run leaves the
# previously generated feed in place.
#
# An `if` is used instead of `pipeline && mv` so that a failing pipeline makes
# the function return 0: under `set -e` this keeps the script going, exactly
# like the inline `pipeline && mv` form did, while a failing mv still
# propagates its status.
build_jtsb_feed() {
    _jtsb_out="$DIR/feeds/jtsb/$2/$3.xml"
    echo "Building JTSB $1 feed to $_jtsb_out"
    mkdir -p "$DIR/feeds/jtsb/$2"
    if curl -s "$4" \
        | pup "$5" \
        | jq -f "$DIR/jq/jtsb/$2/$3.jq" \
        | json2xml > "$_jtsb_out.new"
    then
        mv "$_jtsb_out.new" "$_jtsb_out"
    fi
}

build_jtsb_feed 'Aviation English' en air 'http://www.mlit.go.jp/jtsb/airrep.html' "$JTSB_EN_ROWS"
build_jtsb_feed 'Rail English' en rail 'http://www.mlit.go.jp/jtsb/railrep.html' "$JTSB_EN_ROWS"
build_jtsb_feed 'Marine English' en marine 'http://www.mlit.go.jp/jtsb/marrep.html' "$JTSB_EN_ROWS"
build_jtsb_feed 'Aviation Japanese' jp air 'https://jtsb.mlit.go.jp/jtsb/aircraft/air-kensaku-list.php?sort=desc&by=p' "$JTSB_JP_ROWS"
build_jtsb_feed 'Rail Japanese' jp rail 'https://jtsb.mlit.go.jp/jtsb/railway/rail-kensaku-list.php?sort=desc&by=p' "$JTSB_JP_ROWS"
build_jtsb_feed 'Marine Japanese' jp marine 'https://jtsb.mlit.go.jp/jtsb/ship/ship-kensaku-list.php?sort=desc&by=p' "$JTSB_JP_ROWS"

View File

@ -193,7 +193,7 @@
</tr>
<tr>
<td><strong>Types</strong></td>
<td>Road, rail, aviation, fluvial, ski lifts</td>
<td>Rail, aviation, marine</td>
</tr>
<tr>
<td><strong>Frequency</strong></td>
@ -231,6 +231,41 @@
</tr>
</tbody>
</table>
<h3>Japan Transport Safety Board</h3>
<table>
<tbody>
<tr>
<td><strong>Country</strong></td>
<td>Japan</td>
</tr>
<tr>
<td><strong>Languages</strong></td>
<td>Japanese, English</td>
</tr>
<tr>
<td><strong>Types</strong></td>
<td>Rail, aviation, marine</td>
</tr>
<tr>
<td><strong>Frequency</strong></td>
<td>
Japanese: 100+ reports/year<br />
English: 30-40 reports/year
</td>
</tr>
<tr>
<td><strong>Feeds</strong></td>
<td>
<ul>
<li>Aviation: <a href="feeds/jtsb/en/air.xml" target="_blank">English</a> - <a href="feeds/jtsb/jp/air.xml" target="_blank">Japanese</a></li>
<li>Rail: <a href="feeds/jtsb/en/rail.xml" target="_blank">English</a> - <a href="feeds/jtsb/jp/rail.xml" target="_blank">Japanese</a></li>
<li>Marine: <a href="feeds/jtsb/en/marine.xml" target="_blank">English</a> - <a href="feeds/jtsb/jp/marine.xml" target="_blank">Japanese</a></li>
</ul>
</td>
</tr>
</tbody>
</table>
</div>
</body>
</html>

39
jq/jtsb/en/air.jq Normal file
View File

@ -0,0 +1,39 @@
# JTSB aviation feed generator (English version) from http://www.mlit.go.jp/jtsb/airrep.html
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.

# Text of the first element nested in the row's cell at index $i.
# A missing cell or child yields null, which + treats as an identity value
# when concatenating strings.
def cell($i): .children[$i].children[0].text;

# Formats a Unix timestamp with the date layout used for RSS pubDate.
def rss_date: strftime("%a, %d %b %Y %T %z");

{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "JTSB Aviation",
      "description": "Japan Transport Safety Board aviation accident reports",
      "link": "http://www.mlit.go.jp/jtsb/airrep.html",
      "language": "en",
      "pubDate": (now | rss_date),
      "docs": "https://cyber.harvard.edu/rss/rss.html",
      # RSS 2.0 expresses ttl in minutes: 1440 minutes is one day.
      "ttl": 1440,
      "generator": "ITSB",
      # One item per input row.
      "item": [.[] | {
        "title": (cell(0) + " - " + cell(2) + " - " + cell(5)),
        "description": (
          cell(4) + " " + cell(5)
          + " operated by " + cell(3)
          + " - " + cell(6)
        ),
        # The href of the last cell is appended to the site base, so it is
        # presumably a relative path - TODO confirm against the page.
        "link": ("http://www.mlit.go.jp/jtsb/" + .children[-1].children[0].href),
        # NOTE: when the date cannot be parsed, try produces no value at all
        # and the object is not built, so the row is left out of the feed.
        "pubDate": (try (cell(1) | strptime("%Y.%m.%d") | mktime | rss_date))
      }]
    }
  }
}

33
jq/jtsb/en/marine.jq Normal file
View File

@ -0,0 +1,33 @@
# JTSB marine feed generator (English version) from http://www.mlit.go.jp/jtsb/marrep.html
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.

# Text of the first element nested in the row's cell at index $i.
# A missing cell or child yields null, which + treats as an identity value
# when concatenating strings.
def cell($i): .children[$i].children[0].text;

# Formats a Unix timestamp with the date layout used for RSS pubDate.
def rss_date: strftime("%a, %d %b %Y %T %z");

{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "JTSB Marine",
      "description": "Japan Transport Safety Board marine accident reports",
      "link": "http://www.mlit.go.jp/jtsb/marrep.html",
      "language": "en",
      "pubDate": (now | rss_date),
      "docs": "https://cyber.harvard.edu/rss/rss.html",
      # RSS 2.0 expresses ttl in minutes: 1440 minutes is one day.
      "ttl": 1440,
      "generator": "ITSB",
      # One item per input row.
      "item": [.[] | {
        "title": (cell(0) + " - " + cell(4)),
        "description": (cell(3) + " - " + cell(5)),
        # The href of the last cell is appended to the site base, so it is
        # presumably a relative path - TODO confirm against the page.
        "link": ("http://www.mlit.go.jp/jtsb/" + .children[-1].children[0].href),
        # NOTE: when the date cannot be parsed, try produces no value at all
        # and the object is not built, so the row is left out of the feed.
        "pubDate": (try (cell(1) | strptime("%Y.%m.%d") | mktime | rss_date))
      }]
    }
  }
}

33
jq/jtsb/en/rail.jq Normal file
View File

@ -0,0 +1,33 @@
# JTSB rail feed generator (English version) from http://www.mlit.go.jp/jtsb/railrep.html
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.

# Text of the first element nested in the row's cell at index $i.
# A missing cell or child yields null, which + treats as an identity value
# when concatenating strings.
def cell($i): .children[$i].children[0].text;

# Formats a Unix timestamp with the date layout used for RSS pubDate.
def rss_date: strftime("%a, %d %b %Y %T %z");

{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "JTSB Rail",
      "description": "Japan Transport Safety Board railroad accident reports",
      "link": "http://www.mlit.go.jp/jtsb/railrep.html",
      "language": "en",
      "pubDate": (now | rss_date),
      "docs": "https://cyber.harvard.edu/rss/rss.html",
      # RSS 2.0 expresses ttl in minutes: 1440 minutes is one day.
      "ttl": 1440,
      "generator": "ITSB",
      # One item per input row.
      "item": [.[] | {
        "title": (cell(0) + " - " + cell(4)),
        "description": (cell(3) + " - " + cell(5)),
        # The href of the last cell is appended to the site base, so it is
        # presumably a relative path - TODO confirm against the page.
        "link": ("http://www.mlit.go.jp/jtsb/" + .children[-1].children[0].href),
        # NOTE: when the date cannot be parsed, try produces no value at all
        # and the object is not built, so the row is left out of the feed.
        "pubDate": (try (cell(1) | strptime("%Y.%m.%d") | mktime | rss_date))
      }]
    }
  }
}

38
jq/jtsb/jp/air.jq Normal file
View File

@ -0,0 +1,38 @@
# JTSB aviation feed generator (Japanese version) from https://jtsb.mlit.go.jp/jtsb/aircraft/air-kensaku-list.php?sort=desc&by=p
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.

# Own text of the row's cell at index $i.
def cell_text($i): .children[$i].text;

# Text of the first element nested in the row's cell at index $i.
def child_text($i): .children[$i].children[0].text;

# Formats a Unix timestamp with the date layout used for RSS pubDate.
def rss_date: strftime("%a, %d %b %Y %T %z");

# Missing cells or children yield null in the helpers above; + treats null as
# an identity value when concatenating strings.
{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "JTSB 航空報告",
      "description": "運輸安全委員会 航空報告",
      "link": "https://jtsb.mlit.go.jp/jtsb/aircraft/air-kensaku-list.php",
      "language": "ja-JP",
      "pubDate": (now | rss_date),
      "docs": "https://cyber.harvard.edu/rss/rss.html",
      # RSS 2.0 expresses ttl in minutes: 1440 minutes is one day.
      "ttl": 1440,
      "generator": "ITSB",
      # One item per input row.
      "item": [.[] | {
        "title": (child_text(0) + " " + child_text(4)),
        "description": (
          cell_text(4)
          + " "
          + cell_text(3) + child_text(3)
          + " "
          + cell_text(2)
          + " "
          + cell_text(1)
        ),
        # Used as found in the page; assumes the href is already usable as a
        # feed link - TODO confirm it is absolute.
        "link": .children[5].children[2].href,
        # Every non-digit is stripped from the cell text and the remainder is
        # read as YYYYMMDD.
        # NOTE: when the date cannot be parsed, try produces no value at all
        # and the object is not built, so the row is left out of the feed.
        "pubDate": (try (cell_text(5) | sub("[^0-9]"; ""; "g") | strptime("%Y%m%d") | mktime | rss_date))
      }]
    }
  }
}

35
jq/jtsb/jp/marine.jq Normal file
View File

@ -0,0 +1,35 @@
# JTSB marine feed generator (Japanese version) from https://jtsb.mlit.go.jp/jtsb/ship/ship-kensaku-list.php?sort=desc&by=p
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.

# Own text of the row's cell at index $i.
def cell_text($i): .children[$i].text;

# Text of the first element nested in the row's cell at index $i.
def child_text($i): .children[$i].children[0].text;

# Formats a Unix timestamp with the date layout used for RSS pubDate.
def rss_date: strftime("%a, %d %b %Y %T %z");

# Missing cells or children yield null in the helpers above; + treats null as
# an identity value when concatenating strings.
{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "JTSB 船舶報告",
      "description": "運輸安全委員会 船舶報告",
      "link": "https://jtsb.mlit.go.jp/jtsb/ship/ship-kensaku-list.php",
      "language": "ja-JP",
      "pubDate": (now | rss_date),
      "docs": "https://cyber.harvard.edu/rss/rss.html",
      # RSS 2.0 expresses ttl in minutes: 1440 minutes is one day.
      "ttl": 1440,
      "generator": "ITSB",
      # One item per input row.
      "item": [.[] | {
        "title": (child_text(0) + " " + child_text(3)),
        "description": (
          cell_text(3)
          + " "
          + cell_text(2)
          + " "
          + cell_text(1)
        ),
        # Used as found in the page; assumes the href is already usable as a
        # feed link - TODO confirm it is absolute.
        "link": .children[4].children[2].href,
        # Every non-digit is stripped from the cell text and the remainder is
        # read as YYYYMMDD.
        # NOTE: when the date cannot be parsed, try produces no value at all
        # and the object is not built, so the row is left out of the feed.
        "pubDate": (try (cell_text(4) | sub("[^0-9]"; ""; "g") | strptime("%Y%m%d") | mktime | rss_date))
      }]
    }
  }
}

36
jq/jtsb/jp/rail.jq Normal file
View File

@ -0,0 +1,36 @@
# JTSB rail feed generator (Japanese version) from https://jtsb.mlit.go.jp/jtsb/railway/rail-kensaku-list.php?sort=desc&by=p
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.

# Own text of the row's cell at index $i.
def cell_text($i): .children[$i].text;

# Text of the first element nested in the row's cell at index $i.
def child_text($i): .children[$i].children[0].text;

# Formats a Unix timestamp with the date layout used for RSS pubDate.
def rss_date: strftime("%a, %d %b %Y %T %z");

# Missing cells or children yield null in the helpers above; + treats null as
# an identity value when concatenating strings.
{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "JTSB 鉄道報告",
      "description": "運輸安全委員会 鉄道報告",
      "link": "https://jtsb.mlit.go.jp/jtsb/railway/rail-kensaku-list.php",
      "language": "ja-JP",
      "pubDate": (now | rss_date),
      "docs": "https://cyber.harvard.edu/rss/rss.html",
      # RSS 2.0 expresses ttl in minutes: 1440 minutes is one day.
      "ttl": 1440,
      "generator": "ITSB",
      # One item per input row.
      "item": [.[] | {
        "title": (child_text(0) + " " + child_text(3)),
        "description": (
          cell_text(3)
          + " "
          + cell_text(2) + child_text(2)
          + " "
          + cell_text(1)
        ),
        # Used as found in the page; assumes the href is already usable as a
        # feed link - TODO confirm it is absolute.
        "link": .children[4].children[2].href,
        # Every non-digit is stripped from the cell text and the remainder is
        # read as YYYYMMDD.
        # NOTE: when the date cannot be parsed, try produces no value at all
        # and the object is not built, so the row is left out of the feed.
        "pubDate": (try (cell_text(4) | sub("[^0-9]"; ""; "g") | strptime("%Y%m%d") | mktime | rss_date))
      }]
    }
  }
}