Add AAIA custom feeds, close #37
This commit is contained in:
parent
0685bdac52
commit
8f47f99818
43
itsb.xml
43
itsb.xml
|
@ -1127,5 +1127,48 @@
|
||||||
<output>oeaif/de.xml</output>
|
<output>oeaif/de.xml</output>
|
||||||
</feed>
|
</feed>
|
||||||
</source>
|
</source>
|
||||||
|
|
||||||
|
<!--
  AAIA (Hong Kong) investigation report feeds, one per language.
  Each feed names the report index page to fetch (curl), a CSS selector for the
  report table rows (pup), the aaia.jq program with its "lang" and "link"
  arguments, a json2xml step, and the output path of the generated RSS file.
-->
<source id="aaia">
  <name>Air Accident Investigation Authority</name>
  <region>Hong Kong</region>
  <type>Aviation</type>
  <frequency>5-10 reports/year</frequency>

  <feed type="aviation" lang="English" format="rss" id="aaia-en">
    <curl>
      <url>https://www.thb.gov.hk/aaia/eng/investigation_reports/index.htm</url>
    </curl>
    <pup>td:not([colspan]) table tr:not(:first-child)</pup>
    <jq path="aaia.jq">
      <arg name="lang">en</arg>
      <arg name="link">https://www.thb.gov.hk/aaia/eng/investigation_reports/index.htm</arg>
    </jq>
    <json2xml />
    <output>aaia/en.xml</output>
  </feed>

  <feed type="aviation" lang="Simplified Chinese" format="rss" id="aaia-zh-hans">
    <curl>
      <url>https://www.thb.gov.hk/aaia/sc/investigation_reports/index.htm</url>
    </curl>
    <pup>td:not([colspan]) table tr:not(:first-child)</pup>
    <jq path="aaia.jq">
      <arg name="lang">zh-Hans</arg>
      <arg name="link">https://www.thb.gov.hk/aaia/sc/investigation_reports/index.htm</arg>
    </jq>
    <json2xml />
    <output>aaia/zh-hans.xml</output>
  </feed>

  <feed type="aviation" lang="Traditional Chinese" format="rss" id="aaia-zh-hant">
    <curl>
      <url>https://www.thb.gov.hk/aaia/tc/investigation_reports/index.htm</url>
    </curl>
    <pup>td:not([colspan]) table tr:not(:first-child)</pup>
    <jq path="aaia.jq">
      <arg name="lang">zh-Hant</arg>
      <arg name="link">https://www.thb.gov.hk/aaia/tc/investigation_reports/index.htm</arg>
    </jq>
    <json2xml />
    <output>aaia/zh-hant.xml</output>
  </feed>
</source>
|
||||||
</section>
|
</section>
|
||||||
</itsb>
|
</itsb>
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
# AAIA feed generator
|
||||||
|
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
|
||||||
|
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.
|
||||||
|
# Expected variables:
|
||||||
|
# $lang: Language code, without the country code (en/zh-Hans/zh-Hant).
|
||||||
|
# $link: Link to the investigation list.
|
||||||
|
import "./helpers" as helpers;
|
||||||
|
|
||||||
|
# Build the xmltodict-compatible RSS document.
# Input: the pup JSON array of <tr> nodes; every row that has child cells becomes one <item>.
# Cell 0 is read as the publication date, cell 1 as the title, and the remaining cells
# are searched for report links.
{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "AAIA (\($lang))",
      "description": "Air Accident Investigation Authority reports",
      "link": $link,
      "language": "\($lang)-HK",
      "pubDate": (now | strftime("%a, %d %b %Y %T %z")),
      "docs": "https://www.rssboard.org/rss-specification",
      "ttl": 1440,
      "generator": "ITSB",
      "item": [
        .[].children // []
        # Ignore empty lines. The comparison must be explicit: in jq only false and null
        # are falsy, so a bare `select(length)` lets rows of length 0 through, including
        # the `[]` fallback above, which would then fail in the date parsing below.
        | select(length > 0)
        | . as $data
        # Grab all the report links, as we will use the last one as the link and put all of them in the description
        | [
          .[2:][].children
          | ..
          | select(.tag? == "a")
          # Make relative hrefs absolute against the investigation list URL
          | .href |= helpers::urlresolve($link)
        ]
        | {
          # The cell text may sit directly on the cell or on its first child element
          "title": ($data[1].text // $data[1].children[0].text),
          "link": .[-1].href,
          "description": (
            [.[] | "<li><a href=\"\(.href)\" target=\"_blank\">\(.text)</a></li>"]
            | join("")
            | "<ul>\(.)</ul>"
          ),
          "pubDate": (
            $data[0].text // $data[0].children[0].text
            # English dates look like "5 March 2021"; the Chinese pages use 年/月/日 dates
            | if $lang == "en" then strptime("%d %B %Y") | mktime else helpers::parse_chinese_date end
            | strftime("%a, %d %b %Y %T %z")
          )
        }
      ]
    }
  }
}
|
|
@ -41,3 +41,41 @@ def urlresolve(base):
|
||||||
) else . end
|
) else . end
|
||||||
| urlunparse
|
| urlunparse
|
||||||
) end;
|
) end;
|
||||||
|
|
||||||
|
# Basic Chinese number parsing meant for Chinese date parsing.
# Input: a non-empty string made of ASCII digits and/or the numerals 零〇一二三四五六七八九十.
# Output: the corresponding number.
# Only the forms needed for dates are supported: digit-by-digit years (二〇二一),
# and numbers up to 99 that use 十 (十, 十一, 二十, 二十八). Characters outside the
# map are passed through unchanged, so plain ASCII numbers work too.
def parse_chinese_number:
  . as $input
  | {
    "零": "0",
    "〇": "0",
    "一": "1",
    "二": "2",
    "三": "3",
    "四": "4",
    "五": "5",
    "六": "6",
    "七": "7",
    "八": "8",
    "九": "9",
    # 10 is ignored here as we will parse number by number.
    "十": ""
  } as $charmap
  | $input / ""
  | map($charmap[.] // .)
  | join("")
  # A leading 十 stands for an implicit 一十, so it contributes a "1" tens digit:
  # 十一 becomes "11" instead of "1", and 十 alone becomes "1" (scaled to 10 below).
  | if $input | startswith("十") then "1" + . else . end
  | tonumber
  # Parsing number by number, ignoring 10, works as long as there is a digit after 10:
  # 二十八 works because we parse it as 二八 (2 and 8), but 二十 would yield 2 only,
  # so we multiply manually by 10 when the number ends with 10.
  | if $input | endswith("十") then . * 10 else . end;
|
||||||
|
|
||||||
|
# Parse a Traditional or Simplified Chinese date into a Unix timestamp.
# Input: a string containing <year>年<month>月<day>日, where each number is written with
# ASCII digits and/or the numerals understood by parse_chinese_number.
# Output: the Unix timestamp of midnight UTC on that date. Nothing is emitted when the
# input contains no such date, since capture produces no match.
def parse_chinese_date:
  # Left-pad a number to two digits so the string given to fromdateiso8601 is valid
  # ISO 8601 ("2021-03-05", not "2021-3-5") and does not rely on a lenient strptime.
  def pad2: tostring | if length < 2 then "0" + . else . end;
  capture("(?<year>[0123456789零〇一二三四五六七八九十]+)年(?<month>[0123456789零〇一二三四五六七八九十]+)月(?<day>[0123456789零〇一二三四五六七八九十]+)日")
  | map_values(parse_chinese_number)
  # Years below 1900 are assumed to be counted from 1911 (presumably the Republic of
  # China calendar) and are shifted to Gregorian years. This relies on never receiving
  # Gregorian dates before 1900, and stops working once such a year count reaches 1900,
  # i.e. in the Gregorian year 3811.
  | if .year < 1900 then .year += 1911 else . end
  | "\(.year)-\(.month | pad2)-\(.day | pad2)T00:00:00Z"
  | fromdateiso8601;
|
||||||
|
|
Loading…
Reference in New Issue