Add AAIA custom feeds, close #37
This commit is contained in:
parent
0685bdac52
commit
8f47f99818
43
itsb.xml
43
itsb.xml
|
@ -1127,5 +1127,48 @@
|
|||
<output>oeaif/de.xml</output>
|
||||
</feed>
|
||||
</source>
|
||||
|
||||
<!--
  Hong Kong Air Accident Investigation Authority (AAIA): three feeds built from the
  investigation reports index page, one per language edition of the site (eng / sc / tc).
  Each feed lists the steps curl, pup, jq (aaia.jq), json2xml and an output path;
  presumably they run in that order as a pipeline (confirm against the feed runner).
  aaia.jq receives "lang" (the feed's language code) and "link" (the index page URL,
  which it uses as the channel link and as the base for resolving report links).
-->
<source id="aaia">
|
||||
<name>Air Accident Investigation Authority</name>
|
||||
<region>Hong Kong</region>
|
||||
<type>Aviation</type>
|
||||
<frequency>5-10 reports/year</frequency>
|
||||
<feed type="aviation" lang="English" format="rss" id="aaia-en">
|
||||
<curl>
|
||||
<url>https://www.thb.gov.hk/aaia/eng/investigation_reports/index.htm</url>
|
||||
</curl>
|
||||
<!--
  Selects every table row except a first-child row, in tables nested inside cells that
  have no colspan attribute. The skipped first row is presumably the header row
  (confirm against the page markup). The same selector is used for all three feeds.
-->
<pup>td:not([colspan]) table tr:not(:first-child)</pup>
|
||||
<jq path="aaia.jq">
|
||||
<arg name="lang">en</arg>
|
||||
<arg name="link">https://www.thb.gov.hk/aaia/eng/investigation_reports/index.htm</arg>
|
||||
</jq>
|
||||
<json2xml />
|
||||
<output>aaia/en.xml</output>
|
||||
</feed>
|
||||
<feed type="aviation" lang="Simplified Chinese" format="rss" id="aaia-zh-hans">
|
||||
<curl>
|
||||
<url>https://www.thb.gov.hk/aaia/sc/investigation_reports/index.htm</url>
|
||||
</curl>
|
||||
<pup>td:not([colspan]) table tr:not(:first-child)</pup>
|
||||
<jq path="aaia.jq">
|
||||
<arg name="lang">zh-Hans</arg>
|
||||
<arg name="link">https://www.thb.gov.hk/aaia/sc/investigation_reports/index.htm</arg>
|
||||
</jq>
|
||||
<json2xml />
|
||||
<output>aaia/zh-hans.xml</output>
|
||||
</feed>
|
||||
<feed type="aviation" lang="Traditional Chinese" format="rss" id="aaia-zh-hant">
|
||||
<curl>
|
||||
<url>https://www.thb.gov.hk/aaia/tc/investigation_reports/index.htm</url>
|
||||
</curl>
|
||||
<pup>td:not([colspan]) table tr:not(:first-child)</pup>
|
||||
<jq path="aaia.jq">
|
||||
<arg name="lang">zh-Hant</arg>
|
||||
<arg name="link">https://www.thb.gov.hk/aaia/tc/investigation_reports/index.htm</arg>
|
||||
</jq>
|
||||
<json2xml />
|
||||
<output>aaia/zh-hant.xml</output>
|
||||
</feed>
|
||||
</source>
|
||||
</section>
|
||||
</itsb>
|
||||
|
|
|
@ -0,0 +1,50 @@
|
|||
# AAIA feed generator
|
||||
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
|
||||
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.
|
||||
# Expected variables:
|
||||
# $lang: Language code, without the country code (en/zh-Hans/zh-Hant).
|
||||
# $link: Link to the investigation list.
|
||||
import "./helpers" as helpers;
|
||||
|
||||
# Build the RSS document. The input is an array of <tr> nodes; each row's children are
# its cells: cell 0 holds the date, cell 1 the title, cells 2+ the report links.
{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "AAIA (\($lang))",
      "description": "Air Accident Investigation Authority reports",
      "link": $link,
      "language": "\($lang)-HK",
      "pubDate": (now | strftime("%a, %d %b %Y %T %z")),
      "docs": "https://www.rssboard.org/rss-specification",
      "ttl": 1440,
      "generator": "ITSB",
      "item": [
        # Rows without a "children" key yield null and are dropped by `//`; when no row
        # has children at all, `//` falls back to a single empty array.
        .[].children // []
        # Ignore empty rows, including the fallback above. The comparison must be explicit:
        # 0 is truthy in jq, so a bare `select(length)` would let empty rows through and
        # the date parsing below would then fail on null.
        | select(length > 0)
        | . as $data
        # Grab all the report links, as we will use the last one as the link and put all of them in the description
        | [
            .[2:][].children
            | ..
            | select(.tag? == "a")
            # Make relative hrefs absolute, using the index page as the base.
            | .href |= helpers::urlresolve($link)
          ]
        | {
            # The cell text is either on the cell itself or on its first child element.
            "title": ($data[1].text // $data[1].children[0].text),
            "link": .[-1].href,
            "description": (
              [.[] | "<li><a href=\"\(.href)\" target=\"_blank\">\(.text)</a></li>"]
              | join("")
              | "<ul>\(.)</ul>"
            ),
            "pubDate": (
              $data[0].text // $data[0].children[0].text
              # English dates look like "5 November 2020"; the Chinese editions go through the helper.
              | if $lang == "en" then strptime("%d %B %Y") | mktime else helpers::parse_chinese_date end
              | strftime("%a, %d %b %Y %T %z")
            )
          }
      ]
    }
  }
}
|
|
@ -41,3 +41,41 @@ def urlresolve(base):
|
|||
) else . end
|
||||
| urlunparse
|
||||
) end;
|
||||
|
||||
# Basic Chinese number parsing meant for Chinese date parsing.
#
# Input:  a string made of Chinese digits (零〇一二三四五六七八九), 十, and/or ASCII digits.
# Output: the corresponding number.
#
# The string is converted character by character into decimal digits:
#   - Chinese digits map to their ASCII digit; any other character (e.g. an ASCII digit)
#     is passed through unchanged, and tonumber fails if the result is not numeric.
#   - 十 is positional: at the start it stands for a leading "1" (十一 = 11), at the end
#     for a trailing "0" (二十 = 20), alone for both (十 = 10), and between two digits it
#     contributes nothing (二十八 = 28).
# This covers 0-99 plus digit-by-digit years (二〇二〇 = 2020); larger units such as 百 or 千
# are not supported.
def parse_chinese_number:
  {
    "零": "0",
    "〇": "0",
    "一": "1",
    "二": "2",
    "三": "3",
    "四": "4",
    "五": "5",
    "六": "6",
    "七": "7",
    "八": "8",
    "九": "9"
  } as $charmap
  | (. / "") as $chars
  | ($chars | length) as $count
  | [
      range(0; $count) as $i
      | $chars[$i]
      | if . == "十" then
          (if $i == 0 then "1" else "" end)
          + (if $i == $count - 1 then "0" else "" end)
        else
          $charmap[.] // .
        end
    ]
  | join("")
  # Empty input keeps yielding 1, as it did before, so existing callers are unaffected.
  | if . == "" then 1 else tonumber end;
|
||||
|
||||
# Parse a Traditional or Simplified Chinese date into a Unix timestamp.
#
# Input:  a string containing a date written as <year>年<month>月<day>日, where each part
#         uses ASCII digits and/or Chinese numerals. Text around the date is ignored.
# Output: seconds since the Unix epoch for midnight UTC on that date.
# Fails if the input contains no such date.
def parse_chinese_date:
  # Zero-pad to two digits so the string handed to fromdateiso8601 is strict ISO 8601
  # rather than relying on a lenient strptime.
  def pad2: tostring | if length < 2 then "0" + . else . end;
  capture("(?<year>[0123456789零〇一二三四五六七八九十]+)年(?<month>[0123456789零〇一二三四五六七八九十]+)月(?<day>[0123456789零〇一二三四五六七八九十]+)日")
  | map_values(parse_chinese_number)
  # Years below 1900 are treated as era years offset by 1911 (presumably the Republic of
  # China / Minguo calendar), assuming we never get Gregorian dates before 1900.
  # The heuristic stops working once the era year itself reaches 1900, i.e. in the year 3811.
  | if .year < 1900 then .year += 1911 else . end
  | "\(.year)-\(.month | pad2)-\(.day | pad2)T00:00:00Z"
  | fromdateiso8601;
|
||||
|
|
Loading…
Reference in New Issue