itsb/feedgen.sh

234 lines
11 KiB
Bash
Executable File

#!/bin/bash
# Print the command-line help to stdout and terminate the script with status 1.
usage() {
  printf '%s\n' \
    "Usage: $0 [-d BASE_DIR] [-q]" \
    " -d, --base-dir Set the base directory for the feed generator files and output." \
    " -q, --quiet Hide status logs - useful when running the script as a cronjob."
  exit 1
}
# Parse command-line options.
#   -d|--base-dir DIR  sets DIR (base directory for generator files and output)
#   -q|--quiet         sets QUIET=yes, which silences log()
# Any other argument prints an error followed by the usage text and exits.
while (( $# > 0 )); do  # numeric test; `>` inside [[ ]] compares strings
  case "$1" in
    -d|--base-dir)
      # Refuse a trailing -d with no value instead of silently ignoring it.
      if (( $# < 2 )); then
        echo "Missing argument for $1"
        usage
      fi
      DIR="$2"
      shift  # consume the option's value; the flag itself is shifted below
      ;;
    -q|--quiet)
      QUIET=yes
      ;;
    *)
      echo "Unknown parameter: $1"
      usage
      ;;
  esac
  shift
done
# Echo the given arguments as a status message unless QUIET is non-empty.
# Returns 1 without printing anything when QUIET is set.
log() {
  if [[ -n "$QUIET" ]]; then
    return 1
  fi
  echo "$@"
}
# Make a pipeline fail when ANY of its stages fails, not only the last one.
# Every feed below is built as `curl | pup | jq | json2xml > X.new && mv X.new X`;
# without pipefail a failed download or transform could still replace a good feed.
set -o pipefail

# Default the base directory to the directory that contains this script.
if [[ -z "$DIR" ]]; then
  DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" >/dev/null 2>&1 && pwd)"
fi

log "Building to $DIR/feeds"
mkdir -p "$DIR/feeds"

# When json2xml is not already on PATH, look for it in $DIR/bin as well.
if ! command -v json2xml >/dev/null 2>&1; then
  log "Adding $DIR/bin to PATH"
  PATH="$DIR/bin:$PATH"
fi
# ATSB feed: fetch the completed/discontinued investigation listing, extract the
# table rows as JSON with pup, transform them with jq/atsb.jq and serialise with
# json2xml. The result is written to a .new file and renamed into place only
# when the pipeline reports success, so a failed run keeps the previous feed.
log "Building ATSB feed to $DIR/feeds/atsb.xml"
curl -s 'https://www.atsb.gov.au/publications/safety-investigation-reports/?s=1&sort=OccurrenceReleaseDate&sortAscending=descending&investigationStatus=Completed,Discontinued&printAll=true' \
  | pup 'table.selectable_grid tr:not(.header) json{}' \
  | jq -L "$DIR/jq" -f "$DIR/jq/atsb.jq" \
  | json2xml > "$DIR/feeds/atsb.xml.new" \
  && mv "$DIR/feeds/atsb.xml.new" "$DIR/feeds/atsb.xml"
# TAIC feed: fetch the inquiries listing sorted by publication date (descending),
# extract the table rows as JSON, transform with jq/taic.jq and serialise with
# json2xml. Written to a .new file first and renamed only on pipeline success.
log "Building TAIC feed to $DIR/feeds/taic.xml"
curl -s 'https://www.taic.org.nz/inquiries?order=field_publication_date&sort=desc' \
  | pup '#view-table-wrapper tbody tr json{}' \
  | jq -L "$DIR/jq" -f "$DIR/jq/taic.jq" \
  | json2xml > "$DIR/feeds/taic.xml.new" \
  && mv "$DIR/feeds/taic.xml.new" "$DIR/feeds/taic.xml"
# WMSC feed: fetch the oversight reports page, extract the list items as JSON,
# transform with jq/wmsc.jq (no -L library path is passed for this filter) and
# serialise with json2xml. Written to a .new file first and renamed only on
# pipeline success.
log "Building WMSC feed to $DIR/feeds/wmsc.xml"
curl -s 'https://wmsc.gov/oversight/reports/' \
  | pup '.post-content li json{}' \
  | jq -f "$DIR/jq/wmsc.jq" \
  | json2xml > "$DIR/feeds/wmsc.xml.new" \
  && mv "$DIR/feeds/wmsc.xml.new" "$DIR/feeds/wmsc.xml"
# DMAIB feed: fetch the report listing, extract the list items as JSON and
# transform them with jq/denmark.jq, which receives the feed title, description
# and link as jq variables.
# NOTE(review): the json2xml flags `-s -ns <uri> media` presumably declare the
# Media RSS namespace under the "media" prefix - confirm against json2xml's help.
# Written to a .new file first and renamed only on pipeline success.
log "Building DMAIB feed to $DIR/feeds/dmaib.xml"
curl -s 'https://dmaib.com/reports/?categorizations=9510' \
  | pup 'ul.items > li[data-url] json{}' \
  | jq -f "$DIR/jq/denmark.jq" \
    --arg title 'DMAIB' \
    --arg description 'Danish Maritime Accident Investigation Board accident reports' \
    --arg link 'https://dmaib.com/reports/?categorizations=9510' \
  | json2xml -s -ns 'http://search.yahoo.com/mrss/' media > "$DIR/feeds/dmaib.xml.new" \
  && mv "$DIR/feeds/dmaib.xml.new" "$DIR/feeds/dmaib.xml"
# AIBD feeds (aviation and rail): both use jq/denmark.jq, parameterised through
# the title, description and link jq variables, and are written under feeds/aibd.
# NOTE(review): the json2xml flags `-s -ns <uri> media` presumably declare the
# Media RSS namespace under the "media" prefix - confirm against json2xml's help.
# Each feed is written to a .new file first and renamed only on pipeline success.
log "Building AIBD Aviation feed to $DIR/feeds/aibd/aviation.xml"
mkdir -p "$DIR/feeds/aibd"
curl -s 'https://en.havarikommissionen.dk/aviation-archive/' \
  | pup 'ul.items > li[data-url] json{}' \
  | jq -f "$DIR/jq/denmark.jq" \
    --arg title 'AIBD Aviation' \
    --arg description 'Accident Investigation Board Denmark aviation accident reports' \
    --arg link 'https://en.havarikommissionen.dk/aviation-archive/' \
  | json2xml -s -ns 'http://search.yahoo.com/mrss/' media > "$DIR/feeds/aibd/aviation.xml.new" \
  && mv "$DIR/feeds/aibd/aviation.xml.new" "$DIR/feeds/aibd/aviation.xml"

log "Building AIBD Rail feed to $DIR/feeds/aibd/rail.xml"
curl -s 'https://en.havarikommissionen.dk/railway-archive/' \
  | pup 'ul.items > li[data-url] json{}' \
  | jq -f "$DIR/jq/denmark.jq" \
    --arg title 'AIBD Rail' \
    --arg description 'Accident Investigation Board Denmark railroad accident reports' \
    --arg link 'https://en.havarikommissionen.dk/railway-archive/' \
  | json2xml -s -ns 'http://search.yahoo.com/mrss/' media > "$DIR/feeds/aibd/rail.xml.new" \
  && mv "$DIR/feeds/aibd/rail.xml.new" "$DIR/feeds/aibd/rail.xml"
# TTSB aviation feeds (English and Chinese): both use jq/ttsb.jq, parameterised
# through the title, lang and link jq variables. PageSize=1000 is requested in
# the listing URL. Each feed is written to a .new file first and renamed only on
# pipeline success.
log "Building TTSB Aviation English feed to $DIR/feeds/ttsb/en/aviation.xml"
mkdir -p "$DIR/feeds/ttsb/en"
curl -s 'https://www.ttsb.gov.tw/english/16051/16052/16053/16058/Lpsimplelist?PageSize=1000' \
  | pup '#LP-ContentPage .TableList table tbody tr json{}' \
  | jq -L "$DIR/jq" -f "$DIR/jq/ttsb.jq" \
    --arg title 'TTSB Aviation' \
    --arg lang 'en-TW' \
    --arg link 'https://www.ttsb.gov.tw/english/16051/16052/16053/16058/Lpsimplelist?PageSize=1000' \
  | json2xml > "$DIR/feeds/ttsb/en/aviation.xml.new" \
  && mv "$DIR/feeds/ttsb/en/aviation.xml.new" "$DIR/feeds/ttsb/en/aviation.xml"

log "Building TTSB Aviation Chinese feed to $DIR/feeds/ttsb/zh/aviation.xml"
mkdir -p "$DIR/feeds/ttsb/zh"
curl -s 'https://www.ttsb.gov.tw/1133/1154/1155/1159/Lpsimplelist?PageSize=1000' \
  | pup '#LP-ContentPage .TableList table tbody tr json{}' \
  | jq -L "$DIR/jq" -f "$DIR/jq/ttsb.jq" \
    --arg title '國家運輸安全調查委員會' \
    --arg lang 'zh-Hant-TW' \
    --arg link 'https://www.ttsb.gov.tw/1133/1154/1155/1159/Lpsimplelist?PageSize=1000' \
  | json2xml > "$DIR/feeds/ttsb/zh/aviation.xml.new" \
  && mv "$DIR/feeds/ttsb/zh/aviation.xml.new" "$DIR/feeds/ttsb/zh/aviation.xml"
# BFU feeds (English and German): each language has its own listing URL and its
# own jq filter (jq/bfu/en.jq, jq/bfu/de.jq); both are written under feeds/bfu.
# Each feed is written to a .new file first and renamed only on pipeline success.
log "Building BFU English feed to $DIR/feeds/bfu/en.xml"
mkdir -p "$DIR/feeds/bfu"
curl -s 'https://www.bfu-web.de/EN/Publications/Investigation%20Report/reports_node.html?cms_gts=238148_list%253DdateOfIssue_dt%252Bdesc' \
  | pup 'table.links tbody tr json{}' \
  | jq -L "$DIR/jq" -f "$DIR/jq/bfu/en.jq" \
  | json2xml > "$DIR/feeds/bfu/en.xml.new" \
  && mv "$DIR/feeds/bfu/en.xml.new" "$DIR/feeds/bfu/en.xml"

log "Building BFU Deutsch feed to $DIR/feeds/bfu/de.xml"
curl -s 'https://www.bfu-web.de/DE/Publikationen/Untersuchungsberichte/untersuchungsberichte_node.html?cms_gts=235998_list%253DdateOfIssue_dt%252Bdesc' \
  | pup 'table.links tbody tr json{}' \
  | jq -L "$DIR/jq" -f "$DIR/jq/bfu/de.jq" \
  | json2xml > "$DIR/feeds/bfu/de.xml.new" \
  && mv "$DIR/feeds/bfu/de.xml.new" "$DIR/feeds/bfu/de.xml"
# UZPLN feeds (English and Czech): both use jq/uzpln.jq, parameterised through
# the language, description and link jq variables. The first table row is
# skipped by the pup selector. Each feed is written to a .new file first and
# renamed only on pipeline success.
log "Building UZPLN English feed to $DIR/feeds/uzpln/en.xml"
mkdir -p "$DIR/feeds/uzpln"
curl -s 'https://www.uzpln.cz/en/reports' \
  | pup 'table.table tbody tr:not(:first-child) json{}' \
  | jq -L "$DIR/jq" \
    -f "$DIR/jq/uzpln.jq" \
    --arg language 'en' \
    --arg description 'Air Accidents Investigation Institute' \
    --arg link 'https://www.uzpln.cz/en/reports' \
  | json2xml > "$DIR/feeds/uzpln/en.xml.new" \
  && mv "$DIR/feeds/uzpln/en.xml.new" "$DIR/feeds/uzpln/en.xml"

# NOTE(review): 'cz' is the country code; the ISO 639-1 language code for Czech
# is 'cs'. Left unchanged because jq/uzpln.jq may depend on this exact value -
# confirm how $language is used there before changing it.
log "Building UZPLN Czech feed to $DIR/feeds/uzpln/cz.xml"
curl -s 'https://www.uzpln.cz/zpravy-ln' \
  | pup 'table.table tbody tr:not(:first-child) json{}' \
  | jq -L "$DIR/jq" \
    -f "$DIR/jq/uzpln.jq" \
    --arg language 'cz' \
    --arg description 'Ústav pro odborné Zjišťování Příčin Leteckých Nehod' \
    --arg link 'https://www.uzpln.cz/zpravy-ln' \
  | json2xml > "$DIR/feeds/uzpln/cz.xml.new" \
  && mv "$DIR/feeds/uzpln/cz.xml.new" "$DIR/feeds/uzpln/cz.xml"
# TSIB feeds (marine and aviation): both use jq/tsib.jq, parameterised through
# the type and link jq variables, and are written under feeds/tsib.
# Each feed is written to a .new file first and renamed only on pipeline success.
log "Building TSIB Marine feed to $DIR/feeds/tsib/marine.xml"
mkdir -p "$DIR/feeds/tsib"
curl -s 'https://www.mot.gov.sg/about-mot/transport-safety-investigation-bureau/msib/investigation-report/' \
  | pup '.container-twelve .eight > .row:last-child tbody > tr json{}' \
  | jq -L "$DIR/jq" \
    -f "$DIR/jq/tsib.jq" \
    --arg type Marine \
    --arg link 'https://www.mot.gov.sg/about-mot/transport-safety-investigation-bureau/msib/investigation-report/' \
  | json2xml > "$DIR/feeds/tsib/marine.xml.new" \
  && mv "$DIR/feeds/tsib/marine.xml.new" "$DIR/feeds/tsib/marine.xml"

log "Building TSIB Aviation feed to $DIR/feeds/tsib/air.xml"
curl -s 'https://www.mot.gov.sg/about-mot/transport-safety-investigation-bureau/aaib/investigation-report/' \
  | pup '.container-twelve .eight > .row:last-child tbody > tr json{}' \
  | jq -L "$DIR/jq" \
    -f "$DIR/jq/tsib.jq" \
    --arg type Aviation \
    --arg link 'https://www.mot.gov.sg/about-mot/transport-safety-investigation-bureau/aaib/investigation-report/' \
  | json2xml > "$DIR/feeds/tsib/air.xml.new" \
  && mv "$DIR/feeds/tsib/air.xml.new" "$DIR/feeds/tsib/air.xml"
# JTSB feeds: three English feeds (air, rail, marine) under feeds/jtsb/en and
# three Japanese feeds under feeds/jtsb/jp. Every feed has its own jq filter
# under jq/jtsb/<lang>/. The English filters are run with the -L library path,
# the Japanese ones without it. The first table row is skipped by the pup
# selector. Each feed is written to a .new file first and renamed only on
# pipeline success.

# --- English ---
log "Building JTSB Aviation English feed to $DIR/feeds/jtsb/en/air.xml"
mkdir -p "$DIR/feeds/jtsb/en"
curl -s 'https://www.mlit.go.jp/jtsb/airrep.html' \
  | pup 'table.kankokuiken-en tr:not(:first-child) json{}' \
  | jq -L "$DIR/jq" -f "$DIR/jq/jtsb/en/air.jq" \
  | json2xml > "$DIR/feeds/jtsb/en/air.xml.new" \
  && mv "$DIR/feeds/jtsb/en/air.xml.new" "$DIR/feeds/jtsb/en/air.xml"

log "Building JTSB Rail English feed to $DIR/feeds/jtsb/en/rail.xml"
curl -s 'https://www.mlit.go.jp/jtsb/railrep.html' \
  | pup 'table.kankokuiken-en tr:not(:first-child) json{}' \
  | jq -L "$DIR/jq" -f "$DIR/jq/jtsb/en/rail.jq" \
  | json2xml > "$DIR/feeds/jtsb/en/rail.xml.new" \
  && mv "$DIR/feeds/jtsb/en/rail.xml.new" "$DIR/feeds/jtsb/en/rail.xml"

log "Building JTSB Marine English feed to $DIR/feeds/jtsb/en/marine.xml"
curl -s 'https://www.mlit.go.jp/jtsb/marrep.html' \
  | pup 'table.kankokuiken-en tr:not(:first-child) json{}' \
  | jq -L "$DIR/jq" -f "$DIR/jq/jtsb/en/marine.jq" \
  | json2xml > "$DIR/feeds/jtsb/en/marine.xml.new" \
  && mv "$DIR/feeds/jtsb/en/marine.xml.new" "$DIR/feeds/jtsb/en/marine.xml"

# --- Japanese ---
log "Building JTSB Aviation Japanese feed to $DIR/feeds/jtsb/jp/air.xml"
mkdir -p "$DIR/feeds/jtsb/jp"
curl -s 'https://jtsb.mlit.go.jp/jtsb/aircraft/air-kensaku-list.php?sort=desc&by=p' \
  | pup 'table.jiko-information tr:not(:first-child) json{}' \
  | jq -f "$DIR/jq/jtsb/jp/air.jq" \
  | json2xml > "$DIR/feeds/jtsb/jp/air.xml.new" \
  && mv "$DIR/feeds/jtsb/jp/air.xml.new" "$DIR/feeds/jtsb/jp/air.xml"

log "Building JTSB Rail Japanese feed to $DIR/feeds/jtsb/jp/rail.xml"
curl -s 'https://jtsb.mlit.go.jp/jtsb/railway/rail-kensaku-list.php?sort=desc&by=p' \
  | pup 'table.jiko-information tr:not(:first-child) json{}' \
  | jq -f "$DIR/jq/jtsb/jp/rail.jq" \
  | json2xml > "$DIR/feeds/jtsb/jp/rail.xml.new" \
  && mv "$DIR/feeds/jtsb/jp/rail.xml.new" "$DIR/feeds/jtsb/jp/rail.xml"

log "Building JTSB Marine Japanese feed to $DIR/feeds/jtsb/jp/marine.xml"
curl -s 'https://jtsb.mlit.go.jp/jtsb/ship/ship-kensaku-list.php?sort=desc&by=p' \
  | pup 'table.jiko-information tr:not(:first-child) json{}' \
  | jq -f "$DIR/jq/jtsb/jp/marine.jq" \
  | json2xml > "$DIR/feeds/jtsb/jp/marine.xml.new" \
  && mv "$DIR/feeds/jtsb/jp/marine.xml.new" "$DIR/feeds/jtsb/jp/marine.xml"
# Build one havkom.se (Swedish Accident Investigation Authority) feed.
# Usage: saia_build language categoryId title description fileName
# Globals:
#   DIR (read) - base directory holding jq/saia.jq and the feeds/ output tree
# Arguments:
#   $1 - language; "en" selects the site's en/ section, any other value the root
#   $2 - value of the categoryId query parameter
#   $3 - feed title (passed to jq as $title)
#   $4 - feed description (passed to jq as $description)
#   $5 - output file name, created under $DIR/feeds/saia/<language>/
# The feed is written to a .new file first and renamed into place only when the
# pipeline reports success.
saia_build() {
  local language="$1"
  local category_id="$2"
  local title="$3"
  local description="$4"
  local file_name="$5"
  local out_dir="$DIR/feeds/saia/$language"

  # Only the English listing lives under a language prefix.
  local section=''
  if [[ "$language" == "en" ]]; then
    section='en/'
  fi
  local url="https://www.havkom.se/${section}utredningar?categoryId=${category_id}&onGoingWithReport=1&sortReportDate=1"

  log "Building $title feed to $out_dir/$file_name"
  mkdir -p "$out_dir"
  curl -s "$url" \
    | pup '.investigation .desc json{}' \
    | jq -f "$DIR/jq/saia.jq" \
      --arg title "$title" \
      --arg language "$language" \
      --arg description "$description" \
      --arg link "$url" \
    | json2xml > "$out_dir/$file_name.new" \
    && mv "$out_dir/$file_name.new" "$out_dir/$file_name"
}
# SAIA / SHK feeds: one combined feed (categoryId 0) plus one per category,
# first in English (en) and then in Swedish (sv).
# Arguments: language categoryId title description fileName
saia_build en 0 'SAIA' 'Swedish Accident Investigation Authority English accident reports' all.xml
saia_build en 203 'SAIA Road' 'Swedish Accident Investigation Authority English road accident reports' road.xml
saia_build en 204 'SAIA Rail' 'Swedish Accident Investigation Authority English rail accident reports' rail.xml
saia_build en 202 'SAIA Air' 'Swedish Accident Investigation Authority English aviation accident reports' air.xml
saia_build en 205 'SAIA Marine' 'Swedish Accident Investigation Authority English marine accident reports' marine.xml
saia_build en 206 'SAIA Military' 'Swedish Accident Investigation Authority English military accident reports' mil.xml

saia_build sv 0 'SHK' 'Svenska Statens haverikommission olycksrapporter' all.xml
saia_build sv 203 'SHK Väg' 'Svenska Statens haverikommission Vägtrafik olycksrapporter' road.xml
# Title corrected from "Spärbunden" to "Spårbunden", matching the description.
saia_build sv 204 'SHK Spårbunden' 'Svenska Statens haverikommission Spårbunden trafik olycksrapporter' rail.xml
saia_build sv 202 'SHK Luftfart' 'Svenska Statens haverikommission luftfart olycksrapporter' air.xml
saia_build sv 205 'SHK Sjöfart' 'Svenska Statens haverikommission sjöfart olycksrapporter' marine.xml
saia_build sv 206 'SHK Militär' 'Svenska Statens haverikommission militär olycksrapporter' mil.xml