Remove jsessionid from BFU URLs to get permalinks, closes #10

This commit is contained in:
Lucidiot 2020-07-19 14:47:21 +02:00
parent 8b06e0eb6f
commit 1d7ececd00
Signed by: lucidiot
GPG Key ID: 3358C1CA6906FB8D
3 changed files with 18 additions and 4 deletions

View File

@@ -119,14 +119,14 @@ log Building BFU English feed to $DIR/feeds/bfu/en.xml
mkdir -p $DIR/feeds/bfu mkdir -p $DIR/feeds/bfu
curl -s 'https://www.bfu-web.de/EN/Publications/Investigation%20Report/reports_node.html?cms_gts=238148_list%253DdateOfIssue_dt%252Bdesc' \ curl -s 'https://www.bfu-web.de/EN/Publications/Investigation%20Report/reports_node.html?cms_gts=238148_list%253DdateOfIssue_dt%252Bdesc' \
| pup 'table.links tbody tr json{}' \ | pup 'table.links tbody tr json{}' \
| jq -f $DIR/jq/bfu/en.jq \ | jq -L $DIR/jq -f $DIR/jq/bfu/en.jq \
| json2xml > $DIR/feeds/bfu/en.xml.new \ | json2xml > $DIR/feeds/bfu/en.xml.new \
&& mv $DIR/feeds/bfu/en.xml.new $DIR/feeds/bfu/en.xml && mv $DIR/feeds/bfu/en.xml.new $DIR/feeds/bfu/en.xml
log Building BFU Deutsch feed to $DIR/feeds/bfu/de.xml log Building BFU Deutsch feed to $DIR/feeds/bfu/de.xml
curl -s 'https://www.bfu-web.de/DE/Publikationen/Untersuchungsberichte/untersuchungsberichte_node.html?cms_gts=235998_list%253DdateOfIssue_dt%252Bdesc' \ curl -s 'https://www.bfu-web.de/DE/Publikationen/Untersuchungsberichte/untersuchungsberichte_node.html?cms_gts=235998_list%253DdateOfIssue_dt%252Bdesc' \
| pup 'table.links tbody tr json{}' \ | pup 'table.links tbody tr json{}' \
| jq -f $DIR/jq/bfu/de.jq \ | jq -L $DIR/jq -f $DIR/jq/bfu/de.jq \
| json2xml > $DIR/feeds/bfu/de.xml.new \ | json2xml > $DIR/feeds/bfu/de.xml.new \
&& mv $DIR/feeds/bfu/de.xml.new $DIR/feeds/bfu/de.xml && mv $DIR/feeds/bfu/de.xml.new $DIR/feeds/bfu/de.xml

View File

@@ -1,6 +1,7 @@
# BFU English feed generator # BFU English feed generator
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON # Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale. # WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.
import "./helpers" as helpers;
{ {
"rss": { "rss": {
@@ -16,7 +17,13 @@
"generator": "ITSB", "generator": "ITSB",
"item": [.[] | { "item": [.[] | {
"title": (.children[3].children[0].text + ": " + .children[2].text + ", " + .children[1].text), "title": (.children[3].children[0].text + ": " + .children[2].text + ", " + .children[1].text),
"link": ("https://www.bfu-web.de" + .children[3].children[0].href), "link": (
.children[3].children[0].href
| helpers::urlparse
# Remove jsessionid path parameter
| .params = null
| helpers::urlresolve("https://www.bfu-web.de")
),
"pubDate": (.children[0].text | strptime("%d.%m.%Y") | mktime | strftime("%a, %d %b %Y %T %z")) "pubDate": (.children[0].text | strptime("%d.%m.%Y") | mktime | strftime("%a, %d %b %Y %T %z"))
}] }]
} }

View File

@@ -1,6 +1,7 @@
# BFU English feed generator # BFU English feed generator
# Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON # Expects pup JSON output holding <tr> tags, outputs xmltodict-compatible JSON
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale. # WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.
import "./helpers" as helpers;
{ {
"rss": { "rss": {
@@ -16,7 +17,13 @@
"generator": "ITSB", "generator": "ITSB",
"item": [.[] | { "item": [.[] | {
"title": (.children[3].children[0].text + ": " + .children[2].text + ", " + .children[1].text), "title": (.children[3].children[0].text + ": " + .children[2].text + ", " + .children[1].text),
"link": ("https://www.bfu-web.de" + .children[3].children[0].href), "link": (
.children[3].children[0].href
| helpers::urlparse
# Remove jsessionid path parameter
| .params = null
| helpers::urlresolve("https://www.bfu-web.de")
),
"pubDate": (.children[0].text | strptime("%Y.%m.%d") | mktime | strftime("%a, %d %b %Y %T %z")) "pubDate": (.children[0].text | strptime("%Y.%m.%d") | mktime | strftime("%a, %d %b %Y %T %z"))
}] }]
} }