Add WMSC custom feed
This commit is contained in:
parent
b7423a1c8c
commit
32d9ad4af4
|
@ -150,3 +150,10 @@ curl -s 'http://www.uzpln.cz/zpravy-ln' \
|
||||||
--arg link 'http://www.uzpln.cz/zpravy-ln' \
|
--arg link 'http://www.uzpln.cz/zpravy-ln' \
|
||||||
| json2xml > $DIR/feeds/uzpln/cz.xml.new \
|
| json2xml > $DIR/feeds/uzpln/cz.xml.new \
|
||||||
&& mv $DIR/feeds/uzpln/cz.xml.new $DIR/feeds/uzpln/cz.xml
|
&& mv $DIR/feeds/uzpln/cz.xml.new $DIR/feeds/uzpln/cz.xml
|
||||||
|
|
||||||
|
# Build the WMSC (Washington Metrorail Safety Commission) feed:
#   1. fetch the reports page,
#   2. turn every <li> under .post-content into JSON with pup,
#   3. shape that JSON into an xmltodict-compatible RSS document with jq/wmsc.jq,
#   4. serialize it to XML with json2xml.
# The XML is written to a .new file first and only moved over the published feed
# when the pipeline exits successfully.
# Path expansions are quoted so a $DIR containing spaces or glob characters
# does not get word-split.
log Building WMSC feed to $DIR/feeds/wmsc.xml
curl -s 'https://wmsc.gov/oversight/reports/' \
  | pup '.post-content li json{}' \
  | jq -f "$DIR/jq/wmsc.jq" \
  | json2xml > "$DIR/feeds/wmsc.xml.new" \
  && mv "$DIR/feeds/wmsc.xml.new" "$DIR/feeds/wmsc.xml"
|
||||||
|
|
24
index.html
24
index.html
|
@ -532,6 +532,30 @@
|
||||||
</tr>
|
</tr>
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
|
|
||||||
|
<!-- Washington Metrorail Safety Commission (WMSC): custom feed generated into feeds/wmsc.xml -->
<h3>Washington Metrorail Safety Commission</h3>
<table>
  <!-- Explicit <tbody>, matching the table that precedes this entry -->
  <tbody>
    <tr>
      <td><strong>Country</strong></td>
      <td>United States</td>
    </tr>
    <tr>
      <td><strong>Language</strong></td>
      <td>English</td>
    </tr>
    <tr>
      <td><strong>Type</strong></td>
      <td>Rail</td>
    </tr>
    <tr>
      <td><strong>Frequency</strong></td>
      <td>20 reports/year</td>
    </tr>
    <tr>
      <td><strong>Feed</strong></td>
      <!-- rel="noopener" keeps the newly opened tab from reaching back through window.opener -->
      <td><a href="feeds/wmsc.xml" target="_blank" rel="noopener"><img src="img/rss.gif" alt="RSS" /></a></td>
    </tr>
  </tbody>
</table>
|
||||||
</div>
|
</div>
|
||||||
</body>
|
</body>
|
||||||
</html>
|
</html>
|
||||||
|
|
|
@ -0,0 +1,48 @@
|
||||||
|
# WMSC feed generator
#
# Input:  pup JSON output holding <li> tags (an array with one object per <li>).
# Output: xmltodict-compatible JSON describing an RSS 2.0 document.
#
# Items whose text holds no recognizable date are left out of the feed.
#
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.

{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "WMSC",
      "description": "Washington Metrorail Safety Commission safety reports",
      "link": "https://wmsc.gov/oversight/reports/",
      "language": "en-us",
      "pubDate": (now | strftime("%a, %d %b %Y %T %z")),
      "docs": "https://cyber.harvard.edu/rss/rss.html",
      # RSS <ttl> is expressed in minutes, not seconds: 1440 minutes = one day.
      "ttl": 1440,
      "generator": "ITSB",
      "item": (
        # The WMSC parsing is unusually complex due to the lack of structured data.
        # We need at least a title, a URL, and a date, the date being the harder part.
        # The original data is not sorted by the only date we have (there is an
        # implicit unspecified publication date), so we first compute a timestamp
        # to sort items with. This helps with bad RSS reader implementations.
        [
          .[]
          # A bullet without a text node cannot carry a date; skip it instead of
          # letting the regex raise on a non-string and abort the whole feed.
          | select(.text | type == "string")
          | . as $li
          | (
              $li.text
              # Dates sit in the middle of the bullet text, sometimes with their
              # parts separated by punctuation, sometimes not:
              #   2019-02-28, 20190228, 2019_0228, 2019 02-28 etc.
              # The day is restricted to 01-31 so that "00" never reaches strptime.
              # capture() yields a { group_name: matched_text } object for the first
              # match and yields nothing when there is none, which drops the bullet.
              | capture("[^[:alnum:]](?<year>[0-9]{4})[^[:alnum:]]?(?<month>(?:0[1-9]|1[0-2]))[^[:alnum:]]?(?<day>(?:0[1-9]|[12][0-9]|3[01]))")
              # Turn the matched parts back into a parseable date
              | "\(.year)-\(.month)-\(.day)"
              # Get an actual timestamp (seconds since the Unix epoch)
              | strptime("%Y-%m-%d")
              | mktime
            ) as $timestamp
          | $li + { "timestamp": $timestamp }
        ]
        # Sort by timestamp in descending order
        | sort_by(.timestamp)
        | reverse
        # Build the actual RSS items
        | map({
            "title": .text,
            "pubDate": (.timestamp | strftime("%a, %d %b %Y %T %z")),
            # pup omits "children" for a bullet without child elements; treat that
            # as an empty list so the link is null instead of raising an error.
            "link": ((.children // []) | map(select(.tag == "a")) | first | .href)
          })
      )
    }
  }
}
|
Loading…
Reference in New Issue