# WMSC feed generator
#
# Input:  pup JSON output: an array of elements (the report bullets, presumably
#         <li> tags -- TODO confirm against the pup selector used by the caller).
#         Each element is read for .text and for .children entries that carry
#         .tag and .href.
# Output: xmltodict-compatible JSON describing an RSS 2.0 document.
#
# WARNING: Dates are locale-sensitive; the RSS feed might not generate correctly with another locale.
{
  "rss": {
    "@version": "2.0",
    "channel": {
      "title": "WMSC",
      "description": "Washington Metrorail Safety Commission safety reports",
      "link": "https://wmsc.gov/oversight/reports/",
      "language": "en-us",
      # Feed build time, in the date format used for RSS pubDate.
      "pubDate": (now | strftime("%a, %d %b %Y %T %z")),
      "docs": "https://www.rssboard.org/rss-specification",
      "ttl": 1440,
      "generator": "ITSB",
      "item": (
        # The WMSC parsing is unusually complex due to the lack of structured data.
        # We need at least a title, a URL, and a date, the date being the harder part.
        # The original data is not sorted by the only date we have (there is an implicit
        # unspecified publication date), so we first compute a timestamp that we can use
        # to sort items with. This helps with bad RSS reader implementations.
        [
          .[]
          | .timestamp = (
              .text
              # Dates are in the middle of the bullet contents, sometimes with their parts
              # separated with punctuation, sometimes not:
              # 2019-02-31, 20190231, 2019_0231, 2019 02-31 etc.
              # The groups must be named: the next stage keys on each capture's .name, and
              # the stage after that reads .year, .month and .day.
              # NOTE(review): match emits nothing when the text holds no date, so such
              # bullets are presumably dropped from the feed -- confirm that is intended.
              | match("[^[:alnum:]](?<year>[0-9]{4})[^[:alnum:]]?(?<month>(?:0[1-9]|1[0-2]))[^[:alnum:]]?(?<day>(?:[0-2][0-9]|3[01]))").captures
              # Turn the captured groups into a { group_name: matched_text } object
              | [ .[] | { "key": .name, "value": .string } ]
              | from_entries
              # Turn those matched dates back into parseable stuff
              | (.year + "-" + .month + "-" + .day)
              # Get an actual timestamp
              | strptime("%Y-%m-%d")
              | mktime
            )
        ]
        # Sort by timestamp in descending order
        | sort_by(.timestamp)
        | reverse
        # Get the actual RSS item
        | [
            .[]
            | {
                "title": .text,
                "pubDate": (.timestamp | strftime("%a, %d %b %Y %T %z")),
                # The href of the first <a> child is used as the item link.
                "link": (.children | map(select(.tag == "a")) | first | .href)
              }
          ]
      )
    }
  }
}