"
+ ),
+ "pubDate": (
+ $data[0].text // $data[0].children[0].text
+ | if $lang == "en" then strptime("%d %B %Y") | mktime else helpers::parse_chinese_date end
+ | strftime("%a, %d %b %Y %T %z")
+ )
+ }
+ ]
+ }
+ }
+}
diff --git a/jq/helpers.jq b/jq/helpers.jq
index 1633ec6..c864a67 100644
--- a/jq/helpers.jq
+++ b/jq/helpers.jq
@@ -41,3 +41,41 @@ def urlresolve(base):
) else . end
| urlunparse
) end;
+
+ # Convert a Chinese (or ASCII) numeral string, as used in dates, into a number.
+ #
+ # Input: string of ASCII digits and/or 零〇一二三四五六七八九十. Any other character is
+ # passed through untouched, which makes `tonumber` fail. Output: a number.
+ #
+ # Digits are translated one by one (二〇二三 -> 2023); 十 is handled by position:
+ #   leading 十 is the tens digit 1:  十 -> 10, 十一 -> 11, 十九 -> 19
+ #   inner 十 is dropped:             二十八 -> 28, 三十一 -> 31
+ #   trailing 十 multiplies by ten:   二十 -> 20
+ #
+ # Hundreds and above (百, 千, ...) are not supported.
+ def parse_chinese_number:
+   . as $input
+   | {
+       "零": "0", "〇": "0", "一": "1", "二": "2", "三": "3",
+       "四": "4", "五": "5", "六": "6", "七": "7", "八": "8", "九": "9",
+       # 十 carries no digit of its own; its effect is applied around the mapping.
+       "十": ""
+     } as $charmap
+   | ($input / "" | map($charmap[.] // .) | join("")) as $digits
+   # A leading 十 has an implicit 一 in front of it (十一 is 11, not 1).
+   | (if $input | startswith("十") then "1" + $digits else $digits end)
+   # Guard kept from the original implementation: an empty digit string
+   # becomes 1 so that `tonumber` is never fed "".
+   | if . == "" then 1 else . end
+   | tonumber
+   # A trailing 十 means the units digit is zero (二十 -> 2 * 10, 十 -> 1 * 10).
+   | if $input | endswith("十") then . * 10 else . end;
+
+ # Parse a Traditional or Simplified Chinese date string (<year>年<month>月<day>日, digits in ASCII or Chinese numerals) into a Unix timestamp at 00:00:00 UTC.
+ def parse_chinese_date:
+   capture("(?<year>[0123456789零〇一二三四五六七八九十]+)年(?<month>[0123456789零〇一二三四五六七八九十]+)月(?<day>[0123456789零〇一二三四五六七八九十]+)日")
+   | map_values(parse_chinese_number)
+   # Years below 1900 are assumed to be Minguo (Republic of China) calendar years and shifted by 1911 to Gregorian; real Gregorian dates before 1900 are therefore unsupported, and the heuristic stops working at Minguo year 1900 (Gregorian 3811).
+   | if .year < 1900 then .year += 1911 else . end
+   | "\(.year)-\(.month)-\(.day)T00:00:00Z"
+   | fromdateiso8601;