2020-07-19 12:46:47 +00:00
|
|
|
|
# Break a URL string into its components, in the spirit of Python's
# urllib.parse.urlparse.
#
# Input:  a URL string.
# Output: an object keyed by the named groups of the pattern below:
#         scheme, netloc, path, params, query and fragment.
#
# Path parameters (";params") are only recognised on the last path segment:
# the path group stops at the first ";" after the final "/", and the params
# group itself may not contain "/".
def urlparse:
  "^(?:(?<scheme>[^:/?#]+):)?(?://(?<netloc>[^/?#]*))?(?:(?<path>(?:[^?#]+/)?[^?#;]*)(?:;(?<params>[^?#/]*))?)?(?:\\?(?<query>[^#]*))?(?:#(?<fragment>.*))?$" as $url_pattern
  | capture($url_pattern);
|
2020-07-19 12:46:47 +00:00
|
|
|
|
|
|
|
|
|
# Break a URL string into its components, in the spirit of Python's
# urllib.parse.urlsplit.
#
# Input:  a URL string.
# Output: an object keyed by the named groups of the pattern below:
#         scheme, netloc, path, query and fragment.
#
# Unlike urlparse, path parameters are not separated out: any ";params"
# suffix stays inside the path.
def urlsplit:
  "^(?:(?<scheme>[^:/?#]+):)?(?://(?<netloc>[^/?#]*))?(?<path>(?:[^?#]+/)?[^?#]*)?(?:\\?(?<query>[^#]*))?(?:#(?<fragment>.*))?$" as $url_pattern
  | capture($url_pattern);
|
2020-07-19 12:46:47 +00:00
|
|
|
|
|
|
|
|
|
# Reassemble a URL string from a component object; the reverse operation of
# either urlparse or urlsplit.
#
# Input:  an object with any of the keys scheme, netloc, path, params, query,
#         fragment. A key that is missing or null is left out of the result.
# Output: the URL as a string.
#
# The scheme is followed by ":" only; the "//" marker belongs to the
# authority, so it is emitted
#   - whenever a netloc is present (even without a scheme, which keeps
#     protocol-relative URLs such as "//example.com/x" intact), and
#   - when there is a scheme, no netloc, and the path is empty or absolute,
#     so that "file:///etc/hosts" is rebuilt the same way whether the empty
#     authority was captured as "" or as null.
# A scheme followed by a non-absolute path ("mailto:user@example.com",
# "urn:isbn:123") therefore gets no "//" inserted.
def urlunparse:
  (.path // "") as $path
  | (if .scheme then .scheme + ":" else "" end)
  + (if .netloc then "//" + .netloc
     elif .scheme and ($path == "" or ($path | startswith("/"))) then "//"
     else "" end)
  + $path
  + (if .params then ";" + .params else "" end)
  + (if .query then "?" + .query else "" end)
  + (if .fragment then "#" + .fragment else "" end);
|
|
|
|
|
|
|
|
|
|
# Resolve a possibly relative URI against a base URI.
#
# Input:  the reference, either a URL string or an object shaped like the
#         output of urlsplit.
# base:   the base URI, likewise a string or a urlsplit-shaped object.
# Output: when the reference already has a scheme it is absolute and the
#         input is returned unchanged (in whatever form it was given);
#         otherwise the resolved URL as a string.
#
# "." and ".." path segments are not collapsed.
def urlresolve(base):
  (if type == "string" then urlsplit else . end) as $ref
  # There is a scheme: this is an absolute URL
  | if $ref.scheme then . else (
      (base | if type == "string" then urlsplit else . end) as $base
      | $ref
      # No scheme but a domain: use the base's scheme
      | if .netloc then (
          .scheme = $base.scheme
      # No scheme and no domain, but a path: resolve the relative path.
      # An empty string is truthy in jq, so compare against "" explicitly;
      # otherwise a reference with an empty path (only a query or fragment)
      # would be treated as a relative path.
      ) elif (.path // "") != "" then (
          .scheme = $base.scheme
          | .netloc = $base.netloc
          # When the path does not start with a slash, make it relative to
          # the base's path by removing the file name from the base's path
          # and appending the reference path. A missing base path counts as
          # empty so that split does not fail on null.
          | if .path | startswith("/") then .
            else
              .path = ((($base.path // "") | split("/")[:-1] | join("/")) + "/" + $ref.path)
            end
      # Only a query and/or fragment: keep the base's path, and keep the
      # base's query too unless the reference supplies its own.
      ) elif (.query // .fragment) then (
          .scheme = $base.scheme
          | .netloc = $base.netloc
          | .path = $base.path
          | .query = (.query // $base.query)
      ) else . end
      | urlunparse
  ) end;
|
2021-03-19 23:34:25 +00:00
|
|
|
|
|
|
|
|
|
# Basic Chinese number parsing, meant for the small numbers found in Chinese
# dates.
#
# Input:  a string made of Chinese digits (零 〇 一 … 九), 十, and/or ASCII
#         digits. Characters not in the table are passed through untouched,
#         so anything tonumber cannot read raises an error.
# Output: a number.
#
# The string is converted digit by digit, with 十 (10) dropped:
#   - 二十八 becomes "28", which is already correct;
#   - 二十 becomes "2", so a trailing 十 multiplies the result by 10;
#   - 十八 becomes "8", so a leading 十 adds 10;
#   - 十 alone becomes "", which is read as 1 and then multiplied by 10.
def parse_chinese_number:
  . as $text
  | {
      "零": "0", "〇": "0",
      "一": "1", "二": "2", "三": "3",
      "四": "4", "五": "5", "六": "6",
      "七": "7", "八": "8", "九": "9",
      # 10 maps to nothing; it is accounted for after the conversion.
      "十": ""
    } as $digits
  | ([($text / "")[] | $digits[.] // .] | join("")) as $ascii
  # An empty result means the input was 十 on its own.
  | (if $ascii == "" then 1 else $ascii | tonumber end) as $value
  | if ($text | endswith("十")) then $value * 10
    elif ($text | startswith("十")) then $value + 10
    else $value
    end;
|
2021-03-19 23:34:25 +00:00
|
|
|
|
|
|
|
|
|
# Parse a Traditional or Simplified Chinese date into a Unix timestamp.
#
# Input:  a string containing a date written as <year>年<month>月<day>日, each
#         part in ASCII digits or Chinese numerals.
# Output: seconds since the Unix epoch for midnight UTC on that date. Nothing
#         is produced when the input contains no such date.
def parse_chinese_date:
  # Left-pad a number to two digits so the assembled string is valid
  # ISO 8601 and does not depend on a lenient date parser.
  def zero_pad2: tostring | if length < 2 then "0" + . else . end;
  capture("(?<year>[0123456789零〇一二三四五六七八九十]+)年(?<month>[0123456789零〇一二三四五六七八九十]+)月(?<day>[0123456789零〇一二三四五六七八九十]+)日")
  | map_values(parse_chinese_number)
  # Years below 1900 are taken to be on a calendar whose years are offset
  # from Gregorian years by 1911 (presumably the Republic of China calendar),
  # on the assumption that we never get Gregorian dates before 1900. The
  # heuristic stops working once that calendar itself reaches year 1900,
  # i.e. in Gregorian year 3811.
  | if .year < 1900 then .year += 1911 else . end
  | "\(.year)-\(.month | zero_pad2)-\(.day | zero_pad2)T00:00:00Z"
  | fromdateiso8601;
|