itsb/jq/helpers.jq

85 lines
3.7 KiB
Plaintext
Raw Normal View History

2020-07-19 12:46:47 +00:00
# Break a URL string into {scheme, netloc, path, params, query, fragment},
# in the spirit of Python's urllib.parse.urlparse.
# The pattern is anchored at both ends and every component is optional.
# ";params" is only recognised on the last path segment.
def urlparse:
  capture(
    "^(?:(?<scheme>[^:/?#]+):)?"      # scheme, terminated by ":"
    + "(?://(?<netloc>[^/?#]*))?"     # authority, introduced by "//"
    + "(?:(?<path>(?:[^?#]+/)?[^?#;]*)(?:;(?<params>[^?#/]*))?)?"  # path, then optional ";params"
    + "(?:\\?(?<query>[^#]*))?"       # query, introduced by "?"
    + "(?:#(?<fragment>.*))?$"        # fragment, introduced by "#"
  );
2020-07-19 12:46:47 +00:00
# Break a URL string into {scheme, netloc, path, query, fragment},
# in the spirit of Python's urllib.parse.urlsplit.
# Unlike urlparse, ";params" is left inside the path.
# The pattern is anchored at both ends and every component is optional.
def urlsplit:
  capture(
    "^(?:(?<scheme>[^:/?#]+):)?"         # scheme, terminated by ":"
    + "(?://(?<netloc>[^/?#]*))?"        # authority, introduced by "//"
    + "(?<path>(?:[^?#]+/)?[^?#]*)?"     # path, up to the first "?" or "#"
    + "(?:\\?(?<query>[^#]*))?"          # query, introduced by "?"
    + "(?:#(?<fragment>.*))?$"           # fragment, introduced by "#"
  );
2020-07-19 12:46:47 +00:00
# Rebuild a URL string from an object shaped like the output of urlparse or
# urlsplit.
#
# Input:  an object with any of scheme, netloc, path, params, query and
#         fragment; missing or null components are skipped.
# Output: the URL as a string.
#
# The scheme is followed by ":" and a non-null netloc is introduced by "//",
# so scheme-less network-path references ("//host/path") keep their prefix.
# With a scheme but no netloc, "//" is still written when the path is empty or
# absolute, so hierarchical URLs such as "file:///etc/hosts" keep their usual
# shape. Opaque URIs such as "mailto:user@example.com" get no spurious "//".
def urlunparse:
  (.path // "") as $path
  | (if .scheme then .scheme + ":" else "" end)
  + (if .netloc != null then "//" + .netloc
     elif .scheme and ($path == "" or ($path | startswith("/"))) then "//"
     else "" end)
  + $path
  + (if .params then ";" + .params else "" end)
  + (if .query then "?" + .query else "" end)
  + (if .fragment then "#" + .fragment else "" end);
# Resolve a possibly relative URI reference into an absolute URI.
#
# Input:  the reference, either a string or an object shaped like the output
#         of urlsplit/urlparse.
# base:   filter yielding the base URI, as a string or as such an object.
# Output: the input unchanged when it already carries a scheme; otherwise the
#         resolved URI as a string (built with urlunparse).
#
# Resolution follows the outline of RFC 3986 section 5.2.2:
#   - "//host/..."        takes only the scheme from the base;
#   - "/absolute/path"    takes scheme and netloc from the base;
#   - "relative/path"     is appended to the base path minus its last segment;
#   - "", "?q" or "#frag" keep the base path, and the base query too unless the
#                         reference supplies its own.
# "." and ".." segments are kept as-is rather than collapsed.
def urlresolve(base):
  (if type == "string" then urlsplit else . end) as $ref
  # A scheme makes the reference absolute: nothing to resolve.
  | if $ref.scheme then .
    else
      (base | if type == "string" then urlsplit else . end) as $base
      # Compare paths against "" explicitly instead of relying on truthiness:
      # "" is truthy in jq, and a null base path cannot be split.
      # NOTE(review): jq releases appear to differ on whether an empty regex
      # capture is reported as null or "" - confirm against the jq changelog.
      | ($ref.path // "") as $refpath
      | ($base.path // "") as $basepath
      | $ref
      | .scheme = $base.scheme
      | if .netloc then
          # Network-path reference: only the scheme is inherited.
          .
        else
          .netloc = $base.netloc
          | if $refpath == "" then
              # Same document: keep the base path, and the base query unless
              # the reference carries its own.
              .path = $base.path
              | if .query == null then .query = $base.query else . end
            elif ($refpath | startswith("/")) then
              # Absolute path: used as-is.
              .
            else
              # Relative path: replace the last segment of the base path.
              .path = (($basepath | split("/")[:-1] | join("/")) + "/" + $refpath)
            end
        end
      | urlunparse
    end;
2021-03-19 23:34:25 +00:00
# Basic Chinese number parsing meant for Chinese date parsing.
#
# Input:  a string made of ASCII digits and/or the numerals 零 〇 一 二 三 四 五
#         六 七 八 九 十.
# Output: the corresponding number.
#
# Characters are translated one by one, so 二零二一 gives 2021. 十 is handled
# as described below. Larger units such as 百 or 千 are not in the table, so
# input containing them makes tonumber fail.
def parse_chinese_number:
  . as $input
  | {
      "零": "0",
      # 〇 is the other common way of writing zero, e.g. in 二〇二一年.
      "〇": "0",
      "一": "1",
      "二": "2",
      "三": "3",
      "四": "4",
      "五": "5",
      "六": "6",
      "七": "7",
      "八": "8",
      "九": "9",
      # 10 is ignored here as we will parse number by number.
      "十": ""
    } as $charmap
  # Split into single characters; anything not in the table (such as ASCII
  # digits) is passed through unchanged.
  | $input / ""
  | map($charmap[.] // .)
  | join("")
  # Special case for when we parse 十 alone: use 1 so that the
  # "ends with 十" rule below turns it into 10.
  | if . == "" then 1 else . end
  | tonumber
  # Parsing number by number, ignoring 10, works as long as there is a digit
  # before and after 10: 二十八 is read as 二八 (2 and 8).
  # 二十 would yield 2 only, so multiply by 10 when the number ends with 十.
  # 十八 would yield 8 only, so add 10 when the number starts with 十.
  | if $input | endswith("十") then . * 10
    elif $input | startswith("十") then . + 10
    else . end;
2021-03-19 23:34:25 +00:00
# Parse a Traditional or Simplified Chinese date into a Unix timestamp.
#
# Input:  a string containing a date written as <year>年<month>月<day>日, where
#         each number uses ASCII digits and/or the numerals understood by
#         parse_chinese_number (〇 is accepted as a spelling of zero).
# Output: seconds since the Unix epoch for midnight UTC on that date.
def parse_chinese_date:
  # Zero-pad a month or day number to two digits so the assembled string is
  # well-formed ISO 8601 rather than depending on a lenient date parser.
  def pad2: tostring | if length < 2 then "0" + . else . end;
  "[0123456789零〇一二三四五六七八九十]+" as $num
  | capture("(?<year>\($num))年(?<month>\($num))月(?<day>\($num))日")
  # Normalise 〇 to 零 here so this does not depend on which zero spellings
  # parse_chinese_number knows about.
  | map_values(gsub("〇"; "零") | parse_chinese_number)
  # Years below 1900 are taken to be on a calendar offset by 1911 years
  # (presumably the Republic of China / Minguo calendar), assuming we never get
  # Gregorian dates before 1900. This stops working once such years reach
  # 1900, i.e. in 3811 CE.
  | if .year < 1900 then .year += 1911 else . end
  | "\(.year)-\(.month | pad2)-\(.day | pad2)T00:00:00Z"
  | fromdateiso8601;