itsb/jq/helpers.jq

82 lines
3.5 KiB
Plaintext
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

# Split a URL string into an object with the keys
# {scheme, netloc, path, params, query, fragment}, in the spirit of Python's
# urllib.parse.urlparse.
# Only a ";params" suffix on the last path segment is split off into .params;
# semicolons in earlier segments stay part of .path.
# Components missing from the input are reported however capture reports
# non-participating groups (null in current jq releases).
def urlparse:
  "^(?:(?<scheme>[^:/?#]+):)?(?://(?<netloc>[^/?#]*))?(?:(?<path>(?:[^?#]+/)?[^?#;]*)(?:;(?<params>[^?#/]*))?)?(?:\\?(?<query>[^#]*))?(?:#(?<fragment>.*))?$" as $url_pattern
  | capture($url_pattern);
# Split a URL string into an object with the keys
# {scheme, netloc, path, query, fragment}, in the spirit of Python's
# urllib.parse.urlsplit. Unlike urlparse, ";params" is left inside .path.
# Components missing from the input are reported however capture reports
# non-participating groups (null in current jq releases).
def urlsplit:
  "^(?:(?<scheme>[^:/?#]+):)?(?://(?<netloc>[^/?#]*))?(?<path>(?:[^?#]+/)?[^?#]*)?(?:\\?(?<query>[^#]*))?(?:#(?<fragment>.*))?$" as $url_pattern
  | capture($url_pattern);
# Reverse operation of either urlparse or urlsplit: rebuild a URL string from an
# object with {scheme, netloc, path, params, query, fragment}.
# Any component that is null (or missing) is left out of the result.
def urlunparse:
  # Only ":" belongs to the scheme. The "//" marker introduces the authority, so
  # it is emitted only when a netloc is present: "mailto:a@b" must not become
  # "mailto://a@b", and a scheme-less "//host/path" must keep its "//".
  # An empty-string netloc still counts as present, so "file:///x" round-trips.
  (if .scheme then .scheme + ":" else "" end)
  + (if .netloc then "//" + .netloc else "" end)
  + (.path // "")
  + (if .params then ";" + .params else "" end)
  + (if .query then "?" + .query else "" end)
  + (if .fragment then "#" + .fragment else "" end);
# Resolve a possibly relative URI into an absolute URI.
# Input: the reference, either a URL string or an object shaped like the output
#        of urlsplit/urlparse.
# base:  the URL to resolve against, as a string or a parsed object.
# Output: the input unchanged when it already carries a scheme; otherwise the
#         resolved URL as a string (built with urlunparse).
def urlresolve(base):
  # Collapse "." and ".." path segments. ".." never climbs above the root of an
  # absolute path, and a trailing "." or ".." leaves a trailing slash.
  def remove_dot_segments:
    split("/") as $segments
    | reduce $segments[] as $segment ([];
        if $segment == "." then .
        elif $segment == ".." then (
          # Keep the leading "" that stands for the root of an absolute path.
          if length > 1 or (length == 1 and .[0] != "") then .[:-1] else . end
        )
        else . + [$segment] end
      )
    | if ($segments | length) > 0 and ($segments[-1] == "." or $segments[-1] == "..")
      then . + [""] else . end
    | join("/");
  (if type == "string" then urlsplit else . end) as $parsed
  # There is a scheme: this is an absolute URL
  | if $parsed.scheme then . else (
      (base | if type == "string" then urlsplit else . end) as $parsedbase
      | $parsed
      # Every remaining case inherits the base's scheme.
      | .scheme = $parsedbase.scheme
      # No scheme but a domain: only the scheme comes from the base
      | if .netloc then .
        # No path at all ("", "?query" or "#fragment"): keep the base's path.
        # The path is compared against "" explicitly because an empty string is
        # truthy in jq, so a plain `if .path` cannot detect this case.
        elif (.path // "") == "" then (
          .netloc = $parsedbase.netloc
          | .path = $parsedbase.path
          # A reference without its own query also keeps the base's query.
          | if .query then . else .query = $parsedbase.query end
        )
        # No scheme and no domain: resolve the relative path
        else (
          .netloc = $parsedbase.netloc
          # When the path does not start with a slash, make it relative to the base's path
          # by removing the filename from the base's path and appending the path
          | if .path | startswith("/") then . else
              .path = ((($parsedbase.path // "") | split("/")[:-1] | join("/")) + "/" + .path)
            end
          | .path |= remove_dot_segments
        ) end
      | urlunparse
    ) end;
# Basic Chinese number parsing meant for Chinese date parsing.
# Input: a string of Arabic digits and/or the Chinese numerals 零 〇 一..九 十.
# Output: the number it denotes.
# Only the forms needed for dates are supported: plain digit sequences (二〇二三)
# and tens up to 九十九 (十, 十五, 二十, 二十八). Larger units such as 百 or 千
# are not handled. Empty input yields 1, as it always has.
def parse_chinese_number:
  # A leading 十 is shorthand for 一十 (十五 is 15, not 5), so spell out the
  # implied 1 before converting digit by digit.
  (if startswith("十") then "一" + . else . end) as $input
  | {
      "零": "0",
      "〇": "0",
      "一": "1",
      "二": "2",
      "三": "3",
      "四": "4",
      "五": "5",
      "六": "6",
      "七": "7",
      "八": "8",
      "九": "9",
      # 10 is ignored here as we will parse number by number.
      "十": ""
    } as $charmap
  | $input / ""
  # Characters missing from the map (for example Arabic digits) pass through unchanged.
  | map($charmap[.] // .)
  | join("")
  | if . == "" then 1 else tonumber end
  # Parsing number by number, ignoring 10, works as long as there is a digit after 10:
  # 二十八 works because we parse it as 二八 (2 and 8), but 二十 would yield 2 only,
  # so we multiply manually by 10 when the number ends with 10.
  | if $input | endswith("十") then . * 10 else . end;
# Parse a Traditional or Simplified Chinese date of the form <year>年<month>月<day>日
# into a Unix timestamp (midnight UTC of that day).
# Each component may use Arabic digits or Chinese numerals and is converted with
# parse_chinese_number. Input that does not contain such a date produces no output.
def parse_chinese_date:
  capture("(?<year>[0123456789零一二三四五六七八九十]+)年(?<month>[0123456789零一二三四五六七八九十]+)月(?<day>[0123456789零一二三四五六七八九十]+)日")
  | .[] |= parse_chinese_number
  # Years below 1900 are taken to be Minguo (Republic of China) calendar years and
  # shifted by 1911 to the Gregorian calendar. This assumes no Gregorian dates
  # before 1900, and stops working once the Minguo year itself reaches 1900
  # (Gregorian 3811).
  | .year |= (if . < 1900 then . + 1911 else . end)
  | [.year, .month, .day]
  | map(tostring)
  | join("-") + "T00:00:00Z"
  | fromdateiso8601;