# Parse a URL string into an object with
# {scheme, netloc, path, params, query, fragment}.
# Similar to Python's urllib.parse.urlparse.
# Components that are absent from the input are null.
def urlparse:
  capture(
    "^(?:(?<scheme>[^:/?#]+):)?"
    + "(?://(?<netloc>[^/?#]*))?"
    + "(?:(?<path>(?:[^?#]+/)?[^?#;]*)(?:;(?<params>[^?#/]*))?)?"
    + "(?:\\?(?<query>[^#]*))?"
    + "(?:#(?<fragment>.*))?$"
  );

# Parse a URL string into an object with
# {scheme, netloc, path, query, fragment}. Path parameters are not parsed
# (a ";params" suffix stays part of the path).
# Similar to Python's urllib.parse.urlsplit.
# Components that are absent from the input are null.
def urlsplit:
  capture(
    "^(?:(?<scheme>[^:/?#]+):)?"
    + "(?://(?<netloc>[^/?#]*))?"
    + "(?<path>(?:[^?#]+/)?[^?#]*)?"
    + "(?:\\?(?<query>[^#]*))?"
    + "(?:#(?<fragment>.*))?$"
  );

# Reverse operation of either urlparse or urlsplit: turn an object with
# {scheme, netloc, path, params, query, fragment} back into a string.
# Missing or null components are skipped.
def urlunparse:
  # The scheme is followed by ":" only; "//" belongs to the authority, so
  # that "mailto:user@example.com" and "//host/path" both round-trip.
  (if .scheme then .scheme + ":" else "" end)
  + (if .netloc != null then "//" + .netloc else "" end)
  + (.path // "")
  + (if .params then ";" + .params else "" end)
  + (if .query then "?" + .query else "" end)
  + (if .fragment then "#" + .fragment else "" end);

# Resolve a possibly relative URI into an absolute URI.
#
# Input: the reference, either a string or an object as produced by
#        urlsplit/urlparse.
# base:  a filter producing the base URI (string or parsed object); it is
#        evaluated against the original input.
# Output: always a string.
#
# Limitation: dot segments ("." and "..") are not collapsed.
def urlresolve(base):
  (if type == "string" then urlsplit else . end) as $ref
  | if $ref.scheme then
      # There is a scheme: this is already an absolute URL.
      (if type == "string" then . else urlunparse end)
    else
      (base | if type == "string" then urlsplit else . end) as $parsedbase
      | $ref
      # Every scheme-less reference inherits the base's scheme.
      | .scheme = $parsedbase.scheme
      | if .netloc != null then
          # No scheme but a domain: only the scheme comes from the base.
          .
        elif (.path // "") != "" then
          # No scheme and no domain: resolve the relative path.
          .netloc = $parsedbase.netloc
          | if .path | startswith("/") then
              .
            else
              # The path does not start with a slash: make it relative to the
              # base's path by dropping the base's filename and appending it.
              .path = (
                (($parsedbase.path // "") | split("/")[:-1] | join("/"))
                + "/" + .path
              )
            end
        else
          # Empty path (query-only, fragment-only or empty reference): keep
          # the base's path, and its query unless the reference has its own.
          # An empty string is truthy in jq, hence the explicit test above.
          .netloc = $parsedbase.netloc
          | .path = $parsedbase.path
          | .query = (.query // $parsedbase.query)
        end
      | urlunparse
    end;

# Basic Chinese number parsing meant for Chinese date parsing.
# Input: a string of ASCII and/or Chinese digits, optionally containing 十.
# Output: a number.
def parse_chinese_number:
  . as $input
  | {
      "零": "0",
      "〇": "0",
      "一": "1",
      "二": "2",
      "三": "3",
      "四": "4",
      "五": "5",
      "六": "6",
      "七": "7",
      "八": "8",
      "九": "9",
      # 10 is ignored here as we parse digit by digit.
      "十": ""
    } as $charmap
  | $input / ""
  # Characters that are not in the map (e.g. ASCII digits) are kept as-is.
  | map($charmap[.] // .)
  | join("")
  # Special case for when we parse 十 alone: nothing is left after mapping.
  | if . == "" then 1 else . end
  | tonumber
  # Parsing digit by digit, ignoring 十, works as long as there is a digit
  # before and after it: 二十八 is read as 二八 (28). But 二十 would yield 2,
  # so we multiply by 10 when the number ends with 十, and 十八 would yield 8,
  # so we add 10 when the number starts with 十.
  | if $input | endswith("十") then . * 10
    elif $input | startswith("十") then . + 10
    else .
    end;

# Parse a Traditional or Simplified Chinese date (…年…月…日) into a Unix
# timestamp (midnight UTC of that day).
def parse_chinese_date:
  # Zero-pad month/day so the ISO 8601 string is well-formed.
  def pad2: if . < 10 then "0\(.)" else "\(.)" end;
  capture(
    "(?<year>[0123456789零〇一二三四五六七八九十]+)年"
    + "(?<month>[0123456789零〇一二三四五六七八九十]+)月"
    + "(?<day>[0123456789零〇一二三四五六七八九十]+)日"
  )
  | map_values(parse_chinese_number)
  # Handle the Minguo (Republic of China) calendar by assuming we never get
  # Gregorian dates before 1900: smaller years are offset by 1911. This stops
  # working at Minguo year 1900, i.e. 3811 CE.
  | if .year < 1900 then .year += 1911 else . end
  | "\(.year)-\(.month | pad2)-\(.day | pad2)T00:00:00Z"
  | fromdateiso8601;