Support vCard

This commit is contained in:
~lucidiot 2023-02-26 14:04:12 +01:00
parent ae4fe16ee4
commit 2f222dd33d
1 changed files with 7 additions and 6 deletions

View File

@ -1,5 +1,5 @@
#!/usr/bin/jq -sRf
# RFC 5545 (iCalendar) parser
# RFC 5545 (iCalendar) + RFC 6350 (vCard) parser
# ~lucidiot, 2023
# iCalendar (.ics) files contain a series of components delimited by BEGIN:<type> and END:<type>.
@ -8,16 +8,17 @@
# THING;PARAM1=VALUE1;PARAM2=VALUE2:CONTENT
# Parameters may have a list of values instead of just one parameter value:
# THING;PARAM=VAL1,VAL2,VAL3,"VAL4,WITH,COMMAS":CONTENT
# We do parse the parameter syntax here, but they will in the end only be stored as a single string.
# vCard (.vcf) files have a similar syntax, but with different quoting and escaping rules,
# and RFC 6868 adds a layer of complexity with a second escape character, ^.
# For these reasons, we will not parse parameters here, and they are just one string in the output.
# Remove the final line break (a trailing LF, then a trailing CR), as it would otherwise appear to us as an empty line
rtrimstr("\n")
| rtrimstr("\r")
# Lines are supposed to end after 75 characters. Adding a space at the beginning of the next line
# means that the next line is really just part of the previous line, so we remove those extra
# line breaks to merge every line.
| gsub("\r?\n "; "")
# line breaks to merge every line. vCard also allows tabs and not just spaces.
| gsub("\r?\n[ \t]"; "")
# Iterate on each line.
| reduce split("\n")[] as $item (
# Initial state of the parser
@ -34,7 +35,7 @@ rtrimstr("\n")
| (
$item
# Parse a whole line as { name: "...", params: "..." (or null), value: "..." }
| capture("^(?'name'[a-zA-Z0-9-]+)(?:;(?'params'[a-zA-Z0-9-]+=(?:\"[^[:cntrl:]\"]*\"|[^[:cntrl:]\",;:]*)(?:,(?:\"[^[:cntrl:]\"]*\"|[^[:cntrl:]\",;:]*))*(?:;[a-zA-Z0-9-]+=(?:\"[^[:cntrl:]\"]*\"|[^[:cntrl:]\",;:]*)(?:,(?:\"[^[:cntrl:]\"]*\"|[^[:cntrl:]\",;:]*))*)*))?:(?'value'[^[:cntrl:]]*)\r?$")
| capture("^(?'name'[a-zA-Z0-9-]+)(?:;(?'params'(?:[^:]*\".*\")*[^:]+))?:(?'value'[^[:cntrl:]]*)\r?$")
) as {$name, $params, $value}
# Property names should be case-insensitive, we will use lowercase everywhere
| ($name | ascii_downcase) as $name