#!/usr/bin/jq -sRf
# RFC 5545 (iCalendar) parser
# ~lucidiot, 2023
#
# Input:  the whole .ics file as one raw string (hence the -s and -R flags in the shebang).
# Output: the root component as a JSON object shaped like this:
#   {
#     "_type": "VCALENDAR",
#     "some-property": { "<raw parameter string, or empty>": ["value1", "value2"] },
#     "_components": [ { "_type": "VEVENT", ... }, ... ]
#   }
# Errors: the program aborts with an error on a line that cannot be parsed, on a property
# found before the first BEGIN:, on an END: that does not match the component being parsed,
# and on any line found after the END: of the root component.
#
# iCalendar (.ics) files contain a series of components delimited by BEGIN: and END:.
# Each component can have properties. Each property has a name and a value.
# Each property may have some optional key/value parameters:
#   THING;PARAM1=VALUE1;PARAM2=VALUE2:CONTENT
# Parameters may have a list of values instead of just one parameter value:
#   THING;PARAM=VAL1,VAL2,VAL3,"VAL4,WITH,COMMAS":CONTENT
# The parameter syntax is validated here, but the parameters are stored as one raw string.

# Remove the final line break, as it would otherwise appear to us as an empty line.
rtrimstr("\n")
| rtrimstr("\r")

# Lines are supposed to end after 75 characters. A line break followed by a single space
# or horizontal tab means that the next line is really just the continuation of the
# previous one (RFC 5545 section 3.1), so we remove those sequences to unfold every line.
| gsub("\r?\n[ \t]"; "")

# Iterate on each line.
| reduce split("\n")[] as $item (
    # Initial state of the parser
    {
      # Placeholder for the root component of this file.
      # The _type will be filled in with the type specified in BEGIN:.
      # "_" is not an allowed character in property names, so we can use it for our own purposes.
      "root": {"_type": null},
      # Path within this state where the parser is currently inserting new properties.
      # This is used to keep track of where we are in the hierarchy when parsing nested components.
      "current_path": ["root"]
    };

    . as $state
    | (
        $item
        # Parse a whole line as { name: "...", params: "..." (or null), value: "..." }.
        # capture outputs nothing at all when the line does not match, which would make the
        # reduce update output nothing and lose the parser state. Fail loudly instead.
        | capture("^(?'name'[a-zA-Z0-9-]+)(?:;(?'params'[a-zA-Z0-9-]+=(?:\"[^[:cntrl:]\"]*\"|[^[:cntrl:]\",;:]*)(?:,(?:\"[^[:cntrl:]\"]*\"|[^[:cntrl:]\",;:]*))*(?:;[a-zA-Z0-9-]+=(?:\"[^[:cntrl:]\"]*\"|[^[:cntrl:]\",;:]*)(?:,(?:\"[^[:cntrl:]\"]*\"|[^[:cntrl:]\",;:]*))*)*))?:(?'value'[^[:cntrl:]]*)\r?$")
          // error("Malformed content line: \($item)")
      ) as {$name, $params, $value}

    # Property names should be case-insensitive, we will use lowercase everywhere
    | ($name | ascii_downcase) as $name

    | $state
    | if .current_path[0] != "root" then
        # Once the END: of the root component has been handled, current_path is [].
        # Nothing else is allowed to be parsed after that point.
        error("Unexpected end of root component")

      elif getpath([.current_path[], "_type"]) == null then
        # When the type was not yet filled in, we are expecting a BEGIN for the root component.
        if $name == "begin" then
          setpath([.current_path[], "_type"]; $value)
        else
          error("Expected BEGIN, got \($name)")
        end

      elif $name == "begin" then
        # This BEGIN: declares a nested component.
        # Nested components get their type as soon as they are created below, so a `null`
        # type can only be seen on the root component: the branch above only ever runs for
        # the root, and this branch only ever handles nested components.
        # Components are nested under an array called `_components`. We add to the path the
        # `_components` key and then the index of the new component. The length of the
        # array is the last index + 1, so this appends the new component at the end.
        .current_path += ["_components", ((getpath(.current_path)._components // []) | length)]
        # Add the new component now at our new path with the type from the BEGIN:.
        | setpath(.current_path; {"_type": $value})

      elif $name == "end" then
        # Handle an END by checking that its type matches the type we are working with now,
        # and going back up in the structure by removing the array index and the
        # `_components` key from the path. For the root component this leaves [].
        if $value == getpath([.current_path[], "_type"]) then
          .current_path |= .[:-2]
        else
          error("Unexpected end of \($value) component while in a \(getpath([.current_path[], "_type"])) component")
        end

      else
        # This is not any special case, so we will just set a property.
        # Since some properties could have multiple values for the same set of parameters
        # and multiple sets of parameters for the same name, we structure the output like this:
        #   { "name": { "parameters": ["value1", "value2"] } }
        # When there are no parameters, we will use the "" key.
        setpath(
          [.current_path[], $name, ($params // "")];
          (getpath([.current_path[], $name, ($params // "")]) // []) + [$value]
        )
      end
  )

# Return the parsed calendar from our parser's state.
| .root