use std::collections::{BTreeMap, VecDeque}; use super::util::TakeWord as _; use super::Line; const TAG_STOP: [&[u8]; 2] = [b"", b"="]; #[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum Error { /// An empty byte array was passed to the tokeniser. Empty, /// A line is invalid if it has no `COMMAND` (e.g. `PRIVMSG`). MissingCommand, /// Commands must be ascii encoded. CommandDecode, /// Message tag keys must be utf8 encoded. TagKeyDecode, /// Message tag values must be utf8 encoded. TagValueDecode, } impl std::fmt::Display for Error { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { match self { Error::Empty => write!(f, "empty slice passed to tokeniser"), Error::MissingCommand => write!(f, "missing command"), Error::CommandDecode => write!(f, "commands must be ascii encoded"), Error::TagKeyDecode => write!(f, "message tag keys must be utf8 encoded"), Error::TagValueDecode => write!(f, "message tag values must be utf8 encoded"), } } } impl std::error::Error for Error {} fn tag_decode(input: &str) -> String { let mut escaped = false; let mut output = String::with_capacity(input.len()); for char in input.chars() { if escaped { escaped = false; let replace = match char { ':' => ';', 's' => ' ', 'r' => '\r', 'n' => '\n', _ => char, }; output.push(replace); } else if char == '\\' { // backslash escaped = true; } else { output.push(char); } } output } impl Line { #[allow(clippy::doc_markdown)] /// Attempt to tokenise a byte string by [RFC1459] and [IRCv3] protocol rules. /// /// Expects a byte string that does NOT contain a trailing CRLF. /// /// [RFC1459]: https://www.rfc-editor.org/rfc/rfc1459#section-2.3 /// [IRCv3]: https://ircv3.net/specs/extensions/message-tags.html pub fn tokenise(line: impl std::convert::AsRef<[u8]>) -> Result { let mut line = line.as_ref(); let tags = if line.first() == Some(&b'@') { let mut tags = &line.take_word(b' ')[1..]; let mut tags_map = BTreeMap::new(); while !tags.is_empty() { let mut tag_key_value = tags.take_word(b';'); let tag_key = String::from_utf8(tag_key_value.take_word(b'=').to_vec()) .map_err(|_| Error::TagKeyDecode)?; let tag_value = if TAG_STOP.contains(&tag_key_value) { None } else { Some( std::str::from_utf8(tag_key_value) .map(tag_decode) .map_err(|_| Error::TagValueDecode)?, ) }; tags_map.insert(tag_key, tag_value); } Some(tags_map) } else { None }; let source = (line.first() == Some(&b':')).then(|| line.take_word(b' ')[1..].to_vec()); let mut args = VecDeque::>::new(); while !line.is_empty() { if line[0] == b':' { args.push_back(line[1..].to_vec()); line = &[]; } else { args.push_back(line.take_word(b' ').to_vec()); } } let command = args.pop_front().ok_or(Error::MissingCommand)?; Ok(Self { tags, source, command: String::from_utf8(command).map_err(|_| Error::CommandDecode)?, arguments: args.into(), }) } } /// Implementation that simply calls [`Line::tokenise()`]. impl TryFrom<&[u8]> for Line { type Error = Error; fn try_from(value: &[u8]) -> Result { Self::tokenise(value) } }