diff --git a/Cargo.toml b/Cargo.toml
index 9091349..f7a6715 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -2,6 +2,12 @@
 name = "irctokens"
 version = "0.1.0"
 edition = "2021"
+license = "MIT"
+description = "RFC1459 and IRCv3 protocol tokeniser"
+homepage = "https://github.com/jesopo/irctokens-rs"
+documentation = "https://github.com/jesopo/irctokens-rs"
+repository = "https://github.com/jesopo/irctokens-rs"
+readme = "README.md"
 
 # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
 
diff --git a/src/format.rs b/src/format.rs
new file mode 100644
index 0000000..2afb82d
--- /dev/null
+++ b/src/format.rs
@@ -0,0 +1,58 @@
+use super::Line;
+
+fn tag_encode(input: &str) -> String {
+    let mut output = String::with_capacity(input.len() * 2);
+
+    for char in input.chars() {
+        output.push_str(&match char {
+            ';' => "\\:".to_string(),
+            ' ' => "\\s".to_string(),
+            '\\' => "\\\\".to_string(),
+            '\r' => "\\r".to_string(),
+            '\n' => "\\n".to_string(),
+            _ => char.to_string(),
+        });
+    }
+
+    output
+}
+
+impl Line {
+    pub fn format(&self) -> Vec<u8> {
+        let mut output = Vec::new();
+
+        if let Some(tags) = &self.tags {
+            output.push(b'@');
+            for (i, (key, value)) in tags.iter().enumerate() {
+                if i != 0 {
+                    output.push(b';');
+                }
+
+                output.extend_from_slice(key.as_bytes());
+                if let Some(value) = value {
+                    output.push(b'=');
+                    output.extend_from_slice(tag_encode(value).as_bytes());
+                }
+            }
+            output.push(b' ');
+        }
+
+        if let Some(source) = &self.source {
+            output.push(b':');
+            output.extend_from_slice(source);
+            output.push(b' ');
+        }
+
+        output.extend_from_slice(self.command.as_bytes());
+
+        for (i, arg) in self.args.iter().enumerate() {
+            output.push(b' ');
+            if i == self.args.len() - 1 {
+                output.push(b':');
+            }
+            output.extend_from_slice(arg);
+        }
+
+        output
+    }
+}
diff --git a/src/lib.rs b/src/lib.rs
index 44a4b8c..7e5ba71 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -1,116 +1,6 @@
-use std::collections::{HashMap, VecDeque};
+mod format;
+mod obj;
+mod tokenise;
+mod util;
-pub struct Line {
-    // tags are promised to be utf8 encoded
-    pub tags: Option<HashMap<String, Option<String>>>,
-    pub source: Option<Vec<u8>>,
-    // commands are promised to be ascii encoded
-    pub command: String,
-    pub args: Vec<Vec<u8>>,
-}
-
-#[derive(Debug)]
-pub enum Error {
-    Empty,
-    MissingCommand,
-    CommandDecode,
-    TagKeyDecode,
-    TagValueDecode,
-}
-
-trait TakeWord<'a> {
-    fn take_word(&mut self, sep: u8) -> &'a [u8];
-}
-
-impl<'a> TakeWord<'a> for &'a [u8] {
-    fn take_word(&mut self, sep: u8) -> &'a [u8] {
-        if let Some(i) = self.iter().position(|c| c == &sep) {
-            let word = &self[..i];
-            *self = &self[i + 1..];
-            word
-        } else {
-            let word = &self[..];
-            *self = &self[self.len()..];
-            word
-        }
-    }
-}
-
-fn tag_decode(input: &str) -> String {
-    let mut escaped = false;
-    let mut output = String::with_capacity(input.len());
-
-    for char in input.chars() {
-        if escaped {
-            escaped = false;
-            let replace = match char {
-                ':' => ';',
-                's' => ' ',
-                'r' => '\r',
-                'n' => '\n',
-                _ => char,
-            };
-
-            output.push(replace);
-        } else if char == 0x5c as char {
-            // backslash
-            escaped = true;
-        } else {
-            output.push(char);
-        }
-    }
-
-    output
-}
-
-pub fn tokenise(mut line: &[u8]) -> Result<Line, Error> {
-    let tags = match line.first() {
-        Some(b'@') => {
-            let mut tags = &line.take_word(b' ')[1..];
-            let mut tags_map = HashMap::new();
-
-            while !tags.is_empty() {
-                let mut tag_key_value = tags.take_word(b';');
-                let tag_key = String::from_utf8(tag_key_value.take_word(b'=').to_vec())
-                    .map_err(|_| Error::TagKeyDecode)?;
-                let tag_value = match tag_key_value {
-                    b"" | b"=" => None,
-                    _ => Some(
-                        std::str::from_utf8(tag_key_value)
-                            .map(tag_decode)
-                            .map_err(|_| Error::TagValueDecode)?,
-                    ),
-                };
-
-                tags_map.insert(tag_key, tag_value);
-            }
-
-            Some(tags_map)
-        }
-        _ => None,
-    };
-
-    let source = match line.first() {
-        Some(b':') => Some(line.take_word(b' ')[1..].to_vec()),
-        _ => None,
-    };
-
-    let mut args = VecDeque::<Vec<u8>>::new();
-    while !line.is_empty() {
-        if line[0] == b':' {
-            args.push_back(line[1..].to_vec());
-            line = &[];
-        } else {
-            args.push_back(line.take_word(b' ').to_vec());
-        }
-    }
-
-    let command = args.pop_front().ok_or(Error::MissingCommand)?;
-
-    Ok(Line {
-        tags,
-        source,
-        command: String::from_utf8(command).map_err(|_| Error::CommandDecode)?,
-        args: args.into(),
-    })
-}
+
+pub use self::obj::{Error, Line};
diff --git a/src/obj.rs b/src/obj.rs
new file mode 100644
index 0000000..7ca3ecf
--- /dev/null
+++ b/src/obj.rs
@@ -0,0 +1,19 @@
+use std::collections::BTreeMap;
+
+pub struct Line {
+    // tags are promised to be utf8 encoded
+    pub tags: Option<BTreeMap<String, Option<String>>>,
+    pub source: Option<Vec<u8>>,
+    // commands are promised to be ascii encoded
+    pub command: String,
+    pub args: Vec<Vec<u8>>,
+}
+
+#[derive(Debug)]
+pub enum Error {
+    Empty,
+    MissingCommand,
+    CommandDecode,
+    TagKeyDecode,
+    TagValueDecode,
+}
diff --git a/src/tokenise.rs b/src/tokenise.rs
new file mode 100644
index 0000000..b5d5cab
--- /dev/null
+++ b/src/tokenise.rs
@@ -0,0 +1,84 @@
+use std::collections::{BTreeMap, VecDeque};
+
+use super::util::TakeWord as _;
+use super::{Error, Line};
+
+const TAG_STOP: [&[u8]; 2] = [b"", b"="];
+
+fn tag_decode(input: &str) -> String {
+    let mut escaped = false;
+    let mut output = String::with_capacity(input.len());
+
+    for char in input.chars() {
+        if escaped {
+            escaped = false;
+            let replace = match char {
+                ':' => ';',
+                's' => ' ',
+                'r' => '\r',
+                'n' => '\n',
+                _ => char,
+            };
+
+            output.push(replace);
+        } else if char == 0x5c as char {
+            // backslash
+            escaped = true;
+        } else {
+            output.push(char);
+        }
+    }
+
+    output
+}
+
+impl Line {
+    pub fn tokenise(mut line: &[u8]) -> Result<Self, Error> {
+        let tags = if line.first() == Some(&b'@') {
+            let mut tags = &line.take_word(b' ')[1..];
+            let mut tags_map = BTreeMap::new();
+
+            while !tags.is_empty() {
+                let mut tag_key_value = tags.take_word(b';');
+                let tag_key = String::from_utf8(tag_key_value.take_word(b'=').to_vec())
+                    .map_err(|_| Error::TagKeyDecode)?;
+                let tag_value = if TAG_STOP.contains(&tag_key_value) {
+                    None
+                } else {
+                    Some(
+                        std::str::from_utf8(tag_key_value)
+                            .map(tag_decode)
+                            .map_err(|_| Error::TagValueDecode)?,
+                    )
+                };
+
+                tags_map.insert(tag_key, tag_value);
+            }
+
+            Some(tags_map)
+        } else {
+            None
+        };
+
+        let source = (line.first() == Some(&b':')).then(|| line.take_word(b' ')[1..].to_vec());
+
+        let mut args = VecDeque::<Vec<u8>>::new();
+        while !line.is_empty() {
+            if line[0] == b':' {
+                args.push_back(line[1..].to_vec());
+                line = &[];
+            } else {
+                args.push_back(line.take_word(b' ').to_vec());
+            }
+        }
+
+        let command = args.pop_front().ok_or(Error::MissingCommand)?;
+
+        Ok(Self {
+            tags,
+            source,
+            command: String::from_utf8(command).map_err(|_| Error::CommandDecode)?,
+            args: args.into(),
+        })
+    }
+}
diff --git a/src/util.rs b/src/util.rs
new file mode 100644
index 0000000..e24c24b
--- /dev/null
+++ b/src/util.rs
@@ -0,0 +1,17 @@
+pub(crate) trait TakeWord<'a> {
+    fn take_word(&mut self, sep: u8) -> &'a [u8];
+}
+
+impl<'a> TakeWord<'a> for &'a [u8] {
+    fn take_word(&mut self, sep: u8) -> &'a [u8] {
+        if let Some(i) = self.iter().position(|c| c == &sep) {
+            let word = &self[..i];
+            *self = &self[i + 1..];
+            word
+        } else {
+            let word = &self[..];
+            *self = &self[self.len()..];
+            word
+        }
+    }
+}
diff --git a/tests/format.rs b/tests/format.rs
new file mode 100644
index 0000000..101fd36
--- /dev/null
+++ b/tests/format.rs
@@ -0,0 +1,26 @@
+use irctokens::Line;
+use std::collections::BTreeMap;
+
+#[test]
+fn basic() {
+    let line = Line {
+        tags: Some(BTreeMap::from([
+            ("tag1".to_string(), Some("tag1value".to_string())),
+            ("tag2".to_string(), None),
+            ("tag3".to_string(), Some("a;a".to_string())),
+        ])),
+        source: Some(b"source".to_vec()),
+        command: "COMMAND".to_string(),
+        args: Vec::from([
+            b"arg1".to_vec(),
+            b"arg2".to_vec(),
+            b"arg3 with space".to_vec(),
+        ]),
+    }
+    .format();
+
+    assert_eq!(
+        line,
+        b"@tag1=tag1value;tag2;tag3=a\\:a :source COMMAND arg1 arg2 :arg3 with space"
+    );
+}
diff --git a/tests/basic.rs b/tests/tokenise.rs
similarity index 75%
rename from tests/basic.rs
rename to tests/tokenise.rs
index 03813b6..58d1292 100644
--- a/tests/basic.rs
+++ b/tests/tokenise.rs
@@ -1,9 +1,10 @@
-use irctokens::tokenise;
+use irctokens::Line;
 
 #[test]
 fn basic() {
     let line =
-        tokenise(b"@tag1=tag1value;tag2=;tag3 :source COMMAND arg1 arg2 :arg3 with space").unwrap();
+        Line::tokenise(b"@tag1=tag1value;tag2=;tag3 :source COMMAND arg1 arg2 :arg3 with space")
+            .unwrap();
 
     assert_eq!(line.source, Some(b"source".to_vec()));
     assert_eq!(&line.command, "COMMAND");
@@ -22,7 +23,7 @@
 
 #[test]
 fn complex_tags() {
-    let line = tokenise(b"@tag1=a\\:a COMMAND").unwrap();
+    let line = Line::tokenise(b"@tag1=a\\:a COMMAND").unwrap();
 
     let tags = line.tags.unwrap();
     assert_eq!(tags["tag1"], Some("a;a".to_string()));