// Reconstructed from a git patch:
//   commit:  b484a0834ef76ba9aa02cc34da0516ac278506d2
//   From:    jesopo
//   Date:    Tue, 21 Mar 2023 21:50:02 +0000
//   Subject: [PATCH] first code commit
//
// Besides the Rust code below (src/lib.rs and tests/basic.rs), the patch
// created these files:
//
//   .gitignore
//     /target
//     /Cargo.lock
//
//   Cargo.toml
//     [package]
//     name = "irctokens"
//     version = "0.1.0"
//     edition = "2021"
//
//     # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
//
//     [dependencies]
//
// NOTE(review): the generic type arguments in the patch text were stripped
// (e.g. `Option>>`); they are restored here from how the fields are used by
// `tokenise` and by the bundled test — confirm against the real repository.

use std::collections::{HashMap, VecDeque};

/// A single tokenised IRC line.
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Line {
    /// Message tags, present only when the line started with `@`.
    ///
    /// Keys and values are validated as UTF-8. A tag without a value (`key`
    /// or `key=`) maps to `None`. Values are stored verbatim; no unescaping
    /// is applied.
    pub tags: Option<HashMap<String, Option<String>>>,
    /// Raw bytes of the source prefix (without the leading `:`), if any.
    pub source: Option<Vec<u8>>,
    /// The command; `tokenise` guarantees it is non-empty and ASCII.
    pub command: String,
    /// Raw bytes of every parameter following the command.
    pub args: Vec<Vec<u8>>,
}

/// Reasons `tokenise` can reject a line.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum Error {
    /// The input contained no bytes at all.
    Empty,
    /// The line had no (non-empty) command token.
    MissingCommand,
    /// The command was not ASCII.
    CommandDecode,
    /// A tag key was not valid UTF-8.
    TagKeyDecode,
    /// A tag value was not valid UTF-8.
    TagValueDecode,
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let msg = match self {
            Error::Empty => "line is empty",
            Error::MissingCommand => "line has no command",
            Error::CommandDecode => "command is not ascii",
            Error::TagKeyDecode => "tag key is not valid utf8",
            Error::TagValueDecode => "tag value is not valid utf8",
        };
        f.write_str(msg)
    }
}

impl std::error::Error for Error {}

/// Cursor-style splitting on a byte slice.
trait TakeWord<'a> {
    /// Returns everything before the first `sep` and advances `self` past
    /// that separator. When `sep` is absent the whole remainder is returned
    /// and `self` becomes empty.
    fn take_word(&mut self, sep: u8) -> &'a [u8];
}

impl<'a> TakeWord<'a> for &'a [u8] {
    fn take_word(&mut self, sep: u8) -> &'a [u8] {
        // Copy the inner reference out so the returned word keeps lifetime 'a
        // rather than being tied to the `&mut self` borrow.
        let whole: &'a [u8] = *self;
        match whole.iter().position(|&c| c == sep) {
            Some(i) => {
                *self = &whole[i + 1..];
                &whole[..i]
            }
            None => {
                *self = &whole[whole.len()..];
                whole
            }
        }
    }
}

/// Advances `line` past any leading run of spaces.
fn skip_spaces<'a>(line: &mut &'a [u8]) {
    let whole: &'a [u8] = *line;
    let start = whole
        .iter()
        .position(|&c| c != b' ')
        .unwrap_or(whole.len());
    *line = &whole[start..];
}

/// Splits one raw IRC line into tags, source, command and arguments.
///
/// The layout handled is `[@tags ][:source ]COMMAND [arg ...][ :trailing]`.
/// Runs of spaces between tokens count as a single separator. A token that
/// starts with `:` (after the optional source) swallows the rest of the line,
/// spaces included, as the final argument. Line terminators are not stripped.
///
/// # Errors
///
/// * [`Error::Empty`] — `line` has no bytes.
/// * [`Error::MissingCommand`] — no non-empty command token was found.
/// * [`Error::CommandDecode`] — the command contains non-ASCII bytes.
/// * [`Error::TagKeyDecode`] / [`Error::TagValueDecode`] — a tag key or value
///   is not valid UTF-8.
pub fn tokenise(mut line: &[u8]) -> Result<Line, Error> {
    if line.is_empty() {
        return Err(Error::Empty);
    }

    let tags = if line.first() == Some(&b'@') {
        // Everything up to the first space is the tag section; drop the '@'.
        let mut tags = &line.take_word(b' ')[1..];
        let mut tags_map = HashMap::new();

        while !tags.is_empty() {
            let mut keyvalue = tags.take_word(b';');
            if keyvalue.is_empty() {
                // Stray ';' (e.g. "a;;b"): nothing to record.
                continue;
            }

            let key = std::str::from_utf8(keyvalue.take_word(b'='))
                .map_err(|_| Error::TagKeyDecode)?
                .to_owned();
            // After the key is taken, `keyvalue` holds only the value bytes;
            // an empty remainder means "no value".
            let value = if keyvalue.is_empty() {
                None
            } else {
                Some(
                    std::str::from_utf8(keyvalue)
                        .map_err(|_| Error::TagValueDecode)?
                        .to_owned(),
                )
            };
            tags_map.insert(key, value);
        }

        // Extra spaces after the tag word must not hide a following source.
        skip_spaces(&mut line);
        Some(tags_map)
    } else {
        None
    };

    let source = if line.first() == Some(&b':') {
        Some(line.take_word(b' ')[1..].to_vec())
    } else {
        None
    };

    let mut args = VecDeque::<Vec<u8>>::new();
    loop {
        skip_spaces(&mut line);
        match line.split_first() {
            None => break,
            Some((b':', trailing)) => {
                // Trailing parameter: keep the remainder byte-for-byte.
                args.push_back(trailing.to_vec());
                break;
            }
            Some(_) => args.push_back(line.take_word(b' ').to_vec()),
        }
    }

    let command = args
        .pop_front()
        .filter(|c| !c.is_empty())
        .ok_or(Error::MissingCommand)?;
    if !command.is_ascii() {
        return Err(Error::CommandDecode);
    }
    // ASCII is always valid UTF-8; the error mapping is kept for completeness.
    let command = String::from_utf8(command).map_err(|_| Error::CommandDecode)?;

    Ok(Line {
        tags,
        source,
        command,
        args: args.into(),
    })
}

// From tests/basic.rs in the patch. Its `use irctokens::tokenise;` line is
// omitted because everything lives in one file here.
#[test]
fn basic() {
    let line =
        tokenise(b"@tag1=tag1value;tag2=;tag3 :source COMMAND arg1 arg2 :arg3 with space").unwrap();

    assert_eq!(line.source, Some(b"source".to_vec()));
    assert_eq!(&line.command, "COMMAND");

    assert_eq!(line.args.len(), 3);
    assert_eq!(line.args[0], b"arg1");
    assert_eq!(line.args[1], b"arg2");
    assert_eq!(line.args[2], b"arg3 with space");

    let tags = line.tags.unwrap();
    assert_eq!(tags.len(), 3);
    assert_eq!(tags["tag1"], Some("tag1value".to_string()));
    assert_eq!(tags["tag2"], None);
    assert_eq!(tags["tag3"], None);
}