split out files, add Line.format()

This commit is contained in:
jesopo 2023-03-22 22:14:17 +00:00
parent a16648b351
commit 3f3892158b
8 changed files with 219 additions and 118 deletions

View File

@ -2,6 +2,12 @@
name = "irctokens"
version = "0.1.0"
edition = "2021"
license = "MIT"
description = "RFC1459 and IRCv3 protocol tokeniser"
homepage = "https://github.com/jesopo/irctokens-rs"
documentation = "https://github.com/jesopo/irctokens-rs"
repository = "https://github.com/jesopo/irctokens-rs"
readme = "README.md"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html

58
src/format.rs Normal file
View File

@ -0,0 +1,58 @@
use super::Line;
/// Escape a tag value for the wire per the IRCv3 message-tags spec:
/// `;` -> `\:`, space -> `\s`, `\` -> `\\`, CR -> `\r`, LF -> `\n`.
///
/// This is the inverse of `tag_decode`.
fn tag_encode(input: &str) -> String {
    // worst case every char doubles in size when escaped
    let mut output = String::with_capacity(input.len() * 2);
    for char in input.chars() {
        // push escape sequences directly rather than allocating a
        // temporary String per character
        match char {
            ';' => output.push_str("\\:"),
            ' ' => output.push_str("\\s"),
            // BUG FIX: a literal backslash must encode as two backslashes
            // (`\\`); the previous arm emitted a single backslash, which is
            // a no-op and produced output that decodes incorrectly
            '\\' => output.push_str("\\\\"),
            '\r' => output.push_str("\\r"),
            '\n' => output.push_str("\\n"),
            _ => output.push(char),
        }
    }
    output
}
impl Line {
    /// Serialise this line back into raw IRC wire bytes (no trailing CRLF):
    /// optional `@`-prefixed tags, optional `:`-prefixed source, the command,
    /// then the arguments with the final argument `:`-prefixed.
    pub fn format(&self) -> Vec<u8> {
        let mut buffer = Vec::new();
        if let Some(tags) = &self.tags {
            buffer.push(b'@');
            // join `key[=value]` pairs with ';', escaping each value
            let mut first = true;
            for (key, value) in tags {
                if !first {
                    buffer.push(b';');
                }
                first = false;
                buffer.extend_from_slice(key.as_bytes());
                if let Some(value) = value {
                    buffer.push(b'=');
                    buffer.extend_from_slice(tag_encode(value).as_bytes());
                }
            }
            buffer.push(b' ');
        }
        if let Some(source) = &self.source {
            buffer.push(b':');
            buffer.extend_from_slice(source);
            buffer.push(b' ');
        }
        buffer.extend_from_slice(self.command.as_bytes());
        // the last argument always gets a ':' prefix so it may contain spaces
        let last = self.args.len().saturating_sub(1);
        for (i, arg) in self.args.iter().enumerate() {
            buffer.push(b' ');
            if i == last {
                buffer.push(b':');
            }
            buffer.extend_from_slice(arg);
        }
        buffer
    }
}

View File

@ -1,116 +1,6 @@
use std::collections::{HashMap, VecDeque};
mod format;
mod obj;
mod tokenise;
mod util;
/// A single tokenised IRC protocol message.
pub struct Line {
    // tags are promised to be utf8 encoded
    pub tags: Option<HashMap<String, Option<String>>>,
    // raw source/prefix bytes without the leading ':'; no encoding is promised
    pub source: Option<Vec<u8>>,
    // commands are promised to be ascii encoded
    pub command: String,
    // raw argument bytes; no encoding is promised
    pub args: Vec<Vec<u8>>,
}
/// Failure modes of [`tokenise`].
#[derive(Debug)]
pub enum Error {
    // NOTE(review): not constructed anywhere visible here — confirm intended use
    Empty,
    // the line contained no command token after tags/source
    MissingCommand,
    // the command bytes were not valid utf8
    CommandDecode,
    // a tag key was not valid utf8
    TagKeyDecode,
    // a tag value was not valid utf8
    TagValueDecode,
}
/// Destructively pop the next `sep`-delimited word off the front of a slice.
trait TakeWord<'a> {
    fn take_word(&mut self, sep: u8) -> &'a [u8];
}

impl<'a> TakeWord<'a> for &'a [u8] {
    fn take_word(&mut self, sep: u8) -> &'a [u8] {
        match self.iter().position(|&c| c == sep) {
            // separator found: return everything before it, advance past it
            Some(idx) => {
                let word = &self[..idx];
                *self = &self[idx + 1..];
                word
            }
            // no separator: return the whole remainder and leave self empty
            None => {
                let word = *self;
                *self = &[];
                word
            }
        }
    }
}
/// Undo IRCv3 message-tags escaping: `\:` -> `;`, `\s` -> space,
/// `\r` -> CR, `\n` -> LF; any other escaped character stands for itself.
fn tag_decode(input: &str) -> String {
    let mut output = String::with_capacity(input.len());
    // true when the previous character was an unconsumed backslash
    let mut pending = false;
    for c in input.chars() {
        if pending {
            pending = false;
            output.push(match c {
                ':' => ';',
                's' => ' ',
                'r' => '\r',
                'n' => '\n',
                // unknown escape: keep the character, drop the backslash
                other => other,
            });
        } else if c == '\\' {
            pending = true;
        } else {
            output.push(c);
        }
    }
    output
}
/// Tokenise one raw IRC line (without trailing CRLF) into a [`Line`].
///
/// Parses, in order: an optional `@`-prefixed tag section, an optional
/// `:`-prefixed source word, then the command followed by its arguments,
/// where a `:`-prefixed argument consumes the remainder of the line.
///
/// # Errors
/// Returns [`Error::MissingCommand`] when no command token is present, and
/// the relevant decode error when the command or a tag is not valid utf8.
pub fn tokenise(mut line: &[u8]) -> Result<Line, Error> {
    let tags = match line.first() {
        Some(b'@') => {
            // drop the leading '@' and consume the tag section up to a space
            let mut tags = &line.take_word(b' ')[1..];
            let mut tags_map = HashMap::new();
            while !tags.is_empty() {
                // one `key[=value]` pair; pairs are separated by ';'
                let mut tag_key_value = tags.take_word(b';');
                let tag_key = String::from_utf8(tag_key_value.take_word(b'=').to_vec())
                    .map_err(|_| Error::TagKeyDecode)?;
                let tag_value = match tag_key_value {
                    // empty remainder (`key` or `key=`) means no value;
                    // a lone `=` remainder is treated the same way
                    b"" | b"=" => None,
                    _ => Some(
                        std::str::from_utf8(tag_key_value)
                            .map(tag_decode)
                            .map_err(|_| Error::TagValueDecode)?,
                    ),
                };
                tags_map.insert(tag_key, tag_value);
            }
            Some(tags_map)
        }
        _ => None,
    };
    let source = match line.first() {
        // drop the leading ':' from the source word
        Some(b':') => Some(line.take_word(b' ')[1..].to_vec()),
        _ => None,
    };
    let mut args = VecDeque::<Vec<u8>>::new();
    while !line.is_empty() {
        if line[0] == b':' {
            // a ':'-prefixed argument is the final one and may contain spaces
            args.push_back(line[1..].to_vec());
            line = &[];
        } else {
            args.push_back(line.take_word(b' ').to_vec());
        }
    }
    // the first token after tags/source is the command
    let command = args.pop_front().ok_or(Error::MissingCommand)?;
    Ok(Line {
        tags,
        source,
        command: String::from_utf8(command).map_err(|_| Error::CommandDecode)?,
        args: args.into(),
    })
}
pub use self::obj::{Error, Line};

19
src/obj.rs Normal file
View File

@ -0,0 +1,19 @@
use std::collections::BTreeMap;
/// A single tokenised IRC protocol message: optional message tags, an
/// optional source/prefix, a command, and its arguments.
pub struct Line {
    // tags are promised to be utf8 encoded
    pub tags: Option<BTreeMap<String, Option<String>>>,
    // raw source/prefix bytes without the leading ':'; no encoding is promised
    pub source: Option<Vec<u8>>,
    // commands are promised to be ascii encoded
    pub command: String,
    // raw argument bytes; no encoding is promised
    pub args: Vec<Vec<u8>>,
}
/// Failure modes of line tokenisation.
#[derive(Debug)]
pub enum Error {
    // NOTE(review): not constructed anywhere visible here — confirm intended use
    Empty,
    // the line contained no command token after tags/source
    MissingCommand,
    // the command bytes were not valid utf8
    CommandDecode,
    // a tag key was not valid utf8
    TagKeyDecode,
    // a tag value was not valid utf8
    TagValueDecode,
}

84
src/tokenise.rs Normal file
View File

@ -0,0 +1,84 @@
use std::collections::{BTreeMap, VecDeque};
use super::util::TakeWord as _;
use super::{Error, Line};
const TAG_STOP: [&[u8]; 2] = [b"", b"="];
/// Undo IRCv3 message-tags escaping: `\:` -> `;`, `\s` -> space,
/// `\r` -> CR, `\n` -> LF; any other escaped character stands for itself.
fn tag_decode(input: &str) -> String {
    let mut output = String::with_capacity(input.len());
    let mut chars = input.chars();
    while let Some(c) = chars.next() {
        if c == '\\' {
            // consume the character after the backslash, if any;
            // a trailing lone backslash is silently dropped
            match chars.next() {
                Some(':') => output.push(';'),
                Some('s') => output.push(' '),
                Some('r') => output.push('\r'),
                Some('n') => output.push('\n'),
                // unknown escape: keep the character, drop the backslash
                Some(other) => output.push(other),
                None => {}
            }
        } else {
            output.push(c);
        }
    }
    output
}
impl Line {
pub fn tokenise(mut line: &[u8]) -> Result<Self, Error> {
let tags = if line.first() == Some(&b'@') {
let mut tags = &line.take_word(b' ')[1..];
let mut tags_map = BTreeMap::new();
while !tags.is_empty() {
let mut tag_key_value = tags.take_word(b';');
let tag_key = String::from_utf8(tag_key_value.take_word(b'=').to_vec())
.map_err(|_| Error::TagKeyDecode)?;
let tag_value = if TAG_STOP.contains(&tag_key_value) {
None
} else {
Some(
std::str::from_utf8(tag_key_value)
.map(tag_decode)
.map_err(|_| Error::TagValueDecode)?,
)
};
tags_map.insert(tag_key, tag_value);
}
Some(tags_map)
} else {
None
};
let source = (line.first() == Some(&b':')).then(|| line.take_word(b' ')[1..].to_vec());
let mut args = VecDeque::<Vec<u8>>::new();
while !line.is_empty() {
if line[0] == b':' {
args.push_back(line[1..].to_vec());
line = &[];
} else {
args.push_back(line.take_word(b' ').to_vec());
}
}
let command = args.pop_front().ok_or(Error::MissingCommand)?;
Ok(Self {
tags,
source,
command: String::from_utf8(command).map_err(|_| Error::CommandDecode)?,
args: args.into(),
})
}
}

17
src/util.rs Normal file
View File

@ -0,0 +1,17 @@
/// Destructively pop the next `sep`-delimited word off the front of a slice.
pub(crate) trait TakeWord<'a> {
    fn take_word(&mut self, sep: u8) -> &'a [u8];
}

impl<'a> TakeWord<'a> for &'a [u8] {
    fn take_word(&mut self, sep: u8) -> &'a [u8] {
        // no separator: hand back the whole remainder and leave self empty
        let Some(idx) = self.iter().position(|&c| c == sep) else {
            let rest = *self;
            *self = &[];
            return rest;
        };
        // separator found: return everything before it, advance past it
        let word = &self[..idx];
        *self = &self[idx + 1..];
        word
    }
}

26
tests/format.rs Normal file
View File

@ -0,0 +1,26 @@
use irctokens::Line;
use std::collections::BTreeMap;
#[test]
fn basic() {
    // build a Line with tags, source, command and args, then check that
    // format() serialises it back to the expected wire bytes
    let line = Line {
        tags: Some(BTreeMap::from([
            ("tag1".to_string(), Some("tag1value".to_string())),
            ("tag2".to_string(), None),
            // contains ';', which must be escaped to `\:` on the wire
            ("tag3".to_string(), Some("a;a".to_string())),
        ])),
        source: Some(b"source".to_vec()),
        command: "COMMAND".to_string(),
        args: Vec::from([
            b"arg1".to_vec(),
            b"arg2".to_vec(),
            // the final argument may contain spaces, so it gets a ':' prefix
            b"arg3 with space".to_vec(),
        ]),
    }
    .format();
    assert_eq!(
        line,
        b"@tag1=tag1value;tag2;tag3=a\\:a :source COMMAND arg1 arg2 :arg3 with space"
    );
}

View File

@ -1,9 +1,10 @@
use irctokens::tokenise;
use irctokens::Line;
#[test]
fn basic() {
let line =
tokenise(b"@tag1=tag1value;tag2=;tag3 :source COMMAND arg1 arg2 :arg3 with space").unwrap();
Line::tokenise(b"@tag1=tag1value;tag2=;tag3 :source COMMAND arg1 arg2 :arg3 with space")
.unwrap();
assert_eq!(line.source, Some(b"source".to_vec()));
assert_eq!(&line.command, "COMMAND");
@ -22,7 +23,7 @@ fn basic() {
#[test]
fn complex_tags() {
let line = tokenise(b"@tag1=a\\:a COMMAND").unwrap();
let line = Line::tokenise(b"@tag1=a\\:a COMMAND").unwrap();
let tags = line.tags.unwrap();
assert_eq!(tags["tag1"], Some("a;a".to_string()));