split out files, add Line.format()
This commit is contained in:
parent
a16648b351
commit
3f3892158b
|
@ -2,6 +2,12 @@
|
|||
name = "irctokens"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
license = "MIT"
|
||||
description = "RFC1459 and IRCv3 protocol tokeniser"
|
||||
homepage = "https://github.com/jesopo/irctokens-rs"
|
||||
documentation = "https://github.com/jesopo/irctokens-rs"
|
||||
repository = "https://github.com/jesopo/irctokens-rs"
|
||||
readme = "README.md"
|
||||
|
||||
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
|
||||
|
||||
|
|
|
@ -0,0 +1,58 @@
|
|||
use super::Line;
|
||||
|
||||
/// Escapes a message-tag value for the wire.
///
/// The mapping is the inverse of `tag_decode`: `;` becomes `\:`, space
/// becomes `\s`, backslash becomes `\\`, CR becomes `\r` and LF becomes `\n`.
/// Every other character is copied through unchanged.
fn tag_encode(input: &str) -> String {
    // Worst case every character expands to a two-character escape.
    let mut output = String::with_capacity(input.len() * 2);

    for c in input.chars() {
        match c {
            ';' => output.push_str("\\:"),
            ' ' => output.push_str("\\s"),
            // A literal backslash must itself be escaped, otherwise the
            // decoder would treat it as the start of an escape sequence.
            '\\' => output.push_str("\\\\"),
            '\r' => output.push_str("\\r"),
            '\n' => output.push_str("\\n"),
            _ => output.push(c),
        }
    }

    output
}
|
||||
|
||||
impl Line {
|
||||
pub fn format(&self) -> Vec<u8> {
|
||||
let mut output = Vec::new();
|
||||
|
||||
if let Some(tags) = &self.tags {
|
||||
output.push(b'@');
|
||||
for (i, (key, value)) in tags.iter().enumerate() {
|
||||
if i != 0 {
|
||||
output.push(b';');
|
||||
}
|
||||
|
||||
output.extend_from_slice(key.as_bytes());
|
||||
if let Some(value) = value {
|
||||
output.push(b'=');
|
||||
output.extend_from_slice(tag_encode(value).as_bytes());
|
||||
}
|
||||
}
|
||||
output.push(b' ');
|
||||
}
|
||||
|
||||
if let Some(source) = &self.source {
|
||||
output.push(b':');
|
||||
output.extend_from_slice(source);
|
||||
output.push(b' ');
|
||||
}
|
||||
|
||||
output.extend_from_slice(self.command.as_bytes());
|
||||
|
||||
for (i, arg) in self.args.iter().enumerate() {
|
||||
output.push(b' ');
|
||||
if i == self.args.len() - 1 {
|
||||
output.push(b':');
|
||||
}
|
||||
output.extend_from_slice(arg);
|
||||
}
|
||||
|
||||
output
|
||||
}
|
||||
}
|
120
src/lib.rs
120
src/lib.rs
|
@ -1,116 +1,6 @@
|
|||
use std::collections::{HashMap, VecDeque};
|
||||
mod format;
|
||||
mod obj;
|
||||
mod tokenise;
|
||||
mod util;
|
||||
|
||||
/// A single tokenised IRC line.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so lines can be printed,
/// duplicated and compared (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Line {
    /// Message tags; `None` when the line carries no `@...` tag section.
    /// A tag maps to `None` when it has no value.
    /// Tags are promised to be utf8 encoded.
    pub tags: Option<HashMap<String, Option<String>>>,
    /// Source of the line, stored without its leading `:`.
    pub source: Option<Vec<u8>>,
    /// Commands are promised to be ascii encoded.
    pub command: String,
    /// Arguments following the command, as raw bytes.
    pub args: Vec<Vec<u8>>,
}
|
||||
|
||||
/// Reasons a raw line can fail to tokenise.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// The line is empty.
    /// NOTE(review): the tokeniser shown alongside this type never returns
    /// this variant; empty input yields `MissingCommand`. Confirm intent.
    Empty,
    /// No command word was found after the optional tags and source.
    MissingCommand,
    /// The command bytes are not valid UTF-8.
    CommandDecode,
    /// A tag key is not valid UTF-8.
    TagKeyDecode,
    /// A tag value is not valid UTF-8.
    TagValueDecode,
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Empty => "line is empty",
            Self::MissingCommand => "line has no command",
            Self::CommandDecode => "command is not valid UTF-8",
            Self::TagKeyDecode => "tag key is not valid UTF-8",
            Self::TagValueDecode => "tag value is not valid UTF-8",
        })
    }
}

// Lets callers use `?` into `Box<dyn std::error::Error>` and similar.
impl std::error::Error for Error {}
|
||||
|
||||
/// Splits a leading "word" off a byte slice, advancing the slice in place.
trait TakeWord<'a> {
    /// Returns everything before the first `sep` and leaves `self` pointing
    /// just past that separator. Without a separator the whole slice is
    /// returned and `self` becomes empty.
    fn take_word(&mut self, sep: u8) -> &'a [u8];
}

impl<'a> TakeWord<'a> for &'a [u8] {
    fn take_word(&mut self, sep: u8) -> &'a [u8] {
        // Copy the shared reference out so both halves keep the full `'a`.
        let whole: &'a [u8] = *self;

        match whole.iter().position(|&byte| byte == sep) {
            Some(at) => {
                // Skip the separator itself.
                *self = &whole[at + 1..];
                &whole[..at]
            }
            None => {
                *self = &whole[whole.len()..];
                whole
            }
        }
    }
}
|
||||
|
||||
/// Reverses message-tag value escaping.
///
/// `\:` becomes `;`, `\s` becomes a space, `\r` and `\n` become CR and LF.
/// A backslash followed by any other character yields that character, and a
/// backslash at the very end of the input is dropped.
fn tag_decode(input: &str) -> String {
    let mut decoded = String::with_capacity(input.len());
    let mut pending = input.chars();

    while let Some(current) = pending.next() {
        if current != '\\' {
            decoded.push(current);
            continue;
        }

        // `current` opened an escape; what follows decides the output.
        match pending.next() {
            Some(':') => decoded.push(';'),
            Some('s') => decoded.push(' '),
            Some('r') => decoded.push('\r'),
            Some('n') => decoded.push('\n'),
            Some(other) => decoded.push(other),
            // Dangling backslash at end of input: emit nothing.
            None => {}
        }
    }

    decoded
}
|
||||
|
||||
pub fn tokenise(mut line: &[u8]) -> Result<Line, Error> {
|
||||
let tags = match line.first() {
|
||||
Some(b'@') => {
|
||||
let mut tags = &line.take_word(b' ')[1..];
|
||||
let mut tags_map = HashMap::new();
|
||||
|
||||
while !tags.is_empty() {
|
||||
let mut tag_key_value = tags.take_word(b';');
|
||||
let tag_key = String::from_utf8(tag_key_value.take_word(b'=').to_vec())
|
||||
.map_err(|_| Error::TagKeyDecode)?;
|
||||
let tag_value = match tag_key_value {
|
||||
b"" | b"=" => None,
|
||||
_ => Some(
|
||||
std::str::from_utf8(tag_key_value)
|
||||
.map(tag_decode)
|
||||
.map_err(|_| Error::TagValueDecode)?,
|
||||
),
|
||||
};
|
||||
|
||||
tags_map.insert(tag_key, tag_value);
|
||||
}
|
||||
|
||||
Some(tags_map)
|
||||
}
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let source = match line.first() {
|
||||
Some(b':') => Some(line.take_word(b' ')[1..].to_vec()),
|
||||
_ => None,
|
||||
};
|
||||
|
||||
let mut args = VecDeque::<Vec<u8>>::new();
|
||||
while !line.is_empty() {
|
||||
if line[0] == b':' {
|
||||
args.push_back(line[1..].to_vec());
|
||||
line = &[];
|
||||
} else {
|
||||
args.push_back(line.take_word(b' ').to_vec());
|
||||
}
|
||||
}
|
||||
|
||||
let command = args.pop_front().ok_or(Error::MissingCommand)?;
|
||||
|
||||
Ok(Line {
|
||||
tags,
|
||||
source,
|
||||
command: String::from_utf8(command).map_err(|_| Error::CommandDecode)?,
|
||||
args: args.into(),
|
||||
})
|
||||
}
|
||||
pub use self::obj::{Error, Line};
|
||||
|
|
|
@ -0,0 +1,19 @@
|
|||
use std::collections::BTreeMap;
|
||||
|
||||
/// A single tokenised IRC line.
///
/// Derives `Debug`, `Clone`, `PartialEq` and `Eq` so lines can be printed,
/// duplicated and compared (for example in tests).
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct Line {
    /// Message tags; `None` when the line carries no `@...` tag section.
    /// A tag maps to `None` when it has no value.
    /// Tags are promised to be utf8 encoded.
    pub tags: Option<BTreeMap<String, Option<String>>>,
    /// Source of the line, stored without its leading `:`.
    pub source: Option<Vec<u8>>,
    /// Commands are promised to be ascii encoded.
    pub command: String,
    /// Arguments following the command, as raw bytes.
    pub args: Vec<Vec<u8>>,
}
|
||||
|
||||
/// Reasons a raw line can fail to tokenise.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Error {
    /// The line is empty.
    /// NOTE(review): the tokeniser shown alongside this type never returns
    /// this variant; empty input yields `MissingCommand`. Confirm intent.
    Empty,
    /// No command word was found after the optional tags and source.
    MissingCommand,
    /// The command bytes are not valid UTF-8.
    CommandDecode,
    /// A tag key is not valid UTF-8.
    TagKeyDecode,
    /// A tag value is not valid UTF-8.
    TagValueDecode,
}

impl std::fmt::Display for Error {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        f.write_str(match self {
            Self::Empty => "line is empty",
            Self::MissingCommand => "line has no command",
            Self::CommandDecode => "command is not valid UTF-8",
            Self::TagKeyDecode => "tag key is not valid UTF-8",
            Self::TagValueDecode => "tag value is not valid UTF-8",
        })
    }
}

// Lets callers use `?` into `Box<dyn std::error::Error>` and similar.
impl std::error::Error for Error {}
|
|
@ -0,0 +1,84 @@
|
|||
use std::collections::{BTreeMap, VecDeque};
|
||||
|
||||
use super::util::TakeWord as _;
|
||||
use super::{Error, Line};
|
||||
|
||||
// Remainders of a `key[=value]` tag (after the key and its first `=` have
// been taken) that mean "this tag has no value": nothing, or a lone `=`.
const TAG_STOP: [&[u8]; 2] = [b"", b"="];
|
||||
|
||||
/// Reverses message-tag value escaping.
///
/// `\:` becomes `;`, `\s` becomes a space, `\r` and `\n` become CR and LF.
/// A backslash followed by any other character yields that character, and a
/// backslash at the very end of the input is dropped.
fn tag_decode(input: &str) -> String {
    let mut decoded = String::with_capacity(input.len());
    let mut pending = input.chars();

    while let Some(current) = pending.next() {
        if current != '\\' {
            decoded.push(current);
            continue;
        }

        // `current` opened an escape; what follows decides the output.
        match pending.next() {
            Some(':') => decoded.push(';'),
            Some('s') => decoded.push(' '),
            Some('r') => decoded.push('\r'),
            Some('n') => decoded.push('\n'),
            Some(other) => decoded.push(other),
            // Dangling backslash at end of input: emit nothing.
            None => {}
        }
    }

    decoded
}
|
||||
|
||||
impl Line {
|
||||
pub fn tokenise(mut line: &[u8]) -> Result<Self, Error> {
|
||||
let tags = if line.first() == Some(&b'@') {
|
||||
let mut tags = &line.take_word(b' ')[1..];
|
||||
let mut tags_map = BTreeMap::new();
|
||||
|
||||
while !tags.is_empty() {
|
||||
let mut tag_key_value = tags.take_word(b';');
|
||||
let tag_key = String::from_utf8(tag_key_value.take_word(b'=').to_vec())
|
||||
.map_err(|_| Error::TagKeyDecode)?;
|
||||
let tag_value = if TAG_STOP.contains(&tag_key_value) {
|
||||
None
|
||||
} else {
|
||||
Some(
|
||||
std::str::from_utf8(tag_key_value)
|
||||
.map(tag_decode)
|
||||
.map_err(|_| Error::TagValueDecode)?,
|
||||
)
|
||||
};
|
||||
|
||||
tags_map.insert(tag_key, tag_value);
|
||||
}
|
||||
|
||||
Some(tags_map)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let source = (line.first() == Some(&b':')).then(|| line.take_word(b' ')[1..].to_vec());
|
||||
|
||||
let mut args = VecDeque::<Vec<u8>>::new();
|
||||
while !line.is_empty() {
|
||||
if line[0] == b':' {
|
||||
args.push_back(line[1..].to_vec());
|
||||
line = &[];
|
||||
} else {
|
||||
args.push_back(line.take_word(b' ').to_vec());
|
||||
}
|
||||
}
|
||||
|
||||
let command = args.pop_front().ok_or(Error::MissingCommand)?;
|
||||
|
||||
Ok(Self {
|
||||
tags,
|
||||
source,
|
||||
command: String::from_utf8(command).map_err(|_| Error::CommandDecode)?,
|
||||
args: args.into(),
|
||||
})
|
||||
}
|
||||
}
|
|
@ -0,0 +1,17 @@
|
|||
/// Splits a leading "word" off a byte slice, advancing the slice in place.
pub(crate) trait TakeWord<'a> {
    /// Returns everything before the first `sep` and leaves `self` pointing
    /// just past that separator. Without a separator the whole slice is
    /// returned and `self` becomes empty.
    fn take_word(&mut self, sep: u8) -> &'a [u8];
}

impl<'a> TakeWord<'a> for &'a [u8] {
    fn take_word(&mut self, sep: u8) -> &'a [u8] {
        // Copy the shared reference out so both halves keep the full `'a`.
        let whole: &'a [u8] = *self;

        match whole.iter().position(|&byte| byte == sep) {
            Some(at) => {
                // Skip the separator itself.
                *self = &whole[at + 1..];
                &whole[..at]
            }
            None => {
                *self = &whole[whole.len()..];
                whole
            }
        }
    }
}
|
|
@ -0,0 +1,26 @@
|
|||
use irctokens::Line;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
/// Formats a line carrying tags, a source and three arguments, then checks
/// the exact bytes produced.
#[test]
fn basic() {
    let mut tags = BTreeMap::new();
    tags.insert("tag1".to_string(), Some("tag1value".to_string()));
    // A tag without a value is written as the bare key.
    tags.insert("tag2".to_string(), None);
    // The `;` in this value must come out escaped as `\:`.
    tags.insert("tag3".to_string(), Some("a;a".to_string()));

    let subject = Line {
        tags: Some(tags),
        source: Some(b"source".to_vec()),
        command: "COMMAND".to_string(),
        args: vec![
            b"arg1".to_vec(),
            b"arg2".to_vec(),
            b"arg3 with space".to_vec(),
        ],
    };

    let line = subject.format();

    assert_eq!(
        line,
        b"@tag1=tag1value;tag2;tag3=a\\:a :source COMMAND arg1 arg2 :arg3 with space"
    );
}
|
|
@ -1,9 +1,10 @@
|
|||
use irctokens::tokenise;
|
||||
use irctokens::Line;
|
||||
|
||||
#[test]
|
||||
fn basic() {
|
||||
let line =
|
||||
tokenise(b"@tag1=tag1value;tag2=;tag3 :source COMMAND arg1 arg2 :arg3 with space").unwrap();
|
||||
Line::tokenise(b"@tag1=tag1value;tag2=;tag3 :source COMMAND arg1 arg2 :arg3 with space")
|
||||
.unwrap();
|
||||
|
||||
assert_eq!(line.source, Some(b"source".to_vec()));
|
||||
assert_eq!(&line.command, "COMMAND");
|
||||
|
@ -22,7 +23,7 @@ fn basic() {
|
|||
|
||||
#[test]
|
||||
fn complex_tags() {
|
||||
let line = tokenise(b"@tag1=a\\:a COMMAND").unwrap();
|
||||
let line = Line::tokenise(b"@tag1=a\\:a COMMAND").unwrap();
|
||||
|
||||
let tags = line.tags.unwrap();
|
||||
assert_eq!(tags["tag1"], Some("a;a".to_string()));
|
Loading…
Reference in New Issue