From 0196a11a8ac19a962792d147a96e371aea784dec Mon Sep 17 00:00:00 2001 From: TheDaemoness Date: Sat, 8 Apr 2023 11:32:32 -0700 Subject: [PATCH] Add some ergonomics improvements (#2) * Generalize tokenise to any AsRef<[u8]> * Add Line::write_to() Also rewrites some functions in format.rs to be more Write-focused. * Add some basic trait impls for Line and Error * cargo fmt --------- Co-authored-by: jesopo --- src/format.rs | 108 +++++++++++++++++++++++++++--------------------- src/lib.rs | 2 +- src/obj.rs | 2 +- src/tokenise.rs | 19 ++++++++- 4 files changed, 80 insertions(+), 51 deletions(-) diff --git a/src/format.rs b/src/format.rs index 5c896dd..e50790f 100644 --- a/src/format.rs +++ b/src/format.rs @@ -1,23 +1,67 @@ +use std::io::Write; + use super::Line; -fn tag_encode(input: &str) -> String { - let mut output = String::with_capacity(input.len() * 2); - +fn tag_encode(input: &str, output: &mut (impl Write + ?Sized)) -> std::io::Result<()> { for char in input.chars() { - output.push_str(&match char { - ';' => "\\:".to_string(), - ' ' => "\\s".to_string(), - '\\' => "\\".to_string(), - '\r' => "\\r".to_string(), - '\n' => "\\n".to_string(), - _ => char.to_string(), - }); + match char { + ';' => write!(output, "\\:")?, + ' ' => write!(output, "\\s")?, + '\\' => write!(output, "\\")?, + '\r' => write!(output, "\\r")?, + '\n' => write!(output, "\\n")?, + _ => write!(output, "{char}")?, + } } - - output + Ok(()) } impl Line { + #[allow(clippy::doc_markdown)] + /// Write `self` to `output` as a formatted byte string by [RFC1459] and [IRCv3] protocol rules. + /// + /// Does NOT write a CRLF nor flush the stream. + /// This function makes a large number of small writes; + /// it is advised to use a buffered [`Write`] implementation here. 
+ /// + /// [RFC1459]: https://www.rfc-editor.org/rfc/rfc1459#section-2.3 + /// [IRCv3]: https://ircv3.net/specs/extensions/message-tags.html + pub fn write_to(&self, output: &mut (impl Write + ?Sized)) -> std::io::Result<()> { + if let Some(tags) = &self.tags { + let mut not_at_start = false; + for (key, value) in tags { + if not_at_start { + write!(output, ";{key}")?; + } else { + not_at_start = true; + write!(output, "@{key}")?; + } + if let Some(value) = value { + output.write_all(b"=")?; + tag_encode(value, output)?; + } + } + output.write_all(b" ")?; + } + + if let Some(source) = &self.source { + output.write_all(b":")?; + output.write_all(source)?; + output.write_all(b" ")?; + } + + output.write_all(self.command.as_bytes())?; + + if let Some((last, args)) = self.arguments.split_last() { + for arg in args { + output.write_all(b" ")?; + output.write_all(arg)?; + } + output.write_all(b" :")?; + output.write_all(last)?; + } + Ok(()) + } #[allow(clippy::doc_markdown)] /// Format `self` into a byte string by [RFC1459] and [IRCv3] protocol rules. 
/// @@ -27,40 +71,10 @@ impl Line { /// [IRCv3]: https://ircv3.net/specs/extensions/message-tags.html #[must_use] pub fn format(&self) -> Vec { - let mut output = Vec::new(); - - if let Some(tags) = &self.tags { - output.push(b'@'); - for (i, (key, value)) in tags.iter().enumerate() { - if i != 0 { - output.push(b';'); - } - - output.extend_from_slice(key.as_bytes()); - if let Some(value) = value { - output.push(b'='); - output.extend_from_slice(tag_encode(value).as_bytes()); - } - } - output.push(b' '); - } - - if let Some(source) = &self.source { - output.push(b':'); - output.extend_from_slice(source); - output.push(b' '); - } - - output.extend_from_slice(self.command.as_bytes()); - - for (i, arg) in self.arguments.iter().enumerate() { - output.push(b' '); - if i == self.arguments.len() - 1 { - output.push(b':'); - } - output.extend_from_slice(arg); - } - + // Minimum size of a message is its command's length plus 2 bytes per argument. + // In practice reallocation is basically guaranteed, but this provides a starting point. + let mut output = Vec::with_capacity(self.command.len() + self.arguments.len() * 2); + std::mem::drop(self.write_to(&mut output)); output } } diff --git a/src/lib.rs b/src/lib.rs index 770cb5b..8ff42c9 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -1,7 +1,7 @@ //! ## usage //! //! ### tokenisation -//! +//! //! ``` //! let bytes = b"@id=123 :jess!~jess@hostname PRIVMSG #chat :hello there!"; //! let line = irctokens::Line::tokenise(bytes).unwrap(); diff --git a/src/obj.rs b/src/obj.rs index fa085db..68e0106 100644 --- a/src/obj.rs +++ b/src/obj.rs @@ -3,7 +3,7 @@ use std::collections::BTreeMap; /// A struct representing all the constituent pieces of an RFC1459/IRCv3 protocol line. /// /// `@tagkey=tagvalue :source COMMAND arg1 arg2 :arg3 with space` -#[derive(Debug)] +#[derive(Clone, PartialEq, Eq, Hash, Debug)] pub struct Line { /// [Message tags] of an IRC line. /// [`None`] if no message tags were present. 
diff --git a/src/tokenise.rs b/src/tokenise.rs index 55cbcad..bae829b 100644 --- a/src/tokenise.rs +++ b/src/tokenise.rs @@ -5,7 +5,7 @@ use super::Line; const TAG_STOP: [&[u8]; 2] = [b"", b"="]; -#[derive(Debug)] +#[derive(Clone, Copy, PartialEq, Eq, Debug)] pub enum Error { /// An empty byte array was passed to the tokeniser. Empty, @@ -19,6 +19,20 @@ pub enum Error { TagValueDecode, } +impl std::fmt::Display for Error { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + match self { + Error::Empty => write!(f, "empty slice passed to tokeniser"), + Error::MissingCommand => write!(f, "missing command"), + Error::CommandDecode => write!(f, "commands must be ascii encoded"), + Error::TagKeyDecode => write!(f, "message tag keys must be utf8 encoded"), + Error::TagValueDecode => write!(f, "message tag values must be utf8 encoded"), + } + } +} + +impl std::error::Error for Error {} + fn tag_decode(input: &str) -> String { let mut escaped = false; let mut output = String::with_capacity(input.len()); @@ -54,7 +68,8 @@ impl Line { /// /// [RFC1459]: https://www.rfc-editor.org/rfc/rfc1459#section-2.3 /// [IRCv3]: https://ircv3.net/specs/extensions/message-tags.html - pub fn tokenise(mut line: &[u8]) -> Result { + pub fn tokenise(line: impl std::convert::AsRef<[u8]>) -> Result { + let mut line = line.as_ref(); let tags = if line.first() == Some(&b'@') { let mut tags = &line.take_word(b' ')[1..]; let mut tags_map = BTreeMap::new();