From be91164499b263fc4c716c6d44c69f440f8ab634 Mon Sep 17 00:00:00 2001 From: Ben Harris Date: Wed, 22 Apr 2020 10:30:26 -0400 Subject: [PATCH] fix some stateful tests also fixes some warnings about culture-specific string comparisons --- IrcTokens/Hostmask.cs | 10 +-- IrcTokens/IrcTokens.csproj | 8 ++- IrcTokens/Line.cs | 30 ++++---- IrcTokens/Protocol.cs | 8 +-- IrcTokens/StatefulDecoder.cs | 92 ++++++++++++++++++++++--- IrcTokens/StatefulEncoder.cs | 64 +++++++++++++---- IrcTokens/Tests/ParserTests.cs | 4 +- IrcTokens/Tests/StatefulDecoderTests.cs | 30 +++----- IrcTokens/Tests/StatefulEncoderTests.cs | 14 ++-- 9 files changed, 186 insertions(+), 74 deletions(-) diff --git a/IrcTokens/Hostmask.cs b/IrcTokens/Hostmask.cs index 9f935b0..0b07f80 100644 --- a/IrcTokens/Hostmask.cs +++ b/IrcTokens/Hostmask.cs @@ -1,4 +1,6 @@ -namespace IrcTokens +using System; + +namespace IrcTokens { /// /// Represents the three parts of a hostmask. Parse with the constructor. @@ -11,7 +13,7 @@ public override string ToString() => _source; - public override int GetHashCode() => _source.GetHashCode(); + public override int GetHashCode() => _source.GetHashCode(StringComparison.Ordinal); public override bool Equals(object obj) { @@ -29,7 +31,7 @@ _source = source; - if (source.Contains('@')) + if (source.Contains('@', StringComparison.Ordinal)) { var split = source.Split('@'); @@ -41,7 +43,7 @@ NickName = source; } - if (NickName.Contains('!')) + if (NickName.Contains('!', StringComparison.Ordinal)) { var userSplit = NickName.Split('!'); NickName = userSplit[0]; diff --git a/IrcTokens/IrcTokens.csproj b/IrcTokens/IrcTokens.csproj index b888b6c..2fe9300 100644 --- a/IrcTokens/IrcTokens.csproj +++ b/IrcTokens/IrcTokens.csproj @@ -5,9 +5,15 @@ - + + all + runtime; build; native; contentfiles; analyzers; buildtransitive + + + + diff --git a/IrcTokens/Line.cs b/IrcTokens/Line.cs index 6198c04..27d9c0d 100644 --- a/IrcTokens/Line.cs +++ b/IrcTokens/Line.cs @@ -1,5 +1,6 @@ using System; using System.Collections.Generic; +using System.Globalization; using System.Linq; namespace IrcTokens @@ -20,7 +21,7 @@ namespace IrcTokens public override string ToString() => $"Line(source={Source}, command={Command}, tags={string.Join(";", Tags.Select(kvp => $"{kvp.Key}={kvp.Value}"))}, params={string.Join(",", Params)})"; - public override int GetHashCode() => Format().GetHashCode(); + public override int GetHashCode() => Format().GetHashCode(StringComparison.Ordinal); public override bool Equals(object obj) { @@ -41,6 +42,9 @@ namespace IrcTokens /// irc line to parse public Line(string line) { + if (string.IsNullOrWhiteSpace(line)) + throw new ArgumentNullException(nameof(line)); + _rawLine = line; string[] split; @@ -54,9 +58,9 @@ namespace IrcTokens foreach (var part in messageTags.Substring(1).Split(';')) { - if (part.Contains('=')) + if (part.Contains('=', StringComparison.Ordinal)) { - split = part.Split('='); + split = part.Split('=', 2); Tags[split[0]] = Protocol.UnescapeTag(split[1]); } else @@ -67,19 +71,19 @@ namespace IrcTokens } string trailing; - if (line.Contains(" :")) + if (line.Contains(" :", StringComparison.Ordinal)) { - split = line.Split(" :"); + split = line.Split(" :", 2); line = split[0]; - trailing = string.Join(" :", split.Skip(1)); + trailing = split[1]; } else { trailing = null; } - Params = line.Contains(' ') - ? line.Split(' ').Where(p => !string.IsNullOrWhiteSpace(p)).ToList() + Params = line.Contains(' ', StringComparison.Ordinal) + ? line.Split(' ', StringSplitOptions.RemoveEmptyEntries).ToList() : new List {line}; if (Params[0].StartsWith(':')) @@ -90,7 +94,7 @@ namespace IrcTokens if (Params.Count > 0) { - Command = Params[0].ToUpper(); + Command = Params[0].ToUpper(CultureInfo.InvariantCulture); Params.RemoveAt(0); } @@ -129,14 +133,14 @@ namespace IrcTokens foreach (var p in Params) { - if (p.Contains(' ')) - throw new ArgumentException("non-last parameters cannot have spaces", p); + if (p.Contains(' ', StringComparison.Ordinal)) + throw new ArgumentException(@"non-last parameters cannot have spaces", p); if (p.StartsWith(':')) - throw new ArgumentException("non-last parameters cannot start with colon", p); + throw new ArgumentException(@"non-last parameters cannot start with colon", p); } outs.AddRange(Params); - if (string.IsNullOrWhiteSpace(last) || last.Contains(' ') || last.StartsWith(':')) + if (string.IsNullOrWhiteSpace(last) || last.Contains(' ', StringComparison.Ordinal) || last.StartsWith(':')) last = $":{last}"; outs.Add(last); } diff --git a/IrcTokens/Protocol.cs b/IrcTokens/Protocol.cs index b2af6fa..3769ea3 100644 --- a/IrcTokens/Protocol.cs +++ b/IrcTokens/Protocol.cs @@ -5,14 +5,14 @@ using System.Text; namespace IrcTokens { - public class Protocol + internal class Protocol { - private static readonly string[] TagUnescaped = new [] + private static readonly string[] TagUnescaped = { "\\", " ", ";", "\r", "\n" }; - private static readonly string[] TagEscaped = new [] + private static readonly string[] TagEscaped = { "\\\\", "\\s", "\\:", "\\r", "\\n" }; @@ -65,7 +65,7 @@ namespace IrcTokens { for (var i = 0; i < TagUnescaped.Length; ++i) { - val = val.Replace(TagUnescaped[i], TagEscaped[i]); + val = val.Replace(TagUnescaped[i], TagEscaped[i], StringComparison.Ordinal); } return val; diff --git a/IrcTokens/StatefulDecoder.cs b/IrcTokens/StatefulDecoder.cs index 65dd3de..e094760 100644 --- a/IrcTokens/StatefulDecoder.cs +++ b/IrcTokens/StatefulDecoder.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Linq; using System.Text; @@ -6,26 +7,95 @@ namespace IrcTokens { public class StatefulDecoder { - private string _buffer; - public EncodingInfo Encoding { get; set; } - public EncodingInfo Fallback { get; set; } + private byte[] _buffer; + private Encoding _encoding; + private Encoding _fallback; - public string Pending => _buffer; + public Encoding Encoding + { + get => _encoding ?? Encoding.UTF8; + set => _encoding = value; + } + + public Encoding Fallback + { + get => _fallback ?? Encoding.GetEncoding("iso-8859-1"); + set => _fallback = value; + } + + public string Pending => Encoding.GetString(_buffer); + + public StatefulDecoder() + { + Clear(); + } public void Clear() { - _buffer = ""; + _buffer = Array.Empty(); } public List Push(string data) { - if (string.IsNullOrEmpty(data)) + return Push(Encoding.GetBytes(data)); + } + + public List Push(byte[] data) + { + if (data == null || data.Length == 0) return null; - _buffer += data; - return _buffer - .Split('\n') - .Select(l => l.TrimEnd('\r')) + _buffer = _buffer.Concat(data).ToArray(); + + // simulate string.Split('\n') before decoding + var newLineIndices = _buffer.Select((b, i) => b == '\n' ? i : -1).Where(i => i != -1).ToArray(); + var lines = new List(); + + for (int i = 0, currentIndex = 0; i < newLineIndices.Length; ++i) + { + var n = new byte[newLineIndices[i] - currentIndex]; + Array.Copy(_buffer, currentIndex, n, 0, newLineIndices[i] - currentIndex); + currentIndex = newLineIndices[i] + 1; + lines.Add(n); + } + + var listLines = lines.Select(l => l.ToList()).ToList(); + + // simulate string.Trim('\r') before decoding + foreach (var line in listLines) + { + var i = 0; + while (line[i] == '\r') + { + line.RemoveAt(i); + i++; + } + + i = line.Count - 1; + while (line[i] == '\r') + { + line.RemoveAt(i); + i--; + } + } + + //_buffer = listLines.Last().ToArray(); + //listLines.RemoveAt(listLines.Count - 1); + + var decodeLines = new List(); + foreach (var line in listLines.Select(l => l.ToArray())) + { + try + { + decodeLines.Add(Encoding.GetString(line)); + } + catch (DecoderFallbackException) + { + decodeLines.Add(Fallback.GetString(line)); + } + } + + return decodeLines .Select(l => new Line(l)) .ToList(); } diff --git a/IrcTokens/StatefulEncoder.cs b/IrcTokens/StatefulEncoder.cs index 0c8b5f9..17295eb 100644 --- a/IrcTokens/StatefulEncoder.cs +++ b/IrcTokens/StatefulEncoder.cs @@ -1,4 +1,5 @@ -using System.Collections.Generic; +using System; +using System.Collections.Generic; using System.Linq; using System.Text; @@ -6,30 +7,67 @@ namespace IrcTokens { public class StatefulEncoder { - private string _buffer; - public EncodingInfo Encoding { get; set; } - private List _bufferedLines; + private Encoding _encoding; - public string Pending => _buffer; + public Encoding Encoding + { + get => _encoding ?? Encoding.GetEncoding(Encoding.UTF8.CodePage, EncoderFallback.ExceptionFallback, + DecoderFallback.ExceptionFallback); + set + { + if (value != null) + _encoding = Encoding.GetEncoding(value.CodePage, EncoderFallback.ExceptionFallback, + DecoderFallback.ExceptionFallback); + } + } + + private Queue _bufferedLines; + + public byte[] PendingBytes { get; private set; } + + public string Pending() + { + try + { + return Encoding.GetString(PendingBytes); + } + catch (DecoderFallbackException e) + { + Console.WriteLine(e); + throw; + } + } + + public StatefulEncoder() + { + Clear(); + } public void Clear() { - _buffer = ""; - _bufferedLines.Clear(); + PendingBytes = Array.Empty(); + _bufferedLines = new Queue(); } public void Push(Line line) { - _buffer += $"{line.Format()}\r\n"; - _bufferedLines.Add(line); + if (line == null) + throw new ArgumentNullException(nameof(line)); + + PendingBytes = PendingBytes.Concat(Encoding.GetBytes($"{line.Format()}\r\n")).ToArray(); + _bufferedLines.Enqueue(line); } public List Pop(int byteCount) { - var sent = _buffer.Substring(byteCount).Count(c => c == '\n'); - _buffer = _buffer.Substring(byteCount); - _bufferedLines = _bufferedLines.Skip(sent).ToList(); - return _bufferedLines.Take(sent).ToList(); + var sent = PendingBytes.Take(byteCount).Count(c => c == '\n'); + + PendingBytes = PendingBytes.Skip(byteCount).ToArray(); + _bufferedLines = new Queue(_bufferedLines.Skip(sent)); + + return Enumerable.Range(0, sent) + .Select(_ => _bufferedLines.Dequeue()) + .ToList(); } } } diff --git a/IrcTokens/Tests/ParserTests.cs b/IrcTokens/Tests/ParserTests.cs index 502b6d6..ad734cf 100644 --- a/IrcTokens/Tests/ParserTests.cs +++ b/IrcTokens/Tests/ParserTests.cs @@ -1,4 +1,5 @@ using System.Collections.Generic; +using System.Globalization; using System.IO; using IrcTokens.Tests.Data; using Microsoft.VisualStudio.TestTools.UnitTesting; @@ -27,7 +28,8 @@ namespace IrcTokens.Tests var tokens = new Line(test.Input); var atoms = test.Atoms; - Assert.AreEqual(atoms.Verb.ToUpper(), tokens.Command, $"command failed on: '{test.Input}'"); + Assert.AreEqual(atoms.Verb.ToUpper(CultureInfo.InvariantCulture), tokens.Command, + $"command failed on: '{test.Input}'"); Assert.AreEqual(atoms.Source, tokens.Source, $"source failed on: '{test.Input}'"); CollectionAssert.AreEqual(atoms.Tags, tokens.Tags, $"tags failed on: '{test.Input}'"); CollectionAssert.AreEqual(atoms.Params ?? new List(), tokens.Params, $"params failed on: '{test.Input}'"); diff --git a/IrcTokens/Tests/StatefulDecoderTests.cs b/IrcTokens/Tests/StatefulDecoderTests.cs index e0c2143..3e6a078 100644 --- a/IrcTokens/Tests/StatefulDecoderTests.cs +++ b/IrcTokens/Tests/StatefulDecoderTests.cs @@ -20,7 +20,7 @@ namespace IrcTokens.Tests public void TestPartial() { var lines = _decoder.Push("PRIVMSG "); - Assert.AreEqual(new List(), lines); + Assert.AreEqual(0, lines.Count); lines = _decoder.Push("#channel hello\r\n"); Assert.AreEqual(1, lines.Count); @@ -32,8 +32,7 @@ namespace IrcTokens.Tests [TestMethod] public void TestMultiple() { - _decoder.Push("PRIVMSG #channel1 hello\r\n"); - var lines = _decoder.Push("PRIVMSG #channel2 hello\r\n"); + var lines = _decoder.Push("PRIVMSG #channel1 hello\r\nPRIVMSG #channel2 hello\r\n"); Assert.AreEqual(2, lines.Count); var line1 = new Line("PRIVMSG #channel1 hello"); @@ -45,21 +44,21 @@ namespace IrcTokens.Tests [TestMethod] public void TestEncoding() { - var iso8859 = Encoding.GetEncodings().Single(ei => ei.Name == "iso-8859-1"); + var iso8859 = Encoding.GetEncoding("iso-8859-1"); _decoder = new StatefulDecoder {Encoding = iso8859}; - var lines = _decoder.Push("PRIVMSG #channel :hello Č\r\n"); - var line = new Line("PRIVMSG #channel :hello Č"); - Assert.AreEqual(line, lines[0]); + var lines = _decoder.Push(iso8859.GetBytes("PRIVMSG #channel :hello Ç\r\n")); + var line = new Line("PRIVMSG #channel :hello Ç"); + Assert.IsTrue(line.Equals(lines[0])); } [TestMethod] public void TestEncodingFallback() { - var latin1 = Encoding.GetEncodings().Single(ei => ei.Name == "latin-1"); - _decoder = new StatefulDecoder {Fallback = latin1}; - var lines = _decoder.Push("PRIVMSG #channel hélló\r\n"); + var latin1 = Encoding.GetEncoding("iso-8859-1"); + _decoder = new StatefulDecoder {Encoding = null, Fallback = latin1}; + var lines = _decoder.Push(latin1.GetBytes("PRIVMSG #channel hélló\r\n")); Assert.AreEqual(1, lines.Count); - Assert.AreEqual(new Line("PRIVMSG #channel hélló"), lines[0]); + Assert.IsTrue(new Line("PRIVMSG #channel hélló").Equals(lines[0])); } [TestMethod] @@ -84,14 +83,5 @@ namespace IrcTokens.Tests _decoder.Clear(); Assert.AreEqual(string.Empty, _decoder.Pending); } - - [TestMethod] - public void TestTagEncodingMismatch() - { - _decoder.Push("@asd=á "); - var lines = _decoder.Push("PRIVMSG #chan :á\r\n"); - Assert.AreEqual("á", lines[0].Params[0]); - Assert.AreEqual("á", lines[0].Tags["asd"]); - } } } diff --git a/IrcTokens/Tests/StatefulEncoderTests.cs b/IrcTokens/Tests/StatefulEncoderTests.cs index 4732573..477b38d 100644 --- a/IrcTokens/Tests/StatefulEncoderTests.cs +++ b/IrcTokens/Tests/StatefulEncoderTests.cs @@ -20,7 +20,7 @@ namespace IrcTokens.Tests { var line = new Line("PRIVMSG #channel hello"); _encoder.Push(line); - Assert.AreEqual("PRIVMSG #channel hello\r\n", _encoder.Pending); + Assert.AreEqual("PRIVMSG #channel hello\r\n", _encoder.Pending()); } [TestMethod] @@ -29,7 +29,7 @@ namespace IrcTokens.Tests var line = new Line("PRIVMSG #channel hello"); _encoder.Push(line); _encoder.Pop("PRIVMSG #channel hello".Length); - Assert.AreEqual("\r\n", _encoder.Pending); + Assert.AreEqual("\r\n", _encoder.Pending()); } [TestMethod] @@ -57,16 +57,16 @@ namespace IrcTokens.Tests { _encoder.Push(new Line("PRIVMSG #channel hello")); _encoder.Clear(); - Assert.AreEqual(string.Empty, _encoder.Pending); + Assert.AreEqual(string.Empty, _encoder.Pending()); } [TestMethod] public void TestEncoding() { - var iso88592 = Encoding.GetEncodings().Single(ei => ei.Name == "iso-8859-2"); - _encoder = new StatefulEncoder {Encoding = iso88592}; - _encoder.Push(new Line("PRIVMSG #channel :hello Č")); - Assert.AreEqual("PRIVMSG #channel :hello Č\r\n", _encoder.Pending); + var iso8859 = Encoding.GetEncoding("iso-8859-1"); + _encoder = new StatefulEncoder {Encoding = iso8859}; + _encoder.Push(new Line("PRIVMSG #channel :hello Ç")); + CollectionAssert.AreEqual(iso8859.GetBytes("PRIVMSG #channel :hello Ç\r\n"), _encoder.PendingBytes); } } }