add test/parser_tests.py, from github:ircdocs/parser-tests

This commit is contained in:
jesopo 2020-03-23 16:48:47 +00:00
parent 31ab81741e
commit a3b55f6292
3 changed files with 371 additions and 1 deletions

View File

@ -2,4 +2,5 @@ from .tokenise import *
from .format import *
from .stateful_decode import *
from .stateful_encode import *
from .hostmask import *
from .hostmask import *
from .parser_tests import *

343
test/_data/msg-split.yaml Normal file
View File

@ -0,0 +1,343 @@
# IRC parser tests
# splitting messages into usable atoms
# Written in 2015 by Daniel Oaks <daniel@danieloaks.net>
#
# To the extent possible under law, the author(s) have dedicated all copyright
# and related and neighboring rights to this software to the public domain
# worldwide. This software is distributed without any warranty.
#
# You should have received a copy of the CC0 Public Domain Dedication along
# with this software. If not, see
# <http://creativecommons.org/publicdomain/zero/1.0/>.
# some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed
# https://github.com/grawity/code/tree/master/lib/tests
# some of the tests here originate from Mozilla's test vectors, which is public domain
# https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js
# some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here
# https://github.com/SaberUK/ircparser/tree/master/test
# we follow RFC1459 with regards to multiple ascii spaces splitting atoms:
# The prefix, command, and all parameters are
# separated by one (or more) ASCII space character(s) (0x20).
# because doing it as RFC2812 says (strictly as a single ascii space) isn't sane
tests:
# input is the string coming directly from the server to parse
# the atoms dict has the keys:
# * tags: tags dict
# tags with no value are an empty string
# * source: source string, without single leading colon
# * verb: verb string
# * params: params split up as a list
# if the params key does not exist, assume it is empty
# if any other keys do no exist, assume they are null
# a key that is null does not exist or is not specified with the
# given input string
# simple
- input: "foo bar baz asdf"
atoms:
verb: "foo"
params:
- "bar"
- "baz"
- "asdf"
# with source
- input: ":coolguy foo bar baz asdf"
atoms:
source: "coolguy"
verb: "foo"
params:
- "bar"
- "baz"
- "asdf"
# with trailing param
- input: "foo bar baz :asdf quux"
atoms:
verb: "foo"
params:
- "bar"
- "baz"
- "asdf quux"
- input: "foo bar baz :"
atoms:
verb: "foo"
params:
- "bar"
- "baz"
- ""
- input: "foo bar baz ::asdf"
atoms:
verb: "foo"
params:
- "bar"
- "baz"
- ":asdf"
# with source and trailing param
- input: ":coolguy foo bar baz :asdf quux"
atoms:
source: "coolguy"
verb: "foo"
params:
- "bar"
- "baz"
- "asdf quux"
- input: ":coolguy foo bar baz : asdf quux "
atoms:
source: "coolguy"
verb: "foo"
params:
- "bar"
- "baz"
- " asdf quux "
- input: ":coolguy PRIVMSG bar :lol :) "
atoms:
source: "coolguy"
verb: "PRIVMSG"
params:
- "bar"
- "lol :) "
- input: ":coolguy foo bar baz :"
atoms:
source: "coolguy"
verb: "foo"
params:
- "bar"
- "baz"
- ""
- input: ":coolguy foo bar baz : "
atoms:
source: "coolguy"
verb: "foo"
params:
- "bar"
- "baz"
- " "
# with tags
- input: "@a=b;c=32;k;rt=ql7 foo"
atoms:
verb: "foo"
tags:
"a": "b"
"c": "32"
"k": ""
"rt": "ql7"
# with escaped tags
- input: "@a=b\\\\and\\nk;c=72\\s45;d=gh\\:764 foo"
atoms:
verb: "foo"
tags:
"a": "b\\and\nk"
"c": "72 45"
"d": "gh;764"
# with tags and source
- input: "@c;h=;a=b :quux ab cd"
atoms:
tags:
"c": ""
"h": ""
"a": "b"
source: "quux"
verb: "ab"
params:
- "cd"
# different forms of last param
- input: ":src JOIN #chan"
atoms:
source: "src"
verb: "JOIN"
params:
- "#chan"
- input: ":src JOIN :#chan"
atoms:
source: "src"
verb: "JOIN"
params:
- "#chan"
# with and without last param
- input: ":src AWAY"
atoms:
source: "src"
verb: "AWAY"
- input: ":src AWAY "
atoms:
source: "src"
verb: "AWAY"
# tab is not considered <SPACE>
- input: ":cool\tguy foo bar baz"
atoms:
source: "cool\tguy"
verb: "foo"
params:
- "bar"
- "baz"
# with weird control codes in the source
- input: ":coolguy!ag@net\x035w\x03ork.admin PRIVMSG foo :bar baz"
atoms:
source: "coolguy!ag@net\x035w\x03ork.admin"
verb: "PRIVMSG"
params:
- "foo"
- "bar baz"
- input: ":coolguy!~ag@n\x02et\x0305w\x0fork.admin PRIVMSG foo :bar baz"
atoms:
source: "coolguy!~ag@n\x02et\x0305w\x0fork.admin"
verb: "PRIVMSG"
params:
- "foo"
- "bar baz"
- input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4= :irc.example.com COMMAND param1 param2 :param3 param3"
atoms:
tags:
tag1: "value1"
tag2: ""
vendor1/tag3: "value2"
vendor2/tag4: ""
source: "irc.example.com"
verb: "COMMAND"
params:
- "param1"
- "param2"
- "param3 param3"
- input: ":irc.example.com COMMAND param1 param2 :param3 param3"
atoms:
source: "irc.example.com"
verb: "COMMAND"
params:
- "param1"
- "param2"
- "param3 param3"
- input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4 COMMAND param1 param2 :param3 param3"
atoms:
tags:
tag1: "value1"
tag2: ""
vendor1/tag3: "value2"
vendor2/tag4: ""
verb: "COMMAND"
params:
- "param1"
- "param2"
- "param3 param3"
- input: "COMMAND"
atoms:
verb: "COMMAND"
# yaml encoding + slashes is fun
- input: "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND"
atoms:
tags:
foo: "\\\\;\\s \r\n"
verb: "COMMAND"
# broken messages from unreal
- input: ":gravel.mozilla.org 432 #momo :Erroneous Nickname: Illegal characters"
atoms:
source: "gravel.mozilla.org"
verb: "432"
params:
- "#momo"
- "Erroneous Nickname: Illegal characters"
- input: ":gravel.mozilla.org MODE #tckk +n "
atoms:
source: "gravel.mozilla.org"
verb: "MODE"
params:
- "#tckk"
- "+n"
- input: ":services.esper.net MODE #foo-bar +o foobar "
atoms:
source: "services.esper.net"
verb: "MODE"
params:
- "#foo-bar"
- "+o"
- "foobar"
# tag values should be parsed char-at-a-time to prevent wayward replacements.
- input: "@tag1=value\\\\ntest COMMAND"
atoms:
tags:
tag1: "value\\ntest"
verb: "COMMAND"
# If a tag value has a slash followed by a character which doesn't need
# to be escaped, the slash should be dropped.
- input: "@tag1=value\\1 COMMAND"
atoms:
tags:
tag1: "value1"
verb: "COMMAND"
# A slash at the end of a tag value should be dropped
- input: "@tag1=value1\\ COMMAND"
atoms:
tags:
tag1: "value1"
verb: "COMMAND"
# Duplicate tags: Parsers SHOULD disregard all but the final occurence
- input: "@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND"
atoms:
tags:
tag1: "5"
tag2: "3"
tag3: "4"
verb: "COMMAND"
# vendored tags can have the same name as a non-vendored tag
- input: "@tag1=1;tag2=3;tag3=4;tag1=5;vendor/tag2=8 COMMAND"
atoms:
tags:
tag1: "5"
tag2: "3"
tag3: "4"
vendor/tag2: "8"
verb: "COMMAND"
# Some parsers handle /MODE in a special way, make sure they do it right
- input: ":SomeOp MODE #channel :+i"
atoms:
source: "SomeOp"
verb: "MODE"
params:
- "#channel"
- "+i"
- input: ":SomeOp MODE #channel +oo SomeUser :AnotherUser"
atoms:
source: "SomeOp"
verb: "MODE"
params:
- "#channel"
- "+oo"
- "SomeUser"
- "AnotherUser"

26
test/parser_tests.py Normal file
View File

@ -0,0 +1,26 @@
import os.path, unittest
import yaml
import irctokens
# run test cases sourced from:
# https://github.com/ircdocs/parser-tests
dir = os.path.dirname(os.path.realpath(__file__))
data_dir = os.path.join(dir, "_data")
class ParserTestsSplit(unittest.TestCase):
def test_split(self):
data_path = os.path.join(data_dir, "msg-split.yaml")
with open(data_path) as data_file:
tests = yaml.safe_load(data_file.read())["tests"]
for test in tests:
input = test["input"]
atoms = test["atoms"]
tokens = irctokens.tokenise(input)
self.assertEqual(tokens.tags, atoms.get("tags", None))
self.assertEqual(tokens.source, atoms.get("source", None))
self.assertEqual(tokens.command, atoms["verb"].upper())
self.assertEqual(tokens.params, atoms.get("params", []))