mirror of https://github.com/jesopo/irctokens
add test/parser_tests.py, from github:ircdocs/parser-tests
This commit is contained in:
parent
31ab81741e
commit
a3b55f6292
|
@ -2,4 +2,5 @@ from .tokenise import *
|
|||
from .format import *
|
||||
from .stateful_decode import *
|
||||
from .stateful_encode import *
|
||||
from .hostmask import *
|
||||
from .hostmask import *
|
||||
from .parser_tests import *
|
||||
|
|
|
@ -0,0 +1,343 @@
|
|||
# IRC parser tests
|
||||
# splitting messages into usable atoms
|
||||
|
||||
# Written in 2015 by Daniel Oaks <daniel@danieloaks.net>
|
||||
#
|
||||
# To the extent possible under law, the author(s) have dedicated all copyright
|
||||
# and related and neighboring rights to this software to the public domain
|
||||
# worldwide. This software is distributed without any warranty.
|
||||
#
|
||||
# You should have received a copy of the CC0 Public Domain Dedication along
|
||||
# with this software. If not, see
|
||||
# <http://creativecommons.org/publicdomain/zero/1.0/>.
|
||||
|
||||
# some of the tests here originate from grawity's test vectors, which is WTFPL v2 licensed
|
||||
# https://github.com/grawity/code/tree/master/lib/tests
|
||||
# some of the tests here originate from Mozilla's test vectors, which is public domain
|
||||
# https://dxr.mozilla.org/comm-central/source/chat/protocols/irc/test/test_ircMessage.js
|
||||
# some of the tests here originate from SaberUK's test vectors, which he's indicated I am free to include here
|
||||
# https://github.com/SaberUK/ircparser/tree/master/test
|
||||
|
||||
# we follow RFC1459 with regards to multiple ascii spaces splitting atoms:
|
||||
# The prefix, command, and all parameters are
|
||||
# separated by one (or more) ASCII space character(s) (0x20).
|
||||
# because doing it as RFC2812 says (strictly as a single ascii space) isn't sane
|
||||
|
||||
tests:
|
||||
# input is the string coming directly from the server to parse
|
||||
|
||||
# the atoms dict has the keys:
|
||||
# * tags: tags dict
|
||||
# tags with no value are an empty string
|
||||
# * source: source string, without single leading colon
|
||||
# * verb: verb string
|
||||
# * params: params split up as a list
|
||||
# if the params key does not exist, assume it is empty
|
||||
# if any other keys do no exist, assume they are null
|
||||
# a key that is null does not exist or is not specified with the
|
||||
# given input string
|
||||
|
||||
# simple
|
||||
- input: "foo bar baz asdf"
|
||||
atoms:
|
||||
verb: "foo"
|
||||
params:
|
||||
- "bar"
|
||||
- "baz"
|
||||
- "asdf"
|
||||
|
||||
# with source
|
||||
- input: ":coolguy foo bar baz asdf"
|
||||
atoms:
|
||||
source: "coolguy"
|
||||
verb: "foo"
|
||||
params:
|
||||
- "bar"
|
||||
- "baz"
|
||||
- "asdf"
|
||||
|
||||
# with trailing param
|
||||
- input: "foo bar baz :asdf quux"
|
||||
atoms:
|
||||
verb: "foo"
|
||||
params:
|
||||
- "bar"
|
||||
- "baz"
|
||||
- "asdf quux"
|
||||
|
||||
- input: "foo bar baz :"
|
||||
atoms:
|
||||
verb: "foo"
|
||||
params:
|
||||
- "bar"
|
||||
- "baz"
|
||||
- ""
|
||||
|
||||
- input: "foo bar baz ::asdf"
|
||||
atoms:
|
||||
verb: "foo"
|
||||
params:
|
||||
- "bar"
|
||||
- "baz"
|
||||
- ":asdf"
|
||||
|
||||
# with source and trailing param
|
||||
- input: ":coolguy foo bar baz :asdf quux"
|
||||
atoms:
|
||||
source: "coolguy"
|
||||
verb: "foo"
|
||||
params:
|
||||
- "bar"
|
||||
- "baz"
|
||||
- "asdf quux"
|
||||
|
||||
- input: ":coolguy foo bar baz : asdf quux "
|
||||
atoms:
|
||||
source: "coolguy"
|
||||
verb: "foo"
|
||||
params:
|
||||
- "bar"
|
||||
- "baz"
|
||||
- " asdf quux "
|
||||
|
||||
- input: ":coolguy PRIVMSG bar :lol :) "
|
||||
atoms:
|
||||
source: "coolguy"
|
||||
verb: "PRIVMSG"
|
||||
params:
|
||||
- "bar"
|
||||
- "lol :) "
|
||||
|
||||
- input: ":coolguy foo bar baz :"
|
||||
atoms:
|
||||
source: "coolguy"
|
||||
verb: "foo"
|
||||
params:
|
||||
- "bar"
|
||||
- "baz"
|
||||
- ""
|
||||
|
||||
- input: ":coolguy foo bar baz : "
|
||||
atoms:
|
||||
source: "coolguy"
|
||||
verb: "foo"
|
||||
params:
|
||||
- "bar"
|
||||
- "baz"
|
||||
- " "
|
||||
|
||||
# with tags
|
||||
- input: "@a=b;c=32;k;rt=ql7 foo"
|
||||
atoms:
|
||||
verb: "foo"
|
||||
tags:
|
||||
"a": "b"
|
||||
"c": "32"
|
||||
"k": ""
|
||||
"rt": "ql7"
|
||||
|
||||
# with escaped tags
|
||||
- input: "@a=b\\\\and\\nk;c=72\\s45;d=gh\\:764 foo"
|
||||
atoms:
|
||||
verb: "foo"
|
||||
tags:
|
||||
"a": "b\\and\nk"
|
||||
"c": "72 45"
|
||||
"d": "gh;764"
|
||||
|
||||
# with tags and source
|
||||
- input: "@c;h=;a=b :quux ab cd"
|
||||
atoms:
|
||||
tags:
|
||||
"c": ""
|
||||
"h": ""
|
||||
"a": "b"
|
||||
source: "quux"
|
||||
verb: "ab"
|
||||
params:
|
||||
- "cd"
|
||||
|
||||
# different forms of last param
|
||||
- input: ":src JOIN #chan"
|
||||
atoms:
|
||||
source: "src"
|
||||
verb: "JOIN"
|
||||
params:
|
||||
- "#chan"
|
||||
|
||||
- input: ":src JOIN :#chan"
|
||||
atoms:
|
||||
source: "src"
|
||||
verb: "JOIN"
|
||||
params:
|
||||
- "#chan"
|
||||
|
||||
# with and without last param
|
||||
- input: ":src AWAY"
|
||||
atoms:
|
||||
source: "src"
|
||||
verb: "AWAY"
|
||||
|
||||
- input: ":src AWAY "
|
||||
atoms:
|
||||
source: "src"
|
||||
verb: "AWAY"
|
||||
|
||||
# tab is not considered <SPACE>
|
||||
- input: ":cool\tguy foo bar baz"
|
||||
atoms:
|
||||
source: "cool\tguy"
|
||||
verb: "foo"
|
||||
params:
|
||||
- "bar"
|
||||
- "baz"
|
||||
|
||||
# with weird control codes in the source
|
||||
- input: ":coolguy!ag@net\x035w\x03ork.admin PRIVMSG foo :bar baz"
|
||||
atoms:
|
||||
source: "coolguy!ag@net\x035w\x03ork.admin"
|
||||
verb: "PRIVMSG"
|
||||
params:
|
||||
- "foo"
|
||||
- "bar baz"
|
||||
|
||||
- input: ":coolguy!~ag@n\x02et\x0305w\x0fork.admin PRIVMSG foo :bar baz"
|
||||
atoms:
|
||||
source: "coolguy!~ag@n\x02et\x0305w\x0fork.admin"
|
||||
verb: "PRIVMSG"
|
||||
params:
|
||||
- "foo"
|
||||
- "bar baz"
|
||||
|
||||
- input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4= :irc.example.com COMMAND param1 param2 :param3 param3"
|
||||
atoms:
|
||||
tags:
|
||||
tag1: "value1"
|
||||
tag2: ""
|
||||
vendor1/tag3: "value2"
|
||||
vendor2/tag4: ""
|
||||
source: "irc.example.com"
|
||||
verb: "COMMAND"
|
||||
params:
|
||||
- "param1"
|
||||
- "param2"
|
||||
- "param3 param3"
|
||||
|
||||
- input: ":irc.example.com COMMAND param1 param2 :param3 param3"
|
||||
atoms:
|
||||
source: "irc.example.com"
|
||||
verb: "COMMAND"
|
||||
params:
|
||||
- "param1"
|
||||
- "param2"
|
||||
- "param3 param3"
|
||||
|
||||
- input: "@tag1=value1;tag2;vendor1/tag3=value2;vendor2/tag4 COMMAND param1 param2 :param3 param3"
|
||||
atoms:
|
||||
tags:
|
||||
tag1: "value1"
|
||||
tag2: ""
|
||||
vendor1/tag3: "value2"
|
||||
vendor2/tag4: ""
|
||||
verb: "COMMAND"
|
||||
params:
|
||||
- "param1"
|
||||
- "param2"
|
||||
- "param3 param3"
|
||||
|
||||
- input: "COMMAND"
|
||||
atoms:
|
||||
verb: "COMMAND"
|
||||
|
||||
# yaml encoding + slashes is fun
|
||||
- input: "@foo=\\\\\\\\\\:\\\\s\\s\\r\\n COMMAND"
|
||||
atoms:
|
||||
tags:
|
||||
foo: "\\\\;\\s \r\n"
|
||||
verb: "COMMAND"
|
||||
|
||||
# broken messages from unreal
|
||||
- input: ":gravel.mozilla.org 432 #momo :Erroneous Nickname: Illegal characters"
|
||||
atoms:
|
||||
source: "gravel.mozilla.org"
|
||||
verb: "432"
|
||||
params:
|
||||
- "#momo"
|
||||
- "Erroneous Nickname: Illegal characters"
|
||||
|
||||
- input: ":gravel.mozilla.org MODE #tckk +n "
|
||||
atoms:
|
||||
source: "gravel.mozilla.org"
|
||||
verb: "MODE"
|
||||
params:
|
||||
- "#tckk"
|
||||
- "+n"
|
||||
|
||||
- input: ":services.esper.net MODE #foo-bar +o foobar "
|
||||
atoms:
|
||||
source: "services.esper.net"
|
||||
verb: "MODE"
|
||||
params:
|
||||
- "#foo-bar"
|
||||
- "+o"
|
||||
- "foobar"
|
||||
|
||||
# tag values should be parsed char-at-a-time to prevent wayward replacements.
|
||||
- input: "@tag1=value\\\\ntest COMMAND"
|
||||
atoms:
|
||||
tags:
|
||||
tag1: "value\\ntest"
|
||||
verb: "COMMAND"
|
||||
|
||||
# If a tag value has a slash followed by a character which doesn't need
|
||||
# to be escaped, the slash should be dropped.
|
||||
- input: "@tag1=value\\1 COMMAND"
|
||||
atoms:
|
||||
tags:
|
||||
tag1: "value1"
|
||||
verb: "COMMAND"
|
||||
|
||||
# A slash at the end of a tag value should be dropped
|
||||
- input: "@tag1=value1\\ COMMAND"
|
||||
atoms:
|
||||
tags:
|
||||
tag1: "value1"
|
||||
verb: "COMMAND"
|
||||
|
||||
# Duplicate tags: Parsers SHOULD disregard all but the final occurence
|
||||
- input: "@tag1=1;tag2=3;tag3=4;tag1=5 COMMAND"
|
||||
atoms:
|
||||
tags:
|
||||
tag1: "5"
|
||||
tag2: "3"
|
||||
tag3: "4"
|
||||
verb: "COMMAND"
|
||||
|
||||
# vendored tags can have the same name as a non-vendored tag
|
||||
- input: "@tag1=1;tag2=3;tag3=4;tag1=5;vendor/tag2=8 COMMAND"
|
||||
atoms:
|
||||
tags:
|
||||
tag1: "5"
|
||||
tag2: "3"
|
||||
tag3: "4"
|
||||
vendor/tag2: "8"
|
||||
verb: "COMMAND"
|
||||
|
||||
# Some parsers handle /MODE in a special way, make sure they do it right
|
||||
- input: ":SomeOp MODE #channel :+i"
|
||||
atoms:
|
||||
source: "SomeOp"
|
||||
verb: "MODE"
|
||||
params:
|
||||
- "#channel"
|
||||
- "+i"
|
||||
|
||||
- input: ":SomeOp MODE #channel +oo SomeUser :AnotherUser"
|
||||
atoms:
|
||||
source: "SomeOp"
|
||||
verb: "MODE"
|
||||
params:
|
||||
- "#channel"
|
||||
- "+oo"
|
||||
- "SomeUser"
|
||||
- "AnotherUser"
|
|
@ -0,0 +1,26 @@
|
|||
import os.path, unittest
|
||||
import yaml
|
||||
import irctokens
|
||||
|
||||
# run test cases sourced from:
|
||||
# https://github.com/ircdocs/parser-tests
|
||||
|
||||
dir = os.path.dirname(os.path.realpath(__file__))
|
||||
data_dir = os.path.join(dir, "_data")
|
||||
|
||||
class ParserTestsSplit(unittest.TestCase):
|
||||
def test_split(self):
|
||||
data_path = os.path.join(data_dir, "msg-split.yaml")
|
||||
with open(data_path) as data_file:
|
||||
tests = yaml.safe_load(data_file.read())["tests"]
|
||||
|
||||
for test in tests:
|
||||
input = test["input"]
|
||||
atoms = test["atoms"]
|
||||
|
||||
tokens = irctokens.tokenise(input)
|
||||
|
||||
self.assertEqual(tokens.tags, atoms.get("tags", None))
|
||||
self.assertEqual(tokens.source, atoms.get("source", None))
|
||||
self.assertEqual(tokens.command, atoms["verb"].upper())
|
||||
self.assertEqual(tokens.params, atoms.get("params", []))
|
Loading…
Reference in New Issue