irctokens/irctokens/tokenise.py

from typing import Dict, Optional
from .objects import Line
from .const import TAG_ESCAPED, TAG_UNESCAPED


def _unescape_tag(value: str) -> str:
    # Walk the value character by character, translating each escape
    # sequence listed in TAG_ESCAPED to its raw TAG_UNESCAPED form.
    unescaped, escaped = "", list(value)
    while escaped:
        current = escaped.pop(0)
        if current == "\\":
            if escaped:
                next_char = escaped.pop(0)
                duo = current + next_char
                if duo in TAG_ESCAPED:
                    index = TAG_ESCAPED.index(duo)
                    unescaped += TAG_UNESCAPED[index]
                else:
                    # Unknown escape: drop the backslash, keep the character.
                    unescaped += next_char
        else:
            unescaped += current
    return unescaped
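
# Illustrative examples (assuming const pairs "\\:" with ";" and "\\s"
# with " ", as in the IRCv3 message-tags escaping rules):
#   _unescape_tag("semi\\:colon\\sspace") == "semi;colon space"
#   _unescape_tag("a\\xb")                == "axb"  (unknown escape)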


def _tokenise(tags_s: Optional[str], line: str) -> Line:
    tags: Optional[Dict[str, str]] = None
    if tags_s is not None:
        # Drop the leading "@" and split into key=value pairs; a tag
        # without "=" gets an empty string as its value.
        tags = {}
        for part in tags_s[1:].split(";"):
            key, _, value = part.partition("=")
            tags[key] = _unescape_tag(value)

    # Separate the trailing parameter (everything after " :") before
    # splitting the rest of the line on spaces.
    line, trailing_sep, trailing = line.partition(" :")
    params = list(filter(bool, line.split(" ")))

    source: Optional[str] = None
    if params[0][0] == ":":
        source = params.pop(0)[1:]
    command = params.pop(0).upper()

    if trailing_sep:
        params.append(trailing)
    return Line(tags, source, command, params)
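
# Walk-through with example values (not from the original file):
#   _tokenise("@id=123", ":nick!user@host PRIVMSG #chan :hello world")
# builds Line(tags, source, command, params) with:
#   tags    = {"id": "123"}
#   source  = "nick!user@host"
#   command = "PRIVMSG"
#   params  = ["#chan", "hello world"]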


def tokenise_b(
        line_b: bytes,
        encoding: str = "utf8",
        fallback: str = "latin-1") -> Line:
    tags: Optional[str] = None
    if line_b[0] == ord(b"@"):
        # Message tags are always UTF-8, so decode them separately from
        # the rest of the line.
        tags_b, _, line_b = line_b.partition(b" ")
        tags = tags_b.decode("utf8")

    try:
        line = line_b.decode(encoding)
    except UnicodeDecodeError:
        line = line_b.decode(fallback)
    return _tokenise(tags, line)
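
# Illustrative usage with an assumed input: b"\xe9" alone is not valid
# UTF-8, so the line body falls back to latin-1, while any tags would
# still be decoded as UTF-8:
#   tokenise_b(b"PRIVMSG #chan :caf\xe9")  # params: ["#chan", "café"]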


def tokenise(line: str) -> Line:
    if line[0] == "@":
        tags, _, line = line.partition(" ")
        return _tokenise(tags, line)
    else:
        return _tokenise(None, line)
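

# Minimal smoke test, a sketch assuming Line keeps the tags, source,
# command and params it was constructed with. Because of the relative
# imports above, run it as a module (python -m irctokens.tokenise), not
# as a script.
if __name__ == "__main__":
    example = tokenise("@id=123 :nick!user@host PRIVMSG #chan :hello world")
    print(example.tags)     # {'id': '123'}
    print(example.source)   # nick!user@host
    print(example.command)  # PRIVMSG
    print(example.params)   # ['#chan', 'hello world']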