add bits.py

This commit is contained in:
Lilith N 2024-05-03 16:55:53 +02:00
commit fdd9a23ea1
No known key found for this signature in database
GPG Key ID: 9DC384E6C7F17050
1 changed files with 206 additions and 0 deletions

206
bits.py Normal file
View File

@ -0,0 +1,206 @@
import re
import math
from typing import Callable
import string
from dataclasses import dataclass, field
import sys
import ipaddress
@dataclass
class Scheme:
    """Description of a positional text encoding that can be turned into bits."""

    # Digit characters in value order: the character at index i has value i.
    alfabet: str
    # Number of bits produced per decoded block; 0 (falsy) selects
    # character-by-character decoding instead of block decoding.
    block_width: int = 0
    # Trailing padding character -> (bit width, value); a padding character is
    # rendered as `value` in binary, zero-filled to `bit width` digits.
    padding: dict[str, tuple[int, int]] = field(default_factory=dict)
    # Shorthand text -> the text it stands for.
    aliases: dict[str, str] = field(default_factory=dict)
    # Marks alphabets whose letter case is meant to be insignificant.
    ignore_case: bool = False
def scheme_custom(spec: str) -> Scheme:
    r"""Build a :class:`Scheme` from a textual specification.

    The specification has the shape
    ``[<width>]<alphabet>[:<padding>][/<aliases>]``:

    * ``[<width>]`` (optional): block width in bits, or in bytes when the
      number is suffixed with ``B`` (``[4B]`` means 32 bits).  Without a
      width the alphabet size must be a power of two.
    * ``<alphabet>``: the digit characters in value order.  ``\``, ``:``,
      ``/`` and a leading ``[`` are written with a preceding backslash.
    * ``<padding>`` (optional): padding characters, each optionally followed
      by ``:<n>``.  Every occurrence of a character adds ``n`` (default 1) to
      the number of zero bits that character stands for.  A digit used as a
      padding character can be backslash-escaped to separate it from a
      preceding ``:<n>``.
    * ``<aliases>`` (optional): comma separated ``<text>:<chars>`` pairs;
      every character in ``<chars>`` becomes an alias for ``<text>``.

    Raises:
        ValueError: if the specification is malformed, the alphabet contains
            duplicate characters, or no width is given and the alphabet size
            is not a power of two.
    """
    # The named groups wrap the whole repetition so they capture the complete
    # section rather than only its last character, and fullmatch rejects
    # trailing garbage instead of silently ignoring it.
    m = re.fullmatch(
        r"(\[(?P<W>\d+B?)\])?"
        r"(?P<data>([^\\:/]|\\[\\:\[/])+)"
        r"(:(?P<padding>(([^\\:/]|\\[\\:\d/])(:\d+)?)+))?"
        r"(/(?P<aliases>(([^\\:,]|\\[\\:,])+:([^\\:,]|\\[\\:,])+,?)+))?",
        spec,
    )
    if m is None:
        raise ValueError("invalid scheme format!")

    # Drop the backslash of every escape sequence the alphabet section allows.
    data_chars = re.sub(r"\\([\\:\[/])", r"\1", m["data"])
    if len(set(data_chars)) != len(data_chars):
        raise ValueError("duplicate characters have been used!")
    out = Scheme(data_chars)

    width = m["W"]
    if width is not None:
        out.block_width = int(width.removesuffix("B")) * (8 if width.endswith("B") else 1)
    elif not math.log2(len(data_chars)).is_integer():
        # Per-character decoding needs a whole number of bits per character.
        raise ValueError(
            "only power of 2 bases are supported without a block width specifier! "
            f"({len(data_chars)} characters given)"
        )

    if m["padding"] is not None:
        pad_bits: dict[str, int] = {}
        for pad_match in re.finditer(r"([^\\:]|\\[\\:\d/])(:(\d+))?", m["padding"]):
            char = pad_match[1].removeprefix("\\")
            amount = int(pad_match[3]) if pad_match[3] is not None else 1
            pad_bits[char] = pad_bits.get(char, 0) + amount
        for char, bits in pad_bits.items():
            # Store on the instance being built; padding always encodes zeros.
            out.padding[char] = (bits, 0)

    if m["aliases"] is not None:
        pair_pattern = r"(?P<org>([^\\:,]|\\[\\:,])+):(?P<repl>([^\\:,]|\\[\\:,])+)"
        for pair in re.finditer(pair_pattern, m["aliases"]):
            org = re.sub(r"\\([\\:,])", r"\1", pair["org"])
            repl = re.sub(r"\\([\\:,])", r"\1", pair["repl"])
            for alias_char in repl:
                out.aliases[alias_char] = org
    return out
# RFC 1924 alphabet: digits, upper case, lower case, then 23 symbols.
b85_alfabet = string.digits + string.ascii_uppercase + string.ascii_lowercase + "!#$%&()*+-;<=>?@^_`{|}~"
# Ascii85 alphabet: the 85 consecutive characters starting at "!" (code 33).
a85_alfabet = "".join(chr(i + 33) for i in range(85))
# Z85 alphabet: digits, lower case, upper case, then 23 symbols.
z85_alfabet = string.digits + string.ascii_letters + ".-:+=^!/*?&<>()[]{}@%$#"
# RFC 4648 base 64 alphabet: A-Z, a-z, 0-9, "+", "/" (values 0..63 in that order).
b64_alfabet = string.ascii_uppercase + string.ascii_lowercase + string.digits + "+/"

# Scheme name -> ready-made Scheme, or a factory that builds one from the
# text following "<name>:" in the scheme argument.
schemes: dict[str, Callable[[str], Scheme] | Scheme] = {
    "cust": scheme_custom,
    "b64": Scheme(b64_alfabet, padding={"=": (2, 0)}),
    "b64u": Scheme(b64_alfabet[:-2] + "-_", padding={"=": (2, 0)}),
    "b32": Scheme(string.ascii_lowercase + "234567", padding={"=": (5, 0)}, ignore_case=True),
    "b32x": Scheme(string.digits + string.ascii_lowercase[:22], padding={"=": (5, 0)}, ignore_case=True),
    "b16": Scheme(string.digits + "abcdef", ignore_case=True),
    "hex": Scheme(string.digits + "abcdef", ignore_case=True),
    "b85": Scheme(b85_alfabet, 32),
    "ipv6": Scheme(b85_alfabet, 128),
    # "z" abbreviates an all-zero 32 bit block, which is five "!" characters.
    "a85": Scheme(a85_alfabet, 32, aliases={"z": "!!!!!"}),
    "z85": Scheme(z85_alfabet, 32),
}
def decode_bits(scheme: str, data: str) -> str:
    """Decode *data* with the named scheme into a string of "0"/"1" characters.

    Args:
        scheme: a key of ``schemes``, optionally followed by ``:<spec>``; the
            spec is handed to the entry when that entry is a factory.
        data: the encoded text.

    Returns:
        The decoded bits, most significant first, followed by the bits that
        the stripped trailing padding characters stand for.

    Raises:
        ValueError: if the scheme is unknown, the alphabet is too small for
            block decoding, or *data* contains a character that is not part
            of the scheme's alphabet.
    """
    name, _, spec = scheme.partition(":")
    if name not in schemes:
        raise ValueError("unknown scheme")
    entry = schemes[name]
    scheme_obj: Scheme = entry if isinstance(entry, Scheme) else entry(spec)  # type: ignore

    # str.replace returns a new string, so the result has to be kept.
    for alias, expansion in scheme_obj.aliases.items():
        data = data.replace(alias, expansion)

    # Strip trailing padding; the emptiness check keeps empty or all-padding
    # input from indexing into an empty string.
    padding = []
    while data and data[-1] in scheme_obj.padding:
        pad_width, pad_value = scheme_obj.padding[data[-1]]
        padding.append(f"{pad_value:0{pad_width}b}")
        data = data[:-1]

    alphabet = scheme_obj.alfabet
    fold_case = scheme_obj.ignore_case
    # Character -> digit value; setdefault keeps the first index, as
    # str.index would.  Keys are lower-cased when case is insignificant.
    digit_values: dict[str, int] = {}
    for value, char in enumerate(alphabet):
        digit_values.setdefault(char.lower() if fold_case else char, value)

    def digit(char: str) -> int:
        key = char.lower() if fold_case else char
        try:
            return digit_values[key]
        except KeyError:
            raise ValueError(f"character {char!r} is not part of the scheme's alphabet") from None

    base = len(alphabet)
    pieces = []
    if scheme_obj.block_width:
        if base < 2:
            raise ValueError("block decoding needs an alphabet of at least 2 characters")
        # Smallest number of characters able to represent block_width bits
        # (5 for 32 bits in base 85), computed exactly with integers.
        chars_per_block = 1
        while base ** chars_per_block < 2 ** scheme_obj.block_width:
            chars_per_block += 1
        # Left-pad with the zero digit up to a whole number of blocks; data
        # that is already aligned gets no extra block.
        data = alphabet[0] * (-len(data) % chars_per_block) + data
        for start in range(0, len(data), chars_per_block):
            n = 0
            for char in data[start:start + chars_per_block]:
                n = n * base + digit(char)
            pieces.append(format(n, f"0{scheme_obj.block_width}b"))
    else:
        bitlen = int(math.log2(base))
        pieces.extend(format(digit(char), f"0{bitlen}b") for char in data)

    # Padding was collected back to front; restore the original order.
    pieces.extend(reversed(padding))
    return "".join(pieces)
def bits_to_col(bits: str, width = 8) -> tuple[int,int,int]:
    """Split a bit string into red, green and blue channel values.

    The bits are divided into three consecutive runs.  When the length is not
    a multiple of three, green receives one extra bit first and blue second.
    With a truthy *width*, channels are either all cut to their first *width*
    bits or all right-filled with "1" up to *width* bits.

    Args:
        bits: string of "0"/"1" characters.
        width: target bit count per channel; a falsy value disables
            trimming and filling.

    Returns:
        The ``(red, green, blue)`` integers.
    """
    third, leftover = divmod(len(bits), 3)
    red_len = third
    green_len = third + (1 if leftover else 0)
    green_end = red_len + green_len
    channels = [bits[:red_len], bits[red_len:green_end], bits[green_end:]]

    if width:
        if green_len > width:
            channels = [chan[:width] for chan in channels]
        elif red_len < width:
            # dark colors are ugly, so short channels are filled with 1s
            channels = [chan.ljust(width, "1") for chan in channels]

    red, green, blue = (int(chan, base=2) for chan in channels)
    return red, green, blue
def bits_to_colf(bits: str, iwidth = 8) -> tuple[float,float,float]:
    """Split a bit string into fractional red, green and blue channel values.

    The bits are divided into three consecutive runs.  When the length is not
    a multiple of three, green receives one extra bit first and blue second.
    With a truthy *iwidth*, the first *iwidth* bits of a channel form the
    integer part and any further bits the fraction; if the red run is shorter
    than *iwidth*, all channels are right-filled with "1" up to *iwidth* bits.
    With a falsy *iwidth*, every channel is scaled into the range [0, 1).

    Args:
        bits: string of "0"/"1" characters.
        iwidth: number of integer bits per channel; falsy for pure fractions.

    Returns:
        The ``(red, green, blue)`` floats.
    """
    has_leftover = len(bits) % 3 != 0
    bw_g = len(bits) // 3 + has_leftover
    bw_r = bw_g - has_leftover
    bw_b = bw_g - (len(bits) % 3 == 1)
    rraw, graw, braw = bits[:bw_r], bits[bw_r:bw_r + bw_g], bits[bw_r + bw_g:]

    if not iwidth:
        return int(rraw, 2) / (2 ** bw_r), int(graw, 2) / (2 ** bw_g), int(braw, 2) / (2 ** bw_b)

    if bw_r < iwidth:
        # dark colors are ugly, so short channels are filled with 1s
        rraw, graw, braw = rraw.ljust(iwidth, "1"), graw.ljust(iwidth, "1"), braw.ljust(iwidth, "1")
    # Each channel is scaled by its own length; blue can be one bit shorter
    # than green, so it must use bw_b rather than bw_g.
    return (
        int(rraw, 2) / (2 ** max(0, bw_r - iwidth)),
        int(graw, 2) / (2 ** max(0, bw_g - iwidth)),
        int(braw, 2) / (2 ** max(0, bw_b - iwidth)),
    )
# simple cli
def print_help():
    """Print the usage summary and the list of built-in schemes to stdout."""
    option_lines = [
        "bits.py",
        " -b=<scheme> use scheme",
        " -x print hex representation of data",
        " -l print long color (ie with decimals)",
        " -xipv6 print ipv6 representation of data",
        " -<scheme> alias for -b=<scheme>",
    ]
    scheme_lines = [
        "schemes:",
        " cust make your own scheme",
        " b64 base 64 ┐",
        " b64u url-safe base 64 │",
        " b32 base 32 │",
        " b32x base 32 with hex alfabet │",
        " b16 │",
        " hex base 16 ┴ see rfc 4648",
        " b85 base 85 with 32 bit words ┐",
        " ipv6 base 85 with 128 bit words ┴ see rfc 1924",
        " a85 Ascii85, b85 predecessor",
        " z85 Z85, Ascii85 derivative",
    ]
    # An empty line separates the options from the scheme list.
    print("\n".join(option_lines + [""] + scheme_lines))
def main(argv: list[str]):
    """Run the command line interface.

    Every argument that is not a recognised option is treated as a word to
    decode.  Each word is decoded with the selected scheme (``b32`` unless an
    option changes it) and printed with its hex color, an ANSI color swatch
    and, if requested, one extra representation.  When no word is given, the
    help text is printed and the process exits.

    Args:
        argv: the full argument vector; ``argv[0]`` is skipped.
    """
    # Option -> name of the extra representation printed after the swatch.
    extra_flags = {"-x": "hex", "-l": "long", "-xipv6": "ipv6"}
    words = []
    scheme = "b32"
    extra = ""
    for arg in argv[1:]:
        if arg.startswith("-"):
            if arg[1:] in schemes:
                scheme = arg[1:]
                continue
            if arg.startswith("-b="):
                scheme = arg[3:]
                continue
            if arg in extra_flags:
                extra = extra_flags[arg]
                continue
        # Anything unrecognised, including unknown "-..." arguments, is a word.
        words.append(arg)
    if not words:
        print_help()
        sys.exit()

    w = max(len(word) for word in words)
    for word in words:
        raw = decode_bits(scheme, word)
        r, g, b = bits_to_col(raw)
        # assume an ansi enabled terminal
        # :P sorry windows users
        extrav = ""
        if extra == "hex":
            # Left-pad to a whole number of nibbles; a bit count that is
            # already a multiple of 4 needs no padding.
            hraw = "0" * (-len(raw) % 4) + raw
            extrav = "".join(f"{int(hraw[i:i + 4], 2):x}" for i in range(0, len(hraw), 4))
        elif extra == "long":
            extrav = bits_to_colf(raw)
        elif extra == "ipv6":
            extrav = ipaddress.IPv6Address(int(raw, 2))
        print(word.ljust(w), "->", f"#{r:02x}{g:02x}{b:02x}", f"\033[48;2;{r};{g};{b}m \033[0m", extrav)
# Run the command line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main(sys.argv)