switch to lxml in order to detect CDATA tags
continuous-integration/drone/push Build is passing Details

This commit is contained in:
Ben Harris 2020-09-22 13:20:20 -04:00
parent 9a25baf3b3
commit 93ac6340de
1 changed files with 31 additions and 21 deletions

View File

@ -1,14 +1,15 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
import requests, json import base64
import xml.etree.ElementTree as xml import json
from lxml import etree
from urllib.parse import quote from urllib.parse import quote
WORK_DIR = "/var/www/tilde.chat" WORK_DIR = "/var/www/tilde.chat"
r = requests.get("http://localhost:8081/stats")
r.raise_for_status()
out = {} out = {}
d = xml.fromstring(r.text) parser = etree.XMLParser(strip_cdata=False)
assert d.tag == "inspircdstats" root = etree.parse("http://localhost:8081/stats", parser)
assert root.getroot().tag == "inspircdstats"
with open(f"{WORK_DIR}/blacklist", "r") as f: with open(f"{WORK_DIR}/blacklist", "r") as f:
BLACKLIST = f.read().splitlines() BLACKLIST = f.read().splitlines()
@ -16,39 +17,48 @@ with open(f"{WORK_DIR}/blacklist", "r") as f:
def define(name, xps, vfilter=lambda x: x): def define(name, xps, vfilter=lambda x: x):
global out global out
out[name] = vfilter(d.findall(xps)[0].text) out[name] = vfilter(root.findall(xps)[0].text)
def unsanitize(node, default=""):
    """Return the readable text content of a stats XML element.

    Args:
        node: an lxml element (parsed with ``strip_cdata=False`` so CDATA
            sections survive), or ``None`` when the element was not found.
        default: value returned when the element is missing or has no text.

    Returns:
        ``default`` for a missing/empty element; the base64-decoded UTF-8
        string when the element's text is wrapped in a CDATA section;
        otherwise the element's text unchanged.

    Raises:
        binascii.Error / UnicodeDecodeError: if a CDATA payload is not valid
        base64 or does not decode as UTF-8.
    """
    # find() yields None for an absent element; treat that like "no text".
    if node is None or not node.text:
        return default

    text = node.text

    # workaround for weird behavior in insp's xml output
    # https://github.com/inspircd/inspircd/blob/v3.7.0/src/modules/m_httpd_stats.cpp#L55
    # Serialize to str (not bytes) and without the tail so the CDATA check
    # does not depend on how repr() happens to quote a bytes object.
    markup = etree.tostring(node, encoding="unicode", with_tail=False)
    if markup.startswith(f"<{node.tag}><![CDATA["):
        # The payload may arrive without base64 padding; restore it so
        # b64decode accepts the value.
        padded = text + "=" * (-len(text) % 4)
        return base64.b64decode(padded).decode("utf-8")

    return text
define("usercount", "./general/usercount", int) define("usercount", "./general/usercount", int)
define("channelcount", "./general/channelcount", int) define("channelcount", "./general/channelcount", int)
schannels = d.findall("./channellist/channel") schannels = root.findall("./channellist/channel")
channels = [] channels = []
for schannel in schannels: for schannel in schannels:
channel = dict( channel = {}
name=schannel.findall("channelname")[0].text, channel["name"] = unsanitize(schannel.find("channelname"))
usercount=int(schannel.findall("usercount")[0].text), channel["topic"] = unsanitize(schannel.find("./channeltopic/topictext"))
) channel["usercount"] = int(schannel.find("usercount").text)
channel["topic"] = ( channel["webchatlink"] = "https://web.tilde.chat/?join=" + quote(channel["name"])
schannel.findall("./channeltopic/topictext")[0].text
if schannel.findall("./channeltopic/topictext")[0].text is not None
else "No topic set"
)
if ( if (
# skip channels in the blacklist or with mode +s # skip channels in the blacklist or with mode +s
"s" in schannel.findall("./channelmodes")[0].text.split()[0] "s" in schannel.find("./channelmodes").text.split()[0]
or channel["name"] in BLACKLIST or channel["name"] in BLACKLIST
): ):
continue continue
channel["webchatlink"] = "https://web.tilde.chat/?join=" + quote(channel["name"])
channels.append(channel) channels.append(channel)
channels.sort(key=lambda x: x["name"].lower()) channels.sort(key=lambda x: x["name"].lower())
out["channels"] = channels out["channels"] = channels
# print([x.text for x in d.findall("./channellist/channel/channeltopic/topictext")])
with open(f"{WORK_DIR}/stats.json", "w") as f: with open(f"{WORK_DIR}/stats.json", "w") as f:
json.dump(out, f) json.dump(out, f)