switch to lxml in order to detect CDATA tags

2020-09-22 13:20:20 -04:00 · 2020-09-22 13:20:20 -04:00 · 93ac6340de
parent 9a25baf3b3
commit 93ac6340de
1 changed files with 31 additions and 21 deletions
--- a/52
+++ b/52
@@ -1,14 +1,15 @@
 #!/usr/bin/env python3
-import requests, json
+import base64
-import xml.etree.ElementTree as xml
+import json
 from lxml import etree
 from urllib.parse import quote
 WORK_DIR = "/var/www/tilde.chat"
 r = requests.get("http://localhost:8081/stats")
 r.raise_for_status()
 out = {}
-d = xml.fromstring(r.text)
+parser = etree.XMLParser(strip_cdata=False)
-assert d.tag == "inspircdstats"
+root = etree.parse("http://localhost:8081/stats", parser)
 assert root.getroot().tag == "inspircdstats"
 with open(f"{WORK_DIR}/blacklist", "r") as f:
    BLACKLIST = f.read().splitlines()
@@ -16,39 +17,48 @@ with open(f"{WORK_DIR}/blacklist", "r") as f:
 def define(name, xps, vfilter=lambda x: x):
     # Store one value from the parsed stats document in the module-level
     # `out` dict under the key `name`.
     #   name    -- key to write into `out`
     #   xps     -- path expression handed to findall(); only the first
     #              match is used, so an expression with no match fails on
     #              the [0] index
     #   vfilter -- callable applied to the matched element's .text before
     #              storing (identity by default; callers pass e.g. `int`)
     global out
-    out[name] = vfilter(d.findall(xps)[0].text)
+    out[name] = vfilter(root.findall(xps)[0].text)
 def unsanitize(node, default=""):
     """Return the text of ``node``, decoding it when it is a base64 CDATA payload.
     ``node`` is an lxml element; ``default`` is returned when the element has
     no text (``None`` or the empty string). When the serialized element starts
     with ``<tag><![CDATA[`` the text is treated as base64 (padding is restored
     if it was dropped) and decoded to ``str``; bytes that are not valid UTF-8
     are replaced with U+FFFD rather than raising. Any other element's text is
     returned unchanged.
     """
     # workaround for weird behavior in insp's xml output
     # https://github.com/inspircd/inspircd/blob/v3.7.0/src/modules/m_httpd_stats.cpp#L55
     if not node.text:
         return default
     # Serialize to text (not bytes) and without the tail, so the prefix check
     # does not depend on how Python chooses to repr() a bytes object.
     serialized = etree.tostring(node, encoding="unicode", with_tail=False)
     if not serialized.startswith(f"<{node.tag}><![CDATA["):
         return node.text
     # base64 input length must be a multiple of 4; re-add stripped "=" padding.
     padded = node.text + "=" * (-len(node.text) % 4)
     # The payload is base64-wrapped by the server and need not be valid UTF-8;
     # a single undecodable value must not abort the whole stats run.
     return base64.b64decode(padded).decode("utf-8", errors="replace")
 define("usercount", "./general/usercount", int)
 define("channelcount", "./general/channelcount", int)
-schannels = d.findall("./channellist/channel")
+schannels = root.findall("./channellist/channel")
 channels = []
 for schannel in schannels:
-    channel = dict(
+    channel = {}
-        name=schannel.findall("channelname")[0].text,
+    channel["name"] = unsanitize(schannel.find("channelname"))
-        usercount=int(schannel.findall("usercount")[0].text),
+    channel["topic"] = unsanitize(schannel.find("./channeltopic/topictext"))
-    )
+    channel["usercount"] = int(schannel.find("usercount").text)
-    channel["topic"] = (
+    channel["webchatlink"] = "https://web.tilde.chat/?join=" + quote(channel["name"])
        schannel.findall("./channeltopic/topictext")[0].text
        if schannel.findall("./channeltopic/topictext")[0].text is not None
        else "No topic set"
    )
    if (
        # skip channels in the blacklist or with mode +s
-        "s" in schannel.findall("./channelmodes")[0].text.split()[0]
+        "s" in schannel.find("./channelmodes").text.split()[0]
        or channel["name"] in BLACKLIST
    ):
        continue
    channel["webchatlink"] = "https://web.tilde.chat/?join=" + quote(channel["name"])
    channels.append(channel)
 channels.sort(key=lambda x: x["name"].lower())
 out["channels"] = channels
 # print([x.text for x in d.findall("./channellist/channel/channeltopic/topictext")])
 with open(f"{WORK_DIR}/stats.json", "w") as f:
    json.dump(out, f)