From 93ac6340de55a4df948abef4fee2df7bf171b3bc Mon Sep 17 00:00:00 2001 From: Ben Harris Date: Tue, 22 Sep 2020 13:20:20 -0400 Subject: [PATCH] switch to lxml in order to detect CDATA tags --- gen_stats | 52 +++++++++++++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 21 deletions(-) diff --git a/gen_stats b/gen_stats index 98c4ef6..28d9916 100755 --- a/gen_stats +++ b/gen_stats @@ -1,14 +1,15 @@ #!/usr/bin/env python3 -import requests, json -import xml.etree.ElementTree as xml +import base64 +import json +from lxml import etree from urllib.parse import quote WORK_DIR = "/var/www/tilde.chat" -r = requests.get("http://localhost:8081/stats") -r.raise_for_status() out = {} -d = xml.fromstring(r.text) -assert d.tag == "inspircdstats" +parser = etree.XMLParser(strip_cdata=False) +root = etree.parse("http://localhost:8081/stats", parser) + +assert root.getroot().tag == "inspircdstats" with open(f"{WORK_DIR}/blacklist", "r") as f: BLACKLIST = f.read().splitlines() @@ -16,39 +17,48 @@ with open(f"{WORK_DIR}/blacklist", "r") as f: def define(name, xps, vfilter=lambda x: x): global out - out[name] = vfilter(d.findall(xps)[0].text) + out[name] = vfilter(root.findall(xps)[0].text) + + +def unsanitize(node, default=""): + # workaround for weird behavior in insp's xml output + # https://github.com/inspircd/inspircd/blob/v3.7.0/src/modules/m_httpd_stats.cpp#L55 + if node.text is None or node.text == "": + return default + elif str(etree.tostring(node)).startswith(f"b'<{node.tag}>