77 lines
2.1 KiB
Python
77 lines
2.1 KiB
Python
|
#!/usr/bin/python3
|
||
|
import fileinput
|
||
|
import json
|
||
|
import time
|
||
|
import calendar
|
||
|
import re
|
||
|
import shutil
|
||
|
import argparse
|
||
|
import logging, sys
|
||
|
import math
|
||
|
import os
|
||
|
|
||
|
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
|
||
|
|
||
|
parser = argparse.ArgumentParser(
|
||
|
description="Generate word list data based off of aggregate irc chat logs"
|
||
|
)
|
||
|
|
||
|
parser.add_argument(
|
||
|
"-outfile",
|
||
|
help="output file to write to",
|
||
|
default="/home/krowbar/logs/chatcloud_aggregate.json"
|
||
|
)
|
||
|
|
||
|
parser.add_argument(
|
||
|
"-logpath",
|
||
|
type=str,
|
||
|
help="where the log files are kept",
|
||
|
default="/home/krowbar/logs",
|
||
|
)
|
||
|
parser.add_argument
|
||
|
args = parser.parse_args()
|
||
|
|
||
|
chatData = {
|
||
|
"columns": [ "__TIMESTAMP__" ],
|
||
|
"data": []
|
||
|
}
|
||
|
|
||
|
logging.info(
|
||
|
"Generating word graph based off words from " + args.logpath
|
||
|
)
|
||
|
|
||
|
logs = sorted([f for f in os.listdir(args.logpath) if re.match(r'chatcloud_[0-9]{4}_[0-9]{2}.json', f)])
|
||
|
for log in logs:
|
||
|
date = re.findall("[0-9]{4}_[0-9]{2}", log)[0]
|
||
|
year = re.findall("[0-9]{4}", date)[0]
|
||
|
if year < '2019':
|
||
|
continue
|
||
|
print("Processing: {}...".format(log), end='')
|
||
|
|
||
|
logData = [ date ]
|
||
|
with open(os.path.join(args.logpath, log), "r") as logfile:
|
||
|
j = json.load(logfile);
|
||
|
for col in chatData['columns']:
|
||
|
if col in j.keys():
|
||
|
logData.append(j[col])
|
||
|
elif col is not "__TIMESTAMP__":
|
||
|
logData.append(0)
|
||
|
|
||
|
for key in j.keys():
|
||
|
if key in chatData['columns']:
|
||
|
continue
|
||
|
else:
|
||
|
chatData['columns'].append(key)
|
||
|
for d in chatData['data']:
|
||
|
d.append(0)
|
||
|
# append a 0 in each other chatData.data rows
|
||
|
logData.append(j[key])
|
||
|
|
||
|
chatData['data'].append(logData);
|
||
|
print(" Columns: {}, Records: {}".format(len(chatData['columns']), len(logData)))
|
||
|
|
||
|
with open(args.outfile + ".tmp", "w") as tmpFile:
|
||
|
tmpFile.write(json.dumps(chatData))
|
||
|
# shutil.move(args.outfile + ".tmp", args.outfile)
|
||
|
print("Dumped {} records to {}".format(len(chatData['columns']), args.outfile))
|