tilde-projects/Code/python/chatbesties.py

153 lines
5.4 KiB
Python
Executable File

#!/usr/bin/python
import fileinput
import json
import time
import calendar
import shutil
import re
import math
import operator
# Cap on how many outgoing links are emitted per user in the link graph.
MAX_NODES = 4

# Tab-separated chat log that the script scans.
logfile = "/home/archangelic/irc/log"
# logfile = "/home/jumblesale/Code/irc/log"

# Output paths for the two JSON documents the script produces.
outfile = "/home/krowbar/logs/chatBesties.json"
outCircle = "/home/krowbar/logs/chatcircle.json"

# Cutoff timestamp (seconds since the epoch): "now" minus two weeks.
# Despite the name this is a point in time, not a duration; log lines
# stamped earlier than this are skipped.
timePeriod = calendar.timegm(time.gmtime()) - (14 * 24 * 3600)

# Keyed by user name; each value carries that user's per-name mention counts.
userData = dict()

# Nick spellings that are folded into another name before counting.
nameFix = dict(
    jumblesal="jumblesale",
    hardmath1="kc",
    hardmath123="kc",
    bendorphan="endorphant",
    endorphan="endorphant",
    synergian="synergiance",
)

# Names of everyone who has spoken since the cutoff (populated from the log).
users = list()
# Get a list of all user names by checking the logs for people who have said
# things since the cutoff stored in timePeriod.
_seenUsers = set(users)  # O(1) duplicate check; `users` keeps first-seen order
with open(logfile, "r") as log:
    for line in log:
        try:
            # maxsplit=2 always yields <timestamp, nick, message>, even when
            # the message itself contains tabs. (maxsplit=3 produced a fourth
            # field for such lines and they were dropped as unparsable.)
            # The timestamp gets its own name so the `time` module is not
            # rebound by the loop.
            stamp, user, message = line.split("\t", 2)
            stamp = int(stamp)
        except ValueError:
            # There are some bad lines in the log file that we'll ignore if
            # we can't parse them (too few fields or a non-numeric timestamp).
            continue
        if stamp < timePeriod:
            continue  # only add users who have spoken in the last period
        # `in` instead of dict.has_key(), which no longer exists in Python 3.
        # Names without a nameFix entry are lower-cased; mapped names are
        # used exactly as nameFix spells them.
        user = nameFix[user] if user in nameFix else user.lower()
        if user not in _seenUsers:
            _seenUsers.add(user)
            users.append(user)
# Node/link document for the graph output; "links" is filled in later.
d3data = {}
d3data["nodes"] = []


def _ensureNode(name):
    """Create the userData record and the d3 node for *name* if missing.

    The record's "id" is the index of the node inside d3data["nodes"], so we
    know how to match people during the links phase.
    """
    if name in userData:
        return
    userData[name] = {
        "data": {},  # mentioned name -> number of mentions by this user
        "score": 0,  # one point per mention made and per mention received
        "id": len(d3data["nodes"]),
    }
    d3data["nodes"].append({"name": name, "group": 1})


# Hoisted out of the loop: set membership is O(1) per word, and the pattern
# is only looked up once.
_knownUsers = set(users)
_nonAlnum = re.compile("[^A-Za-z0-9]+")

# re-read the log and this time look for instances of user names in messages
with open(logfile, "r") as log:
    for line in log:
        try:
            # maxsplit=2 keeps messages that contain tabs in one piece;
            # maxsplit=3 made such lines fail the 3-way unpack and vanish.
            # `stamp` (not `time`) avoids rebinding the `time` module.
            stamp, user, message = line.split("\t", 2)
            stamp = int(stamp)
        except ValueError:
            # There are some bad lines in the log file that we'll ignore if
            # we can't parse them.
            continue
        if stamp < timePeriod:
            continue  # only consider the past two weeks of chats
        # `in` instead of dict.has_key(), which no longer exists in Python 3.
        user = nameFix[user] if user in nameFix else user.lower()

        for word in message.split(" "):
            # Strip punctuation so "name:" or "name," still matches.
            # NOTE(review): the word is neither lower-cased nor run through
            # nameFix, while speaker names are - so "Name" will not match a
            # speaker stored as "name". Left unchanged; confirm the intent.
            word = _nonAlnum.sub("", word)
            if word not in _knownUsers:
                continue
            # SOMEONE MENTIONED SOMEONE
            # Speaker first, then target, so node ids are assigned in the
            # same order as before.
            _ensureNode(user)
            _ensureNode(word)
            mentions = userData[user]["data"]
            mentions[word] = mentions.get(word, 0) + 1
            # give both the target and mentioner a point
            userData[user]["score"] += 1
            userData[word]["score"] += 1
d3data["links"] = []
# Now connect all the people to their stuff
# .items() instead of .iteritems(): the latter does not exist in Python 3.
for user, values in userData.items():
    # give the user a 'group' based on their total score
    # (every record has received at least one point, so log() is defined)
    d3data["nodes"][values["id"]]["group"] = int(math.ceil(math.log(values["score"])))

    # Most-mentioned names first; ONLY the top besties get a link.
    ranked = sorted(values["data"].items(), key=operator.itemgetter(1), reverse=True)
    for target, score in ranked[:MAX_NODES]:
        # Only the target lookup can raise KeyError, so only it is guarded.
        try:
            targetId = userData[target]["id"]
        except KeyError:
            print("! Error when trying to link " + user + " to " + target)
            continue
        print(
            "Adding link from %s (%s) to %s (%s) with strength %s"
            % (user, values["id"], target, targetId, score)
        )
        d3data["links"].append(
            {
                "source": values["id"],
                "target": targetId,
                # square root dampens very chatty pairs
                "value": math.ceil(math.sqrt(score)) * 2,
            }
        )

    ignored = len(values["data"]) - MAX_NODES
    if ignored > 0:
        print(" ...ignoring %s more connections from %s" % (ignored, user))
# Square matrix document: names[i] is the user whose node id is i, and
# matrix[i][j] is how often user i mentioned user j (single mentions are
# written as 0).
d3Circle = {}
_size = len(userData)
d3Circle["names"] = [""] * _size
# Build every row as its own list. `[[0] * n] * n` repeats ONE list object n
# times, so a write to any row showed up in all rows and the matrix came out
# with identical rows.
d3Circle["matrix"] = [[0] * _size for _ in range(_size)]
# .items() instead of .iteritems(): the latter does not exist in Python 3.
for user, values in userData.items():
    d3Circle["names"][values["id"]] = user
    row = d3Circle["matrix"][values["id"]]
    for name, score in values["data"].items():
        row[userData[name]["id"]] = score if score > 1 else 0
# Write each document to a sibling ".tmp" file and then move it onto the real
# path - presumably so a consumer never reads a half-written file.
for _destination, _document in ((outfile, d3data), (outCircle, d3Circle)):
    _scratch = _destination + ".tmp"
    with open(_scratch, "w") as tmpFile:
        json.dump(_document, tmpFile)
    shutil.move(_scratch, _destination)