106 lines
4.2 KiB
Python
106 lines
4.2 KiB
Python
#!/usr/bin/python
|
|
import fileinput
|
|
import json
|
|
import time
|
|
import calendar
|
|
import shutil
|
|
import re
|
|
import math
|
|
import operator
|
|
|
|
# Maximum number of outgoing links kept per user when the link list is built.
MAX_NODES = 3

# Input chat log and the two JSON files this script produces.
logfile = "/home/jumblesale/Code/irc/log"
outfile = "/home/krowbar/logs/chatBesties.json"
outCircle = "/home/krowbar/logs/chatcircle.json"

# Keyed by user name. Each value is a dict with:
#   'data' -> {mentioned user name: number of mentions}
#   'id'   -> index of that user's entry in d3data['nodes']
userData = {}

# Nick spellings as they appear in the log, mapped to the name they are
# counted under. Lookups use the raw (not lower-cased) nick from the log.
nameFix = {
    'jumblesal':   'jumblesale',
    'hardmath1':   'kc',
    'hardmath123': 'kc',
    'bendorphan':  'endorphant',
    'endorphan':   'endorphant',
    'synergian':   'synergiance',
}
|
|
|
|
users = []
_seen_users = set()  # mirrors `users` so duplicate checks are O(1)

# Get a list of all user names by checking the log for people who have said things.
# A usable log line has the form "<time>\t<user>\t<message>".
with open(logfile, "r") as log:
    for line in log:
        try:
            # maxsplit=2 yields exactly three fields, so a tab inside the
            # message stays in the message instead of breaking the unpack.
            # The first field is unused; it is not called `time` so that the
            # imported `time` module is not shadowed.
            _timestamp, user, message = line.split("\t", 2)
        except ValueError:
            continue  # There are some bad lines in the log file that we'll ignore if we can't parse

        # Known aliases map to their canonical name; any other nick is lower-cased.
        user = nameFix.get(user, user.lower())

        if user not in _seen_users:
            _seen_users.add(user)
            users.append(user)
|
|
|
|
d3data = {}
d3data['nodes'] = []

# Re-read the log and this time look for instances of user names in messages.
# Every time a speaker's message contains a known user name:
#   * the speaker and the mentioned user each get a node (and an 'id' equal to
#     that node's index in d3data['nodes']) the first time they are seen, and
#   * userData[speaker]['data'][mentioned] is incremented.
_known_users = set(users)                    # O(1) membership test per word
_non_alnum = re.compile('[^A-Za-z0-9]+')     # strips punctuation such as "name:" or "name,"

with open(logfile, "r") as log:
    for line in log:
        try:
            # maxsplit=2 yields exactly three fields, so a tab inside the
            # message stays in the message instead of breaking the unpack.
            _timestamp, user, message = line.split("\t", 2)
        except ValueError:
            continue  # There are some bad lines in the log file that we'll ignore if we can't parse

        # Known aliases map to their canonical name; any other nick is lower-cased.
        user = nameFix.get(user, user.lower())

        for word in message.split(' '):
            word = _non_alnum.sub('', word)
            if word not in _known_users:
                continue

            # SOMEONE MENTIONED SOMEONE
            # Register the speaker first, then the target, so node ids are
            # handed out in that order.
            for name in (user, word):
                if name not in userData:
                    userData[name] = {'data': {}, 'id': len(d3data['nodes'])}
                    d3data['nodes'].append({"name": name, "group": 1})

            mentions = userData[user]['data']
            mentions[word] = mentions.get(word, 0) + 1
|
|
|
|
d3data['links'] = []

# Now connect all the people to their stuff.
# For each user, emit one link per "bestie": the (at most) MAX_NODES users they
# mentioned most often. Link weight is ceil(sqrt(mention count)).
# print(...) with a single argument and dict.items() behave the same on
# Python 2 and Python 3.
for user, values in userData.items():
    mention_counts = values['data']
    # ONLY the top MAX_NODES besties, highest mention count first
    besties = sorted(mention_counts.items(), key=operator.itemgetter(1), reverse=True)[0:MAX_NODES]

    for target, score in besties:
        try:
            source_id = values['id']
            target_id = userData[target]['id']
        except KeyError:
            print("Error when trying to link " + user + " to " + target)
            continue
        print("Adding link for " + user + " (" + str(source_id) + ") to " + target + " (" + str(target_id) + ") for " + str(score))
        d3data['links'].append({"source": source_id, "target": target_id, "value": math.ceil(math.sqrt(score))})

    if len(mention_counts) > MAX_NODES:
        print("...ignoring " + str(len(mention_counts) - MAX_NODES) + " more connections")
|
|
|
|
# Build the data for the circle diagram:
#   'names'  -> user names, positioned by each user's 'id'
#   'matrix' -> square matrix where matrix[a][b] is how often user a mentioned
#               user b; counts of 1 are stored as 0.
d3Circle = {}
_circle_size = len(userData)
d3Circle['names'] = [''] * _circle_size
# Each row must be its own list. `[[0] * n] * n` would repeat ONE row object
# n times, so a write to any row would show up in every row.
d3Circle['matrix'] = [[0] * _circle_size for _ in range(_circle_size)]

for user, values in userData.items():
    row = values['id']
    d3Circle['names'][row] = user
    for name, score in values['data'].items():
        d3Circle['matrix'][row][userData[name]['id']] = score if score > 1 else 0
|
|
|
|
# Serialize both result structures to JSON. Each one is written to
# "<destination>.tmp" first and then moved over the destination path.
for _destination, _payload in ((outfile, d3data), (outCircle, d3Circle)):
    _scratch = _destination + ".tmp"
    with open(_scratch, "w") as tmpFile:
        tmpFile.write(json.dumps(_payload))
    shutil.move(_scratch, _destination)
|