120 lines
4.9 KiB
Python
Executable File
120 lines
4.9 KiB
Python
Executable File
#!/usr/bin/python
|
|
import fileinput
|
|
import json
|
|
import time
|
|
import calendar
|
|
import shutil
|
|
import re
|
|
import math
|
|
import operator
|
|
|
|
# Upper bound on how many outgoing links (top "besties") are kept per user.
MAX_NODES = 4

# Chat log that is read, and the two JSON files that are produced.
logfile = "/home/archangelic/irc/log"
#logfile = "/home/jumblesale/Code/irc/log"
outfile = "/home/krowbar/logs/chatBesties.json"
outCircle = "/home/krowbar/logs/chatcircle.json"

# Log entries with a timestamp older than this (epoch seconds) are skipped.
_WINDOW_SECONDS = 2 * 7 * 24 * 60 * 60  # 2 weeks
timePeriod = calendar.timegm(time.gmtime()) - _WINDOW_SECONDS

# user name -> {'data': {mentioned user: count}, 'score': int, 'id': node index}
userData = {}

# Alternate nick spellings seen in the log, mapped to the canonical user name.
nameFix = {
    'jumblesal': 'jumblesale',
    'hardmath1': 'kc',
    'hardmath123': 'kc',
    'bendorphan': 'endorphant',
    'endorphan': 'endorphant',
    'synergian': 'synergiance',
}
|
|
|
|
# Get a list of all user names by checking the log for people who have said
# something inside the time window.  Names are stored normalized (see below),
# in order of first appearance, without duplicates.
users = []
with open(logfile, "r") as log:
    for line in log:
        # A log line is "<timestamp>\t<user>\t<message>".  Split at most twice
        # so that tabs inside the message stay in the message instead of
        # making the whole line look malformed.
        try:
            timestamp, user, _message = line.split("\t", 2)
            too_old = int(timestamp) < timePeriod
        except ValueError:
            # There are some bad lines in the log file (too few fields or a
            # non-numeric timestamp); ignore what we can't parse.
            continue
        if too_old:
            continue  # only add users who have spoken in the last period

        # Known alternate spellings map to their canonical name; every other
        # name is simply lower-cased.
        user = nameFix.get(user, user.lower())
        if user not in users:
            users.append(user)
|
|
|
|
# Graph payload for D3: 'nodes' is filled here, 'links' is added later on.
d3data = {'nodes': []}

# Hoisted out of the loops: the word-cleaning pattern and a hashable snapshot
# of the known user names for constant-time membership tests.
_NON_ALNUM = re.compile('[^A-Za-z0-9]+')
_known_users = frozenset(users)


def _register_user(name):
    """Return the userData record for `name`, creating it on first use.

    A new record starts with no mentions and a score of 0.  Its 'id' is the
    index of the node appended to d3data['nodes'] for it, so that people can
    be matched up during the links phase.
    """
    if name not in userData:
        userData[name] = {'data': {}, 'score': 0, 'id': len(d3data['nodes'])}
        d3data['nodes'].append({"name": name, "group": 1})
    return userData[name]


# Re-read the log and this time look for instances of user names in messages.
with open(logfile, "r") as log:
    for line in log:
        # Split at most twice so a tab inside the message does not cause the
        # line to be rejected as malformed.
        try:
            timestamp, user, message = line.split("\t", 2)
            too_old = int(timestamp) < timePeriod
        except ValueError:
            # There are some bad lines in the log file; ignore what we can't parse.
            continue
        if too_old:
            continue  # only consider chats inside the timePeriod window

        user = nameFix.get(user, user.lower())

        for word in message.split(' '):
            word = _NON_ALNUM.sub('', word)  # drop punctuation such as "name:"
            if word not in _known_users:
                continue

            # SOMEONE MENTIONED SOMEONE: make sure both sides exist (mentioner
            # first, so node ids are assigned in that order), then count it.
            mentioner = _register_user(user)
            target = _register_user(word)
            mentioner['data'][word] = mentioner['data'].get(word, 0) + 1

            # give both the target and mentioner a point
            mentioner['score'] += 1
            target['score'] += 1
|
|
|
|
d3data['links'] = []
# Now connect all the people to the users they mention most.
# print(...) with a single string argument and dict.items() behave the same on
# Python 2 and Python 3, so this section runs unchanged on either.
for user, values in userData.items():
    node_id = values['id']

    # give the user a 'group' based on their total score (natural log, rounded up)
    d3data['nodes'][node_id]['group'] = int(math.ceil(math.log(values['score'])))

    # ONLY the top besties: highest mention counts first, at most MAX_NODES.
    ranked = sorted(values['data'].items(), key=operator.itemgetter(1), reverse=True)
    for target, score in ranked[:MAX_NODES]:
        try:
            target_id = userData[target]['id']
        except KeyError:
            print("! Error when trying to link " + user + " to " + target)
            continue
        print("Adding link from " + user + " (" + str(node_id) + ") to " + target + " (" + str(target_id) + ") with strength " + str(score))
        # Link weight grows with the square root of the mention count.
        d3data['links'].append({"source": node_id, "target": target_id, "value": math.ceil(math.sqrt(score)) * 2})

    if len(values['data']) > MAX_NODES:
        print(" ...ignoring " + str(len(values['data']) - MAX_NODES) + " more connections from " + user)
|
|
|
|
# Chord-diagram payload: 'names'[i] is the user whose node id is i, and
# 'matrix'[i][j] is how often user i mentioned user j.
_size = len(userData)
d3Circle = {
    'names': [''] * _size,
    # Build every row separately.  `[[0] * n] * n` would repeat ONE row object
    # n times, so a write to any cell would show up in every row.
    'matrix': [[0] * _size for _ in range(_size)],
}

for user, values in userData.items():
    d3Circle['names'][values['id']] = user
    row = d3Circle['matrix'][values['id']]
    for name, score in values['data'].items():
        # A single mention is treated as noise and recorded as 0.
        row[userData[name]['id']] = score if score > 1 else 0
|
|
|
|
# Dump each payload to "<destination>.tmp" and then move that file onto the
# real path (presumably so a reader never sees a partially written file).
# The besties graph is written first, then the circle data.
for payload, destination in ((d3data, outfile), (d3Circle, outCircle)):
    scratch_path = destination + ".tmp"
    with open(scratch_path, "w") as handle:
        handle.write(json.dumps(payload))
    shutil.move(scratch_path, destination)
|