Added some more scripts

This commit is contained in:
Russell 2018-01-24 20:55:25 +00:00
parent 1849bd34e5
commit 6838f0ce5f
5 changed files with 95 additions and 1 deletions

View File

@ -73,7 +73,7 @@ emfor&^%3&^%1509671353
k2l8m11n2&^%11&^%1510932395
sacredpix&^%2&^%1510943959
deltawitc&^%362&^%1516822359
login&^%14&^%1516797327
login&^%14&^%1516827186
kelpiebot&^%3&^%1513101957
unreal&^%1&^%1514940020
tildethie&^%694&^%1516825682

68
Code/python/chatcloud2.py Executable file
View File

@ -0,0 +1,68 @@
#!/usr/bin/python
"""Generate word cloud data (a JSON object of word -> count) from an irc log.

Each usable log line has the form ``<epoch seconds>TAB<user>TAB<message>``.
Lines that do not parse that way are skipped.  Only messages whose timestamp
falls inside [timestart, timeend] and whose author is not banned are counted.
The result is printed to stdout, or written to ``-outfile`` via a temporary
file that is then moved into place.
"""
import fileinput
import json
import time
import calendar
import re
import shutil
import argparse
import collections
import logging
import sys

logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)

# Punctuation and digits are replaced by spaces (not deleted) so neighbouring
# words are never glued together.  This is a raw string on purpose: the old
# non-raw literal collapsed "\\;" into an escaped semicolon, so backslashes
# were never actually treated as separators.
SEPARATORS = re.compile(r'[\'"`/\\;:,.?!*&^\-()<>{}|_\[\]0-9]')

# When no explicit start time is given, look back sixteen hours from timeend.
DEFAULT_WINDOW_SECONDS = 16 * 60 * 60

# Emitted when no word reaches the minimum occurrence threshold.
PLACEHOLDER_CLOUD = {"NOTHING": 1, "INTERESTING": 1, "TODAY": 1}


def build_parser():
    """Return the command line parser (flags and defaults unchanged)."""
    parser = argparse.ArgumentParser(description='Generate word cloud data based off of irc chat logs')
    parser.add_argument('-logfile', help='irc log file to read from', default='/home/jumblesale/Code/irc/log')
    parser.add_argument('-outfile', help='output file to write to', default='')
    parser.add_argument('-timeend', type=int, help='end time of the word cloud (in epoch time)', default=calendar.timegm(time.gmtime()))
    parser.add_argument('-timestart', type=int, help='start time of the word cloud (in epoch time)', default=-1)
    parser.add_argument('-bannedUsersFile', help='file containing list of banned users', default='/home/krowbar/Code/python/bannedUsers')
    parser.add_argument('-bannedWordsFile', help='file containing list of banned words', default='/home/krowbar/Code/python/bannedWords')
    parser.add_argument('-minLength', type=int, help='minimum size of words to include in the cloud', default=3)
    parser.add_argument('-minOccurrence', type=int, help='the minimum occurence of a word to include it in the cloud', default=3)
    return parser


def read_line_set(path):
    """Return the lines of the file at *path* as a set, closing the file."""
    with open(path) as handle:
        return set(handle.read().splitlines())


def count_words(lines, time_start, time_end, banned_users=(), banned_words=(), min_length=3):
    """Count the words spoken in *lines* between *time_start* and *time_end*.

    lines        -- iterable of raw log lines ("<epoch>TAB<user>TAB<message>")
    time_start   -- earliest timestamp to include (inclusive)
    time_end     -- latest timestamp to include (inclusive)
    banned_users -- users whose lines are ignored entirely
    banned_words -- words that are never counted
    min_length   -- words shorter than this are never counted

    Returns a dict mapping each lower-cased word to its number of occurrences.
    """
    counts = collections.Counter()
    for line in lines:
        try:
            # maxsplit=2 keeps any tab inside the message as part of the
            # message; a larger maxsplit made such lines fail to unpack and
            # be discarded as if they were malformed.
            stamp, user, message = line.split("\t", 2)
            stamp = int(stamp)
        except ValueError:
            continue  # there are some bad lines in the log file; skip them
        if user in banned_users:
            continue  # we don't care what they say
        if not time_start <= stamp <= time_end:
            continue
        counts.update(
            word
            for word in SEPARATORS.sub(' ', message).lower().split()
            if len(word) >= min_length and word not in banned_words
        )
    return dict(counts)


def select_frequent(counts, min_occurrence):
    """Keep only words seen at least *min_occurrence* times.

    Returns a copy of the placeholder cloud when nothing qualifies.
    """
    frequent = {word: seen for word, seen in counts.items() if seen >= min_occurrence}
    return frequent if frequent else dict(PLACEHOLDER_CLOUD)


def main(argv=None):
    """Parse the command line (*argv* defaults to sys.argv) and emit the cloud."""
    args = build_parser().parse_args(argv)
    # we only care about recent chats, let's say for the past sixteen hours
    if args.timestart == -1:
        args.timestart = args.timeend - DEFAULT_WINDOW_SECONDS
    logging.info("Generating word cloud based off words from %s to %s", args.timestart, args.timeend)

    banned_words = read_line_set(args.bannedWordsFile)
    banned_users = read_line_set(args.bannedUsersFile)
    with open(args.logfile, "r") as log:
        counts = count_words(log, args.timestart, args.timeend,
                             banned_users=banned_users,
                             banned_words=banned_words,
                             min_length=args.minLength)

    payload = json.dumps(select_frequent(counts, args.minOccurrence))
    if args.outfile == '':
        print(payload)
    else:
        # Write to a sibling temp file and move it into place so readers
        # never see a half-written output file.
        tmp_path = args.outfile + ".tmp"
        with open(tmp_path, "w") as tmp_file:
            tmp_file.write(payload)
        shutil.move(tmp_path, args.outfile)


if __name__ == "__main__":
    main()

View File

@ -0,0 +1,7 @@
#!/bin/bash
# Regenerate the all-time chat cloud: count words from epoch 0 up to the
# moment this script runs and write the JSON result to OUT.
START=0
END="$(date +%s)"
OUT="/home/krowbar/logs/chatcloud_all_time.json"
DIR="/home/krowbar/public_html/data"  # not referenced anywhere in this script
/usr/bin/python /home/krowbar/Code/python/chatcloud2.py \
    -timeend "$END" \
    -timestart "$START" \
    -outfile "$OUT"

View File

@ -0,0 +1,15 @@
#!/bin/bash
# Build the chat cloud for the trailing 31 days, link the JSON into the
# public data directory and add a matching <option> entry to the cloud page.
#
# Fail fast: if generating the cloud fails, do not link or edit the page.
set -euo pipefail

# Sample the clock once so START and END describe the same instant.
NOW=$(date +%s)
START=$((NOW - 2678400))  # 2678400 s = 31 days
END=$NOW
# The labels come from yesterday's date -- presumably so that a run on the
# first day of a month names the month that just ended (confirm with cron).
MONTH=$(date -d yesterday +_%Y_%m)
DATE=$(date -d yesterday +%b\ %Y)
OUT=/home/krowbar/logs/chatcloud${MONTH}.json
DIR=/home/krowbar/public_html/data
PAGE=/home/krowbar/public_html/chatcloud/index.html
LINE=7  # line of PAGE before which the new <option> is inserted

/usr/bin/python /home/krowbar/Code/python/chatcloud2.py -timeend "$END" -timestart "$START" -outfile "$OUT"

# -f replaces a link left by an earlier run instead of failing on it.
ln -sf "$OUT" "$DIR"

OPTION="<option value=\"${MONTH}\">${DATE}</option>"
# Only insert the entry once, so re-running the script does not duplicate it.
if ! grep -qF -- "$OPTION" "$PAGE"; then
    # The page is only replaced when sed succeeded (set -e aborts otherwise),
    # so a failed edit can no longer overwrite it with an empty file.
    sed "${LINE}i ${OPTION}" < "$PAGE" > "$PAGE.tmp"
    mv "$PAGE.tmp" "$PAGE"
fi

View File

@ -0,0 +1,4 @@
import urllib
from bs4 import BeautifulSoup
def get_track():