Added some more scripts

2018-01-24 20:55:25 +00:00 · 2018-01-24 20:55:25 +00:00 · 6838f0ce5f
parent 1849bd34e5
commit 6838f0ce5f
5 changed files with 95 additions and 1 deletions
--- a/Code/irc/tildescores.txt
+++ b/Code/irc/tildescores.txt
@ -73,7 +73,7 @@ emfor&^%3&^%1509671353
 k2l8m11n2&^%11&^%1510932395
 sacredpix&^%2&^%1510943959
 deltawitc&^%362&^%1516822359
-login&^%14&^%1516797327
+login&^%14&^%1516827186
 kelpiebot&^%3&^%1513101957
 unreal&^%1&^%1514940020
 tildethie&^%694&^%1516825682
--- a/Code/python/chatcloud2.py
+++ b/Code/python/chatcloud2.py
@ -0,0 +1,68 @@
+#!/usr/bin/python
+import fileinput
+import json
+import time
+import calendar
+import re
+import shutil
+import argparse
+import logging, sys
+
+logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
+
+# Command-line options: input/output paths, time window, ban lists and word filters.
+parser = argparse.ArgumentParser(description='Generate word cloud data based off of irc chat logs')
+parser.add_argument('-logfile', help='irc log file to read from', default='/home/jumblesale/Code/irc/log')
+parser.add_argument('-outfile', help='output file to write to', default='')
+
+parser.add_argument('-timeend', type=int, help='end time of the word cloud (in epoch time)', default=calendar.timegm(time.gmtime()))
+parser.add_argument('-timestart', type=int, help='start time of the word cloud (in epoch time)', default=-1)
+
+parser.add_argument('-bannedUsersFile', help='file containing list of banned users', default='/home/krowbar/Code/python/bannedUsers')
+parser.add_argument('-bannedWordsFile', help='file containing list of banned words', default='/home/krowbar/Code/python/bannedWords')
+
+parser.add_argument('-minLength', type=int, help='minimum size of words to include in the cloud', default=3)
+parser.add_argument('-minOccurrence', type=int, help='the minimum occurence of a word to include it in the cloud', default=3)
+
+args = parser.parse_args()
+
+# -timestart of -1 means "not given": fall back to the sixteen hours before -timeend.
+if args.timestart == -1:
+    args.timestart = args.timeend - (16 * 60 * 60)
+logging.info("Generating word cloud based off words from " + str(args.timestart) + " to " + str(args.timeend))
+
+# Ban lists hold one entry per line; sets give constant-time lookups and `with` closes the files.
+with open(args.bannedWordsFile) as bannedFile:
+    bannedWords = set(bannedFile.read().splitlines())
+with open(args.bannedUsersFile) as bannedFile:
+    bannedUsers = set(bannedFile.read().splitlines())
+
+# Symbols and digits become spaces (not removed) to avoid compounded words; raw string so `\\` is a real backslash.
+separators = re.compile(r'[\'\"\`\/\\;:,.?!*&^\-()<>\{\}|_\[\]0-9]')
+wordData = {}  # word -> number of occurrences inside the time window
+
+with open(args.logfile, "r") as log:
+    for line in log:
+        try:
+            # maxsplit=2 yields at most three fields, so tabs inside the message stay in the message
+            stamp, user, message = line.split("\t", 2)
+            stamp = int(stamp)
+        except ValueError:
+            continue  # bad line (too few fields or a non-numeric time): ignore it
+        if user in bannedUsers or not args.timestart <= stamp <= args.timeend:
+            continue  # banned speaker, or outside the requested window
+        for word in separators.sub(' ', message).lower().split():
+            if len(word) >= args.minLength and word not in bannedWords:
+                wordData[word] = wordData.get(word, 0) + 1
+
+# Keep only words seen at least -minOccurrence times; use a placeholder cloud if none remain.
+wordData = {word: count for word, count in wordData.items() if count >= args.minOccurrence}
+if not wordData:
+    wordData = {"NOTHING": 1, "INTERESTING": 1, "TODAY": 1}
+if args.outfile == '':
+    print(json.dumps(wordData))
+else:
+    # Write a sibling .tmp file first, then move it over the target (presumably to avoid exposing a partial file).
+    with open(args.outfile + ".tmp", "w") as tmpFile:
+        tmpFile.write(json.dumps(wordData))
+    shutil.move(args.outfile + ".tmp", args.outfile)
--- a/Code/python/create_alltime_chatcloud.sh
+++ b/Code/python/create_alltime_chatcloud.sh
@ -0,0 +1,7 @@
+#!/bin/bash
+# Generate the all-time chat cloud JSON: window runs from epoch 0 through the current time.
+START=0
+END=$(date +%s)
+OUT=/home/krowbar/logs/chatcloud_all_time.json
+DIR=/home/krowbar/public_html/data  # not referenced by this script
+/usr/bin/python /home/krowbar/Code/python/chatcloud2.py -timeend "$END" -timestart "$START" -outfile "$OUT"
--- a/Code/python/create_monthly_chatcloud.sh
+++ b/Code/python/create_monthly_chatcloud.sh
@ -0,0 +1,15 @@
+#!/bin/bash
+# Build the chat cloud for the 31 days up to now (named after yesterday's month), link it, and list it on the page.
+set -e  # abort on the first failing step so a failed run cannot overwrite the index page
+END=$(date +%s)
+START=$((END - 2678400))  # 2678400 s = 31 days; derived from END so both ends share one clock reading
+MONTH=$(date -d yesterday +_%Y_%m)
+DATE=$(date -d yesterday '+%b %Y')
+OUT=/home/krowbar/logs/chatcloud${MONTH}.json
+DIR=/home/krowbar/public_html/data
+PAGE=/home/krowbar/public_html/chatcloud/index.html
+LINE=7
+
+/usr/bin/python /home/krowbar/Code/python/chatcloud2.py -timeend "$END" -timestart "$START" -outfile "$OUT"
+ln -sf "$OUT" "$DIR"  # -f: replace an existing link instead of failing when the script is re-run
+sed "${LINE}i  <option value=\"${MONTH}\">${DATE}</option>" < "$PAGE" > "$PAGE.tmp" && mv "$PAGE.tmp" "$PAGE"
--- a/Code/python/mfp_watcher.py
+++ b/Code/python/mfp_watcher.py
@ -0,0 +1,4 @@
+import urllib
+from bs4 import BeautifulSoup
+
+def get_track():