33 lines
759 B
Python
33 lines
759 B
Python
import wordfreq,string,random
|
|
# Load the corpus: the file named "corpus" in the working directory, split
# into one entry per line. The context manager closes the handle promptly
# (the original left it open for the garbage collector to reclaim).
with open("corpus") as corpusFile:
    corpus = corpusFile.read().split("\n")

# Second name bound to the same list object as ``corpus``.
strs = corpus

# Every character of string.punctuation except the apostrophe, so that
# contractions such as "don't" keep their apostrophe when these characters
# are stripped. getListFreqDict passes this string as ``filterchars``.
# NOTE: the name shadows the builtin filter(); it is kept unchanged because
# getListFreqDict refers to it by this name.
filter = "".join(c for c in string.punctuation if c != "'")
|
|
def getListFreqDict(list):
    """Build a word-frequency dictionary from a list of strings.

    Each string is split on single spaces. Empty tokens and any token
    containing "http://" or "https://" are dropped, and the remaining
    tokens are re-joined with single spaces. All cleaned strings are then
    joined with spaces and passed to ``wordfreq.getFreqDict`` together with
    the module-level ``filter`` punctuation string as ``filterchars``.

    Args:
        list: Iterable of strings. The name shadows the builtin but is kept
            so that existing callers (including keyword callers) still work.

    Returns:
        The value returned by ``wordfreq.getFreqDict`` for the cleaned text
        (presumably a word -> count mapping; confirm against ``wordfreq``).
    """
    cleaned = []
    for s in list:
        # The module-level ``filter`` string shadows the builtin filter(),
        # so calling it (as the original did) raises TypeError; tokens are
        # filtered with a comprehension instead. Dropping empty tokens is
        # what the original's discarded filter(...) call appears to have
        # been meant to do.
        kept = [
            word
            for word in s.split(" ")
            if word and "http://" not in word and "https://" not in word
        ]
        # Lists have no .add(); append is the correct call.
        cleaned.append(" ".join(kept))
    return wordfreq.getFreqDict(" ".join(cleaned), filterchars=filter)
|
|
def getRandomWords(freqDict):
    """Return a random pseudo-sentence drawn from a frequency dictionary.

    Every key of ``freqDict`` is added to a selection pool as many times as
    its count, so more frequent words are proportionally more likely to be
    picked. Between 1 and 20 words are chosen (with replacement), joined by
    single spaces, and terminated with one random character from ".!?;".

    Args:
        freqDict: Mapping of word -> integer count. Keys are formatted with
            "{}".format(); a key containing spaces contributes each of its
            space-separated parts, and empty parts are ignored.

    Returns:
        A string of 1-20 space-separated words followed by one of ".!?;".

    Raises:
        IndexError: If ``freqDict`` yields no words to choose from (for
            example, it is empty or every count is zero).
    """
    pool = []
    for word in freqDict:
        # Split the formatted key so multi-word or empty keys behave as they
        # did when the pool was built by splitting one large string.
        pieces = [p for p in "{}".format(word).split(" ") if p != ""]
        pool.extend(pieces * freqDict[word])

    if not pool:
        # Same exception type random.choice raises on an empty sequence,
        # but with a message that names the actual cause.
        raise IndexError("freqDict yields no words to choose from")

    wordCount = random.randint(1, 20)
    picked = [random.choice(pool) for _ in range(wordCount)]
    return " ".join(picked) + random.choice(".!?;")
|