min/modules/nlp.py

184 lines
5.0 KiB
Python
Raw Normal View History

2021-01-31 02:05:34 +00:00
from bot import *
2020-04-19 23:52:41 +00:00
2021-07-26 16:51:02 +00:00
import dataset, random, time, re
2020-04-19 23:52:41 +00:00
2022-01-27 02:26:57 +00:00
def get(l, i):
    """Return l[i] when i is a valid non-negative index, else "".

    Negative indices are deliberately rejected (they would wrap around),
    and out-of-range indices yield the empty string used as n-gram padding.
    """
    # Original tested `i <= len(l)`, an off-by-one that relied on the
    # IndexError handler for i == len(l); test the bound correctly instead.
    if 0 <= i < len(l):
        return l[i]
    return ""
2022-01-27 02:26:57 +00:00
async def rec(self, m):
    """Learn message *m*: record its first/last words, per-word noun counts,
    and a 6-word sliding context window into the n-gram table."""
    # Punctuation becomes separate tokens so it can be learned as words.
    words = re.sub(r"([\.,\?!])", r" \1", m).split()
    # Length check comes first so an empty message cannot raise IndexError
    # on words[0] (the original read words[0] before checking the length).
    if len(words) < 2 or words[0] == "admin":
        return
    prew = shared.db["prew"]
    noch = shared.db["noun"]
    beg = shared.db["beg"]
    end = shared.db["end"]
    beg.insert(dict(word=words[0]))
    end.insert(dict(word=words[-1]))
    for w in range(len(words)):
        if w > 0:
            # get() pads out-of-range context positions with "".
            prew.insert_ignore(
                dict(
                    pre3=get(words, w - 3),
                    pre2=get(words, w - 2),
                    pre=get(words, w - 1),
                    pro=get(words, w),
                    pro2=get(words, w + 1),
                    pro3=get(words, w + 2),
                ),
                ["id"],
            )
        # NOTE(review): indentation was lost in this copy; noun counting is
        # assumed to apply to every word, not only w > 0 — confirm upstream.
        noch.insert(dict(word=get(words, w)))
2020-07-12 12:21:53 +00:00
async def getNoun(self, words, c):
    """Choose a topic noun for channel *c* from candidate *words*.

    Picks the rarest word (fewest occurrences in the noun table), links it
    to the channel's previous topic in the conversation table, optionally
    follows an existing conversation chain, and stores the result as the
    channel's new state.
    """
    # Previous topic noun for this channel, if any.
    oldnoun = shared.cstate.get(c)

    nouns = shared.db["noun"]
    # Frequency of each candidate in the noun table; rarest wins
    # (ties resolve to the earliest word, same as the original).
    counts = {w: nouns.count(word=w) for w in words}
    noun = min(counts, key=counts.get)

    conversation = shared.db["conver"]
    if oldnoun is not None:
        print("adding", [oldnoun, noun])
        conversation.insert_ignore(dict(pre=oldnoun, pro=noun), ["id"])

    # Follow an existing conversation edge from this noun, if one exists.
    nextnoun = [row["pro"] for row in conversation.find(pre=noun)]
    print("nextnoun:", nextnoun)

    if nextnoun:
        noun = random.choice(nextnoun)

    shared.cstate[c] = noun
    return noun
2022-01-27 02:26:57 +00:00
2020-04-23 01:43:07 +00:00
async def genOut(self, noun):
    """Grow a sentence outward from *noun* using the n-gram table.

    Prepends words until out[0] is a plausible sentence beginning, then
    appends words until out[-1] is a plausible ending; each direction is
    bounded by shared.maxiter. Also adapts shared.enmul (the sentence-
    ending weight) based on how many words were produced.
    """
    prew = shared.db["prew"]
    beg = shared.db["beg"]
    end = shared.db["end"]
    # `steps` renamed from `iter`, which shadowed the builtin; the unused
    # local `nouns` from the original has been dropped.
    steps = 0
    coun = 0
    out = [noun]
    # Extend backwards. The loop keeps going while out[0] is not a known
    # beginning, or is too weak a beginning for the current ending weight.
    while (
        beg.find_one(word=out[0]) is None
        or beg.count(word=out[0]) - 1 < shared.enmul / (1 + steps / shared.maxiter)
    ) and steps < shared.maxiter:
        try:
            # Prefer the longest context (3 following words); IndexError
            # from out[1]/out[2] or from random.choice on an empty match
            # list falls through to the shorter contexts below.
            out = [
                random.choice(list(prew.find(pro=out[0], pro2=out[1], pro3=out[2])))[
                    "pre"
                ]
            ] + out
        except IndexError:
            try:
                out = [
                    random.choice(list(prew.find(pro=out[0], pro2=out[1])))["pre"]
                ] + out
            except IndexError:
                try:
                    out = [random.choice(list(prew.find(pro=out[0])))["pre"]] + out
                except IndexError:
                    # No match at all: jump the counter past maxiter so the
                    # loop stops extending in this direction.
                    steps += 69420
        steps += 1
        coun += 1
    steps = 0
    # Extend forwards, mirroring the backward pass.
    while (
        end.find_one(word=out[-1]) is None
        or end.count(word=out[-1]) - 1 < shared.enmul / (1 + steps / shared.maxiter)
    ) and steps < shared.maxiter:
        try:
            out.append(
                random.choice(list(prew.find(pre3=out[-3], pre2=out[-2], pre=out[-1])))[
                    "pro"
                ]
            )
        except IndexError:
            try:
                out.append(
                    random.choice(list(prew.find(pre2=out[-2], pre=out[-1])))["pro"]
                )
            except IndexError:
                try:
                    out.append(random.choice(list(prew.find(pre=out[-1])))["pro"])
                except IndexError:
                    steps += 69420
        steps += 1
        coun += 1
    # Adaptive tuning: very short outputs raise the ending weight (longer
    # sentences next time); hitting the iteration cap lowers it.
    if coun <= 4:
        shared.enmul += 1
    elif coun >= shared.maxiter:
        shared.enmul -= 1
    print(f"coun {coun} enmul {shared.enmul} maxiter {shared.maxiter}")
    return out
2020-04-23 01:43:07 +00:00
async def filter(self, c, n, m):
    """Raw-message hook: reply via go() when addressed, otherwise passively
    learn from channel chatter at a rate-limited interval."""
    # Respect the per-channel quiet period, if one is set.
    if c in shared.qtime and shared.qtime[c] > time.time():
        return
    if m.startswith(shared.prefix):
        # Command-prefixed message: strip the prefix and respond.
        await go(self, c, n, m[len(shared.prefix):])
    elif m.startswith(self.nickname + " "):
        # Directly addressed by nick: strip "<nick> " and respond.
        await go(self, c, n, m[len(self.nickname) + 1:])
    elif c[0] not in self.isupport.chantypes and n != self.nickname:
        # Private message from someone else: always respond.
        await go(self, c, n, m)
    elif len(m.split()) > 1:
        # Passive learning, rate-limited by learndelay.
        if shared.learntime + shared.learndelay < time.time():
            await rec(self, m)
            shared.learntime = time.time()
async def go(self, c, n, m):
    """Learn from message *m*, then generate and send a reply to *c*."""
    await rec(self, m)
    # Same tokenization as rec(): punctuation split off as its own tokens.
    words = re.sub(r"([\.,\?!])", r" \1", m).split()
    # Empty input cannot seed generation (the original raised IndexError
    # on words[0] here); "admin" lines belong to other handlers.
    if not words or words[0] == "admin":
        return
    # Generate from the chosen topic noun, then re-attach punctuation.
    msg = re.sub(
        r" ([\.,\?!])",
        r"\1",
        " ".join(await genOut(self, await getNoun(self, words, c))),
    )
    # Drop a stray trailing CTCP delimiter unless the whole reply is a CTCP.
    if msg[-1] == "\x01" and msg[0] != "\x01":
        msg = msg[:-1]
    await self.send(build("PRIVMSG", [c, msg]))
2020-04-19 23:52:41 +00:00
2022-01-27 02:26:57 +00:00
2020-04-19 23:52:41 +00:00
async def init(self):
    """Module setup: seed the shared NLP state and register the hook."""
    # Per-channel quiet-until timestamps and conversation state.
    shared.qtime = {}
    shared.cstate = {}
    # Passive-learning rate limit: seconds between learned messages.
    shared.learntime = 0
    shared.learndelay = 1
    # Sentence-ending weight: higher values produce longer sentences,
    # lower values shorter ones. This needs to creep upward as the
    # database grows.
    shared.enmul = 200
    # Cap on generation iterations per direction in genOut().
    shared.maxiter = 14
    # Receive every raw message through filter().
    shared.rawm["nlp"] = filter