min/modules/nlp.py

184 lines
5.0 KiB
Python
Raw Normal View History

2021-01-31 02:05:34 +00:00
from bot import *
2020-04-19 23:52:41 +00:00
2021-07-26 16:51:02 +00:00
import dataset, random, time, re
2020-04-19 23:52:41 +00:00
2022-01-27 02:26:57 +00:00
def get(l, i):
    """Return l[i] when i is a valid non-negative index, else "".

    Negative indices are deliberately rejected (they would wrap around),
    and out-of-range indices yield the empty string used as n-gram padding.
    """
    # Original tested `i <= len(l)`, an off-by-one that relied on the
    # IndexError handler for i == len(l); test the bound correctly instead.
    if 0 <= i < len(l):
        return l[i]
    return ""
2022-01-27 02:26:57 +00:00
async def rec(self, m):
    """Learn message *m*: record its first/last words, per-word noun counts,
    and a 6-word sliding context window into the n-gram table."""
    # Punctuation becomes separate tokens so it can be learned as words.
    words = re.sub(r"([\.,\?!])", r" \1", m).split()
    # Length check comes first so an empty message cannot raise IndexError
    # on words[0] (the original read words[0] before checking the length).
    if len(words) < 2 or words[0] == "admin":
        return
    prew = shared.db["prew"]
    noch = shared.db["noun"]
    beg = shared.db["beg"]
    end = shared.db["end"]
    beg.insert(dict(word=words[0]))
    end.insert(dict(word=words[-1]))
    for w in range(len(words)):
        if w > 0:
            # get() pads out-of-range context positions with "".
            prew.insert_ignore(
                dict(
                    pre3=get(words, w - 3),
                    pre2=get(words, w - 2),
                    pre=get(words, w - 1),
                    pro=get(words, w),
                    pro2=get(words, w + 1),
                    pro3=get(words, w + 2),
                ),
                ["id"],
            )
        # NOTE(review): indentation was lost in this copy; noun counting is
        # assumed to apply to every word, not only w > 0 — confirm upstream.
        noch.insert(dict(word=get(words, w)))
2020-07-12 12:21:53 +00:00
async def getNoun(self, words, c):
    """Choose a topic noun for channel *c* from candidate *words*.

    Picks the rarest word (fewest occurrences in the noun table), links it
    to the channel's previous topic in the conversation table, optionally
    follows an existing conversation chain, and stores the result as the
    channel's new state.
    """
    # Previous topic noun for this channel, if any.
    oldnoun = shared.cstate.get(c)

    nouns = shared.db["noun"]
    # Frequency of each candidate in the noun table; rarest wins
    # (ties resolve to the earliest word, same as the original).
    counts = {w: nouns.count(word=w) for w in words}
    noun = min(counts, key=counts.get)

    conversation = shared.db["conver"]
    if oldnoun is not None:
        print("adding", [oldnoun, noun])
        conversation.insert_ignore(dict(pre=oldnoun, pro=noun), ["id"])

    # Follow an existing conversation edge from this noun, if one exists.
    nextnoun = [row["pro"] for row in conversation.find(pre=noun)]
    print("nextnoun:", nextnoun)

    if nextnoun:
        noun = random.choice(nextnoun)

    shared.cstate[c] = noun
    return noun
2022-01-27 02:26:57 +00:00
2020-04-23 01:43:07 +00:00
async def genOut(self, noun):
    """Grow a sentence outward from *noun* using the n-gram table.

    Prepends words until out[0] is a plausible sentence beginning, then
    appends words until out[-1] is a plausible ending; each direction is
    bounded by shared.maxiter. Also adapts shared.enmul (the sentence-
    ending weight) based on how many words were produced.
    """
    prew = shared.db["prew"]
    beg = shared.db["beg"]
    end = shared.db["end"]
    # `steps` renamed from `iter`, which shadowed the builtin; the unused
    # local `nouns` from the original has been dropped.
    steps = 0
    coun = 0
    out = [noun]
    # Extend backwards. The loop keeps going while out[0] is not a known
    # beginning, or is too weak a beginning for the current ending weight.
    while (
        beg.find_one(word=out[0]) is None
        or beg.count(word=out[0]) - 1 < shared.enmul / (1 + steps / shared.maxiter)
    ) and steps < shared.maxiter:
        try:
            # Prefer the longest context (3 following words); IndexError
            # from out[1]/out[2] or from random.choice on an empty match
            # list falls through to the shorter contexts below.
            out = [
                random.choice(list(prew.find(pro=out[0], pro2=out[1], pro3=out[2])))[
                    "pre"
                ]
            ] + out
        except IndexError:
            try:
                out = [
                    random.choice(list(prew.find(pro=out[0], pro2=out[1])))["pre"]
                ] + out
            except IndexError:
                try:
                    out = [random.choice(list(prew.find(pro=out[0])))["pre"]] + out
                except IndexError:
                    # No match at all: jump the counter past maxiter so the
                    # loop stops extending in this direction.
                    steps += 69420
        steps += 1
        coun += 1
    steps = 0
    # Extend forwards, mirroring the backward pass.
    while (
        end.find_one(word=out[-1]) is None
        or end.count(word=out[-1]) - 1 < shared.enmul / (1 + steps / shared.maxiter)
    ) and steps < shared.maxiter:
        try:
            out.append(
                random.choice(list(prew.find(pre3=out[-3], pre2=out[-2], pre=out[-1])))[
                    "pro"
                ]
            )
        except IndexError:
            try:
                out.append(
                    random.choice(list(prew.find(pre2=out[-2], pre=out[-1])))["pro"]
                )
            except IndexError:
                try:
                    out.append(random.choice(list(prew.find(pre=out[-1])))["pro"])
                except IndexError:
                    steps += 69420
        steps += 1
        coun += 1
    # Adaptive tuning: very short outputs raise the ending weight (longer
    # sentences next time); hitting the iteration cap lowers it.
    if coun <= 4:
        shared.enmul += 1
    elif coun >= shared.maxiter:
        shared.enmul -= 1
    print(f"coun {coun} enmul {shared.enmul} maxiter {shared.maxiter}")
    return out
2020-04-23 01:43:07 +00:00
async def filter(self, c, n, m):
    """Raw-message hook: reply via go() when addressed, otherwise passively
    learn from channel chatter at a rate-limited interval."""
    # Respect the per-channel quiet period, if one is set.
    if c in shared.qtime and shared.qtime[c] > time.time():
        return
    if m.startswith(shared.prefix):
        # Command-prefixed message: strip the prefix and respond.
        await go(self, c, n, m[len(shared.prefix):])
    elif m.startswith(self.nickname + " "):
        # Directly addressed by nick: strip "<nick> " and respond.
        await go(self, c, n, m[len(self.nickname) + 1:])
    elif c[0] not in self.isupport.chantypes and n != self.nickname:
        # Private message from someone else: always respond.
        await go(self, c, n, m)
    elif len(m.split()) > 1:
        # Passive learning, rate-limited by learndelay.
        if shared.learntime + shared.learndelay < time.time():
            await rec(self, m)
            shared.learntime = time.time()
async def go(self, c, n, m):
    """Learn from message *m*, then generate and send a reply to *c*."""
    await rec(self, m)
    # Same tokenization as rec(): punctuation split off as its own tokens.
    words = re.sub(r"([\.,\?!])", r" \1", m).split()
    # Empty input cannot seed generation (the original raised IndexError
    # on words[0] here); "admin" lines belong to other handlers.
    if not words or words[0] == "admin":
        return
    # Generate from the chosen topic noun, then re-attach punctuation.
    msg = re.sub(
        r" ([\.,\?!])",
        r"\1",
        " ".join(await genOut(self, await getNoun(self, words, c))),
    )
    # Drop a stray trailing CTCP delimiter unless the whole reply is a CTCP.
    if msg[-1] == "\x01" and msg[0] != "\x01":
        msg = msg[:-1]
    await self.send(build("PRIVMSG", [c, msg]))
2020-04-19 23:52:41 +00:00
2022-01-27 02:26:57 +00:00
2020-04-19 23:52:41 +00:00
async def init(self):
    """Module setup: seed the shared NLP state and register the hook."""
    # Per-channel quiet-until timestamps and conversation state.
    shared.qtime = {}
    shared.cstate = {}
    # Passive-learning rate limit: seconds between learned messages.
    shared.learntime = 0
    shared.learndelay = 1
    # Sentence-ending weight: higher values produce longer sentences,
    # lower values shorter ones. This needs to creep upward as the
    # database grows.
    shared.enmul = 200
    # Cap on generation iterations per direction in genOut().
    shared.maxiter = 14
    # Receive every raw message through filter().
    shared.rawm["nlp"] = filter