min/modules/nlp.py

from bot import *

import dataset, random, time, re

async def rec(self, m):
  """Learn the word sequence of message ``m`` into the database.

  For a message made of words w1 .. wn this inserts:
    * w1 into the ``beg`` table,
    * every word into the ``noun`` table,
    * every adjacent pair (previous, next) into the ``prew`` table,
    * wn into the ``end`` table.

  Nothing is recorded when the message has no words at all, or when
  its first word is ``admin``.

  ``self`` is not used by this function.
  """
  prew = shared.db['prew']
  noch = shared.db['noun']
  beg = shared.db['beg']
  end = shared.db['end']
  # split() without an argument collapses runs of whitespace, so doubled,
  # leading or trailing spaces no longer produce '' tokens.  Those empty
  # tokens used to be stored as words and, because '' doubled as the
  # "no previous word" marker, made the following word look like the
  # start of a sentence.
  words = m.split()
  if not words or words[0] == 'admin':
    return
  pre = None  # None (not '') marks "no previous word yet"
  for w in words:
    if pre is None:
      beg.insert(dict(word=w))
    else:
      # NOTE(review): the row never carries an 'id' value, so keying the
      # duplicate check on ['id'] probably never matches an existing
      # row -- confirm whether ['pre', 'pro'] was intended.
      prew.insert_ignore(dict(pre=pre, pro=w),['id'])
    pre = w
    noch.insert(dict(word=w))
  end.insert(dict(word=pre))
  
async def getNoun(self, words, c):
    """Pick the topic word for the reply in conversation ``c``.

    Steps, in order:
      1. Store the incoming message (``words`` joined by spaces) in the
         ``remsg`` table under the topic word previously remembered for
         ``c`` (``None`` if there is none).
      2. Take the word from ``words`` with the lowest row count in the
         ``noun`` table (first such word on ties).
      3. If a previous topic word exists, record the pair
         (previous, new) in the ``conver`` table.
      4. If ``conver`` has rows whose ``pre`` is the new word, replace
         it with a random ``pro`` from those rows.
      5. Remember the result for ``c`` in ``shared.cstate`` and return it.

    ``self`` is not used by this function.
    """
    previous = shared.cstate.get(c)

    shared.db['remsg'].insert_ignore(
        dict(noun=previous, msg=' '.join(words)), ['id'])

    noun_table = shared.db['noun']
    counts = {word: noun_table.count(word=word) for word in words}
    rarest = min(counts, key=counts.get)

    conversation = shared.db['conver']
    if previous is not None:
        print("adding", [previous, rarest])
        conversation.insert_ignore(dict(pre=previous, pro=rarest), ['id'])

    followups = [row['pro'] for row in conversation.find(pre=rarest)]
    print("nextnoun:", followups)
    chosen = random.choice(followups) if followups else rarest

    shared.cstate[c] = chosen
    return chosen
  
async def genOut(self, noun):
  """Build a reply, as a list of words, around the topic word ``noun``.

  If the ``remsg`` table holds messages stored under ``noun``, one of
  them is chosen at random and returned split on single spaces.

  Otherwise a sentence is grown from ``[noun]`` using the word pairs in
  ``prew``: first backwards (prepending a random ``pre`` of the current
  first word), then forwards (appending a random ``pro`` of the current
  last word).  Each direction stops when any of these holds:
    * 7 steps have been attempted,
    * the edge word is present in ``beg`` (resp. ``end``) and its count
      in ``noun`` minus one is not greater than ``step * shared.enmul``,
    * ``prew`` offers no continuation for the edge word.

  Side effect: ``shared.enmul`` is decremented when at most 3 steps were
  attempted in total and incremented when 14 or more were.

  ``self`` is not used by this function.
  """
  stored = [row['msg'] for row in shared.db['remsg'].find(noun=noun)]
  if stored:
    return random.choice(stored).split(' ')
  prew = shared.db['prew']
  beg = shared.db['beg']
  end = shared.db['end']
  nouns = shared.db['noun']
  max_steps = 7
  coun = 0  # total attempted steps across both directions
  out = [noun]

  # Grow towards the start of the sentence.  The cheap step bound is
  # tested first so the database is not queried once the limit is hit.
  step = 0
  while step < max_steps and (
      beg.find_one(word=out[0]) is None
      or nouns.count(word=out[0]) - 1 > step * shared.enmul):
    # A failed attempt still counts as a step, so count before looking.
    step += 1
    coun += 1
    candidates = list(prew.find(pro=out[0]))
    if not candidates:
      break  # nothing is known to precede this word
    out.insert(0, random.choice(candidates)['pre'])

  # Grow towards the end of the sentence, same rules mirrored.
  step = 0
  while step < max_steps and (
      end.find_one(word=out[-1]) is None
      or nouns.count(word=out[-1]) - 1 > step * shared.enmul):
    step += 1
    coun += 1
    candidates = list(prew.find(pre=out[-1]))
    if not candidates:
      break  # nothing is known to follow this word
    out.append(random.choice(candidates)['pro'])

  # Feedback on the ending multiplier based on how many steps were taken.
  if coun <= 3:
    shared.enmul -= 1
  elif coun >= 14:
    shared.enmul += 1

  return out


async def filter(self, c, n, m):
  """Route an incoming message: answer it, learn from it, or ignore it.

  ``c`` is the conversation key, ``n`` the sender and ``m`` the message
  text (presumably channel name, nick and raw text -- confirm against
  the caller that invokes ``shared.rawm`` hooks).

  * While ``shared.qtime[c]`` lies in the future the message is ignored.
  * A message starting with ``shared.prefix`` is answered with the
    prefix removed.
  * A message starting with this bot's nickname followed by a space is
    answered with that lead-in removed.
  * When ``c`` contains no ``#`` and the sender is not this bot, the
    message is answered as-is.
  * Anything else containing at least one space is passively learned,
    but only if more than ``shared.learndelay`` seconds have passed
    since ``shared.learntime``, which is then reset to now.
  """
  if shared.qtime.get(c, 0) > time.time():
    return

  # Work out which text, if any, should be answered.
  to_answer = None
  if m.startswith(shared.prefix):
    to_answer = m[len(shared.prefix):]
  elif m.startswith(self.nickname + ' '):
    to_answer = m[len(self.nickname) + 1:]
  elif '#' not in c and n != self.nickname:
    to_answer = m

  if to_answer is not None:
    await go(self, c, n, to_answer)
    return

  # Not addressed to the bot: learn from it, rate-limited.
  has_several_words = ' ' in m
  if has_several_words and shared.learntime + shared.learndelay < time.time():
    await rec(self, m)
    shared.learntime = time.time()

async def go(self, c, n, m):
    """Learn from message ``m`` and send a generated reply to ``c``.

    The message is first recorded with ``rec``.  It is then split into
    words with ``.``, ``,``, ``?`` and ``!`` separated out as their own
    tokens, a topic word is chosen with ``getNoun`` and a reply is built
    with ``genOut``.  In the reply the space before those punctuation
    marks is removed again.  A trailing ``\\x01`` is dropped when the
    reply does not also start with ``\\x01``.

    Nothing is sent when the message is empty or whitespace-only, when
    its first word is ``admin``, or when the generated reply is empty.

    ``n`` is not used by this function.
    """
    # A bare prefix or nickname leaves nothing to answer; stop before
    # recording it and before indexing into an empty word list.
    if not m.strip():
        return
    await rec(self, m)
    words = re.sub(r'([\.,\?!])', r' \1', m).split()
    if words[0] == 'admin':
        return
    noun = await getNoun(self, words, c)
    reply_words = await genOut(self, noun)
    msg = re.sub(r' ([\.,\?!])', r'\1', ' '.join(reply_words))
    if not msg:
        # Nothing to say; also avoids indexing an empty string below.
        return
    if msg[-1] == "\x01" and msg[0] != "\x01":
        msg = msg[:-1]
    await self.message(c, msg)

async def init(self):
  """Initialise this module's shared state and register its message hook."""

  # Per-conversation state.
  # cstate: topic word last chosen by getNoun for each conversation.
  # qtime:  timestamp per conversation; filter ignores messages for a
  #         conversation while its entry is still in the future.
  shared.cstate = {}
  shared.qtime = {}

  # Passive learning throttle: filter only learns from a message that is
  # not addressed to the bot when more than `learndelay` seconds have
  # passed since `learntime`.
  shared.learntime = 0
  shared.learndelay = 1

  # Sentence-ending weight: lower means longer sentences, higher means
  # shorter sentences.  genOut adjusts it by one depending on how many
  # steps a generated sentence took.
  shared.enmul = 9

  # Register filter as the 'nlp' raw-message handler.
  shared.rawm['nlp'] = filter
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`from bot import *`
clear out the oven for some ai stuff 2020-04-19 23:52:41 +00:00
pad punctuation 2021-07-26 16:51:02 +00:00			`import dataset, random, time, re`
clear out the oven for some ai stuff 2020-04-19 23:52:41 +00:00
functional 2020-04-23 01:43:07 +00:00			`async def rec(self, m):`
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`prew = shared.db['prew']`
			`noch = shared.db['noun']`
			`beg = shared.db['beg']`
			`end = shared.db['end']`
functional 2020-04-23 01:43:07 +00:00			`pre = ''`
			`words = m.split(' ')`
beep boop some bugs 2020-04-23 02:14:17 +00:00			`if words[0] == 'admin':`
			`return`
functional 2020-04-23 01:43:07 +00:00			`for w in words:`
			`if pre == '':`
			`beg.insert(dict(word=w))`
			`else:`
no duplicates save on storage 2020-06-19 15:45:32 +00:00			`prew.insert_ignore(dict(pre=pre, pro=w),['id'])`
functional 2020-04-23 01:43:07 +00:00			`pre = w`
			`noch.insert(dict(word=w))`
			`end.insert(dict(word=pre))`

conversation flow 2020-07-12 12:21:53 +00:00			`async def getNoun(self, words, c):`
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`if c in shared.cstate:`
			`oldnoun = shared.cstate[c]`
conversation flow 2020-07-12 12:21:53 +00:00			`else:`
			`oldnoun = None`
reuse messages from other people 2021-01-31 01:49:56 +00:00
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`shared.db['remsg'].insert_ignore(dict(noun=oldnoun,msg=' '.join(words)),['id'])`
reuse messages from other people 2021-01-31 01:49:56 +00:00
Improve message generation speed This moves most of the "heavy lifting" of the database queries into sqlite itself, which is much faster at it. 2021-09-26 03:21:27 +00:00			`nouns = shared.db['noun']`
conversation flow 2020-07-12 12:21:53 +00:00			`out = {}`
			`for i in words:`
Improve message generation speed This moves most of the "heavy lifting" of the database queries into sqlite itself, which is much faster at it. 2021-09-26 03:21:27 +00:00			`out[i] = nouns.count(word=i)`
conversation flow 2020-07-12 12:21:53 +00:00			`noun = min(out, key=out.get)`

port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`conversation = shared.db['conver']`
conversation flow 2020-07-12 12:21:53 +00:00			`if oldnoun != None:`
			`print("adding", [oldnoun,noun])`
			`conversation.insert_ignore(dict(pre=oldnoun,pro=noun),['id'])`

			`nextnoun = [i['pro'] for i in conversation.find(pre=noun)]`
			`print("nextnoun:",nextnoun)`
			`if len(nextnoun) > 0:`
			`noun = random.choice(nextnoun)`
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`shared.cstate[c] = noun`
conversation flow 2020-07-12 12:21:53 +00:00			`return noun`
functional 2020-04-23 01:43:07 +00:00
			`async def genOut(self, noun):`
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`oldresponses = [i['msg'] for i in shared.db['remsg'].find(noun=noun)]`
reuse messages from other people 2021-01-31 01:49:56 +00:00			`if len(oldresponses) > 0:`
			`return random.choice(oldresponses).split(' ')`
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`prew = shared.db['prew']`
Improve message generation speed This moves most of the "heavy lifting" of the database queries into sqlite itself, which is much faster at it. 2021-09-26 03:21:27 +00:00			`beg = shared.db['beg']`
			`end = shared.db['end']`
			`nouns = shared.db['noun']`
functional 2020-04-23 01:43:07 +00:00			`iter=0`
increse sentance ending if the sentances are too long automatically 2021-08-07 13:16:31 +00:00			`coun=0`
functional 2020-04-23 01:43:07 +00:00			`out = [noun]`
Improve message generation speed This moves most of the "heavy lifting" of the database queries into sqlite itself, which is much faster at it. 2021-09-26 03:21:27 +00:00			`while (beg.find_one(word=out[0]) is None or nouns.count(word=out[0])-1 > iter * shared.enmul) and iter < 7:`
fixed not responding bug 2020-04-23 20:05:16 +00:00			`try:`
			`out = [ random.choice(list(prew.find(pro=out[0])))['pre'] ] + out`
			`except IndexError:`
			`iter += 69`
functional 2020-04-23 01:43:07 +00:00			`iter += 1`
increse sentance ending if the sentances are too long automatically 2021-08-07 13:16:31 +00:00			`coun += 1`
functional 2020-04-23 01:43:07 +00:00			`iter = 0`
Improve message generation speed This moves most of the "heavy lifting" of the database queries into sqlite itself, which is much faster at it. 2021-09-26 03:21:27 +00:00			`while (end.find_one(word=out[-1]) is None or nouns.count(word=out[-1])-1 > iter * shared.enmul) and iter < 7:`
fixed not responding bug 2020-04-23 20:05:16 +00:00			`try:`
			`out.append(random.choice(list(prew.find(pre=out[-1])))['pro'])`
			`except IndexError:`
			`iter += 69`
functional 2020-04-23 01:43:07 +00:00			`iter += 1`
increse sentance ending if the sentances are too long automatically 2021-08-07 13:16:31 +00:00			`coun += 1`
automatically control sentance ending multiplier 2021-08-10 14:22:37 +00:00
			`if coun <= 3:`
			`shared.enmul -= 1`
			`elif coun >= 14:`
increse sentance ending if the sentances are too long automatically 2021-08-07 13:16:31 +00:00			`shared.enmul += 1`
automatically control sentance ending multiplier 2021-08-10 14:22:37 +00:00
functional 2020-04-23 01:43:07 +00:00			`return out`


			`async def filter(self, c, n, m):`
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`if c in shared.qtime and shared.qtime[c] > time.time():`
allow ops to tell the bot to shut up lol 2020-04-24 14:19:54 +00:00			`return`
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`if m[:len(shared.prefix)] == shared.prefix:`
			`m = m[len(shared.prefix):]`
at jan6's request it now works without colon 2020-04-23 22:05:43 +00:00			`await go(self, c, n, m)`
merp merp merp 2021-05-22 22:55:16 +00:00			`elif m[:len(self.nickname)+1] == self.nickname+' ':`
			`m = m[len(self.nickname)+1:]`
at jan6's request it now works without colon 2020-04-23 22:05:43 +00:00			`await go(self, c, n, m)`
dont require prefix in pms 2021-05-22 20:51:55 +00:00			`elif '#' not in c and n != self.nickname:`
			`await go(self, c, n, m)`
collect even more data 2020-04-27 14:42:23 +00:00			`else:`
			`if len(m.split(' ')) > 1:`
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`if shared.learntime + shared.learndelay < time.time():`
new nick lol 2020-05-15 20:44:43 +00:00			`await rec(self, m)`
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`shared.learntime = time.time()`
at jan6's request it now works without colon 2020-04-23 22:05:43 +00:00
			`async def go(self, c, n, m):`
functional 2020-04-23 01:43:07 +00:00			`await rec(self, m)`
pad punctuation 2021-07-26 16:51:02 +00:00			`words = re.sub(r'([\.,\?!])', r' \1', m).split()`
beep boop some bugs 2020-04-23 02:14:17 +00:00			`if words[0] == 'admin':`
			`return`
remove trailing \1 if message does not start with \1 (fixes #1) 2021-08-04 00:45:16 +00:00			`msg = re.sub(r' ([\.,\?!])', r'\1', ' '.join(await genOut(self, await getNoun(self, words, c))))`
			`if msg[-1] == "\x01" and msg[0] != "\x01":`
			`msg = msg[:-1]`
			`await self.message(c, msg)`
clear out the oven for some ai stuff 2020-04-19 23:52:41 +00:00
			`async def init(self):`
allow ops to tell the bot to shut up lol 2020-04-24 14:19:54 +00:00
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`shared.qtime = {}`
			`shared.learntime = 0`
change some values 2021-06-02 14:27:38 +00:00
			`# delay between grabbing random messages and passively`
			`# learning.`
merp merp merp 2021-05-22 22:55:16 +00:00			`shared.learndelay = 1`
change some values 2021-06-02 14:27:38 +00:00			`# sentance ending weight, lower means longer sentances,`
			`# higher means shorter sentances. this will need to slowly`
			`# get larger as the database grows`
automatically control sentance ending multiplier 2021-08-10 14:22:37 +00:00			`shared.enmul = 9`
change some values 2021-06-02 14:27:38 +00:00
functional 2020-04-23 01:43:07 +00:00
change some values 2021-06-02 14:27:38 +00:00			`shared.rawm['nlp'] = filter`
port to new balun ircrobots framework 2021-01-31 02:05:34 +00:00			`shared.cstate = {}`