bragi/mark8.py

import markovify
import nltk
import json
import re
import time
import os
import random
import language_check
import textwrap

# Modifying mark7.py to re-introduce language-tool to fix the sentences.
# it's not clear that it's actually doing anything! (╯°□°）╯︵ ┻━┻ ...¯\_(ツ)_/¯
# still to do - selectable corpuses
# expand corpus collection
# re-introduce cosmic.voyage corpus (after removing message headers and non alphanumerics?)
#
# notes:
# the time.sleep() lines below are there just to slow down the processing to keep
# the server burden below 100% on one or more CPUs. These were added, when the supernice
# alias just wasn't enough (in my opinion) to run on tilde.team without possibly affecting
# other users. The time.sleep() lines are not necessary, they are there for shared pubnix love.
#
# there are probably one or more import lines above that do not need to be there.
#
# the lines toward the end are messy and difficult to read, and they insert some
# indicators and numbers into the printed text. Those were added for debugging when
# language-check kept crashing: markovify would sometimes return a blank sentence (None),
# and language-check doesn't like variables of type None. So the product of the markovify
# call was str()'d, and the call itself was placed in a while loop until it actually
# returned something that isn't 'None'. An intermediate 'matches' variable was reintroduced.
# all that code is subject to change.
#

# module-level state shared by the functions and the script further down.
# tool and combined_model start out empty and are filled in later in this
# file (grammar checker and merged markov model); text, matches, model_json
# and title are placeholders that are not assigned again at module level
# here (title is reserved for a generated poem/song title).
tool = text = matches = combined_model = model_json = title = None
# how many couplets this run prints: a random integer from 2 through 14
couplets = random.randint(2, 14)
# output is wrapped at 69 columns; wrapped continuation lines get a hanging indent
wrapper = textwrap.TextWrapper(
    width=69,
    subsequent_indent="            ",
)

#def genHeaders():
#    head1 = '''\
#	    [processing by miniVISR.autoComms @ relay station VHE-0j0-η         ]
#	    [recv'd on 100Hz+-15Hz, 112.358Hz, 112.358KHz, 33.33MHz, 66.66MHz   ]
# 	    [radio     112.358MHz, 121.5MHz, 130.167MHz, 143.625MHz, 244.30MHz  ]
#	    [freqs.    358.13MHz, 581.321MHz, 633.9MHz, 922.7MHz, 2113.853MHz   ]
#	    [signals quality: poor.      signals reconstuction confidence: 65%  ]
#	    [message transcription relayed to Earthsys QEC for general delivery ]
#	    [begin transcript                                                   ]
#	    '''.format()
#	head2 = '''\
#
#                      ...able, animal, and mineral,
#            I know the skalds of old Solsys, and I quote the verse atypical
#            Synechdoche and kenna til, disorder quite limerickal;
#            I'm very well disposed as well, to matters a-esthetical,
#            I sing love songs to moons both in circle and elliptical,
#            About their craters and their phases with my lyrics I'm quite loose,
#            Bragi's here in your system to sing, and to share whatever you choose.
#            '''.format()
#    return 0

#def genFooters():
#    countid = str(125925)#{file open stuff - keep a counter in a file?}
#
#    foot =  '''\
#            Bragi's here in your system to sing, and to share whatever you choose.
#            Broadcasting on many frequencies every 86,400 seconds. Bragi-{ordinal}
#            out.
#
#            [end transcript                                                     ]
#            END MESSAGE."
#            '''.format(ordinal=countid)
#    print()
#    print(foot)

#    return 0

def genTitle(max_attempts=10):
    """
    build a title out of one to three words picked at random (repeats
    allowed) from a freshly generated markov sentence of 80-150 chars.

    markovify's make_short_sentence() hands back None when it can't come
    up with a sentence inside the requested length window. the result
    used to be str()'d, so a failed call produced titles made of the
    word "None". now the call is retried up to max_attempts times and a
    failed run yields an empty title instead.

    max_attempts: how many times to ask the model for a source sentence
                  before giving up (default 10).
    returns: the title with spaces, commas, colons and double quotes
             stripped from both ends, or "" if no sentence was produced.
    """
    words = []
    for _ in range(max_attempts):
        sentence = combined_model.make_short_sentence(150, min_chars=80)
        words = sentence.split() if sentence else []
        if words:
            break
    if not words:
        # nothing usable came back; an empty title beats printing "None"
        return ""
    # the word count is drawn first, then each word, same as before
    picks = [random.choice(words) for _ in range(random.randint(1, 3))]
    return " ".join(picks).strip(' ,:"')

def genLongLine(minl, maxl):
    """
    give me a min length and a max length and i'll return a markov
    sentence between those lengths, prefixed with a single space.

    the model is asked again (sleeping one second after every attempt,
    to stay gentle on the shared server) until it actually returns a
    sentence; markovify returns None when it can't build one that fits.
    only the finished sentence is run through language-check: if
    tool.check() reports matches, language_check.correct() is applied.
    the grammar check used to sit inside the retry loop, which sent the
    placeholder text "None" to the checker after every failed attempt.

    minl: minimum sentence length in characters (passed as min_chars).
    maxl: maximum sentence length in characters.
    returns: " " + the (possibly corrected) sentence.
    """
    sentence = None
    while not sentence:
        sentence = combined_model.make_short_sentence(maxl, min_chars=minl)
        time.sleep(1)

    matches = tool.check(sentence)
    if matches:
        sentence = language_check.correct(sentence, matches)
    return " " + sentence

def genShortLine(minl, maxl):
    """
    give me a min length and a max length and i'll return a markov
    sentence between those lengths, prefixed with six spaces so it
    prints indented.

    the model is asked again (sleeping one second after every attempt,
    to stay gentle on the shared server) until it actually returns a
    sentence; markovify returns None when it can't build one that fits.
    only the finished sentence is run through language-check: if
    tool.check() reports matches, language_check.correct() is applied.
    the grammar check used to sit inside the retry loop, which sent the
    placeholder text "None" to the checker after every failed attempt.

    minl: minimum sentence length in characters (passed as min_chars).
    maxl: maximum sentence length in characters.
    returns: six spaces + the (possibly corrected) sentence.
    """
    sentence = None
    while not sentence:
        sentence = combined_model.make_short_sentence(maxl, min_chars=minl)
        time.sleep(1)

    matches = tool.check(sentence)
    if matches:
        sentence = language_check.correct(sentence, matches)
    return "      " + sentence

# same basic structure as in markchainer.py, but this uses saved models
# (the model generation procedure is kept separate from the verse generation
# procedure, both for shared pubnix love and to be able to work with growing
# corpora). this searches a fixed relative path for already-created models,
# reads each model (json) in, converts it from json, and combines the models
# together.
# walk the chains directory, load every saved chain (*.mkdch) and fold it
# into combined_model. the sleeps throttle the work for the sake of the
# shared server; they now run after the file has been closed instead of
# while it is still held open.
for file in os.listdir("./corpus/prose/chains/"):
    if not file.endswith(".mkdch"):
        continue
    with open("./corpus/prose/chains/" + file) as f:
        model = markovify.Text.from_json(json.load(f))
    time.sleep(5)
    if combined_model is None:
        # first chain found: it becomes the starting model
        combined_model = model
    else:
        time.sleep(5)
        combined_model = markovify.combine(models=[combined_model, model])

# without at least one chain there is nothing to generate from; stop here
# with a clear message instead of failing later on a None model.
if combined_model is None:
    raise SystemExit("no .mkdch model files found in ./corpus/prose/chains/")

# start the language-check grammar checker (US English) that the line
# generators call through the module-level 'tool' variable.
tool = language_check.LanguageTool('en-US')
# spellchecking is disabled: some of the 'fixes' it made during testing were
# not wanted. testing used archaic language, and novel words will be used in
# the future. to re-enable spellchecking, comment out the following line:
tool.disable_spellchecking()

# print one poem: start delimiter, title, the couplets, end delimiter.
# the delimiters are here to make each run easy to pick out while testing.
print("~*~")  # start delimiter
# genHeaders()  # maybe do this in the future
print("  ")
# debugging variant of the title line:
#print("title: " + genTitle())
print(genTitle())
print("  ")
# debugging aid:
#print("couplets: " + str(couplets))
# every couplet is a genLongLine(14, 30) line followed by a
# genShortLine(10, 40) line, each wrapped before it is printed.
# debugging variants with visible markers and longer lines:
#    print(wrapper.fill("  l--" + genLongLine(30,80)))
#    print(wrapper.fill("  s--" + genShortLine(20,56)))
for _ in range(couplets):
    for make_line, min_len, max_len in ((genLongLine, 14, 30), (genShortLine, 10, 40)):
        print(wrapper.fill(make_line(min_len, max_len)))
print("  ")
print("~!~")  # end delimiter
# genFooters()  # maybe do this in the future