bragi/mark8.py

161 lines
6.4 KiB
Python

import markovify
import nltk
import json
import re
import time
import os
import random
import language_check
import textwrap
# Modifying mark7.py to re-introduce language-tool to fix the sentences.
# it's not clear that it's actually doing anything! (╯°□°)╯︵ ┻━┻ ...¯\_(ツ)_/¯
# still to do - selectable corpora
# expand corpus collection
# re-introduce cosmic.voyage corpus (after removing message headers and non-alphanumerics?)
#
# notes:
# the time.sleep() lines below are there just to slow down the processing and keep
# the server load below 100% on one or more CPUs. They were added when the supernice
# alias just wasn't enough (in my opinion) to run on tilde.team without possibly affecting
# other users. The time.sleep() lines are not necessary; they are there for shared pubnix love.
#
# there are probably one or more import lines above that do not need to be there.
#
# the lines toward the end are messy and difficult to read. They also insert some
# indicators and numbers into the printed text; these were added for debugging when
# language-check kept crashing: markovify would sometimes return no sentence (None), and
# language-check doesn't accept None. So the result of the markovify call was str()'d, and
# the call itself was placed in a while loop until it returned something that isn't 'None'.
# an intermediate 'matches' variable was also reintroduced.
# all that code is subject to change.
#
# Module-level state shared by the functions and the script body below.
# `tool` and `combined_model` start out empty and are assigned further down
# (grammar checker and merged Markov model); `text`, `matches` and
# `model_json` are placeholders that nothing in the visible part of the file
# reassigns at module level.
tool = text = matches = combined_model = model_json = None

# Placeholder for a generated poem/song title.
title = None

# How many couplets this run prints: a random count between 2 and 14 inclusive.
couplets = random.randint(2, 14)

# Shared wrapper for output lines: 69 columns, continuation lines indented.
wrapper = textwrap.TextWrapper(
    width=69,
    subsequent_indent=" ",
)
#def genHeaders():
# head1 = '''\
# [processing by miniVISR.autoComms @ relay station VHE-0j0-η ]
# [recv'd on 100Hz+-15Hz, 112.358Hz, 112.358KHz, 33.33MHz, 66.66MHz ]
# [radio 112.358MHz, 121.5MHz, 130.167MHz, 143.625MHz, 244.30MHz ]
# [freqs. 358.13MHz, 581.321MHz, 633.9MHz, 922.7MHz, 2113.853MHz ]
# [signals quality: poor. signals reconstuction confidence: 65% ]
# [message transcription relayed to Earthsys QEC for general delivery ]
# [begin transcript ]
# '''.format()
# head2 = '''\
#
# ...able, animal, and mineral,
# I know the skalds of old Solsys, and I quote the verse atypical
# Synechdoche and kenna til, disorder quite limerickal;
# I'm very well disposed as well, to matters a-esthetical,
# I sing love songs to moons both in circle and elliptical,
# About their craters and their phases with my lyrics I'm quite loose,
# Bragi's here in your system to sing, and to share whatever you choose.
# '''.format()
# return 0
#def genFooters():
# countid = str(125925)#{file open stuff - keep a counter in a file?}
#
# foot = '''\
# Bragi's here in your system to sing, and to share whatever you choose.
# Broadcasting on many frequencies every 86,400 seconds. Bragi-{ordinal}
# out.
#
# [end transcript ]
# END MESSAGE."
# '''.format(ordinal=countid)
# print()
# print(foot)
# return 0
def genTitle():
    """
    Return a title built from one to three words of a generated sentence.

    A sentence is requested from the module-level ``combined_model`` (at most
    150 characters, ``min_chars=80``), split on whitespace, and one to three
    words are picked from it at random (the same word may be picked more than
    once). The words are joined with single spaces and any leading/trailing
    spaces, commas, colons and double quotes are stripped.

    markovify returns None when it cannot build a sentence within the limits.
    Previously that None was str()'d, so the title could come out as the
    literal word "None"; now the request is simply repeated until a real
    sentence comes back.
    """
    while True:
        sentence = combined_model.make_short_sentence(150, min_chars=80)
        if sentence is not None:
            break
        # Only failed attempts are throttled, so the normal path is as fast
        # as before while retries stay gentle on a shared host.
        time.sleep(1)

    word_list = sentence.split()
    picked = [random.choice(word_list) for _ in range(random.randint(1, 3))]
    return " ".join(picked).strip(' ,:"')
def genLongLine(minl, maxl):
    """
    Return one generated sentence, grammar-corrected and prefixed with a space.

    minl -- minimum sentence length in characters (passed as ``min_chars``)
    maxl -- maximum sentence length in characters

    The sentence comes from the module-level ``combined_model``. markovify
    returns None when it cannot produce a sentence within the limits, so the
    request is repeated until one is returned. The sentence is then checked
    with the module-level language-check ``tool``; if any problems are
    reported it is passed through ``language_check.correct()``.
    """
    text = None
    # Test the return value itself for None rather than comparing its str()
    # against the word 'None'.
    while text is None:
        text = combined_model.make_short_sentence(maxl, min_chars=minl)
        # Deliberate throttle after every attempt (shared-host courtesy).
        time.sleep(1)

    matches = tool.check(text)
    if matches:
        text = language_check.correct(text, matches)
    return " " + text
def genShortLine(minl, maxl):
    """
    Return one generated sentence, grammar-corrected and prefixed with a space.

    minl -- minimum sentence length in characters (passed as ``min_chars``)
    maxl -- maximum sentence length in characters

    This follows the same steps as genLongLine(); only the lengths the caller
    passes in differ. The sentence comes from the module-level
    ``combined_model``; because markovify returns None when it cannot produce
    a sentence within the limits, the request is repeated until one is
    returned. Any problems reported by the module-level language-check
    ``tool`` are fixed with ``language_check.correct()``.
    """
    text = None
    # Test the return value itself for None rather than comparing its str()
    # against the word 'None'.
    while text is None:
        text = combined_model.make_short_sentence(maxl, min_chars=minl)
        # Deliberate throttle after every attempt (shared-host courtesy).
        time.sleep(1)

    matches = tool.check(text)
    if matches:
        text = language_check.correct(text, matches)
    return " " + text
# same basic structure as in markchainer.py, but this uses saved models.
# (model generation is kept separate from verse generation, both for shared
# pubnix love and to be able to work with growing corpora.)
# this searches a fixed relative path for already-created models,
# reads each model (json) in, converts it from json, and combines the models
# together.
# Load every saved chain (*.mkdch) found in the fixed chains directory and
# fold them, one at a time, into a single combined model.
for file in os.listdir("./corpus/prose/chains/"):
    if not file.endswith(".mkdch"):
        continue
    # Only the read needs the file; close it before the throttling sleeps
    # instead of holding the handle open for several seconds.
    with open("./corpus/prose/chains/" + file) as f:
        model = markovify.Text.from_json(json.load(f))
    time.sleep(5)  # deliberate pause between models (shared-host courtesy)
    # Explicit None test: "no model loaded yet" should not depend on how a
    # model object happens to evaluate as a boolean.
    if combined_model is None:
        combined_model = model
    else:
        time.sleep(5)  # extra pause before the heavier combine step
        combined_model = markovify.combine(models=[combined_model, model])
# Script body: create the grammar checker, then print one poem between the
# "~*~" and "~!~" delimiters: a title, a blank-ish line, and `couplets` pairs
# of generated lines.

# LanguageTool instance (US English) used by genLongLine()/genShortLine().
tool = language_check.LanguageTool('en-US')
# disabling spellchecking, didn't like some of the 'fixes' from testing. using archaic language in testing, will use novel words in the future. to re-enable spellchecking, comment out the following line:
tool.disable_spellchecking()
print("~*~") # This start delimiter is here for testing each run
# genHeaders() #maybe do this in the future
print(" ")
#print("title: " + genTitle())
print(genTitle())
print(" ")
#print("couplets: " + str(couplets))
# NOTE(review): indentation was lost in this copy of the file. The separator
# print(" ") after the two verse lines is assumed to belong inside the loop
# (one separator per couplet) -- confirm against the original layout.
for x in range(couplets):
    print(wrapper.fill(genLongLine(14,30)))
    print(wrapper.fill(genShortLine(10,40)))
    # debugging variants that tag each line as long (l--) or short (s--):
    # print(wrapper.fill(" l--" + genLongLine(30,80)))
    # print(wrapper.fill(" s--" + genShortLine(20,56)))
    print(" ")
print("~!~") # this ending delimiter is here for testing each run
# genFooters() #maybe do this in the future