161 lines
6.4 KiB
Python
161 lines
6.4 KiB
Python
import markovify
|
|
import nltk
|
|
import json
|
|
import re
|
|
import time
|
|
import os
|
|
import random
|
|
import language_check
|
|
import textwrap
|
|
|
|
# Modifying mark7.py to re-introduce language-tool to fix the sentences.
|
|
# it's not clear that it's actually doing anything! (╯°□°)╯︵ ┻━┻ ...¯\_(ツ)_/¯
|
|
# still to do - selectable corpuses
|
|
# expand corpus collection
|
|
# re-introduce cosmic.voyage corpus (after removing message headers and non alphanumerics?)
|
|
#
|
|
# notes:
|
|
# the time.sleep() lines below are there just to slow down the processing to keep
|
|
# the server burden below 100% on one or more CPUs. These were added, when the supernice
|
|
# alias just wasn't enough (in my opinion) to run on tilde.team without possibly affecting
|
|
# other users. The time.sleep() lines are not necessary, they are there for shared pubnix love.
|
|
#
|
|
# there are probably one or more import lines above that do not need to be there.
|
|
#
|
|
# the lines toward the end are messy and difficult to read. They also insert some
# indicators and numbers into the printed text; these were added for debugging when
# language-check kept crashing: markovify would sometimes return a blank sentence, and
# language-check doesn't like variables of type None. So the product of the markovify
# call was str()'d, and the call itself was placed in a while loop until it actually
# returned something that isn't 'None'.
# an intermediate 'matches' variable was reintroduced.
# all that code is subject to change.
|
|
#
|
|
|
|
# Module-level state. Every placeholder starts out as None; `tool` and
# `combined_model` are given their real values further down the script,
# and `title` is reserved for a generated poem/song title.
tool = text = matches = combined_model = model_json = title = None

# Number of couplets to print on this run: a random integer from 2 to 14,
# both ends included.
couplets = random.randint(2, 14)

# Wraps printed lines at 69 columns; continuation lines are prefixed with
# the subsequent_indent string.
wrapper = textwrap.TextWrapper(subsequent_indent=" ", width=69)
|
|
|
|
#def genHeaders():
|
|
# head1 = '''\
|
|
# [processing by miniVISR.autoComms @ relay station VHE-0j0-η ]
|
|
# [recv'd on 100Hz+-15Hz, 112.358Hz, 112.358KHz, 33.33MHz, 66.66MHz ]
|
|
# [radio 112.358MHz, 121.5MHz, 130.167MHz, 143.625MHz, 244.30MHz ]
|
|
# [freqs. 358.13MHz, 581.321MHz, 633.9MHz, 922.7MHz, 2113.853MHz ]
|
|
# [signals quality: poor. signals reconstuction confidence: 65% ]
|
|
# [message transcription relayed to Earthsys QEC for general delivery ]
|
|
# [begin transcript ]
|
|
# '''.format()
|
|
# head2 = '''\
|
|
#
|
|
# ...able, animal, and mineral,
|
|
# I know the skalds of old Solsys, and I quote the verse atypical
|
|
# Synechdoche and kenna til, disorder quite limerickal;
|
|
# I'm very well disposed as well, to matters a-esthetical,
|
|
# I sing love songs to moons both in circle and elliptical,
|
|
# About their craters and their phases with my lyrics I'm quite loose,
|
|
# Bragi's here in your system to sing, and to share whatever you choose.
|
|
# '''.format()
|
|
# return 0
|
|
|
|
#def genFooters():
|
|
# countid = str(125925)#{file open stuff - keep a counter in a file?}
|
|
#
|
|
# foot = '''\
|
|
# Bragi's here in your system to sing, and to share whatever you choose.
|
|
# Broadcasting on many frequencies every 86,400 seconds. Bragi-{ordinal}
|
|
# out.
|
|
#
|
|
# [end transcript ]
|
|
# END MESSAGE."
|
|
# '''.format(ordinal=countid)
|
|
# print()
|
|
# print(foot)
|
|
|
|
# return 0
|
|
|
|
def genTitle():
    """
    build a title for the poem/song: one to three words picked at random
    (repeats allowed) from a freshly generated 80-150 character sentence.

    markovify's make_short_sentence() returns None when it cannot build a
    sentence inside the requested length window. The old code str()'d that
    result, so a failed generation produced the literal title "None".
    Here the generation is retried a bounded number of times instead, and
    "Untitled" is returned if every attempt fails.

    returns the title with leading/trailing spaces, commas, colons and
    double quotes stripped.
    """
    max_tries = 10
    word_list = []
    for _ in range(max_tries):
        sentence = combined_model.make_short_sentence(150, min_chars=80)
        if sentence:
            word_list = sentence.split()
        if word_list:
            break
        # failed attempt: pause before retrying, same throttling courtesy
        # the line generators use on the shared server.
        time.sleep(1)

    if not word_list:
        return "Untitled"

    # randint() is evaluated before any choice() call, exactly as in the
    # original loop, so the sequence of random draws is unchanged.
    picks = [random.choice(word_list) for _ in range(random.randint(1, 3))]
    return " ".join(picks).strip(' ,:"')
|
|
|
|
def _genLine(minl, maxl):
    """
    shared worker for genLongLine() and genShortLine().

    asks the combined markov model for a sentence of at least `minl` and at
    most `maxl` characters, retrying until one is produced, then runs the
    sentence through the language-check tool and applies its corrections
    when it reports any matches.

    returns the (possibly corrected) sentence prefixed with one space.
    """
    text = None
    # make_short_sentence() returns None when no sentence fits the window.
    # Test for None directly instead of comparing str(text) with 'None'.
    while text is None:
        text = combined_model.make_short_sentence(maxl, min_chars=minl)
        # throttle: one pause after every attempt, successful or not, to
        # keep CPU usage polite on the shared server.
        time.sleep(1)

    matches = tool.check(text)
    if matches:
        text = language_check.correct(text, matches)
    return " " + text


def genLongLine(minl, maxl):
    """
    give me a min length and a max length and i'll return the first line
    of a couplet: a sentence between those lengths, checked for grammar
    errors and, if some are found, run through language-check.correct()
    """
    return _genLine(minl, maxl)


def genShortLine(minl, maxl):
    """
    give me a min length and a max length and i'll return the second line
    of a couplet: a sentence between those lengths, checked for grammar
    errors and, if some are found, run through language-check.correct()
    """
    return _genLine(minl, maxl)
|
|
|
|
# same basic structure as in markchainer.py, but this uses saved models
# (model generation is kept separate from verse generation, for shared
# pubnix love and to be able to work with growing corpora).
# this searches a fixed relative path for already-created models, reads
# each model (json) in, converts it from json, and combines the models
# together into combined_model.
chains_dir = "./corpus/prose/chains/"

for chain_file in os.listdir(chains_dir):
    if not chain_file.endswith(".mkdch"):
        continue

    # Close the file as soon as it is parsed rather than holding it open
    # across the throttling sleeps below.
    with open(os.path.join(chains_dir, chain_file), encoding="utf-8") as f:
        model = markovify.Text.from_json(json.load(f))
    time.sleep(5)  # throttle after each load

    if combined_model is None:
        combined_model = model
    else:
        time.sleep(5)  # throttle again before the heavier combine step
        combined_model = markovify.combine(models=[combined_model, model])

# Without at least one model nothing below can work; stop here with a clear
# message instead of failing later on combined_model being None.
if combined_model is None:
    raise SystemExit("no .mkdch model files found in " + chains_dir)
|
|
|
|
# Start the grammar checker for US English; the line generators pass every
# generated sentence through it.
tool = language_check.LanguageTool('en-US')
# Spell checking is switched off: some of its 'fixes' were unwelcome in
# testing (archaic language now, novel words planned for the future).
# To re-enable spell checking, comment out the following line.
tool.disable_spellchecking()

print("~*~")  # start delimiter, here for testing each run
# genHeaders()  # maybe do this in the future
print(" ")
# print("title: " + genTitle())
print(genTitle())
print(" ")
# print("couplets: " + str(couplets))

# Each couplet is a line from genLongLine(14, 30) followed by a line from
# genShortLine(10, 40), both wrapped before printing.
# Earlier debugging variants, kept for reference:
#   print(wrapper.fill(" l--" + genLongLine(30,80)))
#   print(wrapper.fill(" s--" + genShortLine(20,56)))
# NOTE(review): the indentation of this file was lost; the separator
# print(" ") is assumed to sit inside the couplet loop - confirm.
for _ in range(couplets):
    for make_line, shortest, longest in ((genLongLine, 14, 30), (genShortLine, 10, 40)):
        print(wrapper.fill(make_line(shortest, longest)))
    print(" ")

print("~!~")  # end delimiter, here for testing each run
# genFooters()  # maybe do this in the future
|