added mark8e.py for epic poetry lookalike

2019-11-20 14:21:02 -05:00 · 2019-11-20 14:21:02 -05:00 · 3d3a35a711
parent 4316741cf1
commit 3d3a35a711
1 changed files with 165 additions and 0 deletions
--- a/mark8e.py
+++ b/mark8e.py
@ -0,0 +1,165 @@
+import markovify
+import nltk
+import json
+import re
+import time
+import os
+import random
+import language_check
+import textwrap
+
+# Modified mark8.py to generate something that looks like epic poetry. Still, this does not
+# handle rhyming schemes and other poetics.
+# This generates a LaTeX/markdown jumble instead of plain text.
+#
+# notes:
+# the time.sleep() lines below are there just to slow down the processing to keep
+# the server burden below 100% on one or more CPUs. These were added when the supernice
+# alias just wasn't enough (in my opinion) to run on tilde.team without possibly affecting
+# other users. The time.sleep() lines are not necessary; they are there for shared pubnix love.
+# 
+# there are probably one or more import lines above that do not need to be there.
+#
+# the lines toward the end are messy and difficult to read. They also insert some
+# indicators and numbers into the printed text; these were added for debugging when
+# language-check kept crashing: markovify would sometimes return a blank sentence (None), and
+# language-check doesn't like variables of type None. So the product of the markovify call
+# was str()'d, and the call itself was placed in a while loop until it actually returned
+# something that isn't 'None'. An intermediate 'matches' variable was also reintroduced.
+# all that code is subject to change.
+# 
+
+# placeholders that the script fills in further down: the grammar checker
+# and the merged markov model.
+tool = combined_model = None
+# placeholders that stay None at module level in this script (the functions
+# below use their own local 'text' and 'matches').
+text = matches = model_json = title = None
+
+#couplets = random.randint(5,14) # use this var to generate a random number of couplets between X,Y
+# how many four-line stanzas to print; drawn once per run, 14 to 28 inclusive.
+quatrains = random.randint(14, 28)
+# 69-column text wrapper with a hanging indent on continuation lines.
+wrapper = textwrap.TextWrapper(
+    width=69,
+    subsequent_indent="            ",
+)
+
+# these commented-out blocks are left here for possible future use for the himinvagn Bragi cosmic.voyage
+# if i can figure out how to automate the generation of these headers while garbling some text below,
+# and semirandomly changing which frequencies are listed, and the signals quality / reconstruction confidence %
+#
+#def genHeaders():
+#    head1 = '''\
+#	    [processing by miniVISR.autoComms @ relay station VHE-0j0-η         ]
+#	    [recv'd on 100Hz+-15Hz, 112.358Hz, 112.358KHz, 33.33MHz, 66.66MHz   ]
+# 	    [radio     112.358MHz, 121.5MHz, 130.167MHz, 143.625MHz, 244.30MHz  ]
+#	    [freqs.    358.13MHz, 581.321MHz, 633.9MHz, 922.7MHz, 2113.853MHz   ]
+#	    [signals quality: poor.      signals reconstuction confidence: 65%  ]
+#	    [message transcription relayed to Earthsys QEC for general delivery ]
+#	    [begin transcript                                                   ]
+#	    '''.format()
+#	head2 = '''\
+#
+#                      ...able, animal, and mineral,
+#            I know the skalds of old Solsys, and I quote the verse atypical
+#            Synechdoche and kenna til, disorder quite limerickal;
+#            I'm very well disposed as well, to matters a-esthetical,
+#            I sing love songs to moons both in circle and elliptical,
+#            About their craters and their phases with my lyrics I'm quite loose,
+#            Bragi's here in your system to sing, and to share whatever you choose.
+#            '''.format()
+#    return 0
+
+#def genFooters():
+#    countid = str(125925)#{file open stuff - keep a counter in a file?}
+#	
+#    foot =  '''\
+#            Bragi's here in your system to sing, and to share whatever you choose.
+#            Broadcasting on many frequencies every 86,400 seconds. Bragi-{ordinal}
+#            out.
+#
+#            [end transcript                                                     ]
+#            END MESSAGE."
+#            '''.format(ordinal=countid)
+#    print()
+#    print(foot)
+ 
+#    return 0
+
+def genTitle():
+    """
+    build a title for the poem/song: generate one sentence of 80 to 150
+    characters, pick 1 to 3 of its words at random (repeats are possible),
+    join them with single spaces, and trim spaces, commas, colons and double
+    quotes from both ends of the result.
+
+    keeps asking the model until it actually produces a sentence, so the
+    literal word "None" can never end up in the title.
+    """
+    sentence = None
+    while sentence is None:
+        sentence = combined_model.make_short_sentence(150, min_chars=80)
+        if sentence is None:
+            # nothing generated this time; pause before retrying so the
+            # retry loop doesn't spin a CPU (shared pubnix love).
+            time.sleep(1)
+
+    word_list = sentence.split()
+    wtitle = " ".join(random.choice(word_list) for x in range(random.randint(1, 3)))
+    return wtitle.strip(' ,:"')
+
+def _genLine(minl, maxl, indent):
+    """
+    shared worker for genLongLine() and genShortLine().
+
+    asks the combined model for a sentence between minl and maxl characters,
+    retrying until one is actually produced, then runs it through
+    language-check and applies the suggested corrections if any were found.
+    returns the sentence with 'indent' prepended.
+    """
+    sentence = None
+    while sentence is None:
+        # the model hands back None when it can't build a sentence that fits
+        sentence = combined_model.make_short_sentence(maxl, min_chars=minl)
+        # throttle every attempt to keep the server burden down
+        time.sleep(1)
+
+    # grammar-check only once a real sentence exists, so failed attempts
+    # never reach language-check
+    matches = tool.check(sentence)
+    if matches:
+        sentence = language_check.correct(sentence, matches)
+    return indent + sentence
+
+def genLongLine(minl, maxl):
+    """
+    give me a min length and a max length and i'll return a grammar-checked
+    sentence between those lengths, hopefully, prefixed with a single space
+    (an unindented verse line).
+    """
+    return _genLine(minl, maxl, " ")
+
+def genShortLine(minl, maxl):
+    """
+    give me a min length and a max length and i'll return a grammar-checked
+    sentence between those lengths, hopefully, prefixed with six spaces
+    (an indented verse line).
+    """
+    return _genLine(minl, maxl, "      ")
+
+# same basic structure as in markchainer.py, but this uses saved models
+# (the model generation procedure is kept separate from the verse generation
+# procedure, for shared pubnix love) so it can work with growing corpora.
+# this searches a fixed relative path for already-created models,
+# reads each model (json) in, converts it from json, and combines the models
+# together.
+# walk the saved-chain directory; every *.mkdch file is loaded and folded
+# into combined_model, with pauses in between to stay gentle on the server.
+for chain_name in os.listdir("./corpus/prose/chains/"):
+    if not chain_name.endswith(".mkdch"):
+        continue
+    with open("./corpus/prose/chains/" + chain_name) as chain_fh:
+        model = markovify.Text.from_json(json.load(chain_fh))
+        time.sleep(5)
+        if not combined_model:
+            # first model found: it is the combined model so far
+            combined_model = model
+            continue
+        time.sleep(5)
+        combined_model = markovify.combine(models=[combined_model, model])
+
+# stop with a readable message when no saved model was loaded, before the
+# grammar checker is started; otherwise genTitle() fails on a None model.
+if combined_model is None:
+    raise SystemExit("no .mkdch model files found in ./corpus/prose/chains/")
+
+tool = language_check.LanguageTool('en-US')
+# spellchecking is disabled: didn't like some of the 'fixes' from testing.
+# using archaic language in testing, will use novel words in the future.
+# to re-enable spellchecking, comment out the following line:
+tool.disable_spellchecking()
+
+#print("~*~") # This start delimiter is here for testing each run
+# every LaTeX backslash is written as \\ so python never reads it as the
+# start of an escape sequence (\n, \b, ...); sequences such as \p, \i and \e
+# are invalid escapes that python only tolerates with a warning.
+print("\\newpage")
+ptitle = genTitle()
+print("\\poemtitle{" + ptitle + "}")
+print("\\begin{verse}")
+print("\\indentpattern{0010}")
+print("\\begin{patverse*}")
+for _quatrain in range(quatrains):
+    # four generated lines per stanza, each ended with the LaTeX line break \\
+    for _line in range(4):
+        print(genLongLine(30, 80) + " \\\\")
+    print("\\medskip")
+print("\\end{patverse*}")
+print("\\end{verse}")
+