Steven Smith revised this gist. Go to revision
1 file changed, 1 insertion, 2 deletions
neuralnet.py
| @@ -53,5 +53,4 @@ for i in xrange(1,10000 if not len(sys.argv) > 2 else int(sys.argv[2])): | |||
| 53 | 53 | if not outp[seed]: | |
| 54 | 54 | seed = random.choice(outp.keys()) | |
| 55 | 55 | else: | |
| 56 | - | seed = random.choice(outp[seed]) | |
| 57 | - | print("") | |
| 56 | + | seed = random.choice(outp[seed]) | |
Steven Smith revised this gist. Go to revision
1 file changed, 1 insertion, 2 deletions
neuralnet.py
| @@ -49,10 +49,9 @@ with open(sys.argv[1]) as f: | |||
| 49 | 49 | ||
| 50 | 50 | seed = random.choice(outp.keys()) | |
| 51 | 51 | for i in xrange(1,10000 if not len(sys.argv) > 2 else int(sys.argv[2])): | |
| 52 | + | print("{}".format(seed), end=" ") | |
| 52 | 53 | if not outp[seed]: | |
| 53 | - | print("{}".format(seed), end=" ") | |
| 54 | 54 | seed = random.choice(outp.keys()) | |
| 55 | 55 | else: | |
| 56 | - | print("{}".format(seed), end=" ") | |
| 57 | 56 | seed = random.choice(outp[seed]) | |
| 58 | 57 | print("") | |
Steven Smith revised this gist. Go to revision
1 file changed, 58 insertions
neuralnet.py (file created)
| @@ -0,0 +1,58 @@ | |||
| 1 | + | from __future__ import print_function | |
| 2 | + | import sys,random | |
| 3 | + | ||
| 4 | + | # http://stackoverflow.com/a/31505798 | |
| 5 | + | import re | |
| 6 | + | caps = "([A-Z])" | |
| 7 | + | prefixes = "(Mr|St|Mrs|Ms|Dr)[.]" | |
| 8 | + | suffixes = "(Inc|Ltd|Jr|Sr|Co)" | |
| 9 | + | starters = "(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)" | |
| 10 | + | acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)" | |
| 11 | + | websites = "[.](com|net|org|io|gov)" | |
| 12 | + | ||
| 13 | + | def split_into_sentences(text): | |
| 14 | + | text = " " + text + " " | |
| 15 | + | text = text.replace("\n"," ") | |
| 16 | + | text = re.sub(prefixes,"\\1<prd>",text) | |
| 17 | + | text = re.sub(websites,"<prd>\\1",text) | |
| 18 | + | if "Ph.D" in text: text = text.replace("Ph.D.","Ph<prd>D<prd>") | |
| 19 | + | text = re.sub("\s" + caps + "[.] "," \\1<prd> ",text) | |
| 20 | + | text = re.sub(acronyms+" "+starters,"\\1<stop> \\2",text) | |
| 21 | + | text = re.sub(caps + "[.]" + caps + "[.]" + caps + "[.]","\\1<prd>\\2<prd>\\3<prd>",text) | |
| 22 | + | text = re.sub(caps + "[.]" + caps + "[.]","\\1<prd>\\2<prd>",text) | |
| 23 | + | text = re.sub(" "+suffixes+"[.] "+starters," \\1<stop> \\2",text) | |
| 24 | + | text = re.sub(" "+suffixes+"[.]"," \\1<prd>",text) | |
| 25 | + | text = re.sub(" " + caps + "[.]"," \\1<prd>",text) | |
| 26 | + | if "”" in text: text = text.replace(".”","”.") | |
| 27 | + | if "\"" in text: text = text.replace(".\"","\".") | |
| 28 | + | if "!" in text: text = text.replace("!\"","\"!") | |
| 29 | + | if "?" in text: text = text.replace("?\"","\"?") | |
| 30 | + | text = text.replace(".",".<stop>") | |
| 31 | + | text = text.replace("?","?<stop>") | |
| 32 | + | text = text.replace("!","!<stop>") | |
| 33 | + | text = text.replace("<prd>",".") | |
| 34 | + | sentences = text.split("<stop>") | |
| 35 | + | sentences = sentences[:-1] | |
| 36 | + | sentences = [s.strip() for s in sentences] | |
| 37 | + | return sentences | |
| 38 | + | # end stackoverflow | |
| 39 | + | ||
| 40 | + | outp = {} | |
| 41 | + | with open(sys.argv[1]) as f: | |
| 42 | + | for sentence in split_into_sentences(f.read()): | |
| 43 | + | words = sentence.split() | |
| 44 | + | for i, word in enumerate(words): | |
| 45 | + | if not word in outp: | |
| 46 | + | outp[word] = [] | |
| 47 | + | if i+1 < len(words): | |
| 48 | + | outp[word].append(words[i+1]) | |
| 49 | + | ||
| 50 | + | seed = random.choice(outp.keys()) | |
| 51 | + | for i in xrange(1,10000 if not len(sys.argv) > 2 else int(sys.argv[2])): | |
| 52 | + | if not outp[seed]: | |
| 53 | + | print("{}".format(seed), end=" ") | |
| 54 | + | seed = random.choice(outp.keys()) | |
| 55 | + | else: | |
| 56 | + | print("{}".format(seed), end=" ") | |
| 57 | + | seed = random.choice(outp[seed]) | |
| 58 | + | print("") | |