Revision of neuralnet.py

Steven Smith revised this gist 1480324292. Go to revision

1 file changed, 1 insertion, 2 deletions

neuralnet.py

			@@ -53,5 +53,4 @@ for i in xrange(1,10000 if not len(sys.argv) > 2 else int(sys.argv[2])):
53	53		if not outp[seed]:
54	54		seed = random.choice(outp.keys())
55	55		else:
56		-	seed = random.choice(outp[seed])
57		-	print("")
	56	+	seed = random.choice(outp[seed])

Steven Smith revised this gist 1480324284. Go to revision

1 file changed, 1 insertion, 2 deletions

neuralnet.py

			@@ -49,10 +49,9 @@ with open(sys.argv[1]) as f:
49	49
50	50		seed = random.choice(outp.keys())
51	51		for i in xrange(1,10000 if not len(sys.argv) > 2 else int(sys.argv[2])):
	52	+	print("{}".format(seed), end=" ")
52	53		if not outp[seed]:
53		-	print("{}".format(seed), end=" ")
54	54		seed = random.choice(outp.keys())
55	55		else:
56		-	print("{}".format(seed), end=" ")
57	56		seed = random.choice(outp[seed])
58	57		print("")

Steven Smith revised this gist 1480324234. Go to revision

1 file changed, 58 insertions

neuralnet.py (file created)

		@@ -0,0 +1,58 @@
1	+	from __future__ import print_function
2	+	import sys,random
3	+
4	+	# http://stackoverflow.com/a/31505798
5	+	import re
6	+	caps = "([A-Z])"
7	+	prefixes = "(Mr|St|Mrs|Ms|Dr)[.]"
8	+	suffixes = "(Inc|Ltd|Jr|Sr|Co)"
9	+	starters = "(Mr|Mrs|Ms|Dr|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)"
10	+	acronyms = "([A-Z][.][A-Z][.](?:[A-Z][.])?)"
11	+	websites = "[.](com|net|org|io|gov)"
12	+
13	+	def split_into_sentences(text):
14	+	text = " " + text + " "
15	+	text = text.replace("\n"," ")
16	+	text = re.sub(prefixes,"\\1<prd>",text)
17	+	text = re.sub(websites,"<prd>\\1",text)
18	+	if "Ph.D" in text: text = text.replace("Ph.D.","Ph<prd>D<prd>")
19	+	text = re.sub("\s" + caps + "[.] "," \\1<prd> ",text)
20	+	text = re.sub(acronyms+" "+starters,"\\1<stop> \\2",text)
21	+	text = re.sub(caps + "[.]" + caps + "[.]" + caps + "[.]","\\1<prd>\\2<prd>\\3<prd>",text)
22	+	text = re.sub(caps + "[.]" + caps + "[.]","\\1<prd>\\2<prd>",text)
23	+	text = re.sub(" "+suffixes+"[.] "+starters," \\1<stop> \\2",text)
24	+	text = re.sub(" "+suffixes+"[.]"," \\1<prd>",text)
25	+	text = re.sub(" " + caps + "[.]"," \\1<prd>",text)
26	+	if "”" in text: text = text.replace(".”","”.")
27	+	if "\"" in text: text = text.replace(".\"","\".")
28	+	if "!" in text: text = text.replace("!\"","\"!")
29	+	if "?" in text: text = text.replace("?\"","\"?")
30	+	text = text.replace(".",".<stop>")
31	+	text = text.replace("?","?<stop>")
32	+	text = text.replace("!","!<stop>")
33	+	text = text.replace("<prd>",".")
34	+	sentences = text.split("<stop>")
35	+	sentences = sentences[:-1]
36	+	sentences = [s.strip() for s in sentences]
37	+	return sentences
38	+	# end stackoverflow
39	+
40	+	outp = {}
41	+	with open(sys.argv[1]) as f:
42	+	for sentence in split_into_sentences(f.read()):
43	+	words = sentence.split()
44	+	for i, word in enumerate(words):
45	+	if not word in outp:
46	+	outp[word] = []
47	+	if i+1 < len(words):
48	+	outp[word].append(words[i+1])
49	+
50	+	seed = random.choice(outp.keys())
51	+	for i in xrange(1,10000 if not len(sys.argv) > 2 else int(sys.argv[2])):
52	+	if not outp[seed]:
53	+	print("{}".format(seed), end=" ")
54	+	seed = random.choice(outp.keys())
55	+	else:
56	+	print("{}".format(seed), end=" ")
57	+	seed = random.choice(outp[seed])
58	+	print("")

Newer Older