from __future__ import print_function
#
# Read the prefix-to-suffix maps from a file
#
def read_grams(path):
    """Read the two prefix-to-suffix maps stored in a tab-separated file.

    The file is read as two consecutive sections. In the first section each
    line is ``word<TAB>suffix...``; in the second each line is
    ``word1<TAB>word2<TAB>suffix...``. A section ends at the first line that
    holds a single field once stripped (a blank line, for example) or at
    end of file. Second-section lines with exactly two fields are reported
    on stdout and skipped.

    Args:
        path: Path of the file to read.

    Returns:
        A tuple ``(h1, h2)``. ``h1`` maps a word to the list of fields that
        followed it on its line; ``h2`` maps a ``(word1, word2)`` tuple to
        the list of fields that followed the pair.
    """
    h1 = {}
    h2 = {}
    # The context manager guarantees the handle is closed, even on error.
    with open(path) as f:
        for line in f:
            values = line.strip().split('\t')
            # split() with an explicit separator always yields at least one
            # field, so "exactly one field" is the only short case possible.
            if len(values) == 1:
                break
            h1[values[0]] = values[1:]
        # Keep iterating the same file object so the second section starts
        # on the line right after the separator.
        for line in f:
            values = line.strip().split('\t')
            if len(values) == 1:
                break
            if len(values) > 2:
                h2[(values[0], values[1])] = values[2:]
            else:
                print("ignore unexpected h2 line:")
                print(line)
    return h1, h2
#
# Generate a nonsensical sentence using bigrams
#
def compose(h1, h2, count=10):
    """Print a random sentence of up to ``count`` words on one line.

    The first word is a random key of ``h1`` and the second is a random
    entry of that key's list. Every later word is drawn at random from
    ``h2``, keyed by the two most recent words. Each word is followed by a
    single space and the line is terminated with a newline.

    Args:
        h1: Mapping from a word to a list of words that may follow it.
        h2: Mapping from a ``(word1, word2)`` tuple to a list of words that
            may follow that pair.
        count: Target number of words. The first two words are always
            printed, so values below 2 behave like 2. Fewer than ``count``
            words are printed when the current pair has no entry (or an
            empty entry) in ``h2``.

    Raises:
        IndexError: If ``h1`` is empty or the chosen key's list is empty
            (raised by ``random.choice``).
    """
    import random

    first_word = random.choice(list(h1))
    second_word = random.choice(h1[first_word])
    prefix = (first_word, second_word)
    print(first_word, second_word, end=' ')
    for _ in range(2, count):
        followers = h2.get(prefix)
        if not followers:
            # Dead end: nothing is recorded after this pair (e.g. the last
            # two words of the corpus). End the sentence here rather than
            # failing with KeyError.
            break
        next_word = random.choice(followers)
        print(next_word, end=' ')
        prefix = shift(prefix, next_word)
    print()
def shift(prefix, word):
    """Return a new prefix with the oldest entry dropped and ``word`` appended.

    ``prefix`` is expected to be a tuple: the result is built by
    concatenating its tail with a one-element tuple, and ``prefix`` itself
    is left untouched.
    """
    remaining = prefix[1:]
    appended = (word,)
    return remaining + appended
def compose_from(path, count=10):
    """Load the gram maps from ``path`` and print one generated sentence.

    ``path`` is handed to ``read_grams``; the two maps it returns, together
    with ``count``, are handed to ``compose``.
    """
    single_word_map, word_pair_map = read_grams(path)
    compose(single_word_map, word_pair_map, count)
# Module-level call with no __main__ guard, so it runs on import as well as
# when the file is executed directly. 'bee.tsv' is a relative path.
# NOTE(review): consider an `if __name__ == "__main__":` guard; confirm first
# that no importer relies on this side effect.
compose_from('bee.tsv')