# markov2_use.py

from __future__ import print_function

#
# Read the prefix-suffix maps from a file
#
def read_grams(path):
    """Read the one-word and two-word prefix-suffix maps stored in ``path``.

    The file is parsed as two consecutive sections of tab-separated lines
    (each line is stripped of surrounding whitespace before splitting):

    * Section 1: ``word<TAB>suffix[<TAB>suffix...]``.  The first field is
      the key; the remaining fields become its list of suffixes.
    * Section 2: ``word1<TAB>word2<TAB>suffix[<TAB>suffix...]``.  The first
      two fields form the tuple key; the remaining fields become its list
      of suffixes.  A line with exactly two fields is reported on stdout
      and skipped.

    A line that yields a single field (including a blank line) terminates
    the section being read; anything after the end of section 2 is ignored.

    Returns:
        A tuple ``(h1, h2)`` where ``h1`` maps ``str -> list[str]`` and
        ``h2`` maps ``(str, str) -> list[str]``.
    """
    h1 = {}
    h2 = {}
    # The context manager guarantees the file is closed, even on error.
    with open(path) as f:
        for line in f:
            values = line.strip().split('\t')
            if len(values) == 1:
                break
            # split() with an explicit separator never returns an empty
            # list, so at this point there are always two or more fields.
            h1[values[0]] = values[1:]
        # Keep consuming the same file object: section 2 starts right
        # after the separator line that ended section 1.
        for line in f:
            values = line.strip().split('\t')
            if len(values) == 1:
                break
            elif len(values) > 2:
                prefix = (values[0], values[1])
                h2[prefix] = values[2:]
            else:
                print("ignore unexpected h2 line:")
                print(line)
    return h1, h2

#
# Generate a nonsensical sentence using bigram prefixes
#
def compose(h1, h2, count=10):
    """Print a randomly generated sentence of at most ``count`` words.

    The first word is a random key of ``h1`` and the second a random
    suffix of that key.  Every following word is a random suffix of the
    current two-word prefix in ``h2``.  Words are written to stdout
    separated by single spaces and followed by a newline.

    Args:
        h1: Mapping ``word -> list of following words``.
        h2: Mapping ``(word1, word2) -> list of following words``.
        count: Target number of words.  The two opening words are always
            printed, so values below 2 still produce two words.

    Generation stops early when the current prefix has no entry (or an
    empty entry) in ``h2`` instead of raising ``KeyError``.

    Raises:
        IndexError: If ``h1`` is empty or the chosen first word has no
            suffixes (raised by ``random.choice``).
    """
    import random
    first_word = random.choice(list(h1))
    second_word = random.choice(h1[first_word])
    prefix = (first_word, second_word)
    print(first_word, second_word, end=' ')
    for _ in range(2, count):
        suffixes = h2.get(prefix)
        if not suffixes:
            # Dead end: this word pair was never followed by anything in
            # the source text, so finish the sentence here.
            break
        next_word = random.choice(suffixes)
        print(next_word, end=' ')
        # Slide the two-word window forward by one word.
        prefix = (prefix[1], next_word)
    print()

def shift(prefix, word):
    """Return ``prefix`` without its first item and with ``word`` appended.

    ``prefix`` is expected to be a tuple; the result is a new tuple and
    the argument is left untouched.
    """
    remaining = prefix[1:]
    appended = (word,)
    return remaining + appended

def compose_from(path, count=10):
    """Load the gram maps stored in ``path`` and print one sentence.

    ``count`` is forwarded unchanged to ``compose``.
    """
    # read_grams returns the (h1, h2) pair in the order compose expects.
    compose(*read_grams(path), count=count)

# Print one generated sentence (default length) from the grams in bee.tsv.
compose_from(path='bee.tsv')