from nltk import word_tokenize
from collections import defaultdict
from nltk import bigrams, trigrams
import numpy as np
# The procedure takes sentences as lists of words.
def produce_text(input_sents):
    """Train a trigram model on ``input_sents`` and print one sampled sentence.

    Each sentence is padded on both sides, so ``None`` acts as the
    sentence-start and sentence-end marker. Generation starts from the
    context ``(None, None)``, repeatedly samples the next word from the
    relative trigram frequencies of the last two words, and stops once the
    last two generated items are both ``None``.

    Args:
        input_sents: Iterable of sentences, each a sequence of word tokens.

    Returns:
        None. The generated words are printed, joined by single spaces.
        If ``input_sents`` yields no sentences, an empty line is printed.
    """
    # trigram_counts[(w1, w2)][w3] = how often w3 followed the pair (w1, w2).
    trigram_counts = defaultdict(lambda: defaultdict(int))

    # Padding adds a dedicated marker (None) for sentence start and
    # sentence end, so the model also learns how sentences begin and stop.
    for sentence in input_sents:
        for w1, w2, w3 in trigrams(sentence, pad_right=True, pad_left=True):
            trigram_counts[(w1, w2)][w3] += 1

    # Relative frequencies: P(w3 | w1, w2) = count(w1, w2, w3) / count(w1, w2, *).
    trigram_model = {}
    for context, followers in trigram_counts.items():
        context_total = sum(followers.values())
        trigram_model[context] = {
            word: count / context_total for word, count in followers.items()
        }

    if (None, None) not in trigram_model:
        # No training sentences at all: there is nothing to sample from.
        # Print an empty line (the same output an all-empty corpus gives)
        # instead of letting np.random.choice fail on an empty sequence.
        print('')
        return

    text = [None, None]
    while True:
        candidates = trigram_model[tuple(text[-2:])]
        words = list(candidates)
        probs = list(candidates.values())

        # Sample a position rather than the word itself so numpy never has
        # to coerce the tokens (a mix of str and None) into an array.
        text.append(words[np.random.choice(len(words), p=probs)])

        # Two consecutive None markers mean the end padding was generated.
        if text[-2:] == [None, None]:
            break

    print(' '.join([t for t in text if t]))