#Movie Term Chatbot from __future__ import print_function import numpy as np import tensorflow.keras from tensorflow.keras.models import Sequential from tensorflow.keras.layers import Dense, Dropout, Activation, LSTM from tensorflow.keras.layers import Conv2D, MaxPooling2D from tensorflow.keras.preprocessing.text import Tokenizer import random batch_size = 32 max_words = 1000 epochs = 2 tempdata = open("movie_lines.txt", errors='replace').readlines() x_train_temp = np.array([i.split("+++$+++")[-1].strip() for i in tempdata[:1000]]) y_train_temp = [i.split("+++$+++")[-2].strip() for i in tempdata[:1000]] #Getting Categories categories = list(set(y_train_temp)) print("Categories:"+str(categories)) y_train = np.array([categories.index(i) for i in y_train_temp]) print(y_train) num_classes = len(set(y_train)) print(num_classes, 'classes') #Storing original for later use x_train_org = x_train_temp[:] y_train_org = y_train[:] #Embeddings allwords = ' '.join(x_train_temp).lower().split(' ') uniquewords = list(set(allwords)) #Stemming from nltk.stem import * stemmer = PorterStemmer() def fixWord(word): return word #Making words into numbers uniquewords = [fixWord(i) for i in uniquewords] x_train = [] def makeTextIntoNumbers(text): iwords = text.lower().split(' ') numbers = [] for n in iwords: try: numbers.append(uniquewords.index(fixWord(i))) except ValueError: numbers.append(0) return numbers for i in x_train_temp: x_train.append(makeTextIntoNumbers(i)) x_train = np.array(x_train) #Make binary embeddings tokenizer = Tokenizer(num_words=max_words) x_train = tokenizer.sequences_to_matrix(x_train, mode='binary') y_train = tensorflow.keras.utils.to_categorical(y_train, num_classes) print([i for i in x_train[0]]) #Model model = Sequential() model.add(Dense(512, input_shape=(max_words,))) model.add(Activation('relu')) model.add(Dense(num_classes)) model.add(Activation('softmax')) model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy']) history = model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, validation_split=0.1) print("Finished training") def getCategory(inputString): #Get the correct classification token = tokenizer.sequences_to_matrix(np.array([makeTextIntoNumbers(inputString),makeTextIntoNumbers(x_train_org[0])])) aindex = np.argmax(model.predict(np.array([token[0]]))) return aindex def getRandomTextFromIndex(aIndex): #Classify speaker and return random phrase res = -1 while res!=aIndex: aNumber = random.randint(0,len(y_train_org)) res = y_train_org[aNumber] return x_train_org[aNumber] print("ready") s = " " while s: category = getCategory(s) text = getRandomTextFromIndex(category) print("Chatbot:" + text) s = input("Human:")