from __future__ import division  # Python 2 compatibility; harmless on Python 3
from sklearn import datasets
import matplotlib.pyplot as plt
import seaborn as sns
# %matplotlib inline   (notebook magic; leave commented when running as a script)
sns.set(style='ticks', palette='Set2')
import pandas as pd
import numpy as np

# Binary subset of the Iris data: the first 100 rows are Setosa (target 0) and
# Versicolor (target 1); keep only sepal length and sepal width as features.
data = datasets.load_iris()
X = data.data[:100, :2]
y = data.target[:100]
X_full = data.data[:100, :]

# Uncomment to visualize the two classes in the sepal length/width plane:
# setosa = plt.scatter(X[:50, 0], X[:50, 1], c='b')
# versicolor = plt.scatter(X[50:, 0], X[50:, 1], c='r')
# plt.xlabel("Sepal Length")
# plt.ylabel("Sepal Width")
# plt.legend((setosa, versicolor), ("Setosa", "Versicolor"))
# plt.show()

# Prepend a column of ones so that theta[0] acts as the intercept term.
nofsamp, nfeat = X.shape
Xappend = np.ones((nofsamp, nfeat + 1))
Xappend[:, 1:] = X
# print(Xappend[1:10, :])


def logistic_func(theta, x):
    """Sigmoid of the linear score x.dot(theta)."""
    return 1.0 / (1.0 + np.exp(-x.dot(theta)))


def log_gradient(theta, x, y):
    """Gradient of the summed log-loss: (h - y)^T X."""
    first_calc = logistic_func(theta, x) - np.squeeze(y)
    final_calc = first_calc.T.dot(x)
    return final_calc


def cost_func(theta, x, y):
    """Mean cross-entropy (log-loss) over the samples."""
    log_func_v = logistic_func(theta, x)
    y = np.squeeze(y)
    step1 = y * np.log(log_func_v)
    step2 = (1 - y) * np.log(1 - log_func_v)
    final = -step1 - step2
    return np.mean(final)


def grad_desc(theta_values, X, y, lr=.001, converge_change=.001):
    """Batch gradient descent; stops when the per-step cost improvement drops
    below converge_change. Returns the fitted parameters and the cost history."""
    # Optional normalization (disabled):
    # X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
    cost_iter = []
    cost = cost_func(theta_values, X, y)
    cost_iter.append([0, cost])
    change_cost = 1
    i = 1
    while change_cost > converge_change:
        old_cost = cost
        theta_values = theta_values - (lr * log_gradient(theta_values, X, y))
        cost = cost_func(theta_values, X, y)
        cost_iter.append([i, cost])
        change_cost = old_cost - cost
        i += 1
    return theta_values, np.array(cost_iter)


def pred_values(theta, X):
    """Threshold the predicted probabilities at 0.5."""
    # Optional normalization (disabled):
    # X = (X - np.mean(X, axis=0)) / np.std(X, axis=0)
    pred_prob = logistic_func(theta, X)
    pred_value = np.where(pred_prob >= .5, 1, 0)
    return pred_value


shape = Xappend.shape[1]  # number of parameters (intercept + features)


# Test: the sigmoid of a zero score should be 0.5.
def test_logistic_func():
    from logreg_noregularization import logistic_func
    x0vec = np.zeros((5, nfeat))
    X0append = np.ones((5, nfeat + 1))
    X0append[:, 1:] = x0vec
    theta0 = np.zeros((nfeat + 1))
    hzero = logistic_func(theta0, X0append)
    hmean = np.mean(hzero)
    correctval = 0.5
    assert np.abs(hmean - correctval) < 1e-2


# Test: cost on the first 10 (all-Setosa) rows with theta = [0, 1, 1].
def test_cost_func():
    from logreg_noregularization import cost_func
    Xtest = Xappend[:10, :]
    ytest = y[:10]
    thetatest = np.zeros(nfeat + 1)
    thetatest[0] = 0.0
    thetatest[1] = 1.0
    thetatest[2] = 1.0
    testcost = cost_func(thetatest, Xtest, ytest)
    correctcost = 8.17
    assert np.abs(testcost - correctcost) < 0.5


# Test: gradient at theta = 0, where every predicted probability is 0.5.
def test_log_gradient():
    from logreg_noregularization import log_gradient
    Xtest = Xappend[:10, :]
    ytest = y[:10]
    thetatest = np.zeros(nfeat + 1)
    thetatest[0] = 0.0
    thetatest[1] = 0.0
    thetatest[2] = 0.0
    testthetagrad = log_gradient(thetatest, Xtest, ytest)
    correcttheta = np.zeros((nfeat + 1))
    # print(testthetagrad)
    correcttheta[0] = 5.0
    correcttheta[1] = 24.3
    correcttheta[2] = 16.55
    diff = np.abs(testthetagrad - correcttheta)
    assert np.mean(diff) < 0.1


if __name__ == "__main__":
    # Fit the model with gradient descent, report accuracy, and plot the cost
    # per iteration. Flip the labels so Setosa is 1 and Versicolor is 0,
    # consistent with the rest of the setup.
    y_flip = np.logical_not(y)
    betas = np.zeros(shape)
    fitted_values, cost_iter = grad_desc(betas, Xappend, y_flip)
    print(fitted_values)
    predicted_y = pred_values(fitted_values, Xappend)
    correct = np.sum(y_flip == predicted_y)
    print('correct', correct)
    plt.plot(cost_iter[:, 0], cost_iter[:, 1])
    plt.ylabel("Cost")
    plt.xlabel("Iteration")
    plt.show()
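

# An optional cross-check that is not part of the original test suite: the
# analytic gradient from log_gradient should agree with a central
# finite-difference approximation of cost_func. This is only a sketch; the
# test name, the chosen theta, and the step size are arbitrary additions.
# Note that cost_func averages over samples while log_gradient sums over
# them, so the numerical gradient is rescaled by the sample count.
def test_log_gradient_finite_difference():
    Xtest = Xappend[:10, :]
    ytest = y[:10]
    theta = 0.1 * np.arange(1, nfeat + 2)   # arbitrary non-zero parameters
    eps = 1e-5
    numeric = np.zeros_like(theta)
    for j in range(theta.size):
        step = np.zeros_like(theta)
        step[j] = eps
        numeric[j] = (cost_func(theta + step, Xtest, ytest)
                      - cost_func(theta - step, Xtest, ytest)) / (2 * eps)
    analytic = log_gradient(theta, Xtest, ytest)
    # cost_func is a mean while log_gradient is a sum: rescale to compare
    assert np.allclose(analytic, numeric * len(ytest), atol=1e-4)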