I'm trying to write logistic regression code in Python. I have two datasets: one for training and one for testing. The loss function gives good results; however, my code does not calculate the true positive and false positive values. Does anyone have any idea why? You can see my code below:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
class LogisticRegression:
    """Binary logistic regression trained with batch gradient descent.

    Constructing an instance loads the training and test files, min-max
    scales the features and stores them transposed, i.e. with shape
    (n_features, n_samples). Call ``getResults()`` to train the model and
    obtain the predicted labels for the test set.
    """

    # Default space-delimited input files; the constructor can override them.
    train_path = "data_ready_for_ML_GENIE.txt"
    test_path = "data_ready_for_ML_TCGA.txt"

    def __init__(self, learning_rate=0.005, max_iter=100,
                 train_path=None, test_path=None):
        """Load and normalize the data and store the hyper-parameters.

        Args:
            learning_rate: Step size used by gradient descent.
            max_iter: Number of gradient-descent iterations.
            train_path: Optional path of the training file; the class
                default is used when omitted.
            test_path: Optional path of the test file; the class default
                is used when omitted.
        """
        self.x_train = None
        self.y_train = None
        self.x_test = None
        self.y_test = None
        self.w = None
        self.plot_index = None
        self.plot_costList = []
        self.number_of_iteration = max_iter
        self.learning_rate = learning_rate
        self.debug = 0
        if train_path is not None:
            self.train_path = train_path
        if test_path is not None:
            self.test_path = test_path
        self.dataopen()
        self.normalization()

    @staticmethod
    def _load_dataset(path):
        """Read one space-delimited file and return (features, labels).

        Rows containing missing values are dropped, "yes"/"driver" become 1
        and "no"/"passenger" become 0. The last column is the label.
        """
        frame = pd.read_csv(path, delimiter=" ")
        frame = frame.dropna(how="any")
        frame = frame.replace(
            ["yes", "no", "driver", "passenger"], [1, 0, 1, 0]
        )
        return frame.iloc[:, :-1], frame.iloc[:, -1]

    def dataopen(self):
        """Load the training and test sets into the instance attributes."""
        self.x_train, self.y_train = self._load_dataset(self.train_path)
        self.x_test, self.y_test = self._load_dataset(self.test_path)

    def normalization(self):
        """Min-max scale the features and transpose them.

        The minimum and range are computed on the training set only and the
        same transform is applied to the test set, so both sets live on the
        same scale. Test values outside the training range therefore fall
        outside [0, 1]. Columns are matched by position, so both sets must
        have the same number of feature columns.
        """
        train = self.x_train.astype(float)
        test = self.x_test.astype(float)
        col_min = train.min(axis=0).to_numpy()
        col_range = train.max(axis=0).to_numpy() - col_min
        # A constant column has zero range; dividing by 1 maps it to 0
        # instead of producing NaN.
        col_range[col_range == 0] = 1.0
        self.x_train = ((train - col_min) / col_range).T
        self.x_test = ((test - col_min) / col_range).T
        self.y_test = np.array(self.y_test)
        self.y_train = np.array(self.y_train)

    def getResults(self):
        """Train the model and return the predictions for the test set.

        Returns:
            Array of shape (1, n_test_samples) holding 0.0 / 1.0 labels.
        """
        dimension = self.x_train.shape[0]
        w, b = self.weight_bias(dimension)
        parameters, gradients, cost_list = self.update(w, b)
        y_prediction_test = self.predict(
            parameters["weight"], parameters["bias"]
        )
        print("Y Test : ", self.y_test)
        self.w = parameters["weight"]
        return y_prediction_test

    def weight_bias(self, dimension):
        """Return the initial weights (all 0.01, shape (dimension, 1)) and bias 0.0."""
        w = np.full((dimension, 1), 0.01)
        b = 0.0
        return w, b

    def update(self, w, b):
        """Run gradient descent for ``number_of_iteration`` steps.

        Args:
            w: Initial weight column vector.
            b: Initial bias.

        Returns:
            Tuple ``(parameters, gradients, cost_list)`` where ``parameters``
            holds the final "weight" and "bias", ``gradients`` are those of
            the last iteration (an empty dict when no iteration ran) and
            ``cost_list`` contains the cost of every iteration.
        """
        cost_list = []
        sampled_costs = []
        sampled_steps = []
        # Defined up front so that zero iterations does not raise.
        gradients = {}
        for i in range(self.number_of_iteration):
            cost, gradients = self.forward_backward_propagation(w, b)
            cost_list.append(cost)
            w = w - self.learning_rate * gradients["derivative_weight"]
            b = b - self.learning_rate * gradients["derivative_bias"]
            if i % 10 == 0:
                # Every tenth cost is kept for plotting and reported.
                sampled_costs.append(cost)
                sampled_steps.append(i)
                print("Cost after iteration %i: %f" % (i, cost))
        parameters = {"weight": w, "bias": b}
        self.plot_index, self.plot_costList = sampled_steps, sampled_costs
        return parameters, gradients, cost_list

    def predict(self, w, b):
        """Predict labels for ``self.x_test``.

        A probability strictly greater than 0.5 is labelled 1 (driver),
        anything else 0 (passenger).

        Returns:
            Float array of shape (1, n_test_samples).
        """
        x = np.asarray(self.x_test, dtype=float)
        probabilities = self.sigmoid(np.dot(w.T, x) + b)
        return (probabilities > 0.5).astype(float)

    def forward_backward_propagation(self, w, b):
        """Compute the mean cross-entropy cost and its gradients.

        Returns:
            Tuple ``(cost, gradients)`` with ``gradients`` holding
            "derivative_weight" and "derivative_bias".
        """
        x = np.asarray(self.x_train, dtype=float)
        y = np.asarray(self.y_train, dtype=float)
        n_samples = x.shape[1]

        y_head = self.sigmoid(np.dot(w.T, x) + b)
        # Keep probabilities away from exactly 0 and 1 so that the logs stay
        # finite when the sigmoid saturates.
        eps = np.finfo(float).eps
        safe = np.clip(y_head, eps, 1 - eps)
        loss = -y * np.log(safe) - (1 - y) * np.log(1 - safe)
        cost = np.sum(loss) / n_samples

        # Backward propagation uses the unclipped probabilities.
        error = y_head - y
        derivative_weight = np.dot(x, error.T) / n_samples
        derivative_bias = np.sum(error) / n_samples
        gradients = {
            "derivative_weight": derivative_weight,
            "derivative_bias": derivative_bias,
        }
        return cost, gradients

    def sigmoid(self, z):
        """Return the logistic function 1 / (1 + exp(-z)) of ``z``."""
        # Bounding z avoids floating-point overflow inside exp(); results
        # for |z| <= 500 are unaffected.
        y_head = 1 / (1 + np.exp(-np.clip(z, -500, 500)))
        return y_head

    def confusion_counts(self, y_prediction, y_true=None):
        """Count true/false positives and negatives.

        Args:
            y_prediction: Predicted labels, e.g. the array returned by
                ``getResults()``; any shape, it is flattened.
            y_true: Ground-truth labels; defaults to ``self.y_test``.

        Returns:
            Dict with integer counts under "TP", "FP", "TN" and "FN",
            where label 1 is the positive class.

        Raises:
            ValueError: If the two label arrays differ in length.
        """
        reference = self.y_test if y_true is None else y_true
        predicted = np.asarray(y_prediction).ravel() == 1
        actual = np.asarray(reference).ravel() == 1
        if predicted.shape != actual.shape:
            raise ValueError(
                "y_prediction and y_true must contain the same number of labels"
            )
        return {
            "TP": int(np.sum(predicted & actual)),
            "FP": int(np.sum(predicted & ~actual)),
            "TN": int(np.sum(~predicted & ~actual)),
            "FN": int(np.sum(~predicted & actual)),
        }
If you get your predicted test-data labels back from getResults(), you can then calculate your true and false positives by comparing the predicted labels against the ground truth.
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.