
Logistic regression without sklearn in Python

I'm trying to write logistic regression in Python from scratch. I have two datasets: one for training and one for testing. The loss function converges nicely, but I can't work out how to calculate the true positive and false positive counts for my predictions. Do you have any ideas? You can see my code below:

import numpy as np 
import pandas as pd 
import matplotlib.pyplot as plt

class LogisticRegression:
    def __init__(self, learning_rate = 0.005, max_iter = 100):
        self.x_train = None
        self.y_train = None
        self.x_test = None
        self.y_test = None
        self.w = None
        self.plot_index = None
        self.plot_costList = []
        
        self.number_of_iteration = max_iter     # default
        self.learning_rate = learning_rate      # default
        
        self.dataopen()
        self.normalization()
        
        self.debug = 0
        
    
    def dataopen(self):
        df_for_train = pd.read_csv("data_ready_for_ML_GENIE.txt", delimiter=" ")  
        df_for_train.dropna(how="any",inplace=True)     
        df_for_train.replace(["yes", "no"], [1, 0], inplace=True)       
        df_for_train.replace(["driver", "passenger"], [1, 0], inplace=True)  

        self.x_train = df_for_train.iloc[:, :-1]
        self.y_train = df_for_train.iloc[:, -1]    

        df_for_test = pd.read_csv("data_ready_for_ML_TCGA.txt", delimiter=" ")  
        df_for_test.dropna(how="any",inplace=True)
        df_for_test.replace(["yes", "no"], [1, 0], inplace=True)
        df_for_test.replace(["driver", "passenger"], [1, 0], inplace=True)
         

        self.x_test = df_for_test.iloc[:, :-1]
        self.y_test = df_for_test.iloc[:, -1]
        
    def normalization(self):        
        for col in (self.x_train.columns):
            self.x_train[col] = (self.x_train[col] - np.min(self.x_train[col])) / (np.max(self.x_train[col]) - np.min(self.x_train[col]))
        for col in (self.x_test.columns):
            self.x_test[col] = (self.x_test[col] - np.min(self.x_test[col])) / (np.max(self.x_test[col]) - np.min(self.x_test[col]))
        
        self.x_train = self.x_train.T  
        self.x_test = self.x_test.T
        self.y_test = np.array(self.y_test)
        self.y_train = np.array(self.y_train)
        
    def getResults(self):           
        dimension =  self.x_train.shape[0]  
        w, b = self.weight_bias(dimension)
        parameters, gradients, cost_list = self.update(w, b)
        y_prediction_test = self.predict(parameters["weight"], parameters["bias"])  # Calling predict function
        print("Y Test : ", self.y_test)
        self.w = parameters["weight"]
        return y_prediction_test
       
        
    def weight_bias(self, dimension):  
        w = np.full((dimension,1), 0.01)
        b = 0.0
        return w, b
    
    def update(self, w, b):
        cost_list = []
        cost_list2 = []
        index = []
       
        for i in range(self.number_of_iteration):
           
            cost,gradients = self.forward_backward_propagation(w, b) 
            cost_list.append(cost)
            w = w - self.learning_rate * gradients["derivative_weight"]   
            b = b - self.learning_rate * gradients["derivative_bias"]
            if i % 10 == 0:
                cost_list2.append(cost)   
                index.append(i)
                print ("Cost after iteration %i: %f" %(i, cost)) 
                
        parameters = {"weight": w,"bias": b}
        self.plot_index, self.plot_costList = index, cost_list2  
        return parameters, gradients, cost_list
    
    
    def predict(self, w, b):
        
        z = self.sigmoid(np.dot(w.T, self.x_test)+b)   # By sigmoid, we can find y_predicted values here
        Y_prediction = np.zeros((1, self.x_test.shape[1]))
        # if z is greater than 0.5, predict 1 (driver);
        # if z is 0.5 or smaller, predict 0 (passenger).
        for i in range(z.shape[1]):
            if z[0,i] <= 0.5:
                Y_prediction[0,i] = 0
            else:
                Y_prediction[0,i] = 1
        
        return Y_prediction
    
    def forward_backward_propagation(self, w, b):
        z = np.dot(w.T, self.x_train) + b  
        y_head = self.sigmoid(z)      
        loss = -self.y_train*np.log(y_head) - (1-self.y_train)*np.log(1-y_head) 
        cost = (np.sum(loss)) / self.x_train.shape[1]           
        
        # backward propagation
        derivative_weight = (np.dot(self.x_train,((y_head-self.y_train).T)))/self.x_train.shape[1]  
        derivative_bias = np.sum(y_head-self.y_train)/self.x_train.shape[1]
     
        gradients = {"derivative_weight": derivative_weight,"derivative_bias": derivative_bias}
        return cost, gradients
   
    def sigmoid(self, z):
        # standard logistic function
        y_head = 1 / (1 + np.exp(-z))
        return y_head

Once you get the predicted test labels back from getResults(), you can calculate your true and false positives by comparing those predicted labels against the ground-truth labels (self.y_test).
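Here is a minimal sketch of that comparison, assuming your class is instantiated as model and that getResults() returns the (1, n_samples) array produced by predict(), as in the code above:

import numpy as np

model = LogisticRegression(learning_rate=0.005, max_iter=100)   # assumed instantiation
y_prediction_test = model.getResults()                          # shape (1, n_samples)

y_pred = y_prediction_test.flatten().astype(int)    # predicted labels (0 or 1)
y_true = np.array(model.y_test).astype(int)         # ground-truth labels from the test set

# Confusion-matrix counts: compare each prediction against the ground truth
tp = np.sum((y_pred == 1) & (y_true == 1))   # predicted driver, actually driver
fp = np.sum((y_pred == 1) & (y_true == 0))   # predicted driver, actually passenger
tn = np.sum((y_pred == 0) & (y_true == 0))   # predicted passenger, actually passenger
fn = np.sum((y_pred == 0) & (y_true == 1))   # predicted passenger, actually driver

print("TP:", tp, "FP:", fp, "TN:", tn, "FN:", fn)
print("Accuracy:", (tp + tn) / len(y_true))

From the same four counts you can also derive precision (tp / (tp + fp)) and recall (tp / (tp + fn)) if you need them.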

