简体   繁体   中英

How can I improve this nested for loop?

I want to rewrite the function get_bs. How can I improve its nested for loop, maybe in a more compact way? Any suggestions?

I was thinking of maybe splitting the nested loop into two separate nested loops, each written on one line, e.g. for i, j in .... Is this possible? Can someone please show me? Thanks.

class NB:
def __init__(self, train_data, test_data):
    """Store the raw data sets and create empty training/testing state.

    Args:
        train_data: Iterable of comma-separated lines used for training.
        test_data: Iterable of comma-separated lines to classify.
    """
    # Raw inputs, kept untouched until the clean-up passes run.
    self.train_data, self.test_data = train_data, test_data

    # Parsed entries, filled by traincleandata() / testcleandata().
    self.train_entries, self.test_entries = [], []

    # Per-column value lists for each class, filled by traincleandata().
    self.diabetes_yes, self.diabetes_no = [], []

    # Per-column means (a_*) and standard deviations (b_*) for each class,
    # written by trainNB() and get_bs() respectively.
    self.a_diabetesyes, self.a_diabetesno = [], []
    self.b_diabetesyes, self.b_diabetesno = [], []

    # Column count of a data line; 0 means "no training line seen yet".
    self.num_elements = 0

    # Class priors (p_*) and per-class entry counts (num_*).
    self.p_diabetesyes = self.p_diabetesno = 0
    self.num_diabetesyes = self.num_diabetesno = 0

def trainNB(self):
    """Train the classifier: parse the data, then compute means and deviations.

    Runs traincleandata() first, stores the per-column mean of each class in
    a_diabetesyes / a_diabetesno, and finally delegates to get_bs().
    The last column (index num_elements - 1) is skipped.

    Raises:
        ZeroDivisionError: if a class has no values for a column.
    """
    self.traincleandata()

    feature_count = self.num_elements - 1
    for col in range(feature_count):
        yes_values = self.diabetes_yes[col]
        self.a_diabetesyes[col] = sum(yes_values) / len(yes_values)

        no_values = self.diabetes_no[col]
        self.a_diabetesno[col] = sum(no_values) / len(no_values)

    self.get_bs()

def traincleandata(self):
    """Parse every training line and collect per-class column values.

    The first line seen fixes num_elements and sizes all per-column
    containers. Each line is split on commas, cleaned with getcleanparams()
    and wrapped in an Entry. All but the last element of the entry are
    stored as Decimal in diabetes_yes or diabetes_no, depending on whether
    entry.diabetes equals "yes". Finally the class counts are turned into
    priors (fractions of all training entries).
    """
    for raw_line in self.train_data:
        if self.num_elements == 0:
            # First line: its width decides how many columns we track.
            self.num_elements = len(raw_line.split(','))
            width = self.num_elements
            self.diabetes_yes.extend([] for _ in range(width))
            self.diabetes_no.extend([] for _ in range(width))
            self.a_diabetesyes.extend([0.0] * width)
            self.a_diabetesno.extend([0.0] * width)
            self.b_diabetesyes.extend([0.0] * width)
            self.b_diabetesno.extend([0.0] * width)

        entry = Entry(getcleanparams(raw_line.split(',')))
        self.train_entries.append(entry)

        # Pick the class bucket once; every column of this entry goes there.
        has_diabetes = entry.diabetes == "yes"
        bucket = self.diabetes_yes if has_diabetes else self.diabetes_no
        for col in range(len(entry.elements) - 1):
            bucket[col].append(Decimal(entry.elements[col]))

        if has_diabetes:
            self.p_diabetesyes += 1
            self.num_diabetesyes += 1
        else:
            self.p_diabetesno += 1
            self.num_diabetesno += 1

    # Turn the raw class counts into prior probabilities.
    self.p_diabetesyes = Decimal(self.p_diabetesyes) / Decimal(len(self.train_entries))
    self.p_diabetesno = Decimal(self.p_diabetesno) / Decimal(len(self.train_entries))

def get_bs(self):
    """Compute the sample standard deviation of every feature for each class.

    For each column except the last, the values stored in diabetes_yes[i] /
    diabetes_no[i] are compared with the matching mean in a_diabetesyes[i] /
    a_diabetesno[i], and sqrt(sum((x - mean)^2) / (n - 1)) is written to
    b_diabetesyes[i] / b_diabetesno[i].

    The sample count n is taken from the value list itself, so the result
    does not depend on num_diabetesyes / num_diabetesno being in sync with
    the stored values.

    Raises:
        ZeroDivisionError: if a class has exactly one value for a column,
            because the n - 1 divisor is then zero.
    """

    def sample_std(values, mean):
        # Squared deviations are summed in list order, then divided by n - 1.
        squared_error = sum(m.pow(value - mean, 2) for value in values)
        return m.sqrt(squared_error / (len(values) - 1))

    for i in range(self.num_elements - 1):
        self.b_diabetesyes[i] = sample_std(self.diabetes_yes[i], self.a_diabetesyes[i])
        self.b_diabetesno[i] = sample_std(self.diabetes_no[i], self.a_diabetesno[i])

def testNB(self):
    self.testcleandata()
    self.testalgo()

def testcleandata(self):
    """Parse every test line into an Entry and append it to test_entries.

    Each line is split on commas and passed through getcleanparams() before
    being wrapped in an Entry.
    """
    for raw_line in self.test_data:
        fields = getcleanparams(raw_line.split(','))
        self.test_entries.append(Entry(fields))

def testalgo(self):
    counter = 1
    P_diabetesyes = [0] * self.num_elements
    P_diabetesno = [0] * self.num_elements
    for entry in self.test_entries:
        pYesEntry = 1
        pNoEntry = 1
        pYesEntry, pNoEntry = self.test_Entry(pYesEntry, pNoEntry, P_diabetesyes, P_diabetesno, entry)

        pYesEntry *= float(self.p_diabetesyes)
        pNoEntry *= float(self.p_diabetesno)

        entry.set_ifdiabetes("yes") if (pYesEntry / pNoEntry >= 1) else entry.set_ifdiabetes("no")
        counter += 1

def test_Entry(self, pYesEntry, pNoEntry, P_diabetesyes, P_diabetesno, entry):
    for i in range(self.num_elements - 1):
        P_diabetesyes[i] = Decimal((1 / (self.b_diabetesyes[i] * m.sqrt(2 * m.pi))) * m.pow(m.e, (
                -m.pow(Decimal(entry.elements[i]) - self.a_diabetesyes[i], 2) / (
                2 * m.pow(self.b_diabetesyes[i], 2)))))
        P_diabetesno[i] = Decimal((1 / (self.b_diabetesno[i] * m.sqrt(2 * m.pi))) * m.pow(m.e, (
                -m.pow(Decimal(entry.elements[i]) - self.a_diabetesno[i], 2) / (
                2 * m.pow(self.b_diabetesno[i], 2)))))
        pYesEntry *= float(P_diabetesyes[i])
        pNoEntry *= float(P_diabetesno[i])

    return pYesEntry, pNoEntry

Since you asked me how to do this in the comments...

To get i,j indices for a nested loop in a single line of code, using itertools:

import itertools

# Every (feature, sample) index pair in one flat loop per class.
feature_indices = range(self.num_elements - 1)

for i, j in itertools.product(feature_indices, range(self.num_diabetesyes)):
    diff_yes = self.diabetes_yes[i][j] - self.a_diabetesyes[i]
    sigSumYes[i] += m.pow(diff_yes, 2)

# The "no" class has its own sample count, so it needs its own index range;
# reusing range(self.num_diabetesyes) would raise IndexError or skip samples
# whenever the two classes differ in size.
for i, j in itertools.product(feature_indices, range(self.num_diabetesno)):
    diff_no = self.diabetes_no[i][j] - self.a_diabetesno[i]
    sigSumNo[i] += m.pow(diff_no, 2)

After that, have a separate for loop for the parts that only iterate over i.

# One standard deviation per feature and class: divide the summed squared
# deviations by (n - 1) and take the square root.
for feature in range(self.num_elements - 1):
    yes_variance = sigSumYes[feature] / (len(self.diabetes_yes[feature]) - 1)
    self.b_diabetesyes[feature] = m.sqrt(yes_variance)

    no_variance = sigSumNo[feature] / (len(self.diabetes_no[feature]) - 1)
    self.b_diabetesno[feature] = m.sqrt(no_variance)

Note: I don't think this is the best approach. Using NumPy and vector math would be much faster and more elegant.

Usually, "no libraries" rules in assignments mean "Don't use an off-the-shelf implementation of the algorithm you're asked to implement", not "Don't use any external convenience classes whatsoever".

Maybe nobody dares to answer that you shouldn't always be looking for further ways to change your code, especially when it works. Most important pieces of software are often redundant and unreadable because of repetition. Just improve the functionality or switch to new software.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM