Applying function over all rows of dataframe through iteration - Python

Question

I am working on this dataframe:

 Detection_Location Blast Hole East Coordinate North Coordinate Tag Detector ID Detection Start Time Detection end time Tags CV22 105,100,99 16764.83,16752.74,16743.1 107347.67,107360.32,107362.96 385742468,385112050,385087366 2018-09-06 20:02:46 2018-09-06 20:49:21 3 CV23 63,64,61 16755.07,16745.42,16773.48 107387.68,107390.32,107382.6 385262370,385656531,385760755 2018-09-08 14:12:42 2018-09-08 14:24:19 3 CV22 5,35,19 16757.27,16747.75,16770.89 107452.4,107417.68,107420.83 385662254,385453358,385826979 2018-09-23 05:01:12 2018-09-23 05:52:54 3

I am trying to pull X coordinate from the column 'East Coordinate' and y coordinate from column 'North coordinate'. I have written a function to calculate centroid of the 3 points in each row and then calculate distance from each point and a validation. It is working perfectly for one row.

I want to apply that function for each row and then return the answer. I have tried df.iterrows and df.apply as well but both giving same answer for all the rows so clearly its not working.

This is the code: (Only the last part of how to apply that function on each row and get the result as the additional column in the original dataset would be a challenge, rest code is just for understanding.)

def Calculate_dispersion(row):
    #Picking up the columns with x coordinates.
    df2 = df['East Coordinate'].tolist()
    #Picking up the columns with y coordinates.
    df3 = df['North Coordinate'].tolist()


    #Splitting the list into seperate x coordinates.
    df4 = pd.DataFrame([sub.split(",") for sub in df2])
    #Splitting the list into seperate y coordinates.
    df5 = pd.DataFrame([sub.split(",") for sub in df3])


    #Creating a tuple with x coordinates
    x1 = df4.iloc[0]
    x2 = x1.tolist()
    x3 = tuple(float(x) for x in x2)
    #Creating a tuple with y coordinates
    y1 = df5.iloc[0]
    y2 = y1.tolist()
    y3 = tuple(float(x) for x in y2)


    #Creating the Coordinate tuple for centroid calculation.
    c = (x3,y3)

    #Calculating centroid.
    centroid = (sum(c[0])/len(c[0]),sum(c[1])/len(c[1]))
    Centroid1 = (round(centroid[0],2), round(centroid[1],2))

    #Converting tuple in (x,y) form.
    a = (x3[0], y3[0])
    b = (x3[1], y3[1])
    c = (x3[2], y3[2])

    #Function for calculating distance from centroid.
    def get_distance(x1, x2, y1, y2):
        dist = math.sqrt((x2-x1)**2 +(y2-y1)**2)
        return dist


    #Assinging first coordinate points value.
    x1 = a[0]
    x2 = Centroid1[0]
    y1 = a[1]
    y2 = Centroid1[1]
    #Calculating distance for second coordinate point and centroid.
    distance_a_centroid = get_distance(x1,x2,y1,y2)
    print(distance_a_centroid)

    #Assinging second coordinate points value.
    x_1 = b[0]
    y_1 = b[1]
    #Calculating distance for second coordinate point and centroid.
    distance_b_centroid = get_distance(x_1, x2, y_1, y2)
    print(distance_b_centroid)


    #Assinging second coordinate points value.
    x_2 = c[0]
    y_2 = c[1]
    #Calculating distance for third coordinate point and centroid.
    distance_c_centroid = get_distance(x_2,x2,y_2,y2)
    print(distance_c_centroid)

    #calculate average dispersion
    Average_dispersion = (distance_a_centroid+distance_b_centroid+distance_c_centroid)/3
    print(Average_dispersion)

    #Validation statement
    if distance_a_centroid <= 16.00 and distance_b_centroid <= 16.00 and distance_c_centroid <=16.00 :
        print ("True")
    else:
        print("False")





for index, row in df.iterrows():
    Final_PHD = Calculate_dispersion(row)
print(Final_PHD)

Thanks in advance

Answer 1

It is simple, if you look at nothing inside the function depends on the row with which you execute the function, this implies that the function will always return the same. Note that the variables of the function are local variables and therefore are restarted with each function call.

Try this:

def Calculate_dispersion(row):
    #Picking up the columns with x coordinates.
    df2 = df.loc[row,'East Coordinate'].tolist()
    #Picking up the columns with y coordinates.
    df3 = df.loc[row,'North Coordinate'].tolist()


    #Splitting the list into seperate x coordinates.
    df4 = pd.DataFrame([sub.split(",") for sub in df2])
    #Splitting the list into seperate y coordinates.
    df5 = pd.DataFrame([sub.split(",") for sub in df3])


    #Creating a tuple with x coordinates
    x1 = df4.iloc[0]
    x2 = x1.tolist()
    x3 = tuple(float(x) for x in x2)
    #Creating a tuple with y coordinates
    y1 = df5.iloc[0]
    y2 = y1.tolist()
    y3 = tuple(float(x) for x in y2)


    #Creating the Coordinate tuple for centroid calculation.
    c = (x3,y3)

    #Calculating centroid.
    centroid = (sum(c[0])/len(c[0]),sum(c[1])/len(c[1]))
    Centroid1 = (round(centroid[0],2), round(centroid[1],2))

    #Converting tuple in (x,y) form.
    a = (x3[0], y3[0])
    b = (x3[1], y3[1])
    c = (x3[2], y3[2])

    #Function for calculating distance from centroid.
    def get_distance(x1, x2, y1, y2):
        dist = math.sqrt((x2-x1)**2 +(y2-y1)**2)
        return dist


    #Assinging first coordinate points value.
    x1 = a[0]
    x2 = Centroid1[0]
    y1 = a[1]
    y2 = Centroid1[1]
    #Calculating distance for second coordinate point and centroid.
    distance_a_centroid = get_distance(x1,x2,y1,y2)
    print(distance_a_centroid)

    #Assinging second coordinate points value.
    x_1 = b[0]
    y_1 = b[1]
    #Calculating distance for second coordinate point and centroid.
    distance_b_centroid = get_distance(x_1, x2, y_1, y2)
    print(distance_b_centroid)


    #Assinging second coordinate points value.
    x_2 = c[0]
    y_2 = c[1]
    #Calculating distance for third coordinate point and centroid.
    distance_c_centroid = get_distance(x_2,x2,y_2,y2)
    print(distance_c_centroid)

    #calculate average dispersion
    Average_dispersion = (distance_a_centroid+distance_b_centroid+distance_c_centroid)/3
    print(Average_dispersion)

    #Validation statement
    if distance_a_centroid <= 16.00 and distance_b_centroid <= 16.00 and distance_c_centroid <=16.00 :
        print ("True")
    else:
        print("False")





row=0
while row<len(df.index)
    Final_PHD = Calculate_dispersion(row)
    row+=1
    print(Final_PHD)

Applying function over all rows of dataframe through iteration - Python

Question

1 answers

solution1
0 2019-08-21 01:12:46

Applying function over all rows of dataframe through iteration - Python

Question

1 answers

solution1 0 2019-08-21 01:12:46

solution1
0 2019-08-21 01:12:46