简体   繁体   中英

How can I iterate through data in two data frames and keep the index of my first data frame?

I have two data frames with columns "latitude" and "longitude". I need to:

  • get all distances between lat/lon pair 1 in df1 with all lat/lon pairs in df2,
  • take the minimum of these distances and add it to a dictionary (key: row index in df1, value: minimum distance),
  • move on to lat/lon pair2 in df1
  • repeat

This is how far I got

def distance(df1, df2 = School_usable):
    """Map each row of ``df1`` to the distance of its nearest row in ``df2``.

    Both frames must have "latitude" and "longitude" columns holding values
    in degrees, plus the "code_postal" column used for sorting.

    Args:
        df1: Frame whose rows are the query points; its index labels become
            the keys of the result.
        df2: Frame whose rows are the candidate points.

    Returns:
        dict: ``{df1 index label: minimum distance}``, rounded to 2 decimals,
        in the order of ``df1`` sorted by "code_postal". The value is NaN
        when ``df2`` has no rows. Distances use the 12742 factor of the
        haversine formula (presumably Earth's mean diameter in km).
    """
    result = {}

    # Sorting only determines the insertion order of the returned dict.
    df1 = df1.sort_values(by="code_postal", ascending=True)
    df2 = df2.sort_values(by="code_postal", ascending=True)

    lat1 = np.asarray(df1["latitude"], dtype=float)
    lat2 = np.asarray(df2["latitude"], dtype=float)
    lon1 = np.asarray(df1["longitude"], dtype=float)
    lon2 = np.asarray(df2["longitude"], dtype=float)

    p = np.pi / 180  # degrees -> radians
    cos_lat2 = np.cos(lat2 * p)  # loop-invariant, computed once

    # enumerate keeps the positional counter and the index label in lockstep.
    for x, index in enumerate(df1.index):
        # Haversine term between df1 row x and *every* row of df2 at once;
        # the np.* functions work element-wise, unlike math.cos/asin/sqrt.
        a = (
            0.5
            - np.cos((lat2 - lat1[x]) * p) / 2
            + np.cos(lat1[x] * p) * cos_lat2
            * (1 - np.cos((lon2 - lon1[x]) * p)) / 2
        )
        # Clip guards against tiny floating-point excursions outside [0, 1],
        # which would otherwise turn into NaN inside sqrt/arcsin.
        distances = 12742 * np.arcsin(np.sqrt(np.clip(a, 0.0, 1.0)))

        if distances.size:
            result[index] = round(float(distances.min()), 2)
        else:
            # No candidate rows in df2: there is no nearest point.
            result[index] = float("nan")

    return result

distance()  # NOTE(review): called without the required `df1` argument, so this raises TypeError as written

# I worked it out and ended up with this:

from math import cos, asin, sqrt, pi

def distance(l1, L1, l2, L2):
    """Return the haversine distance between two points, rounded to 2 decimals.

    Args:
        l1: Longitude of the first point, in degrees.
        L1: Latitude of the first point, in degrees.
        l2: Longitude of the second point, in degrees.
        L2: Latitude of the second point, in degrees.

    Returns:
        The great-circle distance scaled by 12742 (presumably Earth's mean
        diameter in km, which would make the result kilometres).
    """
    deg_to_rad = pi / 180

    # Contribution of the latitude difference.
    lat_term = cos((L2 - L1) * deg_to_rad) / 2
    # Contribution of the longitude difference, weighted by both latitudes.
    lon_term = (
        cos(L1 * deg_to_rad)
        * cos(L2 * deg_to_rad)
        * (1 - cos((l2 - l1) * deg_to_rad))
        / 2
    )
    haversine = 0.5 - lat_term + lon_term

    central_half_angle = asin(sqrt(haversine))
    return round(12742 * central_half_angle, 2)

def minDistance(df1, df2):
    """Return, per row of ``df1``, the distance to the closest same-postcode row of ``df2``.

    Both frames are sorted by "code_postal" first. For every row of the
    sorted ``df1``, only the rows of ``df2`` with an equal "code_postal" are
    considered as candidates.

    Args:
        df1: Frame with "code_postal", "latitude" and "longitude" columns.
        df2: Frame with the same three columns.

    Returns:
        list: One minimum distance per row of ``df1``, in the order of
        ``df1`` sorted by "code_postal" (not the original row order). Rows
        with no matching postcode in ``df2`` get the sentinel ``1e308``.
    """
    left = df1.sort_values(by="code_postal", ascending=True)
    right = df2.sort_values(by="code_postal", ascending=True)

    left_codes = np.array(left["code_postal"])
    right_codes = np.array(right["code_postal"])

    left_lats = np.array(left["latitude"])
    right_lats = np.array(right["latitude"])
    left_lons = np.array(left["longitude"])
    right_lons = np.array(right["longitude"])

    nearest = []
    for code, lon, lat in zip(left_codes, left_lons, left_lats):
        # Sentinel kept when no row of df2 shares this postcode.
        best = 1e308
        # After sorting, equal postcodes are contiguous, so visiting every
        # matching position in ascending order covers the same rows as
        # walking forward from the first match.
        for pos in np.where(right_codes == code)[0]:
            candidate = distance(lon, lat, right_lons[pos], right_lats[pos])
            # Strict comparison: NaN candidates never replace the best.
            if candidate < best:
                best = candidate
        nearest.append(best)

    return nearest

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM