簡體   English   中英

left_join 基於 R 中最近的 LAT_LON

[英]left_join based on closest LAT_LON in R

我試圖在一個參考 data.frame 中,為我的原始 data.frame 的每一行找到 LAT_LON 最接近的 ID。 我已經解決了有唯一標識符的情況:先通過唯一標識符合並兩個 data.frame,再用 geosphere 包中的 distHaversine 函數計算距離。 現在,我想更進一步,合並沒有唯一標識符的 data.frame,並找到 LAT_LON 最接近的 ID。 合並后我使用了以下代碼:

# Add column CTD: the haversine distance between the (LON.x, LAT.x) and
# (LON.y, LAT.y) coordinate pairs of each merged row (longitude is passed first).
v3 <- v2 %>%
  mutate(
    CTD = distHaversine(
      cbind(LON.x, LAT.x),
      cbind(LON.y, LAT.y)
    )
  )

數據:

# Locations to be matched: one row per station with its coordinates and postcode.
loc <- data.frame(
  station = c("Baker Street", "Bank"),
  lat = c(51.522236, 51.5134047),
  lng = c(-0.157080, -0.08905843),
  postcode = c("NW1", "EC3V")
)
# Candidate stops that each location is compared against.
stop <- data.frame(
  station = c("Angel", "Barbican", "Barons Court", "Bayswater"),
  lat = c(51.53253, 51.520865, 51.490281, 51.51224),
  lng = c(-0.10579, -0.097758, -0.214340, -0.187569),
  postcode = c("EC1V", "EC1A", "W14", "W2")
)

作為最終結果,我想要這樣的東西:

# Shape of the desired result: one row per listed location/stop pairing, with a
# placeholder ("x") where the computed distance should go and the stop's
# coordinates and postcode alongside.
# The station names are generated with rep() so that no string literal has to
# be wrapped across lines (a wrapped literal would embed a newline and
# indentation inside the value "Baker Street").
df <- data.frame(
  loc = rep(c("Baker Street", "Bank"), times = 4),
  stop = c(
    "Angel", "Barbican", "Barons Court", "Bayswater",
    "Angel", "Barbican", "Barons Court", "Bayswater"
  ),
  dist = rep("x", times = 8),
  lat = c(
    51.53253, 51.520865, 51.490281, 51.51224,
    51.53253, 51.520865, 51.490281, 51.51224
  ),
  lng = c(
    -0.10579, -0.097758, -0.214340, -0.187569,
    -0.10579, -0.097758, -0.214340, -0.187569
  ),
  postcode = c("EC1V", "EC1A", "W14", "W2", "EC1V", "EC1A", "W14", "W2")
)

任何幫助表示贊賞。 謝謝。

由於物體之間的距離很小,我們可以通過使用坐標之間的歐幾里得距離來加快計算速度。 由於我們不在赤道附近,lng 坐標被壓縮了一點; 我們可以通過稍微縮放 lng 來使比較稍微好一點。

# Keep only the coordinates that feed the nearest-neighbour search.
cor_stop <- stop[, c("lat", "lng")]
cor_loc <- loc[, c("lat", "lng")]

# A degree of longitude covers cos(latitude) times the ground distance of a
# degree of latitude, so lng is shrunk by that factor to make plain Euclidean
# distance on (lat, lng) a reasonable proxy over a small area.
# One shared factor (taken at the mean latitude of all points) is applied to
# both tables so that they stay in the same coordinate space.
lng_scale <- cos(mean(c(cor_stop$lat, cor_loc$lat), na.rm = TRUE) / 180 * pi)
cor_stop$lng <- cor_stop$lng * lng_scale
cor_loc$lng <- cor_loc$lng * lng_scale

然后我們可以使用FNN包計算每個位置的最近停靠點,該包使用基於樹的搜索來快速找到最近的 K 個鄰居。 這應該擴展到大數據集(我已經將它用於具有數百萬條記錄的數據集):

library(FNN)
# For each row of cor_loc (the query points) find the single nearest row of
# cor_stop (the reference points); the result holds row indices into cor_stop.
matches <- knnx.index(
  data = cor_stop,
  query = cor_loc,
  k = 1
)
matches
##      [,1]
## [1,]    4
## [2,]    2

然后我們可以構建最終結果:

# Start from the locations and attach, column by column, the attributes of the
# stop selected for each location (first column of the index matrix).
res <- loc
res[["stop_station"]] <- stop[["station"]][matches[, 1]]
res[["stop_lat"]] <- stop[["lat"]][matches[, 1]]
res[["stop_lng"]] <- stop[["lng"]][matches[, 1]]
res[["stop_postcode"]] <- stop[["postcode"]][matches[, 1]]

並計算實際距離:

library(geosphere)
# Haversine distance between each location and its matched stop; both inputs
# are passed as (longitude, latitude) column pairs.
res$dist <- distHaversine(
  res[, c("lng", "lat")],
  res[, c("stop_lng", "stop_lat")]
)
res
##          station      lat         lng postcode stop_station stop_lat  stop_lng
## 1 Baker Street 51.52224 -0.15708000      NW1    Bayswater 51.51224 -0.187569
## 2         Bank 51.51340 -0.08905843     EC3V     Barbican 51.52087 -0.097758
##   stop_postcode     dist
## 1            W2 2387.231
## 2          EC1A 1026.091

如果您不確定經緯度上最近的點是否也是直線距離(“鳥飛”距離)上最近的點,您可以先用此方法選出經緯度上最近的 K 個點,然后計算這些點的實際距離,再從中選擇最近的點。

所有連接、距離計算和繪圖都可以使用可用的 R 包來完成。

library(tidyverse)
library(sf)
#> Linking to GEOS 3.6.2, GDAL 2.2.3, PROJ 4.9.3
library(nngeo)
library(mapview)

## Original data
# Locations to be matched (station name, coordinates, postcode).
loc <- data.frame(
  station = c("Baker Street", "Bank"),
  lat = c(51.522236, 51.5134047),
  lng = c(-0.157080, -0.08905843),
  postcode = c("NW1", "EC3V")
)

# Candidate stops (station name, coordinates, postcode).
stop <- data.frame(
  station = c("Angel", "Barbican", "Barons Court", "Bayswater"),
  lat = c(51.53253, 51.520865, 51.490281, 51.51224),
  lng = c(-0.10579, -0.097758, -0.214340, -0.187569),
  postcode = c("EC1V", "EC1A", "W14", "W2")
)

# Shape of the desired result: one row per listed location/stop pairing, with a
# placeholder ("x") where the computed distance should go and the stop's
# coordinates and postcode alongside.
# The station names are generated with rep() so that no string literal has to
# be wrapped across lines (a wrapped literal would embed a newline and
# indentation inside the value "Baker Street").
df <- data.frame(
  loc = rep(c("Baker Street", "Bank"), times = 4),
  stop = c(
    "Angel", "Barbican", "Barons Court", "Bayswater",
    "Angel", "Barbican", "Barons Court", "Bayswater"
  ),
  dist = rep("x", times = 8),
  lat = c(
    51.53253, 51.520865, 51.490281, 51.51224,
    51.53253, 51.520865, 51.490281, 51.51224
  ),
  lng = c(
    -0.10579, -0.097758, -0.214340, -0.187569,
    -0.10579, -0.097758, -0.214340, -0.187569
  ),
  postcode = c("EC1V", "EC1A", "W14", "W2", "EC1V", "EC1A", "W14", "W2")
)



## Create sf objects from lat/lon points
# coords is given as (x, y) = (lng, lat); remove = TRUE drops the two source
# columns once they have been turned into the geometry column.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
# EPSG:4326 is then set as the CRS of the points.
loc_sf <- loc %>%
  st_as_sf(coords = c("lng", "lat"), remove = TRUE) %>%
  st_set_crs(4326)

stop_sf <- stop %>%
  st_as_sf(coords = c("lng", "lat"), remove = TRUE) %>%
  st_set_crs(4326)


# For every stop, st_nearest_feature() gives the row index of its nearest
# location; those location rows are selected (repeating as needed) and bound
# column-wise next to the stops.
joined_sf <- cbind(
  stop_sf,
  loc_sf[st_nearest_feature(stop_sf, loc_sf), ]
)


## mutate to add column showing distance between geometries
# geometry holds the stop point and geometry.1 the matched location point;
# by_element = TRUE asks for the row-wise distance between the two columns.
# TRUE is spelled out because T is an ordinary variable that can be reassigned.
joined_sf %>%
  mutate(dist = st_distance(geometry, geometry.1, by_element = TRUE))
#> Simple feature collection with 4 features and 5 fields
#> Active geometry column: geometry
#> geometry type:  POINT
#> dimension:      XY
#> bbox:           xmin: -0.21434 ymin: 51.49028 xmax: -0.097758 ymax: 51.53253
#> epsg (SRID):    4326
#> proj4string:    +proj=longlat +datum=WGS84 +no_defs
#>        station postcode    station.1 postcode.1                   geometry
#> 1        Angel     EC1V         Bank       EC3V  POINT (-0.10579 51.53253)
#> 2     Barbican     EC1A         Bank       EC3V POINT (-0.097758 51.52087)
#> 3 Barons Court      W14 Baker Street        NW1  POINT (-0.21434 51.49028)
#> 4    Bayswater       W2 Baker Street        NW1 POINT (-0.187569 51.51224)
#>                    geometry.1         dist
#> 1 POINT (-0.08905843 51.5134) 2424.102 [m]
#> 2 POINT (-0.08905843 51.5134) 1026.449 [m]
#> 3   POINT (-0.15708 51.52224) 5333.417 [m]
#> 4   POINT (-0.15708 51.52224) 2390.791 [m]



## Use nngeo and mapview to plot lines on a map
# NOT run for reprex, output image attached 
#connected <- st_connect(stop_sf, loc_sf)
# mapview(connected) + 
#   mapview(loc_sf, color = 'red') +
#   mapview(stop_sf, color = 'black')

reprex 包(v0.3.0) 於 2020 年 1 月 21 日創建

在此處輸入圖片說明

如果您能夠使用投影坐標系,可以完全避免搜索最近的鄰居。 如果可以,那么您可以在每個位置周圍廉價地構建Voronoi 多邊形- 這些多邊形定義了最靠近每個輸入點的區域。

然后,您可以使用 GIS 相交(intersection)運算來查找哪些點落在哪些多邊形中,再只對這些已知的最近點對計算距離。 我認為這應該快得多。 但是,Voronoi 多邊形不能用於地理(經緯度)坐標,因此需要先投影。

# Locations around which the Voronoi polygons will be built.
loc <- data.frame(
  station = c("Baker Street", "Bank"),
  lat = c(51.522236, 51.5134047),
  lng = c(-0.157080, -0.08905843),
  postcode = c("NW1", "EC3V")
)

# Stops to be assigned to their nearest location.
stop <- data.frame(
  station = c("Angel", "Barbican", "Barons Court", "Bayswater"),
  lat = c(51.53253, 51.520865, 51.490281, 51.51224),
  lng = c(-0.10579, -0.097758, -0.214340, -0.187569),
  postcode = c("EC1V", "EC1A", "W14", "W2")
)

# Convert to a suitable PCS (in this case OSGB): build the points from the
# lng/lat columns as EPSG:4326, then reproject to EPSG:27700.
stop <- st_as_sf(stop, coords = c("lng", "lat"), crs = 4326)
stop <- st_transform(stop, crs = 27700)
loc <- st_as_sf(loc, coords = c("lng", "lat"), crs = 4326)
loc <- st_transform(loc, crs = 27700)

# Extract Voronoi polygons around locations and convert to an sf object.
# All location points are combined into one geometry before st_voronoi() is
# called. st_crs() is sf's CRS accessor (sf does not export a crs() function).
loc_voronoi <- st_collection_extract(st_voronoi(do.call(c, st_geometry(loc))))
loc_voronoi <- st_sf(loc_voronoi, crs = st_crs(loc))

# Match Voronoi polygons to locations and select that geometry: each location
# is paired with the polygon it intersects, then the polygon column becomes
# the active geometry of loc.
loc$voronoi <- loc_voronoi$loc_voronoi[unlist(st_intersects(loc, loc_voronoi))]
st_geometry(loc) <- "voronoi"

# Find which location is closest to each stop, i.e. whose polygon contains it.
# NOTE(review): this assumes every stop intersects exactly one polygon; a stop
# outside the diagram's extent or exactly on a shared edge would break the
# one-to-one unlist() - confirm for real data.
stop$loc <- loc$station[unlist(st_intersects(stop, loc))]

# Reset locs to use the point geometry and get distances.
# Rows of loc are looked up by station name with match(); indexing
# loc[stop$loc, ] directly would treat the names as row names (or, for factors,
# as integer codes) and only works when those happen to line up with the rows.
st_geometry(loc) <- "geometry"
stop$loc_dist <- st_distance(
  stop,
  loc[match(stop$loc, loc$station), ],
  by_element = TRUE
)

這為您提供以下輸出:

Simple feature collection with 4 features and 4 fields
geometry type:  POINT
dimension:      XY
bbox:           xmin: 524069.7 ymin: 178326.3 xmax: 532074.6 ymax: 183213.9
epsg (SRID):    27700
proj4string:    +proj=tmerc +lat_0=49 +lon_0=-2 +k=0.9996012717 +x_0=400000 +y_0=-100000 +ellps=airy +towgs84=446.448,-125.157,542.06,0.15,0.247,0.842,-20.489 +units=m +no_defs
       station postcode                  geometry          loc     loc_dist
1        Angel     EC1V POINT (531483.8 183213.9)         Bank 2423.722 [m]
2     Barbican     EC1A POINT (532074.6 181931.2)         Bank 1026.289 [m]
3 Barons Court      W14 POINT (524069.7 178326.3) Baker Street 5332.478 [m]
4    Bayswater       W2 POINT (525867.7 180813.9) Baker Street 2390.377 [m]

我不確定我是否正確理解了您的問題,但您可以先對 loc 和 stop 做交叉連接(cross join),然后添加一列距離。

library(dplyr)

# Left-hand table of the cross join: the locations.
loc <- data.frame(
  station = c("Baker Street", "Bank"),
  lat = c(51.522236, 51.5134047),
  lng = c(-0.157080, -0.08905843),
  postcode = c("NW1", "EC3V")
)

# Right-hand table of the cross join: the stops.
stop <- data.frame(
  station = c("Angel", "Barbican", "Barons Court", "Bayswater"),
  lat = c(51.53253, 51.520865, 51.490281, 51.51224),
  lng = c(-0.10579, -0.097758, -0.214340, -0.187569),
  postcode = c("EC1V", "EC1A", "W14", "W2")
)

# Turn both data frames into data.tables
loc_dt <- data.table::setDT(loc)
stop_dt <- data.table::setDT(stop)

# Cross join: every location paired with every stop. In the printed result
# below, the stop columns carry an "i." prefix.
coordinates_dt <- optiRum::CJ.dt(loc_dt, stop_dt)

# Add the haversine distance of each location/stop pair as column dist_m
# (arguments are passed latitude first, then longitude, for each point)
mutate(
  coordinates_dt,
  dist_m = spatialrisk::haversine(lat, lng, i.lat, i.lng)
)
#>         station      lat         lng postcode    i.station    i.lat     i.lng
#> 1: Baker Street 51.52224 -0.15708000      NW1        Angel 51.53253 -0.105790
#> 2:         Bank 51.51340 -0.08905843     EC3V        Angel 51.53253 -0.105790
#> 3: Baker Street 51.52224 -0.15708000      NW1     Barbican 51.52087 -0.097758
#> 4:         Bank 51.51340 -0.08905843     EC3V     Barbican 51.52087 -0.097758
#> 5: Baker Street 51.52224 -0.15708000      NW1 Barons Court 51.49028 -0.214340
#> 6:         Bank 51.51340 -0.08905843     EC3V Barons Court 51.49028 -0.214340
#> 7: Baker Street 51.52224 -0.15708000      NW1    Bayswater 51.51224 -0.187569
#> 8:         Bank 51.51340 -0.08905843     EC3V    Bayswater 51.51224 -0.187569
#>    i.postcode   dist_m
#> 1:       EC1V 3732.422
#> 2:       EC1V 2423.989
#> 3:       EC1A 4111.786
#> 4:       EC1A 1026.091
#> 5:        W14 5328.649
#> 6:        W14 9054.998
#> 7:         W2 2387.231
#> 8:         W2 6825.897

reprex 包(v1.0.0) 於 2021 年 4 月 7 日創建

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM