簡體   English   中英

如何將兩列與數組合並

[英]How to combine two columns with an array

我有一個 DF_1,其中列出了出發地和目的地城市,我想知道每一對城市之間相距多遠(英里/公里)。 在 DF_2 中,我有各城市之間的距離。 我該如何利用這兩個 DF 得到每條路線的距離?

DF_1

# Routes to look up: one row per trip, given as an origin/destination pair.
origin <- c("LONDON", "NEW YORK", "TOKIO", "LONDON", "RIO DE JANEIRO")
destination <- c(
  "NEW YORK", "NEW YORK", "RIO DE JANEIRO", "LISBON", "MADRID"
)
DF_1 <- data.frame(origin = origin, destination = destination)

DF_2

# Distance lookup table: CITY names each row, and every other column holds the
# distances from the row's city to the city the column is named after.
CITY <- c(
  "NEW YORK", "LONDON", "SAN FRANCISCO", "MADRID", "LOS ANGELES",
  "LISBON", "RIO DE JANEIRO", "MOSCOW", "SAO PAULO", "TOKIO"
)

# One vector per column; values are listed in the same order as CITY.
NEW_YORK <- c(0, 700, 250, 1000, 400, 800, 430, 900, 500, 30)
LONDON <- c(700, 0, 350, 1200, 50, 110, 780, 984, 1150, 5)
SAN_FRANCISCO <- c(250, 350, 0, 200, 15, 260, 305, 412, 29, 102)
MADRID <- c(1000, 1200, 200, 0, 77, 115, 225, 318, 412, 511)
LOS_ANGELES <- c(400, 50, 15, 77, 0, 88, 819, 733, 978, 1001)
LISBON <- c(800, 110, 260, 115, 88, 0, 17, 3000, 1418, 735)
RIO_DE_JANEIRO <- c(430, 780, 305, 225, 819, 17, 0, 513, 701, 56)
MOSCOW <- c(900, 984, 412, 318, 733, 3000, 513, 0, 389, 499)
SAO_PAULO <- c(500, 1150, 29, 412, 978, 1418, 701, 389, 0, 1113)
TOKIO <- c(30, 5, 102, 511, 1001, 735, 56, 499, 1113, 0)

# check.names = FALSE keeps the spaces in the column names, so they stay
# identical to the city names stored in CITY.
DF_2 <- data.frame(
  CITY = CITY,
  `NEW YORK` = NEW_YORK,
  LONDON = LONDON,
  `SAN FRANCISCO` = SAN_FRANCISCO,
  MADRID = MADRID,
  `LOS ANGELES` = LOS_ANGELES,
  LISBON = LISBON,
  `RIO DE JANEIRO` = RIO_DE_JANEIRO,
  MOSCOW = MOSCOW,
  `SAO PAULO` = SAO_PAULO,
  TOKIO = TOKIO,
  check.names = FALSE
)

我想要的結果是這樣的:

# Expected result: the original routes plus the distance looked up for each.
origin <- c("LONDON", "NEW YORK", "TOKIO", "LONDON", "RIO DE JANEIRO")
destination <- c(
  "NEW YORK", "NEW YORK", "RIO DE JANEIRO", "LISBON", "MADRID"
)
distance <- c(700, 0, 56, 110, 225)
DF_FINAL <- data.frame(
  origin = origin,
  destination = destination,
  distance = distance
)

使用基礎 R,您可以這樣做:

# Label the rows of the distance columns with the city names, then pick one
# cell per route by indexing with the (origin, destination) character matrix.
transform(
  DF_1,
  distance = `row.names<-`(DF_2[-1], value = DF_2[[1]])[as.matrix(DF_1)]
)

          origin    destination distance
1         LONDON       NEW YORK      700
2       NEW YORK       NEW YORK        0
3          TOKIO RIO DE JANEIRO       56
4         LONDON         LISBON      110
5 RIO DE JANEIRO         MADRID      225

也就是說,創建一個新的 dataframe,並以城市名稱作為它的 rownames:

# Build a copy of the distance table that can be addressed by city name.
DF_3 <- DF_2[, -1] # drop the CITY column, keeping only the distance columns
rownames(DF_3) <- DF_2$CITY # label each row with its city
# A two-column character matrix used as an index selects one cell per row:
# (origin, destination) -> (row name, column name). Only the two key columns
# are used, so re-running this after `distance` has been added still works.
route_keys <- as.matrix(DF_1[c("origin", "destination")])
# Name the column `distance` to match the requested result.
DF_1$distance <- DF_3[route_keys]
DF_1

這是另一種使用 base R 按行/列位置進行索引的做法:

# Translate each route into (row, column) positions within the distance
# columns, then pull out one cell per route with a two-column index matrix.
dist_cols <- DF_2[-1]
row_pos <- match(DF_1[["origin"]], DF_2[["CITY"]])
col_pos <- match(DF_1[["destination"]], names(dist_cols))
DF_1[["distance"]] <- dist_cols[cbind(row_pos, col_pos)]
DF_1
#          origin    destination distance
#1         LONDON       NEW YORK      700
#2       NEW YORK       NEW YORK        0
#3          TOKIO RIO DE JANEIRO       56
#4         LONDON         LISBON      110
#5 RIO DE JANEIRO         MADRID      225

這應該准確地重現您正在尋找的內容(使用tidyverse ):

library(dplyr)
library(tidyr)

# Attach each origin's row of distances, reshape the distance columns to long
# form (one row per route and candidate city), then keep only the row whose
# candidate city is the requested destination.
# pivot_longer() replaces the superseded gather(); it emits rows route by
# route, so the result stays in the same order as DF_1.
DF_FINAL <- DF_1 %>%
  inner_join(DF_2, by = c("origin" = "CITY")) %>%
  pivot_longer(
    cols = -c(origin, destination),
    names_to = "city",
    values_to = "distance"
  ) %>%
  filter(destination == city) %>%
  select(-city)

DF_FINAL
|origin         |destination    | distance|
|:--------------|:--------------|--------:|
|LONDON         |NEW YORK       |      700|
|NEW YORK       |NEW YORK       |        0|
|TOKIO          |RIO DE JANEIRO |       56|
|LONDON         |LISBON         |      110|
|RIO DE JANEIRO |MADRID         |      225|

我嘗試在 tidyverse 框架中完成這件事。 第一步是將距離矩陣轉換為“長”格式。 然後,只需將其與原始 data.frame 進行連接(join)即可。

我建議在data.frame()定義的末尾添加stringsAsFactors = FALSE以避免警告消息。

library(tidyr)
library(dplyr)

# Reshape the distance table to long form, one row per (origin, destination)
# pair, then keep only the pairs listed in DF_1.
# The join keys are spelled out so the join does not depend on whichever
# column names the two tables happen to share, and prints no "Joining" message.
DF_2 %>%
  pivot_longer(-CITY, names_to = "destination", values_to = "distance") %>%
  rename(origin = CITY) %>%
  right_join(DF_1, by = c("origin", "destination"))

# A tibble: 5 x 3
  origin         destination    distance
  <chr>          <chr>             <dbl>
1 LONDON         NEW YORK            700
2 NEW YORK       NEW YORK              0
3 TOKIO          RIO DE JANEIRO       56
4 LONDON         LISBON              110
5 RIO DE JANEIRO MADRID              225

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM