如何計算R中分組數據行之間的距離和時間差的所有可能組合？

Question

我的數據包括美元鈔票的距離和時間。 我的數據如下：

   bid ts latitude longitude
1  123  0 38.40513  41.83777
2  123 23 38.41180  41.68493
3  123 45 42.20771  43.36318
4  123 50 40.22803  43.00208
5  456  0 39.12882  42.73877
6  456 12 38.46078  42.79847
7  456 27 40.53698  42.57617
8  456 19 39.04038  42.17070
9  234  0 39.18274  41.17445
10 234  8 39.58652  43.61317
11 234 15 41.32383  41.49377
12 234 23 40.26008  42.01927

bid = bill id

ts = 時間戳（天），以原始數據點為起點（t = 0）計算

緯度和經度=位置

此數據顯示了各張鈔票（以 bid 標識）在美國各地的移動情況。

我想針對每個 bid 組（每組 4 行），計算組內所有可能的行配對之間的平方距離和時間差。例如，對於 bid 為 123 的組，我想計算以下各對行之間的距離和時間差：第1行和第2行，第1行和第3行，第1行和第4行，第2行和第3行，第2行和第4行，第3行和第4行。

這樣就能得到該 bid 組內所有可能的行配對的計算結果。

我能夠在連續行之間使用dplyr執行此操作，如下所示：

detach("package:plyr", unload=TRUE)
library(magrittr)
library(dplyr)
library(geosphere)

# Per bill (bid), compare each row with the row directly before it:
#   dsq - squared haversine distance between the two positions
#   dt  - difference in ts between the two rows
# The first row of every group has no predecessor, so it gets NA and is
# removed by the final filter. Only consecutive rows are compared here.
deltadata <- df %>%
  group_by(bid) %>%
  mutate(
    dsq = c(
      NA,
      distHaversine(
        cbind(longitude[-n()], latitude[-n()]),  # rows 1 .. n-1 of the group
        cbind(longitude[-1], latitude[-1])       # rows 2 .. n of the group
      )^2
    ),
    dt = c(NA, diff(ts))
  ) %>%
  ungroup() %>%
  filter(!is.na(dsq))
deltadata

# A tibble: 21 x 6
     bid    ts latitude longitude          dsq    dt
   <dbl> <dbl>    <dbl>     <dbl>        <dbl> <dbl>
 1   123    23 38.41180  41.68493    178299634    23
 2   123    45 42.20771  43.36318 198827672092    22
 3   123    50 40.22803  43.00208  49480260636     5
 4   456    12 38.46078  42.79847   5557152213    12
 5   456    27 40.53698  42.57617  53781504422    15
 6   456    19 39.04038  42.17070  28958550947    -8
 7   234     8 39.58652  43.61317  46044153364     8
 8   234    15 41.32383  41.49377  69621429008     7
 9   234    23 40.26008  42.01927  15983792199     8
 10   345     5 40.25700  41.69525  26203255328     5
# ... with 11 more rows

問題：這只計算了連續行之間的平方距離和時間差，即：第1行和第2行，第2行和第3行，第3行和第4行。

有沒有一種實用的方法可以為每組中所有可能的行組合執行此操作？

我希望輸出中每個 bid 都有 6 個計算結果，如下所示：

# A tibble: 21 x 6
     bid    ts latitude longitude          dsq    dt
   <dbl> <dbl>    <dbl>     <dbl>        <dbl> <dbl>
 1   123    23 38.41180  41.68493    178299634    23  (for rows 1 and 2)
 2   123    45 42.20771  43.36318 198827672092    22  (for rows 1 and 3)
 3   123    50 40.22803  43.00208  49480260636     5  (for rows 1 and 4)
 4   123    12 38.46078  42.79847   5557152213    12  (for rows 2 and 3)
 5   123    27 40.53698  42.57617  53781504422    15  (for rows 2 and 4)
 6   123    19 39.04038  42.17070  28958550947    -8  (for rows 3 and 4)

我是R的新手，所以任何建議都表示贊賞！

Answer 1

你可以像這樣使用inner_join ：

library(dplyr)
library(geosphere)

# Sample data from the question: one row per sighting of a bill, with columns
# bid (bill id), ts (time stamp), latitude and longitude. The header line has
# one field fewer than the data lines, so read.table() uses the first field of
# each data line (1..12) as row names.
df <- read.table(text = '   bid ts latitude longitude
1  123  0 38.40513  41.83777
2  123 23 38.41180  41.68493
3  123 45 42.20771  43.36318
4  123 50 40.22803  43.00208
5  456  0 39.12882  42.73877
6  456 12 38.46078  42.79847
7  456 27 40.53698  42.57617
8  456 19 39.04038  42.17070
9  234  0 39.18274  41.17445
10 234  8 39.58652  43.61317
11 234 15 41.32383  41.49377
12 234 23 40.26008  42.01927')


# Self-join on bid: every row is paired with every row of the same bill.
# Columns from the left copy get the suffix .x, those from the right copy .y.
inner_join(df, df, by = "bid") %>%
  mutate(
    # Squared haversine distance between the two positions of the pair.
    dsq = distHaversine(
      cbind(longitude.x, latitude.x),
      cbind(longitude.y, latitude.y)
    )^2,
    # Time elapsed from the .y row to the .x row.
    dt = ts.x - ts.y
  ) %>%
  # Keeping dt > 0 removes each row paired with itself and the mirrored
  # duplicate of every pair. Note that two different rows sharing the same ts
  # would be dropped as well.
  filter(dt > 0)
#>    bid ts.x latitude.x longitude.x ts.y latitude.y longitude.y          dsq dt
#> 1  123   23   38.41180    41.68493    0   38.40513    41.83777    178300279 23
#> 2  123   45   42.20771    43.36318    0   38.40513    41.83777 195932999496 45
#> 3  123   45   42.20771    43.36318   23   38.41180    41.68493 198827439286 22
#> 4  123   50   40.22803    43.00208    0   38.40513    41.83777  51230447939 50
#> 5  123   50   40.22803    43.00208   23   38.41180    41.68493  53740739037 27
#> 6  123   50   40.22803    43.00208   45   42.20771    43.36318  49479978030  5
#> 7  456   12   38.46078    42.79847    0   39.12882    42.73877   5557111219 12
#> 8  456   27   40.53698    42.57617    0   39.12882    42.73877  24765506646 27
#> 9  456   27   40.53698    42.57617   12   38.46078    42.79847  53781664569 15
#> 10 456   27   40.53698    42.57617   19   39.04038    42.17070  28958542352  8
#> 11 456   19   39.04038    42.17070    0   39.12882    42.73877   2506329323 19
#> 12 456   19   39.04038    42.17070   12   38.46078    42.79847   7133122323  7
#> 13 234    8   39.58652    43.61317    0   39.18274    41.17445  46043956815  8
#> 14 234   15   41.32383    41.49377    0   39.18274    41.17445  57544071797 15
#> 15 234   15   41.32383    41.49377    8   39.58652    43.61317  69621225065  7
#> 16 234   23   40.26008    42.01927    0   39.18274    41.17445  19614888600 23
#> 17 234   23   40.26008    42.01927    8   39.58652    43.61317  24136886438 15
#> 18 234   23   40.26008    42.01927   15   41.32383    41.49377  15983645507  8

Answer 2

既然你也使用了data.table標簽，這里有一個使用該軟件包的解決方案：

library(data.table)
library(geosphere)

# Sample data from the question: one row per sighting of a bill, with columns
# bid (bill id), ts (time stamp), latitude and longitude. The header line has
# one field fewer than the data lines, so read.table() uses the first field of
# each data line (1..12) as row names.
df <- read.table(text = '   bid ts latitude longitude
1  123  0 38.40513  41.83777
2  123 23 38.41180  41.68493
3  123 45 42.20771  43.36318
4  123 50 40.22803  43.00208
5  456  0 39.12882  42.73877
6  456 12 38.46078  42.79847
7  456 27 40.53698  42.57617
8  456 19 39.04038  42.17070
9  234  0 39.18274  41.17445
10 234  8 39.58652  43.61317
11 234 15 41.32383  41.49377
12 234 23 40.26008  42.01927')
# Keying on bid sorts the table by bill and makes bid the join column below.
dt <- data.table(df, key = "bid")

# Keyed self-join: every row is matched with every row of the same bill.
# Columns of the joined-in copy get the prefix "i."; allow.cartesian = TRUE is
# required because the result has more rows than data.table allows by default.
dt <- dt[dt, allow.cartesian = TRUE]

# Keep each pair once, oriented from the earlier ts to the later i.ts. This
# also removes rows paired with themselves (and any pair with equal ts).
dt <- dt[ts < i.ts]

# Add both derived columns by reference in one step: the time difference (dt)
# and the squared haversine distance (dsq).
dt[, `:=`(
  dt = i.ts - ts,
  dsq = distHaversine(
    cbind(longitude, latitude),
    cbind(i.longitude, i.latitude)
  )^2
)]
dt
#>     bid ts latitude longitude i.ts i.latitude i.longitude dt          dsq
#>  1: 123  0 38.40513  41.83777   23   38.41180    41.68493 23    178300279
#>  2: 123  0 38.40513  41.83777   45   42.20771    43.36318 45 195932999496
#>  3: 123 23 38.41180  41.68493   45   42.20771    43.36318 22 198827439286
#>  4: 123  0 38.40513  41.83777   50   40.22803    43.00208 50  51230447939
#>  5: 123 23 38.41180  41.68493   50   40.22803    43.00208 27  53740739037
#>  6: 123 45 42.20771  43.36318   50   40.22803    43.00208  5  49479978030
#>  7: 234  0 39.18274  41.17445    8   39.58652    43.61317  8  46043956815
#>  8: 234  0 39.18274  41.17445   15   41.32383    41.49377 15  57544071797
#>  9: 234  8 39.58652  43.61317   15   41.32383    41.49377  7  69621225065
#> 10: 234  0 39.18274  41.17445   23   40.26008    42.01927 23  19614888600
#> 11: 234  8 39.58652  43.61317   23   40.26008    42.01927 15  24136886438
#> 12: 234 15 41.32383  41.49377   23   40.26008    42.01927  8  15983645507
#> 13: 456  0 39.12882  42.73877   12   38.46078    42.79847 12   5557111219
#> 14: 456  0 39.12882  42.73877   27   40.53698    42.57617 27  24765506646
#> 15: 456 12 38.46078  42.79847   27   40.53698    42.57617 15  53781664569
#> 16: 456 19 39.04038  42.17070   27   40.53698    42.57617  8  28958542352
#> 17: 456  0 39.12882  42.73877   19   39.04038    42.17070 19   2506329323
#> 18: 456 12 38.46078  42.79847   19   39.04038    42.17070  7   7133122323

如何計算R中分組數據行之間的距離和時間差的所有可能組合？

問題描述

2 個解決方案

解決方案1
3 2018-04-04 19:15:20

解決方案2
2 已采納 2018-04-05 10:03:50

如何計算R中分組數據行之間的距離和時間差的所有可能組合？

問題描述

2 個解決方案

解決方案1 3 2018-04-04 19:15:20

解決方案2 2 已采納 2018-04-05 10:03:50

解決方案1
3 2018-04-04 19:15:20

解決方案2
2 已采納 2018-04-05 10:03:50