簡體   English   中英

R:展開組內所有可能組合的網格並在所有對中應用函數

[英]R: expand grid of all possible combinations within groups and apply functions across all the pairs

data <- tibble(time = c(1,1,2,2), a = c(1,2,3,4), b =c(4,3,2,1), c = c(1,1,1,1))

結果將如下所示

result <- tibble( 
             t = c(1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2),
             firm1 = c("a","a","a","b","b","b","c","c","c","a","a","a","b","b","b","c","c","c"),
             firm2 = c("a","b","c","a","b","c","a","b","c","a","b","c","a","b","c","a","b","c"),
             value = c(6,10,5,10,14,9,5,9,4,14,10,9,10,6,5,9,5,4))
result

function 可以是

function(x, y){sum(x, y)}

基本上我正在尋找一個整潔的解決方案來在每個時間點擴展網格數據並跨列應用功能。 誰能幫忙? 我試過了,但我沒時間在雙人面前。

expected_result<-expand.grid(names(data[-1]), names(data[-1])) %>%
  mutate(value = map2(Var1, Var2, ~ fun1(data[.x], data[.y])))
expected_result

我們可能會使用

library(dplyr)
library(tidyr)
library(purrr)
 data1 <- data %>% 
    group_by(time) %>% 
    summarise(across(everything(), sum, na.rm = TRUE), .groups = 'drop') %>%
     pivot_longer(cols = -time) %>% 
     group_split(time)
  map_dfr(data1, ~ {dat <- .x
       crossing(firm1 = dat$name, firm2 = dat$name) %>% 
       mutate(value = c(outer(dat$value, dat$value, FUN = `+`))) %>% 
       mutate(time = first(dat$time), .before = 1)})

-輸出

# A tibble: 18 × 4
    time firm1 firm2 value
   <dbl> <chr> <chr> <dbl>
 1     1 a     a         6
 2     1 a     b        10
 3     1 a     c         5
 4     1 b     a        10
 5     1 b     b        14
 6     1 b     c         9
 7     1 c     a         5
 8     1 c     b         9
 9     1 c     c         4
10     2 a     a        14
11     2 a     b        10
12     2 a     c         9
13     2 b     a        10
14     2 b     b         6
15     2 b     c         5
16     2 c     a         9
17     2 c     b         5
18     2 c     c         4

使用exand.grid您可以獲得所有可能的列組合,按時間拆分數據並為tmp的每一行應用fun

library(dplyr)
library(purrr)

tmp <- expand.grid(firm1 = names(data[-1]), firm2 = names(data[-1]))

fun <- function(x, y) sum(x, y)

result <- data %>%
  group_split(time) %>%
  map_df(~cbind(time = .x$time[1], tmp, 
                value = apply(tmp, 1, function(x) fun(.x[[x[1]]], .x[[x[2]]]))))

result

#   time firm1 firm2 value
#1     1     a     a     6
#2     1     b     a    10
#3     1     c     a     5
#4     1     a     b    10
#5     1     b     b    14
#6     1     c     b     9
#7     1     a     c     5
#8     1     b     c     9
#9     1     c     c     4
#10    2     a     a    14
#11    2     b     a    10
#12    2     c     a     9
#13    2     a     b    10
#14    2     b     b     6
#15    2     c     b     5
#16    2     a     c     9
#17    2     b     c     5
#18    2     c     c     4

您也可以在 base R 中執行此操作 -

result <- do.call(rbind, by(data, data$time, function(x) {
  cbind(time = x$time[1], tmp, 
        value = apply(tmp, 1, function(y) fun(x[[y[1]]], x[[y[2]]])))
}))

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM