簡體   English   中英

使用所有可能的組合創建一個新的 dataframe

[英]Create a new dataframe with the all possible combinations

擁有這樣的 dataframe :

# `next` is a reserved word in R, so it has to be back-quoted to be usable as
# an argument name. With the default check.names = TRUE, data.frame() then
# stores the column under the syntactic name `next.`.
data.frame(
  previous = c(1, 2, 2, 1, 3, 3),
  `next` = c(1, 1, 2, 3, 1, 3),
  id = c(1, 2, 3, 4, 5, 6)
)

如何得到一個數據框:它檢查 previous 列和 next 列並創建 9 個新列,只有當該行的 previous 與 next 的組合與某一列對應時,該列才為 1,否則為 0。 例如,如果 previous 是 2 而 next 是 1,則組合為 2_1,對應的列 col2_1 取值為 1。

預期 output 的示例:

# Expected result: one 0/1 indicator column per (previous, next) pair, named
# col<previous>_<next>. `next` is a reserved word, so it is back-quoted here;
# data.frame() stores it as `next.` under the default check.names = TRUE.
data.frame(
  previous = c(1, 2, 2, 1, 3, 3),
  `next` = c(1, 1, 2, 3, 1, 3),
  col1_1 = c(1, 0, 0, 0, 0, 0),
  col1_2 = c(0, 0, 0, 0, 0, 0),
  col1_3 = c(0, 0, 0, 1, 0, 0),
  col2_1 = c(0, 1, 0, 0, 0, 0),
  col2_2 = c(0, 0, 1, 0, 0, 0),
  col2_3 = c(0, 0, 0, 0, 0, 0),
  col3_1 = c(0, 0, 0, 0, 1, 0),
  col3_2 = c(0, 0, 0, 0, 0, 0),
  col3_3 = c(0, 0, 0, 0, 0, 1),
  id = c(1, 2, 3, 4, 5, 6)
)

您可以使用expand.grid來獲取所有組合。

假設您的數據框名為 df,並且 next 列實際上被命名為 next.(以避免與關鍵字 next 沖突):

# For each of the 9 (previous, next.) value pairs -- first value varying
# fastest, the same order expand.grid(1:3, 1:3) produces -- build a 0/1
# indicator over the rows of df marking where that pair occurs.
as.data.frame(
  mapply(
    function(prev_value, next_value) {
      as.numeric(df$previous == prev_value & df$next. == next_value)
    },
    rep(1:3, times = 3),
    rep(1:3, each = 3)
  )
)
#>   V1 V2 V3 V4 V5 V6 V7 V8 V9
#> 1  1  0  0  0  0  0  0  0  0
#> 2  0  1  0  0  0  0  0  0  0
#> 3  0  0  0  0  1  0  0  0  0
#> 4  0  0  0  0  0  0  1  0  0
#> 5  0  0  1  0  0  0  0  0  0
#> 6  0  0  0  0  0  0  0  0  1

一步一步的方法可能如下。 我已將 next 列名更改為 next1 以避免出現問題:

# All (previous, next1) value pairs; Var1 (previous) varies fastest.
AllComb <- expand.grid(unique(df$previous), unique(df$next1))

# One indicator column per pair and one row per row of df, initialised to 0.
myframe <- matrix(0, nrow = nrow(df), ncol = nrow(AllComb))
colnames(myframe) <- paste0("col_", AllComb$Var1, "_", AllComb$Var2)

# Iterate over the ROWS of df (nrow, not ncol): df has 3 columns but 6 rows,
# so looping to ncol(df) would leave rows 4-6 without their indicator set.
for (id_row in seq_len(nrow(df))) {
  # Name of the column that corresponds to this row's pair.
  Word <- paste0("col_", df$previous[id_row], "_", df$next1[id_row])
  Colindex <- which(colnames(myframe) == Word)
  myframe[id_row, Colindex] <- 1
}
dfRes <- cbind(previous = df$previous, "next" = df$next1, myframe, id = df$id)

# previous next col_1_1 col_2_1 col_3_1 col_1_2 col_2_2 col_3_2 col_1_3 col_2_3 col_3_3 id
# [1,]        1    1       1       0       0       0       0       0       0       0       0  1
# [2,]        2    1       0       1       0       0       0       0       0       0       0  2
# [3,]        2    2       0       0       0       0       1       0       0       0       0  3
# [4,]        1    3       0       0       0       0       0       0       1       0       0  4
# [5,]        3    1       0       0       1       0       0       0       0       0       0  5
# [6,]        3    3       0       0       0       0       0       0       0       0       1  6

在 by 中您可以使用 switch,因為您的值恰好是連續的 1:3。 最後我們用 merge 得到結果。

# Split dat by its next. value; within each group, encode previous (1, 2 or 3)
# as three 0/1 columns.
tmp <- by(dat, dat$next., function(x) {
  x1 <- x$previous
  # One row per observation with a 1 in the position of its previous value.
  # vapply() always yields a 3 x n matrix, also for single-row groups.
  o <- t(vapply(x1, function(z) {
    switch(z, c(1, 0, 0), c(0, 1, 0), c(0, 0, 1))
  }, numeric(3)))
  # Label the columns <previous>_<next.> using this group's next. value, so
  # every group contributes its own three columns and none of them collide.
  colnames(o) <- paste(1:3, x$next.[1], sep = "_")
  cbind(x, col = o)
})

# Full outer merge of the per-group frames; columns a group does not have
# come out as NA.
res <- Reduce(function(...) merge(..., all = TRUE), tmp)
res[is.na(res)] <- 0  ## set NA to zero if wanted

結果

res[order(res$id), ]   ## order by ID if needed
#   previous next. id col.1_1 col.2_1 col.3_1 col.1_2 col.2_2 col.3_2 col.1_3 col.2_3 col.3_3
# 1        1     1  1       1       0       0       0       0       0       0       0       0
# 3        2     1  2       0       1       0       0       0       0       0       0       0
# 4        2     2  3       0       0       0       0       1       0       0       0       0
# 2        1     3  4       0       0       0       0       0       0       1       0       0
# 5        3     1  5       0       0       1       0       0       0       0       0       0
# 6        3     3  6       0       0       0       0       0       0       0       0       1

數據

# Example input: six observations with their previous / next. values and an id.
dat <- data.frame(
  previous = c(1, 2, 2, 1, 3, 3),
  next. = c(1, 1, 2, 3, 1, 3),
  id = c(1, 2, 3, 4, 5, 6)
)

注意: next作為列名並不是一個特別好的主意,因為它在 R 中具有特殊含義。

這是一個 tidyverse 方法:

library(tidyr)
library(dplyr)

df %>%
  # Row counter used as the pivoted value. row_number() comes from dplyr;
  # rowid_to_column() lives in tibble, which library(tidyr) and
  # library(dplyr) do not attach.
  mutate(rowid = row_number()) %>%
  # Add a placeholder row for every (previous, nxt) pair missing from df.
  complete(previous, nxt) %>%
  unite(col, previous, nxt, sep = "_", remove = FALSE) %>%
  # One column per pair: 1 where the pair occurs in that row, 0 elsewhere.
  pivot_wider(
    names_from = col,
    values_from = rowid,
    values_fn = list(rowid = ~1),
    values_fill = list(rowid = 0)
  ) %>%
  # Drop the placeholder rows added by complete() (their id is NA).
  na.omit() %>%
  arrange(id)

# A tibble: 6 x 12
  previous   nxt    id `1_1` `1_2` `1_3` `2_1` `2_2` `2_3` `3_1` `3_2` `3_3`
     <dbl> <dbl> <dbl> <int> <int> <int> <int> <int> <int> <int> <int> <int>
1        1     1     1     1     0     0     0     0     0     0     0     0
2        2     1     2     0     0     0     1     0     0     0     0     0
3        2     2     3     0     0     0     0     1     0     0     0     0
4        1     3     4     0     0     1     0     0     0     0     0     0
5        3     1     5     0     0     0     0     0     0     1     0     0
6        3     3     6     0     0     0     0     0     0     0     0     1

這是另一個tidyverse解決方案,與 @H1 的解決方案略有不同(可能更簡潔)。

library(dplyr)
library(tidyr)

df %>%
  # Mark every observed (id, previous, next.) row with 1 ...
  mutate(n = 1) %>%
  # ... and every unobserved combination with 0.
  complete(id, previous, next., fill = list(n = 0)) %>%
  # Spread to one "col<previous>_<next.>" column per pair, one row per id.
  pivot_wider(
    names_from = c(previous, next.),
    names_sep = "_",
    names_prefix = "col",
    values_from = n
  ) %>%
  # Bring the original previous / next. columns back.
  right_join(df)

# # A tibble: 6 x 12
#      id col1_1 col1_2 col1_3 col2_1 col2_2 col2_3 col3_1 col3_2 col3_3 previous next.
#   <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>  <dbl>    <dbl> <dbl>
# 1     1      1      0      0      0      0      0      0      0      0        1     1
# 2     2      0      0      0      1      0      0      0      0      0        2     1
# 3     3      0      0      0      0      1      0      0      0      0        2     2
# 4     4      0      0      1      0      0      0      0      0      0        1     3
# 5     5      0      0      0      0      0      0      1      0      0        3     1
# 6     6      0      0      0      0      0      0      0      0      1        3     3

你可以試試下面的代碼

# Adds a 6 x 9 indicator matrix `col` to df. Position (Previous - 1) * 3 + Next
# runs through the pairs with Previous as the slow index, so the labels are
# generated in that same order and read <Previous>_<Next>.
dfout <- within(df,
                col <- `colnames<-`(t(vapply((Previous - 1) * 3 + Next,
                                             function(v) replace(rep(0, 9), v, 1),
                                             numeric(9))),
                                    paste(rep(1:3, each = 3), rep(1:3, times = 3), sep = "_")))

這樣

> dfout
  Previous Next id col.1_1 col.1_2 col.1_3 col.2_1 col.2_2 col.2_3 col.3_1 col.3_2 col.3_3
1        1    1  1       1       0       0       0       0       0       0       0       0
2        2    1  2       0       0       0       1       0       0       0       0       0
3        2    2  3       0       0       0       0       1       0       0       0       0
4        1    3  4       0       0       1       0       0       0       0       0       0
5        3    1  5       0       0       0       0       0       0       1       0       0
6        3    3  6       0       0       0       0       0       0       0       0       1

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM