简体   繁体   中英

Find all combinations of n1 elements from vector1 and n2 elements from vector2 in R?

I have two vectors and I am trying to find all unique combinations of 3 elements from vector1 and 2 elements from vector2. I have tried the following code.

# All 3-element subsets of 1:5, one per column (10 combinations in total)
V1 <- combn(1:5, 3)
# All 2-element subsets of 6:11, one per column (15 combinations in total)
V2 <- combn(6:11, 2)

How can I combine V1 and V2 so that there are 10 * 15 = 150 combinations in total? Thanks.

The function comboGrid from RcppAlgos (I am the author) does just the trick:

library(RcppAlgos)

# One pool per output column: three copies of 1:5 followed by two copies of
# 6:11. With repetition = FALSE the rows shown below contain no repeated
# values, i.e. each row is 3 distinct elements of 1:5 plus 2 of 6:11.
grid <- comboGrid(c(rep(list(1:5), 3), rep(list(6:11), 2)),
                  repetition = FALSE)

head(grid)
#>      Var1 Var2 Var3 Var4 Var5
#> [1,]    1    2    3    6    7
#> [2,]    1    2    3    6    8
#> [3,]    1    2    3    6    9
#> [4,]    1    2    3    6   10
#> [5,]    1    2    3    6   11
#> [6,]    1    2    3    7    8

tail(grid)
#>        Var1 Var2 Var3 Var4 Var5
#> [145,]    3    4    5    8    9
#> [146,]    3    4    5    8   10
#> [147,]    3    4    5    8   11
#> [148,]    3    4    5    9   10
#> [149,]    3    4    5    9   11
#> [150,]    3    4    5   10   11

It is quite efficient as well. It is written in C++ and pulls together many ideas from the excellent question: Picking unordered combinations from pools with overlap. The underlying algorithm avoids generating duplicates that would need to be filtered out.

Consider the following example, where the full Cartesian product would contain more than 10 billion results (20^5 * 15^3):

# Five pools of 1:20 and three pools of 21:35; system.time() reports how long
# the call takes while the result is stored in `huge`.
system.time(huge <- comboGrid(c(rep(list(1:20), 5), rep(list(21:35), 3)),
                              repetition = FALSE))
#>    user  system elapsed 
#>   0.990   0.087   1.077

dim(huge)
#> [1] 7054320       8

You can try expand.grid along with asplit, e.g.,

# asplit(x, 2) splits a matrix into a list of its columns, so expand.grid()
# pairs every column (combination) of V1 with every column of V2.
expand.grid(asplit(V1,2), asplit(V2,2))

or

# Pair every column of V1 with every column of V2, concatenate each pair with
# c(), and transpose so that each row is one combined combination.
with(
  expand.grid(asplit(V1, 2), asplit(V2, 2)),
  t(mapply(c, Var1, Var2))
)

You can use expand.grid():

# Every pairing of a V1 column index with a V2 column index.
g <- expand.grid(seq_len(ncol(V1)), seq_len(ncol(V2)))
# Stack the selected columns on top of each other. drop = FALSE keeps both
# pieces as matrices even when there is only a single pairing; otherwise they
# collapse to vectors and rbind() would stack them as rows instead.
V3 <- rbind(V1[, g[, 1], drop = FALSE], V2[, g[, 2], drop = FALSE])

The result is in a similar format to V1 and V2, i.e. a 5 × 150 matrix (here printed transposed):

head(t(V3))
#      [,1] [,2] [,3] [,4] [,5]
# [1,]    1    2    3    6    7
# [2,]    1    2    4    6    7
# [3,]    1    2    5    6    7
# [4,]    1    3    4    6    7
# [5,]    1    3    5    6    7
# [6,]    1    4    5    6    7

dim(unique(t(V3)))
# [1] 150   5

And a generalized approach that can handle more than two initial matrices of combinations, stored in a list V:

V <- list(V1, V2)
# One column of indices per matrix in V; each row of g is one pairing.
g <- do.call(expand.grid, lapply(V, \(x) seq_len(ncol(x))))
# Map() always returns a list. mapply() would simplify to a matrix whenever
# all matrices in V have the same number of rows, and do.call() would then
# fail. drop = FALSE keeps single-column selections as matrices.
V.comb <- do.call(rbind, Map(\(m, j) m[, j, drop = FALSE], V, g))

identical(V.comb, V3)
# [1] TRUE

After some helpful refactoring guidance from @onyambu, here is a shorter solution based on base::merge():

# No key columns are supplied, so merge() returns the Cartesian product of
# the rows of the two transposed combination matrices.
merge(
  t(combn(1:5, 3)),
  t(combn(6:11, 2)),
  by.x = NULL,
  by.y = NULL
)

...and the first 20 rows of output:

> merge(t(combn(1:5, 3)),t(combn(6:11, 2)),by.x=NULL,by.y = NULL)
    V1.x V2.x V3 V1.y V2.y
1      1    2  3    6    7
2      1    2  4    6    7
3      1    2  5    6    7
4      1    3  4    6    7
5      1    3  5    6    7
6      1    4  5    6    7
7      2    3  4    6    7
8      2    3  5    6    7
9      2    4  5    6    7
10     3    4  5    6    7
11     1    2  3    6    8
12     1    2  4    6    8
13     1    2  5    6    8
14     1    3  4    6    8
15     1    3  5    6    8
16     1    4  5    6    8
17     2    3  4    6    8
18     2    3  5    6    8
19     2    4  5    6    8
20     3    4  5    6    8

Original solution

A base R solution to create a Cartesian product with merge() looks like this:

# One data frame per set of combinations, one combination per row.
df1 <- data.frame(t(combn(1:5, 3)))
df2 <- data.frame(t(combn(6:11, 2)))
# Rename df2's columns so they do not clash with df1's X1 and X2.
colnames(df2) <- paste0("y", 1:2)

# With no key columns, merge() returns the Cartesian product of the rows.
merge(df1, df2, by.x = NULL, by.y = NULL)

...and the first 25 rows of output:

> merge(df1,df2,by.x=NULL,by.y = NULL)
    X1 X2 X3 y1 y2
1    1  2  3  6  7
2    1  2  4  6  7
3    1  2  5  6  7
4    1  3  4  6  7
5    1  3  5  6  7
6    1  4  5  6  7
7    2  3  4  6  7
8    2  3  5  6  7
9    2  4  5  6  7
10   3  4  5  6  7
11   1  2  3  6  8
12   1  2  4  6  8
13   1  2  5  6  8
14   1  3  4  6  8
15   1  3  5  6  8
16   1  4  5  6  8
17   2  3  4  6  8
18   2  3  5  6  8
19   2  4  5  6  8
20   3  4  5  6  8
21   1  2  3  6  9
22   1  2  4  6  9
23   1  2  5  6  9
24   1  3  4  6  9
25   1  3  5  6  9

A similar idea, using apply():

# Each row of the grid is one (V1 column, V2 column) index pair; the two
# selected columns are concatenated into one column of the result.
# seq_len() is used instead of seq() so that a matrix with zero columns
# yields no pairs rather than the indices c(1, 0).
apply(expand.grid(seq_len(ncol(V1)), seq_len(ncol(V2))), 1, function(idx) {
  c(V1[, idx[1]], V2[, idx[2]])
})
#>      [,1] [,2] [,3] [,4] [,5] [,6] [,7] [,8] [,9] [,10] [,11] [,12] [,13] [,14]
#> [1,]    1    1    1    1    1    1    2    2    2     3     1     1     1     1
#> [2,]    2    2    2    3    3    4    3    3    4     4     2     2     2     3
#> [3,]    3    4    5    4    5    5    4    5    5     5     3     4     5     4
#> [4,]    6    6    6    6    6    6    6    6    6     6     6     6     6     6
#> [5,]    7    7    7    7    7    7    7    7    7     7     8     8     8     8
#>      [,15] [,16] [,17] [,18] [,19] [,20] [,21] [,22] [,23] [,24] [,25] [,26]
#> [1,]     1     1     2     2     2     3     1     1     1     1     1     1
#> [2,]     3     4     3     3     4     4     2     2     2     3     3     4
#> [3,]     5     5     4     5     5     5     3     4     5     4     5     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]     8     8     8     8     8     8     9     9     9     9     9     9
#>      [,27] [,28] [,29] [,30] [,31] [,32] [,33] [,34] [,35] [,36] [,37] [,38]
#> [1,]     2     2     2     3     1     1     1     1     1     1     2     2
#> [2,]     3     3     4     4     2     2     2     3     3     4     3     3
#> [3,]     4     5     5     5     3     4     5     4     5     5     4     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]     9     9     9     9    10    10    10    10    10    10    10    10
#>      [,39] [,40] [,41] [,42] [,43] [,44] [,45] [,46] [,47] [,48] [,49] [,50]
#> [1,]     2     3     1     1     1     1     1     1     2     2     2     3
#> [2,]     4     4     2     2     2     3     3     4     3     3     4     4
#> [3,]     5     5     3     4     5     4     5     5     4     5     5     5
#> [4,]     6     6     6     6     6     6     6     6     6     6     6     6
#> [5,]    10    10    11    11    11    11    11    11    11    11    11    11
#>      [,51] [,52] [,53] [,54] [,55] [,56] [,57] [,58] [,59] [,60] [,61] [,62]
#> [1,]     1     1     1     1     1     1     2     2     2     3     1     1
#> [2,]     2     2     2     3     3     4     3     3     4     4     2     2
#> [3,]     3     4     5     4     5     5     4     5     5     5     3     4
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]     8     8     8     8     8     8     8     8     8     8     9     9
#>      [,63] [,64] [,65] [,66] [,67] [,68] [,69] [,70] [,71] [,72] [,73] [,74]
#> [1,]     1     1     1     1     2     2     2     3     1     1     1     1
#> [2,]     2     3     3     4     3     3     4     4     2     2     2     3
#> [3,]     5     4     5     5     4     5     5     5     3     4     5     4
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]     9     9     9     9     9     9     9     9    10    10    10    10
#>      [,75] [,76] [,77] [,78] [,79] [,80] [,81] [,82] [,83] [,84] [,85] [,86]
#> [1,]     1     1     2     2     2     3     1     1     1     1     1     1
#> [2,]     3     4     3     3     4     4     2     2     2     3     3     4
#> [3,]     5     5     4     5     5     5     3     4     5     4     5     5
#> [4,]     7     7     7     7     7     7     7     7     7     7     7     7
#> [5,]    10    10    10    10    10    10    11    11    11    11    11    11
#>      [,87] [,88] [,89] [,90] [,91] [,92] [,93] [,94] [,95] [,96] [,97] [,98]
#> [1,]     2     2     2     3     1     1     1     1     1     1     2     2
#> [2,]     3     3     4     4     2     2     2     3     3     4     3     3
#> [3,]     4     5     5     5     3     4     5     4     5     5     4     5
#> [4,]     7     7     7     7     8     8     8     8     8     8     8     8
#> [5,]    11    11    11    11     9     9     9     9     9     9     9     9
#>      [,99] [,100] [,101] [,102] [,103] [,104] [,105] [,106] [,107] [,108]
#> [1,]     2      3      1      1      1      1      1      1      2      2
#> [2,]     4      4      2      2      2      3      3      4      3      3
#> [3,]     5      5      3      4      5      4      5      5      4      5
#> [4,]     8      8      8      8      8      8      8      8      8      8
#> [5,]     9      9     10     10     10     10     10     10     10     10
#>      [,109] [,110] [,111] [,112] [,113] [,114] [,115] [,116] [,117] [,118]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      8      8      8      8      8      8      8      8      8      8
#> [5,]     10     10     11     11     11     11     11     11     11     11
#>      [,119] [,120] [,121] [,122] [,123] [,124] [,125] [,126] [,127] [,128]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      8      8      9      9      9      9      9      9      9      9
#> [5,]     11     11     10     10     10     10     10     10     10     10
#>      [,129] [,130] [,131] [,132] [,133] [,134] [,135] [,136] [,137] [,138]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      9      9      9      9      9      9      9      9      9      9
#> [5,]     10     10     11     11     11     11     11     11     11     11
#>      [,139] [,140] [,141] [,142] [,143] [,144] [,145] [,146] [,147] [,148]
#> [1,]      2      3      1      1      1      1      1      1      2      2
#> [2,]      4      4      2      2      2      3      3      4      3      3
#> [3,]      5      5      3      4      5      4      5      5      4      5
#> [4,]      9      9     10     10     10     10     10     10     10     10
#> [5,]     11     11     11     11     11     11     11     11     11     11
#>      [,149] [,150]
#> [1,]      2      3
#> [2,]      4      4
#> [3,]      5      5
#> [4,]     10     10
#> [5,]     11     11

Created on 2022-12-02 with reprex v2.0.2

The technical post webpages of this site follow the CC BY-SA 4.0 license. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM