简体   繁体   中英

R - Create multiple scenario combinations

I have four (partially overlapping) groups of eight unique applicants that have applied for 20%, 30%, 40%, and 50% of the work I have to assign:

g20 <- c("a","b","c","d","e","f")
g30 <- c("a","b","c","d","e","f","g","h")
g40 <- c("c","d","e","f","g","h")
g50 <- c("e","f","g","h")

Because I can only award the work in these four increments and I have to choose no fewer than two people and no more than four, I have six scenarios for awarding 100% of the work:

  1. 50/50
  2. 50/30/20
  3. 40/40/20
  4. 40/30/30
  5. 40/20/20/20
  6. 30/30/20/20

For each scenario, I need to find all the possible combinations (without replacement) for awarding the work to the applicants in the corresponding groups.

I can easily enough accomplish this for the first scenario using t(combn(g50,2)) but I am unsure how to handle the other scenarios where I have to pull combinations from different vectors AND ensure an applicant is only selected only once in any given combination. The output needs to be the actual combinations, not just the number of combinations.

Using R, how do I do get these combinations pulling from four different groups and (using scenario 5 as an example) ensure that "cdef", "cedf", "cfed", "cfde" etc. are all treated as the same result?

Is this possible?

Also creating all possible combinations like Jon Spring's solution but using package and removing dupe applicant.

If your real-life dimensions are per OP, you can consider expanding to all possible combinations and remove rows where an applicant is duplicated:

library(data.table)

g20 <- c("a","b","c","d","e","f")
g30 <- c("a","b","c","d","e","f","g","h")
g40 <- c("c","d","e","f","g","h")
g50 <- c("e","f","g","h")

scen <- paste0("g", c(30, 30, 20, 20))
allcombi <- do.call(CJ, mget(scen))
setnames(allcombi, paste0("V", 1L:length(allcombi)))

#remove rows with applicants that are repeated in different columns
nodupe <- allcombi[
    allcombi[, .I[anyDuplicated(unlist(.SD)) == 0L], 
        by=1:allcombi[,.N]]$V1]

#sort within columns with the same percentage of work
for(cols in split(names(nodupe), scen))
    nodupe[, (cols) := sort(.SD), by=seq_len(nodupe[,.N]), .SDcols=cols]

#remove identical combinations
ans <- unique(nodupe)
setnames(ans, scen)[]

output:

     g30 g30 g20 g20
  1:   a   b   c   d
  2:   a   b   c   e
  3:   a   b   c   f
  4:   a   b   d   e
  5:   a   b   d   f
 ---                
221:   g   h   c   e
222:   g   h   c   f
223:   g   h   d   e
224:   g   h   d   f
225:   g   h   e   f

Code & results from running for all 6 scenarios:

scenarios <- list(c(50,50), 
    c(50,30,20), 
    c(40,40,20), 
    c(40,30,30), 
    c(40,20,20,20), 
    c(30,30,20,20))

lapply(scenarios, 
    function(scen) {
        scen <- paste0("g", scen)
        allcombi <- do.call(CJ, mget(scen, envir=.GlobalEnv))
        setnames(allcombi, paste0("V", 1L:length(allcombi)))

        nodupe <- allcombi[
            allcombi[, .I[anyDuplicated(unlist(.SD)) == 0L], 
                by=1:allcombi[,.N]]$V1]

        for(cols in split(names(nodupe), scen))
            nodupe[, (cols) := sort(.SD), by=seq_len(nodupe[,.N]), .SDcols=cols]

        ans <- unique(nodupe)
        setnames(ans, scen)[]
})

output:

[[1]]
   g50 g50
1:   e   f
2:   e   g
3:   e   h
4:   f   g
5:   f   h
6:   g   h

[[2]]
     g50 g30 g20
  1:   e   a   b
  2:   e   a   c
  3:   e   a   d
  4:   e   a   f
  5:   e   b   a
 ---            
128:   h   g   b
129:   h   g   c
130:   h   g   d
131:   h   g   e
132:   h   g   f

[[3]]
    g40 g40 g20
 1:   c   d   a
 2:   c   d   b
 3:   c   d   e
 4:   c   d   f
 5:   c   e   a
 6:   c   e   b
 7:   c   e   d
 8:   c   e   f
 9:   c   f   a
10:   c   f   b
11:   c   f   d
12:   c   f   e
13:   c   g   a
14:   c   g   b
15:   c   g   d
16:   c   g   e
17:   c   g   f
18:   c   h   a
19:   c   h   b
20:   c   h   d
21:   c   h   e
22:   c   h   f
23:   d   e   a
24:   d   e   b
25:   d   e   c
26:   d   e   f
27:   d   f   a
28:   d   f   b
29:   d   f   c
30:   d   f   e
31:   d   g   a
32:   d   g   b
33:   d   g   c
34:   d   g   e
35:   d   g   f
36:   d   h   a
37:   d   h   b
38:   d   h   c
39:   d   h   e
40:   d   h   f
41:   e   f   a
42:   e   f   b
43:   e   f   c
44:   e   f   d
45:   e   g   a
46:   e   g   b
47:   e   g   c
48:   e   g   d
49:   e   g   f
50:   e   h   a
51:   e   h   b
52:   e   h   c
53:   e   h   d
54:   e   h   f
55:   f   g   a
56:   f   g   b
57:   f   g   c
58:   f   g   d
59:   f   g   e
60:   f   h   a
61:   f   h   b
62:   f   h   c
63:   f   h   d
64:   f   h   e
65:   g   h   a
66:   g   h   b
67:   g   h   c
68:   g   h   d
69:   g   h   e
70:   g   h   f
    g40 g40 g20

[[4]]
     g40 g30 g30
  1:   c   a   b
  2:   c   a   d
  3:   c   a   e
  4:   c   a   f
  5:   c   a   g
 ---            
122:   h   d   f
123:   h   d   g
124:   h   e   f
125:   h   e   g
126:   h   f   g

[[5]]
    g40 g20 g20 g20
 1:   c   a   b   d
 2:   c   a   b   e
 3:   c   a   b   f
 4:   c   a   d   e
 5:   c   a   d   f
 6:   c   a   e   f
 7:   c   b   d   e
 8:   c   b   d   f
 9:   c   b   e   f
10:   c   d   e   f
11:   d   a   b   c
12:   d   a   b   e
13:   d   a   b   f
14:   d   a   c   e
15:   d   a   c   f
16:   d   a   e   f
17:   d   b   c   e
18:   d   b   c   f
19:   d   b   e   f
20:   d   c   e   f
21:   e   a   b   c
22:   e   a   b   d
23:   e   a   b   f
24:   e   a   c   d
25:   e   a   c   f
26:   e   a   d   f
27:   e   b   c   d
28:   e   b   c   f
29:   e   b   d   f
30:   e   c   d   f
31:   f   a   b   c
32:   f   a   b   d
33:   f   a   b   e
34:   f   a   c   d
35:   f   a   c   e
36:   f   a   d   e
37:   f   b   c   d
38:   f   b   c   e
39:   f   b   d   e
40:   f   c   d   e
41:   g   a   b   c
42:   g   a   b   d
43:   g   a   b   e
44:   g   a   b   f
45:   g   a   c   d
46:   g   a   c   e
47:   g   a   c   f
48:   g   a   d   e
49:   g   a   d   f
50:   g   a   e   f
51:   g   b   c   d
52:   g   b   c   e
53:   g   b   c   f
54:   g   b   d   e
55:   g   b   d   f
56:   g   b   e   f
57:   g   c   d   e
58:   g   c   d   f
59:   g   c   e   f
60:   g   d   e   f
61:   h   a   b   c
62:   h   a   b   d
63:   h   a   b   e
64:   h   a   b   f
65:   h   a   c   d
66:   h   a   c   e
67:   h   a   c   f
68:   h   a   d   e
69:   h   a   d   f
70:   h   a   e   f
71:   h   b   c   d
72:   h   b   c   e
73:   h   b   c   f
74:   h   b   d   e
75:   h   b   d   f
76:   h   b   e   f
77:   h   c   d   e
78:   h   c   d   f
79:   h   c   e   f
80:   h   d   e   f
    g40 g20 g20 g20

[[6]]
     g30 g30 g20 g20
  1:   a   b   c   d
  2:   a   b   c   e
  3:   a   b   c   f
  4:   a   b   d   e
  5:   a   b   d   f
 ---                
221:   g   h   c   e
222:   g   h   c   f
223:   g   h   d   e
224:   g   h   d   f
225:   g   h   e   f

EDIT -- updated my response based on closer reading of OP. Now identifying how many distinct teams can be formed, regardless of the permutations of how the work can be divided among them.

Yes! This is by no means the most elegant or efficient solution, but it is possible. It takes about 1 second with this data, but it will be slower if you have real data that is more complicated.

First I establish the possibilities for each applicant. I think it makes more intuitive sense to lay it out this way, because we need to make one assignment (including the possibility of zero) for each applicant.

a <- c(0, 20, 30)
b <- c(0, 20, 30)
c <- c(0, 20, 30, 40)
d <- c(0, 20, 30, 40)
e <- c(0, 20, 30, 40, 50)
f <- c(0, 20, 30, 40, 50)
g <- c(0,     30, 40, 50)
h <- c(0,     30, 40, 50)

Then I enumerate all the possibilities of assigning the work, using expand.grid , and then filter to only include the ones where 100% of work gets done.

library(tidyverse)
soln_with_permutations <- expand.grid(a,b,c,d,e,f,g,h) %>%
  # the Applicants come in as Var1, Var2... here, will rename below
  as.tibble() %>%
  rownames_to_column() %>% # This number tracks each row / potential solution

  # gather into long format to make summing simpler
  gather(applicant, assignment, -rowname) %>%
  # rename Var1 as "a", Var2 as "b", and so on.
  mutate(applicant = str_sub(applicant, start = -1) %>% as.integer %>% letters[.]) %>%

  group_by(rowname) %>%
  # keep only solutions adding to 100%
  filter(sum(assignment) == 100) %>%
  # keep only solutions involving four or fewer applicants
  filter(sum(assignment > 0) <= 4) %>%
  ungroup()

Each rowname describes a distinct solution in terms of how the work is divided among the applicants, but many are permutations where the work is allocated differently among the same teams. To see how many different teams are formed, and how many different scenarios could work for that team, I label each solution with the team (labeled alphabetically) and the scenario (labeled by descending share).

soln_distinct_teams <- soln_with_permutations %>%
  filter(assignment > 0) %>%
  group_by(rowname) %>%
  # Get team composition, alphabetical
  mutate(team = paste0(applicant, collapse = "")) %>%
  # Get allocation structure, descending
  arrange(-assignment) %>%
  mutate(allocation = paste0(assignment, collapse = "/")) %>%
  ungroup() %>%

  # Distinct teams / allocations only
  distinct(team, allocation) %>%
  arrange(allocation, team) %>%
  mutate(soln_num = row_number()) %>%

  # select(soln_num, team, allocation) %>%
  spread(allocation, soln_num)

Each row shows one of the 132 different teams of 2-4 applicants which could be created, and across the columns we see the different scenarios that could apply to that team in at least one permutation.

# A tibble: 132 x 7
   team  `30/30/20/20` `40/20/20/20` `40/30/30` `40/40/20` `50/30/20` `50/50`
   <chr>         <int>         <int>      <int>      <int>      <int>   <int>
 1 abc              NA            NA        126         NA         NA      NA
 2 abcd              1            71         NA         NA         NA      NA
 3 abce              2            72         NA         NA         NA      NA
 4 abcf              3            73         NA         NA         NA      NA
 5 abcg              4            74         NA         NA         NA      NA
 6 abch              5            75         NA         NA         NA      NA
 7 abd              NA            NA        127         NA         NA      NA
 8 abde              6            76         NA         NA         NA      NA
 9 abdf              7            77         NA         NA         NA      NA
10 abdg              8            78         NA         NA         NA      NA
# ... with 122 more rows

Thanks for all the help on this one! chinsoon12's solution was the most useful for me to build off of. As noted, this solution was still returning some duplicates (in the 40/40/20 or 40/30/30 scenarios it was not removing duplicates where the percentage appeared twice in the scenario).

While perhaps not the most elegant solution, I modified chinsoon12's solution. Using 40/40/20 as an example, I first created all possible combinations of 40/40 then created the combinations of 40/40 and 20. I was then able to accurately remove the duplicates.

# Create 40/40 combos
combs_40 <- t(combn(g40,2))
c40 <- paste0(combs_40[,1],combs_40[,2])

# Create combos of 40/40 and 20
scen <- c("c40","g20")
allcombi <- do.call(CJ, mget(scen, envir=.GlobalEnv))
allcombi <- as.data.frame(allcombi)

# Split into cols
x <- t(as.data.frame(strsplit(allcombi$c40,split="")))
allcombi <- as.data.table(cbind(x[,1],x[,2],allcombi$g20))
setnames(allcombi, paste0("V", 1L:length(allcombi)))

# Remove rows with applicants that are repeated in different columns
nodupe <- allcombi[
  allcombi[, .I[anyDuplicated(unlist(.SD)) == 0L], 
           by=1:allcombi[,.N]]$V1]
# Redefine scen
scen <- c("g40","g40","g20")

# Sort within columns with the same percentage of work
for(cols in split(names(nodupe), scen))
  nodupe[, (cols) := sort(.SD), by=seq_len(nodupe[,.N]), .SDcols=cols]

# Set names, write results
setnames(nodupe, scen)[]
results_404020 <- nodupe

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM