![](/img/trans.png)
[英]R tidyverse mutate with all combinations of subset of columns in grouped dataframe
[英]R - subset Dataframe into all possible combinations with constraints
我有以下 dataframe:
Person City Ethnicity
A 1 2
B 2 3
C 3 3
D 1 1
E 2 1
F 3 1
G 2 2
H 1 1
I 2 2
J 1 2
K 1 3
L 1 3
M 2 2
我想要一个 df,其中包含所有可能的 6 人组合,并且每个组合都满足以下约束:不包含重复的人;覆盖所有城市(1、2、3);覆盖所有族裔(1、2、3);至少有 3 个人来自城市 1。
在 R 中有没有办法做到这一点?
谢谢
数据
structure(list(Person = structure(1:13, .Label = c("A", "B",
"C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M"), class = "factor"),
City = c(1L, 2L, 3L, 1L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 1L,
2L), Ethnicity = c(2L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 2L,
3L, 3L, 2L)), class = "data.frame", row.names = c(NA, -13L
))
可能的组合是 A,B,C,D,E,H。
您可以尝试使用 `combn` 生成所有组合,然后使用一些谓词函数过滤出您想要的组合,如下所示:
# Data
# Thirteen people (A..M), each with an integer city code and an integer
# ethnicity code. `Person` is a factor whose levels are A..M in that order.
data <- data.frame(
  Person = factor(LETTERS[1:13], levels = LETTERS[1:13]),
  City = c(1L, 2L, 3L, 1L, 2L, 3L, 2L, 1L, 2L, 1L, 1L, 1L, 2L),
  Ethnicity = c(2L, 3L, 3L, 1L, 1L, 1L, 2L, 1L, 2L, 2L, 3L, 3L, 2L)
)
# Helpers
# Check whether the selected rows cover every city present in `data`.
#
# x:    row selector for `data` (anything accepted by `data[x, ]`).
# data: data frame with a `City` column.
# Returns TRUE when the set of cities in the selected rows equals the set of
# cities found in the whole of `data`, FALSE otherwise.
has_all_cities <- function(x, data) {
  picked_rows <- data[x, ]
  setequal(picked_rows$City, unique(data$City))
}
# Check whether the selected rows contain at least three people from city 1.
#
# x:    row selector for `data` (anything accepted by `data[x, ]`).
# data: data frame with a `City` column.
# Returns TRUE when three or more of the selected rows have `City == 1`.
has_ppl_from_city_one <- function(x, data) {
  picked_cities <- data[x, ]$City
  # Summing the logical mask counts the rows that are in city 1.
  sum(picked_cities == 1) >= 3
}
# Check whether the selected rows cover every ethnicity present in `data`.
#
# x:    row selector for `data` (anything accepted by `data[x, ]`).
# data: data frame with an `Ethnicity` column.
# Returns TRUE when the set of ethnicities in the selected rows equals the set
# of ethnicities found in the whole of `data`, FALSE otherwise.
has_all_ethnicity <- function(x, data) {
  picked_rows <- data[x, ]
  setequal(picked_rows$Ethnicity, unique(data$Ethnicity))
}
# Combined predicate: does the selection pass every constraint?
#
# x:    row selector for `data` (anything accepted by `data[x, ]`).
# data: data frame with `City` and `Ethnicity` columns.
# Returns the conjunction of has_all_cities(), has_ppl_from_city_one() and
# has_all_ethnicity(), evaluated in that order; later checks are skipped once
# an earlier one fails.
satisfy_all_constraints <- function(x, data) {
  # Cheapest rejection first: a selection missing a city can never qualify.
  if (!has_all_cities(x, data)) {
    return(FALSE)
  }
  has_ppl_from_city_one(x, data) && has_all_ethnicity(x, data)
}
# Main
# Label the rows by person so a vector of person names can index `data`
rownames(data) <- data$Person
# All 6-person combinations, one combination per column
y <- combn(data$Person, m = 6)
dim(y)
# Logical flag per column: does that combination pass every constraint?
ind <- apply(y, MARGIN = 2, FUN = satisfy_all_constraints, data = data)
res <- y[, ind]
# First six qualifying combinations
res[, seq_len(6)]
# [,1] [,2] [,3] [,4] [,5] [,6]
# [1,] A A A A A A
# [2,] B B B B B B
# [3,] C C C C C C
# [4,] D D D D D D
# [5,] E E E E F F
# [6,] H J K L H J
# Levels: A B C D E F G H I J K L M
# Number of qualifying combinations
dim(res)[2L]
# 574
# Check requirements on the first qualifying combination
data[res[, 1L], ]
# Person City Ethnicity
# A A 1 2
# B B 2 3
# C C 3 3
# D D 1 1
# E E 2 1
# H H 1 1
# No duplicate person
# Has all cities: 1, 2, 3
# Has all ethnicity: 1, 2, 3
# Has at least 3 people from city 1
# Convert the factor matrix into a data.frame of character columns
df <- as.data.frame(
  matrix(as.character(res), nrow = nrow(res), ncol = ncol(res))
)
df[, seq_len(6)]
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.