I have the "in_table" shown below. I need to obtain "Table1", "Table2", "Table3", and so on, using the "Comb_table". Basically, when a variable in Comb_table is 1 for a given table, I need to include that variable in the grouping list for that table.
Is there an efficient way to do this in R rather than manually typing all the combinations?
Any help is appreciated.
Thanks.
in_table:
POL Var1 Var2 Var3 Var4 Var5 Var6 Var7
8035 1 11 1 GRD 0030 0110 09/30
36763 1 88 13 GRD 5260 0300 11/15
36763 1 88 13 GRD 5280 0300 11/15
35786 1 88 13 GRD 0030 0110 09/30
Comb_table:
Var1 Var2 Var3 Var4 Var5 Var6 Var7
Table1 1 1 1 1 1 1 1
Table2 0 1 1 1 1 1 1
Table3 1 0 1 1 1 1 1
# Manual approach: one aggregation per row of Comb_table. Each result counts the
# distinct POL values within every combination of the variables flagged 1.
Table1 <- in_table[, .(Pol_count = uniqueN(POL)),
                   by = .(Var1, Var2, Var3, Var4, Var5, Var6, Var7)]
# Table2 leaves out Var1 (its flag is 0 in Comb_table).
Table2 <- in_table[, .(Pol_count = uniqueN(POL)),
                   by = .(Var2, Var3, Var4, Var5, Var6, Var7)]
# Table3 leaves out Var2 (its flag is 0 in Comb_table).
Table3 <- in_table[, .(Pol_count = uniqueN(POL)),
                   by = .(Var1, Var3, Var4, Var5, Var6, Var7)]
and so on.
# For each row of comb_table (one per `tab`), collect the names of the columns
# flagged 1 and count the distinct POL values in in_table grouped by exactly
# those columns. Each per-table result is stored in a list column (V1).
res <- comb_table[
  ,
  {
    flagged <- names(.SD)[unlist(.SD) == 1]
    .(list(in_table[, uniqueN(POL), by = c(flagged)]))
  },
  by = tab
]
# tab V1
#1: Table1 <data.table>
#2: Table2 <data.table>
#3: Table3 <data.table>
res$V1
#[[1]]
# Var1 Var2 Var3 Var4 Var5 Var6 Var7 V1
#1: 1 11 1 GRD 30 110 09/30 1
#2: 1 88 13 GRD 5260 300 11/15 1
#3: 1 88 13 GRD 5280 300 11/15 1
#4: 1 88 13 GRD 30 110 09/30 1
#
#[[2]]
# Var2 Var3 Var4 Var5 Var6 Var7 V1
#1: 11 1 GRD 30 110 09/30 1
#2: 88 13 GRD 5260 300 11/15 1
#3: 88 13 GRD 5280 300 11/15 1
#4: 88 13 GRD 30 110 09/30 1
#
#[[3]]
# Var1 Var3 Var4 Var5 Var6 Var7 V1
#1: 1 1 GRD 30 110 09/30 1
#2: 1 13 GRD 5260 300 11/15 1
#3: 1 13 GRD 5280 300 11/15 1
#4: 1 13 GRD 30 110 09/30 1
This works:
> library(magrittr)
> melt(comb_table, id.vars = "tab", variable.factor = FALSE)[value == 1] %>%
  split(by = "tab") %>%
  lapply(function(flags) {
    # flags$variable holds the names of the columns flagged 1 for this table;
    # count the distinct POL values within each combination of those columns
    in_table[, .(n = uniqueN(POL)), by = c(flags$variable)]
  })
$Table1
Var1 Var2 Var3 Var4 Var5 Var6 Var7 n
1: 1 11 1 GRD 30 110 09/30 1
2: 1 88 13 GRD 5260 300 11/15 1
3: 1 88 13 GRD 5280 300 11/15 1
4: 1 88 13 GRD 30 110 09/30 1
$Table3
Var1 Var3 Var4 Var5 Var6 Var7 n
1: 1 1 GRD 30 110 09/30 1
2: 1 13 GRD 5260 300 11/15 1
3: 1 13 GRD 5280 300 11/15 1
4: 1 13 GRD 30 110 09/30 1
$Table2
Var2 Var3 Var4 Var5 Var6 Var7 n
1: 11 1 GRD 30 110 09/30 1
2: 88 13 GRD 5260 300 11/15 1
3: 88 13 GRD 5280 300 11/15 1
4: 88 13 GRD 30 110 09/30 1
magrittr is just used here for convenience.
Alternately, if you're fine having it all in one table and are using data.table >=1.10.5, something like this (I haven't tested it...) should work with grouping sets:
> melt(comb_table, id.vars = "tab", variable.factor = FALSE)[value == 1, groupingsets(
    in_table,
    # j: the aggregate computed for every grouping set (required by groupingsets)
    j = .(n = uniqueN(POL)),
    # by: every column used by at least one set; each set must be a subset of it
    by = unique(variable),
    # sets: one character vector of grouping columns per row of comb_table
    sets = unname(split(variable, tab)),
    # id = TRUE adds a leading "grouping" bit-mask column, so rows coming from
    # different sets stay distinguishable in the single combined table
    id = TRUE
  )]
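Data used (I decided that the OP's row names should be a column named "tab"). Note that the structures below are plain data.frames, so convert them with setDT() before running the data.table code above.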
> dput(setDF(comb_table))
structure(list(tab = c("Table1", "Table2", "Table3"), Var1 = c(1L,
0L, 1L), Var2 = c(1L, 1L, 0L), Var3 = c(1L, 1L, 1L), Var4 = c(1L,
1L, 1L), Var5 = c(1L, 1L, 1L), Var6 = c(1L, 1L, 1L), Var7 = c(1L,
1L, 1L)), .Names = c("tab", "Var1", "Var2", "Var3", "Var4", "Var5",
"Var6", "Var7"), row.names = c(NA, -3L), class = "data.frame")
> dput(setDF(in_table))
structure(list(POL = c(8035L, 36763L, 36763L, 35786L), Var1 = c(1L,
1L, 1L, 1L), Var2 = c(11L, 88L, 88L, 88L), Var3 = c(1L, 13L,
13L, 13L), Var4 = c("GRD", "GRD", "GRD", "GRD"), Var5 = c(30L,
5260L, 5280L, 30L), Var6 = c(110L, 300L, 300L, 110L), Var7 = c("09/30",
"11/15", "11/15", "09/30")), .Names = c("POL", "Var1", "Var2",
"Var3", "Var4", "Var5", "Var6", "Var7"), row.names = c(NA, -4L
), class = "data.frame")
Maybe this:
For each variable, create a factor whose label is the variable name where Comb_table is 1 and NA where it is 0.
# Map one Comb_table column of 0/1 flags to a character vector holding the
# variable's own name where the flag is 1 and NA where it is 0.
flag_to_name <- function(flags, var_name) {
  as.character(factor(flags, levels = c(0, 1), labels = c(NA, var_name)))
}

# One row per variable, one column per row of Comb_table (X1, X2, ...).
name_rows <- Map(flag_to_name, Comb_table, names(Comb_table))
nm_list <- data.frame(do.call(rbind, name_rows), stringsAsFactors = FALSE)
nm_list
# X1 X2 X3
# Var1 Var1 <NA> Var1
# Var2 Var2 Var2 <NA>
# Var3 Var3 Var3 Var3
# Var4 Var4 Var4 Var4
# Var5 Var5 Var5 Var5
# Var6 Var6 Var6 Var6
# Var7 Var7 Var7 Var7
library("data.table")
setDT(in_table) # turn in_table into a data.table in place (no copy)
lapply(nm_list, function(vars) {
  group_cols <- vars[!is.na(vars)] # keep only the variables flagged 1
  # count the distinct POL values within each combination of the kept variables
  in_table[, .(Pol_count = uniqueN(POL)), by = group_cols]
})
# $X1
# Var1 Var2 Var3 Var4 Var5 Var6 Var7 Pol_count
# 1: 1 11 1 GRD 30 110 09/30 1
# 2: 1 88 13 GRD 5260 300 11/15 1
# 3: 1 88 13 GRD 5280 300 11/15 1
# 4: 1 88 13 GRD 30 110 09/30 1
#
# $X2
# Var2 Var3 Var4 Var5 Var6 Var7 Pol_count
# 1: 11 1 GRD 30 110 09/30 1
# 2: 88 13 GRD 5260 300 11/15 1
# 3: 88 13 GRD 5280 300 11/15 1
# 4: 88 13 GRD 30 110 09/30 1
#
# $X3
# Var1 Var3 Var4 Var5 Var6 Var7 Pol_count
# 1: 1 1 GRD 30 110 09/30 1
# 2: 1 13 GRD 5260 300 11/15 1
# 3: 1 13 GRD 5280 300 11/15 1
# 4: 1 13 GRD 30 110 09/30 1
Data:
# Example policy table. header = TRUE makes the first line supply the column
# names. Zero-padded fields such as 0030 are read as numbers, which is why the
# results above print them as 30.
in_table <- read.table(text='POL Var1 Var2 Var3 Var4 Var5 Var6 Var7
8035 1 11 1 GRD 0030 0110 09/30
36763 1 88 13 GRD 5260 0300 11/15
36763 1 88 13 GRD 5280 0300 11/15
35786 1 88 13 GRD 0030 0110 09/30', header = TRUE)
# Combination table of 0/1 flags. No `header` argument is given: read.table()
# infers a header because the first line has one fewer field than the data
# lines, and the leading Table1/Table2/Table3 field becomes the row names.
Comb_table <- read.table(text = 'Var1 Var2 Var3 Var4 Var5 Var6 Var7
Table1 1 1 1 1 1 1 1
Table2 0 1 1 1 1 1 1
Table3 1 0 1 1 1 1 1')
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.