I have been trying to generalize a function that performs several different aggregations with different keys and then merges the results together. First, an example data.table:
# Load data.table so that data.table() and the DT[i, j, by] syntax used
# throughout this example are available in a fresh session.
library(data.table)

# Example data: two candidate grouping keys (Key1, Key2), a 0/1 filter flag
# (Filtro) and two numeric variables to aggregate (Var1, Var2).
DT <- data.table(
  Key1 = c(
    "giallo", "giallo", "giallo",
    "verde", "verde", "verde", "verde", "verde"
  ),
  Key2 = c("M", "M", "L", "S", "M", "M", "M", "L"),
  Filtro = c(1, 1, 1, 0, 0, 0, 1, 1),
  Var1 = c(1, 4, 5, 3, 7, 5, 8, 1),
  Var2 = c(11, 24, 15, 33, 17, 45, 38, 21)
)
With the help of a related question, I wrote the following code:
# Build three grouped summaries of `s` and merge them into a single table.
#
# Arguments:
#   s        data.table containing the columns Var1, Var2 and Filtro.
#   chiave1  name (character) of the grouping column; also used as merge key.
#   chiave2  accepted for interface compatibility; not used by this version.
#
# Side effect: each intermediate summary is printed as soon as it is built.
# Value: the full outer merge (all = TRUE) of the three summaries on chiave1.
Tavola <- function(s, chiave1, chiave2 = NULL) {
  # Summary over every row.
  riepilogo_totale <- s[, list(
    Somma11 = sum(Var1),
    Somma12 = sum(Var2),
    Media11 = mean(Var1),
    Media12 = mean(Var2)
  ), by = eval(chiave1)]
  print(riepilogo_totale)

  # Summary restricted to rows flagged with Filtro == 1.
  riepilogo_filtro <- s[Filtro == 1, list(
    Somma21 = sum(Var1),
    Somma22 = sum(Var2),
    Media21 = mean(Var1),
    Media22 = mean(Var2)
  ), by = eval(chiave1)]
  print(riepilogo_filtro)

  # Summary restricted further to rows where Var1 exceeds 3; only Var2 is
  # aggregated here.
  riepilogo_soglia <- s[Filtro == 1 & Var1 > 3, list(
    Somma32 = sum(Var2),
    Media32 = mean(Var2)
  ), by = eval(chiave1)]
  print(riepilogo_soglia)

  # Fold the three tables left to right; all = TRUE keeps groups that are
  # missing from the filtered summaries.
  Reduce(
    function(sinistra, destra) {
      merge(sinistra, destra, by = chiave1, all = TRUE)
    },
    list(riepilogo_totale, riepilogo_filtro, riepilogo_soglia)
  )
}
# Summarise DT grouped by Key1 only.
TavolaStat <- Tavola(s = DT, chiave1 = "Key1")
> TavolaStat
Key1 Somma11 Somma12 Media11 Media12 Somma21 Somma22 Media21 Media22 Somma32 Media32
1: giallo 10 50 3.333333 16.66667 10 50 3.333333 16.66667 39 19.5
2: verde 24 154 4.800000 30.80000 9 59 4.500000 29.50000 38 38.0
This works, but when I try to generalize it to summarize by two key variables, it fails:
# Attempted two-key generalisation of Tavola. NOTE: this version does not
# work; it is kept as written to illustrate the problem.
#
# Arguments:
#   s        data.table containing the columns Var1, Var2 and Filtro.
#   chiave1  name (character) of the first grouping column.
#   chiave2  name (character) of the second grouping column, or NULL.
#
# Why it fails: eval() of a character string just returns that string, so
# `by` receives list("Key1", "Key2") -- two length-1 character values rather
# than the columns themselves -- and data.table rejects them because each
# item must be as long as the number of rows. When chiave2 is left as NULL,
# the second list item is NULL, which data.table rejects as well.
Tavola <- function(s,chiave1,chiave2=NULL)
{
# Summary over every row.
Tavola1 <- s[,
# Problem line: the list items are the key *names*, not the key columns.
by=list(eval((chiave1)),eval(chiave2)),
list(
Somma11=sum(Var1),
Somma12=sum(Var2),
Media11=mean(Var1),
Media12=mean(Var2)
)
]
print(Tavola1)
# Summary restricted to rows with Filtro == 1 (same `by` problem).
Tavola2 <- s[Filtro==1,
by=list(eval(chiave1),eval(chiave2)),
list(
Somma21=sum(Var1),
Somma22=sum(Var2),
Media21=mean(Var1),
Media22=mean(Var2)
)
]
print(Tavola2)
# Summary restricted to Filtro == 1 and Var1 > 3 (same `by` problem).
Tavola3 <- s[Filtro==1 & Var1>3,
by=list(eval(chiave1),eval(chiave2)),
list(
Somma32=sum(Var2),
Media32=mean(Var2)
)
]
print(Tavola3)
# c() drops a NULL chiave2, so the merge keys are the supplied key names.
mymerge = function(x,y) merge(x,y,by=c(chiave1,chiave2),all=TRUE)
# Full outer merge of the three summaries, folded left to right.
TavolaFinale <- Reduce(mymerge,list(Tavola1,Tavola2,Tavola3))
return(TavolaFinale)
}
> TavolaStat <- Tavola(s=DT,chiave1="Key1")
Show Traceback
Rerun with Debug
Error in `[.data.table`(s, , by = list(eval((chiave1)), eval(chiave2)), :
column or expression 2 of 'by' or 'keyby' is type NULL. Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))] >
> TavolaStat <- Tavola(s=DT,chiave1="Key1",chiave2="Key2")
Show Traceback
Rerun with Debug
Error in `[.data.table`(s, , by = list(eval((chiave1)), eval(chiave2)), :
The items in the 'by' or 'keyby' list are length (1,1). Each must be same length as rows in x or number of rows returned by i (8).
How do I fix this? Thanks in advance
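How about passing the keys as a single character vector instead of two separate arguments? `by` accepts a character vector of column names, whereas `list(eval(chiave1), eval(chiave2))` hands it the literal strings (each of length 1) rather than the columns, which is what the error message is complaining about.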
# Build three grouped summaries of `s` and merge them into a single table,
# grouping by any number of key columns.
#
# Arguments:
#   s        data.table containing the columns Var1, Var2 and Filtro.
#   chiaves  character vector naming the grouping columns; the same vector
#            is used as the merge key.
#
# Side effect: each intermediate summary is printed as soon as it is built.
# Value: the full outer merge (all = TRUE) of the three summaries on chiaves.
Tavola <- function(s, chiaves) {
  # Summary over every row.
  riepilogo_totale <- s[
    ,
    list(
      Somma11 = sum(Var1),
      Somma12 = sum(Var2),
      Media11 = mean(Var1),
      Media12 = mean(Var2)
    ),
    by = chiaves
  ]
  print(riepilogo_totale)

  # Summary restricted to rows flagged with Filtro == 1.
  riepilogo_filtro <- s[
    Filtro == 1,
    list(
      Somma21 = sum(Var1),
      Somma22 = sum(Var2),
      Media21 = mean(Var1),
      Media22 = mean(Var2)
    ),
    by = chiaves
  ]
  print(riepilogo_filtro)

  # Summary restricted further to rows where Var1 exceeds 3; only Var2 is
  # aggregated here.
  riepilogo_soglia <- s[
    Filtro == 1 & Var1 > 3,
    list(
      Somma32 = sum(Var2),
      Media32 = mean(Var2)
    ),
    by = chiaves
  ]
  print(riepilogo_soglia)

  # Fold the three tables left to right; all = TRUE keeps key combinations
  # that are missing from the filtered summaries.
  unisci <- function(sinistra, destra) {
    merge(sinistra, destra, by = chiaves, all = TRUE)
  }
  Reduce(unisci, list(riepilogo_totale, riepilogo_filtro, riepilogo_soglia))
}
# Group by both key columns by passing their names as one character vector.
TavolaStat <- Tavola(s = DT, chiaves = c("Key1", "Key2"))
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.