简体   繁体   中英

R data.table function to aggregate by multiple variables

I have been trying to generalize a function in which I run several aggregations with different filters, all grouped by the same keys, and then merge the results together. First, an example data.table:

# Example data: two grouping keys (Key1, Key2), a 0/1 filter flag (Filtro)
# and two numeric measures (Var1, Var2).
DT <- data.table(
  Key1 = rep(c("giallo", "verde"), times = c(3, 5)),
  Key2 = c("M", "M", "L", "S", "M", "M", "M", "L"),
  Filtro = c(1, 1, 1, 0, 0, 0, 1, 1),
  Var1 = c(1, 4, 5, 3, 7, 5, 8, 1),
  Var2 = c(11, 24, 15, 33, 17, 45, 38, 21)
)

With the help of another question, I wrote the following code:

# Summarise Var1/Var2 of a data.table by one key column under three row
# filters and merge the three summaries side by side.
#   s        data.table with columns Filtro, Var1 and Var2
#   chiave1  name of the grouping column, given as a string
#   chiave2  accepted but not used by this version
# Each intermediate table is printed as it is computed; the merged table is
# returned.
Tavola <- function(s, chiave1, chiave2 = NULL) {
  # Sums and means over every row.
  all_rows <- s[,
    list(
      Somma11 = sum(Var1),
      Somma12 = sum(Var2),
      Media11 = mean(Var1),
      Media12 = mean(Var2)
    ),
    by = eval(chiave1)
  ]
  print(all_rows)

  # Same statistics restricted to rows with Filtro == 1.
  filtered_rows <- s[
    Filtro == 1,
    list(
      Somma21 = sum(Var1),
      Somma22 = sum(Var2),
      Media21 = mean(Var1),
      Media22 = mean(Var2)
    ),
    by = eval(chiave1)
  ]
  print(filtered_rows)

  # Var2 statistics only, for rows with Filtro == 1 and Var1 > 3.
  filtered_high_rows <- s[
    Filtro == 1 & Var1 > 3,
    list(
      Somma32 = sum(Var2),
      Media32 = mean(Var2)
    ),
    by = eval(chiave1)
  ]
  print(filtered_high_rows)

  # Fold the three summaries together; all = TRUE keeps groups that are
  # missing from one of the filtered tables.
  Reduce(
    function(left, right) merge(left, right, by = chiave1, all = TRUE),
    list(all_rows, filtered_rows, filtered_high_rows)
  )
}
TavolaStat <- Tavola(s = DT, chiave1 = "Key1")
> TavolaStat
     Key1 Somma11 Somma12  Media11  Media12 Somma21 Somma22  Media21  Media22 Somma32 Media32
1: giallo      10      50 3.333333 16.66667      10      50 3.333333 16.66667      39    19.5
2:  verde      24     154 4.800000 30.80000       9      59 4.500000 29.50000      38    38.0

This works, but when I try to generalize it to summarize by two key variables, it fails:

    # Attempted two-key generalisation of Tavola(). NOTE: this version does not
    # work as written -- see the two errors reported right after the definition.
    #   s        data.table with columns Filtro, Var1 and Var2
    #   chiave1  name of the first grouping column, passed as a string
    #   chiave2  name of the second grouping column (string); defaults to NULL
    Tavola <- function(s,chiave1,chiave2=NULL)
    {

       # Sums and means over every row.
       # The by=list(eval(...), eval(...)) form is the failing part: each eval()
       # only yields the argument's value (a length-1 string, or NULL when
       # chiave2 is omitted), so 'by' receives a list of such values instead of
       # grouping columns -- hence the "type NULL" and "length (1,1)" errors.
       Tavola1 <- s[,
                    by=list(eval((chiave1)),eval(chiave2)),
                    list(
                         Somma11=sum(Var1),
                         Somma12=sum(Var2),
                         Media11=mean(Var1),
                         Media12=mean(Var2)
                        )
                    ]
       print(Tavola1)
       # Same statistics restricted to rows with Filtro == 1.
       Tavola2 <- s[Filtro==1,
                    by=list(eval(chiave1),eval(chiave2)),
                    list(
                          Somma21=sum(Var1),
                          Somma22=sum(Var2),
                          Media21=mean(Var1),
                          Media22=mean(Var2)
                        )
                   ]
       print(Tavola2)
       # Var2 statistics only, for rows with Filtro == 1 and Var1 > 3.
       Tavola3 <- s[Filtro==1 & Var1>3,
                    by=list(eval(chiave1),eval(chiave2)),
                    list(
                          Somma32=sum(Var2),
                          Media32=mean(Var2)
                        )
                   ]

       print(Tavola3)
       # Full outer merge of the three summaries on both key names.
       mymerge = function(x,y) merge(x,y,by=c(chiave1,chiave2),all=TRUE)
       TavolaFinale <- Reduce(mymerge,list(Tavola1,Tavola2,Tavola3))

       return(TavolaFinale)
    }
> TavolaStat <- Tavola(s=DT,chiave1="Key1")
 Show Traceback

 Rerun with Debug
 Error in `[.data.table`(s, , by = list(eval((chiave1)), eval(chiave2)),  : 
  column or expression 2 of 'by' or 'keyby' is type NULL. Do not quote column names. Usage: DT[,sum(colC),by=list(colA,month(colB))] > 
> TavolaStat <- Tavola(s=DT,chiave1="Key1",chiave2="Key2")
 Show Traceback

 Rerun with Debug
 Error in `[.data.table`(s, , by = list(eval((chiave1)), eval(chiave2)),  : 
  The items in the 'by' or 'keyby' list are length (1,1). Each must be same length as rows in x or number of rows returned by i (8). 

How do I fix this? Thanks in advance

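How about passing the keys as a single character vector instead of two separate arguments? data.table's `by` accepts a character vector of column names directly, so no `eval()` is needed: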

# Summarise Var1/Var2 of a data.table by any number of key columns under
# three row filters and merge the three summaries side by side.
#   s        data.table with columns Filtro, Var1 and Var2
#   chiaves  character vector of grouping column names, e.g. c("Key1", "Key2")
# Each intermediate table is printed as it is computed; the merged table is
# returned.
Tavola <- function(s, chiaves) {
  # Sums and means over every row.
  unfiltered <- s[,
    list(
      Somma11 = sum(Var1),
      Somma12 = sum(Var2),
      Media11 = mean(Var1),
      Media12 = mean(Var2)
    ),
    by = chiaves
  ]
  print(unfiltered)

  # Same statistics restricted to rows with Filtro == 1.
  flagged <- s[
    Filtro == 1,
    list(
      Somma21 = sum(Var1),
      Somma22 = sum(Var2),
      Media21 = mean(Var1),
      Media22 = mean(Var2)
    ),
    by = chiaves
  ]
  print(flagged)

  # Var2 statistics only, for rows with Filtro == 1 and Var1 > 3.
  flagged_high <- s[
    Filtro == 1 & Var1 > 3,
    list(
      Somma32 = sum(Var2),
      Media32 = mean(Var2)
    ),
    by = chiaves
  ]
  print(flagged_high)

  # Fold the summaries together on the key columns; all = TRUE keeps key
  # combinations that are missing from one of the filtered tables.
  Reduce(
    function(left, right) merge(left, right, by = chiaves, all = TRUE),
    list(unfiltered, flagged, flagged_high)
  )
}
TavolaStat <- Tavola(s = DT, c("Key1", "Key2"))

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM