繁体   English   中英

在 function 和 data.table 中使用

[英]Using by in a function with data.table

我写了一个小 function 通过离散变量聚合几列:

library(data.table)

onewayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL){
  .x <- deparse(substitute(x))
  .weight <- deparse(substitute(weight))
  .displacement <- deparse(substitute(displacement))
  .by <- deparse(substitute(by)) # Does not work with multiple variables!
  
  cols <- c(.weight, .displacement)
  cols <- cols[cols != "NULL"]
  
  .xby <- c(.x, .by)
  .xby <- .xby[.xby != "NULL"]
  
  data.table::data.table(df)[, lapply(.SD, sum, na.rm = TRUE), by = .xby, .SDcols = cols][]
}

返回变量wtdisp的总和(按cylam分组):

onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = am)

#>    cyl am     wt   disp
#> 1:   6  1  8.265  465.0
#> 2:   4  1 16.338  748.9
#> 3:   6  0 13.555  818.2
#> 4:   8  0 49.249 4291.4
#> 5:   4  0  8.805  407.6
#> 6:   8  1  6.740  652.0

以下也返回正确的结果:


onewayfn(mtcars, cyl, weight = wt, displacement = disp)
#>    cyl     wt   disp
#> 1:   6 21.820 1283.2
#> 2:   4 25.143 1156.5
#> 3:   8 55.989 4943.4

但是,如果我向by添加多个变量,function 会返回错误:

onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = list(am,vs))

我想获得与上面相同的结果,但现在按cylamvs分组。 我该如何重写onewayfn()来做到这一点?

我们可以将by作为字符串向量传递

onewayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL){
  .x <- deparse(substitute(x))
  .weight <- deparse(substitute(weight))
  .displacement <- deparse(substitute(displacement))
  #.by <- deparse(substitute(by)) # Does not work with multiple variables!
  
  cols <- c(.weight, .displacement)
  cols <- cols[cols != "NULL"]
  
  .xby <- c(.x, by)
  .xby <- .xby[.xby != "NULL"]
  
  data.table::data.table(df)[, lapply(.SD, sum, na.rm = TRUE), by = .xby, .SDcols = cols][]
}

-测试

onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = c("am","vs"))

#   cyl am vs     wt   disp
#1:   6  1  0  8.265  465.0
#2:   4  1  1 14.198  628.6
#3:   6  0  1 13.555  818.2
#4:   8  0  0 49.249 4291.4
#5:   4  0  1  8.805  407.6
#6:   4  1  0  2.140  120.3
#7:   8  1  0  6.740  652.0

或者另一种选择是eval一个字符串

newayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL){
  
  dfname <- deparse(substitute(df))
  .x <- deparse(substitute(x))
  .weight <- deparse(substitute(weight))
  .displacement <- deparse(substitute(displacement))
  .by <- deparse(substitute(by)) # Does not work with multiple variables!
  
 
  cols <- c(.weight, .displacement)
  cols <- cols[cols != "NULL"]
  cols <- paste(dQuote(cols, FALSE), collapse=",")
  cols <- glue::glue("c({cols})")
  .by <- gsub("list\\(|\\)", "", .by)
  .xby <- c(.x, .by)
  .xby <- .xby[.xby != "NULL"]
  .xby1 <- paste0("c(", gsub("(\\w+)", "'\\1'", toString(.xby)), ")")
  str1 <- glue::glue('data.table::data.table({dfname})[, lapply(.SD, sum, na.rm = TRUE), by = {.xby1}, .SDcols = {cols}][]')
  print(str1)
  eval(parse(text = str1))
}

-测试

onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = list(am, vs))
#data.table::data.table(mtcars)[, lapply(.SD, sum, na.rm = TRUE), by = c('cyl', 'am', 'vs'), .SDcols = c("wt","disp")][]
#   cyl am vs     wt   disp
#1:   6  1  0  8.265  465.0
#2:   4  1  1 14.198  628.6
#3:   6  0  1 13.555  818.2
#4:   8  0  0 49.249 4291.4
#5:   4  0  1  8.805  407.6
#6:   4  1  0  2.140  120.3
#7:   8  1  0  6.740  652.0
 
onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = am)
#data.table::data.table(mtcars)[, lapply(.SD, sum, na.rm = TRUE), by = c('cyl', 'am'), .SDcols = c("wt","disp")][]
#   cyl am     wt   disp
#1:   6  1  8.265  465.0
#2:   4  1 16.338  748.9
#3:   6  0 13.555  818.2
#4:   8  0 49.249 4291.4
#5:   4  0  8.805  407.6
#6:   8  1  6.740  652.0

这是另一种选择:

onewayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL) {
    mc <- as.list(match.call())
    byl <- as.list(mc$by)

    if (length(byl) > 0L && (byl[[1L]] == as.symbol("list") || byl[[1L]] == as.symbol(".")))
        byl <- byl[-1L]

    COLS <- c()
    if (!missing(weight))
        COLS <- deparse(mc$weight)
    if (!missing(displacement))
        COLS <- c(COLS, deparse(mc$displacement))

    BY <- as.call(c(as.symbol("list"), substitute(x), byl))
    eval(bquote(data.table(df)[, lapply(.SD, sum, na.rm = TRUE), by=.(BY), .SDcols=.(COLS)]))
}

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM