[英]Using by in a function with data.table
我写了一个小 function 通过离散变量聚合几列:
library(data.table)
onewayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL){
.x <- deparse(substitute(x))
.weight <- deparse(substitute(weight))
.displacement <- deparse(substitute(displacement))
.by <- deparse(substitute(by)) # Does not work with multiple variables!
cols <- c(.weight, .displacement)
cols <- cols[cols != "NULL"]
.xby <- c(.x, .by)
.xby <- .xby[.xby != "NULL"]
data.table::data.table(df)[, lapply(.SD, sum, na.rm = TRUE), by = .xby, .SDcols = cols][]
}
返回变量wt
和disp
的总和(按cyl
和am
分组):
onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = am)
#> cyl am wt disp
#> 1: 6 1 8.265 465.0
#> 2: 4 1 16.338 748.9
#> 3: 6 0 13.555 818.2
#> 4: 8 0 49.249 4291.4
#> 5: 4 0 8.805 407.6
#> 6: 8 1 6.740 652.0
以下也返回正确的结果:
onewayfn(mtcars, cyl, weight = wt, displacement = disp)
#> cyl wt disp
#> 1: 6 21.820 1283.2
#> 2: 4 25.143 1156.5
#> 3: 8 55.989 4943.4
但是,如果我向by
添加多个变量,function 会返回错误:
onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = list(am,vs))
我想获得与上面相同的结果,但现在按cyl
、 am
和vs
分组。 我该如何重写onewayfn()
来做到这一点?
我们可以将by
作为字符串向量传递
onewayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL){
.x <- deparse(substitute(x))
.weight <- deparse(substitute(weight))
.displacement <- deparse(substitute(displacement))
#.by <- deparse(substitute(by)) # Does not work with multiple variables!
cols <- c(.weight, .displacement)
cols <- cols[cols != "NULL"]
.xby <- c(.x, by)
.xby <- .xby[.xby != "NULL"]
data.table::data.table(df)[, lapply(.SD, sum, na.rm = TRUE), by = .xby, .SDcols = cols][]
}
-测试
onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = c("am","vs"))
# cyl am vs wt disp
#1: 6 1 0 8.265 465.0
#2: 4 1 1 14.198 628.6
#3: 6 0 1 13.555 818.2
#4: 8 0 0 49.249 4291.4
#5: 4 0 1 8.805 407.6
#6: 4 1 0 2.140 120.3
#7: 8 1 0 6.740 652.0
或者另一种选择是eval
一个字符串
newayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL){
dfname <- deparse(substitute(df))
.x <- deparse(substitute(x))
.weight <- deparse(substitute(weight))
.displacement <- deparse(substitute(displacement))
.by <- deparse(substitute(by)) # Does not work with multiple variables!
cols <- c(.weight, .displacement)
cols <- cols[cols != "NULL"]
cols <- paste(dQuote(cols, FALSE), collapse=",")
cols <- glue::glue("c({cols})")
.by <- gsub("list\\(|\\)", "", .by)
.xby <- c(.x, .by)
.xby <- .xby[.xby != "NULL"]
.xby1 <- paste0("c(", gsub("(\\w+)", "'\\1'", toString(.xby)), ")")
str1 <- glue::glue('data.table::data.table({dfname})[, lapply(.SD, sum, na.rm = TRUE), by = {.xby1}, .SDcols = {cols}][]')
print(str1)
eval(parse(text = str1))
}
-测试
onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = list(am, vs))
#data.table::data.table(mtcars)[, lapply(.SD, sum, na.rm = TRUE), by = c('cyl', 'am', 'vs'), .SDcols = c("wt","disp")][]
# cyl am vs wt disp
#1: 6 1 0 8.265 465.0
#2: 4 1 1 14.198 628.6
#3: 6 0 1 13.555 818.2
#4: 8 0 0 49.249 4291.4
#5: 4 0 1 8.805 407.6
#6: 4 1 0 2.140 120.3
#7: 8 1 0 6.740 652.0
onewayfn(mtcars, cyl, weight = wt, displacement = disp, by = am)
#data.table::data.table(mtcars)[, lapply(.SD, sum, na.rm = TRUE), by = c('cyl', 'am'), .SDcols = c("wt","disp")][]
# cyl am wt disp
#1: 6 1 8.265 465.0
#2: 4 1 16.338 748.9
#3: 6 0 13.555 818.2
#4: 8 0 49.249 4291.4
#5: 4 0 8.805 407.6
#6: 8 1 6.740 652.0
这是另一种选择:
onewayfn <- function(df, x, weight = NULL, displacement = NULL, by = NULL) {
mc <- as.list(match.call())
byl <- as.list(mc$by)
if (length(byl) > 0L && (byl[[1L]] == as.symbol("list") || byl[[1L]] == as.symbol(".")))
byl <- byl[-1L]
COLS <- c()
if (!missing(weight))
COLS <- deparse(mc$weight)
if (!missing(displacement))
COLS <- c(COLS, deparse(mc$displacement))
BY <- as.call(c(as.symbol("list"), substitute(x), byl))
eval(bquote(data.table(df)[, lapply(.SD, sum, na.rm = TRUE), by=.(BY), .SDcols=.(COLS)]))
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.