I have a table (t02f) with a character variable ("t02.clase") and numeric variables. I need to create another table with the mean of each variable for each value in the column t02.clase.
Example table:
t02.clase Std_A_CLI_monto_sucursal_1 Std_A_CLI_monto_sucursal_2 Std_A_CLI_monto_sucursal_3
1 AK 0 0.00000000 0.051311597
2 AK 0 0.01303586 0.003442244
3 AK 0 0.00000000 0.017347593
Std_A_CLI_monto_sucursal_4 Std_A_CLI_promociones Std_A_CLI_dias_cliente Std_A_CLI_sucursales
1 0 0.4736842 0.57061341 0.05555556
2 0 0.5000000 0.55492154 0.05555556
3 0 0.0000000 0.05991441 0.05555556
Std_A_CLI_promos_enviadas Std_A_CLI_promos_compras Std_A_CLI_produni Std_A_CLI_flagret
1 0.6363636 0.2727273 0.14749987 0
2 0.6363636 0.1818182 0.05973218 1
I need to end up with something like:
t02.clase Std_A_CLI_monto_sucursal_1 Std_A_CLI_monto_sucursal_2 Std_A_CLI_monto_sucursal_3
AK 0.3 0.001 0.2
AM 0.2 0.02 0.3
ES 0.5 0.001 0.0001
I used:
a1<-data.frame(NULL)
a<-data.frame(NULL)
for (i in (colnames(t02f)[-1])) {a1<-ddply(t02f, .(t02.clase), summarize, mean(i))
a<-cbind(a1,a) }
But I get numbers from 1 to ncol(t02f) instead of the means.
Or using data.table
library(data.table)
setDT(t02f)[, lapply(.SD, mean, na.rm = TRUE), by = t02.clase]
# t02.clase Std_A_CLI_monto_sucursal_1 Std_A_CLI_monto_sucursal_2 Std_A_CLI_monto_sucursal_3 Std_A_CLI_monto_sucursal_4 Std_A_CLI_promociones Std_A_CLI_dias_cliente
# 1: AK 0 0.004345287 0.02403381 0 0.3245614 0.3951498
# Std_A_CLI_sucursales
# 1: 0.05555556
Frankly, you could do it quite efficiently in base R with a combination of by and colMeans, something like this (don't run it if you have already converted your data set to the data.table class):
do.call(rbind, by(t02f[, -1], t02f$t02.clase, colMeans, na.rm = TRUE))
# Std_A_CLI_monto_sucursal_1 Std_A_CLI_monto_sucursal_2 Std_A_CLI_monto_sucursal_3 Std_A_CLI_monto_sucursal_4 Std_A_CLI_promociones Std_A_CLI_dias_cliente
# AK 0 0.004345287 0.02403381 0 0.3245614 0.3951498
# Std_A_CLI_sucursales
# AK 0.05555556
Or (just for fun)
# Per-class mean of every non-grouping column.
# tapply() returns one value per class, so FUN.VALUE must be as long as the
# number of classes (double(1) only works when there is a single class).
# as.factor() mirrors how tapply() itself derives the groups.
# Note the argument is na.rm, not rm: mean() silently ignores an unknown `rm`.
n_groups <- nlevels(as.factor(t02f$t02.clase))
vapply(t02f[, -1], function(x) tapply(x, t02f$t02.clase, mean, na.rm = TRUE), double(n_groups))
Try summarise_each
from dplyr
library(dplyr)
# Group by class and take the NA-ignoring mean of every remaining column.
# NOTE(review): summarise_each() and funs() are deprecated in current dplyr
# releases; the documented replacement is
#   summarise(across(everything(), ~ mean(.x, na.rm = TRUE)))
res <- t02f %>%
group_by(t02.clase)%>%
summarise_each(funs(mean=mean(., na.rm=TRUE)))
as.data.frame(res)
# t02.clase Std_A_CLI_monto_sucursal_1 Std_A_CLI_monto_sucursal_2
#1 AK 0 0.004345287
# Std_A_CLI_monto_sucursal_3 Std_A_CLI_monto_sucursal_4 Std_A_CLI_promociones
#1 0.02403381 0 0.3245614
# Std_A_CLI_dias_cliente Std_A_CLI_sucursales
#1 0.3951498 0.05555556
If you want to use ddply, there is the colwise family of helpers (numcolwise is used here):
library(plyr)
ddply(t02f, .(t02.clase), numcolwise(mean, na.rm=TRUE))
# t02.clase Std_A_CLI_monto_sucursal_1 Std_A_CLI_monto_sucursal_2
#1 AK 0 0.004345287
# Std_A_CLI_monto_sucursal_3 Std_A_CLI_monto_sucursal_4 Std_A_CLI_promociones
#1 0.02403381 0 0.3245614
# Std_A_CLI_dias_cliente Std_A_CLI_sucursales
#1 0.3951498 0.05555556
Or using aggregate
from base R
# Per-class mean of every other column via the formula interface.
# The formula method defaults to na.action = na.omit, which drops every row
# containing any NA *before* mean() is called; na.pass keeps those rows so
# that na.rm = TRUE can remove NAs column by column instead.
aggregate(. ~ t02.clase, t02f, mean, na.rm = TRUE, na.action = na.pass)
t02f <- structure(list(t02.clase = c("AK", "AK", "AK"),
Std_A_CLI_monto_sucursal_1 = c(0L,0L, 0L), Std_A_CLI_monto_sucursal_2 =
c(0, 0.01303586, 0), Std_A_CLI_monto_sucursal_3 = c(0.051311597,
0.003442244, 0.017347593), Std_A_CLI_monto_sucursal_4 = c(0L,
0L, 0L), Std_A_CLI_promociones = c(0.4736842, 0.5, 0), Std_A_CLI_dias_cliente =
c(0.57061341,0.55492154, 0.05991441), Std_A_CLI_sucursales = c(0.05555556,
0.05555556, 0.05555556)), .Names = c("t02.clase", "Std_A_CLI_monto_sucursal_1",
"Std_A_CLI_monto_sucursal_2", "Std_A_CLI_monto_sucursal_3",
"Std_A_CLI_monto_sucursal_4","Std_A_CLI_promociones", "Std_A_CLI_dias_cliente",
"Std_A_CLI_sucursales"), row.names = c("1", "2", "3"), class = "data.frame")
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.