繁体   English   中英

将包含多个类别的数据从长格式重塑为宽格式

[英]Reshaping data from long to wide with multiple categories

我试图将我的数据从长格式重整为基于多个分组的宽格式,但没有成功。 与此数据:

# Simulated long-format data: 200 cost observations spread over 4 months
# (4-7) and 5 names. `id` has only 20 values and is recycled by data.frame().
id <- seq_len(20)
month <- rep(4:7, times = 50)
name <- rep(c("sam", "mike", "tim", "jill", "max"), times = 40)
cost <- sample(1:100, size = 200, replace = TRUE)
df <- data.frame(id = id, month = month, name = name, cost = cost)

# Per (name, month) summaries of cost. The formula refers to df$... directly,
# so the result columns are literally named "df$name", "df$month", "df$cost".
df.mo.mean <- aggregate(df$cost ~ df$name + df$month, FUN = mean)
df.mo.sd <- aggregate(df$cost ~ df$name + df$month, FUN = sd)

# Put both summaries side by side (data.frame() sanitises and de-duplicates
# the column names), keep the grouping columns once plus the two statistics,
# then round the statistics to two decimals.
df.mo <- data.frame(df.mo.mean, df.mo.sd)
df.mo <- df.mo[, c(1, 2, 3, 6)]
df.mo[3:4] <- lapply(df.mo[3:4], round, digits = 2)

head(df)
   id month name cost
1  1     4  sam   29
2  2     5 mike   93
3  3     6  tim   27
4  4     7 jill   67
5  5     4  max   28
6  6     5  sam   69

我正在尝试让数据看起来像下面的东西,并尝试将其推广为未知数量的名称(但最大<15)

month    name1.cost.mean  name1.cost.sd  name2.cost.mean  name2.cost.sd
1        45               4              40               6
2        ...   

我尝试过 reshape、do.call 和 rbind,但都没有成功。我能想到的唯一其他方法是使用循环,而这通常意味着我的做法有问题。我没有任何使用 plyr 的经验,并且希望使用基础包(base R)来解决此问题(出于学习目的);但如果做不到,任何其他建议也会非常有帮助。

set.seed(1)
library(plyr)

# One row per (month, name) pair holding the mean and standard deviation
# of cost for that pair.
kk <- ddply(
  .data = df,
  .variables = .(month, name),
  .fun = summarize,
  mean = mean(cost),
  sd = sd(cost)
)

# Go wide: one row per month, with the mean/sd columns repeated per name.
reshape(data = kk, timevar = "name", idvar = "month", direction = "wide")



    month mean.jill  sd.jill mean.max   sd.max mean.mike  sd.mike mean.sam   sd.sam mean.tim   sd.tim
1      4      55.3 34.62834     63.3 23.35261      57.6 22.91627     63.4 28.89906     43.3 25.42112
6      5      49.3 25.00689     51.1 27.85059      48.4 23.16223     43.0 24.33562     47.6 32.13928
11     6      60.4 23.61826     52.1 29.74503      38.6 34.39703     53.0 23.28567     52.4 20.88700
16     7      50.0 30.76073     62.7 23.98634      51.7 32.10763     52.8 32.27589     49.5 23.00845

我不确定您要的是什么,但也许这样的事情可能有用

> set.seed(1)  # fix the RNG so the sampled costs are reproducible
> df <- data.frame(id=1:20, month=rep(4:7, 50), 
+                  name=rep(c("sam", "mike", "tim", "jill", "max"), 40),
+                  cost= sample(1:100, 200, replace=TRUE))
> 
> DF.mean <- aggregate(cost ~ name + month, FUN=mean, data=df)  ## mean of cost per (name, month) group
> DF.sd   <- aggregate(cost ~ name + month, FUN=sd, data=df)    ## standard deviation of cost per (name, month) group
> 
> x1 <- as.data.frame.matrix(xtabs(cost~month+name, data=DF.mean)) # means to wide: rows = month, columns = name (one value per cell, so xtabs' sum is that value)
> colnames(x1) <- paste0(colnames(x1), ".mean")
> x2 <- as.data.frame.matrix(xtabs(cost~month+name, data=DF.sd))   # sds to wide: rows = month, columns = name
> colnames(x2) <- paste0(colnames(x2), ".sd")
> 
> cbind(x1, x2)  # all *.mean columns first, then all *.sd columns
  jill.mean max.mean mike.mean sam.mean tim.mean  jill.sd   max.sd  mike.sd   sam.sd   tim.sd
4      55.3     63.3      57.6     63.4     43.3 34.62834 23.35261 22.91627 28.89906 25.42112
5      49.3     51.1      48.4     43.0     47.6 25.00689 27.85059 23.16223 24.33562 32.13928
6      60.4     52.1      38.6     53.0     52.4 23.61826 29.74503 34.39703 23.28567 20.88700
7      50.0     62.7      51.7     52.8     49.5 30.76073 23.98634 32.10763 32.27589 23.00845

另外请注意,@Metrics 的方法也可以只用 R 的基础函数完成,无需任何额外的包:

> kk <- aggregate(cost ~ name + month, FUN=function(x) c(mean=mean(x), sd=sd(x)), data=df)  # FUN returns both mean and sd for each (name, month) group
> reshape(kk,timevar="name",idvar="month",direction="wide")  # one row per month; each name's statistics become separate columns
  month cost.jill.mean cost.jill.sd cost.max.mean cost.max.sd cost.mike.mean cost.mike.sd cost.sam.mean cost.sam.sd cost.tim.mean cost.tim.sd
1      4       55.30000     34.62834      63.30000    23.35261       57.60000     22.91627      63.40000    28.89906      43.30000    25.42112
6      5       49.30000     25.00689      51.10000    27.85059       48.40000     23.16223      43.00000    24.33562      47.60000    32.13928
11     6       60.40000     23.61826      52.10000    29.74503       38.60000     34.39703      53.00000    23.28567      52.40000    20.88700
16     7       50.00000     30.76073      62.70000    23.98634       51.70000     32.10763      52.80000    32.27589      49.50000    23.00845
> means <- with( df, tapply(cost, list(month, name), FUN=mean) )  # month-by-name table of mean cost
> sds <- with( df, tapply(cost, list(month, name), FUN=sd) )  # month-by-name table of the standard deviation of cost
> colnames(means) <- paste0(colnames(means), ".mean")
> colnames(sds) <- paste0(colnames(sds), ".sd")  # NOTE(review): the recorded output below shows ".mean.sd" suffixes, which this line would not produce - the output may come from a different run; confirm
> comb.df <- as.data.frame( cbind(means, sds) )
> comb.df <- comb.df[order(names(comb.df))]  # sort columns alphabetically so each name's columns sit next to each other
> comb.df
  jill.mean jill.mean.sd max.mean max.mean.sd mike.mean mike.mean.sd
4      62.1     22.29823     39.7    25.53016      39.6     30.11164
5      40.7     30.72838     44.4    29.12502      54.2     23.91095
6      47.3     31.54556     46.9    32.30910      65.3     30.05569
7      55.5     33.16038     45.9    28.13637      59.7     31.79815
  sam.mean sam.mean.sd tim.mean tim.mean.sd
4     40.9    23.54877     58.5    21.69613
5     51.5    30.76163     34.2    32.16900
6     69.1    18.26016     55.2    32.99764
7     46.9    29.90150     55.8    27.17352

您可以使用两次重塑,然后合并结果

library(reshape2)  # provides dcast()
> dcast(df, month ~ name, mean, value.var="cost")  # wide table: one row per month, one column per name, cells = mean cost
  month jill  max mike  sam  tim
1     4 39.5 54.6 45.6 48.4 57.4
2     5 45.1 61.7 45.4 54.5 50.8
3     6 41.9 45.7 56.4 43.1 52.1
4     7 51.6 38.6 43.6 65.1 51.5

> dcast(df, month ~ name, sd, value.var="cost")  # same layout as the mean table, cells = standard deviation of cost
  month     jill      max     mike      sam      tim
1     4 29.31154 25.25954 28.96051 31.32695 29.82989
2     5 31.02848 27.96049 34.32589 30.08599 23.95273
3     6 32.09517 32.50316 37.16988 27.03681 30.42094
4     7 19.56300 31.50026 28.65969 36.53750 26.73429

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM