簡體   English   中英

重塑R中的數據

[英]Reshaping a data in R

我有一個數據集,如文章底部所示。 數據具有四個列,分別稱為SIC,AT95Group,AT95Mean,AT95Med。 AT95Group列取四個值:“00”,“01”,“11”和“10”。 當前,每個SIC有四行,分別對應AT95Group的每個值。 我想重塑數據框,使每個SIC僅占一行。 之前,每對(SIC,AT95Group)有兩列,分別稱為Mean和Med;現在我們想要創建4 * 2 = 8列,即四個組(“00”,“11”,“01”,“10”)各對應兩列(“Mean”和“Med”)。 這八列的名稱類似於“00Mean”,“11Mean”,“00Med”,“11Med”等,每個SIC在其中都有相應的值。

我覺得這很難做到。 有什麼建議嗎? 謝謝。

> dput(head(pp,20))
structure(list(SIC = c(1L, 1L, 1L, 10L, 10L, 10L, 10L, 12L, 12L, 
12L, 12L, 13L, 13L, 13L, 13L, 14L, 14L, 14L, 14L, 15L), AT95Group = c("11", 
"10", "00", "11", "01", "00", "10", "01", "11", "10", "00", "11", 
"01", "00", "10", "11", "01", "10", "00", "01"), AT95Med = c(0.0691039117115276, 
0.0608649722972575, 0.0609974198491522, 0.215571816296268, 0.305308985848382, 
0.351312558091798, 0.352704719896703, 0.0459887720804718, 0.0304466181779069, 
0.0513875431555943, 0.0541431932578377, 0.0650920855876547, 0.143724642017362, 
0.156092793582293, 0.0976059673595903, 0.0116620184564053, 0.0188895210677074, 
0.0356836223212195, 0.0513040852859517, 0.0982448708035204), 
    AT95Mean = c(0.0691039117115276, 0.0608649722972575, 0.0609974198491522, 
    0.215571816296268, 0.305308985848382, 0.351312558091798, 
    0.352704719896703, 0.0459887720804718, 0.0304466181779069, 
    0.0513875431555943, 0.0541431932578377, 0.0650920855876547, 
    0.143724642017362, 0.156092793582293, 0.0976059673595903, 
    0.0116620184564053, 0.0188895210677074, 0.0356836223212195, 
    0.0513040852859517, 0.0982448708035204)), .Names = c("SIC", 
"AT95Group", "AT95Med", "AT95Mean"), row.names = c(241L, 236L, 
27L, 1126L, 1035L, 1030L, 664L, 1269L, 1259L, 1245L, 1244L, 3919L, 
4722L, 3329L, 3222L, 4886L, 4889L, 4951L, 4860L, 5108L), class = "data.frame")

我用下面的代碼做了粗略的嘗試,但失敗了。 不知道如何繼續。

# Keep one row per distinct (SIC, AT95Group, AT95Med, AT95Mean) combination.
pp <- unique(dacc1[, c("SIC", "AT95Group", "AT95Med", "AT95Mean")])
# Distinct SIC codes that occur in the de-duplicated table.
xsic <- unique(pp[, "SIC"])
# Two-element list: the SIC codes and an equally long vector of "AT95" labels.
xlist <- list(xsic, rep("AT95", times = length(xsic)))

編輯:

我在運行troy的代碼后得到的結果:

> pp1 <- head(pp,20)
     SIC AT95Group    AT95Med   AT95Mean
241    1        11 0.06910391 0.06910391
236    1        10 0.06086497 0.06086497
27     1        00 0.06099742 0.06099742
1126  10        11 0.21557182 0.21557182
1035  10        01 0.30530899 0.30530899
1030  10        00 0.35131256 0.35131256
664   10        10 0.35270472 0.35270472
1269  12        01 0.04598877 0.04598877
1259  12        11 0.03044662 0.03044662
1245  12        10 0.05138754 0.05138754
1244  12        00 0.05414319 0.05414319
3919  13        11 0.06509209 0.06509209
4722  13        01 0.14372464 0.14372464
3329  13        00 0.15609279 0.15609279
3222  13        10 0.09760597 0.09760597
4886  14        11 0.01166202 0.01166202
4889  14        01 0.01888952 0.01888952
4951  14        10 0.03568362 0.03568362
4860  14        00 0.05130409 0.05130409
5108  15        01 0.09824487 0.09824487

> molten<-melt(pp);
Using AT95Group as id variables

molten$variable<-paste(gsub("[AT95]","",molten$variable),molten$AT95Group," ");
cast(molten[,c(1,3,4)], SIC ~ variable);

> cast(molten[,c(1,3,4)], SIC ~ variable);
Error in `[.data.frame`(molten, , c(1, 3, 4)) : 
  undefined columns selected

我希望這個解決方案不要太神秘:

# Build a wide table `d` with one row per SIC and, for every AT95Group value,
# one column for the median and one for the mean.
xsic <- unique(pp[, "SIC"])
AT <- c("00", "01", "10", "11")
d <- data.frame(xsic = xsic)
# Iterate over the group labels themselves so the loop always stays in step
# with AT instead of relying on a hard-coded bound of 4.
for (grp in AT) {
  subgroup <- pp[pp$AT95Group == grp, ]
  # Row of `subgroup` that holds each SIC; NA where a SIC has no row for this
  # group, which leaves NA in the corresponding output cells.
  idx <- match(xsic, subgroup$SIC)
  d[[paste0(grp, "AT95Med")]] <- subgroup$AT95Med[idx]
  d[[paste0(grp, "AT95Mean")]] <- subgroup$AT95Mean[idx]
}

結果:

xsic 00AT95Med 00AT95Mean  01AT95Med 01AT95Mean  10AT95Med 10AT95Mean  11AT95Med 11AT95Mean
  1 0.06099742 0.06099742         NA         NA 0.06086497 0.06086497 0.06910391 0.06910391
 10 0.35131256 0.35131256 0.30530899 0.30530899 0.35270472 0.35270472 0.21557182 0.21557182
 12 0.05414319 0.05414319 0.04598877 0.04598877 0.05138754 0.05138754 0.03044662 0.03044662
 13 0.15609279 0.15609279 0.14372464 0.14372464 0.09760597 0.09760597 0.06509209 0.06509209
 14 0.05130409 0.05130409 0.01888952 0.01888952 0.03568362 0.03568362 0.01166202 0.01166202
 15         NA         NA 0.09824487 0.09824487         NA         NA         NA         NA

或者,您可以使用“reshape”包:

install.packages("reshape")  # only run this once if you don't have it
library(reshape)  # library() stops with an error if the package is missing
pp   #  this is what I called your table

# Stretch the table out into variable/value pairs. The id columns are named
# explicitly: when melt() is left to guess, it can keep only AT95Group as an
# id and melt SIC as a measure, and the column selection further down then
# fails with "undefined columns selected".
molten <- melt(pp, id.vars = c("SIC", "AT95Group"))

# Then modify the "variable" values so they reflect the group (and delete
# 'AT95'). The pattern is anchored so only the literal prefix is removed
# (a character class like "[AT95]" would strip any A, T, 9 or 5 anywhere),
# and the separator is passed as `sep` so no trailing blanks end up in the
# new column names.
molten$variable <- paste(
  sub("^AT95", "", molten$variable),
  molten$AT95Group,
  sep = " "
)

# Then use cast (you can look up the documentation in ?reshape);
# basically this gives you a crosstab of the SICs against the new variables.
# Only the columns needed for the cast are kept, selected by name so the
# result does not depend on column positions.
cast(molten[, c("SIC", "variable", "value")], SIC ~ variable)

這給你:

  SIC  Mean 00    Mean 01    Mean 10    Mean 11     Med 00     Med 01     Med 10     Med 11  
1   1 0.06099742         NA 0.06086497 0.06910391 0.06099742         NA 0.06086497 0.06910391
2  10 0.35131256 0.30530899 0.35270472 0.21557182 0.35131256 0.30530899 0.35270472 0.21557182
3  12 0.05414319 0.04598877 0.05138754 0.03044662 0.05414319 0.04598877 0.05138754 0.03044662
4  13 0.15609279 0.14372464 0.09760597 0.06509209 0.15609279 0.14372464 0.09760597 0.06509209
5  14 0.05130409 0.01888952 0.03568362 0.01166202 0.05130409 0.01888952 0.03568362 0.01166202
6  15         NA 0.09824487         NA         NA         NA 0.09824487         NA         NA

順帶一提,base R中也有一個reshape函數(嚴格來說在stats包中):

# Base-R alternative: spread every AT95Group value into its own pair of
# AT95Med / AT95Mean columns, keeping one row per SIC.
reshape(
  data = pp,
  v.names = c("AT95Med", "AT95Mean"),
  timevar = "AT95Group",
  idvar = "SIC",
  direction = "wide"
)
#     SIC AT95Med.11 AT95Mean.11 AT95Med.10 AT95Mean.10 AT95Med.00 AT95Mean.00 AT95Med.01 AT95Mean.01
#241    1 0.06910391  0.06910391 0.06086497  0.06086497 0.06099742  0.06099742         NA          NA
#1126  10 0.21557182  0.21557182 0.35270472  0.35270472 0.35131256  0.35131256 0.30530899  0.30530899
#1269  12 0.03044662  0.03044662 0.05138754  0.05138754 0.05414319  0.05414319 0.04598877  0.04598877
#3919  13 0.06509209  0.06509209 0.09760597  0.09760597 0.15609279  0.15609279 0.14372464  0.14372464
#4886  14 0.01166202  0.01166202 0.03568362  0.03568362 0.05130409  0.05130409 0.01888952  0.01888952
#5108  15         NA          NA         NA          NA         NA          NA 0.09824487  0.09824487

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM