简体   繁体   English

R:tidyr 中的 spread 与 gather

[英]R spread vs gather in tidyr

I have a dataframe in the following form: 我有以下形式的数据框:

         person currentTest  beforeValue   afterValue
    1       1           A  1.284297055  2.671763513
    2       2           A -0.618359548 -2.354926905
    3       3           A  0.039457430 -0.091709968
    4       4           A -0.448608324 -0.362851832
    5       5           A -0.961777124 -1.416284339
    6       6           A  0.702471895  2.052181444
    7       7           A -0.455222045 -2.125684279
    8       8           A -1.231549132 -2.777425148
    9       9           A -0.797234990 -0.558306183
    10     10           A -0.709734963 -1.244159550
    11      1           B -0.472799377 -0.869472343
    12      2           B  0.059720737  1.444855389
    13      3           B  0.924201532  2.731049485
    14      4           B  0.658884183  1.017542475
    15      5           B -1.989807256 -4.712671740
    16      6           B  0.660241305  1.971232718
    17      7           B  0.089636952 -0.564457911
    18      8           B -0.828399941  0.507659171
    19      9           B -0.838074237 -0.316996942
    20     10           B -1.659197101 -3.317623686
    ...

What I'd like is to get a data frame of: 我想要的是获取以下数据框:

person   A_Before     A_After       B_Before       B_After   ...
  1     1.284297055   2.671763513  -0.472799377  -0.869472343
  2     -0.618359548 -2.354926905   0.059720737   1.444855389
...

I've tried gather and spread, but that's not quite what I need, as it requires creating new columns. 我已经尝试过 gather 和 spread,但这并不完全是我所需要的,因为需要创建新的列。 Any suggestions? 有什么建议吗?

The dput version for easy access is below: 易于使用的dput版本如下:

 # dput() prints a text representation of its argument and returns the
 # argument unchanged, so this line is only the console echo. The
 # structure(...) expression that follows is the printed output; assigning
 # that expression to `resultsData` recreates the data.
 resultsData <- dput(resultsData)
structure(list(person = c(1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 
5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 
10L, 1L, 2L, 3L, 4L, 5L, 6L, 7L, 8L, 9L, 10L, 1L, 2L, 3L, 4L, 
5L, 6L, 7L, 8L, 9L, 10L), currentTest = structure(c(1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 2L, 
2L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 4L, 4L, 4L, 4L, 4L, 
4L, 4L, 4L, 4L, 4L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 6L, 
6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L, 6L), .Label = c("A", "B", "C", 
"D", "E", "F"), class = "factor"), beforeValue = c(1.28429705541541, 
-0.618359548370402, 0.039457429902531, -0.448608324038257, -0.961777123997687, 
0.702471895259405, -0.455222044740939, -1.23154913153736, -0.797234989892673, 
-0.709734963076803, -0.47279937661921, 0.0597207367403981, 0.924201531911827, 
0.658884182599422, -1.98980725637449, 0.660241304554785, 0.0896369516528346, 
-0.828399941497236, -0.838074236572976, -1.65919710134782, 0.577469369909437, 
1.92748171699512, -0.245593641496638, 0.126104785456265, -0.559338325961641, 
1.29802115505785, 0.719406692531958, 0.969414499181256, -0.814697072724845, 
0.86465983690719, -0.709539159817187, 1.02775240926492, -0.50490096148732, 
0.40769259465753, -0.868531009656408, 0.949518511358715, 2.32458579520932, 
-0.257578702370506, -0.789761851618986, 0.0979274657020477, -0.00803566278013502, 
1.42984177159549, 1.45485678109231, -0.956556613290905, 0.443323691839299, 
-0.261951072972966, -1.30990441429799, 0.0921741874883992, -1.02612779569131, 
0.81550719514697, -0.403037731404182, -0.384422139459082, 0.417074857491798, 
-1.37128032791855, -0.0796160137501127, 1.35302483988882, -0.752751140138746, 
0.812453275384099, -1.32443072805549, -1.66986584340583), afterValue = c(2.67176351335094, 
-2.35492690509713, -0.0917099675669388, -0.362851831626841, -1.4162843393352, 
2.05218144382074, -2.12568427901904, -2.77742514848958, -0.558306182843248, 
-1.24415954975022, -0.869472343362331, 1.44485538931333, 2.73104948477609, 
1.01754247530805, -4.71267174035743, 1.9712327179732, -0.564457911016569, 
0.507659170771878, -0.31699694238194, -3.31762368638082, 1.09068172988414, 
4.37537723545199, -0.116850493406969, 1.9533832597394, -1.69003563933244, 
2.62250581307257, -0.00837379068728961, 1.84192937988371, -0.675899868505659, 
2.08506660046288, -0.583526785879512, 0.699298693972492, -1.26172199141024, 
1.23589313451783, -1.56008919968504, 0.436686458587792, 0.11699090169902, 
-1.07206510594109, 1.21204947218164, -0.812406581646911, 0.50373332256566, 
-0.084945367568491, -0.236015748624917, -0.479606239480476, -0.596799139055039, 
-0.562575023441403, -0.339935276865152, -0.213813544612318, -0.265296303857373, 
-1.12545083569158, 0.0105156062602101, 0.635695183644557, 0.767433440961415, 
0.16648012185356, 0.544633089427927, -0.904001384160196, -0.429299134808951, 
0.764224744168297, -0.166062348771635, -0.101892580202475)), class = c("tbl_df", 
"tbl", "data.frame"), row.names = c(NA, -60L), .Names = c("person", 
"currentTest", "beforeValue", "afterValue"))

We can use dcast from reshape2: 我们可以使用 reshape2 中的 dcast:

library(reshape2)

# Long form: keep person and currentTest as identifiers; the remaining
# columns (beforeValue, afterValue) are stacked into variable/value pairs.
meltdf <- melt(
  resultsData,
  id.vars = c("person", "currentTest"),
  variable.name = "variable",
  value.name = "value"
)

# Wide form: one row per person, one column per currentTest/variable pair
# (A_beforeValue, A_afterValue, B_beforeValue, ...).
dcast(meltdf, person ~ currentTest + variable, value.var = "value")

> dcast(meltdf, person ~ currentTest + variable)
     person A_beforeValue A_afterValue B_beforeValue B_afterValue C_beforeValue C_afterValue D_beforeValue D_afterValue E_beforeValue
1       1    1.28429706   2.67176351   -0.47279938   -0.8694723     0.5774694  1.090681730   -0.70953916   -0.5835268  -0.008035663
2       2   -0.61835955  -2.35492691    0.05972074    1.4448554     1.9274817  4.375377235    1.02775241    0.6992987   1.429841772
3       3    0.03945743  -0.09170997    0.92420153    2.7310495    -0.2455936 -0.116850493   -0.50490096   -1.2617220   1.454856781
4       4   -0.44860832  -0.36285183    0.65888418    1.0175425     0.1261048  1.953383260    0.40769259    1.2358931  -0.956556613
5       5   -0.96177712  -1.41628434   -1.98980726   -4.7126717    -0.5593383 -1.690035639   -0.86853101   -1.5600892   0.443323692
6       6    0.70247190   2.05218144    0.66024130    1.9712327     1.2980212  2.622505813    0.94951851    0.4366865  -0.261951073
7       7   -0.45522204  -2.12568428    0.08963695   -0.5644579     0.7194067 -0.008373791    2.32458580    0.1169909  -1.309904414
8       8   -1.23154913  -2.77742515   -0.82839994    0.5076592     0.9694145  1.841929380   -0.25757870   -1.0720651   0.092174187
9       9   -0.79723499  -0.55830618   -0.83807424   -0.3169969    -0.8146971 -0.675899869   -0.78976185    1.2120495  -1.026127796
10     10   -0.70973496  -1.24415955   -1.65919710   -3.3176237     0.8646598  2.085066600    0.09792747   -0.8124066   0.815507195

E_afterValue F_beforeValue F_afterValue
1    0.50373332   -0.40303773   0.01051561
2   -0.08494537   -0.38442214   0.63569518
3   -0.23601575    0.41707486   0.76743344
4   -0.47960624   -1.37128033   0.16648012
5   -0.59679914   -0.07961601   0.54463309
6   -0.56257502    1.35302484  -0.90400138
7   -0.33993528   -0.75275114  -0.42929913
8   -0.21381354    0.81245328   0.76422474
9   -0.26529630   -1.32443073  -0.16606235
10  -1.12545084   -1.66986584  -0.10189258

You can use a combined gather + spread approach: gather the *Value columns and combine the resulting key with currentTest to form the new header, then spread to wide format: 您可以结合使用 gather 和 spread:先用 gather 收集 *Value 列,将得到的键与 currentTest 组合成新的列名,然后用 spread 展开为宽格式:

# Stack the measurement columns into key/value pairs, join the test label
# and the measurement name with "_" to build the wide column headers, then
# spread those headers back out into columns.
resultsData %>%
  gather(key = "key", value = "value", -person, -currentTest) %>%
  unite(col = "header", currentTest, key, sep = "_") %>%
  spread(key = header, value = value)

# A tibble: 10 x 13
#   person A_afterValue A_beforeValue B_afterValue B_beforeValue C_afterValue C_beforeValue
# *  <int>        <dbl>         <dbl>        <dbl>         <dbl>        <dbl>         <dbl>
# 1      1   2.67176351    1.28429706   -0.8694723   -0.47279938  1.090681730     0.5774694
# 2      2  -2.35492691   -0.61835955    1.4448554    0.05972074  4.375377235     1.9274817
# 3      3  -0.09170997    0.03945743    2.7310495    0.92420153 -0.116850493    -0.2455936
# 4      4  -0.36285183   -0.44860832    1.0175425    0.65888418  1.953383260     0.1261048
# 5      5  -1.41628434   -0.96177712   -4.7126717   -1.98980726 -1.690035639    -0.5593383
# 6      6   2.05218144    0.70247190    1.9712327    0.66024130  2.622505813     1.2980212
# 7      7  -2.12568428   -0.45522204   -0.5644579    0.08963695 -0.008373791     0.7194067
# 8      8  -2.77742515   -1.23154913    0.5076592   -0.82839994  1.841929380     0.9694145
# 9      9  -0.55830618   -0.79723499   -0.3169969   -0.83807424 -0.675899869    -0.8146971
#10     10  -1.24415955   -0.70973496   -3.3176237   -1.65919710  2.085066600     0.8646598
# ... with 6 more variables: D_afterValue <dbl>, D_beforeValue <dbl>, E_afterValue <dbl>,
#   E_beforeValue <dbl>, F_afterValue <dbl>, F_beforeValue <dbl>

If you need to rename the columns: 如果需要重命名列:

# Reshape to wide with gather -> unite -> spread, then strip the trailing
# "Value" from the generated column names (A_beforeValue -> A_before, ...).
# Assumes tidyr and dplyr are attached.
resultsData %>%
  gather(key, value, -person, -currentTest) %>%
  unite(header, c("currentTest", "key"), sep = "_") %>%
  spread(header, value) %>%
  # A plain function is passed instead of the deprecated funs() wrapper.
  # The pattern is anchored at the end of the name, so it can match at most
  # once and sub() is sufficient.
  rename_at(vars(matches("Value$")), function(nm) sub("Value$", "", nm))

We could do this in a single call using recast from reshape2: 我们可以使用 reshape2 的 recast 在一次调用中完成此操作:

# recast() melts on the id columns and casts with the formula in one call.
reshape2::recast(
  resultsData,
  person ~ currentTest + variable,
  id.var = c("person", "currentTest")
)
#person A_beforeValue A_afterValue B_beforeValue B_afterValue C_beforeValue C_afterValue D_beforeValue D_afterValue
#1       1    1.28429706   2.67176351   -0.47279938   -0.8694723     0.5774694  1.090681730   -0.70953916   -0.5835268
#2       2   -0.61835955  -2.35492691    0.05972074    1.4448554     1.9274817  4.375377235    1.02775241    0.6992987
#3       3    0.03945743  -0.09170997    0.92420153    2.7310495    -0.2455936 -0.116850493   -0.50490096   -1.2617220
#4       4   -0.44860832  -0.36285183    0.65888418    1.0175425     0.1261048  1.953383260    0.40769259    1.2358931
#5       5   -0.96177712  -1.41628434   -1.98980726   -4.7126717    -0.5593383 -1.690035639   -0.86853101   -1.5600892
#6       6    0.70247190   2.05218144    0.66024130    1.9712327     1.2980212  2.622505813    0.94951851    0.4366865
#7       7   -0.45522204  -2.12568428    0.08963695   -0.5644579     0.7194067 -0.008373791    2.32458580    0.1169909
#8       8   -1.23154913  -2.77742515   -0.82839994    0.5076592     0.9694145  1.841929380   -0.25757870   -1.0720651
#9       9   -0.79723499  -0.55830618   -0.83807424   -0.3169969    -0.8146971 -0.675899869   -0.78976185    1.2120495
#10     10   -0.70973496  -1.24415955   -1.65919710   -3.3176237     0.8646598  2.085066600    0.09792747   -0.8124066
#   E_beforeValue E_afterValue F_beforeValue F_afterValue
#1   -0.008035663   0.50373332   -0.40303773   0.01051561
#2    1.429841772  -0.08494537   -0.38442214   0.63569518
#3    1.454856781  -0.23601575    0.41707486   0.76743344
#4   -0.956556613  -0.47960624   -1.37128033   0.16648012
#5    0.443323692  -0.59679914   -0.07961601   0.54463309
#6   -0.261951073  -0.56257502    1.35302484  -0.90400138
#7   -1.309904414  -0.33993528   -0.75275114  -0.42929913
#8    0.092174187  -0.21381354    0.81245328   0.76422474
#9   -1.026127796  -0.26529630   -1.32443073  -0.16606235
#10   0.815507195  -1.12545084   -1.66986584  -0.10189258

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM