简体   繁体   中英

Reshaping a data.frame with tidyr

I have the following data.frame:

data1 <- structure(list(id = c(1, 1, 1, 1, 1, 1, 2, 2, 3, 3, 3, 3, 4, 
4, 4, 4, 4, 5, 5, 5, 5, 6, 6, 6, 6, 6, 7, 7, 7, 7, 7, 7, 7, 7, 
8, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10, 11, 11, 11, 11), type_id = c(1, 
1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2, 1, 1, 2, 2, 2, 1, 1, 2, 2, 1, 
1, 2, 2, 2, 1, 1, 2, 2, 2, 2, 2, 2, 1, 1, 2, 2, 1, 1, 2, 1, 1, 
2, 2, 2, 1, 1, 2, 2), min = c("0", "0", "66", "71", "74", "81", 
"0", "0", "0", "0", "14", "17", "0", "0", "45", "60", "87", "0", 
"0", "49", "89", "0", "0", "60", "60", "75", "0", "0", "7", "47", 
"66", "75", "83", "89", "0", "0", "68", "73", "0", "0", "85", 
"0", "0", "46", "71", "87", "0", "0", "81", "90"), sec = c("0", 
"0", "37", "20", "20", "28", "0", "0", "0", "0", "22", "26", 
"0", "0", "1", "38", "38", "0", "0", "0", "53", "0", "0", "8", 
"10", "10", "0", "0", "2", "55", "33", "39", "31", "41", "0", 
"0", "18", "53", "0", "0", "47", "0", "0", "44", "36", "49", 
"0", "0", "53", "12"), group_id = c(1, 0, 1, 0, 1, 0, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1), 
a1 = c("11334, 98745, 81880, 111457, 38411, 156074, 153256, 84450, 59966, 37605, 50175, 41792, 44346, 48844, 20467, 153133, 69140, 56864", 
"17745, 40725, 37402, 61604, 15033, 95658, 103025, 203341, 101668, 78412, 51938, 172850, 172632, 173515, 13152, 38038, 155569, 149828", 
"11334, 81880, 98745, 41792, 38411, 111457, 37605, 84450, 44346, 50175, 59966, 48844, 20467, 153133, 69140, 56864", 
"17745, 40725, 37402, 203341, 15033, 95658, 103025, 61604, 101668, 155569, 51938, 172850, 172632, 173515, 13152, 38038, 149828", 
"11334, 81880, 98745, 84450, 38411, 111457, 20467, 41792, 44346, 37605, 59966, 48844, 153133, 69140, 56864", 
"17745, 40725, 37402, 203341, 15033, 95658, 103025, 155569, 101668, 173515, 51938, 172850, 172632, 13152, 38038, 149828", 
"121160, 58621, 100180, 97299, 17476, 57410, 61366, 27789, 37572, 205651, 20664, 33148, 103955, 57112, 182156, 165809, 14664, 209244", 
"131897, 11352, 43808, 40845, 61933, 83299, 109345, 66242, 38499, 60307, 112516, 19071, 83543, 48860, 11735, 2513, 20529, 15137", 
"60772, 41328, 82263, 19419, 41270, 102380, 17878, 116594, 94245, 47431, 135365, 20310, 88482, 167767, 171162, 135363, 194794, 181911", 
"21205, 68983, 40616, 39847, 17761, 51927, 60586, 40145, 40399, 83314, 90517, 103914, 44699, 60551, 12813, 41674, 98747, 39158", 
"60772, 41328, 82263, 41270, 102380, 17878, 116594, 94245, 47431, 135365, 20310, 88482, 167767, 171162, 135363, 194794, 181911", 
"60772, 41328, 82263, 102380, 41270, 135363, 17878, 116594, 94245, 47431, 20310, 88482, 167767, 171162, 194794, 181911", 
"20066, 55494, 74230, 173954, 19188, 201084, 66975, 19197, 54861, 126187, 82403, 11554, 49413, 60252, 75773, 86417, 94926, 218112", 
"57513, 104545, 54284, 168991, 48760, 85368, 87428, 74471, 169141, 160190, 86176, 79733, 168977, 51344, 80755, 147303, 84112, 87856", 
"20066, 55494, 74230, 173954, 19188, 201084, 126187, 19197, 54861, 60252, 82403, 11554, 49413, 75773, 86417, 94926, 218112", 
"57513, 80755, 54284, 168991, 48760, 85368, 87428, 74471, 169141, 160190, 86176, 79733, 168977, 51344, 147303, 84112, 87856", 
"57513, 80755, 54284, 168991, 48760, 85368, 84112, 74471, 87428, 86176, 51344, 79733, 168977, 147303, 87856", 
"105666, 61858, 39487, 50089, 37869, 103192, 40555, 14295, 91972, 68312, 40276, 9047, 42564, 221267, 20208, 3773, 205102, 6744", 
"111234, 177815, 12745, 7645, 19159, 106611, 42774, 80801, 108438, 13017, 109065, 26901, 56192, 87447, 10318, 173807, 194164, 219352", 
"111234, 56192, 12745, 109065, 106611, 7645, 80801, 42774, 177815, 13017, 108438, 26901, 87447, 10318, 173807, 194164, 219352", 
"111234, 56192, 12745, 173807, 106611, 7645, 80801, 42774, 26901, 13017, 177815, 87447, 10318, 194164, 219352", 
"15749, 20658, 105717, 59779, 48717, 40669, 45124, 18073, 43020, 163526, 41464, 55459, 38580, 60706, 73889, 8380, 113564, 204480", 
"51940, 20695, 58877, 74208, 197365, 76359, 43670, 62398, 66749, 57249, 176297, 148225, 55909, 109322, 42899, 59846, 41184, 40002", 
"15749, 20658, 105717, 59779, 48717, 40669, 45124, 204480, 43020, 73889, 41464, 55459, 38580, 60706, 8380, 113564", 
"15749, 20658, 105717, 59779, 48717, 40669, 45124, 204480, 43020, 73889, 41464, 55459, 38580, 60706, 8380, 113564", 
"51940, 20695, 58877, 74208, 197365, 76359, 57249, 62398, 66749, 41184, 176297, 148225, 55909, 109322, 42899, 59846, 40002", 
"19838, 109528, 106618, 153127, 77359, 58845, 56983, 50232, 104547, 168580, 104953, 101148, 114243, 155513, 149736, 195384, 59735, 128389", 
"37915, 158534, 115556, 39104, 55605, 39194, 45268, 93264, 78830, 108823, 80607, 85971, 165990, 54756, 39215, 97485, 157668, 168763", 
"19838, 109528, 58845, 50232, 101148, 77359, 56983, 153127, 104547, 168580, 104953, 114243, 155513, 149736, 195384, 59735, 128389", 
"19838, 109528, 58845, 153127, 101148, 149736, 56983, 168580, 104547, 104953, 114243, 155513, 195384, 59735, 128389", 
"37915, 158534, 115556, 39104, 55605, 39194, 80607, 93264, 78830, 108823, 85971, 165990, 54756, 39215, 97485, 157668, 168763", 
"19838, 109528, 56983, 58845, 101148, 149736, 153127, 195384, 168580, 104953, 114243, 155513, 59735, 128389", 
"37915, 158534, 115556, 54756, 55605, 39194, 80607, 93264, 78830, 39104, 85971, 165990, 39215, 97485, 157668, 168763", 
"37915, 158534, 115556, 54756, 55605, 39194, 157668, 93264, 78830, 80607, 85971, 165990, 39215, 97485, 168763", 
"37096, 49539, 28654, 15114, 57145, 149266, 49277, 11829, 80146, 173879, 93464, 57586, 61760, 42996, 59940, 106899, 96305, 169432", 
"40383, 58822, 40146, 17339, 88900, 80447, 101178, 78056, 61548, 62399, 83283, 20452, 78356, 151086, 128198, 3201, 171771, 153373", 
"40383, 58822, 40146, 17339, 88900, 80447, 101178, 78056, 61548, 62399, 83283, 20452, 78356, 151086, 128198, 3201, 171771, 153373", 
"37096, 49539, 28654, 96305, 57145, 149266, 49277, 93464, 11829, 173879, 80146, 57586, 61760, 42996, 59940, 106899, 169432", 
"18656, 52940, 40868, 121599, 37742, 52153, 43250, 89085, 20046, 44604, 61566, 73426, 212319, 41945, 54484, 16045, 38439, 56827", 
"66797, 169187, 100059, 56979, 60914, 38454, 112338, 41733, 92217, 118748, 110979, 104542, 15157, 171287, 210237, 33871, 152760, 154566", 
"66797, 169187, 100059, 56979, 60914, 38454, 15157, 112338, 152760, 110979, 41733, 104542, 171287, 210237, 33871, 154566", 
"40349, 15149, 41320, 15237, 56917, 126184, 90105, 48615, 88498, 78007, 20037, 18726, 40387, 54469, 7958, 149484, 178304, 103912", 
"9089, 67527, 19151, 55829, 60232, 77777, 50229, 49944, 44683, 195864, 63370, 49440, 57134, 12086, 54908, 232427, 173809, 215457", 
"40349, 15149, 41320, 15237, 56917, 126184, 103912, 48615, 88498, 78007, 90105, 18726, 40387, 54469, 7958, 149484, 178304", 
"9089, 67527, 19151, 55829, 60232, 77777, 44683, 232427, 49440, 49944, 63370, 57134, 12086, 54908, 173809, 215457", 
"9089, 67527, 19151, 55829, 60232, 77777, 49440, 232427, 57134, 49944, 63370, 12086, 54908, 173809, 215457", 
"40349, 54469, 41320, 15237, 56917, 126184, 90105, 48615, 88498, 78007, 103912, 18726, 40387, 20037, 120447, 7958, 149484, 178304", 
"18656, 54484, 40868, 121599, 41945, 52153, 43250, 89085, 73426, 44604, 212319, 41725, 108413, 16045, 85624, 38439, 56827, 20046", 
"18656, 85624, 40868, 121599, 41945, 52153, 54484, 38439, 73426, 89085, 43250, 41725, 108413, 16045, 56827, 20046", 
"18656, 85624, 40868, 121599, 41945, 52153, 54484, 38439, 73426, 16045, 43250, 41725, 108413, 56827, 20046"
), a2 = c("1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5", 
"1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", "1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5", 
"1, 3, 3, 2, 2, 2, 3, 3, 4, 4, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 4, 5, 5, 5, 5, 5, 5, 5", "1, 2, 2, 3, 2, 2, 3, 3, 4, 4, 3, 5, 5, 5, 5, 5", 
"1, 2, 2, 3, 2, 2, 3, 3, 4, 3, 3, 5, 5, 5, 5")), class = "data.frame", .Names = c("id", "type_id", "min", "sec", "group_id", "a1", "a2"), row.names = c(NA, -50L))

Now I need to transform it so that a1 and a2 have a single value in each row, i.e.:

structure(list(id = c(1, 1, 1), type_id = c(1, 1, 1), min = c(0, 
0, 0), sec = c(0, 0, 0), group_id = c(1, 1, 1), a1 = c(11334, 
98745, 81880), a2 = c(1, 3, 3)), .Names = c("id", "type_id", 
"min", "sec", "group_id", "a1", "a2"), row.names = c(NA, -3L), class = 
"data.frame")

I tried to separate the columns a1 and a2 first, then gather them and finally spread into two columns, like below:

data2 <- data1 %>% 
  separate(col = a1, into = paste('obj', 1:18)) %>% 
  separate(col = a2, into = paste('desc', 1:18))

data2 %>% 
  gather(key = 'obj', value = 'value', -id, -type_id, -min, -sec, -group_id) %>% 
  spread(key = 'obj', value = 'value')

but ended up with the following error:

Error: Duplicate identifiers for rows (907, 908), (909, 910), (913, 914), (918, 919), (922, 923), (927, 928), (935, 936), (939, 940), (942, 943), (947, 948), (1357, 1358), (1359, 1360), (1363, 1364), (1368, 1369), (1372, 1373), (1377, 1378), (1385, 1386), (1389, 1390), (1392, 1393), (1397, 1398), (1407, 1408), (1409, 1410), (1413, 1414), (1418, 1419), (1422, 1423), (1427, 1428), (1435, 1436), (1439, 1440), (1442, 1443), (1447, 1448), (1457, 1458), (1459, 1460), (1463, 1464), (1468, 1469), (1472, 1473), (1477, 1478), (1485, 1486), (1489, 1490), (1492, 1493), (1497, 1498), (1507, 1508), (1509, 1510), (1513, 1514), (1518, 1519), (1522, 1523), (1527, 1528), (1535, 1536), (1539, 1540), (1542, 1543), (1547, 1548), (1557, 1558), (1559, 1560), (1563, 1564), (1568, 1569), (1572, 1573), (1577, 1578), (1585, 1586), (1589, 1590), (1592, 1593), (1597, 1598), (1607, 1608), (1609, 1610), (1613, 1614), (1618, 1619), (1622, 1623), (1627, 1628), (1635, 1636), (1639, 1640), (1642, 1643), (1647, 1648), (. ..

So my question is: how can I transform this dataset (using tidyr or other packages if necessary)?

You need the separate_rows function from tidyr, which can take several columns at once as input when the nested columns have the same number of elements in each row:

data1 %>% separate_rows(a1, a2)

which gives:

   id type_id min sec group_id     a1 a2
1   1       1   0   0        1  11334  1
2   1       1   0   0        1  98745  3
3   1       1   0   0        1  81880  3
4   1       1   0   0        1 111457  2
5   1       1   0   0        1  38411  2
6   1       1   0   0        1 156074  2
7   1       1   0   0        1 153256  3
8   1       1   0   0        1  84450  3
9   1       1   0   0        1  59966  4
10  1       1   0   0        1  37605  4
.....

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM