I'm trying to tidy my daily activity data (accelerometer data). I would like to sum the repeated rows of each day for all columns. I have 32 rows (some are repeated) and 90 columns (data of one subject).
# Example of my data with 32 rows and 14 columns
df <- data.frame(LbNr = c(22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002,22002),
Type = c("A2. Working" ,"A1. NonWorking" ,"A1. NonWorking" ,"A4. SleepWeek" ,"A1. NonWorking" ,"A2. Working" ,"A1. NonWorking" ,"A4. SleepWeek" ,"A4. SleepWeek" ,"A1. NonWorking" ,"A2. Working" ,"A1. NonWorking" ,"A1. NonWorking" ,"A4. SleepWeek" ,"A1. NonWorking" ,"A2. Working" ,"A1. NonWorking" ,"A4. SleepWeek" ,"A4. SleepWeek" ,"A1. NonWorking" ,"A2. Working" ,"A1. NonWorking" ,"A1. NonWorking" ,"C4. SleepWeekend" ,"C0. Leisure" ,"C0. Leisure" ,"C4. SleepWeekend" ,"C0. Leisure" ,"C4. SleepWeekend" ,"C4. SleepWeekend" ,"A1. NonWorking" ,"A2. Working"),
Weekday = c(1,1,2,2,2,2,2,2,3,3,3,3,4,4,4,4,4,4,5,5,5,5,6,6,6,7,7,7,7,1,1,1),
Time = c(0.66667,5.66667,0.35,6.15,1.5,9.83333,6.05,0.11667,6.83333,1.33333,9.83333,6,0.03333,7.2,6.43333,5,5.23333,0.1,6.41667,0.96667,11.01667,5.6,0.43333,7.9,15.66667,0.03333,7.91667,15.61667,0.43333,6.33333,0.66667,6.83333),
lie = c(0.00583,0.37778,0.03556,4.84389,0.05444,0.05972,0.67639,0.0125,5.68806,0.02333,0.65278,0.23889,0.00917,7.2,0.45472,0.38333,0.29694,0.08,5.48694,0.01889,0.01028,0.12139,0.01694,6.96028,0.24472,0.00333,6.93639,0.11833,0.41861,5.74889,0.00861,0.07333),
sit = c(0.31194,4.36167,0.14417,1.30611,0.45083,6.64111,4.14306,0.10417,1.14528,0.51167,5.79417,3.11833,0,0,2.23944,2.79722,3.66583,0.00472,0.92972,0.29917,6.76806,4.21056,0.30222,0.92194,9.77694,0.00417,0.91833,12.02972,0.01472,0.58444,0.15806,5.58694),
stand = c(0.13389,0.47111,0.09139,0,0.67278,1.63667,0.51806,0,0,0.46417,1.81917,1.57472,0.01889,0,1.88917,0.88639,0.63028,0.00667,0,0.3975,1.83417,0.72528,0.05889,0.00667,2.33944,0.01361,0.03639,1.78139,0,0,0.25472,0.41167),
move = c(0.09056,0.34444,0.05167,0,0.21611,0.59472,0.34306,0,0,0.21333,0.525,0.72806,0.00528,0,0.76583,0.39194,0.41861,0.00667,0,0.14056,1.04694,0.36944,0.03778,0.00806,2.44583,0.00944,0.02083,0.93083,0,0,0.15417,0.235),
walk = c(0.11528,0.10722,0.02722,0,0.10583,0.84194,0.35639,0,0,0.11694,1.00806,0.33167,0,0,1.04611,0.51389,0.20833,0,0,0.09333,1.28528,0.16083,0.0175,0.00306,0.79972,0.00278,0.00472,0.65306,0,0,0.08139,0.49528),
run = c(0,0.00111,0,0,0,0.00167,0.00194,0,0,0,0.00083,0.00083,0,0,0.00333,0.0025,0.00083,0,0,0.00139,0.00472,0,0,0,0.00194,0,0,0.08694,0,0,0,0.00111),
stairs = c(0.00917,0.00333,0,0,0,0.0575,0.01111,0,0,0.00389,0.03333,0.0075,0,0,0.03472,0.02472,0.00472,0.00194,0,0.00583,0.06722,0.0125,0,0,0.05806,0,0,0.01639,0,0,0.00417,0.03),
cycle = c(0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0.00778,0,0,0.01,0,0,0,0,0,0,0,0,0,0,0.00556,0),
WalkSlow = c(0.01222,0.02056,0.00389,0,0.03056,0.17417,0.03361,0,0,0.01889,0.35889,0.07778,0,0,0.07528,0.04222,0.03417,0,0,0.02444,0.13722,0.03361,0.00417,0,0.14,0,0.00056,0.08056,0,0,0.02278,0.08278),
WalkFast = c(0.10278,0.08639,0.02278,0,0.07417,0.66,0.32194,0,0,0.0975,0.64583,0.25139,0,0,0.97083,0.46861,0.17222,0,0,0.06861,1.14694,0.12667,0.01306,0.00278,0.65444,0.00194,0.0025,0.56944,0,0,0.0575,0.41))
I have tried some small codes, but, I have failed in almost all. The code below is what I could get, it's too big. I'm wondering if have any other way to do it smaller.
# LbNr = subjects' id
# Weekday = 1 Monday.... 7 Sunday
# Type = activities: A1. NonWorking, A2. Working, A4. SleepWeek, C0. Leisure, C4. SleepWeekend
# code
df %>% select(LbNr, Type, Weekday, Time, lie:IncTrunkWalk) %>%
group_by(LbNr, Type, Weekday) %>%
summarise(n = n(), Time = sum(Time),lie = sum(lie), sit = sum(sit), stand = sum(stand),
move = sum(move), walk = sum(walk), run = sum(run), stairs = sum(stairs),
cycle = sum(cycle), row = sum(row), WalkSlow = sum(WalkSlow),
WalkFast = sum(WalkFast)) %>%
arrange(Weekday) %>% filter(Weekday %in% c('3':'7'))
So far I had another problem with this code. My problem is on Saturday "6", when I concatenate the time could be that Saturday receives activities that started on Friday (see the example below), sometimes will appear "A1. NonWorking" or "A4. SleepWeek", depends on the volunteer. I would like to sum this different activity on "C0. Leisure". If it was possible I would like to do it in one code.
# LbNr Type Weekday n Time lie sit
# <dbl> <fct> <dbl> <int> <dbl> <dbl> <dbl>
#8 22002 A2. Working 5 1 11.0 0.0103 6.77
#9 22002 A4. SleepWeek 5 1 6.42 5.49 0.930
#10 22002 A1. NonWorking 6 1 0.433 0.0169 0.302
#11 22002 C0. Leisure 6 1 15.7 0.245 9.78
#12 22002 C4. SleepWeekend 6 1 7.9 6.96 0.922
#13 22002 C0. Leisure 7 2 15.6 0.122 12.0
#I would like to get something like this.
# LbNr Type Weekday n Time lie sit
# <dbl> <fct> <dbl> <int> <dbl> <dbl> <dbl>
#8 22002 A2. Working 5 1 11.0 0.0103 6.77
#9 22002 A4. SleepWeek 5 1 6.42 5.49 0.930
#10 22002 C0. Leisure 6 1 16.133 0.2619 10.082
#11 22002 C4. SleepWeekend 6 1 7.9 6.96 0.922
#12 22002 C0. Leisure 7 2 15.6 0.122 12.0
For the first problem, I expect to get a small code. Moreover, if it was possible, I would expect to get a better code for the sum of different activities on Saturday.
Thanks in advance, Luiz
It's hard to try and answer your question without a better example (ie, you can dput()
your data to give us a sample). But here is a solution to your last issue: "For the first problem, I expect to get a table with the sum of repeated rows for all columns. Moreover, if it was possible, I would expect to get a better code for the sum of different activities on Saturday."
# create toy data of 3 different IDs, 3 different types, and repeated days
df <- data.frame(id=sample(c(1:3),100,T),
type=sample(letters[1:3],100,T),
day=sample(c(1:7),100,T),
matrix(runif(300),nrow=100),
stringsAsFactors = F)
# gather data, summarize each activity column by ID, type and day
# and select Saturday==6
df %>% gather(k,v,-id,-type,-day) %>%
group_by(id,type,day,k) %>%
summarise(sum=sum(v)) %>%
filter(day==6) %>%
spread(k,sum)
# A tibble: 8 x 6
# Groups: id, type, day [8]
id type day X1 X2 X3
<int> <chr> <int> <dbl> <dbl> <dbl>
1 1 a 6 1.85 3.26 2.09
2 1 b 6 0.604 0.583 0.586
3 1 c 6 0.163 0.663 0.624
4 2 a 6 0.185 0.952 0.349
5 2 b 6 1.16 0.832 0.974
6 2 c 6 0.906 1.62 0.853
7 3 b 6 0.671 1.39 0.887
8 3 c 6 0.449 0.150 0.647
UPDATE
Here is an updated solution with the new data provided.
df %>% group_by(LbNr,Type,Weekday) %>% summarise_all(.,sum)
# A tibble: 20 x 14
# Groups: LbNr, Type [5]
LbNr Type Weekday Time lie sit stand move walk run stairs cycle
<dbl> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 22002 A1. ~ 1 6.33 0.386 4.52e+0 0.726 0.499 0.189 0.00111 0.0075 0.00556
2 22002 A1. ~ 2 7.9 0.766 4.74e+0 1.28 0.611 0.489 0.00194 0.0111 0
3 22002 A1. ~ 3 7.33 0.262 3.63e+0 2.04 0.941 0.449 0.00083 0.0114 0
4 22002 A1. ~ 4 11.7 0.761 5.91e+0 2.54 1.19 1.25 0.00416 0.0394 0.00778
5 22002 A1. ~ 5 6.57 0.140 4.51e+0 1.12 0.51 0.254 0.00139 0.0183 0.01
6 22002 A1. ~ 6 0.433 0.0169 3.02e-1 0.0589 0.0378 0.0175 0 0 0
7 22002 A2. ~ 1 7.5 0.0792 5.90e+0 0.546 0.326 0.611 0.00111 0.0392 0
8 22002 A2. ~ 2 9.83 0.0597 6.64e+0 1.64 0.595 0.842 0.00167 0.0575 0
9 22002 A2. ~ 3 9.83 0.653 5.79e+0 1.82 0.525 1.01 0.00083 0.0333 0
10 22002 A2. ~ 4 5 0.383 2.80e+0 0.886 0.392 0.514 0.0025 0.0247 0
11 22002 A2. ~ 5 11.0 0.0103 6.77e+0 1.83 1.05 1.29 0.00472 0.0672 0
12 22002 A4. ~ 2 6.27 4.86 1.41e+0 0 0 0 0 0 0
13 22002 A4. ~ 3 6.83 5.69 1.15e+0 0 0 0 0 0 0
14 22002 A4. ~ 4 7.3 7.28 4.72e-3 0.00667 0.00667 0 0 0.00194 0
15 22002 A4. ~ 5 6.42 5.49 9.30e-1 0 0 0 0 0 0
16 22002 C0. ~ 6 15.7 0.245 9.78e+0 2.34 2.45 0.800 0.00194 0.0581 0
17 22002 C0. ~ 7 15.6 0.122 1.20e+1 1.80 0.940 0.656 0.0869 0.0164 0
18 22002 C4. ~ 1 6.33 5.75 5.84e-1 0 0 0 0 0 0
19 22002 C4. ~ 6 7.9 6.96 9.22e-1 0.00667 0.00806 0.00306 0 0 0
20 22002 C4. ~ 7 8.35 7.36 9.33e-1 0.0364 0.0208 0.00472 0 0 0
# ... with 2 more variables: WalkSlow <dbl>, WalkFast <dbl>
I think this answers your first question about wanting a 'small code'. I don't understand your second question still about "I would expect to get a better code for the sum of different activities on Saturday." Does this mean that you want to sum across the different activities (lie, sit, etc.) for Saturday only? Or do you want to sum across different types (A2, C0, etc) of activities?
df %>% group_by(LbNr,Type,Weekday) %>% summarise_all(.,sum) %>%
filter(Weekday==6)
# A tibble: 3 x 14
# Groups: LbNr, Type [3]
LbNr Type Weekday Time lie sit stand move walk run stairs cycle WalkSlow
<dbl> <fct> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 22002 A1. ~ 6 0.433 0.0169 0.302 0.0589 0.0378 0.0175 0 0 0 0.00417
2 22002 C0. ~ 6 15.7 0.245 9.78 2.34 2.45 0.800 0.00194 0.0581 0 0.14
3 22002 C4. ~ 6 7.9 6.96 0.922 0.00667 0.00806 0.00306 0 0 0 0
# ... with 1 more variable: WalkFast <dbl>
# summarise across different activities, for each column, on Saturday only
df %>% group_by(LbNr,Type,Weekday) %>% summarise_all(.,sum) %>%
filter(Weekday==6) %>% group_by(LbNr) %>% select(-Type,-Weekday) %>%
summarise_all(.,sum)
# A tibble: 1 x 12
LbNr Time lie sit stand move walk run stairs cycle WalkSlow WalkFast
<dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
1 22002 24 7.22 11.0 2.41 2.49 0.820 0.00194 0.0581 0 0.144 0.670
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.