Hi, I have a data frame like this:
# Example data: v1 runs from 0 to 9.9 in steps of 0.1 (100 values);
# v2 and v3 are random normal draws of the same length.
d <- data.frame(
  v1 = seq(from = 0, to = 9.9, by = 0.1),
  v2 = rnorm(n = 100),
  v3 = rnorm(n = 100)
)
> head(d)
v1 v2 v3
1 0.0 -0.01431916 -0.5005415
2 0.1 -1.01575590 1.5307473
3 0.2 1.00081065 -0.1730830
4 0.3 -1.20697918 0.5105118
5 0.4 -2.16698578 -1.0120544
6 0.5 0.33886508 0.4797016
I now want a new data frame that summarizes the values of v2 and v3 within each interval of v1 (0-0.99, 1-1.99, 2-2.99, 3-3.99, ...), for example by their mean,
like this:
start end mean.v2 mean.v3
0 1 0.2 0.1
1 2 0.5 0.4
and so on
thanks
Update: I should add that in my real data set the intervals contain different numbers of observations, and the values don't always start at zero or end at 10.
Here is one way, using cut() as suggested by @akrun:
# Bin v1 into unit-wide, left-closed intervals [0,1), [1,2), ... and average
# v2 and v3 within each bin.
# A scalar `breaks = 11` would instead split the range of v1 into 11
# equal-width bins (about 0.9 wide for this data), not the unit intervals
# asked for, so the break points are given explicitly as whole numbers
# spanning the data.
# NOTE: assumes v1 spans more than a single whole number, so that at least
# two break points result (a single number would be read as a bin count).
d %>%
  mutate(ints = cut(v1,
                    breaks = floor(min(v1)):ceiling(max(v1)),
                    right = FALSE,            # intervals are [a, b)
                    include.lowest = TRUE)) %>%  # keep a value equal to the top break
  group_by(ints) %>%
  summarise(mean.v2 = mean(v2), mean.v3 = mean(v3))
Based on @David H's answer, with 2 options to choose from:
- cut() using a vector of breaks
- floor() instead of cut()
Create data
# Fix the RNG seed so the random columns below are reproducible.
set.seed(33)
# v1: 0, 0.1, ..., 9.9; v2 and v3: 100 random normal draws each
# (v2 is drawn before v3).
d <- data.frame(
  v1 = seq(0, 9.9, by = 0.1),
  v2 = rnorm(100),
  v3 = rnorm(100)
)
cut() using a vector of breaks
For this simple example you could use breaks <- 0:10, but to be more general let's derive the breaks from the min and max of d$v1.
# Whole-number break points from just below the smallest v1 to just above the largest.
breaks <- seq(from = floor(min(d$v1)), to = ceiling(max(d$v1)))
breaks
# [1] 0 1 2 3 4 5 6 7 8 9 10
Summarise over intervals 0-0.99, 1-1.99, 2-2.99, 3-3.99,....
# Assign every row to an interval of v1 defined by `breaks`, then average
# v2 and v3 within each interval.
d %>%
  mutate(
    interval = cut(
      x = v1,
      breaks = breaks,
      right = FALSE,         # intervals are closed on the left: [a, b)
      include.lowest = TRUE  # with right = FALSE, also closes the last interval on the right
    )
  ) %>%
  group_by(interval) %>%
  summarise(mean.v2 = mean(v2), mean.v3 = mean(v3))
# Source: local data frame [10 x 3]
#
# interval mean.v2 mean.v3
# (fctr) (dbl) (dbl)
# 1 [0,1) -0.13040624 -0.20781247
# 2 [1,2) 0.26505794 0.51990167
# 3 [2,3) 0.13451628 1.12066174
# 4 [3,4) 0.23451272 -0.14773437
# 5 [4,5) 0.34326922 0.28567969
# 6 [5,6) -0.77059944 -0.16629580
# 7 [6,7) -0.17617190 0.03320797
# 8 [7,8) 0.86550135 -0.24664350
# 9 [8,9) -0.06652047 -0.27798769
# 10 [9,10] -0.10424865 0.24060163
floor() instead of cut()
Cheat a little bit by subtracting a tiny number (1e-9) from the end of each interval.
# Label each row with the unit interval of v1 it falls in, then average
# v2 and v3 per interval.
d %>%
  mutate(start = floor(v1),
         end = start + 1 - 1e-9) %>%  # end sits just below the next whole number
  group_by(start, end) %>%
  # Name the summaries explicitly: summarise_each(funs(mean)) is deprecated,
  # would also average v1, and would keep the original column names instead
  # of producing mean.v2 / mean.v3.
  summarise(mean.v2 = mean(v2), mean.v3 = mean(v3))
# Source: local data frame [10 x 4]
# Groups: start [?]
#
# start end mean.v2 mean.v3
# (dbl) (dbl) (dbl) (dbl)
# 1 0 1 -0.13040624 -0.20781247
# 2 1 2 0.26505794 0.51990167
# 3 2 3 0.13451628 1.12066174
# 4 3 4 0.23451272 -0.14773437
# 5 4 5 0.34326922 0.28567969
# 6 5 6 -0.77059944 -0.16629580
# 7 6 7 -0.17617190 0.03320797
# 8 7 8 0.86550135 -0.24664350
# 9 8 9 -0.06652047 -0.27798769
# 10 9 10 -0.10424865 0.24060163
Using the floor() and ceiling() functions, with ifelse() to handle the cases where the interval would otherwise be 1 - 1 or 2 - 2, for example (i.e. when v1 is already a whole number).
# Example data: a regular grid v1 = 0, 0.1, ..., 9.9 plus two columns of
# random normal draws. No seed is set, so v2 and v3 differ between runs.
d <- data.frame(
  v1 = seq(0, 9.9, by = 0.1),
  v2 = rnorm(n = 100),
  v3 = rnorm(n = 100)
)
library(dplyr)
# For each row, derive the unit interval (start, end) containing v1, then
# average v2 and v3 within each interval.
d %>%
  mutate(
    start = floor(v1),
    # ceiling(v1) equals start when v1 is a whole number; fall back to
    # start + 1 there so the interval is never zero-width (e.g. 1 - 1).
    end = ifelse(ceiling(v1) > start, ceiling(v1), start + 1)
  ) %>%
  group_by(start, end) %>%
  summarise(mean.v2 = mean(v2), mean.v3 = mean(v3))
Source: local data frame [10 x 4]
Groups: start [?]
start end mean.v2 mean.v3
(dbl) (dbl) (dbl) (dbl)
1 0 1 0.135180183 -0.36083298
2 1 2 -0.245567899 0.26827020
3 2 3 -0.051136441 0.14211666
4 3 4 0.252451303 0.38530797
5 4 5 0.007209073 0.30137345
6 5 6 -0.307008690 0.07662942
7 6 7 0.103271270 0.14734865
8 7 8 0.016753997 -0.02559756
9 8 9 -0.199958098 -0.21821830
10 9 10 0.532339512 -0.46509108
The same but including a column named intervals instead of two (start and end):
# Same grouping as before, but with one text label per interval
# ("start - end") in place of the separate start and end columns.
d %>%
  mutate(
    start = floor(v1),
    # When v1 is a whole number ceiling(v1) equals start, so use start + 1.
    end = ifelse(ceiling(v1) > start, ceiling(v1), start + 1),
    interval = paste(start, end, sep = " - ")
  ) %>%
  select(-c(start, end)) %>%  # the helper columns are no longer needed
  group_by(interval) %>%
  summarise(mean.v2 = mean(v2), mean.v3 = mean(v3))
Source: local data frame [10 x 3]
interval mean.v2 mean.v3
(chr) (dbl) (dbl)
1 0 - 1 0.135180183 -0.36083298
2 1 - 2 -0.245567899 0.26827020
3 2 - 3 -0.051136441 0.14211666
4 3 - 4 0.252451303 0.38530797
5 4 - 5 0.007209073 0.30137345
6 5 - 6 -0.307008690 0.07662942
7 6 - 7 0.103271270 0.14734865
8 7 - 8 0.016753997 -0.02559756
9 8 - 9 -0.199958098 -0.21821830
10 9 - 10 0.532339512 -0.46509108
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.