I want to reset cumsum
over a vector as it reaches certain value.
Eg for the following vector:
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
expected output is:
c(0, 0, 10, 0, 0, 22, 0, 30, 0, 0)
With reset <- 10
I can reduce the task to flagging the first values after the full integer:
res <- cumsum(v)
resd <- res/reset
resd
# [1] 0.3 0.8 1.0 1.5 1.8 2.2 2.7 3.0 3.1 3.5
Expected output is this:
c(F, F, T, F, F, T, F, T, F, F) # or
c(0, 0, 1.0, 0, 0, 2.2, 0, 3.0, 0, 0)
I need a fast way to calculate one of those.
my (improved) solution:
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
res <- cumsum(v)
reset <- 10
resd <- res/reset
res[diff(c(0, floor(resd))) == 0] <- 0
print(res) #gives 0 0 10 0 0 22 0 30 0 0
edit: now the first element in v
can be larger than 10.
Another possible approach:
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
reset <- 10
s <- cumsum(v)
idx <- as.integer(s / reset)
logic <- idx >= 1 & !duplicated(idx)
> logic
[1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
# corresponding one-liner
logic <- with(list(idx=as.integer(cumsum(v) / reset)),idx >= 1 & !duplicated(idx))
Just for fun I've also created a Rcpp version of the function :
library(Rcpp)
library(inline)
cumsumResetRcpp <- cxxfunction(signature(values='numeric',reset='integer'),
'
Rcpp::IntegerVector r(reset);
int resetVal = r[0];
Rcpp::NumericVector v(values);
int n = v.size();
Rcpp::NumericVector result(n);
double cumsum = 0;
for(int i = 0; i < n; i++){
int prevCumSumFloor = (int)(cumsum / resetVal);
cumsum += v[i];
int currCumSumFloor = (int)(cumsum / resetVal);
if(currCumSumFloor > prevCumSumFloor)
result[i] = cumsum;
}
return( result ) ;
', plugin="Rcpp", verbose=FALSE,includes='')
Comparison with my previous version :
library(microbenchmark)
baseRVersion <- function(v,reset){
a <- cumsum(v)
a[!with(list(idx=as.integer(a / reset)),idx >= 1 & !duplicated(idx))] <- 0
a
}
RcppVersion <- function(v,reset){
cumsumResetRcpp(v,reset)
}
set.seed(1234)
v <- sample(5,1e6,replace=TRUE)
microbenchmark(baseRVersion(v,10), RcppVersion(v,10),times=20)
# Result :
Unit: milliseconds
expr min lq mean median uq max neval
baseRVersion(v, 10) 69.78914 74.34717 91.67828 102.95764 103.6911 105.4055 20
RcppVersion(v, 10) 17.28785 17.58432 18.89449 19.25759 19.8595 20.5627 20
This sets all cumsums less than 10 or ones where the modulo division by 10 value is duplicated to zero:
a <- cumsum(v)
a %/% 10
[1] 0 0 1 1 1 2 2 3 3 3
a[ duplicated(a %/% 10) | a<10 ] <- 0
a
[1] 0 0 10 0 0 22 0 30 0 0
Because I can never resist...
qaswed <-function(v) {
res <- cumsum(v)
reset <- 10
resd <- res/reset
res[diff(c(0, floor(resd))) == 0] <- 0
}
digemall <-function(v){
reset <- 10
with(list(idx=as.integer(cumsum(v) / reset)),idx >= 1 & !duplicated(idx))
}
colonel <-function(v){
ifelse(c(0, diff(cumsum(v) %/% 10)), cumsum(v), 0)
}
userx <- function(v){
a <- cumsum(v)
c(a[1] >= 10, a[-1] %/% 10 > a[-length(a)] %/% 10)
}
set.seed(5)
v <- sample(5,1e6,replace=TRUE)
microbenchmark(qaswed(v),digemall(v),colonel(v),userx(v),times=10)
Unit: milliseconds
expr min lq mean median uq max neval
qaswed(v) 45.97558 50.29943 86.54772 85.52356 88.60232 200.89699 10
digemall(v) 54.12038 58.85200 67.15433 60.51172 64.40194 99.32623 10
colonel(v) 200.80942 233.56203 254.33662 252.65635 275.16588 306.76971 10
userx(v) 53.87098 56.55786 71.38571 57.98169 92.94224 96.69956 10
v <- c(3, 5, 2, 5, 3, 4, 5, 3, 1, 4)
a <- cumsum(v)
c(a[1] >= 10, a[-1] %/% 10 > a[-length(a)] %/% 10)
Output:
[1] FALSE FALSE TRUE FALSE FALSE TRUE FALSE TRUE FALSE FALSE
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.