简体   繁体   中英

Change values in a dataframe according to cases

I have the following dataframe:

> dput(df)
structure(list(x = c(0.871877138037235, 0.534444199409336, 0.677225327817723, 
0.124835065566003, 0.972407285822555, 0.179870884865522, 0.468708630651236, 
0.405605535488576, 0.717907374724746, 0.157441936200485), y = c(0, 
1, 2, 0, 0, 0, 0, 0, 1, 0)), class = "data.frame", row.names = c(NA, 
-10L))

ie

> df
           x y
1  0.8718771 0
2  0.5344442 1
3  0.6772253 2
4  0.1248351 0
5  0.9724073 0
6  0.1798709 0
7  0.4687086 0
8  0.4056055 0
9  0.7179074 1
10 0.1574419 0

I would like to obtain a new dataframe considering the following rules:

  • If in column y appear 1 and 2 (or 2 and 1) sequentially, then multiply the next 3 values in column x by -1.4
  • If in column y appears 1 (and just 1), then multiply the next 3 values column x by -1
  • If in column y appear 1 and 3 (or 3 and 1) sequentially, then multiply the next 3 values column x by -0.6
  • If in column y appears 2 (and just 2), then multiply the next 3 values column x by 1.4

In our case the desired result is:

> df
           x  y
1  0.8718771  0
2  0.5344442  1
3  0.6772253  2
4 -0.1747691  0
5  -1.36137   0
6 -0.2518193  0
7  0.4687086  0
8  0.4056055  0
9  0.7179074  1
10 -0.1574419 0

This solution may sound ugly but I think it's quite stable, however it may need further testings and improvements:

library(dplyr)
# First I set out to detect every observation that falls into any of the 4 categories

df %>%
  mutate(z = case_when(
    lag(y, n = 2, default = 0) %in% c(1, 2) & lag(y, default = 0) %in% c(2, 1) ~ 1,
    lag(y, n = 2, default = 0) == 0 & lag(y, default = 0) == 1 & y == 0 ~ 2,
    lag(y, n = 2, default = 0) %in% c(1, 3) & lag(y, default = 0) %in% c(2, 1) ~ 3, 
    lag(y, n = 2, default = 0) == 0 & lag(y, default = 0) == 2 & y == 0 ~ 4,
    TRUE ~ 0
  )) -> DF

# Then I wrote a custom function to apply multiplication phase on a sequence of three rows

fn <- function(x) {
  out <- x$x
  
  for(i in 1:nrow(x)) {
    if(x$z[i] == 1) {
      out[i:(i+2)] <- out[i:(i+2)] * (-1.4)
    } else if(x$z[i] == 2) {
      out[i:(i+2)] <- out[i:(i+2)] * (-1)
    } else if(x$z[i] == 3) {
      out[i:(i+2)] <- out[i:(i+2)] * (-0.6)
    } else if(x$z[i] == 4) {
      out[i:(i+2)] <- out[i:(i+2)] * (1.4)
    } else {
      out[i:(i+2)] <- out[i:(i+2)] * 1
    }
  }
  dt <- cbind(new_x = out[!is.na(out)], y = x$y) |> as.data.frame()
  dt
}

fn(DF)

        new_x y
1   0.8718771 0
2   0.5344442 1
3   0.6772253 2
4  -0.1747691 0
5  -1.3613702 0
6  -0.2518192 0
7   0.4687086 0
8   0.4056055 0
9   0.7179074 1
10 -0.1574419 0

A for loop

df <- structure(list(x = c(0.871877138037235, 0.534444199409336, 0.677225327817723, 
                           0.124835065566003, 0.972407285822555, 0.179870884865522, 0.468708630651236, 
                           0.405605535488576, 0.717907374724746, 0.157441936200485), y = c(0, 
                                                                                           1, 2, 0, 0, 0, 0, 0, 1, 0)), class = "data.frame", row.names = c(NA, 
                                                                                                                                                            -10L))
df
#>            x y
#> 1  0.8718771 0
#> 2  0.5344442 1
#> 3  0.6772253 2
#> 4  0.1248351 0
#> 5  0.9724073 0
#> 6  0.1798709 0
#> 7  0.4687086 0
#> 8  0.4056055 0
#> 9  0.7179074 1
#> 10 0.1574419 0

for(i in 2:nrow(df)){
  if((df$y[i] == 1 & df$y[i+1] ==2) | (df$y[i] == 2 & df$y[i+1] ==1)) {
    df$x[seq(i+2, by = 1, length.out= min(nrow(df) - (i+1), 3))] <- df$x[seq(i+2, by = 1, length.out=min(nrow(df) - (i+1), 3))] * -1.4
  } else if ((df$y[i] == 1 & df$y[i+1] ==3) | (df$y[i] == 3 & df$y[i+1] ==1)){
    df$x[seq(i+2, by = 1, length.out= min(nrow(df) - (i+1), 3))] <- df$x[seq(i+2, by = 1,length.out= min(nrow(df) - (i+1), 3))] * -0.6
  } else if (df$y[i] == 1 & !df$y[i+1] %in% c(1,2,3) & !df$y[i-1] %in% c(1,2,3) ) {
    df$x[seq(i+1, by = 1, length.out=min(nrow(df) - (i), 3)) ] <- df$x[seq(i+1, by = 1, length.out= min(nrow(df) - (i), 3))] * -1
  } else if (df$y[i] == 2 & !df$y[i+1] %in% c(1,2,3) & !df$y[i-1] %in% c(1,2,3)) {
    df$x[seq(i+1, by = 1, length.out=min(nrow(df) - (i), 3)) ] <- df$x[seq(i+1, by = 1, length.out=min(nrow(df) - (i), 3))] * 1.4
  } 
}

df
#>             x y
#> 1   0.8718771 0
#> 2   0.5344442 1
#> 3   0.6772253 2
#> 4  -0.1747691 0
#> 5  -1.3613702 0
#> 6  -0.2518192 0
#> 7   0.4687086 0
#> 8   0.4056055 0
#> 9   0.7179074 1
#> 10 -0.1574419 0

Created on 2021-06-26 by the reprex package (v2.0.0)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM