简体   繁体   中英

Replacing values in R over rows without using a loop

I want to replace the missing values in one column ("stock") using the lagged values of that column combined with values from other columns. Since the following loop takes a lot of time on my original dataset, is there another way to do this without a loop?

    # Year-by-year fill, done separately for each fuel:
    #   stock[t] = stock[t-1] + formation[t] + sales[t-1]
    #   sales[t] = stock[t] - stock[t-1]
    # Only the row whose year equals `i` and is later than 1991 is rewritten on
    # each pass, so the passes for 1990 and 1991 leave the data unchanged.
    for (i in 1990:1993) {
      df <- df %>%
        group_by(fuel) %>%
        mutate(
          stock = ifelse(
            year == i & year > 1991,
            lag(stock) + formation + lag(sales),
            stock
          ),
          # `stock` here is the column updated just above.
          sales = ifelse(
            year == i & year > 1991,
            stock - lag(stock),
            sales
          )
        )
    }

Dataset sample:

# Sample data: one row per fuel ("a", "b") and year (1990-1993).
# `stock` and `sales` are NA for 1992-1993 (`sales` is also NA for 1990).
# The object is built directly as a grouped tibble; the grouping on `fuel` is
# carried by the `vars`, `labels`, `indices` and `group_sizes` attributes.
# NOTE(review): this attribute layout presumably matches the dplyr version that
# produced it -- confirm it loads cleanly with the installed dplyr.
df <- structure(
  list(
    year = c(1990L, 1991L, 1992L, 1993L, 1990L, 1991L, 1992L, 1993L),
    fuel = structure(
      c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
      .Label = c("a", "b"),
      class = "factor"
    ),
    stock = c(10, 11, NA, NA, 10, 11, NA, NA),
    sales = c(NA, 1, NA, NA, NA, 1, NA, NA),
    formation = c(0.3, 0.4, 0.5, 0.3, 0.7, 0.4, 0.5, 0.7)
  ),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -8L),
  vars = "fuel",
  labels = structure(
    list(fuel = structure(1:2, .Label = c("a", "b"), class = "factor")),
    class = "data.frame",
    row.names = c(NA, -2L),
    vars = "fuel",
    drop = TRUE
  ),
  indices = list(0:3, 4:7),
  drop = TRUE,
  group_sizes = c(4L, 4L),
  biggest_group_size = 4L
)

Is this what you're looking for? In `case_when()`, the final `TRUE ~ ...` clause acts as the catch-all "else" branch. I'm sure you could do something similar with `if_else()` as well; the results would be the same.

# Sample data from the question: one row per fuel ("a", "b") and year
# (1990-1993). `stock` and `sales` are NA for 1992-1993 (`sales` is also NA
# for 1990). The object is built directly as a grouped tibble; the grouping on
# `fuel` is carried by the `vars`, `labels`, `indices` and `group_sizes`
# attributes.
# NOTE(review): this attribute layout presumably matches the dplyr version that
# produced it -- confirm it loads cleanly with the installed dplyr.
df <- structure(
  list(
    year = c(1990L, 1991L, 1992L, 1993L, 1990L, 1991L, 1992L, 1993L),
    fuel = structure(
      c(1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L),
      .Label = c("a", "b"),
      class = "factor"
    ),
    stock = c(10, 11, NA, NA, 10, 11, NA, NA),
    sales = c(NA, 1, NA, NA, NA, 1, NA, NA),
    formation = c(0.3, 0.4, 0.5, 0.3, 0.7, 0.4, 0.5, 0.7)
  ),
  class = c("grouped_df", "tbl_df", "tbl", "data.frame"),
  row.names = c(NA, -8L),
  vars = "fuel",
  labels = structure(
    list(fuel = structure(1:2, .Label = c("a", "b"), class = "factor")),
    class = "data.frame",
    row.names = c(NA, -2L),
    vars = "fuel",
    drop = TRUE
  ),
  indices = list(0:3, 4:7),
  drop = TRUE,
  group_sizes = c(4L, 4L),
  biggest_group_size = 4L
)

library(dplyr)
#> 
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#> 
#>     filter, lag
#> The following objects are masked from 'package:base':
#> 
#>     intersect, setdiff, setequal, union

# Show the input as loaded: stock and sales are still NA for 1992-1993.
cat("Before")
#> Before
print(df)
#> # A tibble: 8 x 5
#> # Groups:   fuel [2]
#>    year fuel  stock sales formation
#>   <int> <fct> <dbl> <dbl>     <dbl>
#> 1  1990 a        10    NA       0.3
#> 2  1991 a        11     1       0.4
#> 3  1992 a        NA    NA       0.5
#> 4  1993 a        NA    NA       0.3
#> 5  1990 b        10    NA       0.7
#> 6  1991 b        11     1       0.4
#> 7  1992 b        NA    NA       0.5
#> 8  1993 b        NA    NA       0.7

# Fill stock and sales for every year after 1991 in one vectorised pass.
#
# The loop in the question applies, per fuel and in row (year) order,
#   stock[t] = stock[t-1] + formation[t] + sales[t-1]
#   sales[t] = stock[t] - stock[t-1]        (= formation[t] + sales[t-1])
# which unrolls to
#   sales[t] = sales[1991] + sum of formation over the filled years up to t
#   stock[t] = stock[1991] + sum of sales     over the filled years up to t
# so two cumulative sums replace the year-by-year iteration.
#
# Rows up to and including 1991 fall through to the `TRUE ~` branch and are
# returned unchanged (the earlier version overwrote the known 1990 stock with
# NA and left 1993 unfilled because lag() only saw the original column).
# Assumes rows are ordered by year within each fuel.
df <- df %>%
  group_by(fuel) %>%
  mutate(
    # Helper flag: rows the recurrence has to produce.
    to_fill = year > 1991,
    # last(x[!to_fill]) is the most recent value from the rows that are kept.
    sales = case_when(
      to_fill ~ last(sales[!to_fill]) + cumsum(if_else(to_fill, formation, 0)),
      TRUE ~ sales
    ),
    # Uses the sales values filled in just above.
    stock = case_when(
      to_fill ~ last(stock[!to_fill]) + cumsum(if_else(to_fill, sales, 0)),
      TRUE ~ stock
    )
  ) %>%
  select(-to_fill)

cat("After")
#> After
# Expected result (same values the question's loop produces):
print(df)
#> # A tibble: 8 x 5
#> # Groups:   fuel [2]
#>    year fuel  stock sales formation
#>   <int> <fct> <dbl> <dbl>     <dbl>
#> 1  1990 a      10    NA         0.3
#> 2  1991 a      11     1         0.4
#> 3  1992 a      12.5   1.5       0.5
#> 4  1993 a      14.3   1.8       0.3
#> 5  1990 b      10    NA         0.7
#> 6  1991 b      11     1         0.4
#> 7  1992 b      12.5   1.5       0.5
#> 8  1993 b      14.7   2.2       0.7

Created on 2019-01-04 by the reprex package (v0.2.1)

Here's an algebraic solution using temporary columns (note that `fill()` comes from the tidyr package). There's probably a more elegant way, but I think this works.

# Algebraic (loop-free) fill of the missing stock and sales values.
# Expects `df` as given in the question (stock/sales still NA where they are to
# be filled), with rows ordered by year within each fuel and the missing years
# following the known ones.
df2 <- df %>%
  # Make temporary columns for stock and sales.
  mutate(stock_temp     = stock,
         sales_temp     = sales) %>%

  # For each fuel type, fill down over missing values in those columns.
  # fill() lives in tidyr, so it is called with its namespace here.
  group_by(fuel) %>%
  tidyr::fill(stock_temp, sales_temp) %>%

  # formation_temp / cuml_form: running total of formation over the rows where
  #  stock is missing.
  # add: each missing row's increase in stock is the last known sales plus
  #  cuml_form; these increases are accumulated with cumsum() so that every
  #  missing year builds on the previous one, not only on the last known stock.
  # stock: last known stock (stock_temp) plus the accumulated increases.
  # The missing sales values are defined in OP as change in stock.
  mutate(formation_temp = if_else(is.na(stock),
                                  formation, 0),
         cuml_form      = cumsum(formation_temp),
         add            = cumsum(if_else(is.na(stock),
                                         lag(sales_temp) + cuml_form, 0)),
         stock = if_else(is.na(stock), stock_temp + add, stock),
         sales = if_else(is.na(sales), stock - lag(stock), sales)) %>%
  select(year:formation)


# Expected result (same values the question's loop produces):
df2
## A tibble: 8 x 5
## Groups:   fuel [2]
#   year fuel  stock sales formation
#  <int> <fct> <dbl> <dbl>     <dbl>
#1  1990 a      10    NA         0.3
#2  1991 a      11     1         0.4
#3  1992 a      12.5   1.5       0.5
#4  1993 a      14.3   1.8       0.3
#5  1990 b      10    NA         0.7
#6  1991 b      11     1         0.4
#7  1992 b      12.5   1.5       0.5
#8  1993 b      14.7   2.2       0.7

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM