I want to replace the missing values in one column ("stock") with values computed from the lagged value of that column combined with some other columns. The following loop takes a long time on my original dataset; is there a way to do this without a loop?
# Walk the years in order, one pass per year. Only rows whose year equals the
# current pass AND is later than 1991 are rewritten; every other row keeps its
# value. Each pass reads, through lag(), the stock and sales written by the
# previous pass, which is why the years are processed sequentially.
for (i in 1990:1993) {
  df <- df %>%
    group_by(fuel) %>%
    mutate(
      # New stock = previous row's stock + this row's formation + previous
      # row's sales (previous row within the same fuel).
      stock = ifelse(
        year == i & year > 1991,
        lag(stock) + formation + lag(sales),
        stock
      ),
      # Evaluated after `stock` above, so this is the change in the freshly
      # filled stock.
      sales = ifelse(
        year == i & year > 1991,
        stock - lag(stock),
        sales
      )
    )
}
Dataset sample:
# Sample data: eight rows, years 1990-1993 for each of two fuel levels
# ("a" and "b"). `stock` is NA for 1992-1993, `sales` is only present for
# 1991, and `formation` is complete.
# The object is given class "grouped_df" with grouping attributes that name
# `fuel` (vars, labels, indices, group_sizes).
# NOTE(review): this attribute layout looks like the grouped_df format of
# older dplyr releases -- confirm it loads under the dplyr version in use.
df <- structure(list(year = c(1990L, 1991L, 1992L, 1993L, 1990L, 1991L,
1992L, 1993L), fuel = structure(c(1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L), .Label = c("a", "b"), class = "factor"), stock = c(10, 11,
NA, NA, 10, 11, NA, NA), sales = c(NA, 1, NA, NA, NA, 1, NA,
NA), formation = c(0.3, 0.4, 0.5, 0.3, 0.7, 0.4, 0.5, 0.7)), class = c("grouped_df",
"tbl_df", "tbl", "data.frame"), row.names = c(NA, -8L), vars = "fuel", labels = structure(list(
fuel = structure(1:2, .Label = c("a", "b"), class = "factor")), class = "data.frame", row.names = c(NA,
-2L), vars = "fuel", drop = TRUE), indices = list(0:3, 4:7), drop = TRUE, group_sizes = c(4L,
4L), biggest_group_size = 4L)
Is this what you're looking for? `TRUE` is basically the ELSE branch of `case_when()` in this context. I'm sure you can do something similar with `ifelse()` as well, but the results are the same.
# Sample data from the question: eight rows, years 1990-1993 for each of two
# fuel levels ("a" and "b"). `stock` is NA for 1992-1993, `sales` is only
# present for 1991, and `formation` is complete.
# The object is given class "grouped_df" with grouping attributes that name
# `fuel` (vars, labels, indices, group_sizes).
# NOTE(review): this attribute layout looks like the grouped_df format of
# older dplyr releases -- confirm it loads under the dplyr version in use.
df <- structure(list(year = c(
1990L, 1991L, 1992L, 1993L, 1990L, 1991L,
1992L, 1993L
), fuel = structure(c(
1L, 1L, 1L, 1L, 2L, 2L, 2L,
2L
), .Label = c("a", "b"), class = "factor"), stock = c(
10, 11,
NA, NA, 10, 11, NA, NA
), sales = c(
NA, 1, NA, NA, NA, 1, NA,
NA
), formation = c(0.3, 0.4, 0.5, 0.3, 0.7, 0.4, 0.5, 0.7)), class = c(
"grouped_df",
"tbl_df", "tbl", "data.frame"
), row.names = c(NA, -8L), vars = "fuel", labels = structure(list(
fuel = structure(1:2, .Label = c("a", "b"), class = "factor")
), class = "data.frame", row.names = c(
NA,
-2L
), vars = "fuel", drop = TRUE), indices = list(0:3, 4:7), drop = TRUE, group_sizes = c(
4L,
4L
), biggest_group_size = 4L)
library(dplyr)
#>
#> Attaching package: 'dplyr'
#> The following objects are masked from 'package:stats':
#>
#> filter, lag
#> The following objects are masked from 'package:base':
#>
#> intersect, setdiff, setequal, union
# Show the sample data as loaded, before any values are filled in: stock is
# NA for 1992-1993 and sales is NA everywhere except 1991.
cat("Before")
#> Before
print(df)
#> # A tibble: 8 x 5
#> # Groups: fuel [2]
#> year fuel stock sales formation
#> <int> <fct> <dbl> <dbl> <dbl>
#> 1 1990 a 10 NA 0.3
#> 2 1991 a 11 1 0.4
#> 3 1992 a NA NA 0.5
#> 4 1993 a NA NA 0.3
#> 5 1990 b 10 NA 0.7
#> 6 1991 b 11 1 0.4
#> 7 1992 b NA NA 0.5
#> 8 1993 b NA NA 0.7
# Fill stock and sales for the years after 1991, separately for each fuel.
#
# The loop in the question is the recurrence
#   stock[t] = stock[t-1] + formation[t] + sales[t-1]
#   sales[t] = stock[t] - stock[t-1]
# which simplifies to
#   sales[t] = sales[1991] + (sum of formation over 1992..t)
#   stock[t] = stock[1991] + (sum of the filled sales over 1992..t)
# so both columns can be built with cumsum(). A single lag() is not enough:
# it only reaches the first missing year, and applying it to every year other
# than 1991 also wipes out the observed 1990 values.
#
# Years up to 1991 fall through to the TRUE (else) branch and keep their
# observed values. Assumes rows are sorted by year within each fuel and each
# fuel has exactly one 1991 row.
df <- df %>%
  group_by(fuel) %>%
  mutate(
    sales = case_when(
      year > 1991 ~ sales[year == 1991] +
        cumsum(if_else(year > 1991, formation, 0)),
      TRUE ~ sales
    ),
    # `sales` below is the column filled just above. Earlier years contribute
    # 0 so that the NA in 1990 does not turn the running total into NA.
    stock = case_when(
      year > 1991 ~ stock[year == 1991] +
        cumsum(if_else(year > 1991, sales, 0)),
      TRUE ~ stock
    )
  )
cat("After")
#> After
print(df)
# The values below follow from the recurrence above; the exact number
# formatting of the printed tibble may differ slightly.
#> # A tibble: 8 x 5
#> # Groups: fuel [2]
#> year fuel stock sales formation
#> <int> <fct> <dbl> <dbl> <dbl>
#> 1 1990 a 10 NA 0.3
#> 2 1991 a 11 1 0.4
#> 3 1992 a 12.5 1.5 0.5
#> 4 1993 a 14.3 1.8 0.3
#> 5 1990 b 10 NA 0.7
#> 6 1991 b 11 1 0.4
#> 7 1992 b 12.5 1.5 0.5
#> 8 1993 b 14.7 2.2 0.7
Created on 2019-01-04 by the reprex package (v0.2.1)
Here's an algebraic solution using temporary columns. There's probably a more elegant way but I think this works.
# Loop-free fill of the missing stock/sales values using temporary columns.
#
# The question's loop is the recurrence
#   stock[t] = stock[t-1] + formation[t] + sales[t-1]
#   sales[t] = stock[t] - stock[t-1]
# so, over a run of missing years,
#   sales[t] = last observed sales + cumulative formation
#   stock[t] = last observed stock + cumulative filled sales
# Assumes rows are sorted by year within each fuel and that the missing stock
# values form one trailing run per fuel.
df2 <- df %>%
  # Make temporary columns for stock and sales, and remember which rows had
  # no stock before anything is overwritten.
  mutate(stock_temp = stock,
         sales_temp = sales,
         stock_missing = is.na(stock)) %>%
  # For each fuel type, fill down over missing values in those columns, so
  # the missing rows carry the last observed stock and sales.
  group_by(fuel) %>%
  tidyr::fill(stock_temp, sales_temp) %>%
  # Accumulate formation over the missing rows and add it to the last
  # observed sales to get the filled sales. The running total of those filled
  # sales is what has to be added to the last observed stock.
  mutate(formation_temp = if_else(stock_missing, formation, 0),
         cuml_form = cumsum(formation_temp),
         sales = if_else(stock_missing, sales_temp + cuml_form, sales),
         add = cumsum(if_else(stock_missing, sales, 0)),
         stock = if_else(stock_missing, stock_temp + add, stock)) %>%
  # Any sales value still missing is defined in OP as the change in stock.
  mutate(sales = if_else(is.na(sales), stock - lag(stock), sales)) %>%
  # Drop the temporary columns, which were appended after `formation`.
  select(year:formation)
> df2
# Values worked out from the recurrence above; printed formatting may differ.
## A tibble: 8 x 5
## Groups: fuel [2]
# year fuel stock sales formation
# <int> <fct> <dbl> <dbl> <dbl>
#1 1990 a 10 NA 0.3
#2 1991 a 11 1 0.4
#3 1992 a 12.5 1.5 0.5
#4 1993 a 14.3 1.8 0.3
#5 1990 b 10 NA 0.7
#6 1991 b 11 1 0.4
#7 1992 b 12.5 1.5 0.5
#8 1993 b 14.7 2.2 0.7
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.