简体   繁体   中英

Fill missing values with dates

I have the following dataframe

DirectorID CompanyID start_new    end_new DateStartRole DateEndRole
1           31   1990357      <NA>       <NA>    2015-01-01  2019-12-31
2           31   1990357      <NA>       <NA>    2015-01-01  2019-12-31
3           31   1990357      <NA>       <NA>    2015-01-01  2019-12-31
4           31   1990357      <NA>       <NA>    2015-01-01  2019-12-31
5           31   1990357      <NA>       <NA>    2015-01-01  2019-12-31
6           36      1505      <NA>       <NA>    2000-01-01  2000-12-31
7           36     17834      <NA>       <NA>    2001-01-01  2011-12-31
8           36     17834      <NA>       <NA>    2001-01-01  2011-12-31
9           36     17834      <NA>       <NA>    2001-01-01  2011-12-31
10          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
11          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
12          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
13          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
14          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
15          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
16          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
17          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
18          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
19          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
20          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
21          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
22          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
23          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
24          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
25          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
26          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
27          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
28          36     17834      <NA>       <NA>    2001-01-01  2011-12-31
# Sample data (20 rows): start_new / end_new are all-NA Date columns that
# still need to be filled; DateStartRole / DateEndRole hold the full period.
structure(
  list(
    DirectorID = rep(c(31, 36), times = c(5L, 15L)),
    CompanyID = rep(c(1990357, 1505, 17834), times = c(5L, 1L, 14L)),
    start_new = structure(rep(NA_real_, 20L), class = "Date"),
    end_new = structure(rep(NA_real_, 20L), class = "Date"),
    DateStartRole = rep(
      as.Date(c("2015-01-01", "2000-01-01", "2001-01-01")),
      times = c(5L, 1L, 14L)
    ),
    DateEndRole = rep(
      as.Date(c("2019-12-31", "2000-12-31", "2011-12-31")),
      times = c(5L, 1L, 14L)
    )
  ),
  row.names = c(NA, 20L),
  class = "data.frame"
)

The variables DateStartRole and DateEndRole indicate the full period. I want the start_new and end_new variables to split that period into consecutive one-year ranges, one per row. The desired output should look like this:

DirectorID CompanyID          start_new    end_new      DateStartRole  DateEndRole
1           31   1990357      2015-01-01   2015-12-31    2015-01-01    2019-12-31
2           31   1990357      2016-01-01   2016-12-31    2015-01-01    2019-12-31
3           31   1990357      2017-01-01   2017-12-31    2015-01-01    2019-12-31
4           31   1990357      2018-01-01   2018-12-31    2015-01-01    2019-12-31
5           31   1990357      2019-01-01   2019-12-31    2015-01-01    2019-12-31

Within each group, you can create a yearly sequence that starts at the first DateStartRole and has as many dates as the group has rows; end_new is then the day before the next row's start_new:

library(dplyr)
library(lubridate)

# Fill start_new / end_new with consecutive one-year windows, one per row.
# Grouping by CompanyID as well as DirectorID anchors each role's sequence at
# that role's own DateStartRole, rather than continuing the sequence that
# started with the director's first role.
df %>%
  group_by(DirectorID, CompanyID) %>%
  mutate(
    # Yearly start dates: begin at the group's first DateStartRole and
    # produce exactly one date per row in the group.
    start_new = seq(first(DateStartRole), by = "1 year", length.out = n()),
    # Each window ends the day before the next one starts. The last row has
    # no successor, so its end comes from its own start plus one year.
    end_new = lead(start_new, default = last(start_new) + years(1)) - 1
  ) %>%
  ungroup()

#   DirectorID CompanyID start_new  end_new    DateStartRole DateEndRole
#        <dbl>     <dbl> <date>     <date>     <date>        <date>     
# 1         31   1990357 2015-01-01 2015-12-31 2015-01-01    2019-12-31 
# 2         31   1990357 2016-01-01 2016-12-31 2015-01-01    2019-12-31 
# 3         31   1990357 2017-01-01 2017-12-31 2015-01-01    2019-12-31 
# 4         31   1990357 2018-01-01 2018-12-31 2015-01-01    2019-12-31 
# 5         31   1990357 2019-01-01 2019-12-31 2015-01-01    2019-12-31 
# 6         36      1505 2000-01-01 2000-12-31 2000-01-01    2000-12-31 
# 7         36     17834 2001-01-01 2001-12-31 2001-01-01    2011-12-31 
# 8         36     17834 2002-01-01 2002-12-31 2001-01-01    2011-12-31 
# 9         36     17834 2003-01-01 2003-12-31 2001-01-01    2011-12-31 
#10         36     17834 2004-01-01 2004-12-31 2001-01-01    2011-12-31 
#11         36     17834 2005-01-01 2005-12-31 2001-01-01    2011-12-31 
#12         36     17834 2006-01-01 2006-12-31 2001-01-01    2011-12-31 
#13         36     17834 2007-01-01 2007-12-31 2001-01-01    2011-12-31 
#14         36     17834 2008-01-01 2008-12-31 2001-01-01    2011-12-31 
#15         36     17834 2009-01-01 2009-12-31 2001-01-01    2011-12-31 
#16         36     17834 2010-01-01 2010-12-31 2001-01-01    2011-12-31 
#17         36     17834 2011-01-01 2011-12-31 2001-01-01    2011-12-31 
#18         36     17834 2012-01-01 2012-12-31 2001-01-01    2011-12-31 
#19         36     17834 2013-01-01 2013-12-31 2001-01-01    2011-12-31 
#20         36     17834 2014-01-01 2014-12-31 2001-01-01    2011-12-31 

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM