简体   繁体   中英

NA when preparing data using tidyverse in R

I want to prepare my time series. Here is the initial dput():

# Example data: one row per group/year combination (2 groups x 1973-1978)
# and one integer column per month, Jan through Dec.
df <- structure(
  list(
    group = rep(1:2, each = 6L),
    year = rep(1973:1978, times = 2L),
    Jan = c(9007L, 7750L, 8162L, 7717L, 7792L, 7836L,
            9007L, 7750L, 8162L, 7717L, 7792L, 7836L),
    Feb = c(8106L, 6981L, 7306L, 7461L, 6957L, 6892L,
            8106L, 6981L, 7306L, 7461L, 6957L, 6892L),
    Mar = c(8928L, 8038L, 8124L, 7767L, 7726L, 7791L,
            8928L, 8038L, 8124L, 7767L, 7726L, 7791L),
    Apr = c(9137L, 8422L, 7870L, 7925L, 8106L, 8192L,
            9137L, 8422L, 7870L, 7925L, 8106L, 8192L),
    May = c(10017L, 8714L, 9387L, 8623L, 8890L, 9115L,
            10017L, 8714L, 9387L, 8623L, 8890L, 9115L),
    Jun = c(10826L, 9512L, 9556L, 8945L, 9299L, 9434L,
            10826L, 9512L, 9556L, 8945L, 9299L, 9434L),
    Jul = c(11317L, 10120L, 10093L, 10078L, 10625L, 10484L,
            11317L, 10120L, 10093L, 10078L, 10625L, 10484L),
    Aug = c(10744L, 9823L, 9620L, 9179L, 9302L, 9827L,
            10744L, 9823L, 9620L, 9179L, 9302L, 9827L),
    Sep = c(9713L, 8743L, 8285L, 8037L, 8314L, 9110L,
            9713L, 8743L, 8285L, 8037L, 8314L, 9110L),
    Oct = c(9938L, 9129L, 8466L, 8488L, 8850L, 9070L,
            9938L, 9129L, 8466L, 8488L, 8850L, 9070L),
    Nov = c(9161L, 8710L, 8160L, 7874L, 8265L, 8633L,
            9161L, 8710L, 8160L, 7874L, 8265L, 8633L),
    Dec = c(8927L, 8680L, 8034L, 8647L, 8796L, 9240L,
            8927L, 8680L, 8034L, 8647L, 8796L, 9240L)
  ),
  .Names = c(
    "group", "year",
    "Jan", "Feb", "Mar", "Apr", "May", "Jun",
    "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"
  ),
  class = "data.frame",
  # c(NA, -12L) is the compact form dput() uses for automatic row names 1..12.
  row.names = c(NA, -12L)
)

So I did what was shown in my previous topic:

# Packages to attach for this session.
load_pkgs <- c("forecast", "zoo", "timetk", "tidyverse")

# Attach every package while silencing its startup messages.
# - `TRUE` is spelled out because `T` is an ordinary variable that can be
#   reassigned.
# - library() is called only for its side effect, so the list of results is
#   wrapped in invisible() instead of being printed to the console.
invisible(lapply(load_pkgs, function(pkg) {
  suppressPackageStartupMessages(library(pkg, character.only = TRUE))
}))

# Split the data into a list with one data frame per value of `group`.
# The first column (`group`) is dropped from each piece via `df[, -1]`.
ld <- split(df[, -1], df$group)

# Tidy-up the splits

# For every split: reshape from wide (one column per month) to long, then
# build a single `date` column out of the year and the month name.
ld <- lapply(ld, function(x) {
  x %>%
    # Stack all columns except `year` into `key` (column name) / `value` pairs.
    gather(key, value, -year) %>%
    # Combine year and month name into one string, e.g. "1973-Jan".
    unite(date, year, key, sep = "-") %>%
    # Append a day-of-month so the string describes a full date.
    mutate(date = paste0(date, "-01")) %>%
    # NOTE(review): `%b` matches abbreviated month names in the *current
    # locale* (see ?strptime). With a non-English LC_TIME setting, "Jan",
    # "Feb", ... presumably do not match and as.Date() returns NA -- this is
    # the likely cause of the NA dates reported for this snippet.
    mutate(date = as.Date(date, format = "%Y-%b-%d"))    
})

As a result I get:

$`1`
   date value
1  <NA>  9007
2  <NA>  7750
3  <NA>  8162
4  <NA>  7717

But I should actually get a result like:

         date value
1  1973-01-01  9007
2  1974-01-01  7750
3  1975-01-01  8162
4  1976-01-01  7717
5  1977-01-01  7792
6  1978-01-01  7836

Why can't I get it?
This code doesn't work in my console. I have installed all the libraries.

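The NA values most likely come from the "%b" format: as.Date() matches abbreviated month names in the current locale, so English names such as "Jan" fail to parse when the time locale (LC_TIME) is not English. Meanwhile, you can try a workaround with the lubridate package: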

library(lubridate)

# Same reshaping as in the question, but the date string is parsed with
# lubridate::ymd() instead of as.Date().
ld <- lapply(ld, function(split_df) {
  # Wide -> long: every column except `year` becomes a key/value pair.
  long_df <- gather(split_df, key, value, -year)
  # Merge year and month name into one "date" string, e.g. "1973-Jan".
  long_df <- unite(long_df, date, year, key, sep = "-")
  # Append the day and let ymd() do the parsing -- this is where it is used.
  mutate(long_df, date = ymd(paste0(date, "-01")))
})

Results:

$`1`
         date value
1  1973-01-01  9007
2  1974-01-01  7750
3  1975-01-01  8162
4  1976-01-01  7717
5  1977-01-01  7792
...

   $`2`
         date value
1  1973-01-01  9007
2  1974-01-01  7750
3  1975-01-01  8162
4  1976-01-01  7717
5  1977-01-01  7792
...

A different (and potentially faster) solution using tidyverse and lubridate:

# For each split, return a long data frame holding only `value` and `date`.
lapply(ld, function(split_df) {
  # Wide -> long: month columns are stacked into `var` (name) and `value`.
  long_df <- gather(split_df, var, value, -year)
  # match(var, month.abb) turns "Jan".."Dec" into 1..12 by position, so the
  # string handed to ymd() is purely numeric, e.g. "1973-1-01".
  long_df <- mutate(
    long_df,
    date = ymd(paste(year, match(var, month.abb), "01", sep = "-"))
  )
  # Drop the helper columns that are now encoded in `date`.
  select(long_df, -year, -var)
})

$`1`
   value       date
1   9007 1973-01-01
2   7750 1974-01-01
3   8162 1975-01-01
4   7717 1976-01-01
5   7792 1977-01-01
6   7836 1978-01-01
7   8106 1973-02-01
8   6981 1974-02-01
9   7306 1975-02-01
10  7461 1976-02-01

$`2`
   value       date
1   9007 1973-01-01
2   7750 1974-01-01
3   8162 1975-01-01
4   7717 1976-01-01
5   7792 1977-01-01
6   7836 1978-01-01
7   8106 1973-02-01
8   6981 1974-02-01
9   7306 1975-02-01
10  7461 1976-02-01

First, it reshapes the data. Then it pastes together the year, the abbreviated month name converted to its month number, and "01", and converts the resulting string into a date with ymd().

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM