簡體   English   中英

如何在 R 中正確地將日期格式轉換為 integer 格式

[英]How to correctly convert date format to integer format in R

# Example data: ten rows holding a day.month.year date string plus two
# hour:minute:second time strings, all kept as character columns.
df1 <- data.frame(
  date = rep(c("22.04.2022", "23.04.2022", "24.04.2022"),
             times = c(4L, 3L, 3L)),
  d1 = c("8:00:00", "10:00:00", "12:00:00", "12:00:00", "10:00:00",
         "12:00:00", "12:00:00", "10:00:00", "12:00:00", "12:00:00"),
  d2 = c("10:00:00", "20:00:00", "22:00:00", "22:00:00", "20:00:00",
         "22:00:00", "22:00:00", "20:00:00", "22:00:00", "22:00:00"),
  stringsAsFactors = FALSE
)

這里有 3 列日期/時間格式的數據,我需要將它們全部轉換為 integer,期望的 output 如下:

date         d1          d2         date1      d1_1 d2_1
22.04.2022  8:00:00     10:00:00    20220422    8   10
22.04.2022  10:00:00    20:00:00    20220422    10  20
22.04.2022  12:00:00    22:00:00    20220422    12  22
22.04.2022  12:00:00    22:00:00    20220422    12  22
23.04.2022  10:00:00    20:00:00    20220423    10  20
23.04.2022  12:00:00    22:00:00    20220423    12  22
23.04.2022  12:00:00    22:00:00    20220423    12  22
24.04.2022  10:00:00    20:00:00    20220424    10  20
24.04.2022  12:00:00    22:00:00    20220424    12  22
24.04.2022  12:00:00    22:00:00    20220424    12  22

其中 date1、d1_1、d2_1 分別是 date、d1、d2 對應的 integer

怎樣才能做得更好? 謝謝你。

一個可能的解決方案:

library(tidyverse)
library(lubridate)

# Add integer versions of the date and hour columns:
#   date1      - the day.month.year string parsed with dmy() and re-emitted
#                as a yyyymmdd integer;
#   d1_1, d2_1 - the leading hour digits of d1 and d2 as integers.
# format() and str_extract() both return character vectors, so each result is
# passed through as.integer() to give the integer columns that were asked for.
df1 %>%
  mutate(date1 = as.integer(format(dmy(date), "%Y%m%d")),
         across(matches("d[1-2]"),
                ~ as.integer(str_extract(.x, "^\\d+(?=\\:)")),
                .names = "{.col}_1"))

#>          date       d1       d2    date1 d1_1 d2_1
#> 1  22.04.2022  8:00:00 10:00:00 20220422    8   10
#> 2  22.04.2022 10:00:00 20:00:00 20220422   10   20
#> 3  22.04.2022 12:00:00 22:00:00 20220422   12   22
#> 4  22.04.2022 12:00:00 22:00:00 20220422   12   22
#> 5  23.04.2022 10:00:00 20:00:00 20220423   10   20
#> 6  23.04.2022 12:00:00 22:00:00 20220423   12   22
#> 7  23.04.2022 12:00:00 22:00:00 20220423   12   22
#> 8  24.04.2022 10:00:00 20:00:00 20220424   10   20
#> 9  24.04.2022 12:00:00 22:00:00 20220424   12   22
#> 10 24.04.2022 12:00:00 22:00:00 20220424   12   22

我們也可以轉換為Date class 並使用format以及使用hour來提取小時部分

library(dplyr)
library(lubridate)
# Parse the date with dmy() and the times with hms(), then keep the yyyymmdd
# date and the hour component as new columns date1, d1_1 and d2_1.
# format() returns character and hour() is not guaranteed to return an integer
# vector, so both are wrapped in as.integer() to store true integers.
df1 %>%
   mutate(date1 = as.integer(format(dmy(date), "%Y%m%d")),
     across(d1:d2, ~ as.integer(hour(hms(.x))), .names = "{.col}_1"))

-輸出

         date       d1       d2    date1 d1_1 d2_1
1  22.04.2022  8:00:00 10:00:00 20220422    8   10
2  22.04.2022 10:00:00 20:00:00 20220422   10   20
3  22.04.2022 12:00:00 22:00:00 20220422   12   22
4  22.04.2022 12:00:00 22:00:00 20220422   12   22
5  23.04.2022 10:00:00 20:00:00 20220423   10   20
6  23.04.2022 12:00:00 22:00:00 20220423   12   22
7  23.04.2022 12:00:00 22:00:00 20220423   12   22
8  24.04.2022 10:00:00 20:00:00 20220424   10   20
9  24.04.2022 12:00:00 22:00:00 20220424   12   22
10 24.04.2022 12:00:00 22:00:00 20220424   12   22

在 base R 中使用 sub:

# Base R, regex only: reorder the dd.mm.yyyy pieces into yyyymmdd, and drop
# everything from the first ":" onward to keep just the hour.
# sub() returns character, so as.integer() is applied to get integer columns.
# NOTE(review): the second hour column is named d2_2 here, while the requested
# output calls it d2_1; the name is kept to match the printed result below.
df1$date1 <- as.integer(
  sub("(\\d+)\\.(\\d+)\\.(\\d+)", "\\3\\2\\1", df1$date)
)
df1[c("d1_1", "d2_2")] <- lapply(df1[c("d1", "d2")],
                                  \(x) as.integer(sub(":.*", "", x)))

使用 as.Date 和 as.difftime

# Base R with real date/time parsing: as.Date() reads the dd.mm.yyyy string
# and format() writes it back as yyyymmdd; as.difftime() turns each time
# string into a duration in hours. Both results go through as.integer().
# The time format is spelled out because the as.difftime() default ("%X") is
# locale-dependent, and `units = "hours"` is written in full rather than
# relying on partial matching of the argument name and value.
# NOTE(review): the second hour column is named d2_2 here, while the requested
# output calls it d2_1; the name is kept to match the printed result below.
df1$date1 <- as.integer(format(as.Date(df1$date, "%d.%m.%Y"), "%Y%m%d"))
df1[c("d1_1", "d2_2")] <- lapply(
  df1[c("d1", "d2")],
  \(x) as.integer(as.difftime(x, format = "%H:%M:%S", units = "hours"))
)

結果:

df1
#         date       d1       d2    date1 d1_1 d2_2
#1  22.04.2022  8:00:00 10:00:00 20220422    8   10
#2  22.04.2022 10:00:00 20:00:00 20220422   10   20
#3  22.04.2022 12:00:00 22:00:00 20220422   12   22
#4  22.04.2022 12:00:00 22:00:00 20220422   12   22
#5  23.04.2022 10:00:00 20:00:00 20220423   10   20
#6  23.04.2022 12:00:00 22:00:00 20220423   12   22
#7  23.04.2022 12:00:00 22:00:00 20220423   12   22
#8  24.04.2022 10:00:00 20:00:00 20220424   10   20
#9  24.04.2022 12:00:00 22:00:00 20220424   12   22
#10 24.04.2022 12:00:00 22:00:00 20220424   12   22

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM