簡體   English   中英

將行轉換為列並按日期連接它們

[英]Transform rows to columns and join them by dates

我正在嘗試在R中把每日天氣信息添加到一個數據框,目標是 df1 + df2 = df3。 我原本認為可行的做法是:按df1中的日期範圍對df2取子集,將df2的子集折疊成一行,再用rbind.fill與df1合併,但我遇到了困難。

# Start (`date`) and end (`date2`) of each period, as ISO "YYYY-MM-DD" strings.
date2 <- c("2014-06-03", "2017-05-20")
date <- c("2014-06-01", "2017-05-15")
df1 <- data.frame(date, date2)
# Echo the assembled frame (not the bare `date` vector) so the console output
# matches the `df1` transcript that follows.
df1

#> df1
#        date      date2
#1 2014-06-01 2014-06-03
#2 2017-05-15 2017-05-20

# Daily weather observations: one row per calendar day, covering
# 2014-06-01..2014-06-03 and 2017-05-15..2017-05-20.
date3 <- c(sprintf("2014-06-%02d", 1:3), sprintf("2017-05-%02d", 15:20))
rain <- c(3, 4, 3, 5, 5, 6, 7, 6, 6)
sun <- c(rep(10, 3), rep(15, 3), 16, 15, 15)
df2 <- data.frame(date3, rain, sun)

#> df2
#       date3 rain sun
#1 2014-06-01    3  10
#2 2014-06-02    4  10
#3 2014-06-03    3  10
#4 2017-05-15    5  15
#5 2017-05-16    5  15
#6 2017-05-17    6  15
#7 2017-05-18    7  16
#8 2017-05-19    6  15
#9 2017-05-20    6  15

# Expected result: one row per df1 period, with that period's df2 readings
# spread into rain_dayN / sun_dayN columns (N = day number within the period).
# The first period covers only 3 days, so days 4-6 are NA for it.
rain_day1 <- c(3, 5)
rain_day2 <- c(4, 5)
rain_day3 <- c(3, 6)
rain_day4 <- c(NA, 7)
rain_day5 <- c(NA, 6)
rain_day6 <- c(NA, 6)
sun_day1 <- c(10, 15)
sun_day2 <- c(10, 15)
sun_day3 <- c(10, 15)
# In df2, 2017-05-18 (day 4 of the second period) has sun = 16 and
# 2017-05-19 (day 5) has sun = 15.
sun_day4 <- c(NA, 16)
sun_day5 <- c(NA, 15)
sun_day6 <- c(NA, 15)
date5 <- c("2014-06-03", "2017-05-20")
date4 <- c("2014-06-01", "2017-05-15")
df3 <- data.frame(
  date4, date5,
  rain_day1, sun_day1, rain_day2, sun_day2, rain_day3, sun_day3,
  rain_day4, sun_day4, rain_day5, sun_day5, rain_day6, sun_day6
)

#> df3
#      date4      date5 rain_day1 sun_day1 rain_day2 sun_day2 rain_day3 sun_day3 rain_day4 sun_day4
#1 2014-06-01 2014-06-03         3       10         4       10         3       10        NA       NA
#2 2017-05-15 2017-05-20         5       15         5       15         6       15         7       16
#  rain_day5 sun_day5 rain_day6 sun_day6
#1        NA       NA        NA       NA
#2         6       15         6       15

任何幫助,將不勝感激。 提前致謝

試試這個。 歡迎任何對我的管道(pipe)寫法的改進建議,我還在練習中。

library(magrittr)

# Work with real Date objects so the period filter compares calendar dates.
df1$date <- as.Date(df1$date)
df1$date2 <- as.Date(df1$date2)
df2$date3 <- as.Date(df2$date3)
df2 %<>% setNames(c("date3", "rain_day", "sun_day"))

# One named numeric vector per df1 row: that period's readings in df2 row
# order, interleaved as rain_day1, sun_day1, rain_day2, sun_day2, ...
# lapply() always returns a list; apply()/sapply() would simplify to a matrix
# whenever every period covers the same number of days (or df1 has one row),
# which breaks the padding step below.
row_list <- lapply(seq_len(nrow(df1)), function(i) {
  in_period <- df2$date3 >= df1$date[i] & df2$date3 <= df1$date2[i]
  readings <- df2[which(in_period), 2:3]
  n_days <- nrow(readings)
  # After t() each column is one day, so as.vector() walks day by day.
  values <- as.vector(t(as.matrix(readings)))
  # Number every day explicitly so a one-day period is still named *_day1.
  names(values) <- paste0(
    rep(names(readings), times = n_days),
    rep(seq_len(n_days), each = ncol(readings))
  )
  values
})

# The longest period supplies the column names; shorter ones are NA-padded.
day_counts <- vapply(row_list, length, integer(1))
widest <- if (length(day_counts) > 0L) max(day_counts) else 0L
row_names <- if (widest > 0L) {
  names(row_list[[which.max(day_counts)]])
} else {
  character(0)
}
padded <- lapply(row_list, function(x) {
  c(unname(x), rep(NA_real_, widest - length(x)))
})

# One matrix row per period, then convert, name, and append to df1.
matrix(
  as.numeric(unlist(padded)),
  nrow = length(padded), ncol = widest, byrow = TRUE
) %>%
  as.data.frame() %>%
  setNames(row_names) %>%
  cbind(df1, .)
#         date      date2 rain_day1 sun_day1 rain_day2 sun_day2 rain_day3 sun_day3 rain_day4 sun_day4 rain_day5 sun_day5 rain_day6 sun_day6
# 1 2014-06-01 2014-06-03         3       10         4       10         3       10        NA       NA        NA       NA        NA       NA
# 2 2017-05-15 2017-05-20         5       15         5       15         6       15         7       16         6       15         6       15

“我認為按df1中的日期對df2取子集,將df2折疊後再用rbind.fill與df1合併,會是可行的做法”......花的時間比預期的長,但我做到了!

# Inspect the column classes, then store every date column as plain character
# so the comparisons below all operate on the same type. ISO "YYYY-MM-DD"
# strings compare in chronological order.
lapply(df1, class)
df1$date <- as.character(df1$date)
df1$date2 <- as.character(df1$date2)
lapply(df2, class)
df2$date3 <- as.character(df2$date3)
lapply(df3, class)
df3$date4 <- as.character(df3$date4)
df3$date5 <- as.character(df3$date5)


# Load plyr (needed for rbind.fill), installing it first if it is missing.
tryCatch(library("plyr"),
         error = function(e) {
           install.packages("plyr")
           library("plyr")
         }
)

# Build one wide row per df1 period and bind them once at the end, instead of
# growing a data frame inside the loop.
pieces <- vector("list", nrow(df1))
for (i in seq_len(nrow(df1))) {
  in_range <- df2$date3 >= df1$date[i] & df2$date3 <= df1$date2[i]
  dff <- df2[which(in_range), ]
  if (nrow(dff) == 0L) {
    # No observations for this period: keep the row; rbind.fill pads with NA.
    pieces[[i]] <- df1[i, , drop = FALSE]
    next
  }
  # Flatten the period's rows into a single wide row. unlist() names the
  # entries date31, date32, ..., rain1, ..., sun1, ... and, because date3 is
  # character, coerces every value (including rain/sun) to character.
  dff <- as.data.frame(t(unlist(dff)))
  # The first column is the first observed date; drop it and attach the rest
  # to this period's own df1 row. This avoids merging against all of df1.
  pieces[[i]] <- cbind(df1[i, , drop = FALSE], dff[-1])
}

# rbind.fill aligns columns by name and fills the gaps with NA.
df4 <- if (length(pieces) > 0L) rbind.fill(pieces) else df1

# Drop the leftover per-day date columns. A logical mask is used because
# `-grep(...)` would drop every column when nothing matches.
df5 <- df4[, !grepl("^date3", colnames(df4)), drop = FALSE]

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM