簡體   English   中英

將列表中的數據幀組合為寬數據幀

[英]combine data frames in a list to wide data frame

我的列表中有很多數據框(此示例只有2個)

# Reproducible example: two data frames with 5 and 10 rows, collected in a
# list. The seed fixes the random draws.
set.seed(1)
df1 <- data.frame(
  v1 = rnorm(n = 5),
  V2 = rnorm(n = 5)
)
df2 <- data.frame(
  v1 = rnorm(n = 10),
  V2 = rnorm(n = 10)
)
dflist <- list(df1, df2)

如何將每個數據框中的變量 v1 提取到一個新的數據框中,使新數據框的每一行對應一個數據框的 v1 列?

請注意,它們的長度不同,應將空值設置為NA,而不要像此手動解決方案那樣重復以前的值:

# Manual attempt: stack the two v1 vectors as rows. The shorter vector gets
# its values repeated instead of being padded with NA.
df <- rbind(dflist[[1]][["v1"]], dflist[[2]][["v1"]])

解決方案應如下所示:

       [,1]      [,2]       [,3]      [,4]      [,5]        [,6]        [,7]      [,8]      [,9]     [,10]
[1,] -0.6264538 0.1836433 -0.8356286  1.595281 0.3295078          NA          NA        NA        NA        NA
[2,]  1.5117812 0.3898432 -0.6212406 -2.214700 1.1249309 -0.04493361 -0.01619026 0.9438362 0.8212212 0.5939013

使用基礎 R(base R):

# Pad every v1 column with NA up to the row count of the longest data frame,
# then stack the padded vectors as the rows of a matrix.
# vapply() pins the type of the row counts, and do.call(rbind, ...) always
# yields one row per data frame; t(sapply(...)) flips to a single row when
# every data frame has exactly one row, because sapply() then returns a
# plain vector instead of a matrix.
maxn <- max(vapply(dflist, nrow, integer(1)))
do.call(rbind, lapply(dflist, function(x) {
  v1 <- x[["v1"]]
  c(v1, rep(NA, maxn - length(v1)))
}))
# [,1]       [,2]       [,3]      [,4]       [,5]      [,6]     [,7]      [,8]       [,9]     [,10]
# [1,]  0.09196323 0.09557415  0.3929732 0.2369672 -0.8581784        NA       NA        NA         NA        NA
# [2,] -0.95637695 0.44242010 -1.3525504 1.1507461 -0.8572686 0.1472487 1.196955 0.7803178 -0.6583661 -0.914915
library(data.table)

# Example data: two tables whose row counts differ (5 vs 10).
df1 <- data.table(v1 = rnorm(5), V2 = rnorm(5))
df2 <- data.table(v1 = rnorm(10), V2 = rnorm(10))
dflist <- list(df1, df2)

# Turn each table's v1 column into a one-row table (columns V1, V2, ...).
# lapply() sizes the result from dflist itself, so no placeholder list is
# needed and an empty dflist gives an empty list rather than an indexing
# error. Only v1 is transposed because only that row is kept.
df_new <- lapply(dflist, function(tbl) transpose(tbl[, "v1"]))

# fill = TRUE pads the shorter rows with NA so all rows share the same columns.
df <- rbindlist(df_new, use.names = TRUE, fill = TRUE)
# Example data: two data frames whose row counts differ (5 vs 10).
df1 <- data.frame(v1 = rnorm(5), V2 = rnorm(5))
df2 <- data.frame(v1 = rnorm(10), V2 = rnorm(10))

dflist <- list(df1, df2)

# Baseline for comparison only: rbind() on vectors of unequal length recycles
# the shorter one, which is the behaviour the result must avoid.
df <- rbind(dflist[[1]]$v1, dflist[[2]]$v1)

# Pull the first column of every data frame as a plain vector.
first_column <- lapply(dflist, function(x) x[[1]])

# `length<-` extends a vector with NA, so each row reaches the longest length
# without repeating values; rbind() then stacks the padded vectors as rows.
max_len <- max(lengths(first_column))
result <- do.call(rbind, lapply(first_column, `length<-`, max_len))

# Sanity check: every observed value matches the baseline at the same
# position, and exactly the original values (nothing recycled) are non-NA.
observed <- !is.na(result)
identical(result[observed], df[observed]) &&
  sum(observed) == nrow(df1) + nrow(df2)

一種選擇是使用 dplyr::bind_rows 合併列表中的數據幀。還可以通過 bind_rows 的 .id 參數,在合併後的數據中標記每一行來自第幾個數據幀。

library(tidyverse)

# Result containing only the v1 column, one row per data frame in the list.
# bind_rows(.id = "name") records which list element each row came from; rn
# numbers the rows within each data frame and supplies the wide column names.
# pivot_wider() replaces the superseded spread(); positions that a shorter
# data frame does not have are left as NA.
bind_rows(dflist, .id = "name") %>%
  select(-V2) %>%
  group_by(name) %>%
  mutate(rn = row_number()) %>%
  pivot_wider(names_from = rn, values_from = v1)

# # A tibble: 2 x 11
# # Groups: name [2]
#  name     `1`   `2`    `3`   `4`   `5`      `6`      `7`    `8`    `9`   `10`
# * <chr>  <dbl> <dbl>  <dbl> <dbl> <dbl>    <dbl>    <dbl>  <dbl>  <dbl>  <dbl>
# 1 1     -0.626 0.184 -0.836  1.60 0.330  NA       NA      NA     NA     NA    
# 2 2      1.51  0.390 -0.621 -2.21 1.12   -0.0449  -0.0162  0.944  0.821  0.594

# Result containing data from both the v1 and V2 columns: one row per
# (data frame, column) pair. pivot_longer()/pivot_wider() replace the
# superseded gather()/spread(); rn numbers the values within each pair and
# supplies the wide column names.
bind_rows(dflist, .id = "name") %>%
  pivot_longer(-name, names_to = "key", values_to = "value") %>%
  group_by(name, key) %>%
  mutate(rn = row_number()) %>%
  pivot_wider(names_from = rn, values_from = value)

# # A tibble: 4 x 12
# # Groups: name, key [4]
#  name  key      `1`   `2`     `3`    `4`    `5`      `6`      `7`     `8`     `9`   `10`
# * <chr> <chr>  <dbl> <dbl>   <dbl>  <dbl>  <dbl>    <dbl>    <dbl>   <dbl>   <dbl>  <dbl>
# 1 1     v1    -0.626 0.184 -0.836   1.60   0.330  NA       NA       NA      NA     NA    
# 2 1     V2    -0.820 0.487  0.738   0.576 -0.305  NA       NA       NA      NA     NA    
# 3 2     v1     1.51  0.390 -0.621  -2.21   1.12   -0.0449  -0.0162   0.944   0.821  0.594
# 4 2     V2     0.919 0.782  0.0746 -1.99   0.620  -0.0561  -0.156   -1.47   -0.478  0.418

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM