简体   繁体   中英

Split a nested list of a dataframe column into different columns

I have tried related solutions, but they do not work for my case. I have a dataframe that has a nested list in one column, and I want to split this list and put it into columns. The list contains further lists, each holding the timestamp for a month (ts) and the consumption for that month (v). The dataframe is:

   id      monthly_consum
1 112          list1
2  34          list2
3  54          list3

where

# Monthly readings referenced by the first row of the data frame (id 112):
# an unnamed list with one list(ts = <timestamp string>, v = <consumption>)
# entry per month of 2016.
# NOTE: the December timestamp contains a space ("2016- 12-...") exactly as
# originally posted; it is reproduced verbatim here.
list1 <- mapply(
  function(ts, v) list(ts = ts, v = v),
  c(
    "2016-01-01T00:00:00+01:00", "2016-02-01T00:00:00+01:00",
    "2016-03-01T00:00:00+01:00", "2016-04-01T00:00:00+01:00",
    "2016-05-01T00:00:00+01:00", "2016-06-01T00:00:00+01:00",
    "2016-07-01T00:00:00+01:00", "2016-08-01T00:00:00+01:00",
    "2016-09-01T00:00:00+01:00", "2016-10-01T00:00:00+01:00",
    "2016-11-01T00:00:00+01:00", "2016- 12-01T00:00:00+01:00"
  ),
  c(466.6, 565.6, 765.6, 888.6, 465, 465.6, 786, 435, 568, 678, 522, 555),
  SIMPLIFY = FALSE,   # keep a list of lists, never a matrix
  USE.NAMES = FALSE   # the outer list must stay unnamed
)


# Monthly readings referenced by the second row of the data frame (id 34):
# an unnamed list with one list(ts = <timestamp string>, v = <consumption>)
# entry per month of 2016.
list2 <- mapply(
  function(ts, v) list(ts = ts, v = v),
  # First-of-month timestamps for January..December 2016 (+01:00 offset).
  sprintf("2016-%02d-01T00:00:00+01:00", 1:12),
  c(333.6, 565.6, 765.6, 333.6, 465, 465.6, 786, 435, 568, 678, 522, 555),
  SIMPLIFY = FALSE,   # keep a list of lists, never a matrix
  USE.NAMES = FALSE   # the outer list must stay unnamed
)


# Monthly readings referenced by the third row of the data frame (id 54):
# an unnamed list with one list(ts = <timestamp string>, v = <consumption>)
# entry per month of 2016.
list3 <- mapply(
  function(ts, v) list(ts = ts, v = v),
  # First-of-month timestamps for January..December 2016 (+01:00 offset).
  sprintf("2016-%02d-01T00:00:00+01:00", 1:12),
  c(323.6, 565.6, 333.6, 888.6, 465, 465.6, 786, 435, 568, 678, 522, 555),
  SIMPLIFY = FALSE,   # keep a list of lists, never a matrix
  USE.NAMES = FALSE   # the outer list must stay unnamed
)

I would like to split the list and create a dataframe which will have one of the 2 following formats:

   id          ts.1                     cons.1    ts.2    cons.2  ts.3 etc..
1 112      2016-01-01T00:00:00+01:00    466.6    2016-02..   ...   ...
2  34      2016-01-01T00:00:00+01:00    333.6    2016-02..   ...   ...
3  54      2016-01-01T00:00:00+01:00    323.6    2016-02..   ...   ...

OR

  id             ts                  consumption    
 112      2016-01-01T00:00:00+01:00    466.6    
 112      2016-02-01T00:00:00+01:00    565.6    
 112      2016-03-01T00:00:00+01:00    765.6 
 112      2016-04-01T00:00:00+01:00    888.6    
 112      2016-05-01T00:00:00+01:00    465    
 112      2016-06-01T00:00:00+01:00    465.6 
 112      2016-07-01T00:00:00+01:00    786    
 112      2016-08-01T00:00:00+01:00    435    
 112      2016-09-01T00:00:00+01:00    568 
 112      2016-10-01T00:00:00+01:00    678    
 112      2016-11-01T00:00:00+01:00    522   
 112      2016-12-01T00:00:00+01:00    555 
 34       2016-01-01T00:00:00+01:00    333.6    
 34       2016-02-01T00:00:00+01:00    565.6    
 34       2016-03-01T00:00:00+01:00    765.6 
 etc............

Could you help me? I am using data.frame(matrix(unlist..)) but it does not give the format that I want. When I use rbindlist I get:

"Error in rbindlist(....) : Item 1 of list input is not a data.frame, data.table or list"

Thank you in advance!

UPDATE: Using dput on the real data, I get:

 >dput(locs_total[9:12,1:5])
     structure(list(X.dep_id. = c("34", "34", "34", "34"), X.loc_id. = c("17761", 
    "17406", "23591", "27838"), X.surface. = c("200", "1250", "54", 
    "150"), X.sector. = c("HOUSING", "SMALL-STORE-FOOD", "LIBRARY", 
    "OFFICE-BUILDING"), 
 X.avg_cons_main. = list(list(structure(list(
        ts = "2016-01-01T00:00:00+01:00", v = 466.65), .Names = c("ts", 
    "v")), structure(list(ts = "2016-02-01T00:00:00+01:00", v = 406.45), 
   .Names = c("ts", 
    "v")), structure(list(ts = "2016-03-01T00:00:00+01:00", v = 483.35), 
   .Names = c("ts", 
   "v")), structure(list(ts = "2016-04-01T00:00:00+02:00", v = 79.45), 
   .Names = c("ts", 
  "v"))), NULL, NULL, NULL)), .Names = c("X.dep_id.", "X.loc_id.", 
  "X.surface.", "X.sector.", "X.avg_cons_main."
 ), row.names = c("9", "10", "11", "12"), class = "data.frame")

We can loop through the list

# Build the long-format rows for a single id.
#
# id       - scalar identifier copied onto every row.
# readings - list of list(ts = <string>, v = <number>) entries; may be empty
#            or NULL, in which case a zero-row data frame is returned.
#
# Columns are extracted explicitly with vapply() so that ts is always
# character and consumption always numeric, independent of the R version's
# stringsAsFactors default and of how rbind.data.frame() coerces bare lists.
readings_to_frame <- function(id, readings) {
  data.frame(
    # rep() rather than recycling, so an empty `readings` gives zero rows
    # instead of a "differing number of rows" error.
    id = rep(id, length(readings)),
    ts = vapply(readings, function(r) r$ts, character(1)),
    consumption = vapply(readings, function(r) r$v, numeric(1)),
    stringsAsFactors = FALSE
  )
}

# mget() fetches the list objects whose names are stored in monthly_consum;
# Map() pairs each id with its list, and rbind stacks the per-id frames.
res <- do.call(
  rbind,
  Map(readings_to_frame, df1$id, mget(df1$monthly_consum))
)
row.names(res) <- NULL
head(res, 14)
#    id                         ts consumption
#1  112  2016-01-01T00:00:00+01:00       466.6
#2  112  2016-02-01T00:00:00+01:00       565.6
#3  112  2016-03-01T00:00:00+01:00       765.6
#4  112  2016-04-01T00:00:00+01:00       888.6
#5  112  2016-05-01T00:00:00+01:00       465.0
#6  112  2016-06-01T00:00:00+01:00       465.6
#7  112  2016-07-01T00:00:00+01:00       786.0
#8  112  2016-08-01T00:00:00+01:00       435.0
#9  112  2016-09-01T00:00:00+01:00       568.0
#10 112  2016-10-01T00:00:00+01:00       678.0
#11 112  2016-11-01T00:00:00+01:00       522.0
#12 112 2016- 12-01T00:00:00+01:00       555.0
#13  34  2016-01-01T00:00:00+01:00       333.6
#14  34  2016-02-01T00:00:00+01:00       565.6

data

# Lookup table used by the answer: one row per id, with monthly_consum holding
# the *name* of the list object that contains that id's readings.
df1 <- data.frame(
  id = c(112L, 34L, 54L),
  monthly_consum = c("list1", "list2", "list3"),
  row.names = c("1", "2", "3"),
  stringsAsFactors = FALSE
)

If the ids are also in the lists, you can use dplyr::bind_rows

# Stack every monthly entry from the three lists into a single table; per the
# output shown below this yields 36 rows with the columns ts and v.
dplyr::bind_rows(list1, list2, list3)
# A tibble: 36 × 2
                          ts     v
                       <chr> <dbl>
1  2016-01-01T00:00:00+01:00 466.6
2  2016-02-01T00:00:00+01:00 565.6
3  2016-03-01T00:00:00+01:00 765.6
4  2016-04-01T00:00:00+01:00 888.6
5  2016-05-01T00:00:00+01:00 465.0
6  2016-06-01T00:00:00+01:00 465.6
7  2016-07-01T00:00:00+01:00 786.0
8  2016-08-01T00:00:00+01:00 435.0
9  2016-09-01T00:00:00+01:00 568.0
10 2016-10-01T00:00:00+01:00 678.0
# ... with 26 more rows

To add IDs from another df

library(dplyr)

# Lookup table pairing each id with the name of the list that holds its
# readings. tibble() replaces the deprecated data_frame() alias; it is
# re-exported by dplyr, so library(dplyr) above is sufficient.
ids <- tibble(list_id = c(112, 34, 54),
              monthly_consum = c("list1", "list2", "list3"))

If we consider nested lists, you can use purrr::map as follows:

-combine the three lists in one list

# Collect the three reading lists into one list, in the same order as the
# rows of ids (list1, list2, list3).
k <- list(list1, list2, list3)

-use map to apply bind_rows to each list independently

# Apply bind_rows to each element of k separately, giving one table per list.
k1 <- lapply(k, bind_rows)

-use the ids as names for the lists

# Label each per-list table with its id (the ids are coerced to character
# when used as names).
k1 <- setNames(k1, ids$list_id)

-bind_rows using .id

# Stack the per-id tables; .id = "id" adds an "id" column filled from the
# names of k1 (shown as <chr> in the output below).
bind_rows(k1, .id = "id")

# A tibble: 36 × 3
      id                        ts     v
   <chr>                     <chr> <dbl>
1    112 2016-01-01T00:00:00+01:00 466.6
2    112 2016-02-01T00:00:00+01:00 565.6
3    112 2016-03-01T00:00:00+01:00 765.6
4    112 2016-04-01T00:00:00+01:00 888.6
5    112 2016-05-01T00:00:00+01:00 465.0
6    112 2016-06-01T00:00:00+01:00 465.6
7    112 2016-07-01T00:00:00+01:00 786.0
8    112 2016-08-01T00:00:00+01:00 435.0
9    112 2016-09-01T00:00:00+01:00 568.0
10   112 2016-10-01T00:00:00+01:00 678.0

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM