简体   繁体   中英

How to convert list of list into tidy tibble or data.frame in R

I have the following list of list:

# Example input: per-sample cell-type scores (named numeric vectors), the
# matching crt/sr values, and the sample names, all in sample order.
my_lol <- list(
  coolfactor_score = list(
    c(
      B = 0.164477631065473, Mac = 0.198253819406019,
      NK = 0.396414447052519, Neu = 0.133118603987442,
      Stro = 0.107735498488546
    ),
    c(
      B = 0.186215537135912, Mac = 0.18408529174803,
      NK = 0.375349920115798, Neu = 0.247664923324821,
      Stro = 0.006684327675438
    )
  ),
  sr_crt = list(
    list(crt = 0.133118603987442, sr = 0.407076876403305),
    list(crt = 0.18408529174803, sr = 0.0829181742326453)
  ),
  sample_names = c("Sample1", "Sample2")
)

Which looks like this:

> my_lol
$coolfactor_score
$coolfactor_score[[1]]
        B       Mac        NK       Neu      Stro 
0.1644776 0.1982538 0.3964144 0.1331186 0.1077355 

$coolfactor_score[[2]]
          B         Mac          NK         Neu        Stro 
0.186215537 0.184085292 0.375349920 0.247664923 0.006684328 


$sr_crt
$sr_crt[[1]]
$sr_crt[[1]]$crt
[1] 0.1331186

$sr_crt[[1]]$sr
[1] 0.4070769


$sr_crt[[2]]
$sr_crt[[2]]$crt
[1] 0.1840853

$sr_crt[[2]]$sr
[1] 0.08291817



$sample_names
[1] "Sample1" "Sample2"
# Note that the number of samples can be more than 2 and the number of cell types can be more than 5.

How can I tidy it into the following data frame (tibble)?

CellType    Sample    CoolFactorScore  SR            CRT
B           Sample1   0.1644776        0.4070769     0.1331186
Mac         Sample1   0.1982538        0.4070769     0.1331186
NK          Sample1   0.3964144        0.4070769     0.1331186
Neu         Sample1   0.1331186        0.4070769     0.1331186
Stro        Sample1   0.1077355        0.4070769     0.1331186
B           Sample2   0.186215537      0.08291817    0.1840853
Mac         Sample2   0.184085292      0.08291817    0.1840853
NK          Sample2   0.375349920      0.08291817    0.1840853
Neu         Sample2   0.247664923      0.08291817    0.1840853
Stro        Sample2   0.006684328      0.08291817    0.1840853

One way using base R:

# Build one data frame per sample, then stack them.
# Iterating over seq_along(sample_names) instead of a hard-coded 1:2 lets the
# same code handle any number of samples.
mylist <- lapply(seq_along(my_lol$sample_names), function(i) {
  # Take the i-th entry of every top-level element of my_lol (scores, crt/sr
  # pair, sample name) and let data.frame() spread them into columns.
  df <- data.frame(lapply(my_lol, "[", i))
  names(df) <- c("CoolFactorScore", "CRT", "SR", "Sample")
  # The cell types arrive as row names; move them into a regular column.
  df$CellType <- rownames(df)
  row.names(df) <- NULL
  df
})

do.call(rbind, mylist)

Out:

  CoolFactorScore       CRT         SR  Sample CellType
1      0.164477631 0.1331186 0.40707688 Sample1        B
2      0.198253819 0.1331186 0.40707688 Sample1      Mac
3      0.396414447 0.1331186 0.40707688 Sample1       NK
4      0.133118604 0.1331186 0.40707688 Sample1      Neu
5      0.107735498 0.1331186 0.40707688 Sample1     Stro
6      0.186215537 0.1840853 0.08291817 Sample2        B
7      0.184085292 0.1840853 0.08291817 Sample2      Mac
8      0.375349920 0.1840853 0.08291817 Sample2       NK
9      0.247664923 0.1840853 0.08291817 Sample2      Neu
10     0.006684328 0.1840853 0.08291817 Sample2     Stro

Here's a not so elegant method:

# For each sample, combine its score vector, its crt/sr values and its name
# into one data frame. seq_along() replaces the hard-coded 1:2 so that any
# number of samples is handled.
int <- lapply(seq_along(my_lol[[3]]), function(x) {
  do.call(
    data.frame,
    c(
      list(CoolFactorScore = my_lol[[1]][[x]]),
      my_lol[[2]][[x]],
      list(Sample = my_lol[[3]][[x]])
    )
  )
})
# Stack the per-sample data frames; the cell types stay in the row names.
do.call(rbind, int)

      CoolFactorScore       crt         sr  Sample
B         0.164477631 0.1331186 0.40707688 Sample1
Mac       0.198253819 0.1331186 0.40707688 Sample1
NK        0.396414447 0.1331186 0.40707688 Sample1
Neu       0.133118604 0.1331186 0.40707688 Sample1
Stro      0.107735498 0.1331186 0.40707688 Sample1
B1        0.186215537 0.1840853 0.08291817 Sample2
Mac1      0.184085292 0.1840853 0.08291817 Sample2
NK1       0.375349920 0.1840853 0.08291817 Sample2
Neu1      0.247664923 0.1840853 0.08291817 Sample2
Stro1     0.006684328 0.1840853 0.08291817 Sample2

Here is a loop-free solution using functions from the data.table package.

library(data.table)

step 1: unwrap the list

# Flatten the nested list into a single named vector. Because my_lol mixes
# numbers with the character sample names, the result is a character vector.
tmp1 <- unlist(my_lol)

step 2: transpose and convert it to data.table
This gives the widest table that can be built from the original data. In the following steps it is reshaped into the long table requested in the question.

# Turn the named vector into a one-row table: one column per vector element.
tmp2 <- as.data.table(t(tmp1))

step 3: The 'sample_names1' and 'sample_names2' columns have to be renamed to 'Sample' manually.
If you want to generalize this to more sample_names values, adjust this step to match the column names they produce. (This version handles columns named 'sample_names1', 'sample_names2', 'sample_names3', and so on.)

# unlist() names the sample entries "sample_names1", "sample_names2", ...
# (or plain "sample_names" when there is only one sample). Rename them all to
# "Sample"; the anchors and \\d* cover both forms and leave other columns alone.
names(tmp2) <- gsub("^sample_names\\d*$", "Sample", names(tmp2))

step 4: creating measure field names based on field names of tmp2 table

# Distinct column names of tmp2, in order of first appearance; each one
# becomes a value column when the table is reshaped.
measure <- names(tmp2)[!duplicated(names(tmp2))]

step 5: creating longer table (tmp3) from wide table (tmp2)

# Reshape wide -> long: columns that share a name (one per sample) are
# stacked into a single column of that name.
# Columns are selected by exact name match instead of treating the names as
# regular expressions, so a name that is a prefix of another one (or that
# contains regex metacharacters) cannot pull in the wrong columns.
tmp3 <- melt(tmp2,
             measure.vars = lapply(measure,
                                   function(nm) which(names(tmp2) == nm)),
             value.name = measure)

step 6: renaming columns according to request

# Strip the list-element prefixes that unlist() prepended to the column names.
# fixed = TRUE matches the "." literally instead of as a regex wildcard.
names(tmp3) <- gsub("coolfactor_score.", "", names(tmp3), fixed = TRUE)
names(tmp3) <- gsub("sr_crt.", "", names(tmp3), fixed = TRUE)
# Upper-case the two remaining measure names, as requested in the question.
setnames(tmp3, c("crt", "sr"), c("CRT", "SR"))

step 7: creating even longer table (mylist) from tmp3

# Second reshape: stack the per-cell-type columns into CellType /
# CoolFactorScore pairs.
# Every column that is neither an id column nor the "variable" index left
# over from the previous melt is treated as a cell type, so the cell types
# are not hard-coded.
mylist <- melt(tmp3,
               id.vars = c("Sample", "CRT", "SR"),
               measure.vars = setdiff(names(tmp3),
                                      c("Sample", "CRT", "SR", "variable")),
               value.name = "CoolFactorScore",
               variable.name = "CellType")
# The unlist() call in step 1 turned every value into character; convert the
# numeric columns back so they can be used in calculations.
mylist[, c("CoolFactorScore", "CRT", "SR") := lapply(.SD, as.numeric),
       .SDcols = c("CoolFactorScore", "CRT", "SR")]

step 8: reordering columns according to request

# Put the columns in the order asked for in the question (modifies mylist
# in place).
setcolorder(
  mylist,
  neworder = c("CellType", "Sample", "CoolFactorScore", "SR", "CRT")
)

step 9: reordering rows according to request

# Sort the rows by sample, then by cell type within each sample.
mylist <- mylist[order(Sample, CellType), ]

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM