简体   繁体   中英

How to convert DataFrames to nested lists

Given a data.frame() with the following structure:

var1.gender var1.score.raw var1.score.raw.lower var1.score.raw.upper [...] var2.gender var2.score.raw var2.score.raw.lower var2.score.raw.upper [...]

How do I convert this to a nested (multi-level) list, splitting the column names on "."?

Sample data:

# Sample data: each column name encodes a path, with "." between levels.
df <- data.frame(
  var1.gender = c(1, 1, 3, 3),
  var1.score.raw = c(12.3, 12.4, 14.5, 13.2),
  var1.score.raw.lower = c(11, 11, 13, 12),
  var1.score.raw.upper = c(13, 13, 15, 14),
  var2.gender = c(1, 1, 3, 3),
  var2.score.raw = c(12.3, 12.4, 14.5, 13.2),
  var2.score.raw.lower = c(11, 11, 13, 12),
  var2.score.raw.upper = c(13, 13, 15, 14)
)

The resulting list should look something like this:

$var1
$var1$gender
[1] 1 1 3 3

$var1$score
$var1$score$raw
[1] 12.3 12.4 14.5 13.2

$var1$score$lower
[1] 11 11 13 12

$var1$score$upper
[1] 13 13 15 14



$var2
$var2$gender
[1] 1 1 3 3

$var2$score
$var2$score$raw
[1] 12.3 12.4 14.5 13.2

$var2$score$lower
[1] 11 11 13 12

$var2$score$upper
[1] 13 13 15 14

Given the way "df" is structured, one straightforward approach to building the wanted list is to evaluate a call like list[["X"]][["Y"]][["Z"]][...] = df$X.Y.Z.. for each column of "df". This can be done dynamically by manipulating "language" objects.

Defining a function that accepts a list, a character vector of names/indices and a value to assign at that level, we have:

# Assign `val` inside the list `x` at the nested position named by `inds`.
#
# Arguments:
#   x    - the list to update.
#   inds - vector of names/indices, outermost level first.
#   val  - the value to store at that position.
#
# Builds the call x[[inds[1]]][[inds[2]]]... <- val, prints it, evaluates it
# and returns the updated `x`.
assign_list_element <- function(x, inds, val) {
  # Start with x[[first index]] and wrap one more `[[` per remaining index.
  target <- bquote(x[[.(inds[1])]])
  for (key in inds[-1]) {
    target <- bquote(.(target)[[.(key)]])
  }

  assignment <- call("<-", target, val)

  # Show the call that is about to be evaluated.
  print(assignment)
  flush.console()

  # eval() runs the call in this function's frame, so it updates the local `x`.
  eval(assignment)

  x
}

Some of the bquote calls could be simplified or replaced with substitute, but building the call as above gives a better-formatted call with respect to the indices (for printing purposes).

And, then, for each column of "df", re-structure a -at start empty- list:

# Split every column name into its path components, then grow the nested
# list one column at a time.
# NOTE(review): the calls printed below use paths such as var1/score/lower,
# i.e. column names without the extra ".raw." found in the sample data.frame
# -- confirm which naming was intended.
nms <- strsplit(names(df), ".", fixed = TRUE)
l <- list()
for (i in seq_along(nms)) {
  l <- assign_list_element(l, nms[[i]], df[[i]])
}
#x[["var1"]][["gender"]] <- c(1, 1, 3, 3)
#x[["var1"]][["score"]][["raw"]] <- c(12.3, 12.4, 14.5, 13.2)
#x[["var1"]][["score"]][["lower"]] <- c(11, 11, 13, 12)
#x[["var1"]][["score"]][["upper"]] <- c(13, 13, 15, 14)
#x[["var2"]][["gender"]] <- c(1, 1, 3, 3)
#x[["var2"]][["score"]][["raw"]] <- c(12.3, 12.4, 14.5, 13.2)
#x[["var2"]][["score"]][["lower"]] <- c(11, 11, 13, 12)
#x[["var2"]][["score"]][["upper"]] <- c(13, 13, 15, 14)

str(l)
#List of 2
# $ var1:List of 2
#  ..$ gender: num [1:4] 1 1 3 3
#  ..$ score :List of 3
#  .. ..$ raw  : num [1:4] 12.3 12.4 14.5 13.2
#  .. ..$ lower: num [1:4] 11 11 13 12
#  .. ..$ upper: num [1:4] 13 13 15 14
# $ var2:List of 2
#  ..$ gender: num [1:4] 1 1 3 3
#  ..$ score :List of 3
#  .. ..$ raw  : num [1:4] 12.3 12.4 14.5 13.2
#  .. ..$ lower: num [1:4] 11 11 13 12
#  .. ..$ upper: num [1:4] 13 13 15 14

Using this approach, the list is re-structured at every iteration, though its elements are not copied.

I will edit this in a moment with something that looks at periods in column names (much more complicated) but, without automating, you can create nested lists like so:

# Sample data: each column name encodes a path, with "." between levels.
df <- data.frame(
  var1.gender = c(1, 1, 3, 3),
  var1.score.raw = c(12.3, 12.4, 14.5, 13.2),
  var1.score.raw.lower = c(11, 11, 13, 12),
  var1.score.raw.upper = c(13, 13, 15, 14),
  var2.gender = c(1, 1, 3, 3),
  var2.score.raw = c(12.3, 12.4, 14.5, 13.2),
  var2.score.raw.lower = c(11, 11, 13, 12),
  var2.score.raw.upper = c(13, 13, 15, 14)
)
df

# changed your naming here to remove the not-needed ".raw."
names(df) <- c(
  "var1.gender", "var1.score.raw", "var1.score.lower", "var1.score.upper",
  "var2.gender", "var2.score.raw", "var2.score.lower", "var2.score.upper"
)

# Spell the nesting out by hand: one sub-list per variable, with the three
# score columns grouped under `score`.
nested <- list(
  var1 = list(
    gender = df[["var1.gender"]],
    score = list(
      raw = df[["var1.score.raw"]],
      lower = df[["var1.score.lower"]],
      upper = df[["var1.score.upper"]]
    )
  ),
  var2 = list(
    gender = df[["var2.gender"]],
    score = list(
      raw = df[["var2.score.raw"]],
      lower = df[["var2.score.lower"]],
      upper = df[["var2.score.upper"]]
    )
  )
)
nested
$var1
$var1$gender
[1] 1 1 3 3

$var1$score
$var1$score$raw
[1] 12.3 12.4 14.5 13.2

$var1$score$lower
[1] 11 11 13 12

$var1$score$upper
[1] 13 13 15 14



$var2
$var2$gender
[1] 1 1 3 3

$var2$score
$var2$score$raw
[1] 12.3 12.4 14.5 13.2

$var2$score$lower
[1] 11 11 13 12

$var2$score$upper
[1] 13 13 15 14

I tried to make a dynamic version of this but got lost thinking about recursion. Anyway, this may work if you extend the number of varX variables in your dataset. It's not as clean as doing it by hand, and it still leaves an $empty element in the result.

# Turn a data.frame whose column names encode a hierarchy (for example
# "var1.score.raw") into a nested list indexed by the name components.
#
# Arguments:
#   df      - data.frame with delimiter-separated column names.
#   splitby - delimiter character(s); they are placed inside a regex
#             character class ("[...]") before splitting.
#
# Returns a list nested three levels deep, L[[part1]][[part2]][[part3]].
# Names with fewer than three parts are padded with "empty", so the column
# "var1.gender" ends up at L$var1$gender$empty. Only the first three name
# parts are used; deeper parts are not expanded into extra levels.
nester <- function(df, splitby = ".") {
  # Without column names there is nothing to nest.
  if (length(colnames(df)) == 0) {
    return(list())
  }

  separated <- strsplit(colnames(df), paste0("[", splitby, "]"))
  # In order to rbind this into a matrix, all vectors need the same length.
  # Pad to at least three parts because the loops below index three levels.
  n <- max(3L, lengths(separated))
  separated <- do.call(rbind, lapply(separated, function(x) {
    length(x) <- n
    x
  }))
  separated[is.na(separated)] <- "empty"

  # Unique names per level. lapply() always returns a list, whereas
  # apply(separated, 2, unique) collapses to a matrix when every level has
  # the same number of unique names, and listnames[[k]] would then pick a
  # single cell instead of a whole level.
  listnames <- lapply(
    seq_len(ncol(separated)),
    function(j) unique(separated[, j])
  )

  L <- list()
  for (L1 in listnames[[1]]) {
    L[[L1]] <- list() # create list level 1
    for (L2 in listnames[[2]]) {
      L[[L1]][[L2]] <- list() # create list level 2
      for (L3 in listnames[[3]]) {
        L[[L1]][[L2]][[L3]] <- list() # create list level 3
        # Look the column up once for this combination of names.
        cols <- which(
          separated[, 1] == L1 & separated[, 2] == L2 & separated[, 3] == L3
        )
        column <- df[, cols]
        if (length(column) == 0) {
          # No data exists for this combination: remove the placeholder.
          L[[L1]][[L2]][[L3]] <- NULL
        } else if (L3 == "empty") {
          # Padded name: store the column as a plain, unnamed vector.
          L[[L1]][[L2]][[L3]] <- as.vector(unname(unlist(column)))
        } else {
          L[[L1]][[L2]][[L3]] <- column
        }
      }
    }
  }
  L
}
# Nest the columns of `df` by splitting their names on "." and show the result.
df.List <- nester(df = df, splitby = ".")
df.List

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM