I love the flexibility of lists in contexts where objects of formal classes are too inflexible. Basically, they could be used to map the way of handling tree structures that we are conceptually used to from our OS's standard file system.
Yet, in order to master "degrees of nestedness" exceeding 3 or 4, you need a way of computing an index of your nested list. Once you have such an index, it's not really difficult to select a specific branch, update it, add new branches to the list, in short: doing all the stuff we'd also do with files and folders in our OS's file system.
Basically, `str(object)` gets the job done already, but it only outputs text (which can be captured with `capture.output(str(object))`). I'm after transforming that into a data frame structure.
Example list
# Formal classes used for the leaf objects in branch `c`: one S4 class and one
# Reference Class, each with a single numeric slot/field `a`.
setClass("TESTCLASS_X", representation = representation(a = "numeric"))
setRefClass("TESTCLASS_Y", fields = list(a = "numeric"))

# Example nested list with three top-level branches.
src <- list(
  # Branch `a`: atomic vectors of each basic type, with length 1, length > 1
  # and length 0.
  a = list(
    a.1 = list(a.1.1 = 1, a.1.2 = 1:5, a.1.3 = integer(0)),
    a.2 = list(
      a.2.1 = "a",
      a.2.2 = c("a", "b", "c", "d", "e"),
      a.2.3 = character()
    ),
    a.3 = list(a.3.1 = 1.5, a.3.2 = c(1.5, 2.5), a.3.3 = numeric()),
    a.4 = list(
      a.4.1 = 1 + 1i,
      a.4.2 = c(0.1 + 0.3i, 0.2 + 0.2i, 0.1 + 0.1i),
      a.4.3 = complex()
    ),
    a.5 = list(a.5.1 = TRUE, a.5.2 = c(TRUE, TRUE, TRUE), a.5.3 = logical())
  ),
  # Branch `b`: matrices, data frames and deeper nested lists.
  b = list(
    b.1 = list(
      b.1.1 = list(
        b.1.1.1 = matrix(1, ncol = 1),
        b.1.1.2 = matrix(1:4, ncol = 2, dimnames = list(NULL, c("a", "b"))),
        b.1.1.3 = matrix()
      ),
      b.1.2 = list(
        b.1.2.1 = data.frame(1),
        b.1.2.2 = data.frame(a = 1:3, b = 1:3),
        b.1.2.3 = data.frame()
      )
    ),
    b.2 = list(
      b.2.1 = list(
        b.2.1.1 = list(a = 1),
        b.2.1.2 = list(a = 1:2, b = letters[1:2]),
        b.2.1.3 = list(a = 1:2, b = "a", c = 1:3, d = FALSE, e = 1:5),
        b.2.1.4 = list()
      )
    )
  ),
  # Branch `c`: an environment, an S4 instance and a Reference Class instance.
  c = list(
    c.1 = list(
      c.1.1 = new.env(),
      c.1.2 = new("TESTCLASS_X", a = 1:5),
      c.1.3 = new("TESTCLASS_Y", a = 1:5)
    )
  )
)
Desired output
name pos is.top is.bottom class is.s4 dim
1 a 1 TRUE FALSE list FALSE 5
2 a/a.1 2 FALSE FALSE list FALSE 3
3 a/a.1/a.1.1 3 FALSE TRUE numeric FALSE 1
4 a/a.1/a.1.2 3 FALSE TRUE integer FALSE 5
5 a/a.1/a.1.3 3 FALSE TRUE integer FALSE 0
6 a/a.2 2 FALSE FALSE list FALSE 3
7 a/a.2/a.2.1 3 FALSE TRUE character FALSE 1
8 a/a.2/a.2.2 3 FALSE TRUE character FALSE 5
9 a/a.2/a.2.3 3 FALSE TRUE character FALSE 0
10 a/a.3 2 FALSE FALSE list FALSE 3
11 a/a.3/a.3.1 3 FALSE TRUE numeric FALSE 1
12 a/a.3/a.3.2 3 FALSE TRUE numeric FALSE 2
13 a/a.3/a.3.3 3 FALSE TRUE numeric FALSE 0
14 a/a.4 2 FALSE FALSE list FALSE 3
15 a/a.4/a.4.1 3 FALSE TRUE complex FALSE 1
16 a/a.4/a.4.2 3 FALSE TRUE complex FALSE 3
17 a/a.4/a.4.3 3 FALSE TRUE complex FALSE 0
18 a/a.5 2 FALSE FALSE list FALSE 3
19 a/a.5/a.5.1 3 FALSE TRUE logical FALSE 1
20 a/a.5/a.5.2 3 FALSE TRUE logical FALSE 3
21 a/a.5/a.5.3 3 FALSE TRUE logical FALSE 0
22 b 1 TRUE FALSE list FALSE 2
23 b/b.1 2 FALSE FALSE list FALSE 2
24 b/b.1/b.1.1 3 FALSE FALSE list FALSE 3
25 b/b.1/b.1.1/b.1.1.1 4 FALSE TRUE matrix FALSE 1-1
26 b/b.1/b.1.1/b.1.1.2 4 FALSE TRUE matrix FALSE 2-2
27 b/b.1/b.1.1/b.1.1.3 4 FALSE TRUE matrix FALSE 1-1
28 b/b.1/b.1.2 3 FALSE FALSE list FALSE 3
29 b/b.1/b.1.2/b.1.2.1 4 FALSE TRUE data.frame FALSE 1-1
30 b/b.1/b.1.2/b.1.2.2 4 FALSE TRUE data.frame FALSE 3-2
31 b/b.1/b.1.2/b.1.2.3 4 FALSE TRUE data.frame FALSE 0-0
32 b/b.2 2 FALSE FALSE list FALSE 1
33 b/b.2/b.2.1 3 FALSE FALSE list FALSE 4
34 b/b.2/b.2.1/b.2.1.1 4 FALSE FALSE list FALSE 1
35 b/b.2/b.2.1/b.2.1.1/a 5 FALSE TRUE numeric FALSE 1
36 b/b.2/b.2.1/b.2.1.2 4 FALSE FALSE list FALSE 2
37 b/b.2/b.2.1/b.2.1.2/a 5 FALSE TRUE integer FALSE 2
38 b/b.2/b.2.1/b.2.1.2/b 5 FALSE TRUE character FALSE 2
39 b/b.2/b.2.1/b.2.1.3 4 FALSE FALSE list FALSE 5
40 b/b.2/b.2.1/b.2.1.3/a 5 FALSE TRUE integer FALSE 2
41 b/b.2/b.2.1/b.2.1.3/b 5 FALSE TRUE character FALSE 1
42 b/b.2/b.2.1/b.2.1.3/c 5 FALSE TRUE integer FALSE 3
43 b/b.2/b.2.1/b.2.1.3/d 5 FALSE TRUE logical FALSE 1
44 b/b.2/b.2.1/b.2.1.3/e 5 FALSE TRUE integer FALSE 5
45 b/b.2/b.2.1/b.2.1.4 4 FALSE FALSE list FALSE 0
46 c 1 TRUE FALSE list FALSE 1
47 c/c.1 2 FALSE FALSE list FALSE 3
48 c/c.1/c.1.1 3 FALSE TRUE environment FALSE <NA>
49 c/c.1/c.1.2 3 FALSE TRUE TESTCLASS_X TRUE 1
50 c/c.1/c.1.3 3 FALSE TRUE TESTCLASS_Y TRUE 1
Spacedman asked me if that isn't simply the result of a depth-first search. Well, it definitely is, but it is hard to compete with `capture.output(str())`, since that is based on C (IIRC). Here's something similar to my first depth-first approaches (`src` is taken from above):
#' Build a nested index of a (possibly nested) list
#'
#' Walks `src` depth-first. Every non-empty plain list (class exactly
#' `"list"`) is recursed into; every other element, including empty lists, is
#' described by a one-row data frame.
#'
#' @param src A list, possibly containing further lists.
#' @return A list with one entry per element of `src`. An entry is either a
#'   one-row data frame with the columns `path`, `pos`, `is.top`, `is.bottom`,
#'   `class` and `dim`, or (for a non-empty plain list) the nested result of
#'   calling `objIndex()` on that element. An empty `src` gives an empty list.
objIndex <- function(src) {
  # One-row description of a terminal node.
  describe_leaf <- function(name, obj) {
    data.frame(
      # Unnamed containers yield NULL names; record that as a missing path
      # instead of silently dropping the column.
      path = if (is.null(name)) NA_character_ else name,
      pos = NA,
      is.top = FALSE,
      is.bottom = TRUE,
      # Objects may carry several classes (e.g. c("matrix", "array") since
      # R 4.0.0); keep the first so the result is always exactly one row.
      class = class(obj)[1L],
      dim = length(obj)
    )
  }

  # seq_along() is safe for zero-length input, unlike 1:length(src).
  lapply(seq_along(src), function(x) {
    element <- src[[x]]
    # identical() gives a scalar condition even for multi-class objects and
    # deliberately excludes list-based classes such as data.frame.
    if (identical(class(element), "list") && length(element) > 0L) {
      objIndex(src = element)
    } else {
      describe_leaf(names(src[x]), element)
    }
  })
}
> objIndex(src)
[[1]]
[[1]][[1]]
[[1]][[1]][[1]]
path pos is.top is.bottom class dim
1 a.1.1 NA FALSE TRUE numeric 1
[[1]][[1]][[2]]
path pos is.top is.bottom class dim
1 a.1.2 NA FALSE TRUE integer 5
[[1]][[1]][[3]]
path pos is.top is.bottom class dim
1 a.1.3 NA FALSE TRUE integer 0
[[1]][[2]]
[[1]][[2]][[1]]
path pos is.top is.bottom class dim
1 a.2.1 NA FALSE TRUE character 1
[[1]][[2]][[2]]
path pos is.top is.bottom class dim
1 a.2.2 NA FALSE TRUE character 5
[[1]][[2]][[3]]
path pos is.top is.bottom class dim
1 a.2.3 NA FALSE TRUE character 0
[[1]][[3]]
[[1]][[3]][[1]]
path pos is.top is.bottom class dim
1 a.3.1 NA FALSE TRUE numeric 1
[[1]][[3]][[2]]
path pos is.top is.bottom class dim
1 a.3.2 NA FALSE TRUE numeric 2
[[1]][[3]][[3]]
path pos is.top is.bottom class dim
1 a.3.3 NA FALSE TRUE numeric 0
[[1]][[4]]
[[1]][[4]][[1]]
path pos is.top is.bottom class dim
1 a.4.1 NA FALSE TRUE complex 1
[[1]][[4]][[2]]
path pos is.top is.bottom class dim
1 a.4.2 NA FALSE TRUE complex 3
[[1]][[4]][[3]]
path pos is.top is.bottom class dim
1 a.4.3 NA FALSE TRUE complex 0
[[1]][[5]]
[[1]][[5]][[1]]
path pos is.top is.bottom class dim
1 a.5.1 NA FALSE TRUE logical 1
[[1]][[5]][[2]]
path pos is.top is.bottom class dim
1 a.5.2 NA FALSE TRUE logical 3
[[1]][[5]][[3]]
path pos is.top is.bottom class dim
1 a.5.3 NA FALSE TRUE logical 0
[[2]]
[[2]][[1]]
[[2]][[1]][[1]]
[[2]][[1]][[1]][[1]]
path pos is.top is.bottom class dim
1 b.1.1.1 NA FALSE TRUE matrix 1
[[2]][[1]][[1]][[2]]
path pos is.top is.bottom class dim
1 b.1.1.2 NA FALSE TRUE matrix 4
[[2]][[1]][[1]][[3]]
path pos is.top is.bottom class dim
1 b.1.1.3 NA FALSE TRUE matrix 1
[[2]][[1]][[2]]
[[2]][[1]][[2]][[1]]
path pos is.top is.bottom class dim
1 b.1.2.1 NA FALSE TRUE data.frame 1
[[2]][[1]][[2]][[2]]
path pos is.top is.bottom class dim
1 b.1.2.2 NA FALSE TRUE data.frame 2
[[2]][[1]][[2]][[3]]
path pos is.top is.bottom class dim
1 b.1.2.3 NA FALSE TRUE data.frame 0
[[2]][[2]]
[[2]][[2]][[1]]
[[2]][[2]][[1]][[1]]
[[2]][[2]][[1]][[1]][[1]]
path pos is.top is.bottom class dim
1 <<1>> NA FALSE TRUE numeric 1
[[2]][[2]][[1]][[2]]
[[2]][[2]][[1]][[2]][[1]]
path pos is.top is.bottom class dim
1 a NA FALSE TRUE integer 2
[[2]][[2]][[1]][[2]][[2]]
path pos is.top is.bottom class dim
1 b NA FALSE TRUE character 2
[[2]][[2]][[1]][[3]]
[[2]][[2]][[1]][[3]][[1]]
path pos is.top is.bottom class dim
1 <<1>> NA FALSE TRUE integer 2
[[2]][[2]][[1]][[3]][[2]]
path pos is.top is.bottom class dim
1 <<2>> NA FALSE TRUE character 1
[[2]][[2]][[1]][[3]][[3]]
path pos is.top is.bottom class dim
1 <<3>> NA FALSE TRUE integer 3
[[2]][[2]][[1]][[3]][[4]]
path pos is.top is.bottom class dim
1 <<4>> NA FALSE TRUE logical 1
[[2]][[2]][[1]][[3]][[5]]
path pos is.top is.bottom class dim
1 <<5>> NA FALSE TRUE integer 5
[[2]][[2]][[1]][[4]]
path pos is.top is.bottom class dim
1 b.2.1.4 NA FALSE TRUE list 0
[[3]]
[[3]][[1]]
[[3]][[1]][[1]]
path pos is.top is.bottom class dim
1 c.1.1 NA FALSE TRUE environment 0
[[3]][[1]][[2]]
path pos is.top is.bottom class dim
1 c.1.2 NA FALSE TRUE TESTCLASS_X 1
[[3]][[1]][[3]]
path pos is.top is.bottom class dim
1 c.1.3 NA FALSE TRUE TESTCLASS_Y 1
A partial solution can be obtained with `rapply`. This only calculates values for the end nodes, so you get the portion of the data frame where `is.bottom` is `TRUE`. Getting the rest of the table should be possible from inspecting names, but I suspect it's faffy, and those nodes are fairly boring (all just `class = "list"`, `is.S4 = FALSE`).
The `class` and `is.S4` columns are easy to obtain.
# Class of every end node. `class()` can return more than one string (for
# example c("matrix", "array") for a matrix since R 4.0.0); keep only the first
# so there is exactly one value per node and `out_class` stays aligned with
# `out_isS4`.
out_class <- rapply(src, function(x) class(x)[1L])
# Whether each end node is an S4 object.
out_isS4 <- rapply(src, isS4)
I'm not quite sure what you are doing with the `dim` column, but a similar call should get you started.
# Length of every end node, flattened into one named vector.
out_dim <- rapply(object = src, f = length, how = "unlist")
To get the names how you want them, we use the trick that you want the slash separators where there is a dot followed by a letter. This will probably break if you don't have named elements in the list, though I've not tested it.
# Separator placed between nesting levels in the path-style names.
sep <- "/"
# Replace every dot that is immediately followed by a letter with `sep`;
# dots followed by a digit (as in "a.1") are left untouched.
out_name <- gsub(
  pattern = "\\.(?=[[:alpha:]])",
  replacement = sep,
  x = names(out_class),
  perl = TRUE
)
Likewise, the depth of nesting can be found from the number of slashes that you just added to the names.
# Nesting depth of each end node: the number of `sep`-separated components in
# its path. `vapply()` always returns an integer vector, even for zero-length
# input (where `sapply()` would return an empty list), and `fixed = TRUE`
# matches `sep` literally instead of as a regular expression.
out_pos <- vapply(strsplit(out_name, sep, fixed = TRUE), length, integer(1))
Finally, we combine these into a data frame.
# Assemble the per-node vectors into one data frame, one row per end node.
out <- data.frame(
  name = out_name,
  pos = out_pos,
  class = out_class,
  is.S4 = out_isS4,
  dim = out_dim
)
# Show the result.
out
After two crappy approaches, I came up with the one posted here. Yet it still feels like this is too complicated and that it could be done more easily.
I'm very sorry for the messed up format! I'll move the code to a Github repo in a couple of days.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.