R - Nested list to tibble

Question

I have a nested list like so:

> ex <- list(
+   list(
+     c("This", "is", "an", "example", "."),
+     c("I", "really", "hate", "examples", ".")
+   ),
+   list(
+     c("How", "do", "you", "feel", "about", "examples", "?")
+   )
+ )
> ex
[[1]]
[[1]][[1]]
[1] "This"    "is"      "an"      "example" "."      

[[1]][[2]]
[1] "I"        "really"   "hate"     "examples" "."       


[[2]]
[[2]][[1]]
[1] "How"      "do"       "you"      "feel"     "about"    "examples" "?"

I want to convert it to a tibble like so:

> # Desired result: one row per token of `ex`. Each index column is 1-based:
> #   d_id  = position in the outer list,
> #   s_id  = position in the inner list,
> #   t_id  = position inside the character vector.
> tibble(d_id = as.integer(c(1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2)),
+        s_id = as.integer(c(1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 1, 1, 1, 1, 1, 1, 1)),
+        t_id = as.integer(c(1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 1, 2, 3, 4, 5, 6, 7)),
+        token = c("This", "is", "an", "example", ".", "I", "really",
+                  "hate", "examples", ".", "How", "do", "you", "feel", "about", "examples", "?"))
# A tibble: 17 x 4
    d_id  s_id  t_id token   
   <int> <int> <int> <chr>   
 1     1     1     1 This    
 2     1     1     2 is      
 3     1     1     3 an      
 4     1     1     4 example 
 5     1     1     5 .       
 6     1     2     1 I       
 7     1     2     2 really  
 8     1     2     3 hate    
 9     1     2     4 examples
10     1     2     5 .       
11     2     1     1 How     
12     2     1     2 do      
13     2     1     3 you     
14     2     1     4 feel    
15     2     1     5 about   
16     2     1     6 examples
17     2     1     7 ?

What is the most efficient way for me to perform this? Preferably using tidyverse functionality?

Answer 1

We can do

# Flatten `ex` into one row per token with integer d_id / s_id / t_id columns.
ex %>%
  # Name the outer elements 1..n so bind_rows() can emit them via `.id`.
  set_names(seq_along(ex)) %>%
  # Name the inner vectors 1..m, then stack() each inner list into a
  # two-column data frame: `values` (the tokens) and `ind` (the inner name).
  map(~ set_names(.x, seq_along(.x)) %>%
        stack()) %>%
  bind_rows(.id = "d_id") %>%
  # `.id` yields character and `ind` is a factor; convert both to the integer
  # ids the question asks for. Going through as.character() reads the label
  # itself rather than the factor's internal level code.
  mutate(
    d_id = as.integer(d_id),
    s_id = as.integer(as.character(ind))
  ) %>%
  group_by(d_id, s_id) %>%
  mutate(t_id = row_number()) %>%
  # Drop the grouping so later verbs are not silently applied per group.
  ungroup() %>%
  select(d_id, s_id, t_id, token = values)
# A tibble: 17 x 4
#    d_id  s_id  t_id token   
#   <int> <int> <int> <chr>   
# 1     1     1     1 This    
# 2     1     1     2 is      
# 3     1     1     3 an      
# 4     1     1     4 example 
# 5     1     1     5 .       
# 6     1     2     1 I       
# 7     1     2     2 really  
# 8     1     2     3 hate    
# 9     1     2     4 examples
#10     1     2     5 .       
#11     2     1     1 How     
#12     2     1     2 do      
#13     2     1     3 you     
#14     2     1     4 feel    
#15     2     1     5 about   
#16     2     1     6 examples
#17     2     1     7 ?

Answer 2

Time to get some sequences working, which should be very efficient:

# Build the three index columns from list lengths alone (no per-element loop).

# One outer-list index per inner vector, e.g. 1 1 2 for the example.
d_id <- rep(seq_along(ex), lengths(ex))
# Inner-list index restarting at 1 within each outer element, e.g. 1 2 1.
s_id <- sequence(lengths(ex))
# Number of tokens in each inner vector; flattening exactly one level keeps
# the character vectors intact so lengths() counts their elements.
t_id <- lengths(unlist(ex, recursive = FALSE))

data.frame(
  # Repeat each per-vector id once per token so all columns line up row-wise.
  d_id  = rep(d_id, t_id),
  s_id  = rep(s_id, t_id),
  # Token position restarting at 1 for every inner vector.
  t_id  = sequence(t_id),
  token = unlist(ex),
  # Keep `token` as character on R < 4.0 too (it is the default from 4.0 on).
  stringsAsFactors = FALSE
)

#   d_id s_id t_id    token
#1     1    1    1     This
#2     1    1    2       is
#3     1    1    3       an
#4     1    1    4  example
#5     1    1    5        .
#6     1    2    1        I
#7     1    2    2   really
#8     1    2    3     hate
#9     1    2    4 examples
#10    1    2    5        .
#11    2    1    1      How
#12    2    1    2       do
#13    2    1    3      you
#14    2    1    4     feel
#15    2    1    5    about
#16    2    1    6 examples
#17    2    1    7        ?

This will run in about 2 seconds for a 500K sample of your ex list. I suspect that will be hard to beat in terms of efficiency.

Answer 3

You can use melt from the reshape2 package:

library(data.table)
# The list method of melt() comes from reshape2, so call it by namespace
# instead of relying on whichever `melt` generic is on the search path.
# The melted frame has one row per token, with the outer-list index in `L1`,
# the inner-list index in `L2` and the token itself in `value`.
setDT(reshape2::melt(ex))[
  ,
  # rowid(L1, L2) numbers the rows within each (L1, L2) combination.
  .(d_id = L1, s_id = L2, t_id = rowid(L1, L2), token = value)
]

    d_id s_id t_id    token
 1:    1    1    1     This
 2:    1    1    2       is
 3:    1    1    3       an
 4:    1    1    4  example
 5:    1    1    5        .
 6:    1    2    1        I
 7:    1    2    2   really
 8:    1    2    3     hate
 9:    1    2    4 examples
10:    1    2    5        .
11:    2    1    1      How
12:    2    1    2       do
13:    2    1    3      you
14:    2    1    4     feel
15:    2    1    5    about
16:    2    1    6 examples
17:    2    1    7        ?

I'm showing it here with data.table, since I know how to do the column selection and renaming in one step from there (though it should be no trouble with dplyr instead). The melt.list function is coming from reshape2.

Answer 4

Another tidyverse solution:

library(tidyverse)
ex %>%
  # At the deepest level, wrap every token vector in a tibble and number its
  # rows in a leading `t_id` column.
  modify_depth(-1, function(tokens) {
    tibble(token = tokens) %>%
      rowid_to_column("t_id")
  }) %>%
  # Row-bind the tibbles of each inner list, recording their position as `s_id`.
  map(function(inner) {
    map_dfr(inner, identity, .id = "s_id")
  }) %>%
  # Row-bind across the outer list, recording the position as `d_id`.
  map_dfr(identity, .id = "d_id")

# # A tibble: 17 x 4
#     d_id  s_id  t_id    token
#    <chr> <chr> <int>    <chr>
#  1     1     1     1     This
#  2     1     1     2       is
#  3     1     1     3       an
#  4     1     1     4  example
#  5     1     1     5        .
#  6     1     2     1        I
#  7     1     2     2   really
#  8     1     2     3     hate
#  9     1     2     4 examples
# 10     1     2     5        .
# 11     2     1     1      How
# 12     2     1     2       do
# 13     2     1     3      you
# 14     2     1     4     feel
# 15     2     1     5    about
# 16     2     1     6 examples
# 17     2     1     7        ?

R - Nested list to tibble

Question

4 answers

solution1
6 2018-04-19 01:04:40

solution2
5 ACCEPTED 2018-04-19 01:59:51

solution3
4 2018-04-19 02:25:51

solution4
1 2018-04-20 18:48:29

R - Nested list to tibble

Question

4 answers

solution1 6 2018-04-19 01:04:40

solution2 5 ACCEPTED 2018-04-19 01:59:51

solution3 4 2018-04-19 02:25:51

solution4 1 2018-04-20 18:48:29

solution1
6 2018-04-19 01:04:40

solution2
5 ACCEPTED 2018-04-19 01:59:51

solution3
4 2018-04-19 02:25:51

solution4
1 2018-04-20 18:48:29