[英]How to flatten a tibble in dplyr?
I have the following: 我有以下几点:
# Reproducible example: a two-row tibble. `label` is a list-column whose
# entries are unnamed lists of GitHub label records (id, url, name, color,
# default); `mainId` is the character key each record should be paired with.
dt <- structure(list(label = list(list(structure(list(id = 431676528L,
url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori",
name = "per sviluppatori", color = "d4c5f9", default = FALSE), .Names = c("id",
"url", "name", "color", "default")), structure(list(id = 442034204L,
url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale",
name = "sito principale", color = "5319e7", default = FALSE), .Names = c("id",
"url", "name", "color", "default"))), list(structure(list(id = 442051239L,
url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/mappa",
name = "mappa", color = "0052cc", default = FALSE), .Names = c("id",
"url", "name", "color", "default")), structure(list(id = 431676528L,
url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori",
name = "per sviluppatori", color = "d4c5f9", default = FALSE), .Names = c("id",
"url", "name", "color", "default")), structure(list(id = 442034204L,
url = "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale",
name = "sito principale", color = "5319e7", default = FALSE), .Names = c("id",
"url", "name", "color", "default")))), mainId = c("216226960",
"215647494")), class = c("tbl_df", "tbl", "data.frame"), row.names = c(NA,
-2L), .Names = c("label", "mainId"))
# A tibble: 2 x 2
label mainId
<list> <chr>
1 <list [2]> 216226960
2 <list [3]> 215647494
And I would like to flatten the values from label
pairing them with mainId
(so that I can link each subelement from label
with its main ID). 而且,我想把label
中的值展开,并与mainId
配对(以便可以将label
的每个子元素与其主ID关联起来)。 Thus I'm trying to get a tibble with headers: label
, url
, name
, color
, mainId
因此,我想得到一个列名如下的tibble
: label
, url
, name
, color
, mainId
I thought I would use something like flatten_dfr()
but I don't know how to retain mainId
, mostly because I'm not sure how the argument flatten_dfr(.id='')
is supposed to work. 我以为我会使用flatten_dfr()
之类的东西,但我不知道如何保留mainId
,主要是因为我不确定参数flatten_dfr(.id='')
应该如何工作。
For instance 例如
dt %>% flatten_dfr(.id= 'mainId')
Error in bind_rows_(x, .id) : Argument 1 must have names bind_rows_(x,.id)中的错误:参数1必须具有名称
What is Argument 1? 什么是参数1? I would guess it's .x
but I'm confused by this unusual naming. 我想它是.x
但是我对这种不寻常的命名感到困惑。 Also, row names are deprecated in tibbles, so I'm very confused about what kind of names the function looks for (colnames? rownames?). 另外,行名在tibble中已被弃用,因此我对该函数查找的是哪种名称(列名?行名?)感到非常困惑。
UPDATE: 更新:
this is the output from traceback()
这是traceback()
的输出
> traceback()
13: stop(list(message = "Argument 1 must have names", call = bind_rows_(x,
.id), cppstack = list(file = "", line = -1L, stack = c("1 dplyr.so 0x000000010a9ce464 _ZN4Rcpp9exceptionC2EPKcb + 276",
"2 dplyr.so 0x000000010a9ce2a0 _ZN4Rcpp4stopERKNSt3__112basic_stringIcNS0_11char_traitsIcEENS0_9allocatorIcEEEE + 48",
"3 dplyr.so 0x000000010a9e5ae6 _ZN5dplyr11bad_pos_argIPKcEEviT_ + 918",
"4 dplyr.so 0x000000010a9fd43a _Z11rbind__implN4Rcpp6VectorILi19ENS_15PreserveStorageEEERKN5dplyr12SymbolStringE + 4634",
"5 dplyr.so 0x000000010a9ffcce _Z10bind_rows_N4Rcpp6VectorILi19ENS_15PreserveStorageEEEP7SEXPREC + 286",
"6 dplyr.so 0x000000010a9c28bc _dplyr_bind_rows_ + 140",
"7 libR.dylib 0x0000000101201b90 R_doDotCall + 64",
"8 libR.dylib 0x0000000101203db3 do_dotcall + 355",
"9 libR.dylib 0x0000000101232249 Rf_eval + 1657",
"10 libR.dylib 0x0000000101244a40 do_begin + 656",
"11 libR.dylib 0x0000000101231f78 Rf_eval + 936",
"12 libR.dylib 0x00000001012428e2 R_execClosure + 898",
"13 libR.dylib 0x00000001012320f6 Rf_eval + 1318",
"14 libR.dylib 0x0000000101244a40 do_begin + 656",
"15 libR.dylib 0x0000000101231f78 Rf_eval + 936",
"16 libR.dylib 0x00000001012428e2 R_execClosure + 898",
"17 libR.dylib 0x00000001012320f6 Rf_eval + 1318",
"18 libR.dylib 0x0000000101244a40 do_begin + 656",
"19 libR.dylib 0x0000000101231f78 Rf_eval + 936",
"20 libR.dylib 0x00000001012428e2 R_execClosure + 898",
"21 libR.dylib 0x00000001012320f6 Rf_eval + 1318",
"22 libR.dylib 0x00000001012428e2 R_execClosure + 898",
"23 libR.dylib 0x0000000101236384 bcEval + 16148",
"24 libR.dylib 0x0000000101231e11 Rf_eval + 577",
"25 libR.dylib 0x0000000101240949 forcePromise + 169",
"26 libR.dylib 0x0000000101232071 Rf_eval + 1185",
"27 libR.dylib 0x0000000101246761 do_withVisible + 49",
"28 libR.dylib 0x0000000101281e00 do_internal + 336",
"29 libR.dylib 0x0000000101236959 bcEval + 17641",
"30 libR.dylib 0x0000000101231e11 Rf_eval + 577",
"31 libR.dylib 0x00000001012428e2 R_execClosure + 898",
"32 libR.dylib 0x0000000101236384 bcEval + 16148",
"33 libR.dylib 0x0000000101231e11 Rf_eval + 577",
"34 libR.dylib 0x00000001012428e2 R_execClosure + 898",
"35 libR.dylib 0x00000001012320f6 Rf_eval + 1318",
"36 libR.dylib 0x00000001012428e2 R_execClosure + 898",
"37 libR.dylib 0x00000001012320f6 Rf_eval + 1318",
"38 libR.dylib 0x00000001012460fe do_eval + 1534",
"39 libR.dylib 0x00000001012367ac bcEval + 17212",
"40 libR.dylib 0x0000000101231e11 Rf_eval + 577",
"41 libR.dylib 0x00000001012428e2 R_execClosure + 898",
"42 libR.dylib 0x0000000101236384 bcEval + 16148",
"43 libR.dylib 0x0000000101231e11 Rf_eval + 577",
"44 libR.dylib 0x0000000101240949 forcePromise + 169",
"45 libR.dylib 0x0000000101232071 Rf_eval + 1185",
"46 libR.dylib 0x0000000101246761 do_withVisible + 49",
"47 libR.dylib 0x0000000101281e00 do_internal + 336",
"48 libR.dylib 0x0000000101236959 bcEval + 17641",
"49 libR.dylib 0x0000000101231e11 Rf_eval + 577",
"50 libR.dylib 0x00000001012428e2 R_execClosure + 898",
"51 libR.dylib 0x0000000101236384 bcEval + 16148",
"52 libR.dylib 0x0000000101231e11 Rf_eval + 577",
"53 libR.dylib 0x00000001012428e2 R_execClosure + 898",
"54 libR.dylib 0x00000001012320f6 Rf_eval + 1318",
"55 libR.dylib 0x00000001012729b8 Rf_ReplIteration + 904",
"56 libR.dylib 0x0000000101273eaf run_Rmainloop + 207",
"57 rsession 0x000000010084e259 _ZN7rstudio1r7session12runEmbeddedRERKNS_4core8FilePathES5_bb7SA_TYPERKNS1_9CallbacksEPNS1_17InternalCallbacksE + 425",
"58 rsession 0x000000010082d25c _ZN7rstudio1r7session3runERKNS1_8ROptionsERKNS1_10RCallbacksE + 3164",
"59 rsession 0x0000000100108603 main + 28963",
"60 rsession 0x0000000100003264 start + 52",
"61 ??? 0x000000000000000b 0x0 + 11"
))))
12: .Call(`_dplyr_bind_rows_`, dots, id)
11: bind_rows_(x, .id)
10: dplyr::bind_rows(res, .id = .id)
9: flatten_dfr(., .id = "mainId")
8: function_list[[k]](value)
7: withVisible(function_list[[k]](value))
6: freduce(value, `_function_list`)
5: `_fseq`(`_lhs`)
4: eval(quote(`_fseq`(`_lhs`)), env, env)
3: eval(quote(`_fseq`(`_lhs`)), env, env)
2: withVisible(eval(quote(`_fseq`(`_lhs`)), env, env))
1: dt %>% flatten_dfr(.id = "mainId")
UPDATE 2: 更新2:
The solutions so far can address the example I provided initially, but cannot handle NA
. 到目前为止的解决方案可以解决我最初提供的示例,但不能处理NA
。 For instance: 例如:
# Minimal failing case: a one-row tibble whose `label` entry is
# list(NA_character_), i.e. an unnamed list holding a single NA string
# instead of label records.
h <- structure(list(label = list(list(NA_character_)), mainId = "242390063"), .Names = c("label",
"mainId"), row.names = c(NA, -1L), class = c("tbl_df", "tbl",
"data.frame"))
# Applies flatten_dfr to each column of `h`; this is the call that raises the
# error quoted right after this snippet.
map_df(h, flatten_dfr)
Error in bind_rows_(x, .id) : Argument 1 must have names bind_rows_(x,.id)中的错误:参数1必须具有名称
I apologize for not including NA
in my original example, but maybe someone can elaborate on their original answer to address this issue? 对于在原始示例中不包含NA
,我深表歉意,但是也许有人可以详细说明他的原始答案以解决此问题?
First convert the data in the label
column to tibble
s and then use unnest
to extract the information in the nested label
column. 首先将label
列中的数据转换为tibble
s,然后使用unnest
提取嵌套label
列中的信息。
library(tidyverse)

# Row-bind each row's list of label records into a single tibble, then expand
# that list-column so every label record is paired with its mainId.
# The column is named explicitly: calling unnest() without columns is
# deprecated since tidyr 1.0.0.
dt %>%
  mutate(label = map(label, bind_rows)) %>%
  unnest(label)
# A tibble: 5 x 6
# mainId id url name color default
# <chr> <int> <chr> <chr> <chr> <lgl>
# 1 216226960 431676528 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori per sviluppatori d4c5f9 F
# 2 216226960 442034204 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale sito principale 5319e7 F
# 3 215647494 442051239 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/mappa mappa 0052cc F
# 4 215647494 431676528 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori per sviluppatori d4c5f9 F
# 5 215647494 442034204 https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale sito principale 5319e7 F
We can convert each list element to a tibble in the list column and then use unnest
. 我们可以将列表列中的每个列表元素转换为tibble,然后使用unnest
。
library(tidyverse)

# Convert every label record into a one-row tibble and row-bind them per row
# of `dt`, then expand the resulting list-column alongside mainId.
# as_tibble() replaces the deprecated as.tibble(), and the column to unnest is
# named explicitly because a bare unnest() is deprecated since tidyr 1.0.0.
dt2 <- dt %>%
  mutate(label = map(label, ~ map_dfr(., as_tibble))) %>%
  unnest(label)
dt2
# # A tibble: 5 x 6
# mainId id url name color default
# <chr> <int> <chr> <chr> <chr> <lgl>
# 1 216226960 431676528 https://api.github.com/repos/emergenzeHack/ter~ per svilup~ d4c5~ F
# 2 216226960 442034204 https://api.github.com/repos/emergenzeHack/ter~ sito princ~ 5319~ F
# 3 215647494 442051239 https://api.github.com/repos/emergenzeHack/ter~ mappa 0052~ F
# 4 215647494 431676528 https://api.github.com/repos/emergenzeHack/ter~ per svilup~ d4c5~ F
# 5 215647494 442034204 https://api.github.com/repos/emergenzeHack/ter~ sito princ~ 5319~ F
I think the answer to the specific question (what is the first argument) is that the list(s) in the "label"
column are too deeply nested and at their highest level they are unnamed. 我认为特定问题(第一个参数是什么)的答案是"label"
列中的列表嵌套太深,并且在其最高级别没有命名。 Following the traceback() output, the "x"s in the bind_rows call are being passed one by one to bind_rows_
and getting stuck there: 根据traceback()的输出,bind_rows调用中的各个“x”被逐一传递给bind_rows_
并在那里卡住:
#Notice:
#list(list(structure(list(id = 431676528L, ....
names( dt[["label"]][1] )
NULL
dt[["label"]][1]
#----------------
dt[["label"]][1]
[[1]]
[[1]][[1]]
[[1]][[1]]$id
[1] 431676528
[[1]][[1]]$url
[1] "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/per%20sviluppatori"
[[1]][[1]]$name
[1] "per sviluppatori"
[[1]][[1]]$color
[1] "d4c5f9"
[[1]][[1]]$default
[1] FALSE
[[1]][[2]]
[[1]][[2]]$id
[1] 442034204
[[1]][[2]]$url
[1] "https://api.github.com/repos/emergenzeHack/terremotocentro/labels/sito%20principale"
[[1]][[2]]$name
[1] "sito principale"
[[1]][[2]]$color
[1] "5319e7"
[[1]][[2]]$default
[1] FALSE
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.