简体   繁体   English

将嵌套列表转换为R中的data.frame

[英]convert nested lists to data.frame in R

I have a problem converting nested lists to a data.frame. 我在将嵌套列表转换为data.frame时遇到问题。

First, I downloaded a dataset in JSON format from the Data API: 首先,我从Data API下载了JSON格式的数据集:

   # Send a JSON-encoded POST request to the Data API. The API key travels in
   # the 'x-dataapi-key' header and the body carries the value of `oibreq`,
   # which is defined outside this snippet.
   # NOTE(review): POST/add_headers/content look like httr and fromJSON/toJSON
   # like jsonlite; the library() calls are not shown here -- confirm.
   request2 <-
  POST(
    url = "https://xxxx",
    add_headers('x-dataapi-key' = "xxxx", 'content-type' = "application/json"),
    body = list(oib = oibreq),
    encode = "json"
  )
# Parse the response body, treating it as application/json.
jsonContent2 <- content(request2, type = "application/json")
# Serialize the parsed content back to JSON (NULL written as JSON null) and
# parse it again with flatten = TRUE; the result is the data.frame `json2`
# whose columns are lists, as discussed in the rest of the post.
json2 <-
  fromJSON(toJSON(jsonContent2, null = "null"), flatten = TRUE)

The object json2 has the form of nested lists. 对象json2具有嵌套列表的形式。 Here is the dataset: 这是数据集:

> sample <- dput(json2)
structure(
  list(
    datumStanja = list(
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00",
      "2016-03-02T00:00:00+01:00"
    ),
    oib = list(
      "00045103869",
      "92680516748",
      "18527887472",
      "18527887472",
      "18527887472",
      "18527887472",
      "00045103869",
      "00045103869",
      "18527887472",
      "92680516748"
    ),
    iban = list(
      "HR0424840081101570980",
      "HR8623400091110462926",
      "HR9123400091110714260",
      "HR5124850031100201015",
      "HR4224910051100006698",
      "HR7524810001100101268",
      "HR8225000091101167416",
      "HR3223400091110156505",
      "HR6323400091110193874",
      "HR4223300031100429609"
    ),
    blokada = list(TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE, FALSE,
                   FALSE, FALSE),
    vbdi = list(
      "2484008",
      "2340009",
      "2340009",
      "2485003",
      "2491005",
      "2481000",
      "2500009",
      "2340009",
      "2340009",
      "2330003"
    ),
    brojRacuna = list(
      "1101570980",
      "1110462926",
      "1110714260",
      "1100201015",
      "1100006698",
      "1100101268",
      "1101167416",
      "1110156505",
      "1110193874",
      "1100429609"
    ),
    banka = list(
      "RAIFFEISENBANK AUSTRIA d.d.",
      "PRIVREDNA BANKA ZAGREB d.d.",
      "PRIVREDNA BANKA ZAGREB d.d.",
      "CROATIA BANKA d.d.",
      "CREDO BANKA d.d.",
      "KREDITNA BANKA ZAGREB d.d.",
      "HYPO ALPE-ADRIA-BANK d.d.",
      "PRIVREDNA BANKA ZAGREB d.d.",
      "PRIVREDNA BANKA ZAGREB d.d.",
      "SOCIETE GENERALE - SPLITSKA BANKA d.d."
    ),
    datumOtvaranja = list(
      "2003-02-19T00:00:00+01:00",
      "2011-02-08T00:00:00+01:00",
      "2015-03-30T00:00:00+02:00",
      "2002-02-21T00:00:00+01:00",
      "2002-04-16T00:00:00+02:00",
      "2002-06-24T00:00:00+02:00",
      "2004-07-19T00:00:00+02:00",
      "2004-09-08T00:00:00+02:00",
      "2005-09-28T00:00:00+02:00",
      "2009-12-21T00:00:00+01:00"
    ),
    datumZatvaranja = list(
      NULL,
      NULL,
      NULL,
      "2009-11-06T00:00:00+01:00",
      "2009-02-17T00:00:00+01:00",
      "2009-03-18T00:00:00+01:00",
      "2008-08-14T00:00:00+02:00",
      "2009-07-13T00:00:00+02:00",
      "2013-09-18T00:00:00+02:00",
      "2013-07-09T00:00:00+02:00"
    ),
    povijestBlokada = list(
      structure(
        list(
          pocetak = list(
            "2011-08-04T00:00:00+02:00",
            "2011-09-06T00:00:00+02:00",
            "2011-11-25T00:00:00+01:00",
            "2011-12-30T00:00:00+01:00",
            "2012-02-20T00:00:00+01:00",
            "2012-03-23T00:00:00+01:00",
            "2012-05-21T00:00:00+02:00"
          ),
          kraj = list(
            "2011-08-10T00:00:00+02:00",
            "2011-09-13T00:00:00+02:00",
            "2011-12-28T00:00:00+01:00",
            "2012-01-16T00:00:00+01:00",
            "2012-03-16T00:00:00+01:00",
            "2012-05-16T00:00:00+02:00",
            NULL
          ),
          brojDana = list(6L, 7L, 33L, 17L, 25L, 54L,
                          1381L)
        ),
        .Names = c("pocetak", "kraj", "brojDana"),
        class = "data.frame",
        row.names = c(NA, 7L)
      ),
      structure(
        list(
          pocetak = list(
            "2012-05-30T00:00:00+02:00",
            "2012-06-21T00:00:00+02:00",
            "2012-06-29T00:00:00+02:00",
            "2012-09-06T00:00:00+02:00",
            "2014-06-09T00:00:00+02:00"
          ),
          kraj = list(
            "2012-06-05T00:00:00+02:00",
            "2012-06-26T00:00:00+02:00",
            "2012-07-03T00:00:00+02:00",
            "2013-03-06T00:00:00+01:00",
            NULL
          ),
          brojDana = list(6L, 5L, 4L, 181L, 632L)
        ),
        .Names = c("pocetak", "kraj",
                   "brojDana"),
        class = "data.frame",
        row.names = c(NA, 5L)
      ),

      structure(
        list(
          pocetak = list("2015-03-31T00:00:00+02:00"),
          kraj = list("2015-09-30T00:00:00+02:00"),
          brojDana = list(183L)
        ),
        .Names = c("pocetak", "kraj", "brojDana"),
        class = "data.frame",
        row.names = 1L
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(),
        .Names = character(0),
        row.names = integer(0),
        class = "data.frame"
      ),
      structure(
        list(
          pocetak = list(
            "2012-05-30T00:00:00+02:00",
            "2012-06-21T00:00:00+02:00",
            "2012-06-29T00:00:00+02:00",
            "2012-09-06T00:00:00+02:00"
          ),
          kraj = list(
            "2012-06-05T00:00:00+02:00",
            "2012-06-26T00:00:00+02:00",
            "2012-07-03T00:00:00+02:00",
            "2013-03-06T00:00:00+01:00"
          ),
          brojDana = list(6L,
                          5L, 4L, 181L)
        ),
        .Names = c("pocetak", "kraj", "brojDana"),
        class = "data.frame",
        row.names = c(NA, 4L)
      )
    ),
    isActive = list(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE,
                    FALSE, FALSE)
  ),
  .Names = c(
    "datumStanja",
    "oib",
    "iban",
    "blokada",
    "vbdi",
    "brojRacuna",
    "banka",
    "datumOtvaranja",
    "datumZatvaranja",
    "povijestBlokada",
    "isActive"
  ),
  class = "data.frame",
  row.names = c(NA,
                10L)
)

Here is the structure of the first row as an example: 这是第一行的结构作为示例:

str(json2[1,])
'data.frame':   1 obs. of  11 variables:
 $ datumStanja    :List of 1
  ..$ : chr "2016-03-02T00:00:00+01:00"
 $ oib            :List of 1
  ..$ : chr "00045103869"
 $ iban           :List of 1
  ..$ : chr "HR0424840081101570980"
 $ blokada        :List of 1
  ..$ : logi TRUE
 $ vbdi           :List of 1
  ..$ : chr "2484008"
 $ brojRacuna     :List of 1
  ..$ : chr "1101570980"
 $ banka          :List of 1
  ..$ : chr "RAIFFEISENBANK AUSTRIA d.d."
 $ datumOtvaranja :List of 1
  ..$ : chr "2003-02-19T00:00:00+01:00"
 $ datumZatvaranja:List of 1
  ..$ : NULL
 $ povijestBlokada:List of 1
  ..$ :'data.frame':    7 obs. of  3 variables:
  .. ..$ pocetak :List of 7
  .. .. ..$ : chr "2011-08-04T00:00:00+02:00"
  .. .. ..$ : chr "2011-09-06T00:00:00+02:00"
  .. .. ..$ : chr "2011-11-25T00:00:00+01:00"
  .. .. ..$ : chr "2011-12-30T00:00:00+01:00"
  .. .. ..$ : chr "2012-02-20T00:00:00+01:00"
  .. .. ..$ : chr "2012-03-23T00:00:00+01:00"
  .. .. ..$ : chr "2012-05-21T00:00:00+02:00"
  .. ..$ kraj    :List of 7
  .. .. ..$ : chr "2011-08-10T00:00:00+02:00"
  .. .. ..$ : chr "2011-09-13T00:00:00+02:00"
  .. .. ..$ : chr "2011-12-28T00:00:00+01:00"
  .. .. ..$ : chr "2012-01-16T00:00:00+01:00"
  .. .. ..$ : chr "2012-03-16T00:00:00+01:00"
  .. .. ..$ : chr "2012-05-16T00:00:00+02:00"
  .. .. ..$ : NULL
  .. ..$ brojDana:List of 7
  .. .. ..$ : int 6
  .. .. ..$ : int 7
  .. .. ..$ : int 33
  .. .. ..$ : int 17
  .. .. ..$ : int 25
  .. .. ..$ : int 54
  .. .. ..$ : int 1381
 $ isActive       :List of 1
  ..$ : logi TRUE

As you can see, the variable "povijestBlokada" contains lists inside a list. 如您所见,变量“povijestBlokada”在列表中包含列表。 My goal is to convert this nested list object to a data.frame with one column per distinct type of value. 我的目标是将此嵌套列表对象转换为data.frame,每个不同类型的值对应一列。

I tried the data.tree package, but I couldn't convert it with the as.Node function. 我尝试使用data.tree包,但无法使用as.Node函数转换它。 Do you have any suggestions? 你有什么建议吗?

I am adding the original JSON data (I have used dput; I'm sure there is a better way to do it, but I don't know how): 我正在添加原始JSON数据(我使用了dput,肯定有更好的方法,但我不知道如何做):

structure("[{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"00045103869\"],\"iban\":[\"HR0424840081101570980\"],\"blokada\":[true],\"vbdi\":[\"2484008\"],\"brojRacuna\":[\"1101570980\"],\"banka\":[\"RAIFFEISENBANK AUSTRIA d.d.\"],\"datumOtvaranja\":[\"2003-02-19T00:00:00+01:00\"],\"datumZatvaranja\":null,\"povijestBlokada\":[{\"pocetak\":[\"2011-08-04T00:00:00+02:00\"],\"kraj\":[\"2011-08-10T00:00:00+02:00\"],\"brojDana\":[6]},{\"pocetak\":[\"2011-09-06T00:00:00+02:00\"],\"kraj\":[\"2011-09-13T00:00:00+02:00\"],\"brojDana\":[7]},{\"pocetak\":[\"2011-11-25T00:00:00+01:00\"],\"kraj\":[\"2011-12-28T00:00:00+01:00\"],\"brojDana\":[33]},{\"pocetak\":[\"2011-12-30T00:00:00+01:00\"],\"kraj\":[\"2012-01-16T00:00:00+01:00\"],\"brojDana\":[17]},{\"pocetak\":[\"2012-02-20T00:00:00+01:00\"],\"kraj\":[\"2012-03-16T00:00:00+01:00\"],\"brojDana\":[25]},{\"pocetak\":[\"2012-03-23T00:00:00+01:00\"],\"kraj\":[\"2012-05-16T00:00:00+02:00\"],\"brojDana\":[54]},{\"pocetak\":[\"2012-05-21T00:00:00+02:00\"],\"kraj\":null,\"brojDana\":[1389]}],\"isActive\":[true]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"92680516748\"],\"iban\":[\"HR8623400091110462926\"],\"blokada\":[true],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110462926\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB 
d.d.\"],\"datumOtvaranja\":[\"2011-02-08T00:00:00+01:00\"],\"datumZatvaranja\":null,\"povijestBlokada\":[{\"pocetak\":[\"2012-05-30T00:00:00+02:00\"],\"kraj\":[\"2012-06-05T00:00:00+02:00\"],\"brojDana\":[6]},{\"pocetak\":[\"2012-06-21T00:00:00+02:00\"],\"kraj\":[\"2012-06-26T00:00:00+02:00\"],\"brojDana\":[5]},{\"pocetak\":[\"2012-06-29T00:00:00+02:00\"],\"kraj\":[\"2012-07-03T00:00:00+02:00\"],\"brojDana\":[4]},{\"pocetak\":[\"2012-09-06T00:00:00+02:00\"],\"kraj\":[\"2013-03-06T00:00:00+01:00\"],\"brojDana\":[181]},{\"pocetak\":[\"2014-06-09T00:00:00+02:00\"],\"kraj\":null,\"brojDana\":[640]}],\"isActive\":[true]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR9123400091110714260\"],\"blokada\":[false],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110714260\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2015-03-30T00:00:00+02:00\"],\"datumZatvaranja\":null,\"povijestBlokada\":[{\"pocetak\":[\"2015-03-31T00:00:00+02:00\"],\"kraj\":[\"2015-09-30T00:00:00+02:00\"],\"brojDana\":[183]}],\"isActive\":[true]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR5124850031100201015\"],\"blokada\":[false],\"vbdi\":[\"2485003\"],\"brojRacuna\":[\"1100201015\"],\"banka\":[\"CROATIA BANKA d.d.\"],\"datumOtvaranja\":[\"2002-02-21T00:00:00+01:00\"],\"datumZatvaranja\":[\"2009-11-06T00:00:00+01:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR4224910051100006698\"],\"blokada\":[false],\"vbdi\":[\"2491005\"],\"brojRacuna\":[\"1100006698\"],\"banka\":[\"CREDO BANKA 
d.d.\"],\"datumOtvaranja\":[\"2002-04-16T00:00:00+02:00\"],\"datumZatvaranja\":[\"2009-02-17T00:00:00+01:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR7524810001100101268\"],\"blokada\":[false],\"vbdi\":[\"2481000\"],\"brojRacuna\":[\"1100101268\"],\"banka\":[\"KREDITNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2002-06-24T00:00:00+02:00\"],\"datumZatvaranja\":[\"2009-03-18T00:00:00+01:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"00045103869\"],\"iban\":[\"HR8225000091101167416\"],\"blokada\":[false],\"vbdi\":[\"2500009\"],\"brojRacuna\":[\"1101167416\"],\"banka\":[\"HYPO ALPE-ADRIA-BANK d.d.\"],\"datumOtvaranja\":[\"2004-07-19T00:00:00+02:00\"],\"datumZatvaranja\":[\"2008-08-14T00:00:00+02:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"00045103869\"],\"iban\":[\"HR3223400091110156505\"],\"blokada\":[false],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110156505\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2004-09-08T00:00:00+02:00\"],\"datumZatvaranja\":[\"2009-07-13T00:00:00+02:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"18527887472\"],\"iban\":[\"HR6323400091110193874\"],\"blokada\":[false],\"vbdi\":[\"2340009\"],\"brojRacuna\":[\"1110193874\"],\"banka\":[\"PRIVREDNA BANKA ZAGREB d.d.\"],\"datumOtvaranja\":[\"2005-09-28T00:00:00+02:00\"],\"datumZatvaranja\":[\"2013-09-18T00:00:00+02:00\"],\"povijestBlokada\":[],\"isActive\":[false]},{\"datumStanja\":[\"2016-03-10T00:00:00+01:00\"],\"oib\":[\"92680516748\"],\"iban\":[\"HR4223300031100429609\"],\"blokada\":[false],\"vbdi\":[\"2330003\"],\"brojRacuna\":[\"1100429609\"],\"banka\":[\"SOCIETE GENERALE - SPLITSKA BANKA 
d.d.\"],\"datumOtvaranja\":[\"2009-12-21T00:00:00+01:00\"],\"datumZatvaranja\":[\"2013-07-09T00:00:00+02:00\"],\"povijestBlokada\":[{\"pocetak\":[\"2012-05-30T00:00:00+02:00\"],\"kraj\":[\"2012-06-05T00:00:00+02:00\"],\"brojDana\":[6]},{\"pocetak\":[\"2012-06-21T00:00:00+02:00\"],\"kraj\":[\"2012-06-26T00:00:00+02:00\"],\"brojDana\":[5]},{\"pocetak\":[\"2012-06-29T00:00:00+02:00\"],\"kraj\":[\"2012-07-03T00:00:00+02:00\"],\"brojDana\":[4]},{\"pocetak\":[\"2012-09-06T00:00:00+02:00\"],\"kraj\":[\"2013-03-06T00:00:00+01:00\"],\"brojDana\":[181]}],\"isActive\":[false]}]", class = "json")

I managed to unnest json2$povijestBlokada with the dplyr and tidyr libraries: 我设法使用dplyr和tidyr库将json2$povijestBlokada展开(unnest):

  • Convert the list to a tbl_df with as_data_frame 使用as_data_frame将列表转换为tbl_df
  • Select your nested element povijestBlokada and unnest it. 选择您的嵌套元素povijestBlokada并将其展开(unnest)。
  • The problem is the NULL values in some columns, but replacing them with "NA" does the trick. 问题在于某些列中存在NULL值,但是用"NA"替换它们可以解决问题。 If you had NA in the first place, I think you could unnest a second time 如果一开始就是NA,我想您可以再展开(unnest)一次
# unnest() is exported by tidyr, not dplyr, so both packages must be attached;
# with dplyr alone the pipeline stops with "could not find function".
library(dplyr)
library(tidyr)
# Convert json2 (the parsed API response from the question) to a tbl_df.
DT <- as_data_frame(json2)
# Keep only the nested column and expand its per-row data frames into rows.
DT1 <- DT %>% select(povijestBlokada) %>% unnest()
# Row by row, put the string "NA" where a cell holds NULL. The result is
# printed, not assigned.
DT1 %>% rowwise() %>% mutate_each(funs(replace(., is.null(.), "NA")))
#> Source: local data frame [17 x 3]
#> Groups: <by row>
#> 
#>                      pocetak                      kraj brojDana
#>                        (chr)                     (chr)    (chr)
#> 1  2011-08-04T00:00:00+02:00 2011-08-10T00:00:00+02:00        6
#> 2  2011-09-06T00:00:00+02:00 2011-09-13T00:00:00+02:00        7
#> 3  2011-11-25T00:00:00+01:00 2011-12-28T00:00:00+01:00       33
#> 4  2011-12-30T00:00:00+01:00 2012-01-16T00:00:00+01:00       17
#> 5  2012-02-20T00:00:00+01:00 2012-03-16T00:00:00+01:00       25
#> 6  2012-03-23T00:00:00+01:00 2012-05-16T00:00:00+02:00       54
#> 7  2012-05-21T00:00:00+02:00                        NA     1381
#> 8  2012-05-30T00:00:00+02:00 2012-06-05T00:00:00+02:00        6
#> 9  2012-06-21T00:00:00+02:00 2012-06-26T00:00:00+02:00        5
#> 10 2012-06-29T00:00:00+02:00 2012-07-03T00:00:00+02:00        4
#> 11 2012-09-06T00:00:00+02:00 2013-03-06T00:00:00+01:00      181
#> 12 2014-06-09T00:00:00+02:00                        NA      632
#> 13 2015-03-31T00:00:00+02:00 2015-09-30T00:00:00+02:00      183
#> 14 2012-05-30T00:00:00+02:00 2012-06-05T00:00:00+02:00        6
#> 15 2012-06-21T00:00:00+02:00 2012-06-26T00:00:00+02:00        5
#> 16 2012-06-29T00:00:00+02:00 2012-07-03T00:00:00+02:00        4
#> 17 2012-09-06T00:00:00+02:00 2013-03-06T00:00:00+01:00      181

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM