簡體   English   中英

從數據幀列中的嵌套數據幀列表創建數據幀中的列

[英]create columns in dataframe from nested list of dataframes in a dataframe column

這就是我的數據框的樣子

str(FBInsightsExpanded)

Classes ‘data.table’ and 'data.frame':  4 obs. of  3 variables:
 $ impressions          : chr  "26580" "5741" "7335" "4123" ...
 $ date_start           : chr  "2018-10-14" "2018-10-14" "2018-10-14" "2018-10-14" ...
 $ action_values        :List of 4
  ..$ :'data.frame':    11 obs. of  2 variables:
  .. ..$ action_type: chr  "app_custom_event.fb_mobile_add_to_cart" "app_custom_event.fb_mobile_add_to_wishlist" "app_custom_event.fb_mobile_content_view" "app_custom_event.fb_mobile_initiated_checkout" ...
  .. ..$ value      : chr  "40505.79" "9262.82" "470464" "2319.35" ...
  ..$ :'data.frame':    8 obs. of  2 variables:
  .. ..$ action_type: chr  "app_custom_event.fb_mobile_add_to_cart" "app_custom_event.fb_mobile_add_to_wishlist" "app_custom_event.fb_mobile_content_view" "app_custom_event.fb_mobile_initiated_checkout" ...
  .. ..$ value      : chr  "26309.21" "6970.84" "697029" "196.8" ...
  ..$ :'data.frame':    10 obs. of  2 variables:
  .. ..$ action_type: chr  "app_custom_event.fb_mobile_add_to_cart" "app_custom_event.fb_mobile_add_to_wishlist" "app_custom_event.fb_mobile_content_view" "app_custom_event.fb_mobile_initiated_checkout" ...
  .. ..$ value      : chr  "42180.84" "4852.95" "282354" "5152.95" ...
  ..$ :'data.frame':    8 obs. of  2 variables:
  .. ..$ action_type: chr  "app_custom_event.fb_mobile_add_to_cart" "app_custom_event.fb_mobile_add_to_wishlist" "app_custom_event.fb_mobile_content_view" "app_custom_event.fb_mobile_initiated_checkout" ...
  .. ..$ value      : chr  "253737.97" "44378.59" "575184.59" "5294" ...

我需要的是一個如下所示的數據框:

impressions   date_start  fb_mobile_add_to_wishlist fb_mobile_content_view fb_mobile_initiated_checkout

“df”是你的data.frame,試試:

# Expand each row's nested `action_values` data frame into one wide row and
# row-bind the results into a single data frame.
# pmap_df() calls the function once per row of `df` with
#   x = impressions, y = date_start, z = that row's `action_values` entry.
# NOTE(review): the unqualified verbs below (%>%, spread, mutate, select,
# rename_at, vars, matches, everything, str_replace) assume dplyr, tidyr and
# stringr are attached -- confirm in the calling session.
purrr::pmap_df(
  list(df$impressions, df$date_start, df$action_values),
  function(x, y, z) {
    if (is.null(z)) {
      # No action data for this row: keep only the two identifying columns;
      # the action columns come out as NA once the rows are bound together.
      data.frame(impressions = x, date_start = y, stringsAsFactors = FALSE)
    } else {
      z %>%
        spread("action_type", "value") %>% # one column per action_type
        mutate(impressions = x, date_start = y) %>%
        select(impressions, date_start, everything()) %>% # id columns first
        # Drop the "app_custom_event." prefix from the new column names.
        # A `~` lambda replaces funs(), which is deprecated in dplyr >= 0.8.0.
        rename_at(
          vars(matches("^app_custom_event\\.")),
          ~ str_replace(., "^app_custom_event\\.", "")
        )
    }
  }
)

#  impressions date_start fb_mobile_add_to_cart fb_mobile_add_to_wishlist fb_mobile_content_view
#1       26580 2018-10-14              40505.79                   9262.82                   <NA>
#2        5741 2018-10-14              26309.21                   6970.84                 697029
#3        7335 2018-10-14                  <NA>                      <NA>                   <NA>

#4        4123 2018-10-14              42180.84                   4852.95                 282354

數據:

# Sample input: four rows, each carrying a nested data frame of
# (action_type, value) pairs in the list column `action_values`.
# I() keeps the list as a single column; the third entry is NULL to
# represent a row that has no action data at all.
df <- data.frame(
  impressions = c("26580", "5741", "7335", "4123"),
  date_start = rep("2018-10-14", 4L),
  action_values = I(list(
    data.frame(
      action_type = c(
        "app_custom_event.fb_mobile_add_to_cart",
        "app_custom_event.fb_mobile_add_to_wishlist"
      ),
      value = c("40505.79", "9262.82"),
      stringsAsFactors = FALSE
    ),
    data.frame(
      action_type = c(
        "app_custom_event.fb_mobile_add_to_cart",
        "app_custom_event.fb_mobile_add_to_wishlist",
        "app_custom_event.fb_mobile_content_view"
      ),
      value = c("26309.21", "6970.84", "697029"),
      stringsAsFactors = FALSE
    ),
    NULL,
    data.frame(
      action_type = c(
        "app_custom_event.fb_mobile_add_to_cart",
        "app_custom_event.fb_mobile_add_to_wishlist",
        "app_custom_event.fb_mobile_content_view"
      ),
      value = c("42180.84", "4852.95", "282354"),
      stringsAsFactors = FALSE
    )
  )),
  stringsAsFactors = FALSE
)

由於 Nicholas2 提供了數據:

# Sample input without missing entries: every row carries the same three
# action types, so the nested frames differ only in their `value` column.
# Each nested frame is built from its value vector; I() keeps the resulting
# list as a single list column.
xdf <- data.frame(
  impressions = c("26580", "5741", "7335", "4123"),
  date_start = rep("2018-10-14", 4L),
  action_values = I(lapply(
    list(
      c("40505.79", "9262.82", "470464"),
      c("26309.21", "6970.84", "697029"),
      c("253737.97", "44378.59", "575184.59"),
      c("42180.84", "4852.95", "282354")
    ),
    function(row_values) {
      data.frame(
        action_type = c(
          "app_custom_event.fb_mobile_add_to_cart",
          "app_custom_event.fb_mobile_add_to_wishlist",
          "app_custom_event.fb_mobile_content_view"
        ),
        value = row_values,
        stringsAsFactors = FALSE
      )
    }
  )),
  stringsAsFactors = FALSE
)

這是一個 0 依賴基礎 R 解決方案:

# Base R, no package dependencies: turn each row's nested `action_values`
# data frame into a single wide row, then stack all rows with rbind.
# NOTE(review): every nested frame must yield the same set of columns for
# rbind.data.frame() to succeed, and NULL entries are not handled here.
do.call(
  rbind.data.frame,
  lapply(
    # row by row; seq_len() avoids the c(1, 0) that 1:nrow() gives for 0 rows
    seq_len(nrow(xdf)),
    function(.i) {
      x <- xdf$action_values[[.i]] # extract the nested data frame
      # Clean the future column names: strip the literal "app_custom_event."
      # prefix only (anchored at the start, with the dot escaped).
      x$action_type <- sub("^app_custom_event\\.", "", x$action_type)
      # Reshape long -> one wide row (one column per action_type).
      x <- as.data.frame(
        t(unstack(x, value ~ action_type, stringsAsFactors = FALSE)),
        stringsAsFactors = FALSE
      )
      x$impressions <- xdf$impressions[[.i]] # add the other two columns
      x$date_start <- xdf$date_start[[.i]]
      rownames(x) <- NULL # cosmetic: replace the leftover row name
      x
    }
  )
)
##   fb_mobile_add_to_cart fb_mobile_add_to_wishlist fb_mobile_content_view impressions date_start
## 1              40505.79                   9262.82                 470464       26580 2018-10-14
## 2              26309.21                   6970.84                 697029        5741 2018-10-14
## 3             253737.97                  44378.59              575184.59        7335 2018-10-14
## 4              42180.84                   4852.95                 282354        4123 2018-10-14

兩種方法的效能對比(head-to-head):

library(microbenchmark)

# Time the base R approach against the tidyverse approach on `xdf`, print the
# timing table and then plot it.
# NOTE(review): `%>%` and autoplot() are used unqualified, so magrittr (or
# dplyr) and ggplot2 are assumed to be attached in the session -- confirm.
microbenchmark(
  # Base R: reshape each nested frame to one wide row, then rbind the rows.
  base = do.call(
    rbind.data.frame,
    lapply(
      seq_len(nrow(xdf)), # safe for a 0-row xdf, unlike 1:nrow(xdf)
      function(.i) {
        x <- xdf$action_values[[.i]]
        # strip only the literal, leading "app_custom_event." prefix
        x$action_type <- sub("^app_custom_event\\.", "", x$action_type)
        x <- as.data.frame(
          t(unstack(x, value ~ action_type, stringsAsFactors = FALSE)),
          stringsAsFactors = FALSE
        )
        x$impressions <- xdf$impressions[[.i]]
        x$date_start <- xdf$date_start[[.i]]
        rownames(x) <- NULL
        x
      }
    )
  ),
  # tidyverse: spread each nested frame wide and let pmap_df() bind the rows.
  tidyverse = purrr::pmap_df(
    list(xdf$impressions, xdf$date_start, xdf$action_values),
    function(x, y, z) {
      z %>%
        tidyr::spread("action_type", "value") %>%
        dplyr::mutate(impressions = x, date_start = y) %>%
        dplyr::select(impressions, date_start, dplyr::everything()) %>%
        # `~` lambda instead of dplyr::funs(), deprecated in dplyr >= 0.8.0
        dplyr::rename_at(
          dplyr::vars(dplyr::matches("^app_custom_event\\.")),
          ~ stringr::str_replace(., "^app_custom_event\\.", "")
        )
    }
  )
) %>% { print(.) ; . } %>% autoplot() # print the table, then pass it on to plot
## Unit: milliseconds
##       expr       min        lq      mean    median        uq       max neval
##       base  1.641284  1.770162  2.320804  1.836188  2.077139  9.536442   100
##  tidyverse 17.152142 17.908212 21.554416 18.503356 22.255375 46.630603   100

在此處輸入圖片說明

另外:👏🏼 恭喜你成功讓大家替你把工作做完了。我其實希望大家能先等你證明自己嘗試過之後再作答,但我也不能讓一個需要 57 個套件依賴、在 Linux 上還得編譯的解決方案成為唯一的答案。

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM