拆分R中不同列中的值

Question

我的數據集中的一列包含如下值

utm_source=google&utm_medium=cpc&utm_campaign=1234567&utm_term=brand%20&utm_content=Brand&gclid=ERtyuiipotf_YTj

我應該如何將其拆分為不同的列及其在 R 中的值？

utm_source utm_medium  utm_campaign utm_term   utm_content
  google      cpc          1234567   brand%20     Brand

dput(column)給出以下輸出

structure(list("null", "gclid=ertyyhglkdl-kjkY", 
    "utm_source=google&utm_medium=cpc&utm_campaign=1234556&utm_term=brand%20shirts&utm_content=Brand&gclid=jhajsgjdgd_ajs", 
    "utm_source=google&utm_medium=cpc&utm_campaign=1674814043&utm_term=brand%20shirts&utm_content=Brand&gclid=KvgMsEAAYASAAEgLq6vD_BwE", 
    "null", "null", "null", "null", "null", "null", "null", "null", 
    "null", "null", "utm_source=fb&utm_medium=ctw&utm_campaign=Shirt_rem&utm_content=CasciaShirt"), class = c("extracted", 
"list"))

Answer 1

以 OP 更新後的示例作為 list，我們遍歷這個 list：如果元素不是 "null"，就創建一個 tibble，用 separate_rows 在 & 處將該列拆成多行，然後用 separate 在 = 處把它拆成兩列（鍵和值），接著用 deframe 將這兩列轉換為命名向量，最後用 as_tibble_row 把命名向量轉成單行的 tibble。

library(dplyr)
library(tidyr)
library(tibble)
library(purrr)

# Turn each query string in `lst1` into one wide row of key/value columns.
# "null" placeholders yield NULL, which map_dfr() leaves out when it binds
# the rows, so the result has one row per non-"null" element.
map_dfr(lst1, function(query) {
  if (query != "null") {
    tibble(col1 = query) %>%
      # one row per "key=value" pair
      separate_rows(col1, sep = "&") %>%
      # extra = "merge" splits at the first "=" only, so a value that itself
      # contains "=" is kept whole instead of being cut off with a warning
      separate(col1, into = c("col1", "col2"), sep = "=", extra = "merge") %>%
      # two-column tibble -> named character vector -> one-row tibble
      deframe() %>%
      as_tibble_row()
  }
})

輸出

# A tibble: 4 x 6
#  gclid                    utm_source utm_medium utm_campaign utm_term       utm_content
#  <chr>                    <chr>      <chr>      <chr>        <chr>          <chr>      
#1 ertyyhglkdl-kjkY         <NA>       <NA>       <NA>         <NA>           <NA>       
#2 jhajsgjdgd_ajs           google     cpc        1234556      brand%20shirts Brand      
#3 KvgMsEAAYASAAEgLq6vD_BwE google     cpc        1674814043   brand%20shirts Brand      
#4 <NA>                     fb         ctw        Shirt_rem    <NA>           CasciaShirt

或者，不必在循環中逐個元素處理，我們可以把 list 轉換為 data.frame（tibble）中的一列，一次完成拆分，再轉換為寬格式

# NOTE(review): nothing in the pipeline below calls a data.table function;
# this attach looks unnecessary -- confirm before relying on it.
library(data.table)

# Loop-free variant: drop the "null" placeholders, put the remaining query
# strings into a single column, split once, then reshape to wide format.
keep(lst1, ~ .x != "null") %>%
  flatten_chr() %>%
  tibble(col1 = .) %>%
  # remember which original string each key/value pair came from
  mutate(rn = row_number()) %>%
  # one row per "key=value" pair
  separate_rows(col1, sep = "&") %>%
  # extra = "merge" splits at the first "=" only, so a value that itself
  # contains "=" is kept whole instead of being cut off with a warning
  separate(col1, into = c("col1", "col2"), sep = "=", extra = "merge") %>%
  # keys become column names; `rn` keeps one output row per input string
  pivot_wider(names_from = col1, values_from = col2) %>%
  select(-rn)
# A tibble: 4 x 6
#  gclid                    utm_source utm_medium utm_campaign utm_term       utm_content
#  <chr>                    <chr>      <chr>      <chr>        <chr>          <chr>      
#1 ertyyhglkdl-kjkY         <NA>       <NA>       <NA>         <NA>           <NA>       
#2 jhajsgjdgd_ajs           google     cpc        1234556      brand%20shirts Brand      
#3 KvgMsEAAYASAAEgLq6vD_BwE google     cpc        1674814043   brand%20shirts Brand      
#4 <NA>                     fb         ctw        Shirt_rem    <NA>           CasciaShirt

數據

# Sample input: an unnamed list of URL query strings in which the literal
# string "null" marks an entry without data. The list carries the extra
# S3 class "extracted" in front of "list".
lst1 <- c(
  list(
    "null",
    "gclid=ertyyhglkdl-kjkY",
    "utm_source=google&utm_medium=cpc&utm_campaign=1234556&utm_term=brand%20shirts&utm_content=Brand&gclid=jhajsgjdgd_ajs",
    "utm_source=google&utm_medium=cpc&utm_campaign=1674814043&utm_term=brand%20shirts&utm_content=Brand&gclid=KvgMsEAAYASAAEgLq6vD_BwE"
  ),
  # positions 5-14 are all placeholders
  rep(list("null"), 10L),
  list(
    "utm_source=fb&utm_medium=ctw&utm_campaign=Shirt_rem&utm_content=CasciaShirt"
  )
)
class(lst1) <- c("extracted", "list")

Answer 2

我不確定這是否是您預期的輸出。以下是一個可能符合您目標的 base R 方案

# Parse every element of `column` into a one-row data frame (column names =
# text before the first "=", values = text after it) and fold the rows
# together with an outer merge so that every key ends up as a column.
# An element without "=" (such as "null") becomes a column named after
# itself that holds the same text.
Reduce(
  function(acc, row) merge(acc, row, all = TRUE),
  lapply(
    column,
    function(x) {
      pairs <- unlist(strsplit(x, "&", fixed = TRUE))
      keys <- sub("=.*", "", pairs)
      # Cut at the FIRST "=": the previous greedy pattern ".*=" kept only the
      # text after the last "=", truncating values that contain "=".
      vals <- sub("^[^=]*=", "", pairs)
      # stringsAsFactors = FALSE keeps the values as character on R < 4.0 too
      setNames(data.frame(as.list(vals), stringsAsFactors = FALSE), keys)
    }
  )
)

結果如下

  utm_source utm_medium utm_campaign utm_content null                    gclid
1         fb        ctw    Shirt_rem CasciaShirt <NA>                     <NA>
2     google        cpc      1234556       Brand <NA>           jhajsgjdgd_ajs
3     google        cpc   1674814043       Brand <NA> KvgMsEAAYASAAEgLq6vD_BwE
4       <NA>       <NA>         <NA>        <NA> null         ertyyhglkdl-kjkY
        utm_term
1           <NA>
2 brand%20shirts
3 brand%20shirts
4           <NA>

更新

如果您想保留所有數據（即使某個元素是 "null"），可以試試下面的代碼

# Variant that keeps a row for every element of `column`, including the
# "null" placeholders: each element is parsed into a one-row data frame
# (or NA for "null") and the pieces are folded together left to right.
Reduce(
  function(x, y) {
    # When either side holds nothing but NA, stack the two with rbind()
    # rather than joining. `||` is the scalar operator meant for `if`;
    # both operands are single logicals, so the result is unchanged.
    if (all(is.na(x)) || all(is.na(y))) {
      return(rbind(x, y))
    }
    # No `by` is given, so the join keys are the columns both sides share.
    dplyr::full_join(x, y)
  },
  lapply(
    column,
    function(x) {
      # "null" carries no key/value pairs; NA is the placeholder for it
      if (x == "null") {
        return(NA)
      }
      pairs <- unlist(strsplit(x, "&", fixed = TRUE))
      keys <- sub("=.*", "", pairs)
      # Cut at the FIRST "=": the previous greedy pattern ".*=" kept only the
      # text after the last "=", truncating values that contain "=".
      vals <- sub("^[^=]*=", "", pairs)
      # stringsAsFactors = FALSE keeps the values as character on R < 4.0 too
      setNames(data.frame(as.list(vals), stringsAsFactors = FALSE), keys)
    }
  )
)

結果如下

                      gclid utm_source utm_medium utm_campaign       utm_term
1                      <NA>       <NA>       <NA>         <NA>           <NA>
2          ertyyhglkdl-kjkY       <NA>       <NA>         <NA>           <NA>
3            jhajsgjdgd_ajs     google        cpc      1234556 brand%20shirts
4  KvgMsEAAYASAAEgLq6vD_BwE     google        cpc   1674814043 brand%20shirts
5                      <NA>       <NA>       <NA>         <NA>           <NA>
6                      <NA>       <NA>       <NA>         <NA>           <NA>
7                      <NA>       <NA>       <NA>         <NA>           <NA>
8                      <NA>       <NA>       <NA>         <NA>           <NA>
9                      <NA>       <NA>       <NA>         <NA>           <NA>
10                     <NA>       <NA>       <NA>         <NA>           <NA>
11                     <NA>       <NA>       <NA>         <NA>           <NA>
12                     <NA>       <NA>       <NA>         <NA>           <NA>
13                     <NA>       <NA>       <NA>         <NA>           <NA>
14                     <NA>       <NA>       <NA>         <NA>           <NA>
15                     <NA>         fb        ctw    Shirt_rem           <NA>
   utm_content
1         <NA>
2         <NA>
3        Brand
4        Brand
5         <NA>
6         <NA>
7         <NA>
8         <NA>
9         <NA>
10        <NA>
11        <NA>
12        <NA>
13        <NA>
14        <NA>
15 CasciaShirt

拆分R中不同列中的值

問題描述

2 個解決方案

解決方案1
1 2020-10-04 20:50:55

數據

解決方案2
1 已采納 2020-10-04 21:51:59

拆分R中不同列中的值

問題描述

2 個解決方案

解決方案1 1 2020-10-04 20:50:55

數據

解決方案2 1 已采納 2020-10-04 21:51:59

解決方案1
1 2020-10-04 20:50:55

解決方案2
1 已采納 2020-10-04 21:51:59