簡體   English   中英

從基礎 R 方法到 tidyverse

[英]From Base R approach to tidyverse

我有一個大的調查數據並繼承了一個舊的基礎 R 代碼,我想看看是否有更好/有效的 tidyverse 方法來做到這一點。 在這里,我們簡單地將變量 LS 解析為具有標准名稱模式的 X 單獨列

這里的片段:

for(i in 1:nchar(df$LS)){
  df=within(df,assign(paste0("f7_",i),substr(df$LS,i,i)))
}

ls_names <-vector()
for (i in 1:79) {
  ls_names[i] <- paste0("f7_",i)
} 

這里是duput樣本

  df<- structure(list(hhnr = c("992690", "138428", "735527", "555317", 
"555317"), indnr = c("01", "02", "01", "01", "02"), date = c("20201106", 
"20201106", "20201106", "20201106", "20201106"), LS = c("2213232213332122212111322122122123333533334455424333344454555443555515545125150  ", 
"1132242122422311113321224244112242323431432345544444344455545442355525525115150  ", 
"3343444344334434324141444143311312244312411455334213254455555555555525555155110  ", 
"3343123224113324343331414324324334224442324455544425355555551555555555555255150  ", 
"3444344344444222222221332323333324222343232445322313355555555454554435545415150  "
)), row.names = c(NA, 5L), class = "data.frame")

是的。 purrr::map_df在這里很有幫助。 嘗試這個

cbind(df[1:3], map_df(str_split(df$LS, ""), 
                      ~setNames(.x, paste0("f7_", seq_len(length(.x))))))
    hhnr indnr     date f7_1 f7_2 f7_3 f7_4 f7_5 f7_6 f7_7 f7_8 f7_9 f7_10 f7_11 f7_12 f7_13
1 992690    01 20201106    2    2    1    3    2    3    2    2    1     3     3     3     2
2 138428    02 20201106    1    1    3    2    2    4    2    1    2     2     4     2     2
3 735527    01 20201106    3    3    4    3    4    4    4    3    4     4     3     3     4
4 555317    01 20201106    3    3    4    3    1    2    3    2    2     4     1     1     3
5 555317    02 20201106    3    4    4    4    3    4    4    3    4     4     4     4     4
  f7_14 f7_15 f7_16 f7_17 f7_18 f7_19 f7_20 f7_21 f7_22 f7_23 f7_24 f7_25 f7_26 f7_27 f7_28
1     1     2     2     2     1     2     1     1     1     3     2     2     1     2     2
2     3     1     1     1     1     3     3     2     1     2     2     4     2     4     4
3     4     3     4     3     2     4     1     4     1     4     4     4     1     4     3
4     3     2     4     3     4     3     3     3     1     4     1     4     3     2     4
5     2     2     2     2     2     2     2     2     1     3     3     2     3     2     3
  f7_29 f7_30 f7_31 f7_32 f7_33 f7_34 f7_35 f7_36 f7_37 f7_38 f7_39 f7_40 f7_41 f7_42 f7_43
1     1     2     2     1     2     3     3     3     3     5     3     3     3     3     4
2     1     1     2     2     4     2     3     2     3     4     3     1     4     3     2
3     3     1     1     3     1     2     2     4     4     3     1     2     4     1     1
4     3     2     4     3     3     4     2     2     4     4     4     2     3     2     4
5     3     3     3     3     2     4     2     2     2     3     4     3     2     3     2
  f7_44 f7_45 f7_46 f7_47 f7_48 f7_49 f7_50 f7_51 f7_52 f7_53 f7_54 f7_55 f7_56 f7_57 f7_58
1     4     5     5     4     2     4     3     3     3     3     4     4     4     5     4
2     3     4     5     5     4     4     4     4     4     3     4     4     4     5     5
3     4     5     5     3     3     4     2     1     3     2     5     4     4     5     5
4     4     5     5     5     4     4     4     2     5     3     5     5     5     5     5
5     4     4     5     3     2     2     3     1     3     3     5     5     5     5     5
  f7_59 f7_60 f7_61 f7_62 f7_63 f7_64 f7_65 f7_66 f7_67 f7_68 f7_69 f7_70 f7_71 f7_72 f7_73
1     5     5     5     4     4     3     5     5     5     5     1     5     5     4     5
2     5     4     5     4     4     2     3     5     5     5     2     5     5     2     5
3     5     5     5     5     5     5     5     5     5     5     2     5     5     5     5
4     5     5     1     5     5     5     5     5     5     5     5     5     5     5     5
5     5     5     5     4     5     4     5     5     4     4     3     5     5     4     5
  f7_74 f7_75 f7_76 f7_77 f7_78 f7_79 f7_80 f7_81
1     1     2     5     1     5     0            
2     1     1     5     1     5     0            
3     1     5     5     1     1     0            
4     2     5     5     1     5     0            
5     4     1     5     1     5     0 

或者

df %>% mutate(LS = str_split(LS, "")) %>%
  rename(f7 = LS) %>%
  unnest_wider(f7, names_sep = "_")

或者

pmap_df(df %>% mutate(LS = map(str_split(LS, ""), ~setNames(... , paste0("f7_",seq_len(length(...)))))),
        ~c(list(...)[1:3], unlist(..4)))

附加選項

df<- structure(list(hhnr = c("992690", "138428", "735527", "555317", 
                             "555317"), indnr = c("01", "02", "01", "01", "02"), date = c("20201106", 
                                                                                          "20201106", "20201106", "20201106", "20201106"), LS = c("2213232213332122212111322122122123333533334455424333344454555443555515545125150  ", 
                                                                                                                                                  "1132242122422311113321224244112242323431432345544444344455545442355525525115150  ", 
                                                                                                                                                  "3343444344334434324141444143311312244312411455334213254455555555555525555155110  ", 
                                                                                                                                                  "3343123224113324343331414324324334224442324455544425355555551555555555555255150  ", 
                                                                                                                                                  "3444344344444222222221332323333324222343232445322313355555555454554435545415150  "
                                                                                          )), row.names = c(NA, 5L), class = "data.frame")

library(tidyverse)
df %>% 
  mutate(tmp = map(LS, ~unlist(strsplit(.x, split = "")))) %>% 
  unnest_wider(tmp) %>% 
  rename_with(.fn = ~str_replace(string = .x, "^\\.\\.\\.", "f7_"), .cols = starts_with("..."))

#> # A tibble: 5 x 85
#>   hhnr   indnr date  LS    f7_1  f7_2  f7_3  f7_4  f7_5  f7_6  f7_7  f7_8  f7_9 
#>   <chr>  <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr> <chr>
#> 1 992690 01    2020~ "221~ 2     2     1     3     2     3     2     2     1    
#> 2 138428 02    2020~ "113~ 1     1     3     2     2     4     2     1     2    
#> 3 735527 01    2020~ "334~ 3     3     4     3     4     4     4     3     4    
#> 4 555317 01    2020~ "334~ 3     3     4     3     1     2     3     2     2    
#> 5 555317 02    2020~ "344~ 3     4     4     4     3     4     4     3     4    
#> # ... with 72 more variables: f7_10 <chr>, f7_11 <chr>, f7_12 <chr>,
#> #   f7_13 <chr>, f7_14 <chr>, f7_15 <chr>, f7_16 <chr>, f7_17 <chr>,
#> #   f7_18 <chr>, f7_19 <chr>, f7_20 <chr>, f7_21 <chr>, f7_22 <chr>,
#> #   f7_23 <chr>, f7_24 <chr>, f7_25 <chr>, f7_26 <chr>, f7_27 <chr>,
#> #   f7_28 <chr>, f7_29 <chr>, f7_30 <chr>, f7_31 <chr>, f7_32 <chr>,
#> #   f7_33 <chr>, f7_34 <chr>, f7_35 <chr>, f7_36 <chr>, f7_37 <chr>,
#> #   f7_38 <chr>, f7_39 <chr>, f7_40 <chr>, f7_41 <chr>, f7_42 <chr>,
#> #   f7_43 <chr>, f7_44 <chr>, f7_45 <chr>, f7_46 <chr>, f7_47 <chr>,
#> #   f7_48 <chr>, f7_49 <chr>, f7_50 <chr>, f7_51 <chr>, f7_52 <chr>,
#> #   f7_53 <chr>, f7_54 <chr>, f7_55 <chr>, f7_56 <chr>, f7_57 <chr>,
#> #   f7_58 <chr>, f7_59 <chr>, f7_60 <chr>, f7_61 <chr>, f7_62 <chr>,
#> #   f7_63 <chr>, f7_64 <chr>, f7_65 <chr>, f7_66 <chr>, f7_67 <chr>,
#> #   f7_68 <chr>, f7_69 <chr>, f7_70 <chr>, f7_71 <chr>, f7_72 <chr>,
#> #   f7_73 <chr>, f7_74 <chr>, f7_75 <chr>, f7_76 <chr>, f7_77 <chr>,
#> #   f7_78 <chr>, f7_79 <chr>, f7_80 <chr>, f7_81 <chr>

reprex package (v2.0.0) 於 2021 年 4 月 13 日創建

我們可以為此使用tidyr::separate 我會首先修剪LS的空白,這會使事情變得更容易。

library(tidyverse)

df %>% 
  mutate(LS = str_trim(LS)) %>% 
  separate(LS,
           into = paste0("f7_", seq_len(nchar(.$LS[1])+1)),  
           sep = "(?=[0-9])")

#>     hhnr indnr     date f7_1 f7_2 f7_3 f7_4 f7_5 f7_6 f7_7 f7_8 f7_9 f7_10
#> 1 992690    01 20201106         2    2    1    3    2    3    2    2     1
#> 2 138428    02 20201106         1    1    3    2    2    4    2    1     2
#> 3 735527    01 20201106         3    3    4    3    4    4    4    3     4
#> 4 555317    01 20201106         3    3    4    3    1    2    3    2     2
#> 5 555317    02 20201106         3    4    4    4    3    4    4    3     4
#>   f7_11 f7_12 f7_13 f7_14 f7_15 f7_16 f7_17 f7_18 f7_19 f7_20 f7_21 f7_22 f7_23
#> 1     3     3     3     2     1     2     2     2     1     2     1     1     1
#> 2     2     4     2     2     3     1     1     1     1     3     3     2     1
#> 3     4     3     3     4     4     3     4     3     2     4     1     4     1
#> 4     4     1     1     3     3     2     4     3     4     3     3     3     1
#> 5     4     4     4     4     2     2     2     2     2     2     2     2     1
#>   f7_24 f7_25 f7_26 f7_27 f7_28 f7_29 f7_30 f7_31 f7_32 f7_33 f7_34 f7_35 f7_36
#> 1     3     2     2     1     2     2     1     2     2     1     2     3     3
#> 2     2     2     4     2     4     4     1     1     2     2     4     2     3
#> 3     4     4     4     1     4     3     3     1     1     3     1     2     2
#> 4     4     1     4     3     2     4     3     2     4     3     3     4     2
#> 5     3     3     2     3     2     3     3     3     3     3     2     4     2
#>   f7_37 f7_38 f7_39 f7_40 f7_41 f7_42 f7_43 f7_44 f7_45 f7_46 f7_47 f7_48 f7_49
#> 1     3     3     5     3     3     3     3     4     4     5     5     4     2
#> 2     2     3     4     3     1     4     3     2     3     4     5     5     4
#> 3     4     4     3     1     2     4     1     1     4     5     5     3     3
#> 4     2     4     4     4     2     3     2     4     4     5     5     5     4
#> 5     2     2     3     4     3     2     3     2     4     4     5     3     2
#>   f7_50 f7_51 f7_52 f7_53 f7_54 f7_55 f7_56 f7_57 f7_58 f7_59 f7_60 f7_61 f7_62
#> 1     4     3     3     3     3     4     4     4     5     4     5     5     5
#> 2     4     4     4     4     3     4     4     4     5     5     5     4     5
#> 3     4     2     1     3     2     5     4     4     5     5     5     5     5
#> 4     4     4     2     5     3     5     5     5     5     5     5     5     1
#> 5     2     3     1     3     3     5     5     5     5     5     5     5     5
#>   f7_63 f7_64 f7_65 f7_66 f7_67 f7_68 f7_69 f7_70 f7_71 f7_72 f7_73 f7_74 f7_75
#> 1     4     4     3     5     5     5     5     1     5     5     4     5     1
#> 2     4     4     2     3     5     5     5     2     5     5     2     5     1
#> 3     5     5     5     5     5     5     5     2     5     5     5     5     1
#> 4     5     5     5     5     5     5     5     5     5     5     5     5     2
#> 5     4     5     4     5     5     4     4     3     5     5     4     5     4
#>   f7_76 f7_77 f7_78 f7_79 f7_80
#> 1     2     5     1     5     0
#> 2     1     5     1     5     0
#> 3     5     5     1     1     0
#> 4     5     5     1     5     0
#> 5     1     5     1     5     0

代表 package (v0.3.0) 於 2021 年 4 月 13 日創建

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM