簡體   English   中英

tidyverse:一個變量與data.frame中所有其他變量的交叉表

[英]tidyverse: Cross tables of one variable with all other variables in data.frame

我想與data.frame中的所有其他變量建立變量的交叉表。

library(tidyverse)
library(janitor)

humans <- starwars %>%
  filter(species == "Human")

humans %>%
  janitor::tabyl(gender, eye_color)



gender blue blue-gray brown dark hazel yellow
 female    3         0     5    0     1      0
   male    9         1    12    1     1      2

humans %>%
  dplyr::select_if(is.character) %>%
  dplyr::select(-name, -gender) %>%
  purrr::map(.f = ~janitor::tabyl(dat = humans, gender, .x))

Error: Unknown columns `blond`, `none`, `brown`, `brown, grey`, `brown` and ... 
Call `rlang::last_error()` to see a backtrace

假設我們需要成對表與'性別'

humans %>%
  dplyr::select_if(is.character) %>%
  dplyr::select(-name, -gender) %>%
  imap(~ tibble(!! .y := .x) %>% 
             mutate(gender = humans[['gender']]) %>% 
             janitor::tabyl(!!rlang::sym(names(.)[1]), gender))
#$hair_color
#    hair_color female male
#        auburn      1    0
#  auburn, grey      0    1
# auburn, white      0    1
#         black      1    7
#         blond      0    3
#        brown      6    8
#  brown, grey      0    1
#         grey      0    1
#         none      0    3
#        white      1    1

#$skin_color
# skin_color female male
#       dark      0    4
#       fair      3   13
#      light      6    5
#...

更新

xtable::xtableList要求list元素的名稱相同。 要實現這一點,請在list元素中更改相同的第一列名稱,然后創建標識符列

library(xtable)
humans %>%
 dplyr::select_if(is.character) %>%
 dplyr::select(-name, -gender) %>%
 imap(~ tibble(!! .y := .x) %>% 
         mutate(gender = humans[['gender']]) %>% 
         janitor::tabyl(!!rlang::sym(names(.)[1]), gender) %>%  
         mutate(colNname = .y) %>% 
         rename_at(1, ~ 'Variable')) %>%
 xtableList

tably將名稱作為參數,並將向量傳遞給它。

如果您使用imap您將可以訪問列的名稱,您可以轉換為符號,並且當janitor支持准引用時,您可以編寫:

humans %>%
  select_if(is.character) %>%
  select(-name, -gender) %>%
  imap(.f = ~janitor::tabyl(dat = humans, !!sym(.y), gender))
#$`hair_color`
#     hair_color female male
#         auburn      1    0
#   auburn, grey      0    1
#  auburn, white      0    1
#          black      1    7
#          blond      0    3
#          brown      6    8
#    brown, grey      0    1
#           grey      0    1
#           none      0    3
#          white      1    1
# 
# $skin_color
#  skin_color female male
#        dark      0    4
#        fair      3   13

有趣的是tabyl.data.frame調用一個對符號有效的未導出函數,所以通過直接調用它我們可以跳過unquoting並使用基數R.

cols <- setdiff(names(Filter(is.character,humans)), c("name","gender"))
lapply(cols, function(x) janitor:::tabyl_2way(humans, as.name(x), quote(gender)))
# [[1]]
#     hair_color female male
#         auburn      1    0
#   auburn, grey      0    1
#  auburn, white      0    1
#          black      1    7
#          blond      0    3
#          brown      6    8
#    brown, grey      0    1
#           grey      0    1
#           none      0    3
#          white      1    1
# 
# [[2]]
#  skin_color female male
#        dark      0    4

為了使它適用於xtable @ akrun的建議也適用於此:

humans %>%
  select_if(is.character) %>%
  select(-name, -gender) %>%
  imap(.f = ~tabyl(dat = humans, !!sym(.y), gender) %>% rename_at(1,~"x")) %>%
  xtableList

要么

cols <- setdiff(names(Filter(is.character,humans)), c("name","gender"))
l <- lapply(cols, function(x) {
  res <- janitor:::tabyl_2way(humans, as.name(x), quote(gender))
  names(res)[1] <- "x"
  res
})
xtableList(l)

僅使用data.table (和一個%>% ):

library(data.table)
swDT <- data.table(starwars)
setkey(swDT, gender, hair_color)


swDT[species == "Human"
     ][CJ(gender, hair_color, unique =TRUE), .N, .EACHI] %>% 
  dcast(hair_color ~ gender, value.var = "N")


       hair_color female male
 1:        auburn      1    0
 2:  auburn, grey      0    1
 3: auburn, white      0    1
 4:         black      1    7
 5:         blond      0    3
 6:         brown      6    8
 7:   brown, grey      0    1
 8:          grey      0    1
 9:          none      0    3
10:         white      1    1

在列表中柱starwars增加了復雜性,但這里用一個例子mtcars :交叉表cyl對所有其他變量。

mtcars %>%
  tidyr::gather(var, value, -cyl) %>%
  janitor::tabyl(cyl, value, var, show_missing_levels = FALSE) %>%
  purrr::map2(.x = ., .y = names(.), ~ janitor::adorn_title(.x, col_name = .y))

返回交叉表列表。 cyl x am,cyl x carb等:

$`am`
     am  
 cyl  0 1
   4  3 8
   6  4 3
   8 12 2

$carb
     carb          
 cyl    1 2 3 4 6 8
   4    5 6 0 0 0 0
   6    2 0 0 4 1 0
   8    0 4 3 6 0 1

...

如果您將對這些data.frames進行進一步操作,您可能會發現此標題選項更友好:

purrr::map2(.x = ., .y = names(.), ~ janitor::adorn_title(.x, col_name = .y, placement = "combined"))

哪個給你:

$vs
 cyl/vs  0  1
      4  1 10
      6  3  4
      8 14  0

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM