简体   繁体   中英

Create a matrix of 1s and 0s based on strings in multiple columns

I have a tibble, `df`:

# Example tibble: an ID column plus three character columns (dog, cat, fish).
df <- structure(
    list(
        ID = paste0("ID", 1:10),
        dog = c(
            "big", "small", "small", "big", "small",
            "small", "big", "big", "medium", "medium"
        ),
        cat = rep(c("fluffy", "shorthair"), each = 5),
        fish = c(
            "carp", "spotted", "spotted", "carp", "guppy",
            "guppy", "guppy", "spotted", "spotted", "carp"
        )
    ),
    # Compact row names for 10 rows; the class vector marks this as a tibble.
    row.names = c(NA, -10L),
    class = c("tbl_df", "tbl", "data.frame")
)

df
# A tibble: 10 x 4
   ID    dog    cat       fish   
   <chr> <chr>  <chr>     <chr>  
 1 ID1   big    fluffy    carp   
 2 ID2   small  fluffy    spotted
 3 ID3   small  fluffy    spotted
 4 ID4   big    fluffy    carp   
 5 ID5   small  fluffy    guppy  
 6 ID6   small  shorthair guppy  
 7 ID7   big    shorthair guppy  
 8 ID8   big    shorthair spotted
 9 ID9   medium shorthair spotted
10 ID10  medium shorthair carp 

I want to create a column for each value in every column (e.g. `big`, `small`, `medium`, `fluffy`, etc.), and if a row has that value, it gets a 1; otherwise it gets a 0 (e.g. ID1 would have a 1 in the `big` column, a 0 in the `small` column, a 1 in the `fluffy` column, etc.).

I have done this using `pivot_wider`, but it looks very messy. Is there a cleaner solution (preferably using `dplyr`)?

# Spread each categorical column into one column per distinct value, then
# recode the result: cells that received a value become 1, missing cells 0.
df_mat <- df %>%
    pivot_wider(names_from = dog, values_from = dog) %>%
    pivot_wider(names_from = cat, values_from = cat) %>%
    pivot_wider(names_from = fish, values_from = fish) %>%
    # `across()` replaces the superseded `mutate_at()` and the deprecated
    # `funs()`; `as.numeric()` keeps the indicator columns as doubles.
    mutate(across(-ID, ~ as.numeric(!is.na(.x))))

df_mat
# A tibble: 10 x 9
   ID      big small medium fluffy shorthair  carp spotted guppy
   <chr> <dbl> <dbl>  <dbl>  <dbl>     <dbl> <dbl>   <dbl> <dbl>
 1 ID1       1     0      0      1         0     1       0     0
 2 ID2       0     1      0      1         0     0       1     0
 3 ID3       0     1      0      1         0     0       1     0
 4 ID4       1     0      0      1         0     1       0     0
 5 ID5       0     1      0      1         0     0       0     1
 6 ID6       0     1      0      0         1     0       0     1
 7 ID7       1     0      0      0         1     0       0     1
 8 ID8       1     0      0      0         1     0       1     0
 9 ID9       0     0      1      0         1     0       1     0
10 ID10      0     0      1      0         1     1       0     0

Here is a data.table way:

> dcast(
+     melt(as.data.table(df), id.vars = "ID"),
+     ID ~ value,
+     fun.aggregate = length
+ )
      ID big carp fluffy guppy medium shorthair small spotted
 1:  ID1   1    1      1     0      0         0     0       0
 2: ID10   0    1      0     0      1         1     0       0
 3:  ID2   0    0      1     0      0         0     1       1
 4:  ID3   0    0      1     0      0         0     1       1
 5:  ID4   1    1      1     0      0         0     0       0
 6:  ID5   0    0      1     1      0         0     1       0
 7:  ID6   0    0      0     1      0         1     1       0
 8:  ID7   1    0      0     1      0         1     0       0
 9:  ID8   1    0      0     0      0         1     0       1
10:  ID9   0    0      0     0      1         1     0       1

Equivalent code with `tidyr`:

# Stack dog/cat/fish into long form (one row per ID and value), then count how
# often each value occurs per ID; combinations that never occur are filled
# with 0.
df %>%
    pivot_longer(cols = dog:fish) %>%
    # Arguments are named explicitly: since tidyr 1.2.0 `...` follows `data`,
    # so `id_cols` / `names_from` can no longer be matched by position.
    pivot_wider(
        id_cols = ID,
        names_from = value,
        values_from = value,
        values_fn = length,
        values_fill = 0L
    )

# # A tibble: 10 x 9
#    ID      big fluffy  carp small spotted guppy shorthair medium
#    <chr> <int>  <int> <int> <int>   <int> <int>     <int>  <int>
#  1 ID1       1      1     1     0       0     0         0      0
#  2 ID2       0      1     0     1       1     0         0      0
#  3 ID3       0      1     0     1       1     0         0      0
#  4 ID4       1      1     1     0       0     0         0      0
#  5 ID5       0      1     0     1       0     1         0      0
#  6 ID6       0      0     0     1       0     1         1      0
#  7 ID7       1      0     0     0       0     1         1      0
#  8 ID8       1      0     0     0       1     0         1      0
#  9 ID9       0      0     0     0       1     0         1      1
# 10 ID10      0      0     1     0       0     0         1      1

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM