简体   繁体   中英

Elements of a dataframe as the column names of a new dataframe in R

I have the following data frame named DF in R:

   1          2         3    
1  VW       Mercedes  Audi                                    
2  Porsche  BMW       VW                                                    
3  Audi     Honda     Toyota                                             
4  Dodge    Opel      VW                                     
5  Lexus    Volvo     BMW                                                      
6  Dodge    VW        Porsche 

I want to create a new data frame (DF2) in which each element of DF becomes a column name of the new data frame, and the column names of DF become the elements of DF2:

     Audi BMW Dodge Honda Lexus Mercedes Opel Porsche Toyota Volvo VW
1    3     0  0     0      0     2        0    0        0     0    1
2    0     2  0     0      0     0        0    1        0     0    3
3    1     0  0     2      0     0        0    0        3     0    0 
4    0     0  1     0      0     0        2    0        0     0    3     
5    0     3  0     0      1     0        0    0        0     2    0
6    0     0  1     0      0     0        0    3        0     0    2

Try this:

# Build a row-by-brand numeric matrix from `df` (the data frame of labels,
# called DF in the question): each cell holds the column of `df` in which
# the brand occurs in that row, or 0 when the brand is absent from the row.
cells <- as.matrix(df)
# Distinct brands in order of first appearance (column by column).
brands <- unique(as.vector(cells))
# Use the column names as cell values when they are numeric ("1", "2", ...);
# otherwise fall back to column positions instead of producing NA/0 everywhere.
col_values <- suppressWarnings(as.numeric(colnames(cells)))
if (length(col_values) != ncol(cells) || anyNA(col_values)) {
  col_values <- as.numeric(seq_len(ncol(cells)))
}
# vapply() guarantees one numeric value per row for every brand, so the
# result is a plain numeric matrix rather than a matrix of list cells.
x <- vapply(
  brands,
  function(brand) {
    apply(cells, 1, function(row_values) {
      hit <- which(row_values == brand)
      # If a brand repeats within a row, report its first column.
      if (length(hit) > 0) col_values[hit[1]] else 0
    })
  },
  numeric(nrow(cells))
)
x
     VW Porsche Audi Dodge Lexus Mercedes BMW Honda Opel Volvo Toyota
[1,] 1  0       3    0     0     2        0   0     0    0     0     
[2,] 3  1       0    0     0     0        2   0     0    0     0     
[3,] 0  0       1    0     0     0        0   2     0    0     3     
[4,] 3  0       0    1     0     0        0   0     2    0     0     
[5,] 0  0       0    0     1     0        3   0     0    2     0     
[6,] 2  3       0    1     0     0        0   0     0    0     0  

Another alternative:

library(tidyr)
library(dplyr)

# Same reshape as a sequence of named steps instead of one pipe chain.
# 1. Keep each row's identity in an explicit `rowname` column.
with_ids <- add_rownames(DF)
# 2. Go long: one (rowname, key, value) triple per cell, where `key` is the
#    original column label and `value` is the brand in that cell.
long <- gather(with_ids, key, value, -rowname, convert = TRUE)
# 3. Go wide again, this time with one column per brand holding `key`;
#    brands missing from a row are filled with 0.
wide <- spread(long, value, key, fill = 0)
# 4. Drop the helper column so only the brand columns remain.
select(wide, -rowname)

Which gives:

#Source: local data frame [6 x 11]
#
#   Audi   BMW Dodge Honda Lexus Mercedes  Opel Porsche Toyota Volvo    VW
#  (dbl) (dbl) (dbl) (dbl) (dbl)    (dbl) (dbl)   (dbl)  (dbl) (dbl) (dbl)
#1     3     0     0     0     0        2     0       0      0     0     1
#2     0     2     0     0     0        0     0       1      0     0     3
#3     1     0     0     2     0        0     0       0      3     0     0
#4     0     0     1     0     0        0     2       0      0     0     3
#5     0     3     0     0     1        0     0       0      0     2     0
#6     0     0     1     0     0        0     0       3      0     0     2

Here is another option with acast from reshape2

library(reshape2)
# melt() on a matrix gives one row per cell: Var1 (row), Var2 (column), value.
long <- melt(as.matrix(df))
# Cast back with rows = Var1, columns = brand, cells = Var2 (0 where absent).
acast(long, Var1 ~ value, value.var = "Var2", fill = 0)
#  Audi BMW Dodge Honda Lexus Mercedes Opel Porsche Toyota Volvo VW
#1    3   0     0     0     0        2    0       0      0     0  1
#2    0   2     0     0     0        0    0       1      0     0  3
#3    1   0     0     2     0        0    0       0      3     0  0
#4    0   0     1     0     0        0    2       0      0     0  3
#5    0   3     0     0     1        0    0       0      0     2  0
#6    0   0     1     0     0        0    0       3      0     0  2

This also works:

# Sample data from the question, read from an inline text block.
DF <- read.table(
  text =
"  VW       Mercedes  Audi                                    
   Porsche  BMW       VW                                                    
   Audi     Honda     Toyota                                             
   Dodge    Opel      VW                                     
   Lexus    Volvo     BMW                                                      
   Dodge    VW        Porsche "
)

# Character-matrix view of the data (turns factor columns into strings).
DF1 <- as.matrix(DF)
# Sorted distinct brands; these become the columns of the result.
cars <- sort(unique(as.vector(DF1)))
# Preallocate an all-zero frame: one row per input row, one column per brand.
DF2 <- setNames(data.frame(matrix(0, nrow(DF), length(cars))), cars)
# For every row, write each column position under the brand found there.
for (i in seq_len(nrow(DF))) {
  DF2[i, DF1[i, ]] <- seq_len(ncol(DF))
}

The for-loop is harmless here, since nothing is grown inside it: DF2 is allocated at its full size before the loop starts.

  Audi BMW Dodge Honda Lexus Mercedes Opel Porsche Toyota Volvo VW
1    3   0     0     0     0        2    0       0      0     0  1
2    0   2     0     0     0        0    0       1      0     0  3
3    1   0     0     2     0        0    0       0      3     0  0
4    0   0     1     0     0        0    2       0      0     0  3
5    0   3     0     0     1        0    0       0      0     2  0
6    0   0     1     0     0        0    0       3      0     0  2
> 

The for-loop is faster. Strange, isn't it?

library(microbenchmark)

#' Recode a table of labels into per-label columns holding column positions.
#'
#' @param df Data frame or matrix of labels. Defaults to the global `DF`, so
#'   the original zero-argument call `mra68()` keeps working.
#' @return A data frame with one row per row of `df` and one column per
#'   distinct label, in sorted order. A cell holds the position of the `df`
#'   column in which that label occurs in that row, or 0 if it does not occur.
mra68 <- function(df = DF) {
  # Work on a character matrix so that columns of the result are selected
  # by label rather than by factor codes or numeric position.
  labels <- as.matrix(df)
  storage.mode(labels) <- "character"
  cars <- sort(unique(as.vector(labels)))
  # Preallocate the full result; the loop below only fills cells in.
  DF2 <- data.frame(matrix(0, nrow(labels), length(cars)))
  colnames(DF2) <- cars
  # seq_len() gives an empty sequence for zero rows, where 1:nrow() would
  # iterate over c(1, 0) and fail.
  for (i in seq_len(nrow(labels))) {
    DF2[i, labels[i, ]] <- seq_len(ncol(labels))
  }
  DF2
}

#' Recode a table of labels into a numeric label-by-row matrix (apply-based).
#'
#' @param df Data frame or matrix of labels. Defaults to the global `DF`, so
#'   the original zero-argument call `DatamineR()` keeps working.
#' @return A numeric matrix with one row per row of `df` and one column per
#'   distinct label, in order of first appearance. A cell holds the value of
#'   the `df` column in which the label occurs in that row (the column name
#'   if all names are numeric, otherwise the column position), or 0 if the
#'   label does not occur in that row.
DatamineR <- function(df = DF) {
  cells <- as.matrix(df)
  brands <- unique(as.vector(cells))
  # Non-numeric column names (e.g. read.table's V1, V2, ...) would coerce to
  # NA and turn the whole result into zeros, so fall back to positions.
  col_values <- suppressWarnings(as.numeric(colnames(cells)))
  if (length(col_values) != ncol(cells) || anyNA(col_values)) {
    col_values <- as.numeric(seq_len(ncol(cells)))
  }
  # vapply() enforces one numeric value per row for every brand, so the
  # result is a numeric matrix instead of a matrix of list cells.
  vapply(
    brands,
    function(brand) {
      apply(cells, 1, function(row_values) {
        hit <- which(row_values == brand)
        # If a brand repeats within a row, report its first column.
        if (length(hit) > 0) col_values[hit[1]] else 0
      })
    },
    numeric(nrow(cells))
  )
}

# Time the loop-based and the apply-based implementations against each other.
microbenchmark(mra68(), DatamineR())

.

> microbenchmark( mra68(), DatamineR() )
Unit: milliseconds
        expr      min        lq     mean    median        uq       max neval
     mra68() 2.360912  4.618337  4.74136  4.738126  4.931509  8.496653   100
 DatamineR() 8.151552 16.083225 16.42256 16.284309 16.480636 20.860074   100

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM