I have the following data frame, named DF, in R:
1 2 3
1 VW Mercedes Audi
2 Porsche BMW VW
3 Audi Honda Toyota
4 Dodge Opel VW
5 Lexus Volvo BMW
6 Dodge VW Porsche
I want to create a new data frame (DF2) in which the distinct elements of DF become the column names, and the column names of DF become the elements of DF2:
Audi BMW Dodge Honda Lexus Mercedes Opel Porsche Toyota Volvo VW
1 3 0 0 0 0 2 0 0 0 0 1
2 0 2 0 0 0 0 0 1 0 0 3
3 1 0 0 2 0 0 0 0 3 0 0
4 0 0 1 0 0 0 2 0 0 0 3
5 0 3 0 0 1 0 0 0 0 2 0
6 0 0 1 0 0 0 0 3 0 0 2
Try this:
# Build a rows-by-make matrix: each cell holds the column position at which
# that make occurs in the corresponding row of DF, or 0 when it is absent.
cells <- as.matrix(DF)
# Distinct makes in order of first appearance (read column by column).
makes <- unique(c(cells))
x <- vapply(
  makes,
  # Look up the column *position* with match() instead of converting the
  # column names to numbers, so the result does not depend on the columns
  # being named "1", "2", "3" (e.g. read.table() names them V1, V2, V3).
  function(make) {
    as.numeric(apply(cells, 1, function(row) match(make, row, nomatch = 0L)))
  },
  numeric(nrow(cells))
)
x
VW Porsche Audi Dodge Lexus Mercedes BMW Honda Opel Volvo Toyota
[1,] 1 0 3 0 0 2 0 0 0 0 0
[2,] 3 1 0 0 0 0 2 0 0 0 0
[3,] 0 0 1 0 0 0 0 2 0 0 3
[4,] 3 0 0 1 0 0 0 0 2 0 0
[5,] 0 0 0 0 1 0 3 0 0 2 0
[6,] 2 3 0 1 0 0 0 0 0 0 0
Another alternative:
library(tidyr)
library(dplyr)
# Same reshaping as a pipe chain, written as named intermediate steps.
# 1. Move the row names into a `rowname` column so each row keeps an id.
with_ids <- add_rownames(DF)
# 2. Long form: one row per cell; `key` holds the original column name
#    (type-converted by convert = TRUE), `value` holds the make.
long_form <- gather(with_ids, key, value, -rowname, convert = TRUE)
# 3. Wide form: one column per make, filled with `key`; absent makes get 0.
wide_form <- spread(long_form, value, key, fill = 0)
# 4. Drop the helper id column and print the result.
select(wide_form, -rowname)
Which gives:
#Source: local data frame [6 x 11]
#
# Audi BMW Dodge Honda Lexus Mercedes Opel Porsche Toyota Volvo VW
# (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl) (dbl)
#1 3 0 0 0 0 2 0 0 0 0 1
#2 0 2 0 0 0 0 0 1 0 0 3
#3 1 0 0 2 0 0 0 0 3 0 0
#4 0 0 1 0 0 0 2 0 0 0 3
#5 0 3 0 0 1 0 0 0 0 2 0
#6 0 0 1 0 0 0 0 3 0 0 2
Here is another option, using acast from the reshape2 package:
library(reshape2)
# Melt the matrix to long form (columns Var1, Var2, value), then cast it back
# to wide form: one row per Var1, one column per distinct value, each cell
# filled with Var2 and missing combinations filled with 0.
long_cells <- melt(as.matrix(df))
acast(long_cells, Var1 ~ value, value.var = "Var2", fill = 0)
# Audi BMW Dodge Honda Lexus Mercedes Opel Porsche Toyota Volvo VW
#1 3 0 0 0 0 2 0 0 0 0 1
#2 0 2 0 0 0 0 0 1 0 0 3
#3 1 0 0 2 0 0 0 0 3 0 0
#4 0 0 1 0 0 0 2 0 0 0 3
#5 0 3 0 0 1 0 0 0 0 2 0
#6 0 0 1 0 0 0 0 3 0 0 2
This also works:
# Example data: six rows of three car makes each, parsed from inline text.
DF <- read.table(text = " VW Mercedes Audi
Porsche BMW VW
Audi Honda Toyota
Dodge Opel VW
Lexus Volvo BMW
Dodge VW Porsche ")

# Character copy of every cell, so factor columns (if any) become strings.
DF1 <- apply(DF, c(1, 2), as.character)

# One output column per distinct make, in sorted order.
cars <- sort(unique(as.vector(DF1)))

# Preallocate an all-zero table of the final size and label its columns.
DF2 <- data.frame(matrix(0, nrow = nrow(DF), ncol = length(cars)))
colnames(DF2) <- cars

# For each row, write 1..ncol(DF) into the columns named by that row's makes,
# i.e. every make receives the position at which it appears in the row.
col_positions <- seq_len(ncol(DF))
for (row_idx in seq_len(nrow(DF))) {
  DF2[row_idx, DF1[row_idx, ]] <- col_positions
}
The for-loop is harmless here, since nothing is grown inside it (DF2 is allocated at its final size before the loop).
Audi BMW Dodge Honda Lexus Mercedes Opel Porsche Toyota Volvo VW
1 3 0 0 0 0 2 0 0 0 0 1
2 0 2 0 0 0 0 0 1 0 0 3
3 1 0 0 2 0 0 0 0 3 0 0
4 0 0 1 0 0 0 2 0 0 0 3
5 0 3 0 0 1 0 0 0 0 2 0
6 0 0 1 0 0 0 0 3 0 0 2
>
The for-loop is faster. Strange, isn't it?
library(microbenchmark)
# Position table built with a preallocated data frame and a row loop.
#
# Reads the global data frame `DF`. Returns a data frame with one row per row
# of `DF` and one column per distinct cell value (sorted); each cell holds the
# column position at which that value occurs in the corresponding row of `DF`,
# or 0 when the row does not contain it.
mra68 <- function() {
  # as.matrix() converts the whole table in one call (factor columns become
  # strings); forcing character storage keeps the cells usable as column
  # names even when DF is numeric or has zero rows.
  cells <- as.matrix(DF)
  storage.mode(cells) <- "character"
  cars <- sort(unique(c(cells)))
  # Preallocate the full result so nothing grows inside the loop.
  DF2 <- data.frame(matrix(0, nrow(DF), length(cars)))
  colnames(DF2) <- cars
  # Loop-invariant: the positions 1..ncol(DF) written into every row.
  positions <- seq_len(ncol(DF))
  # seq_len() yields an empty sequence for a zero-row DF, whereas 1:nrow(DF)
  # would iterate over c(1, 0) and fail.
  for (i in seq_len(nrow(DF))) {
    DF2[i, cells[i, ]] <- positions
  }
  DF2
}
# Position table built with nested apply-style calls (one pass per make).
#
# Reads the global data frame `DF`. Returns a numeric matrix with one row per
# row of `DF` and one column per distinct cell value, in order of first
# appearance (read column by column); each cell holds the column position at
# which that value occurs in the corresponding row of `DF`, or 0 when absent.
DatamineR <- function() {
  cells <- as.matrix(DF)
  makes <- unique(c(cells))
  vapply(
    makes,
    # match() gives the column position directly. Converting names(DF) to
    # numbers only works when the columns are literally named "1", "2", ...;
    # with names such as V1, V2, V3 it produced NA and hence all zeros.
    function(make) {
      as.numeric(apply(cells, 1, function(row) match(make, row, nomatch = 0L)))
    },
    numeric(nrow(cells))
  )
}
# Time the two implementations defined above against each other; both read
# the same global DF.
microbenchmark( mra68(), DatamineR() )
.
> microbenchmark( mra68(), DatamineR() )
Unit: milliseconds
expr min lq mean median uq max neval
mra68() 2.360912 4.618337 4.74136 4.738126 4.931509 8.496653 100
DatamineR() 8.151552 16.083225 16.42256 16.284309 16.480636 20.860074 100
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.