简体   繁体   中英

Adaboost: Problem with confusion matrix - `data` and `reference` should be factors with the same levels

I'm new to ML and I have a problem with my confusion matrix. Unfortunately, I get the following error (it occurs when generating the confusion matrix):

data and reference should be factors with the same levels.

Here is my code:

# Script that reproduces the error: fit an AdaBoost classifier and try to
# build a confusion matrix from its predictions on the training data.
# caret supplies createDataPartition() and confusionMatrix();
# fastAdaboost supplies adaboost().
library(caret)
library(fastAdaboost)

# Read the semicolon-separated CSV and keep eight columns, selected by position.
data <- read.csv('~/Desktop/test1.csv', sep = ";")
data1 <- subset(data,select=c(4,5,6,7,8,12,15,16))

# Fixed seed so the train/test split is reproducible.
set.seed(1234)
# p = 0.7 puts about 70% of the rows into the training set; list = F asks for
# the row indices in a non-list form so they can be used directly as a subscript.
parts = createDataPartition(data1$Status.szkody, p = 0.7, list = F)
train = data1[parts, ]
test = data1[-parts, ]

# Model Status.szkody on all remaining columns. The unnamed third argument 6
# is presumably the number of boosting iterations (nIter) -- confirm against
# the fastAdaboost documentation.
model <- adaboost(Status.szkody ~., data = train,6)

# Predict on the training data. Per the str(a) output shown below, `a` is a
# list of 5 elements (formula, votes, class, prob, error), not a factor.
a <- predict(model, train, type = "class")

# The response is converted to a factor only here, after partitioning and fitting.
train$Status.szkody = as.factor(train$Status.szkody)
# This is the call that raises the error: the whole list `a` is passed as the
# predictions, while the reference is a factor.
confusionMatrix(a,train$Status.szkody, mode = "everything")

I see that "train$Status.szkody" has levels and "a" does not, but how do I deal with it?

> str(a)
List of 5
 $ formula:Class 'formula'  language Status.szkody ~ .
  .. ..- attr(*, ".Environment")=<environment: R_GlobalEnv> 
 $ votes  : num [1:40845, 1:2] 1.14 1.77 1.59 1.35 1.77 ...
 $ class  : Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
 $ prob   : num [1:40845, 1:2] 0.644 1 0.9 0.762 1 ...
 $ error  : num 0.234
> str(train$Status.szkody)
 Factor w/ 2 levels "0","1": 1 1 1 1 1 1 1 1 1 1 ...
> levels(a)
NULL
> levels(train$Status.szkody)
[1] "0" "1"

Moreover, I tried "cvms::confusion_matrix(train$Status.szkody,a)", but that gives a different error: 'targets' and 'predictions' must have same length.

Any help would be greatly appreciated, because I do not know how to deal with it. Thanks in advance.

Edit1:

dput(head(data1,30))

structure(list(Miesiąc = c("styczeń", "luty", "styczeń", "styczeń", 
"styczeń", "styczeń", "styczeń", "styczeń", "styczeń", "styczeń", 
"styczeń", "styczeń", "styczeń", "styczeń", "styczeń", "styczeń", 
"styczeń", "styczeń", "styczeń", "styczeń", "styczeń", "styczeń", 
"styczeń", "styczeń", "styczeń", "styczeń", "styczeń", "styczeń", 
"styczeń", "styczeń"), Kwartał = c(1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), Terminal = c("Katowice", "Legnica", 
"Katowice", "Legnica", "Sosnowiec", "Wrocław", "Legnica", "Katowice", 
"Katowice", "Legnica", "Gliwice", "Wrocław", "Wrocław", "Legnica", 
"Wrocław", "Legnica", "Sosnowiec", "Wrocław", "Katowice", "Gliwice", 
"Gliwice", "Gliwice", "Katowice", "Wrocław", "Legnica", "Legnica", 
"Gliwice", "Legnica", "Katowice", "Legnica"), Towar = c("RTV", 
"RTV", "Telefony", "AGD", "Komputery", "AGD małe", "AGD do zabudowy", 
"Telefony", "RTV", "AGD małe", "AGD", "RTV", "Komputery", "AGD małe", 
"RTV", "AGD do zabudowy", "RTV", "Komputery", "Telefony", "Komputery", 
"RTV", "AGD małe", "AGD małe", "AGD", "Telefony", "Telefony", 
"AGD małe", "AGD do zabudowy", "AGD do zabudowy", "AGD do zabudowy"
), Status.szkody = c(0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 
0L, 0L, 0L, 0L, 0L, 1L, 0L, 0L, 0L, 0L, 0L, 0L, 1L, 1L, 0L, 0L, 
1L, 0L, 0L, 0L), Kraj = c("PL", "PL", "PL", "PL", "PL", "PL", 
"PL", "PL", "PL", "DE", "DE", "DE", "DE", "PL", "PL", "PL", "PL", 
"PL", "PL", "DE", "DE", "DE", "DE", "DE", "DE", "AT", "DE", "DE", 
"AT", "DE"), Usługa = c("Express", "Express", "Express", "Express", 
"Express", "Express", "Express", "Express", "Express", "Express", 
"Express", "Express", "Express", "Express", "Express", "Express", 
"Express", "Express", "Express", "Express", "Express", "Express", 
"Express", "Express", "Express", "Express", "Express", "Express", 
"Express", "Express"), Partner = c("Partner D", "Partner A", 
"Partner D", "Partner A", "Partner C", "Partner D", "Partner D", 
"Partner A", "Partner D", "Partner B", "Partner C", "Partner A", 
"Partner C", "Partner B", "Partner D", "Partner B", "Partner D", 
"Partner E", "Partner B", "Partner D", "Partner E", "Partner D", 
"Partner E", "Partner B", "Partner D", "Partner D", "Partner C", 
"Partner A", "Partner E", "Partner B")), row.names = c(NA, 30L
), class = "data.frame")

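You should use a$class, which holds your model's predicted classes as a factor. As the str(a) output above shows, predict() returns a list here (formula, votes, class, prob, error), so "a" itself has no levels and cannot be passed to confusionMatrix() directly. You can use the following code: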

# Fit an AdaBoost classifier and build a confusion matrix from its predictions
# on the training data. Expects `data1` to exist already (for example, rebuilt
# from the dput() output in the question).
library(caret)
library(fastAdaboost)

# Fixed seed so the train/test split is reproducible.
set.seed(1234)
# p = 0.7 puts about 70% of the rows into the training set. Spell out FALSE:
# `F` is an ordinary variable that can be reassigned.
parts <- createDataPartition(data1$Status.szkody, p = 0.7, list = FALSE)
train <- data1[parts, ]
test <- data1[-parts, ]

# Model Status.szkody on all remaining columns with 6 boosting iterations;
# naming nIter avoids an unnamed positional argument after a named one.
model <- adaboost(Status.szkody ~ ., data = train, nIter = 6)

# predict() returns a list here (formula, votes, class, prob, error);
# the predicted classes are the factor stored in a$class.
a <- predict(model, train, type = "class")

# confusionMatrix() needs both arguments to be factors with the same levels,
# so convert the reference and pass a$class rather than the whole list `a`.
train$Status.szkody <- as.factor(train$Status.szkody)
confusionMatrix(a$class, train$Status.szkody, mode = "everything")
#> Warning in confusionMatrix.default(a$class, train$Status.szkody, mode =
#> "everything"): Levels are not in the same order for reference and data.
#> Refactoring data to match.
#> Confusion Matrix and Statistics
#> 
#>           Reference
#> Prediction  0  1
#>          0 20  1
#>          1  0  0
#>                                           
#>                Accuracy : 0.9524          
#>                  95% CI : (0.7618, 0.9988)
#>     No Information Rate : 0.9524          
#>     P-Value [Acc > NIR] : 0.7358          
#>                                           
#>                   Kappa : 0               
#>                                           
#>  Mcnemar's Test P-Value : 1.0000          
#>                                           
#>             Sensitivity : 1.0000          
#>             Specificity : 0.0000          
#>          Pos Pred Value : 0.9524          
#>          Neg Pred Value :    NaN          
#>               Precision : 0.9524          
#>                  Recall : 1.0000          
#>                      F1 : 0.9756          
#>              Prevalence : 0.9524          
#>          Detection Rate : 0.9524          
#>    Detection Prevalence : 1.0000          
#>       Balanced Accuracy : 0.5000          
#>                                           
#>        'Positive' Class : 0               
#> 

Created on 2022-07-23 by the reprex package (v2.0.1)

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM