I am using an SVM to classify my text, but instead of the class labels I expect, the result contains only numeric label codes and probabilities.
Data frame (rows 1:20 are the training set, rows 21:50 are the test set)
Updated:
# Example data as a dput() dump: a 50-row data frame with two factor columns.
#   text  - comma-separated term strings (36 distinct levels).
#   value - class label with levels "", "Access" and "Report/Data".
# Rows 1-20 carry a non-empty value; the remaining rows have the empty level "".
ou <- structure(list(text = structure(c(1L, 6L, 1L, 1L, 8L, 13L, 24L,
5L, 11L, 12L, 33L, 36L, 20L, 25L, 4L, 19L, 9L, 29L, 22L, 3L,
8L, 8L, 8L, 2L, 8L, 27L, 30L, 3L, 14L, 35L, 3L, 34L, 23L, 31L,
22L, 6L, 6L, 7L, 17L, 3L, 8L, 32L, 18L, 15L, 21L, 26L, 3L, 16L,
10L, 28L), .Label = c("access, access, access, access", "character(0)",
"report", "report, access", "report, access, access", "report, access, access, access",
"report, access, access, access, access, access, access", "report, access, access, access, access, access, access, access",
"report, access, access, access, access, access, access, report",
"report, access, access, access, access, access, report", "report, access, access, access, report",
"report, access, access, access, report, access", "report, access, access, report, access, access, access, access, access, access",
"report, data", "report, data, data", "report, data, data, data",
"report, data, data, data, data", "report, data, data, data, data, data",
"report, data, data, data, report, report, data, access,access",
"report, data, data, report", "report, data, report", "report, report",
"report, report, access, access, access", "report, report, access, access, report, report, report, report, report, report, data, data, report, access, report, report",
"report, report, access, report, report, report, report, report, data, data, report, access, report, report",
"report, report, access, report, report, report, report, report, report, data, data, report, access, report, report",
"report, report, data", "report, report, data, report", "report, report, report, data, report, report, data, data, report, data, data",
"report, report, report, report", "report, report, report, report, data, report, report, data, report, data, report",
"report, report, report, report, report, data, report, data, data",
"report, report, report, report, report, report, report", "report, report, report, report, report, report, report, access, access, access",
"report, report, report, report, report, report, report, report, data, data, report, access, report, report",
"report, report, report, report, report, report, report, report, report, report, data, report, report, report, report, report, report, report,report"
), class = "factor"), value = structure(c(2L, 2L, 2L, 2L, 2L,
2L, 3L, 2L, 2L, 2L, 3L, 3L, 3L, 3L, 2L, 3L, 2L, 3L, 3L, 3L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L,
1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L), .Label = c("",
"Access", "Report/Data"), class = "factor")), .Names = c("text",
"value"), class = "data.frame", row.names = c(NA, -50L))
library(RTextTools)

# Build the document-term matrix from the raw text column.
doc_matrix <- create_matrix(
  ou$text,
  language = "english",
  removeNumbers = TRUE,
  stemWords = TRUE,
  removeSparseTerms = .998
)

# The class labels must come from the target column (ou$value), not from
# ou$text. Passing as.numeric(factor(ou$text)) makes every distinct text
# string its own class, so the models predict text factor codes rather than
# "Access" / "Report/Data".
# create_container() is given numeric codes, so the factor levels are kept
# here to translate predicted codes back into label names afterwards.
label_factor <- factor(ou$value)
label_levels <- levels(label_factor)

# NOTE(review): rows 21:50 only carry the empty label "", so accuracy figures
# computed against them are not meaningful. virgin = TRUE may be the better
# setting for unlabeled test data -- confirm against the RTextTools docs.
container <- create_container(
  doc_matrix,
  as.numeric(label_factor),
  trainSize = 1:20,
  testSize = 21:50,
  virgin = FALSE
)

# Training models
SVM <- train_model(container, "SVM")
MAXENT <- train_model(container, "MAXENT")
BAGGING <- train_model(container, "BAGGING")
TREE <- train_model(container, "TREE")

# Classify data using trained models
SVM_CLASSIFY <- classify_model(container, SVM)
MAXENT_CLASSIFY <- classify_model(container, MAXENT)
BAGGING_CLASSIFY <- classify_model(container, BAGGING)

# Analytics
analytics <- create_analytics(container, SVM_CLASSIFY)
models <- train_models(container, algorithms = c("MAXENT", "SVM"))
results <- classify_models(container, models)
analytics <- create_analytics(container, results)
summary(analytics)

# NOTE: this replaces the SVM model trained above with the cross-validation
# result; the variable name is kept for compatibility with existing usage.
SVM <- cross_validate(container, 5, "SVM")

# Translate numeric label codes back into the original label names.
# as.character() comes first so the conversion is correct whether the codes
# are stored as a factor, as character or as numeric.
code_to_label <- function(codes) {
  label_levels[as.numeric(as.character(codes))]
}

# Write the document summary with readable label names next to the codes.
doc_summary <- analytics@document_summary
doc_summary$MAXENTROPY_LABEL_NAME <- code_to_label(doc_summary$MAXENTROPY_LABEL)
doc_summary$SVM_LABEL_NAME <- code_to_label(doc_summary$SVM_LABEL)
write.csv(doc_summary, "DocumentSummary.csv")
text value
21 report, access, access, access, access, access, access, access Access
22 report, access, access, access, access, access, access, access Access
23 report, access, access, access, access, access, access, access Access
24 character(0) NA
25 report, access, access, access, access, access, access, access Access
26 report, report, data Report/Data
27 report, report, report, report Report/Data
28 report Report/Data
29 report, data Report/Data
30 report, report, report, report, report, report, report, report,
data, data, report, access, report, report Report/Data
The result, including the probabilities, is:
> MAXENTROPY_LABEL MAXENTROPY_PROB SVM_LABEL SVM_PROB MANUAL_CODE CONSENSUS_CODE CONSENSUS_AGREE CONSENSUS_INCORRECT PROBABILITY_CODE PROBABILITY_INCORRECT
> 1 8 0.999999066 22 0.070090645 8 8 1 0 8 0
> 2 8 0.999999066 22 0.070090645 8 8 1 0 8 0
> 3 8 0.999999066 22 0.070090645 8 8 1 0 8 0
> 4 1 0.055555556 12 0.071384112 2 12 1 1 12 1
> 5 8 0.999999066 22 0.070090645 8 8 1 0 8 0
> 6 25 1 12 0.074126949 27 25 1 1 25 1
> 7 33 0.627904676 13 0.068572857 30 33 1 1 33 1
> 8 33 0.406792176 12 0.074592181 3 33 1 1 33 1
> 9 20 1 12 0.074507793 14 20 1 1 20 1
EDIT 1: How can I get the label names instead of the numeric SVM label codes?
What I usually do is
# Bind the original text column to the classification results, row by row.
# NOTE(review): this overwrites `ou`, and the first column is presumably named
# after the expression `ou$text` -- confirm before referring to it by name.
ou <- cbind(ou$text, results)
And to have the labels printed:
# Translate the predicted SVM code of each row into a readable label;
# any code other than "1" or "-1" falls back to the "NONE" placeholder.
# %in% never yields NA, so rows with a missing code also stay "NONE".
ou$value <- ifelse(
  results$SVM_LABEL %in% "1",
  "Access",
  ifelse(results$SVM_LABEL %in% "-1", "Report/Data", "NONE")
)
ou
(assuming you used 1 and -1 when training the model)
I know it's a little bit primitive, but it's clear and works fine.
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.