使用 caret 包的随机森林无法正常工作

Question

我正在尝试在 Shiny 应用中使用 caret 包训练随机森林，并想从随机森林模型的输出中打印变量重要性。代码如下：

library(shiny)

pacman::p_load(Amelia,broom,caret,cluster,clustertend,clValid,corrplot,dbscan,dplyr,DT,data.table,forecast,fpc,FPDclustering,fpp,GGally,ggfortify,ggraph,ggplot2,ggrepel,ggthemes,gmodels,googleVis,gridExtra,igraph,knitr,mice,missForest,NbClust,optCluster,pacman,plyr,purrr,qcc,randomForest,rCharts,reshape2,tibble,tidyr,tidyverse,TSA,tseries,vegan,VIM,zoo) 

library(markdown)
library(imputeTS)

# Define UI for application

 ui <- navbarPage(

  # Title shown for the app
  titlePanel("MS IIMA_paper ShinyApp "),

  # Sidebar layout: input controls in the sidebar, results in the main panel
  sidebarLayout(

    # ---- Sidebar: data upload and parsing options ----
    sidebarPanel(

      # CSV upload widget; the server reads the file via input$dataset
      fileInput(
        inputId = "dataset",
        label = "Choose CSV File",
        multiple = TRUE,
        accept = c("text/csv", "text/comma-separated-values,text/plain", ".csv")
      ),

      # Hint shown under the upload widget
      helpText("Note: First select the dataset of csv format only for the App to give any insight!!"),

      tags$hr(),

      # Whether the first row of the file holds column names
      checkboxInput(inputId = "header", label = "Header", value = TRUE),

      # Field separator handed to read.csv() on the server side
      radioButtons(
        inputId = "sep",
        label = "Separator",
        choices = c(Comma = ",", Semicolon = ";", Tab = "\t"),
        selected = ","
      ),

      tags$hr(),

      # The server only (re)loads the data when this button fires, so the
      # expensive downstream computations are not rerun on every input change.
      actionButton(inputId = "update", label = "Update button", class = "btn-primary"),

      tags$hr()
    ),

    # ---- Main panel: text outputs rendered by the server ----
    mainPanel(
      tabsetPanel(
        navbarMenu(
          title = "Random Forest",

          # str() of the prepared data (output$summary_rf)
          tabPanel(
            title = "Structure of Data",
            h4("Input data str"),
            verbatimTextOutput(outputId = "summary_rf")
          ),

          # Variable-importance table (output$varImp)
          tabPanel(
            title = "VarImp-Gini (Table)",
            h4("Variable Importance"),
            verbatimTextOutput(outputId = "varImp")
          )
        )
      )
    )
  )
)

# Define server logic required to draw a histogram

 server <- function(input, output) {
  # Shiny server function for the app.
  #
  # Pipeline built from chained reactives:
  #   uploaded CSV -> numeric columns only -> NAs replaced by 0 -> first
  #   10000 rows -> k-means clusters appended -> caret random forest ->
  #   variable importance.
  #
  # Args:
  #   input:  Shiny input list (reads dataset, header, sep, update).
  #   output: Shiny output list (writes summary_rf and varImp).
  #
  # This is the code as posted in the question. The statements are kept as
  # posted so that the reported error and the answer still match; known
  # problems are flagged with NOTE(review) comments instead of being changed.

  # Read the uploaded CSV when the "update" button fires.
  # NOTE(review): with ignoreNULL = FALSE this also runs before the button has
  # been clicked, when input$dataset may still be NULL -- confirm read.csv()
  # copes with that (a req(input$dataset) guard would avoid it).
  datasetInput <- eventReactive(input$update, {
    read.csv(input$dataset$datapath,
             header = input$header,
             sep = input$sep)
  }, ignoreNULL = FALSE)

  #Selecting only numeric variables
  # MS.num() is a logical flag per column; MS.DATA.IN.NUM() keeps flagged ones.
  MS.num<- reactive({sapply(datasetInput(), is.numeric)})
  MS.DATA.IN.NUM <- reactive({datasetInput()[ , MS.num()]})
  # imputing NAs by zeros
  df<- reactive({imputeTS::na.replace(MS.DATA.IN.NUM(), 0)})
  # Keeping a sample of 10k for modeling
  # NOTE(review): indexing 1:10000 on a data frame with fewer rows yields
  # all-NA rows; head(df(), 10000) would not.
  sample_data <-reactive({df()[1:10000,]})


  #### Kmeans

  # Number of clusters requested from kmeans().
  opt.cluster=9
  # NOTE(review): this set.seed() runs once when the server function is
  # called, not inside the reactive below, so it presumably does not make the
  # k-means result reproducible on later recomputations -- confirm.
  set.seed(115)
  MS.DATA.KMEANS.Mdl <- reactive({kmeans(scale(sample_data()),opt.cluster,nstart=25)})

  # appending clusters to the raw sample data

  # Cluster assignment vector taken from the fitted k-means model.
  x<-reactive({
    cluster<-MS.DATA.KMEANS.Mdl()$cluster
    cluster
  })

  # NOTE(review): the cluster vector is bound on without an explicit column
  # name; the code below refers to that column as "x()". No column named
  # cluster.kmeans is created anywhere in this function.
    add_to_df <- reactive({
    sample_data1<-cbind(sample_data(),x())
    sample_data1

  })

  ##### Random Forest on Kmeans data
  # cluster.means1<-reactive({
  #   cluster1<-MS.DATA.KMEANS.Mdl()$cluster
  #   cluster1
  # })

  # Convert the appended cluster column (selected by the name "x()") to a
  # factor.
  # NOTE(review): mutate_each() and funs() are deprecated in current dplyr;
  # mutate(across(...)) is the replacement.
  MS.DATA_KMEANS<-reactive({
    df<-add_to_df() %>% mutate_each(funs(as.factor),one_of("x()"))
    df
  }) 

  # Generate a summary of the dataset ----
  output$summary_rf <- renderPrint({
    dataset <- MS.DATA_KMEANS()
    str(dataset)
  })

  #### Caret package

  # for var imp using caret

  # create training and test sets (75:25 split) using 'caret' package
  # NOTE(review): as with set.seed(115) above, this seed is set outside the
  # reactives that do the random work.
  set.seed(123)# for reproducibility
  # NOTE(review): this is the expression behind the reported
  # "attempt to apply non-function" error: `$cluster.kmeans()` extracts a
  # column and then tries to call it as a function. Only MS.DATA_KMEANS is a
  # reactive. Dropping the trailing "()" is the fix proposed in the answer,
  # but see the note on add_to_df: the visible code never names a column
  # cluster.kmeans.
  inTrain <- reactive({caret::createDataPartition(y = as.factor(MS.DATA_KMEANS()$cluster.kmeans()), p = 0.75, list = FALSE)}) 

  # subset
  # Rows selected by inTrain() form the training set; the rest are for testing.
  training <- reactive({MS.DATA_KMEANS()[inTrain(), ]}) 
  testing <- reactive({MS.DATA_KMEANS()[-inTrain(), ]}) 


  # Random forest using caret package
  set.seed(122)
  # NOTE(review): `number = 25` is an argument of train() here, not of
  # trainControl(); presumably it was meant as the number of CV folds, i.e.
  # trainControl(method = "cv", number = ...) -- confirm. The formula also
  # relies on a column named cluster.kmeans (see the note on add_to_df).
  modFit.rfcaret <- reactive({caret::train(cluster.kmeans~ ., method = "rf",data =training(),trControl = trainControl(method = "cv"), number = 25)}) 

  # Print the unscaled variable importance as a table; keep.rownames = TRUE
  # moves the row names into a regular column.
  output$varImp <- renderPrint({
    rfImp=varImp(modFit.rfcaret(),scale = FALSE)
    dataset<-setDT(rfImp$importance, keep.rownames = TRUE)[]
    dataset
  })


}

# Launch the Shiny app from the `ui` object and the `server` function
shinyApp(ui = ui, server = server)

我收到以下错误：

Warning: Error in caret::createDataPartition: attempt to apply non-function
Stack trace (innermost first):
    130: caret::createDataPartition
    129: <reactive:inTrain> [C:\Users\ADMIN\Documents\shiny_test/app.R#155]
    118: inTrain
    117: [.data.frame
    116: [ [C:\Users\ADMIN\Documents\shiny_test/app.R#158]
    115: <reactive:training> [C:\Users\ADMIN\Documents\shiny_test/app.R#158]
    104: training
    103: eval
    102: eval
    101: eval.parent
    100: train.formula
     99: caret::train
     98: <reactive:modFit.rfcaret> [C:\Users\ADMIN\Documents\shiny_test/app.R#164]
     87: modFit.rfcaret
     86: varImp
     85: renderPrint [C:\Users\ADMIN\Documents\shiny_test/app.R#167]
     84: func
     83: eval
     82: eval
     81: withVisible
     80: evalVis
     79: utils::capture.output
     78: paste
     77: origRenderFunc
     76: output$varImp
      1: runApp

我无法调试问题所在。

head(df) # after appending cluster.kmeans to raw data

##   mob.data mob.sms mob.voice prepaid.sms mob.pre.voice mob.data.upld
## 1      634      80      85.8           2           8.8            46
## 2       16      27     247.9          39           3.0             0
## 3      560     532       8.3           3          59.1           231
## 4     5582      23     157.4         942           2.6           385
## 5     7176      31      27.6         300           3.5             0
## 6      524      76      13.5          11           8.4           268
##   mob.data.dwnld mob.sms.amt mob.voice.amt mob.data.amt cluster.kmeans
## 1            628        1.39          34.8           35              5
## 2             18        0.52           1.2           20              9
## 3            660        2.61           2.6           37              2
## 4           5212        2.09           2.0           35              8
## 5           6817        8.70           1.0           31              8
## 6            476        0.17           2.6           35              8

Answer 1

错误表明与createDataPartition（）有关的部分代码一定存在问题。 快速查看您的代码后，我认为问题可能恰在这部分：

...createDataPartition(y = as.factor(MS.DATA_KMEANS()$cluster.kmeans())...

这里更清楚：

MS.DATA_KMEANS()$cluster.kmeans()

cluster.kmeans 不是 reactive 表达式，而只是数据框中的一列；只有 MS.DATA_KMEANS() 是 reactive 的。因此这里应该写 cluster.kmeans（不带括号）。解决方案如下：

...createDataPartition(y = as.factor(MS.DATA_KMEANS()$cluster.kmeans)...

使用插入符号的随机森林无法正常工作

问题描述

1 个解决方案

解决方案1
0 2017-08-21 11:21:12

使用插入符号的随机森林无法正常工作

问题描述

1 个解决方案

解决方案1 0 2017-08-21 11:21:12

解决方案1
0 2017-08-21 11:21:12