
Spark ClassCastException: JavaRDD cannot be cast to org.apache.spark.mllib.linalg.Vector

I want to implement an ARIMA time series model in Java. I have the following Scala code:

import org.apache.spark.mllib.linalg.Vectors
import com.cloudera.sparkts.models.ARIMA

object SingleSeriesARIMA {
    def main(args: Array[String]): Unit = {
        // The dataset is sampled from an ARIMA(1, 0, 1) model generated in R.
        val lines = scala.io.Source.fromFile("../data/R_ARIMA_DataSet1.csv").getLines()
        val ts = Vectors.dense(lines.map(_.toDouble).toArray)
        val arimaModel = ARIMA.fitModel(1, 0, 1, ts)
        println("coefficients: " + arimaModel.coefficients.mkString(","))
        val forecast = arimaModel.forecast(ts, 20)
        println("forecast of next 20 observations: " + forecast.toArray.mkString(","))
    }
}

I have tried the following solution:

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import com.cloudera.sparkts.models.ARIMA;
import com.cloudera.sparkts.models.ARIMAModel;

public class JavaARIMA {

    public static void main(String args[]) {
        System.setProperty("hadoop.home.dir", "C:/winutils");
        SparkConf conf = new SparkConf()
                .setAppName("Spark-TS Ticker Example")
                .setMaster("local")
                .set("spark.sql.warehouse.dir", "file:///C:/Users/devanshi/Downloads/Spark/sparkdemo/spark-warehouse/");
        JavaSparkContext context = new JavaSparkContext(conf);

        JavaRDD<String> lines = context.textFile("path/inputfile");

        // Parse each line of comma-separated values into a dense Vector.
        JavaRDD<Vector> ts = lines.map(
                new Function<String, Vector>() {
                    public Vector call(String s) {
                        String[] sarray = s.split(",");
                        double[] values = new double[sarray.length];
                        for (int i = 0; i < sarray.length; i++) {
                            values[i] = Double.parseDouble(sarray[i]);
                        }
                        return Vectors.dense(values);
                    }
                }
        );

        double[] total = {1.0, 0.0, 1.0};
        //DenseVector dv = new DenseVector(total);
        //convert(dv,toBreeze());
        //ARIMAModel arimaModel = ARIMA.fitModel(1, 0, 1, dv, true, "css-cgd", null);
        ARIMAModel arimaModel = ARIMA.fitModel(1, 0, 1, (Vector) ts, false, "css-cgd", total);

        //arimaModel = ARIMA.fitModel(1, 0, 1, ts);
        System.out.println("coefficients: " + arimaModel.coefficients());
        Vector forcst = arimaModel.forecast((Vector) ts, 20);
        System.out.println("forecast of next 20 observations: " + forcst);
    }
}

But I got:

Exception in thread "main" java.lang.ClassCastException:
org.apache.spark.api.java.JavaRDD cannot be cast to
org.apache.spark.mllib.linalg.Vector

Please help me if possible.

You cannot cast a JavaRDD to a Vector; a JavaRDD<Vector> is a distributed collection of Vectors, not a Vector itself. Instead, you need to use rdd.foreach to get at the individual Vectors. So the code could look like this:

ts.foreach(new VoidFunction<Vector>() {
    @Override
    public void call(Vector v) throws Exception {
        // Fit a separate ARIMA(1, 0, 1) model to each Vector in the RDD;
        // v is already a Vector, so no cast is needed.
        double[] total = { 1.0, 0.0, 1.0 };
        ARIMAModel arimaModel = ARIMA.fitModel(1, 0, 1, v, false, "css-cgd", total);

        System.out.println("coefficients: " + arimaModel.coefficients());
        Vector forcst = arimaModel.forecast(v, 20);
        System.out.println("forecast of next 20 observations: " + forcst);
    }
});

Hope this helps...
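A further note on the foreach approach: it runs on the executors (so on a cluster the println output lands in the executor logs, not the driver console), and it fits a separate model for each element of the RDD. If the input file instead holds one value per line, as in the Scala example, the closer translation is to collect the values onto the driver and fit a single model over the whole series. Below is a minimal sketch of that approach, not part of the original answer: it assumes Java 8+, one numeric value per line, and reuses the lines RDD and the seven-argument ARIMA.fitModel overload already used in the question.

import java.util.Arrays;
import java.util.List;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.mllib.linalg.Vector;
import org.apache.spark.mllib.linalg.Vectors;
import com.cloudera.sparkts.models.ARIMA;
import com.cloudera.sparkts.models.ARIMAModel;

// Collect the per-line values onto the driver and build one dense Vector,
// mirroring Vectors.dense(lines.map(_.toDouble).toArray) in the Scala example.
List<Double> collected = lines.map(Double::parseDouble).collect();
double[] values = new double[collected.size()];
for (int i = 0; i < values.length; i++) {
    values[i] = collected.get(i);
}
Vector series = Vectors.dense(values);

// Fit a single ARIMA(1, 0, 1) model over the full series on the driver.
ARIMAModel model = ARIMA.fitModel(1, 0, 1, series, true, "css-cgd", null);
System.out.println("coefficients: " + Arrays.toString(model.coefficients()));
Vector forecast = model.forecast(series, 20);
System.out.println("forecast of next 20 observations: " + forecast);

This keeps the whole series in driver memory, which is fine for a single series of modest length; the foreach version above is the right shape when each RDD element is itself a complete comma-separated series.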
