
Cassandra Hadoop MapReduce : java.lang.ClassCastException: java.util.HashMap cannot be cast to java.nio.ByteBuffer

I'm trying to create a MapReduce job with Apache Cassandra. The input data comes from Cassandra and the output goes back to Cassandra too.

The program selects all the data from a table called tweetstore and then inserts the count of rows that contain a given user name.

This is the main class of the MapReduce job:

package com.cassandra.hadoop;
import java.io.*;
import java.lang.*;
import java.util.*;
import java.nio.ByteBuffer;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.cassandra.hadoop.ColumnFamilyInputFormat;
import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.hadoop.cql3.*;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.thrift.IndexExpression;
import org.apache.cassandra.thrift.IndexOperator;
import org.apache.cassandra.utils.ByteBufferUtil;

public class App 
{
static final String KEYSPACE_NAME = "tweet_cassandra_map_reduce";
static final String INPUT_COLUMN_FAMILY = "tweetstore";
static final String OUTPUT_COLUMN_FAMILY = "tweetcount";
static final String COLUMN_NAME = "user";

public static void main( String[] args ) throws IOException, InterruptedException, ClassNotFoundException 
{
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = new Job(conf, "tweet count");
    job.setJarByClass(App.class);

    // mapper configuration.
    job.setMapperClass(TweetMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setInputFormatClass(ColumnFamilyInputFormat.class);

    // Reducer configuration
    job.setReducerClass(TweetAggregator.class);
    job.setOutputKeyClass(ByteBuffer.class);
    job.setOutputValueClass(List.class);
    job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

    // Cassandra input column family configuration
    ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE_NAME, INPUT_COLUMN_FAMILY);

    job.setInputFormatClass(ColumnFamilyInputFormat.class);
    SlicePredicate slicePredicate = new SlicePredicate();
    slicePredicate.setSlice_range(new SliceRange(ByteBufferUtil.EMPTY_BYTE_BUFFER, ByteBufferUtil.EMPTY_BYTE_BUFFER, false, Integer.MAX_VALUE));

    // Prepare index expression.
    IndexExpression ixpr = new IndexExpression();
    ixpr.setColumn_name(ByteBufferUtil.bytes(COLUMN_NAME));
    ixpr.setOp(IndexOperator.EQ);
    ixpr.setValue(ByteBufferUtil.bytes(otherArgs.length > 0 && !StringUtils.isBlank(otherArgs[0]) ? otherArgs[0] : "mevivs"));

    List<IndexExpression> ixpressions = new ArrayList<IndexExpression>();
    ixpressions.add(ixpr);
    ConfigHelper.setInputRange(job.getConfiguration(), ixpressions);
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), slicePredicate);

    // Cassandra output family configuration.
    ConfigHelper.setOutputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
    ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE_NAME, OUTPUT_COLUMN_FAMILY);
    job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
    job.getConfiguration().set("row_key", "key");
    job.waitForCompletion(true);
}
}

The mapper code:

package com.cassandra.hadoop;
import java.io.*;
import java.lang.*;
import java.util.*;
import java.nio.ByteBuffer;
import java.util.SortedMap;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.cassandra.db.Column;
import org.apache.cassandra.utils.ByteBufferUtil;

public class TweetMapper extends Mapper<ByteBuffer, SortedMap<ByteBuffer, Column>, Text, IntWritable>
{
static final String COLUMN_NAME = App.COLUMN_NAME;
private final static IntWritable one = new IntWritable(1);

/* (non-Javadoc)
* @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context)
*/
public void map(ByteBuffer key, SortedMap<ByteBuffer, Column> columns, Context context) throws IOException, InterruptedException
{
    Column column = columns.get(ByteBufferUtil.bytes(COLUMN_NAME));
    String value = ByteBufferUtil.string(column.value());
    context.write(new Text(value), one);
}
}

The reducer code:

package com.cassandra.hadoop;
import java.io.IOException;
import java.util.*;
import java.lang.*;
import java.io.*;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.thrift.Column;
import org.apache.cassandra.thrift.Mutation;
import org.apache.cassandra.thrift.ColumnOrSuperColumn;
import org.apache.cassandra.db.marshal.Int32Type;

public class TweetAggregator extends Reducer<Text,IntWritable, Map<String,ByteBuffer>, List<ByteBuffer>>
{
private static Map<String,ByteBuffer> keys = new HashMap<>();
public void reduce(Text word, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
{
    int sum = 0;
    for (IntWritable val : values)
        sum += val.get();
    System.out.println("writing");
    keys.put("key", ByteBufferUtil.bytes(word.toString()));
    context.write(keys, getBindVariables(word, sum));
}

private List<ByteBuffer> getBindVariables(Text word, int sum)
{
    List<ByteBuffer> variables = new ArrayList<ByteBuffer>();
    variables.add(Int32Type.instance.decompose(sum));
    return variables;
}
 }

The problem is that when I execute the job with the hadoop command, this error appears at the reduce step:

15/02/14 16:53:13 WARN hadoop.AbstractColumnFamilyInputFormat: ignoring  jobKeyRange specified without start_key
15/02/14 16:53:14 INFO mapred.JobClient: Running job: job_201502141652_0001
15/02/14 16:53:15 INFO mapred.JobClient:  map 0% reduce 0%
15/02/14 16:53:20 INFO mapred.JobClient:  map 66% reduce 0%
15/02/14 16:53:22 INFO mapred.JobClient:  map 100% reduce 0%
15/02/14 16:53:28 INFO mapred.JobClient:  map 100% reduce 33%
15/02/14 16:53:30 INFO mapred.JobClient: Task Id : attempt_201502141652_0001_r_000000_0, Status : FAILED
java.lang.ClassCastException: java.util.HashMap cannot be cast to java.nio.ByteBuffer
at org.apache.cassandra.hadoop.ColumnFamilyRecordWriter.write(ColumnFamilyRecordWriter.java:50)
at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:588)
at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
at com.cassandra.hadoop.TweetAggregator.reduce(TweetAggregator.java:40)
at com.cassandra.hadoop.TweetAggregator.reduce(TweetAggregator.java:20)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:176)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:650)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:418)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
attempt_201502141652_0001_r_000000_0: writing

Any help please! Thanks.

Looks like your job setup has ByteBuffer as the reducer output key, rather than the Map<>. Try changing this in your job setup:

job.setOutputKeyClass(ByteBuffer.class);

to this:

job.setOutputKeyClass(Map.class);

In any case, the classes you pass to the job.set...() calls need to line up with the generic type arguments of your Mapper and Reducer classes, so check that they all match (class literals can't carry generic parameters, which is why the raw Map class is used above).
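Concretely, the last two type arguments of TweetAggregator (Map<String,ByteBuffer> and List<ByteBuffer>) should be the classes you register as the job's output key/value, and the mapper's output types should match setMapOutputKeyClass/setMapOutputValueClass. A minimal sketch of the relevant fragment of the main() from the question, rearranged that way (it only uses the classes already imported there):

// Mapper side: Mapper<ByteBuffer, SortedMap<ByteBuffer,Column>, Text, IntWritable>
job.setMapperClass(TweetMapper.class);
job.setMapOutputKeyClass(Text.class);            // mapper emits Text keys
job.setMapOutputValueClass(IntWritable.class);   // and IntWritable values
job.setInputFormatClass(ColumnFamilyInputFormat.class);

// Reducer side: Reducer<Text, IntWritable, Map<String,ByteBuffer>, List<ByteBuffer>>
job.setReducerClass(TweetAggregator.class);
job.setOutputKeyClass(Map.class);     // erased form of Map<String,ByteBuffer>
job.setOutputValueClass(List.class);  // erased form of List<ByteBuffer>
job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

One thing to double-check, since the stack trace points inside ColumnFamilyRecordWriter: the Thrift-based ColumnFamilyOutputFormat writes ByteBuffer keys with List<Mutation> values, while the Map<String,ByteBuffer> / List<ByteBuffer> pairing your reducer emits is what the cql3 CqlOutputFormat consumes, so make sure the output format you configure matches the pair you actually write.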
