
Cassandra Hadoop MapReduce : java.lang.ClassCastException: java.util.HashMap cannot be cast to java.nio.ByteBuffer

I'm trying to create a MapReduce job with Apache Cassandra. The input data comes from Cassandra and the output goes back to Cassandra too.

The program selects all the data from a table called tweetstore and then inserts the count of rows that contain a given user name.

This is the main class of the MapReduce job:

package com.cassandra.hadoop;
import java.io.*;
import java.lang.*;
import java.util.*;
import java.nio.ByteBuffer;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.util.GenericOptionsParser;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.cassandra.hadoop.ColumnFamilyInputFormat;
import org.apache.cassandra.hadoop.ColumnFamilyOutputFormat;
import org.apache.cassandra.hadoop.ConfigHelper;
import org.apache.cassandra.hadoop.cql3.*;
import org.apache.cassandra.thrift.SlicePredicate;
import org.apache.cassandra.thrift.SliceRange;
import org.apache.cassandra.thrift.IndexExpression;
import org.apache.cassandra.thrift.IndexOperator;
import org.apache.cassandra.utils.ByteBufferUtil;

public class App 
{
static final String KEYSPACE_NAME = "tweet_cassandra_map_reduce";
static final String INPUT_COLUMN_FAMILY = "tweetstore";
static final String OUTPUT_COLUMN_FAMILY = "tweetcount";
static final String COLUMN_NAME = "user";

public static void main( String[] args ) throws IOException, InterruptedException, ClassNotFoundException 
{
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    Job job = new Job(conf, "tweet count");
    job.setJarByClass(App.class);

    // mapper configuration.
    job.setMapperClass(TweetMapper.class);
    job.setMapOutputKeyClass(Text.class);
    job.setMapOutputValueClass(IntWritable.class);
    job.setInputFormatClass(ColumnFamilyInputFormat.class);

    // Reducer configuration
    job.setReducerClass(TweetAggregator.class);
    job.setOutputKeyClass(ByteBuffer.class);
    job.setOutputValueClass(List.class);
    job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

    // Cassandra input column family configuration
    ConfigHelper.setInputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setInputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setInputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
    ConfigHelper.setInputColumnFamily(job.getConfiguration(), KEYSPACE_NAME, INPUT_COLUMN_FAMILY);

    job.setInputFormatClass(ColumnFamilyInputFormat.class);
    SlicePredicate slicePredicate = new SlicePredicate();
    slicePredicate.setSlice_range(new SliceRange(ByteBufferUtil.EMPTY_BYTE_BUFFER, ByteBufferUtil.EMPTY_BYTE_BUFFER, false, Integer.MAX_VALUE));

    // Prepare index expression.
    IndexExpression ixpr = new IndexExpression();
    ixpr.setColumn_name(ByteBufferUtil.bytes(COLUMN_NAME));
    ixpr.setOp(IndexOperator.EQ);
    ixpr.setValue(ByteBufferUtil.bytes(otherArgs.length > 0 && !StringUtils.isBlank(otherArgs[0]) ? otherArgs[0] : "mevivs"));

    List<IndexExpression> ixpressions = new ArrayList<IndexExpression>();
    ixpressions.add(ixpr);
    ConfigHelper.setInputRange(job.getConfiguration(), ixpressions);
    ConfigHelper.setInputSlicePredicate(job.getConfiguration(), slicePredicate);

    // Cassandra output family configuration.
    ConfigHelper.setOutputRpcPort(job.getConfiguration(), "9160");
    ConfigHelper.setOutputInitialAddress(job.getConfiguration(), "localhost");
    ConfigHelper.setOutputPartitioner(job.getConfiguration(), "Murmur3Partitioner");
    ConfigHelper.setOutputColumnFamily(job.getConfiguration(), KEYSPACE_NAME, OUTPUT_COLUMN_FAMILY);
    job.setOutputFormatClass(ColumnFamilyOutputFormat.class);
    job.getConfiguration().set("row_key", "key");
    job.waitForCompletion(true);
}
}

The mapper code:

package com.cassandra.hadoop;
import java.io.*;
import java.lang.*;
import java.util.*;
import java.nio.ByteBuffer;
import java.util.SortedMap;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.cassandra.db.Column;
import org.apache.cassandra.utils.ByteBufferUtil;

public class TweetMapper extends Mapper<ByteBuffer, SortedMap<ByteBuffer, Column>, Text, IntWritable>
{
static final String COLUMN_NAME = App.COLUMN_NAME;
private final static IntWritable one = new IntWritable(1);

/* (non-Javadoc)
* @see org.apache.hadoop.mapreduce.Mapper#map(KEYIN, VALUEIN, org.apache.hadoop.mapreduce.Mapper.Context)
*/
public void map(ByteBuffer key, SortedMap<ByteBuffer, Column> columns, Context context) throws IOException, InterruptedException
{
    Column column = columns.get(ByteBufferUtil.bytes(COLUMN_NAME));
    String value = ByteBufferUtil.string(column.value());
    context.write(new Text(value), one);
}
}

The reducer code:

package com.cassandra.hadoop;
import java.io.IOException;
import java.util.*;
import java.lang.*;
import java.io.*;
import java.nio.ByteBuffer;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.cassandra.utils.ByteBufferUtil;
import org.apache.cassandra.thrift.Column;
import org.apache.cassandra.thrift.Mutation;
import org.apache.cassandra.thrift.ColumnOrSuperColumn;
import org.apache.cassandra.db.marshal.Int32Type;

public class TweetAggregator extends Reducer<Text,IntWritable, Map<String,ByteBuffer>, List<ByteBuffer>>
{
private static Map<String,ByteBuffer> keys = new HashMap<>();
public void reduce(Text word, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException
{
    int sum = 0;
    for (IntWritable val : values)
        sum += val.get();
    System.out.println("writing");
    keys.put("key", ByteBufferUtil.bytes(word.toString()));
    context.write(keys, getBindVariables(word, sum));
}

private List<ByteBuffer> getBindVariables(Text word, int sum)
{
    List<ByteBuffer> variables = new ArrayList<ByteBuffer>();
    variables.add(Int32Type.instance.decompose(sum));
    return variables;
}
 }

The problem is that when I execute the job with the hadoop command, this error appears at the reduce step:

15/02/14 16:53:13 WARN hadoop.AbstractColumnFamilyInputFormat: ignoring  jobKeyRange specified without start_key
15/02/14 16:53:14 INFO mapred.JobClient: Running job: job_201502141652_0001
15/02/14 16:53:15 INFO mapred.JobClient:  map 0% reduce 0%
15/02/14 16:53:20 INFO mapred.JobClient:  map 66% reduce 0%
15/02/14 16:53:22 INFO mapred.JobClient:  map 100% reduce 0%
15/02/14 16:53:28 INFO mapred.JobClient:  map 100% reduce 33%
15/02/14 16:53:30 INFO mapred.JobClient: Task Id : attempt_201502141652_0001_r_000000_0, Status : FAILED
java.lang.ClassCastException: java.util.HashMap cannot be cast to java.nio.ByteBuffer
at org.apache.cassandra.hadoop.ColumnFamilyRecordWriter.write(ColumnFamilyRecordWriter.java:50)
at org.apache.hadoop.mapred.ReduceTask$NewTrackingRecordWriter.write(ReduceTask.java:588)
at org.apache.hadoop.mapreduce.TaskInputOutputContext.write(TaskInputOutputContext.java:80)
at com.cassandra.hadoop.TweetAggregator.reduce(TweetAggregator.java:40)
at com.cassandra.hadoop.TweetAggregator.reduce(TweetAggregator.java:20)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:176)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:650)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:418)
at org.apache.hadoop.mapred.Child$4.run(Child.java:255)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:415)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1136)
at org.apache.hadoop.mapred.Child.main(Child.java:249)
attempt_201502141652_0001_r_000000_0: writing

Any help please! Thanks.

Looks like your job setup has ByteBuffer as the reducer output key, rather than the Map<>. Try changing this in your job setup:

job.setOutputKeyClass(ByteBuffer.class);

to this:

job.setOutputKeyClass(Map.class);

In any case, the classes you pass to the job.set...() calls need to line up with the generic type arguments of your Mapper and Reducer classes, so check that they all match (class literals can't carry generic parameters, which is why the raw Map class is used above).
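Concretely, the last two type arguments of TweetAggregator (Map<String,ByteBuffer> and List<ByteBuffer>) should be the classes you register as the job's output key/value, and the mapper's output types should match setMapOutputKeyClass/setMapOutputValueClass. A minimal sketch of the relevant fragment of the main() from the question, rearranged that way (it only uses the classes already imported there):

// Mapper side: Mapper<ByteBuffer, SortedMap<ByteBuffer,Column>, Text, IntWritable>
job.setMapperClass(TweetMapper.class);
job.setMapOutputKeyClass(Text.class);            // mapper emits Text keys
job.setMapOutputValueClass(IntWritable.class);   // and IntWritable values
job.setInputFormatClass(ColumnFamilyInputFormat.class);

// Reducer side: Reducer<Text, IntWritable, Map<String,ByteBuffer>, List<ByteBuffer>>
job.setReducerClass(TweetAggregator.class);
job.setOutputKeyClass(Map.class);     // erased form of Map<String,ByteBuffer>
job.setOutputValueClass(List.class);  // erased form of List<ByteBuffer>
job.setOutputFormatClass(ColumnFamilyOutputFormat.class);

One thing to double-check, since the stack trace points inside ColumnFamilyRecordWriter: the Thrift-based ColumnFamilyOutputFormat writes ByteBuffer keys with List<Mutation> values, while the Map<String,ByteBuffer> / List<ByteBuffer> pairing your reducer emits is what the cql3 CqlOutputFormat consumes, so make sure the output format you configure matches the pair you actually write.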
