[英]Reducer doesn't call reduce method when using my own class as output value MapReduce Hadoop
I am trying to use objects of my own class as the output value of my Mapper and consume them inside the Reducer. However, if I remove the default constructor of the DateIncome class, the reduce()
method is never called and the application terminates. 我的代碼如下:
Driver(驅動程序):
package it.polito.bigdata.hadoop.lab;
import com.sun.xml.internal.ws.policy.privateutil.PolicyUtils;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.KeyValueTextInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.NullOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
import java.io.File;
/**
 * Driver of the MapReduce program.
 *
 * <p>Configures a single job that reads from the local {@code input/} directory with
 * {@link KeyValueTextInputFormat}, maps every record to a {@code (Text, DateIncome)} pair,
 * reduces with {@link ReducerBigData} using one reduce task, and writes
 * {@code (Text, FloatWritable)} pairs as text into {@code output}.
 */
public class DriverBigData extends Configured implements Tool {

    /**
     * Builds, submits and waits for the job.
     *
     * @param args command-line arguments; currently unused because the input and output
     *             locations are hard-coded
     * @return {@code 0} when the job completes successfully, {@code 1} otherwise
     * @throws Exception if the stale output directory cannot be removed or the job
     *                   cannot be configured or submitted
     */
    @Override
    public int run(String[] args) throws Exception {
        // Remove the output of a previous run. FileUtils.forceDelete throws
        // FileNotFoundException for a missing path, so it is only called when there is
        // actually something to delete (otherwise the very first run would fail here).
        File staleOutput = new File("output/");
        if (staleOutput.exists()) {
            FileUtils.forceDelete(staleOutput);
        }

        Path inputPath = new Path("input/");
        Path outputPath = new Path("output");
        int numberOfReducer = 1;

        Configuration configuration = this.getConf();
        Job job = Job.getInstance(configuration);
        job.setJobName("myJob");

        FileInputFormat.addInputPath(job, inputPath);
        FileOutputFormat.setOutputPath(job, outputPath);

        job.setJarByClass(DriverBigData.class);
        job.setInputFormatClass(KeyValueTextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);

        // Map side: (Text, DateIncome). The value class is a custom Writable.
        job.setMapperClass(MapperBigData.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(DateIncome.class);

        // Reduce side: (Text, FloatWritable).
        job.setReducerClass(ReducerBigData.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(FloatWritable.class);

        job.setNumReduceTasks(numberOfReducer);

        // Execute the job and wait for completion
        return job.waitForCompletion(true) ? 0 : 1;
    }

    /**
     * Main of the driver.
     *
     * @param args command-line arguments forwarded to {@link ToolRunner}
     * @throws Exception if the tool fails to run
     */
    public static void main(String[] args) throws Exception {
        // Exploit the ToolRunner class to "configure" and run the Hadoop application
        int res = ToolRunner.run(new Configuration(), new DriverBigData(), args);
        System.exit(res);
    }
}
映射器:
package it.polito.bigdata.hadoop.lab;
import java.io.IOException;
import java.util.*;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import javax.swing.plaf.synth.ColorType;
/**
* Lab - Mapper
*/
/* Set the proper data types for the (key,value) pairs */
class MapperBigData extends Mapper<
Text, // Input key type
Text, // Input value type
Text, // Output key type
DateIncome> {// Output value type
protected void map(
Text key, // Input key type
Text value, // Input value type
Context context) throws IOException, InterruptedException {
try {
DateIncome income = new DateIncome(key.toString(),Float.parseFloat(value.toString()));
context.write(key, income);
}catch (Exception e){
System.err.println(e.toString());
}
}
}
Reducer(歸約器):
package it.polito.bigdata.hadoop.lab;
import java.io.IOException;
import java.util.*;
import com.google.common.collect.Multimap;
import javafx.util.Pair;
import org.apache.hadoop.io.FloatWritable;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
/**
 * Lab - Reducer.
 *
 * <p>Keeps track, across every {@code reduce()} call made on this instance, of the
 * {@link DateIncome} with the highest income, and emits that single
 * {@code (date, income)} pair from {@link #cleanup(Context)}.
 */
/* Set the proper data types for the (key,value) pairs */
class ReducerBigData extends Reducer<
        Text, // Input key type
        DateIncome, // Input value type
        Text, // Output key type
        FloatWritable> { // Output value type

    // Running maximum. Starts below every finite float so that datasets made only of
    // negative incomes still produce their real maximum instead of a made-up 0.
    float maxIncome = Float.NEGATIVE_INFINITY;
    String maxDAte = "";
    // True once at least one value has been accepted as the running maximum.
    private boolean maxFound = false;

    /**
     * Updates the running maximum with the values of one key.
     *
     * @param key     input key (not used for the comparison)
     * @param values  values grouped under {@code key}
     * @param context job context (nothing is written here)
     * @throws IOException          declared by the overridden method
     * @throws InterruptedException declared by the overridden method
     */
    @Override
    protected void reduce(
            Text key, // Input key type
            Iterable<DateIncome> values, // Input value type
            Context context) throws IOException, InterruptedException {
        System.out.println("reducer");
        for (DateIncome dateIncome : values) {
            System.out.println(dateIncome.getDate() + " " + dateIncome.getIncome());
            // "<=" means that, on equal incomes, the value seen last wins.
            if (maxIncome <= dateIncome.getIncome()) {
                maxIncome = dateIncome.getIncome();
                maxDAte = dateIncome.getDate();
                maxFound = true;
            }
        }
    }

    /**
     * Emits the overall maximum, if any value was seen.
     *
     * @param context job context used to write the result
     * @throws IOException          if writing the result fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void cleanup(Context context) throws IOException, InterruptedException {
        super.cleanup(context);
        // Without any accepted value there is no maximum to report; writing the initial
        // placeholders would add a bogus record with an empty date to the output.
        if (!maxFound) {
            return;
        }
        context.write(new Text(maxDAte), new FloatWritable(maxIncome));
    }
}
DateIncome:
package it.polito.bigdata.hadoop.lab;
import org.apache.hadoop.io.Writable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * Custom Hadoop value type pairing a date (kept as text) with an income.
 *
 * <p>Serialized form: the income as a float followed by the date as a modified-UTF-8
 * string, in that order; {@link #readFields(DataInput)} reads them back in the same order.
 */
public class DateIncome implements Writable {
    private String date;
    private float income;

    /**
     * No-argument constructor. It must stay: the framework creates instances through it
     * before filling them with {@link #readFields(DataInput)}.
     */
    public DateIncome() {
    }

    /**
     * @param date   date associated with the income
     * @param income income value
     */
    public DateIncome(String date, float income) {
        this.date = date;
        this.income = income;
    }

    /** @return the date, or {@code null} if it was never set */
    public String getDate() {
        return date;
    }

    /** @param dateValue new date */
    public void setDate(String dateValue) {
        date = dateValue;
    }

    /** @return the income */
    public float getIncome() {
        return income;
    }

    /** @param incomeValue new income */
    public void setIncome(float incomeValue) {
        income = incomeValue;
    }

    /**
     * Restores the fields from their serialized form (income first, then date).
     *
     * @param in source of the serialized bytes
     * @throws IOException if reading fails
     */
    @Override
    public void readFields(DataInput in) throws IOException {
        income = in.readFloat();
        date = in.readUTF();
    }

    /**
     * Serializes the fields (income first, then date).
     *
     * @param out destination of the serialized bytes
     * @throws IOException if writing fails
     */
    @Override
    public void write(DataOutput out) throws IOException {
        out.writeFloat(income);
        // writeUTF(null) throws NullPointerException, which would happen for an instance
        // built with the no-argument constructor whose date was never set. An unset date
        // is therefore written as the empty string.
        out.writeUTF(date == null ? "" : date);
    }

    /** @return a readable {@code date:... income:...} representation */
    @Override
    public String toString() {
        return "date:" + date + " income:" + income;
    }
}
input.txt:
2015-11-01 1000
2015-11-02 1305
2015-12-01 500
2015-12-02 750
2016-01-01 345
2016-01-02 1145
2016-02-03 200
2016-02-04 500
output:
2015-11-02 1305.0
所以,我的問題是:如果我刪除 DateIncome 類的默認構造函數,reducer 的 reduce()
方法就不會被調用。既然已經提供了另一個構造函數,為什麼 Hadoop 仍然需要默認構造函數?
所有 Writable
實現都應具有默認(無參)構造函數,否則您的對象將無法被反序列化。
在反序列化過程中,Hadoop 先通過默認構造函數實例化對象,然後才調用 readFields() 填充所有字段。在 Java 中,一旦您聲明了帶參數的構造函數,編譯器就不再自動生成默認構造函數;因此,如果類中只有非默認構造函數而沒有顯式聲明無參構造函數,則此過程將失敗。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.