[英]Read from HDFS and write to HBASE
映射器正在從兩個位置讀取文件1)用戶訪問過的文章(按國家分類)2)國家的統計信息(按國家/地區)
兩個Mapper的輸出是Text,Text
我正在運行Amazon Cluster的程序
我的目標是從兩個不同的集合中讀取數據,並將結果合並並存儲在hbase中。
HDFS到HDFS正常運行。 代碼陷入減少67%並給出錯誤
17/02/24 10:45:31 INFO mapreduce.Job: map 0% reduce 0%
17/02/24 10:45:37 INFO mapreduce.Job: map 100% reduce 0%
17/02/24 10:45:49 INFO mapreduce.Job: map 100% reduce 67%
17/02/24 10:46:00 INFO mapreduce.Job: Task Id : attempt_1487926412544_0016_r_000000_0, Status : FAILED
Error: java.lang.IllegalArgumentException: Row length is 0
at org.apache.hadoop.hbase.client.Mutation.checkRow(Mutation.java:565)
at org.apache.hadoop.hbase.client.Put.<init>(Put.java:110)
at org.apache.hadoop.hbase.client.Put.<init>(Put.java:68)
at org.apache.hadoop.hbase.client.Put.<init>(Put.java:58)
at com.happiestminds.hadoop.CounterReducer.reduce(CounterReducer.java:45)
at com.happiestminds.hadoop.CounterReducer.reduce(CounterReducer.java:1)
at org.apache.hadoop.mapreduce.Reducer.run(Reducer.java:171)
at org.apache.hadoop.mapred.ReduceTask.runNewReducer(ReduceTask.java:635)
at org.apache.hadoop.mapred.ReduceTask.run(ReduceTask.java:390)
at org.apache.hadoop.mapred.YarnChild$2.run(YarnChild.java:164)
at java.security.AccessController.doPrivileged(Native Method)
at javax.security.auth.Subject.doAs(Subject.java:422)
at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1698)
at org.apache.hadoop.mapred.YarnChild.main(YarnChild.java:158)
驅動程序類為
package com.happiestminds.hadoop;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.MasterNotRunningException;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.mapreduce.TableMapReduceUtil;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.MultipleInputs;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class Main extends Configured implements Tool {
/**
* @param args
* @throws Exception
*/
public static String outputTable = "mapreduceoutput";
public static void main(String[] args) throws Exception {
int exitCode = ToolRunner.run(new Main(), args);
System.exit(exitCode);
}
@Override
public int run(String[] args) throws Exception {
Configuration config = HBaseConfiguration.create();
try{
HBaseAdmin.checkHBaseAvailable(config);
}
catch(MasterNotRunningException e){
System.out.println("Master not running");
System.exit(1);
}
Job job = Job.getInstance(config, "Hbase Test");
job.setJarByClass(Main.class);
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
MultipleInputs.addInputPath(job, new Path(args[0]), TextInputFormat.class, ArticleMapper.class);
MultipleInputs.addInputPath(job, new Path(args[1]), TextInputFormat.class, StatisticsMapper.class);
TableMapReduceUtil.addDependencyJars(job);
TableMapReduceUtil.initTableReducerJob(outputTable, CounterReducer.class, job);
//job.setReducerClass(CounterReducer.class);
job.setNumReduceTasks(1);
return job.waitForCompletion(true) ? 0 : 1;
}
}
減速器類為
package com.happiestminds.hadoop;
import java.io.IOException;
import org.apache.hadoop.hbase.client.Mutation;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableReducer;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class CounterReducer extends TableReducer<Text, Text, ImmutableBytesWritable> {
public static final byte[] CF = "counter".getBytes();
public static final byte[] COUNT = "combined".getBytes();
@Override
protected void reduce(Text key, Iterable<Text> values,
Reducer<Text, Text, ImmutableBytesWritable, Mutation>.Context context)
throws IOException, InterruptedException {
String vals = values.toString();
int counter = 0;
StringBuilder sbr = new StringBuilder();
System.out.println(key.toString());
for (Text val : values) {
String stat = val.toString();
if (stat.equals("***")) {
counter++;
} else {
sbr.append(stat + ",");
}
}
sbr.append("Article count : " + counter);
Put put = new Put(Bytes.toBytes(key.toString()));
put.addColumn(CF, COUNT, Bytes.toBytes(sbr.toString()));
if (counter != 0) {
context.write(null, put);
}
}
}
依存關系
<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-client</artifactId>
<version>2.7.3</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-client</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-common</artifactId>
<version>1.2.2</version>
</dependency>
<dependency>
<groupId>org.apache.hbase</groupId>
<artifactId>hbase-server</artifactId>
<version>1.2.2</version>
</dependency>
</dependencies>
一個好的做法是在將值提交到某個地方之前先對其進行驗證。 在您的特定情況下,你可以驗證你的密鑰和SBR或將其包裝成適當的通知政策的try-catch部分。 如果它們不正確,則應將它們輸出到一些日志中,並使用新的測試用例更新單元測試:
try
{
Put put = new Put(Bytes.toBytes(key.toString()));
put.addColumn(CF, COUNT, Bytes.toBytes(sbr.toString()));
if (counter != 0) {
context.write(null, put);
}
}
catch (IllegalArgumentException ex)
{
System.err.println("Error processing record - Key: "+ key.toString() +", values: " +sbr.ToString());
}
根據程序拋出的異常,很明顯密鑰長度為0,因此在放入hbase之前,您可以檢查密鑰長度是否為0,然后只能放入hbase。
更清楚為什么hbase不支持密鑰長度為0
因為HBase數據模型不允許長度為0的行鍵,所以它至少應為1個字節。 保留0字節的行鍵供內部使用(指定空的開始鍵和結束鍵)。
您可以嘗試檢查是否要插入任何空值嗎?
HBase數據模型不允許長度為零的行鍵,它至少應為1個字節。
請在執行put命令之前檢查您的reducer代碼,是否將某些值填充為null。
您得到的錯誤是不言自明的。 HBase中的行鍵不能為空(盡管值可以為空)。
@Override
protected void reduce(Text key, Iterable<Text> values,
Reducer<Text, Text, ImmutableBytesWritable, Mutation>.Context context)
throws IOException, InterruptedException {
if (key == null || key.getLength() == 0) {
// Log a warning about the empty key.
return;
}
// Rest of your reducer follows.
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.