[英]How to do HBase range scan for Hexadecimal row key?
当尝试在HBase Shell上执行范围扫描时,以下在HBase Shell中起作用。
scan 'mytable', {STARTROW => "\x00\x00\x00\x00\x01\x8F\xF6\x83", ENDROW => "\x00\x00\x00\x00\x01\x8F\xF6\x8D"}
但是,当尝试实现Java客户端执行相同的操作时,它不会检索到任何结果。
Scan scan = new Scan(Bytes.ToBytes("\x00\x00\x00\x00\x01\x8F\xF6\x83"),Bytes.toBytes("\x00\x00\x00\x00\x01\x8F\xF6\x8D");
scan.setFilter(colFilter);
scan.setOtherStuff...
ResultScanner scanner = table.getScanner(scan);
for (Result result = scanner.next(); result != null; result = scanner.next()) {
....
}
我试图转义“ \\”字符并传递开始和结束行键。 但是它没有按预期工作。
我将输入数据作为命令行参数传递。
time java -jar $ARIADNE3D_CLI PCRangeSearchTxt -table_name $TABLE_NAME -m 4 -start_key "\x00\x00\x00\x00\x01\x8F\xF6\x8D" -end_key "\x00\x00\x00\x00\x01\x8F\xF6\x8D" -o $SCRATCH/txt-1.txt
PCRangeSearchTxt的Java实现如下
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package umg.ariadne3d.core.query.pc;
import java.io.*;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import umg.ariadne3d.core.common.Constants;
import umg.core.common.Executable;
/**
 * Point cloud range search: scans an HBase table between two row keys and appends the textual
 * form of every returned row to an output file.
 *
 * <p>Options (see {@link #parseArgs(String[])}): {@code -t} table name, {@code -start_key} and
 * {@code -end_key} row-key bounds, {@code -o} output file, optional {@code -conn quorum port},
 * optional {@code -d} for DEBUG logging. {@code -m} is still accepted but is not used here.
 *
 * <p>Row keys are expected in the HBase shell's printable-binary notation, for example
 * {@code "\x00\x00\x00\x00\x01\x8F\xF6\x83"}: printable characters stand for themselves and
 * {@code \xNN} stands for the byte with hexadecimal value NN.
 *
 * @author VVo
 */
public class PCRangeSearchTxt implements Executable {

    static final Logger LOGGER = Logger.getLogger(PCRangeSearchTxt.class);

    public static final String NAME = "PCRANGESEARCHTXT"; //PCRangeSearchTxt

    /**
     * Command-line entry point; exits the JVM with the status returned by {@link #run(String[])}.
     *
     * @param args command-line arguments, passed to {@link #run(String[])} unchanged
     */
    public static void main(String[] args) {
        // The arguments must be forwarded as received: replacing them with an empty array
        // (as a leftover debugging stub once did) silently discards every option.
        Executable prog = new PCRangeSearchTxt();
        System.exit(prog.run(args));
    }

    /**
     * Runs the range scan described by the command-line options.
     *
     * @param args command-line arguments
     * @return 0 on success, 1 if a required option is missing or the scan/write fails
     */
    @Override
    public int run(String[] args) {
        CommandLine cmd = parseArgs(args);
        String tableName = cmd.getOptionValue("t");
        String startKey = cmd.getOptionValue("start_key");
        String endKey = cmd.getOptionValue("end_key");
        String outputPath = cmd.getOptionValue("o");
        if (tableName == null || startKey == null || endKey == null || outputPath == null) {
            LOGGER.error("Options -t, -start_key, -end_key and -o are all required");
            return 1;
        }

        Configuration conf = buildConfiguration(cmd);

        // Bytes.toBytes(String) would return the UTF-8 bytes of the literal text "\x00\x00...",
        // which never matches a binary row key. toBytesBinary decodes the \xNN escapes into the
        // actual bytes, mirroring what the HBase shell does with a double-quoted key.
        byte[] keyStart = Bytes.toBytesBinary(startKey);
        byte[] keyEnd = Bytes.toBytesBinary(endKey);

        long start = System.currentTimeMillis();
        try (Connection connection = ConnectionFactory.createConnection(conf);
             Table table = connection.getTable(TableName.valueOf(tableName))) {
            // Opening the writer in append mode creates the file when it does not exist yet,
            // so no separate "create if missing" step is needed.
            try (ResultScanner scanner = table.getScanner(new Scan(keyStart, keyEnd));
                 Writer writer = new BufferedWriter(new FileWriter(outputPath, true))) {
                for (Result result = scanner.next(); result != null; result = scanner.next()) {
                    writer.write(result.toString() + "\n");
                }
            }
            long end = System.currentTimeMillis();
            System.out.printf("Total time %d \n", end - start);
            return 0;
        } catch (IOException ex) {
            // Pass the exception as the throwable argument so the stack trace is logged.
            LOGGER.error("Range scan of table " + tableName + " failed", ex);
            return 1;
        }
    }

    /**
     * Builds the HBase client configuration, applying the ZooKeeper quorum and client port
     * from {@code -conn} when that option is present.
     */
    private static Configuration buildConfiguration(CommandLine cmd) {
        Configuration conf = HBaseConfiguration.create();
        if (cmd.hasOption("conn")) {
            String[] connectionParams = cmd.getOptionValues("conn");
            conf.set(Constants.HBASE_CONFIGURATION_ZOOKEEPER_QUORUM, connectionParams[0]);
            LOGGER.debug(String.format("Set quorum string %s", conf.get(Constants.HBASE_CONFIGURATION_ZOOKEEPER_QUORUM)));
            conf.setInt(Constants.HBASE_CONFIGURATION_ZOOKEEPER_CLIENTPORT, Integer.parseInt(connectionParams[1]));
            LOGGER.debug(String.format("Set port %d", conf.getInt(Constants.HBASE_CONFIGURATION_ZOOKEEPER_CLIENTPORT, 0)));
        }
        return conf;
    }

    /**
     * Parses the command line. On a parse error it prints the message and usage help and
     * terminates the JVM with status -1. Switches the logger to DEBUG when {@code -d} is given.
     *
     * @param args raw command-line arguments
     * @return the parsed command line
     */
    private static CommandLine parseArgs(String[] args) {
        Options options = new Options();
        options.addOption(new Option("t", "table_name", true, "HBase table name"));
        options.addOption(new Option("m", "model_number", true, "model number"));
        options.addOption(new Option("start_key", true, "start key for range scan"));
        options.addOption(new Option("end_key", true, "end key for range scan"));
        options.addOption(new Option("o", "output", true, "create output file"));

        // connection parameters: quorum string followed by client port
        Option conn = new Option("conn", "connection", true, "Zookepper quorum and port");
        conn.setArgs(2);
        conn.setRequired(false);
        options.addOption(conn);

        // debug flag
        options.addOption("d", "debug", false, "switch on DEBUG log level");

        CommandLineParser parser = new PosixParser();
        CommandLine cmd = null;
        try {
            cmd = parser.parse(options, args);
        } catch (Exception e) {
            System.err.println("ERROR: " + e.getMessage() + "\n");
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(NAME + " ", options, true);
            System.exit(-1);
        }
        if (cmd.hasOption("d")) {
            LOGGER.setLevel(Level.DEBUG);
            System.out.println("DEBUG ON");
        }
        return cmd;
    }
}
在十六进制的行键上实现HBase范围搜索的正确方法是什么?
我想您应该清楚自己在HBase表中用什么作为行键,所以我不明白为什么不能直接这样做:
byte[]start = Hex.decodeHex("startKey".toCharArray());
byte[]end = Hex.decodeHex("endKey".toCharArray());
Scan scan = new Scan(start, end)
只是不确定为什么要尝试以其他方式执行此操作。
否则,这里有一个问题的答案: HBase Shell RowKey中的非十六进制字符是什么?
希望有帮助:)
我找到了这个问题的解决方案。 基本上,我之前在代码中是把行键声明为String变量。 由于start_key和end_key是作为命令行参数传入的,而HBase在内部以字节数组(byte[])的形式存储数据,因此我可以直接以已知的格式传递开始键,也就是说,不必把值作为十六进制编码来传递。 例如,我可以将start_key和end_key都以其原始的、人类可读的形式传入,HBase会将这种形式映射为其内部的字节数组(byte[])形式。
因此,我修改了上述Java类,以将start_key和end_key接受为双精度数据类型值。
// key format: 392994.475499
double startTS = Double.parseDouble(cmd.getOptionValue("start_key"));
double endTS = Double.parseDouble(cmd.getOptionValue("end_key"));
总结一下:关键是要了解HBase内部的存储结构,并将命令行传入的值作为double值来接收。
通过这样做,我能够按预期运行代码。
为了让大家全面了解我所做的事情,下面共享了该类的修改后的代码。
/*
* To change this license header, choose License Headers in Project Properties.
* To change this template file, choose Tools | Templates
* and open the template in the editor.
*/
package umg.ariadne3d.core.query.pc;
import java.io.*;
import java.nio.file.Paths;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import umg.ariadne3d.core.store.schema.pc.Model4;
import org.apache.commons.cli.CommandLine;
import org.apache.commons.cli.CommandLineParser;
import org.apache.commons.cli.HelpFormatter;
import org.apache.commons.cli.Option;
import org.apache.commons.cli.Options;
import org.apache.commons.cli.PosixParser;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;
import org.apache.hadoop.hbase.client.*;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import umg.ariadne3d.core.common.Constants;
import umg.core.common.Executable;
/**
 * Point cloud range search executed by several concurrent clients: every client scans the
 * same row-key range of an HBase table and appends the textual form of each returned row to
 * its own output file.
 *
 * <p>Options (see {@link #parseArgs(String[])}): {@code -t} table name, {@code -start_key} and
 * {@code -end_key} as decimal numbers (for example {@code 392994.475499}), {@code -o} output
 * file prefix (client {@code j} writes to {@code <prefix>j.txt}), optional {@code -clients}
 * (default 1), optional {@code -conn quorum port}, optional {@code -d} for DEBUG logging.
 */
public class PCMultiClientRangeSearchSQN implements Executable {

    static final Logger LOGGER = Logger.getLogger(PCMultiClientRangeSearchSQN.class);

    public static final String NAME = "PCMULTICLIENTRANGESEARCHSQN"; //PCMultiClientRangeSearchSQN

    // Offset subtracted from a key value before scaling.
    // NOTE(review): presumably a time origin in seconds - confirm against the table's loader.
    private static final double KEY_ORIGIN = 388800;

    // Step by which the offset key value is divided to obtain the integral row key.
    // NOTE(review): looks like microsecond resolution - confirm against the table's loader.
    private static final double KEY_RESOLUTION = 0.000001;

    /**
     * Command-line entry point; exits the JVM with the status returned by {@link #run(String[])}.
     *
     * @param args command-line arguments, passed to {@link #run(String[])} unchanged
     */
    public static void main(String[] args) {
        // The arguments must be forwarded as received: replacing them with an empty array
        // (as a leftover debugging stub once did) silently discards every option.
        Executable prog = new PCMultiClientRangeSearchSQN();
        System.exit(prog.run(args));
    }

    /**
     * Runs the concurrent range scans described by the command-line options and waits for all
     * of them to finish.
     *
     * @param args command-line arguments
     * @return 0 when every client finished successfully; 1 on invalid or missing options, on a
     *         connection failure, on interruption, or when at least one client failed
     */
    @Override
    public int run(String[] args) {
        CommandLine cmd = parseArgs(args);
        String tableName = cmd.getOptionValue("t");
        String startArg = cmd.getOptionValue("start_key");
        String endArg = cmd.getOptionValue("end_key");
        String outputPrefix = cmd.getOptionValue("o");
        if (tableName == null || startArg == null || endArg == null || outputPrefix == null) {
            LOGGER.error("Options -t, -start_key, -end_key and -o are all required");
            return 1;
        }

        byte[] keyStart;
        byte[] keyEnd;
        int numOfClients;
        try {
            // key format: 392994.475499
            keyStart = Bytes.toBytes(toRowKey(Double.parseDouble(startArg)));
            keyEnd = Bytes.toBytes(toRowKey(Double.parseDouble(endArg)));
            numOfClients = cmd.hasOption("clients")
                    ? Integer.parseInt(cmd.getOptionValue("clients"))
                    : 1;
        } catch (NumberFormatException ex) {
            LOGGER.error("-start_key, -end_key and -clients must be numeric", ex);
            return 1;
        }
        if (numOfClients < 1) {
            LOGGER.error("-clients must be at least 1, got " + numOfClients);
            return 1;
        }

        Configuration conf = buildConfiguration(cmd);
        TableName table = TableName.valueOf(tableName);

        try (Connection connection = ConnectionFactory.createConnection(conf)) {
            // One worker thread per client; a larger fixed pool would only hold idle threads.
            ExecutorService pool = Executors.newFixedThreadPool(numOfClients);
            RangeScan[] tasks = new RangeScan[numOfClients];
            for (int j = 0; j < numOfClients; j++) {
                // Resolve against the working directory only when the prefix is relative, so
                // an absolute -o path is honoured instead of being glued onto the cwd.
                File output = new File(outputPrefix + j + ".txt").getAbsoluteFile();
                tasks[j] = new RangeScan(keyStart, keyEnd, connection, table, output);
                pool.execute(tasks[j]);
            }
            pool.shutdown();
            System.out.println("-----------------------");

            // wait until all tasks are finished
            try {
                pool.awaitTermination(Long.MAX_VALUE, TimeUnit.NANOSECONDS);
            } catch (InterruptedException ex) {
                pool.shutdownNow();
                Thread.currentThread().interrupt(); // preserve the interrupt for the caller
                LOGGER.error("Interrupted while waiting for the range scans to finish", ex);
                return 1;
            }
            System.out.println("All tasks are finished!");

            int failures = 0;
            for (RangeScan task : tasks) {
                if (task.failed) {
                    failures++;
                }
            }
            if (failures > 0) {
                LOGGER.error(failures + " of " + numOfClients + " range scans failed");
                return 1;
            }
            return 0;
        } catch (IOException ex) {
            // Pass the exception as the throwable argument so the stack trace is logged.
            LOGGER.error("Could not connect to HBase for table " + tableName, ex);
            return 1;
        }
    }

    /** Converts a decimal key value from the command line into the integral row key. */
    private static long toRowKey(double value) {
        return Math.round((value - KEY_ORIGIN) / KEY_RESOLUTION);
    }

    /**
     * Builds the HBase client configuration, applying the ZooKeeper quorum and client port
     * from {@code -conn} when that option is present.
     */
    private static Configuration buildConfiguration(CommandLine cmd) {
        Configuration conf = HBaseConfiguration.create();
        if (cmd.hasOption("conn")) {
            String[] connectionParams = cmd.getOptionValues("conn");
            conf.set(Constants.HBASE_CONFIGURATION_ZOOKEEPER_QUORUM, connectionParams[0]);
            LOGGER.debug(String.format("Set quorum string %s", conf.get(Constants.HBASE_CONFIGURATION_ZOOKEEPER_QUORUM)));
            conf.setInt(Constants.HBASE_CONFIGURATION_ZOOKEEPER_CLIENTPORT, Integer.parseInt(connectionParams[1]));
            LOGGER.debug(String.format("Set port %d", conf.getInt(Constants.HBASE_CONFIGURATION_ZOOKEEPER_CLIENTPORT, 0)));
        }
        return conf;
    }

    /**
     * One client's work: scan the key range and append every row to this client's file.
     * Each task obtains its own {@link Table} from the shared {@link Connection}, because
     * the HBase client documents Table instances as lightweight but not thread-safe.
     */
    private static final class RangeScan implements Runnable {
        private final byte[] keyStart;
        private final byte[] keyEnd;
        private final Connection connection;
        private final TableName tableName;
        private final File file;

        /** Set when the scan or the write failed; read by the submitter after the pool ends. */
        volatile boolean failed;

        RangeScan(byte[] keyStart, byte[] keyEnd, Connection connection, TableName tableName, File file) {
            this.keyStart = keyStart;
            this.keyEnd = keyEnd;
            this.connection = connection;
            this.tableName = tableName;
            this.file = file;
        }

        @Override
        public void run() {
            long start = System.currentTimeMillis();
            Scan scan = new Scan(keyStart, keyEnd);
            // Opening the writer in append mode creates the file when it does not exist yet.
            try (Table table = connection.getTable(tableName);
                 ResultScanner scanner = table.getScanner(scan);
                 Writer writer = new BufferedWriter(new FileWriter(file, true))) {
                for (Result result = scanner.next(); result != null; result = scanner.next()) {
                    writer.write(result.toString() + "\n");
                }
            } catch (Exception ex) {
                // Task boundary: record the failure so run() can report it via the exit code.
                failed = true;
                LOGGER.error("Range scan writing to " + file + " failed", ex);
                return;
            }
            long end = System.currentTimeMillis();
            System.out.printf("Total time For File %s is %d \n", file.toString(), end - start);
        }
    }

    /**
     * Parses the command line. On a parse error it prints the message and usage help and
     * terminates the JVM with status -1. Switches the logger to DEBUG when {@code -d} is given.
     *
     * @param args raw command-line arguments
     * @return the parsed command line
     */
    private static CommandLine parseArgs(String[] args) {
        Options options = new Options();
        options.addOption(new Option("t", "table_name", true, "HBase table name"));
        options.addOption(new Option("start_key", true, "start key for range scan"));
        options.addOption(new Option("end_key", true, "end key for range scan"));
        options.addOption(new Option("clients", true, "number of concurrent clients"));
        options.addOption(new Option("o", "output", true, "create output file"));

        // connection parameters: quorum string followed by client port
        Option conn = new Option("conn", "connection", true, "Zookepper quorum and port");
        conn.setArgs(2);
        conn.setRequired(false);
        options.addOption(conn);

        // debug flag
        options.addOption("d", "debug", false, "switch on DEBUG log level");

        CommandLineParser parser = new PosixParser();
        CommandLine cmd = null;
        try {
            cmd = parser.parse(options, args);
        } catch (Exception e) {
            System.err.println("ERROR: " + e.getMessage() + "\n");
            HelpFormatter formatter = new HelpFormatter();
            formatter.printHelp(NAME + " ", options, true);
            System.exit(-1);
        }
        if (cmd.hasOption("d")) {
            LOGGER.setLevel(Level.DEBUG);
            System.out.println("DEBUG ON");
        }
        return cmd;
    }
}
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.