[英]How to Avro Binary encode the JSON String using Apache Avro?
我正在嘗試對我的 JSON 字符串進行 avro 二進制編碼。 下面是我的 JSON 字符串,我創建了一個簡單的方法來進行轉換,但我不確定我的做法是否正確?
public static void main(String args[]) throws Exception{
try{
Schema schema = new Parser().parse((TestExample.class.getResourceAsStream("/3233.avsc")));
String json="{"+
" \"location\" : {"+
" \"devices\":["+
" {"+
" \"did\":\"9abd09-439bcd-629a8f\","+
" \"dt\":\"browser\","+
" \"usl\":{"+
" \"pos\":{"+
" \"source\":\"GPS\","+
" \"lat\":90.0,"+
" \"long\":101.0,"+
" \"acc\":100"+
" },"+
" \"addSource\":\"LL\","+
" \"add\":["+
" {"+
" \"val\":\"2123\","+
" \"type\" : \"NUM\""+
" },"+
" {"+
" \"val\":\"Harris ST\","+
" \"type\" : \"ST\""+
" }"+
" ],"+
" \"ei\":{"+
" \"ibm\":true,"+
" \"sr\":10,"+
" \"ienz\":true,"+
" \"enz\":100,"+
" \"enr\":10"+
" },"+
" \"lm\":1390598086120"+
" }"+
" }"+
" ],"+
" \"ver\" : \"1.0\""+
" }"+
"}";
byte[] avroByteArray = fromJsonToAvro(json,schema);
} catch (Exception ex) {
// log an exception
}
下面的方法將我的 JSON 字符串轉換為 Avro 二進制編碼 -
/**
 * Converts a JSON document into Avro binary encoding.
 *
 * <p>Only the encoded datum is produced; no schema or container-file header
 * is written.
 *
 * @param json   JSON text to decode against {@code schema}
 * @param schema schema used both to read the JSON and to write the binary form
 * @return the binary-encoded datum
 * @throws Exception if the JSON cannot be decoded against the schema or writing fails
 */
private static byte[] fromJsonToAvro(String json, Schema schema) throws Exception {
    // Hand the String to the decoder directly: json.getBytes() would encode with the
    // platform default charset and could corrupt non-ASCII text before it is parsed.
    Decoder decoder = DecoderFactory.get().jsonDecoder(schema, json);
    DatumReader<Object> reader = new GenericDatumReader<Object>(schema);
    Object datum = reader.read(null, decoder);

    ByteArrayOutputStream outputStream = new ByteArrayOutputStream();
    GenericDatumWriter<Object> writer = new GenericDatumWriter<Object>(schema);
    Encoder encoder = EncoderFactory.get().binaryEncoder(outputStream, null);
    writer.write(datum, encoder);
    // Flush the encoder so everything written reaches outputStream before it is read.
    encoder.flush();
    return outputStream.toByteArray();
}
有人可以幫忙看看,並告訴我這種對 JSON 字符串進行 Avro 二進制編碼的方式是否正確嗎?
我認為 OP 的做法是正確的。 這樣只會寫出 Avro 記錄本身,而不包含架構;如果寫成 Avro 數據文件,架構才會一併存在。
以下是 Avro 本身提供的幾個示例(如果您正在處理文件,會很有用):
• 從 JSON 到 Avro: DataFileWriteTool
• 從 Avro 到 JSON: DataFileReadTool
這是一個雙向的完整示例。
@Grapes([
@Grab(group='org.apache.avro', module='avro', version='1.7.7')
])
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.Schema;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.JsonEncoder;
// Avro schema for a record named Person (namespace foo) with a string "name" and an int "age".
String schema = '''{
"type":"record",
"namespace":"foo",
"name":"Person",
"fields":[
{
"name":"name",
"type":"string"
},
{
"name":"age",
"type":"int"
}
]
}'''
// Sample JSON document that matches the schema above.
String json = "{" +
"\"name\":\"Frank\"," +
"\"age\":47" +
"}"
// Round trip: JSON -> Avro binary -> JSON must give back exactly the original text.
assert avroToJson(jsonToAvro(json, schema), schema) == json
/**
 * Converts JSON text into Avro binary encoding.
 *
 * <p>Every record that can be decoded from {@code json} is written to the
 * output one after another. No schema or container-file header is written.
 *
 * @param json      JSON text holding one or more records matching the schema
 * @param schemaStr the Avro schema as a JSON string
 * @return the binary-encoded records
 * @throws IOException if decoding the JSON or encoding the records fails
 */
public static byte[] jsonToAvro(String json, String schemaStr) throws IOException {
    Schema schema = new Schema.Parser().parse(schemaStr);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
    GenericDatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);

    // Hand the String to the decoder directly: json.getBytes() would encode with the
    // platform default charset and could corrupt non-ASCII text before it is parsed.
    Decoder decoder = DecoderFactory.get().jsonDecoder(schema, json);
    ByteArrayOutputStream output = new ByteArrayOutputStream();
    Encoder encoder = EncoderFactory.get().binaryEncoder(output, null);

    // Keep reading records until the reader reports end of input via EOFException.
    while (true) {
        GenericRecord datum;
        try {
            datum = reader.read(null, decoder);
        } catch (EOFException endOfInput) {
            break;
        }
        writer.write(datum, encoder);
    }
    // Flush the encoder so everything written reaches the output before it is read.
    encoder.flush();
    return output.toByteArray();
}
/**
 * Converts Avro binary-encoded records back into JSON text.
 *
 * <p>The input is treated as a bare sequence of records, so the schema must
 * be supplied by the caller.
 *
 * @param avro      binary-encoded records
 * @param schemaStr the Avro schema as a JSON string
 * @return the records rendered as compact (non-pretty) JSON
 * @throws IOException if decoding the binary data or encoding the JSON fails
 */
public static String avroToJson(byte[] avro, String schemaStr) throws IOException {
    boolean pretty = false;
    Schema schema = new Schema.Parser().parse(schemaStr);
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
    DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);

    InputStream input = new ByteArrayInputStream(avro);
    Decoder decoder = DecoderFactory.get().binaryDecoder(input, null);
    ByteArrayOutputStream output = new ByteArrayOutputStream();
    JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, output, pretty);

    // Keep reading records until the reader reports end of input via EOFException.
    while (true) {
        GenericRecord datum;
        try {
            datum = reader.read(null, decoder);
        } catch (EOFException endOfInput) {
            break;
        }
        writer.write(datum, encoder);
    }
    encoder.flush();
    // Decode as UTF-8 explicitly; new String(bytes) would use the platform default
    // charset and could garble non-ASCII characters.
    return output.toString("UTF-8");
}
為完整起見,這里有一個示例,適用於您處理的是流(Avro 稱之為容器文件)而不是單條記錄的情況。 請注意,當您從 Avro 轉回 JSON 時,不需要傳遞架構,因為架構已經包含在流中。
@Grapes([
@Grab(group='org.apache.avro', module='avro', version='1.7.7')
])
// writes Avro as a http://avro.apache.org/docs/current/spec.html#Object+Container+Files rather than a sequence of records
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.IOException;
import java.io.InputStream;
import org.apache.avro.Schema;
import org.apache.avro.file.DataFileStream;
import org.apache.avro.file.DataFileWriter;
import org.apache.avro.generic.GenericDatumReader;
import org.apache.avro.generic.GenericDatumWriter;
import org.apache.avro.generic.GenericRecord;
import org.apache.avro.io.DatumReader;
import org.apache.avro.io.DatumWriter;
import org.apache.avro.io.Decoder;
import org.apache.avro.io.DecoderFactory;
import org.apache.avro.io.Encoder;
import org.apache.avro.io.EncoderFactory;
import org.apache.avro.io.JsonEncoder;
// Avro schema for a record named Person (namespace foo) with a string "name" and an int "age".
String schema = '''{
"type":"record",
"namespace":"foo",
"name":"Person",
"fields":[
{
"name":"name",
"type":"string"
},
{
"name":"age",
"type":"int"
}
]
}'''
// Sample JSON document that matches the schema above.
String json = "{" +
"\"name\":\"Frank\"," +
"\"age\":47" +
"}"
// Round trip: JSON -> Avro -> JSON must give back the original text.
// Only jsonToAvro takes the schema here; avroToJson is called without one.
assert avroToJson(jsonToAvro(json, schema)) == json
/**
 * Converts JSON text into an Avro object container file held in memory.
 *
 * <p>The container is created with the schema, and every record that can be
 * decoded from {@code json} is appended to it.
 *
 * @param json      JSON text holding one or more records matching the schema
 * @param schemaStr the Avro schema as a JSON string
 * @return the bytes of the container file
 * @throws IOException if decoding the JSON or writing the container fails
 */
public static byte[] jsonToAvro(String json, String schemaStr) throws IOException {
    Schema schema = new Schema.Parser().parse(schemaStr);
    DatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>(schema);
    // Hand the String to the decoder directly: json.getBytes() would encode with the
    // platform default charset and could corrupt non-ASCII text before it is parsed.
    Decoder decoder = DecoderFactory.get().jsonDecoder(schema, json);

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    DataFileWriter<GenericRecord> writer =
            new DataFileWriter<GenericRecord>(new GenericDatumWriter<GenericRecord>());
    try {
        writer.create(schema, output);
        // Keep reading records until the reader reports end of input via EOFException.
        while (true) {
            GenericRecord datum;
            try {
                datum = reader.read(null, decoder);
            } catch (EOFException endOfInput) {
                break;
            }
            writer.append(datum);
        }
    } finally {
        // Closing flushes pending data and releases the writer even when a record fails.
        writer.close();
    }
    return output.toByteArray();
}
/**
 * Converts an in-memory Avro object container file into JSON text.
 *
 * <p>No schema argument is needed: the schema is obtained from the
 * container via {@code DataFileStream.getSchema()}.
 *
 * @param avro bytes of an Avro object container file
 * @return the contained records rendered as compact (non-pretty) JSON
 * @throws IOException if reading the container or encoding the JSON fails
 */
public static String avroToJson(byte[] avro) throws IOException {
    boolean pretty = false;
    GenericDatumReader<GenericRecord> reader = new GenericDatumReader<GenericRecord>();
    InputStream input = new ByteArrayInputStream(avro);
    DataFileStream<GenericRecord> streamReader = new DataFileStream<GenericRecord>(input, reader);
    try {
        Schema schema = streamReader.getSchema();
        DatumWriter<GenericRecord> writer = new GenericDatumWriter<GenericRecord>(schema);
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        JsonEncoder encoder = EncoderFactory.get().jsonEncoder(schema, output, pretty);
        for (GenericRecord datum : streamReader) {
            writer.write(datum, encoder);
        }
        encoder.flush();
        // Decode as UTF-8 explicitly; new String(bytes) would use the platform default
        // charset and could garble non-ASCII characters.
        return output.toString("UTF-8");
    } finally {
        // Release the container reader whether or not the conversion succeeded.
        streamReader.close();
    }
}
為了補充 Keegan 的回答,這個討論可能有用:
要點是有一個特殊的 Json 模式,您可以使用 JsonReader/Writer 在 JSON 與該模式之間相互轉換。 您應該使用的 Json 架構在此處定義:
https://github.com/apache/avro/blob/trunk/share/schemas/org/apache/avro/data/Json.avsc
當您知道 json 文件的架構( {schema_file}.avsc )時,您可以使用 avro-tools 將 json 文件( {input_file}.json )轉換為 avro 文件( {output_file}.avro )。 就像下面這樣:
java -jar the/path/of/avro-tools-1.8.1.jar fromjson {input_file}.json --schema-file {schema_file}.avsc > {output_file}.avro
順便說一下, {schema_file}.avsc 文件的內容如下:
{"type": "record",
"name": "User",
"fields": [
{"name": "name", "type": "string"},
{"name": "favorite_number", "type": ["int", "null"]},
{"name": "favorite_color", "type": ["string", "null"]}
]
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.