![](/img/trans.png)
[英]Streaming data from Kinesis to S3 fails with Illegal Character that KPL itself writes
[英]Writing data from Kinesis to S3
我正在使用AWS開發工具包，從一個將數據發布到Kinesis流的Java應用程序寫入數據。使用以下代碼，一次可批量發送10條記錄。
// Convert to JSON object, and then to bytes...
ObjectWriter ow = new ObjectMapper().writer().withDefaultPrettyPrinter();
String json = ow.writeValueAsString(transaction);
// Add byte array to PutRecordsRequestEntry
PutRecordsRequestEntry record = new PutRecordsRequestEntry();
// A random UUID partition key spreads records evenly across shards
record.setPartitionKey(String.valueOf(java.util.UUID.randomUUID()));
// NOTE(review): getBytes() uses the platform default charset — prefer
// json.getBytes(StandardCharsets.UTF_8) so the payload is always UTF-8.
record.setData(ByteBuffer.wrap(json.getBytes()));
// Add to list...
batch.add(record);
// Check and send batches
// (batch, counter, batchLimit, putRecordsRequest and amazonKinesisClient
// are declared outside this snippet)
if(counter>=batchLimit){
logger.info("Sending batch of " + batchLimit + " rows.");
putRecordsRequest.setRecords(batch);
PutRecordsResult result = amazonKinesisClient.putRecords(putRecordsRequest);
batch = new ArrayList<>();
counter=0;
}else{
counter++;
}
然后，我有一個Node.js Lambda函數，該函數會在Kinesis上收到的每個事務上觸發，其想法是讀取從Kinesis傳入的事務，並將它們寫入Firehose傳輸流，以保存到S3。
var AWS = require('aws-sdk');
var firehose = new AWS.Firehose();
// Lambda handler: forwards each incoming Kinesis event to the Firehose
// delivery stream "transaction-postings".
exports.handler = function(event, context) {
console.log(event);
var params = {
DeliveryStreamName: "transaction-postings",
Record: {
// NOTE(review): `event` is an object, not a string — decodeURIComponent()
// coerces it via String(), yielding "[object Object]". This is the bug
// the question describes; the payload needs to be serialized instead.
Data: decodeURIComponent(event)
}
};
firehose.putRecord(params, function(err, data) {
if (err) console.log(err, err.stack); // an error occurred
else {
console.log(data); // successful response
}
context.done();
});
};
但是,當查看S3上的數據時,我所看到的只是以下內容,而不是我所期望的JSON對象列表...
[object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object][object Object]
誰能指出我將Kinesis中的數據作為JSON對象流式傳輸到S3所缺少的東西嗎?
Data: decodeURIComponent(event)
您需要對事件進行序列化,因為Lambda會自動反序列化參數。 即:
Data: JSON.stringify(decodeURIComponent(event))
對於那些想知道是否需要更改代碼的人來說……為了將生產者發送的實際消息寫入S3，需要對PutRecordsRequestEntry的data屬性進行解碼。換句話說，以下代碼塊展示了依賴於上述格式、用於從Kinesis流中解析數據的Lambda函數。
var AWS = require('aws-sdk');
var firehose = new AWS.Firehose();
var firehoseStreamName = "transaction-postings";
exports.handler = function(event, context) {
// This is the actual transaction, encapsulated with Kinesis Put properties
var transaction = event;
// Convert data object because this is all that we need
var buf = new Buffer(transaction.data, "base64");
// Convert to actual string which is readable
var jsonString = buf.toString("utf8");
// Prepare storage to postings firehose stream...
var params = {
DeliveryStreamName: firehoseStreamName,
Record: {
Data: jsonString
}
};
// Store data!
firehose.putRecord(params, function(err, data) {
if (err) {
// This needs to be fired to Kinesis in the future...
console.log(err, err.stack);
}
else{
console.log(data);
}
context.done();
});
};
這是因為以下使用AWS生產者依賴項發送的記錄
<!-- Amazon Kinesis Producer Library (KPL) Maven dependency used by the producer -->
<dependency>
<groupId>com.amazonaws</groupId>
<artifactId>amazon-kinesis-producer</artifactId>
<version>0.12.3</version>
</dependency>
看起來像這樣：
{
"kinesisSchemaVersion": "1.0",
"partitionKey": "cb3ff3cd-769e-4d48-969d-918b5378e81b",
"sequenceNumber": "49571132156681255058105982949134963643939775644952428546",
"data": "[base64 string]",
"approximateArrivalTimestamp": 1490191017.614
}
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.