[英]MongoDB to Elasticsearch indexing
在將 MongoDB 集合的數據索引到 Elasticsearch 時卡住了。
以下是我試圖從 mongo 索引數據的代碼。
// Question script: connects to MongoDB, walks one collection and tries to push
// its documents into an Elasticsearch index through the legacy `elasticsearch`
// client. The code is kept exactly as posted; the NOTE(review) comments mark the
// spots the answers below discuss.
const elasticsearch = require('elasticsearch');
// Array that collects the bulk entries (action line followed by document).
var bulk = [];
var MongoClient = require('mongodb').MongoClient;
// NOTE(review): ObjectID is not referenced anywhere else in this script.
var ObjectID = require('mongodb').ObjectID;
var mongoDBName = 'mydb'; // Name of mongodb goes here
var mongoCollectionName = 'mycollection'; // Collection name of mongodb goes here
var connectionString = 'mongodb://127.0.0.1:27017/'; // put username and password for mongo here
var esIndexName = 'new-collection'; // Elasticsearch index name will go here
// NOTE(review): second declaration of `bulk`; it was already declared above.
var bulk = [];
// Elasticsearch client pointed at the local node.
const client = new elasticsearch.Client({
hosts: [ 'http://localhost:9200']
});
// Ping the client to check that Elasticsearch is up.
// The callback only logs the outcome; the MongoDB code below is started
// regardless of the result.
client.ping({
requestTimeout: 30000,
}, function(error) {
// An error here means the ping failed; check the Elasticsearch service.
if (error) {
console.error('Elasticsearch cluster is down!');
} else {
console.log('Everything is ok');
}
});
// Connect to `connectionString + mongoDBName` and process the collection.
MongoClient.connect(connectionString+mongoDBName, function(err, db) {
if(err) throw err;
// Callback below runs for each object in the collection.
var collection = db.collection(mongoCollectionName);
// Number of callback invocations so far; used only for the progress log.
var counter = 0;
// NOTE(review): `err` is never checked inside this callback, and `response`
// and `status` are never used.
collection.find().each(function(err, item, response, status) {
console.log(item)
// NOTE(review): `item` is presumably a single document object (confirm against
// the driver docs). Array.from() on a plain object without `length` yields an
// empty array, so this loop would push nothing; a null `item` would make
// Array.from() throw.
Array.from(item).forEach(itemdata => {
bulk.push({index:{
_index: esIndexName,
_type: mongoCollectionName,
}
})
bulk.push(itemdata)
})
// Perform bulk indexing of the data collected so far.
// This call sits inside the per-item callback, so one bulk request is issued
// per callback invocation, each time with the whole `bulk` array.
client.bulk({body:bulk}, function( err, response ){
if( err ){
// NOTE(review): `.red` / `.green` are property reads on a string; no module that
// would define them is required in this script — presumably `colors` was intended.
console.log("Failed Bulk operation".red, err)
} else {
// `mongoCollectionName.length` is the length of the collection *name* string.
console.log("Successfully imported %s".green, mongoCollectionName.length);
}
console.log(response);
});
if(item != null) {
// Progress line on every 100th invocation.
if(counter % 100 == 0) console.log( "Syncing object id: "+ item['_id'] + " #: " + counter);
// The index-creation request is issued here for every non-null item, and only
// after the bulk request above has been sent.
client.indices.create(
{ index: esIndexName },
function(error, response) {
if (error) {
console.log(error);
} else {
console.log("created a new index", response);
}
}
);
}
counter += 1;
});
});
所以在這里我試圖將數據索引到 elasticsearch 中,我能夠創建集合索引,但未能將數據插入彈性搜索的索引中。 有人能幫我一下嗎? 我在哪里出錯了,我在這里犯了什么錯誤。 我在這里使用nodejs,只是簡單的function 進行測試,稍后將添加lambda function 來更新/刪除以及任何更改。
首先,我建議整理您的代碼; 很難看出這些塊是如何嵌套的。
現在,您的代碼存在幾個問題:
為什麼要寫 `Array.from(item).forEach(itemdata => {`?`item` 是一個來自 Mongo 的文檔對象,所以對它調用 `Array.from` 沒有效果。
您是在 `.each` 回調中調用 `bulk` API;這意味著您會為每個文檔發起一次 API 調用。我不認為這是您想要的。
`ping` 調用很好,但它不會阻止其餘代碼在集群關閉時繼續運行。
所以您應該這樣做:遍歷文檔並把它們累積到 body 中;每當累積到 `n` 個文檔時,調用 `bulk` API 並重置您的 body。
這是您正在尋找的解決方案
index.js
// ---- MongoDB side ----
const { MongoClient } = require('mongodb');
const connectionString = 'mongodb://127.0.0.1:27017/'; // put username and password for mongo here
const mongoDBName = 'mydb'; // Name of mongodb goes here
const mongoCollectionName = 'mycollection'; // Collection name of mongodb goes here

// ---- Elasticsearch side ----
const { Client } = require('@elastic/elasticsearch');
const esIndexName = 'new-collection'; // Elasticsearch index name will go here
const esClient = new Client({ node: 'http://localhost:9200' });

// Bulk request payload: one action entry followed by one document entry per record.
let bulk = [];
/**
 * Pair every failed item of an Elasticsearch bulk response with the action
 * and document that produced it.
 *
 * @param {Array<object>} items - The `items` array of the bulk response; it is
 *   in the same order as the records that were sent.
 * @param {Array<object>} body - The bulk payload that was sent: for record `i`
 *   the action sits at index `2 * i` and the document at `2 * i + 1`.
 * @returns {Array<object>} One `{ status, error, operation, document }` entry
 *   per failed item (empty when nothing failed).
 */
function collectBulkFailures(items, body) {
  const failures = [];
  items.forEach((action, i) => {
    // Each item has a single key naming the operation that was performed.
    const operation = Object.keys(action)[0];
    // The presence of the `error` key indicates that the operation failed.
    if (action[operation].error) {
      failures.push({
        // If the status is 429 the document can be retried; otherwise it is
        // very likely a mapping error and the document should be fixed first.
        status: action[operation].status,
        error: action[operation].error,
        operation: body[i * 2],
        document: body[i * 2 + 1],
      });
    }
  });
  return failures;
}

/**
 * Copy every document of the configured MongoDB collection into the
 * configured Elasticsearch index using one bulk request.
 *
 * Steps: connect to MongoDB, turn each document (without its `_id`) into an
 * `index` action plus source entry in the module-level `bulk` array, create
 * the index, send the bulk request, log per-document failures, and finally
 * log the result of a count request on the index. Failures while connecting,
 * reading or indexing are logged rather than rethrown.
 *
 * @returns {Promise<void>} Settles after the MongoDB connection has been
 *   closed, or right after logging when the connection could not be opened.
 */
async function indexData() {
  let client;
  try {
    client = await MongoClient.connect(connectionString, { useNewUrlParser: true });
  } catch (err) {
    console.log(err);
    return;
  }
  if (!client) {
    return;
  }

  try {
    const collection = client.db(mongoDBName).collection(mongoCollectionName);

    // Start from an empty payload so that a repeated call does not re-send the
    // documents gathered by an earlier run.
    bulk.length = 0;
    await collection.find().forEach((doc) => {
      // `_id` is left out of the indexed source — presumably because
      // Elasticsearch treats `_id` as a metadata field (confirm).
      const { _id, ...data } = doc;
      bulk.push({ index: { _index: esIndexName } }, data);
    });
    console.log(bulk);

    // HTTP 400 is ignored here (returned e.g. when the index already exists).
    await esClient.indices.create({ index: esIndexName }, { ignore: [400] });

    if (bulk.length > 0) {
      const { body: bulkResponse } = await esClient.bulk({ refresh: true, body: bulk });
      if (bulkResponse.errors) {
        console.log(collectBulkFailures(bulkResponse.items, bulk));
      }
    } else {
      // A bulk request without any action lines is rejected by Elasticsearch,
      // so an empty collection skips the request instead of logging an error.
      console.log('No documents found in %s; skipping bulk request', mongoCollectionName);
    }

    const { body: count } = await esClient.count({ index: esIndexName });
    console.log(count);
  } catch (err) {
    console.log(err);
  } finally {
    // Wait for the connection to close so the returned promise only settles
    // once cleanup has finished.
    await client.close();
  }
}
indexData();
package.json
{
"name": "elastic-node-mongo",
"version": "1.0.0",
"description": "Simple example to connect ElasticSearch, MongoDB and NodeJS",
"main": "index.js",
"dependencies": {
"@elastic/elasticsearch": "^7.3.0",
"mongodb": "^3.3.2",
"nodemon": "1.18.3"
},
"scripts": {
"dev": "nodemon",
"start": "node index.js"
},
"keywords": [
"nodejs",
"node",
"mongodb",
"elasticsearch",
"docker"
],
"author": "Sathishkumar Rakkiasmy",
"license": "ISC"
}
澄清
我能夠創建集合索引,但未能將數據插入彈性搜索索引。
上面這句話說得通,因為 `bulk` 變量並未被修改。
請參閱以下鏈接,了解為什麼 `bulk` 變量沒有改變。
為什么我的變量在 function 內部修改后沒有改變? - 異步代碼參考
了解有關異步編程的更多信息
https://developer.mozilla.org/en-US/docs/Learn/JavaScript/Asynchronous
https://developer.mozilla.org/en-US/docs/Learn/JavaScript/Asynchronous/Async_await
您可以使用 Logstash 將數據從 MongoDB 導入 Elasticsearch。請參考下面的配置。
# Logstash pipeline: read documents from a MongoDB collection and index them
# into Elasticsearch, echoing each event to stdout as well.
# String values use plain ASCII quotes; typographic quotes are not valid
# string delimiters in a Logstash configuration file.
input {
  mongodb {
    codec => "json"
    uri => 'mongodb://localhost:27017/NewDb'
    placeholder_db_dir => '/home/devbrt.shukla/Desktop/scalaoutput/ELK/logstash-6.4.1/db_dir'
    placeholder_db_name => 'Employee_sqlite.db'
    collection => 'Employee'
    batch_size => 5000
    generateId => 'true'
    parse_method => "simple"
  }
}
filter {
  mutate {
    # Drop the `_id` field from each event before it is indexed.
    remove_field => [ "_id" ]
  }
}
output {
  elasticsearch {
    hosts => ["localhost:9200"]
    # Index name carries the event date, e.g. employee-2019.10.01.
    index => "employee-%{+YYYY.MM.dd}"
  }
  stdout { codec => rubydebug }
}
在 Logstash 中,配置分為三個部分:Input、Filter 和 Output。
Input:從 SQL、MongoDB、MySQL 等數據源讀取數據。
Filter(過濾器):在這一部分,我們可以構造自定義的 JSON,以便索引到 Elasticsearch。
Output:在這一部分,我們填寫輸出端的索引名稱、文檔類型和 IP 地址,即 localhost:9200。
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.