[英]How can I handle a large amount of ElasticSearch Index operations?
I have developed a backend for a photo sharing app, with Node.js. 我已经使用Node.js开发了一个照片共享应用程序的后端。
I use Firebase as the database. 我使用Firebase作为数据库。
Here is part of a screenshot: 这是屏幕截图的一部分: the 'Posts' section in Firebase Firebase中的“帖子”部分
I am going to index the 'Posts' section in ElasticSearch (see https://www.firebase.com/blog/2014-01-02-queries-part-two.html ). 我将在ElasticSearch中为“帖子”部分建立索引(参见 https://www.firebase.com/blog/2014-01-02-queries-part-two.html )。 Here is the indexing code: 以下是索引代码:
// Set up the ElasticSearch API client.
var client = new ElasticClient({ host: 'localhost', port: 9200 });

// Mirror every change under the Firebase 'Posts' node into ElasticSearch.
var fb = new Firebase('https://mydb.firebaseio.com/Posts');
fb.on('child_added', createOrUpdateIndex);
fb.on('child_changed', createOrUpdateIndex);
fb.on('child_removed', removeIndex);

var index = 'firebase';
var type = 'post';

/**
 * Index (or re-index) the post carried by a Firebase snapshot.
 * The snapshot key is used as the document id and its value as the body.
 * Requests are issued immediately, one per event, with no throttling.
 */
function createOrUpdateIndex(snap) {
  function onIndexed(data) {
    console.log('indexed ', snap.key());
  }

  function onFailed(err) {
    console.log(err);
  }

  client.index(index, type, snap.val(), snap.key())
    .on('data', onIndexed)
    .on('error', onFailed)
    .exec();
}

/**
 * Delete the ElasticSearch document that corresponds to a removed
 * Firebase child, logging the outcome.
 */
function removeIndex(snap) {
  function onDeleted(error, data) {
    if (!error) {
      console.log('deleted', snap.key());
      return;
    }
    console.error('failed to delete', snap.key(), error);
  }

  client.deleteDocument(index, type, snap.key(), onDeleted);
}
[2016-04-07 16:15:32,851][WARN ][indices.cluster ] [Caretaker] [[firebase][1]] marking and sending shard failed due to [engine failure, reason [index]] java.nio.file.FileSystemException: /Users/user/Downloads/elasticsearch-2.3.1/data/elasticsearch/nodes/0/indices/firebase/1/index/_a.fdt: Too many open files in system at sun.nio.fs.UnixException.translateToIOException(UnixException.java:91) at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:102) at sun.nio.fs.UnixException.rethrowAsIOException(UnixException.java:107) at sun.nio.fs.UnixFileSystemProvider.newByteChannel(UnixFileSystemProvider.java:214) at java.nio.file.spi.FileSystemProvider.newOutputStream(FileSystemProvider.java:430) at java.nio.file.Files.newOutputStream(Files.java:172) at org.apache.lucene.store.FSDirectory$FSIndexOutput.<init>(FSDirectory.java:271) at org.apache.lucene.store.FSDirectory.createOutput(FSDirectory.java:224) at org.apache.lucene.store.FileSwitchDirectory.createOutput(FileSwitchDirectory.java:155) at org.apache.lucene.store.RateLimitedFSDirectory.createOutput(RateLimitedFSDirectory.java:40) at org.apache.lucene.store.FilterDirectory.createOutput(FilterDirectory.java:73) at org.apache.lucene.store.LockValidatingDirectoryWrapper.createOutput(LockValidatingDirectoryWrapper.java:44) at org.apache.lucene.store.TrackingDirectoryWrapper.createOutput(TrackingDirectoryWrapper.java:43) at org.apache.lucene.codecs.compressing.CompressingStoredFieldsWriter.<init>(CompressingStoredFieldsWriter.java:111) at org.apache.lucene.codecs.compressing.CompressingStoredFieldsFormat.fieldsWriter(CompressingStoredFieldsFormat.java:128) at org.apache.lucene.codecs.lucene50.Lucene50StoredFieldsFormat.fieldsWriter(Lucene50StoredFieldsFormat.java:183) at org.apache.lucene.index.DefaultIndexingChain.initStoredFieldsWriter(DefaultIndexingChain.java:81) at org.apache.lucene.index.DefaultIndexingChain.startStoredFields(DefaultIndexingChain.java:279) at 
org.apache.lucene.index.DefaultIndexingChain.processDocument(DefaultIndexingChain.java:316) at org.apache.lucene.index.DocumentsWriterPerThread.updateDocument(DocumentsWriterPerThread.java:234) at org.apache.lucene.index.DocumentsWriter.updateDocument(DocumentsWriter.java:450) at org.apache.lucene.index.IndexWriter.updateDocument(IndexWriter.java:1477) at org.elasticsearch.index.engine.InternalEngine.innerIndex(InternalEngine.java:541) at org.elasticsearch.index.engine.InternalEngine.index(InternalEngine.java:457) at org.elasticsearch.index.shard.IndexShard.index(IndexShard.java:601) at org.elasticsearch.index.engine.Engine$Index.execute(Engine.java:836) at org.elasticsearch.action.index.TransportIndexAction.executeIndexRequestOnPrimary(TransportIndexAction.java:237) at org.elasticsearch.action.index.TransportIndexAction.shardOperationOnPrimary(TransportIndexAction.java:158) at org.elasticsearch.action.index.TransportIndexAction.shardOperationOnPrimary(TransportIndexAction.java:66) at org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryPhase.doRun(TransportReplicationAction.java:639) at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) at org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryOperationTransportHandler.messageReceived(TransportReplicationAction.java:279) at org.elasticsearch.action.support.replication.TransportReplicationAction$PrimaryOperationTransportHandler.messageReceived(TransportReplicationAction.java:271) at org.elasticsearch.transport.RequestHandlerRegistry.processMessageReceived(RequestHandlerRegistry.java:75) at org.elasticsearch.transport.TransportService$4.doRun(TransportService.java:376) at org.elasticsearch.common.util.concurrent.AbstractRunnable.run(AbstractRunnable.java:37) at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145) at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615) at 
java.lang.Thread.run(Thread.java:745)
I have struggled with this error, and I now see that it occurs because ElasticSearch exceeds the maximum number of open files when a large number of 'child_added' events are triggered in a short time. 我一直在为这个错误而苦苦挣扎,现在我发现这是因为在短时间内触发大量“child_added”事件时,ElasticSearch超出了可打开文件的最大数量。
I think I need to cache (queue) the index operations to avoid this error while keeping the open-file limit at its default value. 我认为我需要缓存(排队)索引操作,以便在保持打开文件数限制为默认值的情况下避免此错误。 How can I do that? 我怎样才能做到这一点?
You may use a semaphore to limit the number of running index actions to a specified capacity. 您可以使用信号量将正在运行的索引操作的数量限制为指定的容量。 Look at my post: https://stackoverflow.com/a/37456691/2733216 看我的帖子: https://stackoverflow.com/a/37456691/2733216
The code is not tested, but should work. 该代码未经测试,但应该可以工作。
// initialize our ElasticSearch API
var client = new ElasticClient({ host: 'localhost', port: 9200 });

// Target ElasticSearch index and document type for every post.
var index = 'firebase';
var type = 'post';

// Create a semaphore of capacity 1.
// The 'semaphore' module exports a factory function, so it has to be called
// with the capacity to obtain an object that provides take()/leave().
// Raise the argument to allow more index requests in flight at once.
var semaphore = require('semaphore')(1);

// listen for changes to Firebase data
// Listeners are registered last so that everything the handlers rely on
// (client, index, type, semaphore) is already initialized when the first
// event is delivered.
var fb = new Firebase('https://mydb.firebaseio.com/Posts');
fb.on('child_added', createOrUpdateIndex);
fb.on('child_changed', createOrUpdateIndex);
fb.on('child_removed', removeIndex);
/**
 * Index (or re-index) the post carried by a Firebase snapshot, throttled by
 * the shared semaphore so that the number of index requests in flight never
 * exceeds the semaphore's capacity.
 *
 * @param {Object} snap - Firebase snapshot; snap.key() is used as the
 *   document id and snap.val() as the document body.
 */
function createOrUpdateIndex(snap) {
  semaphore.take(function () {
    // Give the slot back exactly once per request: releasing it twice (for
    // example if both 'data' and 'error' fire) would corrupt the semaphore's
    // count, and never releasing it would stall all later indexing.
    var released = false;
    function release() {
      if (released) {
        return;
      }
      released = true;
      semaphore.leave();
    }

    try {
      client.index(index, type, snap.val(), snap.key())
        .on('data', function (data) {
          release();
          console.log('indexed ', snap.key());
        })
        .on('error', function (err) {
          release();
          console.log(err);
        })
        .exec();
    } catch (err) {
      // The request could not even be started: free the slot so queued
      // operations keep flowing, then let the failure propagate as before.
      release();
      throw err;
    }
  });
}
/**
 * Delete the ElasticSearch document that corresponds to a removed Firebase
 * child and log whether the deletion succeeded.
 *
 * @param {Object} snap - Firebase snapshot; snap.key() is the document id.
 */
function removeIndex(snap) {
  function onDeleted(error, data) {
    if (!error) {
      console.log('deleted', snap.key());
      return;
    }
    console.error('failed to delete', snap.key(), error);
  }

  client.deleteDocument(index, type, snap.key(), onDeleted);
}
Then, depending on your system's concurrency capacity, you may raise the semaphore capacity from 1 to x. 然后,根据系统的并发能力,您可以将信号量的容量从1调整为x。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.