[英]How to improve Neo4j 2.0 cypher/ExecutionResult performance under heavy load?
背景:隨着並發線程數的增加,我們注意到從ExecutionResult中檢索數據時性能下降。 我們的生產應用程序具有200個工作線程,以嵌入式模式使用Neo4j 2.0.0社區版。 例如,下面的耗時均以毫秒為單位。
程序的示例輸出(過濾其中一個線程的結果):
2013-12-23 14:39:31,137 [main] INFO net.ahm.graph.CypherLab - >>>>>>>>>>>>>>>>>>>>>>>>>>>>> NUMBER OF PARALLEL CYPHER EXECUTIONS: 1
2013-12-23 14:39:31,137 [main] INFO net.ahm.graph.CypherLab - >>>> STARTED GRAPHDB
2013-12-23 14:39:39,203 [main] INFO net.ahm.graph.CypherLab - >>>> CREATED NODES
2013-12-23 14:39:43,510 [main] INFO net.ahm.graph.CypherLab - >>>> WARMED UP
2013-12-23 14:39:43,510 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> CYPHER TOOK: 0 m-secs
2013-12-23 14:39:43,698 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> GETTING RESULTS TOOK: 188 m-secs
2013-12-23 14:39:43,698 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> CYPHER RETURNED ROWS: 50000
2013-12-23 14:39:43,698 [Thread-4] INFO net.ahm.graph.CypherLab - ### GRAPHDB SHUTDOWNHOOK INVOKED !!!
2013-12-23 14:40:10,470 [main] INFO net.ahm.graph.CypherLab - >>>>>>>>>>>>>>>>>>>>>>>>>>>>> NUMBER OF PARALLEL CYPHER EXECUTIONS: 10
...
2013-12-23 14:40:23,985 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> CYPHER TOOK: 1 m-secs
2013-12-23 14:40:25,219 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> GETTING RESULTS TOOK: 188 m-secs
2013-12-23 14:40:25,219 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> CYPHER RETURNED ROWS: 50000
2013-12-23 14:40:25,234 [Thread-4] INFO net.ahm.graph.CypherLab - ### GRAPHDB SHUTDOWNHOOK INVOKED !!!
2013-12-23 14:41:28,850 [main] INFO net.ahm.graph.CypherLab - >>>>>>>>>>>>>>>>>>>>>>>>>>>>> NUMBER OF PARALLEL CYPHER EXECUTIONS: 50
...
2013-12-23 14:41:41,781 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> CYPHER TOOK: 1 m-secs
2013-12-23 14:41:45,720 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> GETTING RESULTS TOOK: 2481 m-secs
2013-12-23 14:41:45,720 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> CYPHER RETURNED ROWS: 50000
2013-12-23 14:41:46,855 [Thread-4] INFO net.ahm.graph.CypherLab - ### GRAPHDB SHUTDOWNHOOK INVOKED !!!
2013-12-23 14:44:09,267 [main] INFO net.ahm.graph.CypherLab - >>>>>>>>>>>>>>>>>>>>>>>>>>>>> NUMBER OF PARALLEL CYPHER EXECUTIONS: 100
...
2013-12-23 14:44:22,077 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> CYPHER TOOK: 1 m-secs
2013-12-23 14:44:30,915 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> GETTING RESULTS TOOK: 4466 m-secs
2013-12-23 14:44:30,915 [pool-1-thread-1] INFO net.ahm.graph.CypherLab - >>>> CYPHER RETURNED ROWS: 50000
2013-12-23 14:44:31,680 [Thread-4] INFO net.ahm.graph.CypherLab - ### GRAPHDB SHUTDOWNHOOK INVOKED !!!
測試程序:
package net.ahm.graph;
import java.io.File;
import java.util.Map;
import java.util.concurrent.CountDownLatch;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.log4j.Logger;
import org.neo4j.cypher.javacompat.ExecutionEngine;
import org.neo4j.cypher.javacompat.ExecutionResult;
import org.neo4j.graphdb.DynamicLabel;
import org.neo4j.graphdb.GraphDatabaseService;
import org.neo4j.graphdb.Node;
import org.neo4j.graphdb.RelationshipType;
import org.neo4j.graphdb.Transaction;
import org.neo4j.graphdb.factory.GraphDatabaseFactory;
import org.neo4j.graphdb.factory.GraphDatabaseSettings;
import org.neo4j.graphdb.schema.IndexDefinition;
import org.neo4j.graphdb.schema.Schema;
import org.neo4j.kernel.impl.util.FileUtils;
import org.neo4j.kernel.impl.util.StringLogger;
/**
 * Stand-alone benchmark that measures how long it takes to run one Cypher query and iterate
 * over its {@link ExecutionResult} while {@link #CONCURRENCY} threads do the same in parallel
 * against a single embedded graph database.
 *
 * <p>Each run deletes and recreates the store directory, creates one {@code Parent} node with
 * {@link #CHILD_COUNT} {@code Child} nodes, runs the query ten times on the main thread as a
 * warm-up, and then runs it once per worker thread, logging the time spent in
 * {@code engine.execute(...)} and the time spent iterating the rows separately.
 */
public class CypherLab {
    private static final Logger LOG = Logger.getLogger(CypherLab.class);

    /** Number of worker threads that execute the query at the same time. */
    private static final int CONCURRENCY = 100;

    /** Number of Child nodes linked to the single Parent node. */
    private static final int CHILD_COUNT = 50000;

    /** Number of sequential query executions performed before the measured run. */
    private static final int WARM_UP_ROUNDS = 10;

    /** Store directory of the embedded database; it is deleted at the start of every run. */
    private static final String STORE_DIR = "graphdb";

    /** The query under test; shared by warm-up and measurement so both exercise the same plan. */
    private static final String QUERY = "match (n:Parent)-[:PARENT_CHILD]->(m:Child) return n.name, m.name";

    /**
     * Builds the test graph, warms up and runs the concurrent measurement.
     *
     * @param args ignored
     * @throws Exception if the store cannot be deleted or the main thread is interrupted while
     *         waiting for the workers
     */
    public static void main(String[] args) throws Exception {
        FileUtils.deleteRecursively(new File(STORE_DIR));
        final GraphDatabaseService graphDb = new GraphDatabaseFactory().newEmbeddedDatabaseBuilder(STORE_DIR)
                .setConfig(GraphDatabaseSettings.use_memory_mapped_buffers, "true")
                .setConfig(GraphDatabaseSettings.cache_type, "strong")
                .newGraphDatabase();
        registerShutdownHook(graphDb);
        LOG.info(">>>>>>>>>>>>>>>>>>>>>>>>>>>>> NUMBER OF PARALLEL CYPHER EXECUTIONS: " + CONCURRENCY);
        LOG.info(">>>> STARTED GRAPHDB");
        createIndex("Parent", "name", graphDb);
        createIndex("Child", "name", graphDb);
        populateGraph(graphDb);
        LOG.info(">>>> CREATED NODES");
        final ExecutionEngine engine = new ExecutionEngine(graphDb, StringLogger.SYSTEM);
        warmUp(graphDb, engine);
        LOG.info(">>>> WARMED UP");
        runConcurrently(graphDb, engine);
    }

    /** Creates one Parent node and CHILD_COUNT Child nodes related to it, in a single transaction. */
    private static void populateGraph(GraphDatabaseService graphDb) {
        try (Transaction tx = graphDb.beginTx()) {
            Node parent = graphDb.createNode(DynamicLabel.label("Parent"));
            parent.setProperty("name", "parent");
            for (int i = 0; i < CHILD_COUNT; i++) {
                Node child = graphDb.createNode(DynamicLabel.label("Child"));
                child.setProperty("name", "child" + i);
                parent.createRelationshipTo(child, RelationshipTypes.PARENT_CHILD);
            }
            tx.success();
        }
    }

    /** Runs the query WARM_UP_ROUNDS times on the calling thread and consumes every row. */
    private static void warmUp(GraphDatabaseService graphDb, ExecutionEngine engine) {
        for (int i = 0; i < WARM_UP_ROUNDS; i++) {
            try (Transaction tx = graphDb.beginTx()) {
                ExecutionResult result = engine.execute(QUERY);
                for (Map<String, Object> row : result) {
                    assert ((String) row.get("n.name") != null);
                    assert ((String) row.get("m.name") != null);
                }
                tx.success();
            }
        }
    }

    /**
     * Submits CONCURRENCY query tasks to a fixed pool of the same size and waits for all of them.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    private static void runConcurrently(final GraphDatabaseService graphDb, final ExecutionEngine engine)
            throws InterruptedException {
        ExecutorService es = Executors.newFixedThreadPool(CONCURRENCY);
        try {
            final CountDownLatch cdl = new CountDownLatch(CONCURRENCY);
            for (int i = 0; i < CONCURRENCY; i++) {
                es.execute(new Runnable() {
                    @Override
                    public void run() {
                        try {
                            executeAndTime(graphDb, engine);
                        } catch (Throwable t) {
                            // Thread boundary: catch everything (including AssertionError) and
                            // pass the throwable as the second argument so log4j prints the
                            // stack trace, not just toString().
                            LOG.error("Cypher execution failed", t);
                        } finally {
                            cdl.countDown();
                        }
                    }
                });
            }
            cdl.await();
        } finally {
            // Always release the pool; its threads are non-daemon and would otherwise keep the
            // JVM alive if await() is interrupted or a submission fails.
            es.shutdown();
        }
    }

    /**
     * Executes the query once inside its own transaction and logs the time taken by
     * {@code execute} and the time taken to iterate the result, followed by the row count.
     */
    private static void executeAndTime(GraphDatabaseService graphDb, ExecutionEngine engine) {
        try (Transaction tx = graphDb.beginTx()) {
            long time = System.currentTimeMillis();
            ExecutionResult result = engine.execute(QUERY);
            LOG.info(">>>> CYPHER TOOK: " + (System.currentTimeMillis() - time) + " m-secs");
            int count = 0;
            // Restart the clock after logging so only row iteration is measured.
            time = System.currentTimeMillis();
            for (Map<String, Object> row : result) {
                assert ((String) row.get("n.name") != null);
                assert ((String) row.get("m.name") != null);
                count++;
            }
            LOG.info(">>>> GETTING RESULTS TOOK: " + (System.currentTimeMillis() - time) + " m-secs");
            tx.success();
            LOG.info(">>>> CYPHER RETURNED ROWS: " + count);
        }
    }

    /**
     * Creates a schema index on {@code label(propertyName)} and waits up to ten seconds for it
     * to come online.
     *
     * @param label        node label to index
     * @param propertyName property key to index
     * @param graphDb      database in which the index is created
     */
    private static void createIndex(String label, String propertyName, GraphDatabaseService graphDb) {
        IndexDefinition indexDefinition;
        try (Transaction tx = graphDb.beginTx()) {
            Schema schema = graphDb.schema();
            indexDefinition = schema.indexFor(DynamicLabel.label(label)).on(propertyName).create();
            tx.success();
        }
        // The wait runs in a second transaction, after the creating one has been closed.
        try (Transaction tx = graphDb.beginTx()) {
            Schema schema = graphDb.schema();
            schema.awaitIndexOnline(indexDefinition, 10, TimeUnit.SECONDS);
            tx.success();
        }
    }

    /** Registers a JVM shutdown hook that logs and then shuts the database down. */
    private static void registerShutdownHook(final GraphDatabaseService graphDb) {
        Runtime.getRuntime().addShutdownHook(new Thread() {
            @Override
            public void run() {
                LOG.info("### GRAPHDB SHUTDOWNHOOK INVOKED !!!");
                graphDb.shutdown();
            }
        });
    }

    /** Relationship types used by the test graph. */
    private enum RelationshipTypes implements RelationshipType {
        PARENT_CHILD
    }
}
合並此提交時應該會更好。 它將作為2.0.1的一部分發布。還有其他一些較小的阻塞點。
您可以嘗試將您的Web服務器線程數限制為內核數量(或內核數量 * 2)嗎? 看看是否有幫助?
我的理解是,在預熱並且將熱數據集存儲在緩存中之后,它僅受CPU約束,不再受I/O約束。 因此,線程過多只會讓它們互相爭搶CPU,使CPU和工作線程不堪重負。
如果我分別使用8個和100個線程運行您的測試,則執行查詢並獲取50k條結果的耗時分布如下:
代碼和詳細的直方圖: https : //gist.github.com/jexp/a164f6cf9686b8125872
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.