[英]Elastic search count query based on field with value containing filesystem path
我之前在这里问过这个问题,但当我用更多数据验证该解决方案时,很快就意识到了自己的错误。
所以我又回到了原点,希望再次提出这个问题并获得更多见解。
我的任务没有变:更确切地说,是基于多个字段的值获取文档计数,其中包括值为文件系统路径的 path 字段。
我的示例数据如下所示:
{
"took": 3,
"timed_out": false,
"_shards": {
"total": 5,
"successful": 5,
"skipped": 0,
"failed": 0
},
"hits": {
"total": {
"value": 3,
"relation": "eq"
},
"max_score": 15.9074545,
"hits": [
{
"_index": "stage-data-20210728115212095",
"_type": "_doc",
"_id": "fil.31c425766287497ec5a508d995d1ce36",
"_score": 15.9074545,
"_source": {
"header_action": "uploaded",
"partition": 7,
"offset": 11382619,
"volumeId": "vol.e144f0bc59914725528f08d995ebd8c3",
"lambdaLagMs": 0,
"id": "fil.31c425766287497ec5a508d995d1ce36",
"name": "sampleFile.txt",
"parentFolderId": "fol.6357e749063445b0c5a408d995d1ce36",
"volumeName": "test-vol-b2ee569932dd470788ebc70e6f15bf36",
"type": "text/plain",
"path": "/test_Folder-ed9cc1294ba841f98fa986be7ac38813/Folder1/sampleFile.txt",
"timeCreated": "2021-10-23T06:10:45.287Z",
"timeModified": "2021-10-23T06:10:45.287Z",
"sizeInBytes": 26,
"isUploaded": true,
"archiveStatus": "None",
"storageTier": "Standard",
"eTag": "ed6a6e795564952d4d9707e7dc91c6a6",
"format": "TXT",
"status": "Available",
"recordDateTime": "2021-10-23 06:10:47.268",
"recordTurnAroundTimeMs": 2629.375,
"dataType": "File"
}
},
{
"_index": "stage-data-20210728115212095",
"_type": "_doc",
"_id": "fil.6075863c66464a2cc5a608d995d1ce36",
"_score": 15.500043,
"_source": {
"header_action": "uploaded",
"partition": 15,
"offset": 11393012,
"volumeId": "vol.e144f0bc59914725528f08d995ebd8c3",
"lambdaLagMs": 0,
"id": "fil.6075863c66464a2cc5a608d995d1ce36",
"name": "testFile.txt",
"parentFolderId": "fol.230c9c8861fa40640cc808d995d1b210",
"volumeName": "test-vol-b2ee569932dd470788ebc70e6f15bf36",
"type": "text/plain",
"path": "/test_Folder-ed9cc1294ba841f98fa986be7ac38813/testFile.txt",
"timeCreated": "2021-10-23T06:10:45.286Z",
"timeModified": "2021-10-23T06:10:45.286Z",
"sizeInBytes": 23,
"isUploaded": true,
"archiveStatus": "None",
"storageTier": "Standard",
"eTag": "2b9f6fc56449eb68b4fa5c5da127c5be",
"format": "TXT",
"status": "Available",
"recordDateTime": "2021-10-23 06:10:47.284",
"recordTurnAroundTimeMs": 2628.936,
"dataType": "File"
}
},
{
"_index": "stage-data-20210728115212095",
"_type": "_doc",
"_id": "fil.27a781dc81554811576308d995d1ce3c",
"_score": 15.500043,
"_source": {
"header_action": "uploaded",
"partition": 6,
"offset": 11377991,
"volumeId": "vol.e144f0bc59914725528f08d995ebd8c3",
"lambdaLagMs": 0,
"id": "fil.27a781dc81554811576308d995d1ce3c",
"name": "smallfile.txt",
"parentFolderId": "fol.6ac9ecb11dae4ebd576208d995d1ce3c",
"volumeName": "test-vol-b2ee569932dd470788ebc70e6f15bf36",
"type": "text/plain",
"path": "/test_Folder-ed9cc1294ba841f98fa986be7ac38813/Folder1/Folder2/smallfile.txt",
"timeCreated": "2021-10-23T06:10:45.294Z",
"timeModified": "2021-10-23T06:10:45.294Z",
"sizeInBytes": 1249,
"isUploaded": true,
"archiveStatus": "None",
"storageTier": "Standard",
"eTag": "c6e9338f9e54e39b52dd853908a1aecd",
"status": "Available",
"recordDateTime": "2021-10-23 06:10:47.276",
"recordTurnAroundTimeMs": 2629.8689999999997,
"dataType": "File"
}
}
]
}
}
我正在尝试使用 NEST c# 库获取文档数。 这是我的示例代码:
// Connection settings; "stage-data" is configured as the default index.
var elasticSettings = new ConnectionSettings(new Uri("https://myelasticurl/"))
    .DefaultIndex("stage-data");
var client = new ElasticClient(elasticSettings);
var folderPrefix = "/test_Folder-ed9cc1294ba841f98fa986be7ac38813/Folder1/Folder2/";

// Count request combining three clauses with &&: volume id, data type, and a
// wildcard on the "path" field. This is the variant under discussion — the
// surrounding text reports it yields a count of 0 for nested folders.
Func<CountDescriptor<dynamic>, ICountRequest> countQueryFilter = count => count.Query(query =>
{
    var byVolume = query.Match(m => m.Field("volumeId").Query("vol.e144f0bc59914725528f08d995ebd8c3"));
    var byDataType = query.Match(m => m.Field("dataType").Query("File"));
    var byPath = query.Wildcard(w => w.Field("path").Value(folderPrefix + "*"));
    return byVolume && byDataType && byPath;
});

// Run the count and print the number of matching documents.
var countResponse = client.CountAsync(countQueryFilter);
Console.WriteLine(countResponse.Result.Count);
这里是路径字段的映射
{
"stage-data-20210728115212095": {
"mappings": {
"path": {
"full_name": "path",
"mapping": {
"path": {
"type": "text",
"fields": {
"raw": {
"type": "keyword"
},
"rawlower": {
"type": "keyword",
"normalizer": "lowercase"
},
"tree": {
"type": "text",
"analyzer": "path_analyzer"
},
"tree_level": {
"type": "token_count",
"store": true,
"analyzer": "path_level_analyzer",
"enable_position_increments": false
}
},
"analyzer": "ngram_analyzer"
}
}
}
}
}
}
如果只按 volumeId 和 dataType 搜索,结果是正确的。即使加上 path 字段,对于文件位于根级文件夹下的数据集(例如 /folder1/mytxt.txt),查询也有效。只有当文件像上面的示例那样位于多级文件夹中,并按 /test_Folder-ed9cc1294ba841f98fa986be7ac38813/Folder1/Folder2/ 这样的路径搜索时,得到的计数才是 0。
在这一点上,我不确定是否需要调整此字段的映射设置以使其对搜索更友好,如此处建议的那样,或者我是否只是使用错误的方法来搜索它。
请注意,我确实尝试了以下路径搜索方法:
我得到了相同的结果,返回了 0 条记录。
请提出我所缺少的内容,提前感谢您的帮助。
我在 .NET core 3.1 上使用 NEST 7.13.0。
问候, 维卡斯
我的一位同事对此提供了帮助,该解决方案效果很好。 这是示例代码:
// Counts the "File" documents of one volume whose path starts with folderPrefix.
// The prefix is matched against "path.raw", the keyword sub-field declared in
// the mapping, instead of the analyzed "path" text field.
var elasticSettings = new ConnectionSettings(new Uri("https://myelasticurl/"))
    .DefaultIndex("stage-data");
var client = new ElasticClient(elasticSettings);
var folderPrefix = "/test_Folder-ed9cc1294ba841f98fa986be7ac38813/Folder1/Folder2/";

// Distinct lambda parameter names avoid shadowing the outer descriptor.
Func<CountDescriptor<dynamic>, ICountRequest> countQueryFilter = count => count.Query(query =>
    query.Match(m => m.Field("volumeId").Query("vol.e144f0bc59914725528f08d995ebd8c3"))
    && query.Match(m => m.Field("dataType").Query("File"))
    && query.Prefix(p => p.Field("path.raw").Value(folderPrefix)));

// Use the synchronous Count API rather than blocking on CountAsync(...).Result,
// which ties up a thread and wraps any failure in an AggregateException.
var countResponse = client.Count(countQueryFilter);

// A failed request would otherwise be indistinguishable from a genuine zero
// count, so report the failure details instead of printing a misleading number.
if (!countResponse.IsValid)
{
    Console.Error.WriteLine(countResponse.DebugInformation);
}
else
{
    Console.WriteLine(countResponse.Count);
}
所以基本上需要使用前缀(prefix)查询,并针对映射中定义的 keyword 子字段 path.raw 进行搜索。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.