
How to get complete chunk of data using split from file system?

I have a search feature implemented on top of fs. When I receive an input string from the client, I split the file data line by line. But as you can see in server.log below, some events span multiple lines, and pulling data line by line misses part of those chunks. For example, the first event is two lines, so my search returns only

[2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|infra.actorRouter|Adding event to queue: { queue: 'd-email',

and misses the second line of that event. How can I get the complete data, perhaps based on a time variable?

searchService.js

var prevLine;

async.eachSeries(filesData.logFiles, function(logfile, done) {
    // Read the file as a stream and split it into lines
    console.log('SearchEnv in eachSeries', filesData.searchEnv);
    fs.createReadStream('./logs/' + filesData.searchEnv + '/' + logfile.filename)
        .pipe(split())
        .on('data', function(line) {
            if (line.toLowerCase().indexOf(searchStr.toLowerCase()) != -1) parseLog(line, prevLine);
            else prevLine = line;
        })
        // Move on to the next file only after this one has been fully read
        .on('end', done);

    function parseLog(line, prev) {
        // Very rudimentary check: a continuation line has no '|' delimiter,
        // so prepend the previous line to restore the complete event
        if (line.indexOf('|') === -1) line = prev + line;
        // Parse the timestamp between the leading brackets
        var messageDateInfo = line.split('|')[0].replace(/[\[\]']+/g, '');
        messageDateInfo = new Date(messageDateInfo).getTime();
        searchStartDate = new Date(searchStartDate).getTime();
        searchEndDate = new Date(searchEndDate).getTime();
        console.log('message date is', messageDateInfo);
        if (messageDateInfo - searchStartDate > 0 && searchEndDate - messageDateInfo > 0) {
            // Message date is within the requested time range
            results.push({
                filename: logfile.filename,
                value: line
            });
        }
    }
}, function(err) {
    if (err) {
        console.log('error', err);
    }
    // map was misused here; forEach is the right tool for side effects
    results.forEach(function(result) {
        console.log('result', result);
    });

    // Send back results and reset state
    callback(results);
    results = [];
    logFiles = null;
});

server.log

[2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|infra.actorRouter|Adding event to queue:  { queue: 'd-email',
  msgId: '7eec01e9-6395-4fee-b44f-f09a40e56978' }
[2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|infra.templateActor|Filter match for actor/rule (d-email/email_service) with msgId: 7eec01e9-6395-4fee-b44f-f09a40e56978
[2017-03-22T20:25:04Z]|zldv6658|info|bmid: 7eec01e9-6395-4fee-b44f-f09a40e56978|mailDispatcher|Received mail event. msgId=7eec01e9-6395-4fee-b44f-f09a40e56978
[2017-03-22T20:25:04Z]|zldv6658|info|bmid: n/a|mailDispatcher|Mail event with msgId 7eec01e9-6395-4fee-b44f-f09a40e56978 successful: 3 messages delivered
[2017-03-22T20:25:05Z]|zldv6658|verbose|bmid: n/a|routes.event|Received Event from IP (::ffff:130.9.137.139): 74609753-143b-4e06-845c-9a5721575c19
 {"event":{"header":{"eventSource":"AOTSvTM","timestamp":1481966987000,"eventType":"http://aotsvtm.eventing.att.com/SendEscalationsEvent/V1","entityId":"ENTITYID_1"}

You can use the split module (similarly to how I demonstrated in my other answer to your very similar question) with the fs module.

fs.createReadStream(file)
  .pipe(split())
  .on('data', function (line) {
    // each chunk is now a separate line!
  });

See the docs: https://www.npmjs.com/package/split

If your log actually has multiline events, you could just keep the previous line(s) in memory while parsing. Also, don't load the whole thing into memory at once; use streams to reduce the strain on your machine.

let prevLine;

fs.createReadStream(file)
  .pipe(split())
  .on('data', function (line) {
    if (line.toLowerCase().indexOf(searchStr.toLowerCase()) != -1) parseLog(line, prevLine);
    else prevLine = line;
  });

function parseLog(line, prev) {
  // Very rudimentary check: continuation lines contain no '|' delimiter
  if (line.indexOf('|') === -1) line = prev + line;
  // Parse as you were doing
}
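
The prevLine trick only recovers events that span exactly two lines. If an event can span an arbitrary number of lines, a more robust approach is to buffer lines until the next timestamped line arrives and only then test the completed event. Here is a minimal sketch of that idea; the EVENT_START pattern, the searchEvents function, and the onMatch callback are illustrative assumptions, not part of the original code:

var fs = require('fs');
var split = require('split');

// Matches lines that begin a new event, e.g. "[2017-03-22T20:25:04Z]|..."
var EVENT_START = /^\[\d{4}-\d{2}-\d{2}T[^\]]+\]\|/;

function searchEvents(file, searchStr, onMatch) {
  var buffer = [];

  function flush() {
    if (buffer.length === 0) return;
    var event = buffer.join('\n'); // the complete, possibly multi-line event
    if (event.toLowerCase().indexOf(searchStr.toLowerCase()) !== -1) onMatch(event);
    buffer = [];
  }

  fs.createReadStream(file)
    .pipe(split())
    .on('data', function (line) {
      if (EVENT_START.test(line)) flush(); // previous event is now complete
      buffer.push(line);                   // continuation lines stay buffered
    })
    .on('end', flush); // don't forget the last event in the file
}

// Usage: print every complete event that mentions 'd-email'
searchEvents('./logs/server.log', 'd-email', function (event) {
  console.log(event);
});

Because the match is tested against the whole buffered event rather than a single line, hits inside continuation lines are found too, and the timestamp filtering from parseLog could be applied to the first line of each buffered event in the same way.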

As a rule of thumb for the future, log files are much easier to manage when built with single-line JSON.
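
For example, writing one JSON object per line (sometimes called NDJSON) keeps each event on exactly one line, so a line-by-line search never has to reassemble chunks. A minimal sketch, with field names chosen for illustration only:

var fs = require('fs');

// Writing: one JSON object per line means one event per line,
// even when the message payload itself contains newlines
function logEvent(level, component, message) {
  var entry = {
    timestamp: new Date().toISOString(),
    level: level,
    component: component,
    message: message
  };
  fs.appendFileSync('./logs/server.log', JSON.stringify(entry) + '\n');
}

// Reading: each line parses back into the full event;
// JSON.parse restores any newlines that were escaped inside the string
// var event = JSON.parse(line);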
