Node.js 分割文件行

Question

我想编写一个脚本，把从文件中读取的行按每批 25 条进行分批，但不幸的是，示例中的一批返回了 40 条代码。我希望它能把这些行分成每批 25 条。举例来说，如果我有 60 条代码，就应该得到 2 个各含 25 条的批次，以及 1 个含 10 条代码的批次。不幸的是，我没能实现这一点。

// Asker's original attempt: read code.txt and send its lines to the database
// through dynamoDB.batchWrite. `dynamoDB` is not defined in this snippet.
const fs = require('fs');
fs.readFile('code.txt', function (err, data) {
    if (err) throw err;
    // One array entry per "\n"-separated line of the file.
    const array = data.toString().split("\n");
    // Incremented once per loop iteration until it reaches 25.
    let count = 0;

    // Accumulates one PutRequest per line; only emptied inside the batchWrite callback.
    let items = [];
    for (let i in array) {
        items.push({
            PutRequest: {
                Item: {
                    code: array[i]
                }
            }
        });

        // Rebuilt on every iteration; always refers to the current `items` array.
        let params = {
            RequestItems: {
                'TABLE_NAME': items
            }
        };

        // The check runs after the push, so when count is 25 `items` already holds 26 entries.
        if (count === 25) {
            dynamoDB.batchWrite(params, function (err, data) {
                if (err) {
                    console.log(err);
                } else {
                    // The reset happens only here, inside the callback.
                    // NOTE(review): if batchWrite invokes this callback asynchronously,
                    // the loop has already finished by then, so the reset never takes
                    // effect while the loop is running — confirm against the SDK.
                    count = 0;
                    items = [];
                }
            });

        }else{
            count++;
        }
    }
});

code.txt 内容

https://0bin.net/paste/NA8-4hkq#1Ohwt5uUkQqE0YscwnxTX2gxEqlvAUVKp1JRipBCsZg

知道我做错了什么吗？

Answer 1

您的 dynamoDB.batchWrite() 是异步的，因此它的回调只会在循环结束之后才执行。所以在循环运行期间，items 和 count 永远不会被重置……

最简单的做法是改用基于 Promise 的方式，如下所示：

// Maximum number of put requests sent in one batchWrite call.
const BATCHSIZE = 25;
const fs = require('fs').promises;

/**
 * Reads code.txt and writes its non-empty lines to the database in batches of
 * at most BATCHSIZE items, waiting for each batch before sending the next.
 *
 * `dynamoDb` is expected to be defined by the surrounding code.
 *
 * @returns {Promise<void>} Resolves once every batch has been written.
 * @throws Rejects with the error reported by readFile or by batchWrite.
 */
async function batchLoad() {
   // Split on LF or CRLF and drop empty lines (for example the empty string a
   // trailing newline leaves behind) so that no empty `code` value is sent.
   const lines = (await fs.readFile("code.txt", "utf-8"))
      .split(/\r?\n/)
      .filter(line => line !== "");
   while (lines.length > 0) {
      // splice removes the batch from `lines`, so the loop ends when nothing is left.
      const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
      const params = { RequestItems: { TABLE_NAME: items}};

      // Adapt the callback API to a promise so the next batch starts only after this one.
      // NOTE(review): a successful batchWrite may still report UnprocessedItems;
      // those are not retried here — confirm whether a retry is needed.
      await new Promise((resolve, reject) => {
        dynamoDb.batchWrite(params, (err) => {
          if (err) return reject(err);
          resolve();
        });
      });
   }

}

基于回调的方法可能如下所示

// Maximum number of put requests sent in one batchWrite call.
const BATCHSIZE = 25;
const fs = require('fs');

// Reads code.txt and writes its non-empty lines to the database batch by batch,
// using callbacks only. `dynamoDb` is expected to be defined by the surrounding code.
fs.readFile("code.txt", "utf-8", (err, data) => {
  // Without this check a failed read would crash on `data.split` below.
  if (err) throw err;
  // Split on LF or CRLF and drop empty lines (for example the one after a trailing newline).
  const lines = data.split(/\r?\n/).filter(line => line !== "");

  // Removes the next batch from `lines`, writes it, and continues with the
  // following batch only after this one was written successfully.
  function writeBatch() {
    if (!lines.length) return;
    const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
    const params = { RequestItems: { TABLE_NAME: items}};

    dynamoDb.batchWrite(params, err => {
      // Stop at the first failed batch; the remaining lines are not written.
      if (err) console.error(err);
      else writeBatch();
    });
  }

  writeBatch();
});

函数 writeBatch 从原始数组中取出一定数量的行并将它们写入数据库。只有在这一批成功写入数据库之后，它才会再次调用自身来处理下一批。但请注意，这种方法可能会超过最大调用堆栈大小并引发错误。

您也可以让这两种方法都不去修改 lines 数组（对大数组反复 splice 的开销可能很大），而只是取出当前的切片：

// Maximum number of put requests sent in one batchWrite call.
const BATCHSIZE = 25;
const fs = require('fs').promises;

/**
 * Reads code.txt and writes its non-empty lines to the database in batches of
 * at most BATCHSIZE items, without modifying the array of lines.
 *
 * `dynamoDb` is expected to be defined by the surrounding code.
 *
 * @returns {Promise<void>} Resolves once every batch has been written.
 * @throws Rejects with the error reported by readFile or by batchWrite.
 */
async function batchLoad() {
   // Split on LF or CRLF and drop empty lines (for example the empty string a
   // trailing newline leaves behind) so that no empty `code` value is sent.
   const lines = (await fs.readFile("code.txt", "utf-8"))
      .split(/\r?\n/)
      .filter(line => line !== "");
   // Index of the first line of the batch that is written next.
   let currentIndex = 0;
   while (currentIndex < lines.length) {
      // slice copies the current window; the last batch may be shorter than BATCHSIZE.
      const items = lines.slice(currentIndex, currentIndex + BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
      const params = { RequestItems: { TABLE_NAME: items}};

      // Adapt the callback API to a promise so the next batch starts only after this one.
      await new Promise((resolve, reject) => {
        dynamoDb.batchWrite(params, (err) => {
          if (err) return reject(err);
          resolve();
        });
      });
      currentIndex += BATCHSIZE;
   }

}

和

// Maximum number of put requests sent in one batchWrite call.
const BATCHSIZE = 25;
const fs = require('fs');

// Reads code.txt and writes its non-empty lines to the database batch by batch,
// using callbacks only and without modifying the array of lines.
// `dynamoDb` is expected to be defined by the surrounding code.
fs.readFile("code.txt", "utf-8", (err, data) => {
  // Without this check a failed read would crash on `data.split` below.
  if (err) throw err;
  // Split on LF or CRLF and drop empty lines (for example the one after a trailing newline).
  const lines = data.split(/\r?\n/).filter(line => line !== "");

  // Writes the batch starting at `currentIndex` and, once that succeeded,
  // continues with the batch that starts BATCHSIZE lines later.
  function writeBatch(currentIndex) {
    if (currentIndex >= lines.length) return;
    const items = lines.slice(currentIndex, currentIndex + BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
    const params = { RequestItems: { TABLE_NAME: items}};

    dynamoDb.batchWrite(params, err => {
      // Stop at the first failed batch; the remaining lines are not written.
      if (err) console.error(err);
      else writeBatch(currentIndex + BATCHSIZE);
    });
  }

  writeBatch(0);
});

为了避免触发“超出最大调用堆栈大小”的异常，您还可以把下一批的处理交给事件循环来调度，而不是直接递归调用它，例如：

// Fragment: replaces the batchWrite call inside writeBatch(currentIndex).
dynamoDb.batchWrite(params, err => {
  if (err) {
    // Stop at the first failed batch; the remaining lines are not written.
    console.error(err);
    return;
  }
  // Schedule the next batch through a timer instead of calling writeBatch
  // directly from inside this callback.
  setTimeout(() => { writeBatch(currentIndex + BATCHSIZE); }, 0);
});

这样你就不会从递归调用中建立一个庞大的调用堆栈。

要跟踪已经保存到数据库的记录数，只需把当前的计数保存到一个文件中即可。重新启动该进程时，读取这个文件并确定需要跳过多少行。别忘了在所有记录都保存完毕之后删除该文件。下面以第一种方法为例：

// Maximum number of put requests sent in one batchWrite call.
const BATCHSIZE = 25;
const fs = require('fs').promises;

/**
 * Reads code.txt and writes its non-empty lines to the database in batches of
 * at most BATCHSIZE items. After every successful batch the number of lines
 * written so far is stored in skip.txt, so a restarted run skips them.
 * skip.txt is deleted once all lines have been written.
 *
 * `dynamoDb` is expected to be defined by the surrounding code.
 *
 * @returns {Promise<void>} Resolves once every remaining batch has been written.
 * @throws Rejects with the error reported by reading code.txt, by batchWrite,
 *         or by writing skip.txt.
 */
async function batchLoad() {
   // Split on LF or CRLF and drop empty lines (for example the empty string a
   // trailing newline leaves behind) so that no empty `code` value is sent.
   const lines = (await fs.readFile("code.txt", "utf-8"))
      .split(/\r?\n/)
      .filter(line => line !== "");

   // Must be `let`: the counter is reassigned below and after every batch.
   let skipLines = 0;
   try {
     skipLines = Number.parseInt(await fs.readFile("skip.txt", "utf-8"), 10);
     // An empty or corrupted progress file means "start from the beginning".
     if (Number.isNaN(skipLines) || skipLines < 0) skipLines = 0;
   } catch (e) {
     // skip.txt is missing or unreadable: nothing has been written yet.
     skipLines = 0;
   }
   // Drop the lines an earlier run already wrote.
   lines.splice(0, skipLines);

   while (lines.length > 0) {
      // splice removes the batch from `lines`, so the loop ends when nothing is left.
      const items = lines.splice(0, BATCHSIZE).map(l => ({PutRequest: {Item: { code: l }}}));
      const params = { RequestItems: { TABLE_NAME: items}};

      // Adapt the callback API to a promise so the next batch starts only after this one.
      await new Promise((resolve, reject) => {
        dynamoDb.batchWrite(params, (err) => {
          if (err) return reject(err);
          resolve();
        });
      });
      // Count what was actually sent; the last batch may be shorter than BATCHSIZE.
      skipLines += items.length;
      await fs.writeFile("skip.txt", `${skipLines}`);
   }

   try {
     await fs.unlink("skip.txt");
   } catch (e) {
     // A missing progress file is fine (nothing was written in this run);
     // report anything else instead of hiding it, without failing the load.
     if (e.code !== "ENOENT") console.error(e);
   }
}

Node.js 分割文件行

问题描述

1 个解决方案

解决方案1
2 已采纳 2021-05-24 14:12:44

Node.js 分割文件行

问题描述

1 个解决方案

解决方案1 2 已采纳 2021-05-24 14:12:44

解决方案1
2 已采纳 2021-05-24 14:12:44