[英]Writing binary data using node.js fs.writeFile to create an image file
[英]Writing large CSV to JS file using Node FS
我有一个包含邮政编码数据(~1.1GB)的大型 CSV 文件,我试图过滤掉我需要的数据,然后将一组值写入 JS 文件。
问题是,我总是使用太多 memory 并收到此错误:
Ineffective mark-compacts near heap limit Allocation failed - JavaScript heap out of memory
我尝试使用以下命令增加 memory: node --max-old-space-size=4096 fileName.js但我仍然达到了 memory 限制,它只需要更长的时间!
这是我写给 JS 的代码
const csvFilePath = "./data/postcodes.csv";
const csv = require("csvtojson");
const fs = require("fs");
csv()
.fromFile(csvFilePath)
.then((jsonArray) => {
const inUsePostcodes = jsonArray.filter((x) => x["In Use?"] === "Yes").map((y) => y.Postcode);
fs.writeFileSync("postcodes.js", inUsePostcodes);
});
这是邮政编码的示例。csv:
Postcode,In Use?,Latitude,Longitude,Easting,Northing,Grid Ref,County,District,Ward,District Code,Ward Code,Country,County Code,Constituency,Introduced,Terminated,Parish,National Park,Population,Households,Built up area,Built up sub-division,Lower layer super output area,Rural/urban,Region,Altitude,London zone,LSOA Code,Local authority,MSOA Code,Middle layer super output area,Parish Code,Census output area,Constituency Code,Index of Multiple Deprivation,Quality,User Type,Last updated,Nearest station,Distance to station,Postcode area,Postcode district,Police force,Water company,Plus Code,Average Income
AB1 0AA,No,57.101474,-2.242851,385386,801193,NJ853011,"","Aberdeen City","Lower Deeside",S12000033,S13002843,Scotland,S99999999,"Aberdeen South",1980-01-01,1996-06-01,"","",,,"","","Cults, Bieldside and Milltimber West - 02","Accessible small town",,46,,S01006514,,S02001237,"Cults, Bieldside and Milltimber West",,S00090303,S14000002,6808,1,0,2020-02-19,"Portlethen",8.31408,AB,AB1,"Scotland","Scottish Water",9C9V4Q24+HV,
AB1 0AB,No,57.102554,-2.246308,385177,801314,NJ851013,"","Aberdeen City","Lower Deeside",S12000033,S13002843,Scotland,S99999999,"Aberdeen South",1980-01-01,1996-06-01,"","",,,"","","Cults, Bieldside and Milltimber West - 02","Accessible small town",,61,,S01006514,,S02001237,"Cults, Bieldside and Milltimber West",,S00090303,S14000002,6808,1,0,2020-02-19,"Portlethen",8.55457,AB,AB1,"Scotland","Scottish Water",9C9V4Q33+2F,
AB1 0AD,No,57.100556,-2.248342,385053,801092,NJ850010,"","Aberdeen City","Lower Deeside",S12000033,S13002843,Scotland,S99999999,"Aberdeen South",1980-01-01,1996-06-01,"","",,,"","","Cults, Bieldside and Milltimber West - 02","Accessible small town",,45,,S01006514,,S02001237,"Cults, Bieldside and Milltimber West",,S00090399,S14000002,6808,1,0,2020-02-19,"Portlethen",8.54352,AB,AB1,"Scotland","Scottish Water",9C9V4Q22+6M,
如何在不达到 memory 限制的情况下从此 CSV 写入 JS 文件?
You need a csv stream parser that will parse it and provide output a line at a time and let you stream it to a file.
这是使用cvs-reader
模块的一种方法:
const fs = require('fs');
const csvReader = require('csv-reader');
const { Transform } = require('stream');
const myTransform = new Transform({
readableObjectMode: true,
writableObjectMode: true,
transform(obj, encoding, callback) {
let data = JSON.stringify(obj);
if (this.tFirst) {
// beginning of transformed data
this.push("[");
this.tFirst = false;
} else {
data = "," + data; // add comma separator if not first object
}
this.push(data);
callback();
}
});
myTransform.tFirst = true;
myTransform._flush = function(callback) {
// end of transformed data
this.push("]");
callback();
}
// All of these arguments are optional.
const options = {
skipEmptyLines: true,
asObject: true, // convert data to object
parseNumbers: true,
parseBooleans: true,
trim: true
};
const csvStream = new csvReader(options);
const readStream = fs.createReadStream('example.csv', 'utf8');
const writeStream = fs.createWriteStream('example.json', {autoClose: false});
readStream.on('error', err => {
console.log(err);
csvStream.destroy(err);
}).pipe(csvStream).pipe(myTransform).pipe(writeStream).on('error', err => {
console.error(err);
}).on('finish', () => {
console.log('done');
});
问题是 csvtojson 节点模块正试图将这个庞大的 jsonObj 存储在内存中!
我找到了一个不同的解决方案,它涉及使用 csv-parser 节点模块,然后一次只解析一行而不是整个 csv!
这是我的解决方案:
const csv = require('csv-parser');
const fs = require('fs');
var stream = fs.createWriteStream("postcodes.js", {flags:'a'});
let first = false;
fs.createReadStream('./data/postcodes.csv')
.pipe(csv())
.on('data', (row) => {
if (row["In Use?"]) {
if (!first) {
first = true;
stream.write(`const postcodes = ["${row.Postcode},\n"`);
} else {
stream.write(`"${row.Postcode},\n"`);
}
}
})
.on('end', () => {
stream.write("]");
console.log('CSV file successfully processed');
});
写像const postcodes =这样的字符串来表示 JavaScript 并不是很漂亮,但它执行所需的 function。
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.