简体   繁体   中英

Nodejs: wget, unzip and convert to js without writing to file

Well, the title says it all: I'm trying to write a script (that runs in a nodejs/express server-side application) that leverages the libraries request, unzip and xml2js to perform a task consisting of fetching a zip file from a given url, whose content is an xml file which I need to parse into a javascript object for some further processing.

So far I've managed to come up with:

// Express application on which the /import route is registered.
var express = require("express");
var app = express();
/* some init code omitted */
// HTTP client used to download the zip file.
var request = require("request");
// Zip reader; the download is piped into its Parse() stream.
var unzip = require("unzip");
// XML-to-JavaScript-object converter; this single Parser instance is used by the route handler.
var xml2js = require("xml2js");
var parser = new xml2js.Parser();

// GET /import: download a zip file, stream it through the unzip parser and try
// to turn each entry's XML content into a JavaScript object.
// NOTE: this is the problematic version the question is about. It writes every
// entry to disk and then reads it back before the write has completed.
// NOTE(review): `fs` and `util` are not required in the visible snippet;
// presumably they are part of the omitted init code.
app.get("/import", function(req, res) {
    request("http://path.to/file.zip")
        .pipe(unzip.Parse())
        .on("entry", function(entry) {
            //This is what I'm trying to avoid, which doesn't even work
            // pipe() only starts copying the entry to disk; nothing here waits
            // for that copy to finish.
            entry.pipe(fs.createWriteStream(entry.path));
            // readFile is issued immediately afterwards, so it can run before
            // the write stream has finished writing the file.
            fs.readFile(entry.path, function(err, data) {
                if(err) {
                    return res.status(500).send(err);
                }
                // The parse error (first callback argument) is not checked here.
                parser.parseString(data, function(err, obj) {
                    console.log(util.inspect(obj));
                    /* further processing of obj */
                });
            });
        });
});

Although the contents of the xml file are correctly written to disk, I'm looking for an alternative to this approach for two reasons:

  1. to save disk space, since I don't really need to keep the xml file anyway once it has been converted to js
  2. it doesn't even work: fs.readFile probably starts reading the file before fs.createWriteStream is done writing it, because the line console.log(util.inspect(obj)) logs null (whereas if I run only the innermost fs.readFile block and replace entry.path with the name of the previously written file, it produces the desired output)

I wish I could jot down a jsFiddle for this but I'm clueless as to how, when it comes to expressjs applications. Cheers.

EDITED

Piping is unnecessary; parse the data directly from the entry stream:

/**
 * GET /import: download a zip archive, extract the `needed.xml` entry fully in
 * memory (nothing is written to disk) and parse it into a JavaScript object.
 *
 * Any download, unzip or XML parse error is answered with HTTP 500.
 * On success the parsed object is logged; the code that replaces the
 * "further processing" placeholder is responsible for sending the response.
 *
 * @param {object} req Express request (unused).
 * @param {object} res Express response; only used here to report errors.
 */
app.get("/import", function(req, res) {
    // Report a failure at most once: several streams can emit "error" for the
    // same underlying problem.
    function fail(err) {
        if (res.headersSent) {
            return;
        }
        res.status(500).send(String(err && err.message ? err.message : err));
    }

    var download = request("http://link-top.zip");
    // pipe() does not forward errors, so each stream needs its own listener.
    download.on("error", fail);

    download
        .pipe(unzip.Parse())
        .on("error", fail)
        .on("entry", function(entry) {
            if (entry.path !== "needed.xml") {
                // Unconsumed entries must be drained, otherwise their data is
                // buffered and the archive stream may never advance.
                entry.autodrain();
                return;
            }

            // Keep the raw Buffers and decode once at the end: decoding each
            // chunk separately corrupts multi-byte characters that happen to
            // be split across two chunks.
            var chunks = [];
            entry.on("error", fail);
            entry.on("data", function(chunk) {
                chunks.push(chunk);
            });
            entry.on("end", function() {
                var xml = Buffer.concat(chunks).toString();
                parser.parseString(xml, function(err, obj) {
                    if (err) {
                        return fail(err);
                    }
                    console.log(util.inspect(obj));
                    /* further processing of obj */
                });
            });
        });
});

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM