简体   繁体   中英

How do I make this subprocess call synchronous in nodejs?

why im getting this error localhost didn't send any data.

Even i am sending the processed csv string from python to nodejs. The csv string is then printed (which is not happening) on successful form submit. Please see into images attached i am getting the csv string in the console but not in the csv file.

My app.js file which sends input ( csvData, req.body.keywords, req.body.full_search ) to the python script for use in processing.

/* csv to json */
const express = require("express"),
  app = express(),
  upload = require("express-fileupload"),
  csvtojson = require("csvtojson");

var http = require('http');
var path = require("path");
var bodyParser = require('body-parser');
var helmet = require('helmet');
var rateLimit = require("express-rate-limit");

let csvData = "test";
app.use(upload());

var server = http.createServer(app);

const limiter = rateLimit({
  windowMs: 15 * 60 * 1000, // 15 minutes
  max: 100 // limit each IP to 100 requests per windowMs
});

// var db = new sqlite3.Database('./database/employees.db');
app.use(bodyParser.urlencoded({extended: false}));
// app.use(express.static(path.join(__dirname,'./Final')));
app.use(helmet());
app.use(limiter);

server.listen(process.env.PORT || 3000, function() { 
    console.log('server running on port 3000'); 
})


app.get('/', function(req, res){
    // res.sendFile(path.join(__dirname,'./index.html'));
    res.sendFile(path.join(__dirname, './index.html'));
});

// Will remove all falsy values: undefined, null, 0, false, NaN and "" (empty string)
function cleanArray(actual) {
  var newArray = new Array();
  for (var i = 0; i < actual.length; i++) {
    if (actual[i]) {
      newArray.push(actual[i]);
    }
  }
  return newArray;
}

function get_array_string(inp){

  var str = "";
  for (var i = 0; i < inp.length; i++){
    str = str + " " + inp[i]
  }
  return str.trim()

}

// form submit request
app.post('/formsubmit', function(req, res){

    // console.log("Form submit request")
    csvData = req.files.csvfile.data.toString('utf8');
    filteredArray = cleanArray(csvData.split(/\r?\n/))
    csvData = get_array_string(filteredArray)

    console.log("URL list received: "+csvData)
    console.log("Search keywords: "+req.body.keywords)
    console.log("Full search: "+req.body.full_search)

    // return csvtojson().fromString(csvData).then(json => 
    //     {return res.status(201).json({csv:csvData, json:json})})

    // Send request to python script
    var spawn = require('child_process').spawn;
    var process = spawn('python', ["./WebExtraction.py", csvData, req.body.keywords, req.body.full_search])

    dataString = "";

    process.stdout.on('data', function(data){

      dataString = dataString + data.toString()

      console.log(dataString)

      res.setHeader('Content-disposition', 'attachment; filename=test.txt');
      res.set('Content-Type', 'text/csv');
      res.status(200).send(dataString);

    });

    process.stdout.on('end', function(){      

    });
    
    process.stdin.end();

});

Below is the part of the python script which sends the csv string export_csv to the node js application.

if(flag == 1):
    # Get the output string   
    # print("################### - OUTPUT - ############################")
    found_results_A = list(set(found_results))
    found_results = get_list(found_results)
    found_results = list(set(found_results))
    notfound_results = get_list(notfound_results)
    notfound_results = list(set(notfound_results))
    found_keywords_list_changed = []

    for ele in found_results_A:
        found_keywords_list_changed.append(href_key_dict[ele])

    # Get the not found results correctly using set operation
    setA = set(found_results)
    setB = set(notfound_results)
    notfound_results = list(set(setB-setA))
    error_urls = get_list(error_urls)
    error_urls = list(set(error_urls))
    ######################################################################################
    ## CREATING THE FINAL DATA FRAME FOR COLLECTING the extracted urls
    ######################################################################################
    colList = ['Found urls', 'Not found urls','Error urls']
    dframe = pd.DataFrame(columns = colList, dtype = str)

    maxlen = get_max_of_list(found_results_A, found_keywords_list_changed, notfound_results, error_urls)

    found_results_A = append_space(found_results_A, maxlen)
    notfound_results = append_space(notfound_results, maxlen)
    error_urls = append_space(error_urls, maxlen)
    found_keywords_list_changed = append_space(found_keywords_list_changed, maxlen)

    if(len(found_results_A) == maxlen and len(notfound_results) == maxlen and len(error_urls) == maxlen and len(found_keywords_list_changed) == maxlen):    
        dframe['Found urls'] = found_results_A
        dframe['keywords'] = found_keywords_list_changed
        dframe['Not found urls'] = notfound_results
        dframe['Error urls'] = error_urls

    try:    
        dframe = dframe.sort_values(by=["Found urls"], ascending=False)
        data = dframe.dropna(axis=0, how='all', thresh=None, subset=None, inplace=False)
        export_csv = dframe.to_csv(encoding = 'ASCII', index = None, header = True)
        # print(create_json(export_csv.strip()))
        print(export_csv.strip())
        sys.stdout.flush()
    except Exception as err:
        print('Exception occurred, Error on line {}'.format(sys.exc_info()[-1].tb_lineno), type(err).__name__, err)
        print(err)

I want to make changes in this to solve the issue but not able to proceed please help me to build this functionality.

process.stdout.on('data', function(data){

      dataString = dataString + data.toString()

      console.log(dataString)

      res.setHeader('Content-disposition', 'attachment; filename=test.txt');
      res.set('Content-Type', 'text/csv');
      res.status(200).send(dataString);

    });

    process.stdout.on('end', function(){      

    });
    
    process.stdin.end();

Below is the error im getting

在此处输入图像描述

Also im able to render the html page which has HTML FORM

在此处输入图像描述

Output of console

在此处输入图像描述

Data sent to stdout in the child process should be concatenated in an on ("data" callback in the parent process and only sent back as an HTTP response when the an on("end" callback is called. Refactoring the child handling code might look like

process.stdout.on('data', function(data){
  dataString = dataString + data.toString()
});

process.stdout.on('end', function(){ 
  console.log(dataString)

  res.setHeader('Content-disposition', 'attachment; filename=test.txt');
  res.set('Content-Type', 'text/csv');
  res.status(200).send(dataString);     

});

process.stdin.end();

If the problem remains unsolved, or other issues emerge, try relaying stderr output received from the child process to the node console of the parent process with something like (untested):

process.stderr.on('data', function(data) {
    console.error( "stderr: %s", data);
}

This should allow you to put debug code in the python script that writes to stderr .

A previous answer (of mine) goes into more detail about obtaining the exit code and data written to stdout and stderr by a child process if needed and of help.

[PS dataString may not have been declared - I couldn't see a declaration for it in the post.]

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM