[英]How do I make this subprocess call synchronous in nodejs?
为什么我得到这个错误localhost didn't send any data.
即使我将处理后的 csv 字符串从 python 发送到 nodejs。 然后在成功提交表单时打印 csv 字符串(这没有发生)。 请查看附加的图像,我在控制台中得到 csv 字符串,但在 csv 文件中没有。
我的app.js
文件将输入( csvData, req.body.keywords, req.body.full_search
)发送到 python 脚本以用于处理。
/* csv to json */
const express = require("express"),
app = express(),
upload = require("express-fileupload"),
csvtojson = require("csvtojson");
var http = require('http');
var path = require("path");
var bodyParser = require('body-parser');
var helmet = require('helmet');
var rateLimit = require("express-rate-limit");
let csvData = "test";
app.use(upload());
var server = http.createServer(app);
const limiter = rateLimit({
windowMs: 15 * 60 * 1000, // 15 minutes
max: 100 // limit each IP to 100 requests per windowMs
});
// var db = new sqlite3.Database('./database/employees.db');
app.use(bodyParser.urlencoded({extended: false}));
// app.use(express.static(path.join(__dirname,'./Final')));
app.use(helmet());
app.use(limiter);
server.listen(process.env.PORT || 3000, function() {
console.log('server running on port 3000');
})
app.get('/', function(req, res){
// res.sendFile(path.join(__dirname,'./index.html'));
res.sendFile(path.join(__dirname, './index.html'));
});
// Will remove all falsy values: undefined, null, 0, false, NaN and "" (empty string)
function cleanArray(actual) {
var newArray = new Array();
for (var i = 0; i < actual.length; i++) {
if (actual[i]) {
newArray.push(actual[i]);
}
}
return newArray;
}
function get_array_string(inp){
var str = "";
for (var i = 0; i < inp.length; i++){
str = str + " " + inp[i]
}
return str.trim()
}
// form submit request
app.post('/formsubmit', function(req, res){
// console.log("Form submit request")
csvData = req.files.csvfile.data.toString('utf8');
filteredArray = cleanArray(csvData.split(/\r?\n/))
csvData = get_array_string(filteredArray)
console.log("URL list received: "+csvData)
console.log("Search keywords: "+req.body.keywords)
console.log("Full search: "+req.body.full_search)
// return csvtojson().fromString(csvData).then(json =>
// {return res.status(201).json({csv:csvData, json:json})})
// Send request to python script
var spawn = require('child_process').spawn;
var process = spawn('python', ["./WebExtraction.py", csvData, req.body.keywords, req.body.full_search])
dataString = "";
process.stdout.on('data', function(data){
dataString = dataString + data.toString()
console.log(dataString)
res.setHeader('Content-disposition', 'attachment; filename=test.txt');
res.set('Content-Type', 'text/csv');
res.status(200).send(dataString);
});
process.stdout.on('end', function(){
});
process.stdin.end();
});
下面是 python 脚本的一部分,它将 csv 字符串export_csv
发送到节点 js 应用程序。
if(flag == 1):
# Get the output string
# print("################### - OUTPUT - ############################")
found_results_A = list(set(found_results))
found_results = get_list(found_results)
found_results = list(set(found_results))
notfound_results = get_list(notfound_results)
notfound_results = list(set(notfound_results))
found_keywords_list_changed = []
for ele in found_results_A:
found_keywords_list_changed.append(href_key_dict[ele])
# Get the not found results correctly using set operation
setA = set(found_results)
setB = set(notfound_results)
notfound_results = list(set(setB-setA))
error_urls = get_list(error_urls)
error_urls = list(set(error_urls))
######################################################################################
## CREATING THE FINAL DATA FRAME FOR COLLECTING the extracted urls
######################################################################################
colList = ['Found urls', 'Not found urls','Error urls']
dframe = pd.DataFrame(columns = colList, dtype = str)
maxlen = get_max_of_list(found_results_A, found_keywords_list_changed, notfound_results, error_urls)
found_results_A = append_space(found_results_A, maxlen)
notfound_results = append_space(notfound_results, maxlen)
error_urls = append_space(error_urls, maxlen)
found_keywords_list_changed = append_space(found_keywords_list_changed, maxlen)
if(len(found_results_A) == maxlen and len(notfound_results) == maxlen and len(error_urls) == maxlen and len(found_keywords_list_changed) == maxlen):
dframe['Found urls'] = found_results_A
dframe['keywords'] = found_keywords_list_changed
dframe['Not found urls'] = notfound_results
dframe['Error urls'] = error_urls
try:
dframe = dframe.sort_values(by=["Found urls"], ascending=False)
data = dframe.dropna(axis=0, how='all', thresh=None, subset=None, inplace=False)
export_csv = dframe.to_csv(encoding = 'ASCII', index = None, header = True)
# print(create_json(export_csv.strip()))
print(export_csv.strip())
sys.stdout.flush()
except Exception as err:
print('Exception occurred, Error on line {}'.format(sys.exc_info()[-1].tb_lineno), type(err).__name__, err)
print(err)
我想对此进行更改以解决问题,但无法继续,请帮助我构建此功能。
process.stdout.on('data', function(data){
dataString = dataString + data.toString()
console.log(dataString)
res.setHeader('Content-disposition', 'attachment; filename=test.txt');
res.set('Content-Type', 'text/csv');
res.status(200).send(dataString);
});
process.stdout.on('end', function(){
});
process.stdin.end();
下面是我得到的错误
我也能够呈现具有 HTML 表格的 html 页面
控制台Output
在子进程中发送到stdout
的数据应在父进程中的on ("data"
回调中连接,并且仅在调用on("end"
回调时作为 HTTP 响应发送回。重构子处理代码可能看起来喜欢
process.stdout.on('data', function(data){
dataString = dataString + data.toString()
});
process.stdout.on('end', function(){
console.log(dataString)
res.setHeader('Content-disposition', 'attachment; filename=test.txt');
res.set('Content-Type', 'text/csv');
res.status(200).send(dataString);
});
process.stdin.end();
如果问题仍未解决,或出现其他问题,请尝试将从子进程收到的stderr
output 中继到父进程的节点控制台,例如(未经测试):
process.stderr.on('data', function(data) {
console.error( "stderr: %s", data);
}
这应该允许您将调试代码放入写入stderr
的 python 脚本中。
(我的)先前的答案更详细地介绍了有关获取退出代码和由子进程在需要和帮助时写入stdout
和stderr
的数据。
[PS dataString
可能尚未声明 - 我在帖子中看不到它的声明。]
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.