I am currently using the following function to create a Promise from the result of calling request.get:
/**
 * Downloads meta.url into meta.filePath and reports the outcome as a Promise.
 *
 * The record is mutated: meta.error is reset to false up front and set to true
 * when either the file stream or the HTTP request reports an error.
 *
 * @param {Object} meta - Download record; url, filePath and localFileName are read.
 * @returns {Promise<Object>} Resolves with meta once the file stream has closed;
 *   rejects with meta (not an Error) on a filesystem or request error.
 */
function dlPromiseForMeta(meta) {
  return new Promise(function (resolve, reject) {
    meta.error = false;
    const fileStream = fs.createWriteStream(meta.filePath);

    fileStream.on('error', function (error) {
      meta.error = true;
      console.log('filesystem ' + meta.localFileName + ' ERROR: ' + error);
      console.log('record: ' + JSON.stringify(meta));
      reject(meta);
    });
    fileStream.on('close', function () {
      // A no-op when one of the error handlers has already rejected the promise.
      resolve(meta);
    });

    request.get({
      uri: meta.url,
      // NOTE(review): this turns off TLS certificate verification — confirm it is intended.
      rejectUnauthorized: false,
      followAllRedirects: true,
      pool: {
        maxSockets: 1000
      },
      timeout: 10000,
      agent: false
    })
      .on('socket', function () {
        console.log('request ' + meta.localFileName + ' made');
      })
      .on('error', function (error) {
        meta.error = true;
        console.log('request ' + meta.localFileName + ' ERROR: ' + error);
        console.log('record: ' + JSON.stringify(meta));
        reject(meta);
        // Release the file descriptor: without this, every failed request
        // (e.g. a socket timeout) leaves its write stream open, and a large
        // run steadily leaks descriptors.
        fileStream.destroy();
      })
      .on('end', function () {
        console.log('request ' + meta.localFileName + ' finished');
        // end() lets pending writes flush before the descriptor is closed;
        // close() on older Node versions closed it immediately and could
        // truncate the file.
        fileStream.end();
      })
      .pipe(fileStream);
  });
}
This works fine except when I call it too many times, as in the example below, where imagesForKeywords returns an rxjs Observable:
// For every record emitted by imagesForKeywords, derive its local file names
// and start the download; mergeMap flattens the resulting per-download
// Observables into a single stream.
imagesForKeywords(keywords, numberOfResults)
  .mergeMap(function (meta) {
    meta.fileName = path.basename(url.parse(meta.url).pathname);
    meta.localFileName = timestamp + '_' + count++ + '_' + meta.keyword + '_' + meta.source + path.extname(meta.fileName);
    meta.filePath = path.join(imagesFolder, meta.localFileName);
    // fromPromise() already returns the Observable that mergeMap expects;
    // invoking that Observable as a function would throw a TypeError.
    return rxjs.Observable.fromPromise(dlPromiseForMeta(meta));
  });
I start getting ESOCKETTIMEDOUT errors when the source observable becomes sufficiently large.
So what I would like to do is somehow batch what happens in mergeMap for every, say, 100 entries, so that the 100 downloads within a batch run in parallel while the batches themselves run serially, and then merge the results at the end.
How can I accomplish this using rxjs?
I think the simplest thing to use is bufferTime(), which triggers after a certain number of milliseconds but also takes a final parameter for the maximum item count.
Using a timeout seems useful in case the stream does not reach the batch limit in a reasonable time.
If that does not fit your use case, leave a comment with some more details and I will adjust accordingly.
Your code will look like this:
// Batch the downloads: at most maxBatch run in parallel, and the batches run
// one after another.
imagesForKeywords(keywords, numberOfResults)
  // map, not mergeMap: the callback returns the plain meta record, not an
  // Observable, and mergeMap would throw on a non-stream return value.
  .map(function (meta) {
    meta.fileName = path.basename(url.parse(meta.url).pathname);
    meta.localFileName = timestamp + '_' + count++ + '_' + meta.keyword + '_' + meta.source + path.extname(meta.fileName);
    meta.filePath = path.join(imagesFolder, meta.localFileName);
    return meta;
  })
  // Emit a batch after maxTimeout ms or as soon as maxBatch items are buffered.
  .bufferTime(maxTimeout, null, maxBatch)
  // concatMap only starts the next batch (and therefore only calls
  // dlPromiseForMeta for it) once the previous forkJoin has completed, so the
  // batches run serially; mergeMap would start every batch as soon as it is
  // emitted and the downloads would overlap again.
  .concatMap(items => rxjs.Observable.forkJoin(items.map(dlPromiseForMeta)))
  // Flatten each array of batch results back into individual emissions.
  .mergeMap(arr => rxjs.Observable.from(arr));
Here's a runnable mockup to show it working. I have commented out the last mergeMap to show the buffering.
I have assumed a couple of things:
// Some mocking: a stand-in source that emits one { keyword } record per
// keyword, and a stand-in download that resolves immediately.
const imagesForKeywords = (keywords, numberOfResults) => {
  return Rx.Observable.from(keywords.map(keyword => { return {keyword}; }));
};
const dlPromiseForMeta = (meta) => {
  return Promise.resolve(meta.keyword + '_image');
};

// Compose meta - looks like it can run at scale, since is just string manipulations.
const composeMeta = meta => {
  // meta.fileName = path.basename(url.parse(meta.url).pathname);
  // meta.localFileName = timestamp + '_' + count++ + '_' + meta.keyword + '_' + meta.source + path.extname(meta.fileName);
  // meta.filePath = path.join(imagesFolder, meta.localFileName);
  return meta;
};

const maxBatch = 3;
const maxTimeout = 50; //ms

// Buffers the composed records into batches of at most maxBatch (or whatever
// arrived within maxTimeout ms) and resolves each batch as a unit.
const bufferedPromises = (keywords, numberOfResults) =>
  imagesForKeywords(keywords, numberOfResults)
    .map(composeMeta)
    .bufferTime(maxTimeout, null, maxBatch)
    // concatMap waits for one batch to finish before starting the next.
    .concatMap(items => Rx.Observable.forkJoin(items.map(dlPromiseForMeta)));
    //.mergeMap(arr => Rx.Observable.from(arr))

const keywords = ['keyw1', 'keyw2', 'keyw3', 'keyw4', 'keyw5', 'keyw6', 'keyw7'];
const numberOfResults = 1;

// Logs one array per batch, since the flattening step above is commented out.
bufferedPromises(keywords, numberOfResults)
  .subscribe(console.log);
<script src="https://cdnjs.cloudflare.com/ajax/libs/rxjs/5.5.6/Rx.js"></script>
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.