简体   繁体   中英

Batch requests placed via nodejs request.get using rxjs

I am currently using the following function to create a Promise from the result of calling request.get :

/**
 * Streams the resource at `meta.url` into the file at `meta.filePath` and
 * reports the outcome through a Promise.
 *
 * The record is mutated: `meta.error` is set to `false` when the download
 * starts and to `true` if either the request or the file stream fails.
 *
 * @param {Object} meta - Download record. This function reads `url`,
 *     `filePath` and `localFileName`, and writes `error`.
 * @returns {Promise<Object>} Resolves with `meta` once the file stream has
 *     closed. Rejects with `meta` itself (not an Error) on a request or
 *     filesystem error.
 */
function dlPromiseForMeta(meta) {
    return new Promise(function (resolve, reject) {

        meta.error = false;

        const fileStream = fs.createWriteStream(meta.filePath);

        // Shared failure path for both the request and the file stream.
        // `origin` is the log prefix identifying which side failed.
        function fail(origin, error) {
            meta.error = true;
            console.log(origin + ' ' + meta.localFileName + ' ERROR: ' + error);
            console.log('record: ' + JSON.stringify(meta));
            reject(meta);
        }

        fileStream.on('error', function (error) {
            fail('filesystem', error);
        });

        // 'close' is the success signal: the file has been flushed and its
        // descriptor released. If the promise was already rejected, this
        // resolve() is a no-op.
        fileStream.on('close', function () {
            resolve(meta);
        });

        request.get({
            uri: meta.url,
            rejectUnauthorized: false,
            followAllRedirects: true,
            pool: {
                maxSockets: 1000
            },
            timeout: 10000,
            agent: false
        })
            .on('socket', function () {
                console.log('request ' + meta.localFileName + ' made');
            })
            .on('error', function (error) {
                fail('request', error);
                // pipe() does not end the destination when the source errors,
                // so without this the write stream (and its file descriptor)
                // would stay open for every failed download. Any partially
                // written file is left on disk.
                fileStream.destroy();
            })
            .on('end', function () {
                // No explicit close here: pipe() ends the destination when the
                // source ends, which flushes pending writes and then emits
                // 'close' on the file stream.
                console.log('request ' + meta.localFileName + ' finished');
            })
            .pipe(fileStream);
    });
}

This works fine except when I am trying to call it too many times, as in the example below, where imagesForKeywords returns an rxjs Observable :

// For every image record emitted by imagesForKeywords, derive its local file
// names and start the download; mergeMap flattens the per-record results back
// into a single stream.
imagesForKeywords(keywords, numberOfResults)
    .mergeMap(function (meta) {
        meta.fileName = path.basename(url.parse(meta.url).pathname);
        meta.localFileName = timestamp + '_' + count++ + '_' + meta.keyword + '_' + meta.source + path.extname(meta.fileName);
        meta.filePath = path.join(imagesFolder, meta.localFileName);

        // fromPromise() already returns the Observable mergeMap needs.
        // Invoking that Observable as a function would throw a TypeError.
        return rxjs.Observable.fromPromise(dlPromiseForMeta(meta));
    });

I start getting ESOCKETTIMEDOUT errors when the source observable becomes sufficiently large.

So what I would like to do is somehow batch what happens in mergeMap for every, say, 100 entries... so I do those 100 in parallel, and each batch serially, and then merge them at the end.

How can I accomplish this using rxjs ?

I think the simplest thing to use is bufferTime() which triggers after a certain number of ms but also has a parameter at the end for count.

Using a timeout seems useful, in case there's a stream pattern that does not reach the batch limit in a reasonable time.

If that does not fit your use case, leave a comment with some more details and I will adjust the answer accordingly.

Your code will look like this,

  • bufferTime as described above
  • forkJoin - run the buffer contents in parallel and emit once all of them have completed
  • mergeMap - coalesce the results
// Batched download pipeline: decorate each record, group records into
// batches, download one batch at a time, then flatten the batch results.
imagesForKeywords(keywords, numberOfResults)
  // Synchronous per-item transformation, so `map` is the right operator.
  // mergeMap expects the callback to return an Observable, Promise or
  // array, and rejects a bare object.
  .map(function (meta) {
    meta.fileName = path.basename(url.parse(meta.url).pathname);
    meta.localFileName = timestamp + '_' + count++ + '_' + meta.keyword + '_' + meta.source + path.extname(meta.fileName);
    meta.filePath = path.join(imagesFolder, meta.localFileName);
    return meta;
  })
  // Emit a batch after maxTimeout ms or as soon as maxBatch items are buffered.
  .bufferTime(maxTimeout, null, maxBatch)
  // concatMap only invokes the callback for the next batch after the previous
  // forkJoin has completed, so batches run serially while the downloads
  // inside each batch run in parallel. mergeMap would start every batch
  // as soon as it is buffered.
  .concatMap(items => rxjs.Observable.forkJoin(items.map(dlPromiseForMeta)))
  // Flatten each array of batch results back into individual emissions.
  .mergeMap(arr => rxjs.Observable.from(arr));

Here's a runnable mockup to show it working. I have commented out the last mergeMap so that the output shows the buffering.

I have assumed a couple of things,

  • imagesForKeywords breaks keywords into an observable stream of individual keywords
  • there is one keyword per dlPromiseForMeta call

 // Some mocking
 const imagesForKeywords = (keywords, numberOfResults) => {
   return Rx.Observable.from(keywords.map(keyword => { return {keyword} }));
 };
 const dlPromiseForMeta = (meta) => {
   return Promise.resolve(meta.keyword + '_image');
 };

 // Compose meta - looks like it can run at scale, since is just string manipulations.
 const composeMeta = meta => {
   // meta.fileName = path.basename(url.parse(meta.url).pathname);
   // meta.localFileName = timestamp + '_' + count++ + '_' + meta.keyword + '_' + meta.source + path.extname(meta.fileName);
   // meta.filePath = path.join(imagesFolder, meta.localFileName);
   return meta;
 };

 const maxBatch = 3;
 const maxTimeout = 50; //ms

 // Emits one array of mock download results per buffered batch.
 const bufferedPromises = (keywords, numberOfResults) =>
   imagesForKeywords(keywords, numberOfResults)
     .map(composeMeta)
     .bufferTime(maxTimeout, null, maxBatch)
     .mergeMap(items => Rx.Observable.forkJoin(items.map(dlPromiseForMeta)));
     //.mergeMap(arr => Rx.Observable.from(arr))

 const keywords = ['keyw1', 'keyw2', 'keyw3', 'keyw4', 'keyw5', 'keyw6', 'keyw7'];
 const numberOfResults = 1;

 bufferedPromises(keywords, numberOfResults)
   .subscribe(console.log);
 <script src="https://cdnjs.cloudflare.com/ajax/libs/rxjs/5.5.6/Rx.js"></script> 

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM