简体   繁体   中英

Why is this an Unhandled Promise Rejection?

I'm trying to rewrite my code to utilize promises correctly.

The full program is supposed to scrape data from a T-shirt site. This first block of code is supposed to load the front page of the site, find the product pages that are immediately available, and store their URLs in an array. The remaining URLs will be stored in 'remainder' so that a second scrape can be performed later on.

I'm currently unit testing each section manually:

//TASK: Create a command line application that goes to an ecommerce site to get the latest prices.
    //Save the scraped data in a spreadsheet (CSV format).



//Modules being used:
var cheerio = require('cheerio');
var request = require('request');

// Hard-coded entry point of the scrape: the shop's front page.
const url = 'http://shirts4mike.com/';

// URLs of the t-shirt product pages found so far.
const urlSet = new Set();

// A non-product link kept aside for a later second scrape;
// stays undefined until one is assigned.
let remainder;

   /**
    * Promise wrapper around the callback-style `request` module.
    *
    * @param {string} url - Address to fetch.
    * @returns {Promise<string>} Resolves with the response body when the
    *   status code is 200. Rejects with the error reported by `request`, or
    *   with an Error naming the status code for any other response.
    */
   const requestPromise = function(url) {
    return new Promise(function(resolve, reject) {
        request(url, function(error, response, html) {

            if(error) return reject(error);

            // A non-200 answer must settle the promise too; otherwise it
            // stays pending forever and every chain built on it hangs.
            if(response.statusCode !== 200){
                return reject(new Error('Request to ' + url + ' failed with status code ' + response.statusCode));
            }

            return resolve(html);
        });
    });
}


/**
 * Fetches a page and collects the absolute URL of every link whose href
 * contains "shirt".
 *
 * @param {string} url - Page to scrape; also the base that relative hrefs
 *   are resolved against.
 * @returns {Promise<string[]>} Resolves with the absolute link URLs; rejects
 *   when requestPromise rejects or an href cannot be resolved.
 */
function firstScrape (url) {
    return requestPromise(url)
        .then(function(html) {
            var $ = cheerio.load(html);

            var links = [];

            //get all the links
            $('a[href*=shirt]').each(function(){
                var href = $(this).attr('href');

                // Resolve against the page URL instead of concatenating:
                // plain `url + href` breaks for root-relative ("/x") and
                // absolute hrefs, and for a base without a trailing slash.
                links.push(new URL(href, url).href);
            });

            return links;
        });
}


// Intended to request every link and resolve once all pages have loaded.
// NOTE(review): for...in walks the array's keys ("0", "1", ...), not its
// values, so requestPromise receives an index string here; this matches the
// reported `Invalid URI "0"` error.
// NOTE(review): the return sits inside the loop body, so the function exits
// during the first iteration, and returns undefined for an empty array.
function nextStep (arrayOfLinks) { 
    var promiseArray = [];

    for(var link in arrayOfLinks){
        promiseArray.push(requestPromise(link));
        return Promise.all(promiseArray);
    }                   
}


// Intended to file each fetched page under urlSet (product page) or
// remainder (first non-product page), then log both. Returns nothing.
// NOTE(review): for...in yields index keys, so `html` is "0", "1", ...
// rather than the page markup.
// NOTE(review): scrapeLink is not declared in this function or in the
// visible file-level code; the URL of each page is not passed along with
// its HTML.
function lastStep (arrayOfHTMLresults){ 
    for(var html in arrayOfHTMLresults){
        var $ = cheerio.load(html);

        //if page has a submit it must be a product page
        if($('[type=submit]').length !== 0){

            //add page to set
            urlSet.add(scrapeLink);

        } else if(remainder == undefined) {
            //if not a product page, add it to remainder so that another scrape can be performed.
            remainder = scrapeLink;                         
        }
    }
    console.log(urlSet);
    console.log(remainder);
}


// Run the pipeline: front page -> linked pages -> classification.
// NOTE(review): no .catch() is attached, so a rejection anywhere in the
// chain surfaces as an unhandled promise rejection.
firstScrape(url)
    .then(nextStep)
    .then(lastStep);

I'm currently getting the following error:

(node:71094) UnhandledPromiseRejectionWarning: Unhandled promise rejection (rejection id: 3): Error: Invalid URI "0"

This is the code I'm trying to promisify:

// Load the front page of shirts4mike, then visit every 'shirt' link on it
// and record whether the linked page is a product page.
function firstScrape(){
    request(url, function(frontError, frontResponse, frontHtml) {
        // Best effort: a failed or non-200 front page is skipped silently.
        if(frontError || frontResponse.statusCode != 200) return;

        var $front = cheerio.load(frontHtml);

        // Fetches one linked page and files its URL under urlSet or remainder.
        function classify(scrapeLink){
            request(scrapeLink, function(pageError, pageResponse, pageHtml){
                // Same best-effort rule for the linked pages.
                if(pageError || pageResponse.statusCode != 200) return;

                var $page = cheerio.load(pageHtml);
                var hasSubmit = $page('[type=submit]').length !== 0;

                if(hasSubmit){
                    // a submit button means this is a product page
                    urlSet.add(scrapeLink);
                } else if(remainder == undefined) {
                    // not a product page: keep the first such link for another scrape
                    remainder = scrapeLink;
                }
            });
        }

        // iterate over links with 'shirt', building each new link from the base url
        $front('a[href*=shirt]').each(function(){
            classify(url + $front(this).attr('href'));
        });
    });
}

What I can't work out is how I can use urlSet.add(scrapeLink); in lastStep() when it doesn't know what scrapeLink is.

Any idea why? Thank you

.add() is not an Array.prototype method; use .push() to append to an array. You also return from within the for loop, instead of first pushing every Promise to promiseArray and then returning Promise.all() after the loop has finished:

/**
 * Requests every link and waits for all of the responses.
 *
 * @param {string[]} arrayOfLinks - URLs to fetch.
 * @returns {Promise<Array>} Resolves with one requestPromise result per
 *   link, in input order; rejects as soon as any request rejects.
 */
function nextStep (arrayOfLinks) {
    // Start every request up front so they run concurrently.
    var pending = Array.from(arrayOfLinks, function(link) {
        return requestPromise(link);
    });

    return Promise.all(pending);
}

UPDATE due to question changing:

So from firstScrape() you could return a results Object instead of just an array of links:

return { scrapeLink: link, result: links }

You would then get that in nextStep() as the result of the promise where you could return something with the same shape again:

return { scrapeLink: firstStepResult.scrapLink, result: Promise.all(promiseArray) }

Then in lastStep() instead of arrayOfHTMLresults getting passed in you would then have an Object which would look like:

{ scrapeLink: "http://someurl.com", result: arrayOfHTMLresults }

PREVIOUS answer:

You will need to declare the loop variable in the for...in loop, e.g. with const, var or let, depending on your use case and JS version.

// Quoted loop, shown here with the loop variable declared via var.
// NOTE(review): .add() is not an Array method (assuming promiseArray is an
// array; confirm), and the return inside the loop body exits on the first
// iteration.
for(var link in arrayOfLinks){
    promiseArray.add(requestPromise(link));
    return promiseArray;
}

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM