简体   繁体   中英

HTTP Request Using Axios

I am trying to crawl a website using Node.js. I am making an HTTP request using Axios. I am only able to fetch the items that are available when the webpage first loads. The HTML that is loaded when I scroll further down is not fetched.

Here is my code.

const axios = require('axios');
const cheerio = require('cheerio');
var request = require('request');

// table view
const url = "https://www.usnews.com/best-colleges/search?_sort=rank&_sortDirection=asc&study=Engineering&_mode=table";

// Download the table-view page and print the `name` attribute found in each
// table cell. Only rows present in the initially served HTML can be seen here.
fetchData(url)
    .then((res) => {
        // fetchData can resolve without a value (it returns early when the
        // status check fails), so there may be nothing to parse.
        if (!res) {
            return;
        }

        const $ = cheerio.load(res.data);

        // NOTE(review): these class names look build-generated and may change
        // between site deployments - confirm the selector still matches.
        const cells = $('.TableTabular__TableContainer-febmbj-0.guaRKP > tbody > tr >td ');

        cells.each(function () {
            const title = $(this).find('div').attr("name");

            // Cells without a named <div> yield a non-string and are skipped.
            if (typeof title === 'string') {
                console.log(title);
            }
        });
    })
    // Surface failures instead of leaving an unhandled promise rejection.
    .catch((err) => console.log(err));

/**
 * Requests `url` with axios and hands back the response only on success.
 *
 * @param {string} url - Address to request.
 * @returns {Promise<object|undefined>} The axios response when its status is
 *   200; otherwise `undefined`, after the problem has been logged.
 */
async function fetchData(url){
    console.log("Crawling data...")

    let response;
    try {
        // make http call to url
        response = await axios(url);
    } catch (err) {
        // A failed request is logged and reported as "no response". Catching
        // inline and continuing would leave `response` undefined and make the
        // status check below throw a TypeError.
        console.log(err);
        return;
    }

    if (!response || response.status !== 200) {
        console.log("Error occurred while fetching data");
        return;
    }
    return response;

}

I am trying to get all the university names. However, I am only able to get 13 universities because the others are loaded only when the page is manually scrolled down.

How do I access all the universities on the webpage? The URL is: https://www.usnews.com/best-colleges/search?_sort=rank&_sortDirection=asc&study=Engineering&_mode=table

var request = require('request');

// Paged JSON endpoint; the `_page` query parameter selects which page is returned.
const url = "https://www.usnews.com/best-colleges/api/search?_sort=rank&_sortDirection=asc&_page=7&study=Engineering";

// Request settings: the target URL plus headers copied from a browser session.
let options = {
    url: url,
    headers: {
        // NOTE(review): "authority", "method" and "scheme" look like HTTP/2
        // pseudo-headers copied from browser dev tools; here they are sent as
        // ordinary headers - confirm whether the server needs them at all.
        "authority": "www.usnews.com",
        "method": "GET",
        //"path": `/best-colleges/api/search?_sort=rank&_sortDirection=asc&_page=6&study=Engineering`,
        "scheme": "https",
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        // The cookie must stay on ONE line: a line break inside the template
        // literal becomes part of the header value, which Node rejects as
        // invalid header content. Replace with a cookie from your own session.
        "cookie": `ak_bmsc=60A136143B076291C93DD9728862F728172B301F314600004917B85E8498E04F~pl5NwmZFHheJnzgnDGIGpBb4YDDOuhPDVqrNGDysdm/dDPzFJis9zP1awrKKsxeJBlvqZWW6E3ssLbAdi/nUkIEkEiVPu1NDDQge8FegXwVN6Ren/u+X8dx6/TRgRIIXtbj2n2ieih1+SzTEccExtz3QgcXFx+ZxSM1O3Xoe5crrhltym4VHVynMHnup+h3TaL9tLmsoWiopb9GlEG1eTlXIoyPsKVt2FA+s1MJP5zVmQ=; akacd_www=2177452799~rv=53~id=9087b102caf120794dbb1eeceaf4ccc8; usn_session_id=891228875906785; usn_visitor_id=8912288759182043; optimizelyEndUserId=oeu1589122887855r0.7555247616511707; usprivacy=1YNY; s_cc=true; s_fid=6C0F54971BC55B63-31DB4C74AAF1424B; ntv_as_us_privacy=1YNY; _ga=GA1.2.1252831587.1589122893; _gid=GA1.2.1293277568.1589122893; _fbp=fb.1.1589122894850.768122457; _ntv_uid=a074b9dd-6b5b-4f4b-b257-f9e7ee116412; __gads=ID=3343601cd2e45d2f:T=1589122898:S=ALNI_MZI2Mh_V-ROYbHt3s2k1h83if7i8A; edu-page-views=2; modal-page-views=2; pageview-count-Best Colleges Q2 2020 Audience Survey=2; CUID=N,1589123026657:ALHGLuQAAAAPTiwxNTg5MTIzMDI2NjU3xMc3klevipXW6CRMhCp96C/0wAIB5hXG0/fOK/1Ol60Pak5Dv6v1GHuSJcnhwzLp/ZPAF0+w1p4ic6ZfQHqgJCnyVI1XNZdQ7uBtRQ7wisLYSy5p3bcKN45s8z0N5XX37CMtZHg8WMEvbF6Q+BNNPpjuqLZ3n2p0hJ8+nTpo1lq/vOQrVU+DCcsiC38OMawezCmWDdUxbg2PiMkU9F/WZ4MfddfaDwqQ1BBQC0QkUZeRHkOCPndfwQOCKX1IKZ81Ju7MTmN1wqFdHaHxmHICvLvD6er4q4B0o8byjDXO0M79Yt82UMi8E2sqIAzin+FaFk181KNB5Z+5LbvWhORCig==; FCCDCF=[["AKsRol8x0eLcCPRNK87LcFg96i4OohYRu7keT-wXifV77qo_eYe6uZ0ThI1Oxd2-Y4V5wtjFjZW02xgjl0IhpmE9ojyljTmH9lrVeqQI3wXUjtift1w_Dqsor4S-4hEwsOEhBLpQrx8Ijd3oIw7mqxKezHDHZiod4A=="],null,["[[],[],[],[],null,null,true]",1589123041768]]; education-compare-slideout-state=collapsed; s_sq=%5B%5BB%5D%5D; utag_main=v_id:0171ff1af36300170b586aee949903073006706b009dc$_sn:1$_ss:0$_pn:2%3Bexp-session$_st:1589125090368$ses_id:1589122888547%3Bexp-session$_prevpage:www.usnews.com%2Fbest-colleges%2Fsearch%3Bexp-1589126890272; kw.pv_session=6; sailthru_visitor=9abdf1e6-3e02-427f-9899-6c232865866f; bm_sv=C8E5F93ED4F69A94559E23D6F676C38F~k2zHi/YOvrX2jg2IjDjERaOLYsf7bu+NjQmXeUuPHueXWih3Xm6rjXIC8wg1E225YVqIN2Q3cxjMPj6wlfrOgX8K9b5WW9BLiQIddDKHAGX7gH591ibZ8/bJFn4E/h7PhohIoGJK8PpG6Vel3r3dp//PcCGwzvgJNlUWVUqki3c=; _sp_id.26f9=f626f911-80a4-4912-b0bc-ad1b520357f6.1589122896.2.1589128312.1589124442.54a5f830-9b4f-471e-b326-7e4654bf5bf1; _sp_ses.26f9=*; RT="sl=0&ss=1589123021504&tt=0&obo=0&bcn=%2F%2F684d0d40.akstat.io%2F&sh=&dm=usnews.com&si=a65156df-2f6b-4e2a-815d-f7fdf1e8928c`,
    }
};

// Send the request and parse the response body as JSON.
request(options, function (err, resp, html) {
    // Report transport failures instead of ignoring them silently.
    if (err) {
        console.error(err);
        return;
    }

    let res;
    try {
        res = JSON.parse(html);
    } catch (parseErr) {
        // The body is not guaranteed to be JSON (for example an HTML error
        // page), so a parse failure is logged rather than thrown from the
        // callback.
        console.error(parseErr);
        return;
    }

    // Fields the original answer points at once `res` is parsed:
    //var items=res.data.items
    //var totalItems=res.data.totalItems
    //var totalPages=res.data.totalPages
})

Please try this code. [image: enter image description here] You may have to include your browser cookie in the request (the code above sends it as a `cookie` header), since this site's API is actually restricted for other applications. In the result …

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM