I am trying to crawl a website using Node.js. I am making an HTTP request using Axios. I can only fetch the items that are present when the webpage first loads; the HTML that is loaded as I scroll further down is not fetched.
Here is my code.
const axios = require('axios');
const cheerio = require('cheerio');
var request = require('request');
// table view
const url = "https://www.usnews.com/best-colleges/search?_sort=rank&_sortDirection=asc&study=Engineering&_mode=table";

// Download the page, then print the `name` attribute of the <div> found in
// each table cell matched by the selector below.
fetchData(url)
  .then((res) => {
    // fetchData may resolve with undefined (e.g. on a non-200 status), so
    // bail out instead of dereferencing it.
    if (!res) {
      return;
    }
    const $ = cheerio.load(res.data);
    const cells = $('.TableTabular__TableContainer-febmbj-0.guaRKP > tbody > tr >td ');
    cells.each(function () {
      // `this` is the current cell, so a regular function (not an arrow) is required.
      const title = $(this).find('div').attr("name");
      // Cells whose <div> has no `name` attribute yield undefined; skip them.
      if (typeof title === 'string') {
        console.log(title);
      }
    });
  })
  // Surface any failure from the download or the parsing instead of leaving
  // the rejection unhandled.
  .catch((err) => console.log(err));
/**
 * Request `url` through axios and hand back the response.
 *
 * @param {*} url - Passed straight to `axios(url)`.
 * @returns {Promise<object|undefined>} The axios response when its status is
 *   200; `undefined` when the request fails or the status is anything else.
 *   Failures are logged, never thrown.
 */
async function fetchData(url) {
  console.log("Crawling data...");

  let response;
  try {
    // make http call to url
    response = await axios(url);
  } catch (err) {
    // Previously the error was logged and execution fell through to
    // `response.status` with `response` undefined, which threw a TypeError.
    console.log(err);
    return undefined;
  }

  if (response.status !== 200) {
    console.log("Error occurred while fetching data");
    return undefined;
  }
  return response;
}
I am trying to get all the university names. However, I am only able to get 13 universities because the others are loaded only when the page is manually scrolled down.
How do I access all the universities on the webpage https://www.usnews.com/best-colleges/search?_sort=rank&_sortDirection=asc&study=Engineering&_mode=table?
var request = require('request');
// JSON search endpoint; presumably `_page` selects which page of results is
// returned — confirm against a live response.
const url = "https://www.usnews.com/best-colleges/api/search?_sort=rank&_sortDirection=asc&_page=7&study=Engineering";

// Cookie pairs captured from a browser session. They are kept one per entry
// and joined with "; " so the resulting header value is a single line: the
// earlier template literal contained a raw line break, which Node rejects as
// an invalid header character.
const cookie = [
  'ak_bmsc=60A136143B076291C93DD9728862F728172B301F314600004917B85E8498E04F~pl5NwmZFHheJnzgnDGIGpBb4YDDOuhPDVqrNGDysdm/dDPzFJis9zP1awrKKsxeJBlvqZWW6E3ssLbAdi/nUkIEkEiVPu1NDDQge8FegXwVN6Ren/u+X8dx6/TRgRIIXtbj2n2ieih1+SzTEccExtz3QgcXFx+ZxSM1O3Xoe5crrhltym4VHVynMHnup+h3TaL9tLmsoWiopb9GlEG1eTlXIoyPsKVt2FA+s1MJP5zVmQ=',
  'akacd_www=2177452799~rv=53~id=9087b102caf120794dbb1eeceaf4ccc8',
  'usn_session_id=891228875906785',
  'usn_visitor_id=8912288759182043',
  'optimizelyEndUserId=oeu1589122887855r0.7555247616511707',
  'usprivacy=1YNY',
  's_cc=true',
  's_fid=6C0F54971BC55B63-31DB4C74AAF1424B',
  'ntv_as_us_privacy=1YNY',
  '_ga=GA1.2.1252831587.1589122893',
  '_gid=GA1.2.1293277568.1589122893',
  '_fbp=fb.1.1589122894850.768122457',
  '_ntv_uid=a074b9dd-6b5b-4f4b-b257-f9e7ee116412',
  '__gads=ID=3343601cd2e45d2f:T=1589122898:S=ALNI_MZI2Mh_V-ROYbHt3s2k1h83if7i8A',
  'edu-page-views=2',
  'modal-page-views=2',
  'pageview-count-Best Colleges Q2 2020 Audience Survey=2',
  'CUID=N,1589123026657:ALHGLuQAAAAPTiwxNTg5MTIzMDI2NjU3xMc3klevipXW6CRMhCp96C/0wAIB5hXG0/fOK/1Ol60Pak5Dv6v1GHuSJcnhwzLp/ZPAF0+w1p4ic6ZfQHqgJCnyVI1XNZdQ7uBtRQ7wisLYSy5p3bcKN45s8z0N5XX37CMtZHg8WMEvbF6Q+BNNPpjuqLZ3n2p0hJ8+nTpo1lq/vOQrVU+DCcsiC38OMawezCmWDdUxbg2PiMkU9F/WZ4MfddfaDwqQ1BBQC0QkUZeRHkOCPndfwQOCKX1IKZ81Ju7MTmN1wqFdHaHxmHICvLvD6er4q4B0o8byjDXO0M79Yt82UMi8E2sqIAzin+FaFk181KNB5Z+5LbvWhORCig==',
  'FCCDCF=[["AKsRol8x0eLcCPRNK87LcFg96i4OohYRu7keT-wXifV77qo_eYe6uZ0ThI1Oxd2-Y4V5wtjFjZW02xgjl0IhpmE9ojyljTmH9lrVeqQI3wXUjtift1w_Dqsor4S-4hEwsOEhBLpQrx8Ijd3oIw7mqxKezHDHZiod4A=="],null,["[[],[],[],[],null,null,true]",1589123041768]]',
  'education-compare-slideout-state=collapsed',
  's_sq=%5B%5BB%5D%5D',
  'utag_main=v_id:0171ff1af36300170b586aee949903073006706b009dc$_sn:1$_ss:0$_pn:2%3Bexp-session$_st:1589125090368$ses_id:1589122888547%3Bexp-session$_prevpage:www.usnews.com%2Fbest-colleges%2Fsearch%3Bexp-1589126890272',
  'kw.pv_session=6',
  'sailthru_visitor=9abdf1e6-3e02-427f-9899-6c232865866f',
  'bm_sv=C8E5F93ED4F69A94559E23D6F676C38F~k2zHi/YOvrX2jg2IjDjERaOLYsf7bu+NjQmXeUuPHueXWih3Xm6rjXIC8wg1E225YVqIN2Q3cxjMPj6wlfrOgX8K9b5WW9BLiQIddDKHAGX7gH591ibZ8/bJFn4E/h7PhohIoGJK8PpG6Vel3r3dp//PcCGwzvgJNlUWVUqki3c=',
  '_sp_id.26f9=f626f911-80a4-4912-b0bc-ad1b520357f6.1589122896.2.1589128312.1589124442.54a5f830-9b4f-471e-b326-7e4654bf5bf1',
  '_sp_ses.26f9=*',
  'RT="sl=0&ss=1589123021504&tt=0&obo=0&bcn=%2F%2F684d0d40.akstat.io%2F&sh=&dm=usnews.com&si=a65156df-2f6b-4e2a-815d-f7fdf1e8928c',
].join('; ');

// Options object handed to `request(...)`.
const options = {
  url: url,
  headers: {
    // NOTE(review): authority/method/scheme look like HTTP/2 pseudo-header
    // names copied from browser dev tools; here they are sent as ordinary
    // headers — confirm whether the server needs them at all.
    "authority": "www.usnews.com",
    "method": "GET",
    "scheme": "https",
    "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
    "cookie": cookie,
  }
};
// Call the search API and decode its JSON body.
request(options, (err, resp, body) => {
  // Report transport-level failures instead of silently ignoring them.
  if (err) {
    console.error("Request failed:", err);
    return;
  }

  let result;
  try {
    result = JSON.parse(body);
  } catch (parseErr) {
    // A non-JSON body would otherwise crash the process with an uncaught
    // SyntaxError thrown from inside the callback.
    console.error(`Response (HTTP ${resp.statusCode}) is not valid JSON:`, parseErr.message);
    return;
  }

  // NOTE(review): the original snippet hints that the payload exposes
  // result.data.items, result.data.totalItems and result.data.totalPages —
  // confirm against a live response before relying on those fields.
  // The parsed payload is printed in place of the former `debugger` pause.
  console.log(result);
});
Please try this code. You may have to put your own browser cookie in the request headers, since this site's API is actually restricted for other applications. In the result…
The technical posts on this site are published under the CC BY-SA 4.0 license. If you need to reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.