简体   繁体   中英

Nodejs, puppeteer - Saving the result in a json file

I am using Puppeteer to build a basic web scraper in Node.js. It downloads advertisement data from all subpages of a given category:

`

const puppeteer = require('puppeteer');
 
/**
 * Launches a headless browser, opens the OLX cars category page and collects
 * the output of `extractedEvaluateCall` from a fixed number of result pages.
 *
 * @returns {Promise<Array<{offer: string}>>} All entries gathered across the
 *   visited pages, in visiting order.
 */
const scrape = async () => {
    const browser = await puppeteer.launch({ headless: true });

    // try/finally guarantees the browser process is shut down even when
    // navigation, extraction or the click fails part-way through.
    try {
        const page = await browser.newPage();

        await page.goto('https://www.olx.pl/d/motoryzacja/samochody/');

        const results = [];
        const lastPageNumber = 25;
        for (let index = 0; index < lastPageNumber; index++) {
            // page.waitFor() no longer exists in newer Puppeteer releases
            // ("page.waitFor is not a function"), so pause with a plain timer.
            await new Promise((resolve) => setTimeout(resolve, 1000));
            results.push(...(await extractedEvaluateCall(page)));
            if (index !== lastPageNumber - 1) {
                // Presumably the pagination control that advances to the next
                // page; the selector depends on generated CSS class names, so
                // verify it against the live site.
                await page.click('#root > div.css-50cyfj > div.css-88vtd4 > form > div:nth-child(5) > div > section.css-j8u5qq > div > ul > li:nth-child(2)');
            }
        }

        return results;
    } finally {
        // Await the shutdown so the caller only resumes once Chromium is closed.
        await browser.close();
    }
};
 
/**
 * Runs a function inside the page that gathers the visible text of every
 * `<a>` element in the document.
 *
 * @param {object} page - Object exposing `evaluate(fn)`, which runs `fn` in
 *   the page context and resolves with its return value.
 * @returns {Promise<Array<{offer: string}>>} One `{ offer }` record per anchor,
 *   in document order.
 */
async function extractedEvaluateCall(page) {
    // This callback executes in the page context, so it may only use
    // browser globals such as `document`.
    const collectAnchorTexts = () => {
        const anchors = document.querySelectorAll('a');
        return Array.from(anchors, (anchor) => ({ offer: anchor.innerText }));
    };

    return page.evaluate(collectAnchorTexts);
}
 
// Run the scraper and print the collected entries plus a short summary.
scrape()
    .then((value) => {
        console.log(value);
        console.log(`Collection length: ${value.length}`);
        console.log(value[0]);
        console.log(value[value.length - 1]);
    })
    .catch((error) => {
        // Without a rejection handler a failed scrape would surface only as
        // an unhandled promise rejection; report it and signal failure.
        console.error(error);
        process.exitCode = 1;
    });

` RESULT:

},
  {
    offer: 'Wolkswagen passat fl 1.9tdi 130km\n' +
      '\n' +
      '3 700 zł\n' +
      '\n' +
      'Giżycko - Dzisiaj o 17:10\n' +
      '\n' +
      '2002 - 532 321 km\n' +
      'Obserwuj'
  },
  {
    offer: 'Renault Scenic Automat, piękny stan, klimatronik, oryginał, bezwypadkowy, opłacony\n' +
      '\n' +
      '11 999 zł\n' +
      '\n' +
      'Ryki - Dzisiaj o 17:10\n' +
      '\n' +
      '2006 - 175 000 km\n' +
      'Obserwuj'
  },
  5100 more items

How can I export the console data to a JSON file? The first thing that comes to mind is JSON.parse(), but I can't work out how to apply it when fetching from HTML.

You should convert your object to a string with JSON.stringify() first:

// Serialize the scraped results array into JSON text.
const json = JSON.stringify(results);

Then write it to a JSON file with the file system (fs) module:

const fs = require('fs');
// fs.writeFile is asynchronous and requires a completion callback; omitting
// it throws a TypeError on current Node.js versions, and the callback is the
// only place a write failure is reported.
fs.writeFile('filename.json', json, 'utf8', (err) => {
    if (err) {
        throw err;
    }
});

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM