简体   繁体   中英

I can't export scraped data in CSV

I can't get all data in CSV, only last. When scraping is done only last one scraped is saving CSV file but I want to save from all pages.

import requests
from bs4 import BeautifulSoup
from selenium import webdriver
import time
import pandas as pd
from selenium.common.exceptions import NoSuchElementException
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
mainurl = 'https://austin.craigslist.org/search/cta?s=0'
driver.get(mainurl)
res = driver.execute_script("return document.documentElement.outerHTML")
page_soup = BeautifulSoup(res, 'html.parser')

lnk_opener = driver.find_element_by_xpath('//*[@id="sortable-results"]/ul/li[1]/p/a').click()
time.sleep(4)
records = []
i = 1
while i <3:
    i+=1

    try:
        print(driver.current_url)
    except Exception:
        print('Internet Error Detected')

    try:
        title = driver.find_element_by_xpath('//*[@id="titletextonly"]').text
        print(title)
    except Exception:
        print('No Title Given')
    try:
        price = driver.find_element_by_xpath('/html/body/section/section/h2/span/span[2]').text
        print(price)
    except Exception:
        print('No Price Given')
    try:
        phone = driver.find_element_by_xpath('//*[@id="postingbody"]/h2[1]/big').text
        print(phone)
        records.append((phone))
    except Exception:
        print('No Mobile number avalible')
    try:
        loc = driver.find_element_by_xpath('/html/body/section/section/section/div[1]/div/div[2]').text
        print(loc)
    except Exception:
        print('No Location Data Avalible')
    try:
        img = page_soup.find('img')
        immg = print(img.get('src','\n'))
    except Exception:
        print('No img Found')

    nxtpg = driver.find_element_by_xpath('/html/body/section/section/header/div[1]/div/a[3]')
    nxtpg.click()
    time.sleep(4)
    url = driver.find_element_by_xpath("/html/body/section/section/header/div[1]/div/a[3]").get_attribute("href")
    if url == None:
        bckbtn = driver.find_element_by_class_name('backup').click()
        time.sleep(5)
        nextbuttton = driver.find_element_by_xpath('//*[@id="searchform"]/div[3]/div[3]/span[2]/a[3]').click()
        time.sleep(6)
        print(records)

records.append((driver.current_url, title, price, loc, immg))
df = pd.DataFrame(records, columns=['Product Url', 'Title/Model/Make', 'Price', 'GM Location', 'Image Link'])
print(df)
df.to_csv('zzz.csv')
time.sleep(4)
driver.quit()

I think this line

records.append((driver.current_url, title, price, loc, immg))

should be inside the while statement. Also, move i += 1 to the end of the statement, otherwise you're skipping the first iteration.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM