簡體   English   中英

抓取後將數據保存到 csv 文件

[英]Saving data in csv after scraping

我已經使用 selenium 從 Bloomberg 成功抓取了數據,現在需要將這些數據寫入 csv 文件。“day_range”和“52_weekly”的兩個值之間應該有“-”,我想把它加上。此外,應該去掉值中的逗號,這樣值才不會被拆到下一個單元格。

我盡我所能但徒勞無功

import csv
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from my_fake_useragent import UserAgent

# Scrape a handful of quote figures from the Bloomberg page for IBVC:IND and
# save them as a single, well-formed row in data.csv.


def _class_step(class_name):
    """Return an XPath step matching any element whose class list contains *class_name*."""
    return '//*[contains(concat( " ", @class, " " ), concat( " ", "' + class_name + '", " " ))]'


def _clean(text, is_range=False):
    """Normalise a scraped value before it is written to the CSV.

    Thousands separators (commas) are removed.  For range fields the two
    bounds are joined with " - ".
    """
    text = text.replace(",", "")
    if not is_range:
        return text.strip()
    # NOTE(review): assumes the element text holds the low and high bounds
    # separated by whitespace/newlines -- confirm against the live page.
    # Dash tokens that are already present are dropped so they are not doubled.
    bounds = [part for part in text.split() if part not in ("-", "\u2013", "\u2014")]
    return " - ".join(bounds)


options = Options()
ua = UserAgent()
userAgent = ua.random
print(userAgent)
options.add_argument(f'user-agent={userAgent}')
driver = webdriver.Chrome(chrome_options=options, executable_path=r'C:\\Users\\IBTSAM\\Desktop\\chromedriver.exe')

filename = "data.csv"
headers = ["stocks", "open", "prev_close", "_1_year_return", "ytd_return", "day_range", "52_weekly"]

# One entry per CSV column, in header order:
# (nested class names used to locate the element, whether the value is a range).
fields = [
    (("priceText__1853e8a5",), False),
    (("openprice", "value__b93f12ea"), False),
    (("previousclosingpriceonetradingdayago", "value__b93f12ea"), False),
    (("totalreturn1year", "value__b93f12ea"), False),
    (("totalreturnytd", "value__b93f12ea"), False),
    (("rangeoneday", "text"), True),
    (("range52weeks", "text"), True),
]

row = []
try:
    driver.get("https://www.bloomberg.com/quote/IBVC:IND")
    time.sleep(3)

    for class_names, is_range in fields:
        xpath = "".join(_class_step(name) for name in class_names)
        value = _clean(driver.find_element_by_xpath(xpath).text, is_range)
        print(value)
        row.append(value)
finally:
    # quit() (rather than close()) also shuts down the chromedriver process,
    # and the finally clause guarantees that happens even if a lookup fails.
    driver.quit()

# csv.writer takes care of delimiters, quoting and line endings; newline=""
# is required so that no blank lines are inserted on Windows.
with open(filename, "w", newline="") as f:
    writer = csv.writer(f)
    writer.writerow(headers)
    writer.writerow(row)

我只需要在 day_range 和 52_weekly 的兩個值之間添加“-”並在值中省略逗號

我不知道您為什麼選擇 selenium 從該站點獲取數據,因為您希望抓取的內容在頁面源碼中就可以找到,所以 requests 庫會是更理想的選擇。不過,既然您已經嘗試使用 selenium,我就提供一個同樣使用 selenium 的解決方案。不要使用硬編碼的延遲(例如 time.sleep),因為它的效果並不穩定;請改用顯式等待(Explicit Wait)。我只示範了其中四個字段,其餘字段您可以照此自行補上。

import csv
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

# Open the browser and the output file together so both are released when the
# block exits, then write a header row followed by one row of scraped values.
with webdriver.Chrome() as driver, open("output.csv", "w", newline="") as f:
    driver.get("https://www.bloomberg.com/quote/IBVC:IND")
    wait = WebDriverWait(driver, 10)

    writer = csv.writer(f)
    writer.writerow(["stocks", "open", "prev_close", "days_range"])

    def scraped_text(css_selector):
        """Wait for the element matching *css_selector* and return its text without commas."""
        locator = (By.CSS_SELECTOR, css_selector)
        element = wait.until(EC.presence_of_element_located(locator))
        return element.text.replace(",", "")

    stocks = scraped_text("[class^='priceText__']")
    open_ = scraped_text('.openprice [class^="value__"]')
    prev_close = scraped_text('[class*="previousclosing"] [class^="value__"]')

    # The day's range is rendered as two separate elements (left and right);
    # combine them into a single "left - right" cell.
    range_left = scraped_text('.rangeoneday [class^="textLeft__"]')
    range_right = scraped_text('.rangeoneday [class^="textRight__"]')
    days_range = " - ".join((range_left, range_right))

    record = [stocks, open_, prev_close, days_range]
    writer.writerow(record)
    print(*record)

您可以嘗試使用 csv 模塊:

import csv

# Continuation of the scraping script: `driver` is not created in this snippet
# and is expected to be the Selenium WebDriver that has already loaded the
# quote page.


def _tidy(text, is_range=False):
    """Strip thousands separators; for range fields join the two bounds with " - "."""
    text = text.replace(",", "")
    if not is_range:
        return text.strip()
    # NOTE(review): assumes the element text holds the low and high bounds
    # separated by whitespace/newlines -- confirm against the live page.
    # Dash tokens that are already present are dropped so they are not doubled.
    bounds = [part for part in text.split() if part not in ("-", "\u2013", "\u2014")]
    return " - ".join(bounds)


# A list (not a set) so the column order is fixed and matches the row below.
headers = ["stocks", "open", "prev_close", "_1_year_return", "ytd_return", "day_range", "52_weekly"]

# One entry per header column, in the same order: (XPath, whether the value is a range).
field_xpaths = [
    ("//*[contains(@class,'overviewRow')]/span", False),
    ("//*[contains(@class,'openprice')]//div", False),
    ("//*[contains(@class,'previousclosing')]//div", False),
    ("//*[contains(@class,'totalreturn1year')]//div", False),
    ("//*[contains(@class,'totalreturnytd')]//div", False),
    ("//*[contains(@class,'rangeoneday')]//div/div", True),
    ("//*[contains(@class,'range52weeks')]//div/div", True),
]

row = []
try:
    for xpath, is_range in field_xpaths:
        element = driver.find_element_by_xpath(xpath)
        row.append(_tidy(element.text, is_range))
finally:
    # Shut the browser down even if one of the lookups raises.
    driver.quit()

# Print the scraped text rather than the WebElement objects.
print(row)

# The with-block closes (and therefore flushes) the file; newline='' is
# required by the csv module to avoid blank lines on Windows.
with open('stockdata.csv', 'w', newline='') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerow(headers)
    writer.writerow(row)

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM