[英]Choosing which columns to be exported from MySQL to Excel using Pandas Dataframe
[英]Pandas Dataframe to Excel : no data from web scraping gets exported to excel
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from openpyxl import Workbook
import time
import pandas as pd
from pandas import ExcelWriter
# initial value for table data
total = []
result = []
#initial setting for excel file
wb = Workbook()
ws = wb.create_sheet()
filename = '/Users/sungyeon/Desktop/projects/text.xlsx'
writer = pd.ExcelWriter(filename)
#setting of crawling
driver = webdriver.Chrome('./chromedriver')
target_url = 'https://price.joinsland.joins.com/theme/index_theme.asp?sisaegbn=T05'
driver.get(target_url)
# selection of first dropbox
select1 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='sido']"))))
select1.options[0].click()
# selection of second dropbox
select2 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
# for loop for values of second dropbox
for i in range(0,3):
try:
select2.options[i].click()
title = select2.options[i].text
wb.create_sheet(title)
driver.implicitly_wait(5)
driver.find_element_by_class_name('btn_search').click()
# in case of stale element reference error
except StaleElementReferenceException as e:
select2 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
select2.options[i].click()
title = select2.options[i].text
wb.create_sheet(title)
driver.find_element_by_class_name('btn_search').click()
driver.implicitly_wait(5)
# setting of table data from crawled webpage
driver.current_url
table = driver.find_element_by_class_name('tbl_box')
tbody = table.find_element_by_tag_name('tbody')
rows=tbody.find_elements_by_tag_name('tr')
# making lists of data from crawled data
for index, value in enumerate(rows):
body = value.find_elements_by_tag_name('td')
print('ok5')
for i in range(len(body)):
try:
data = body[i].text
result.append(data)
except StaleElementReferenceException as e:
body = value.find_elements_by_tag_name('td')
continue
# adding data of a row to list of final dataset
total.append(result)
# clearing temp list
result=[]
time.sleep(2)
# to create a new sheet named after the name of dropbox value
# forming dataframe from list of dataset
df = pd.DataFrame.from_records(total)
# converting to excel file into the sheet named after the name of dropbox value
df.to_excel(writer, sheet_name = title)
writer.save()
wb.save(filename=filename)
我曾嘗試使用 selenium 進行網絡抓取。 數據收集部分工作正常,但問題是我沒有導出到 excel 文件的數據。 文件,工作表創建做得很好,但我仍然沒有在每張工作表上寫入數據我猜它可能與縮進有關,但無法弄清楚。 或任何提示將不勝感激! 非常感謝!
發現幾個問題
這是工作代碼
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select, WebDriverWait
from selenium.webdriver.common.by import By
from selenium.common.exceptions import StaleElementReferenceException
from selenium.webdriver.support import expected_conditions as EC
from openpyxl import Workbook
import time
import pandas as pd
from pandas import ExcelWriter
#initial setting for excel file
filename = '/Users/sungyeon/Desktop/projects/text.xlsx'
writer = pd.ExcelWriter(filename)
#setting of crawling
driver = webdriver.Chrome('./chromedriver')
target_url = 'https://price.joinsland.joins.com/theme/index_theme.asp?sisaegbn=T05'
driver.get(target_url)
# selection of first dropbox
select1 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='sido']"))))
select1.options[0].click()
# selection of second dropbox
select2 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
# for loop for values of second dropbox
for i in range(0,3):
total = [] # total for this dropbox value
try:
select2.options[i].click()
title = select2.options[i].text
#wb.create_sheet(title)
driver.implicitly_wait(5)
driver.find_element_by_class_name('btn_search').click()
# in case of stale element reference error
except StaleElementReferenceException as e:
select2 = Select(WebDriverWait(driver, 5).until(EC.element_to_be_clickable((By.XPATH, "//select[@name='gugun']"))))
select2.options[i].click()
title = select2.options[i].text
driver.find_element_by_class_name('btn_search').click()
driver.implicitly_wait(5)
# setting of table data from crawled webpage
driver.current_url
table = driver.find_element_by_class_name('tbl_box')
tbody = table.find_element_by_tag_name('tbody')
rows = tbody.find_elements_by_tag_name('tr')
# making lists of data from crawled data
for index, value in enumerate(rows):
body = value.find_elements_by_tag_name('td')
result=[] # total for row
for i in range(len(body)):
try:
data = body[i].text
result.append(data)
except StaleElementReferenceException as e:
body = value.find_elements_by_tag_name('td')
continue
# adding data of a row to list of final dataset
total.append(result)
# clearing temp list
time.sleep(2)
# to create a new sheet named after the name of dropbox value
# forming dataframe from list of dataset
df = pd.DataFrame.from_records(total)
# converting to excel file into the sheet named after the name of dropbox value
df.to_excel(writer, sheet_name = title)
writer.save()
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.