[英]Append columns to Pandas DataFrame in for loop
我正在修改本教程中的代码以获取历史天气数据。 我想为一系列日期检索几个不同的数量(温度、露点)。 这个想法是将每个数量保存在 Pandas DataFrame 的一列中,而每一行显示时间/日期。 我在现有 DataFrame 中附加/合并数量(当它们在 for 循环中生成时)时遇到问题。
我的 MWE 是:
import numpy as np
import pandas as pd
from datetime import datetime
pd.options.display.max_columns = None
pd.options.display.max_rows = None
from datetime import date, timedelta
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
lookup_URL = 'https://www.wunderground.com/history/daily/us/ny/new-york-city/KLGA/date/{}-{}-{}'
# range of dates
start_date = datetime.strptime('1 January 2015', '%d %B %Y')
end_date = datetime.strptime('3 January 2015', '%d %B %Y')
df = pd.DataFrame()
options = webdriver.ChromeOptions()
options.add_argument('headless')
# Create an instance of ChromeDriver
driver = webdriver.Chrome(executable_path='./chromedriver.exe', options=options)
while start_date != end_date:
print('gathering data from: ', start_date)
formatted_lookup_URL = lookup_URL.format(start_date.year, start_date.month, start_date.day)
driver.get(formatted_lookup_URL)
# wait until website is fully loaded before getting data
# time data
rows = WebDriverWait(driver, 60).until(EC.visibility_of_all_elements_located((By.XPATH, '//td[@class="mat-cell cdk-cell cdk-column-dateString mat-column-dateString ng-star-inserted"]')))
for row in rows:
thehour = row.find_element_by_xpath('.//span[@class="ng-star-inserted"]').text
# append new row to table
df = df.append(pd.DataFrame({'Date':[start_date.strftime('%Y-%m-%d')], 'time':[thehour]}), ignore_index = True)
# other variables
cols = ["mat-cell cdk-cell cdk-column-temperature mat-column-temperature ng-star-inserted",
"mat-cell cdk-cell cdk-column-dewPoint mat-column-dewPoint ng-star-inserted"]
name = ['temp_degF', 'dew_pt_degF']
for ii in range(len(cols)):
rows = WebDriverWait(driver, 0). \
until(EC.visibility_of_all_elements_located((By.XPATH, '//td[@class=' + '"' + cols[ii] + '"' + ']')))
for row in rows:
data = row.find_element_by_xpath('.//span[@class="wu-value wu-value-to"] ').text
# append new rows to column
df.append(pd.DataFrame({name[ii]:[data]}), ignore_index=True)
#df.merge(pd.DataFrame({name[ii]:[data]}), left_index=True, right_index=True)
#df = pd.concat([df,pd.DataFrame({name[ii]:[data]})], axis=0)
#df = df.append(pd.DataFrame({name[ii]:[data]}))
#df.merge(pd.DataFrame({name[ii]:[data]}), left_on='True')
start_date += timedelta(days=1)
print(df.head(5))
使用追加操作时需要分配 df = 。 否则,您实际上不会修改 df。 请参阅此玩具示例。
row1list = [True, False]
row2list = [True, True]
row3list = [False, 2.2]
row4list = [False, np.nan]
df = pd.DataFrame([row1list, row2list, row3list, row4list],
columns=['column1', 'column2'])
df.append(df)
print(df) # unaltered original
# column1 column2
# 0 True False
# 1 True True
# 2 False 2.2
# 3 False NaN
df = df.append(df)
print(df)
# column1 column2
# 0 True False
# 1 True True
# 2 False 2.2
# 3 False NaN
# 0 True False
# 1 True True
# 2 False 2.2
# 3 False NaN
请注意,您可能希望在附加所有内容后重置索引,以便您没有任何重复的索引。
df = df.reset_index(drop=True)
print(df)
# column1 column2
# 0 True False
# 1 True True
# 2 False 2.2
# 3 False NaN
# 4 True False
# 5 True True
# 6 False 2.2
# 7 False NaN
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.