简体   繁体   中英

Append columns to Pandas DataFrame in for loop

I'm adapting the code in this tutorial to get historical weather data. I want to retrieve several different quantities (temperature, dew point) for a range of dates. The idea is to save each quantity in a column of a pandas DataFrame, while each row shows times/dates. I'm having problems with appending/merging the quantities (when they are generated in a for-loop) in an existing DataFrame.

My MWE is:

import numpy as np
import pandas as pd
from datetime import datetime
pd.options.display.max_columns = None
pd.options.display.max_rows = None
from datetime import date, timedelta
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

lookup_URL = 'https://www.wunderground.com/history/daily/us/ny/new-york-city/KLGA/date/{}-{}-{}'

# range of dates
start_date = datetime.strptime('1 January 2015', '%d %B %Y')
end_date = datetime.strptime('3 January 2015', '%d %B %Y')
df = pd.DataFrame()

options = webdriver.ChromeOptions()
options.add_argument('headless')

# Create an instance of ChromeDriver
driver = webdriver.Chrome(executable_path='./chromedriver.exe', options=options)

while start_date != end_date:
    print('gathering data from: ', start_date)
    formatted_lookup_URL = lookup_URL.format(start_date.year, start_date.month, start_date.day)
    driver.get(formatted_lookup_URL)
    # wait until website is fully loaded before getting data
    # time data
    rows = WebDriverWait(driver, 60).until(EC.visibility_of_all_elements_located((By.XPATH, '//td[@class="mat-cell cdk-cell cdk-column-dateString mat-column-dateString ng-star-inserted"]')))

    for row in rows:
        thehour = row.find_element_by_xpath('.//span[@class="ng-star-inserted"]').text
        # append new row to table
        df = df.append(pd.DataFrame({'Date':[start_date.strftime('%Y-%m-%d')], 'time':[thehour]}), ignore_index = True)

    # other variables
    cols = ["mat-cell cdk-cell cdk-column-temperature mat-column-temperature ng-star-inserted",
            "mat-cell cdk-cell cdk-column-dewPoint mat-column-dewPoint ng-star-inserted"]
    name = ['temp_degF', 'dew_pt_degF']

    for ii in range(len(cols)):
        rows = WebDriverWait(driver, 0). \
               until(EC.visibility_of_all_elements_located((By.XPATH, '//td[@class=' + '"' + cols[ii] + '"' + ']')))
        for row in rows:
            data = row.find_element_by_xpath('.//span[@class="wu-value wu-value-to"] ').text
            # append new rows to column
            df.append(pd.DataFrame({name[ii]:[data]}), ignore_index=True)
            #df.merge(pd.DataFrame({name[ii]:[data]}), left_index=True, right_index=True)
            #df = pd.concat([df,pd.DataFrame({name[ii]:[data]})], axis=0)
            #df = df.append(pd.DataFrame({name[ii]:[data]}))
            #df.merge(pd.DataFrame({name[ii]:[data]}), left_on='True')
    
    start_date += timedelta(days=1)

print(df.head(5))

You need to assign df = when you use the append operation. Otherwise you don't actually modify the df. See this toy example.

row1list = [True, False]
row2list = [True, True]
row3list = [False, 2.2]
row4list = [False, np.nan]
df = pd.DataFrame([row1list, row2list, row3list, row4list],
                  columns=['column1', 'column2'])

df.append(df)
print(df)    # unaltered original
#    column1 column2
# 0     True   False
# 1     True    True
# 2    False     2.2
# 3    False     NaN

df = df.append(df)
print(df)
#    column1 column2
# 0     True   False
# 1     True    True
# 2    False     2.2
# 3    False     NaN
# 0     True   False
# 1     True    True
# 2    False     2.2
# 3    False     NaN

Note you may want to reset index after appending everything so that you don't have any duplicate indices.

df = df.reset_index(drop=True)
print(df)
#    column1 column2
# 0     True   False
# 1     True    True
# 2    False     2.2
# 3    False     NaN
# 4     True   False
# 5     True    True
# 6    False     2.2
# 7    False     NaN

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM