简体   繁体   中英

Adding new column to DataFrame using a list

I am scraping stock names and prices from Yahoo's finance website. After making a dataframe with three columns ("Name", "Code", and "Price") for the passed index variable, I want to run another loop and add a column with updated prices to the original dataframe. But when I add the column, the original data is replaced with NaN values. What do I need to do to line up the indexes correctly and leave the original dataframe data undisturbed?

class Stocks():
    """Scrape quote names and prices from Yahoo Finance pages into a DataFrame.

    All public methods are static, so they can be called on the class
    (``Stocks.Yahoo_Finance(1)``) or on an instance.

    NOTE(review): the methods use ``re``, ``pd`` (pandas), ``requests`` and
    ``BeautifulSoup``; those names must be imported by the surrounding module.
    """

    # Quote pages that are scraped, in the row order of the resulting frame.
    URL_LIST = [
        'https://finance.yahoo.com/quote/GOOG', 'https://finance.yahoo.com/quote/DOGE-USD',
        'https://finance.yahoo.com/quote/AAPL', 'https://finance.yahoo.com/quote/HMC',
        'https://finance.yahoo.com/quote/TM', 'https://finance.yahoo.com/quote/DKS',
        'https://finance.yahoo.com/quote/SHIB-USD', 'https://finance.yahoo.com/quote/BTC-USD',
        'https://finance.yahoo.com/quote/WMT', 'https://finance.yahoo.com/quote/AMZN',
        'https://finance.yahoo.com/quote/NKE', 'https://finance.yahoo.com/quote/KO',
        'https://finance.yahoo.com/quote/PEP', 'https://finance.yahoo.com/quote/DAL',
        'https://finance.yahoo.com/quote/SAVE', 'https://finance.yahoo.com/quote/BLL',
        'https://finance.yahoo.com/quote/KMB', 'https://finance.yahoo.com/quote/GIS',
    ]

    def __init__(self):
        """Create an instance; there is no per-instance state."""
        # __init__ must return None: returning ``self`` makes ``Stocks()``
        # raise TypeError.

    @staticmethod
    def Stock_ABV(str):
        """Return the ticker code taken from the last ``/CODE`` part of a URL.

        The parameter keeps its original name ``str`` (which shadows the
        builtin inside this method) so existing keyword callers keep working.

        Args:
            str: URL text such as ``'https://finance.yahoo.com/quote/GOOG'``.

        Returns:
            The upper-case code after the last ``/`` (``'GOOG'``,
            ``'DOGE-USD'``); an empty string if that ``/`` is not followed by
            upper-case letters.

        Raises:
            ValueError: if the text contains no ``/`` at all.
        """
        # With a single capture group, findall returns the group text of each
        # match, i.e. the matched code without its leading slash.
        codes = re.findall(r'/([A-Z]*-[A-Z]*|[A-Z]*)', str)
        if not codes:
            raise ValueError(f'no "/CODE" segment found in {str!r}')
        return codes[-1]

    @staticmethod
    def _fetch_soup(url):
        """Download *url* and return its parsed HTML."""
        req = requests.get(url)
        return BeautifulSoup(req.text, 'html.parser')

    @staticmethod
    def _element_text(soup, tag, css_class, url):
        """Return the text of the first *tag* with *css_class*, or raise ValueError."""
        element = soup.find(tag, {'class': css_class})
        if element is None:
            raise ValueError(f'<{tag} class="{css_class}"> not found on {url}')
        return element.text

    @staticmethod
    def Yahoo_Finance(index, structure=None):
        """Scrape every page in ``URL_LIST`` and return the prices as a frame.

        Args:
            index: pass number. ``1`` builds a new frame with the columns
                ``Name``, ``Code`` and ``Price``. Any other value scrapes the
                prices only and stores them in a column named
                ``f'Price{index}'``.
            structure: frame from an earlier pass that should receive the new
                price column (ignored when ``index == 1``). A copy is
                extended, the argument itself is not modified. When omitted,
                the column is added to an empty ``Name``/``Code``/``Price``
                frame, which leaves those three columns as NaN; pass the
                earlier frame to keep its data.

        Returns:
            The resulting ``pandas.DataFrame``.

        Raises:
            ValueError: if a page lacks the expected name or price element,
                or if *structure* has a different number of rows than there
                are scraped prices.
        """
        price_class = 'Fw(b) Fz(36px) Mb(-4px) D(ib)'
        if index == 1:
            rows = []
            for url in Stocks.URL_LIST:
                soup = Stocks._fetch_soup(url)
                name = Stocks._element_text(soup, 'h1', 'D(ib) Fz(18px)', url)
                price = Stocks._element_text(soup, 'fin-streamer', price_class, url)
                rows.append([name, Stocks.Stock_ABV(url), price])
            # Build the frame once, after all rows have been collected.
            structure = pd.DataFrame(rows, columns=['Name', 'Code', 'Price'])
        else:
            prices = [
                Stocks._element_text(Stocks._fetch_soup(url), 'fin-streamer', price_class, url)
                for url in Stocks.URL_LIST
            ]
            if structure is None:
                structure = pd.DataFrame({'Name': [], 'Code': [], 'Price': []})
            else:
                structure = structure.copy()
            # A plain list is assigned by position, so the existing rows keep
            # their values and only gain the new column.
            structure[f'Price{index}'] = prices

        pd.set_option('display.max_rows', None)
        return structure

    @staticmethod
    def AFK_Runner():
        """Run pass 1, then pass 2, and return one frame holding both prices."""
        first_pass = Stocks.Yahoo_Finance(1)
        # Hand the first frame to the second pass; starting from a new empty
        # frame is what produced NaN in Name/Code/Price.
        return Stocks.Yahoo_Finance(2, first_pass)

# Run the scraper: AFK_Runner calls Yahoo_Finance(1) and then Yahoo_Finance(2).
# The DataFrame it returns is not assigned to a name here.
Stocks.AFK_Runner()

Do you know the yfinance package?

# Third-party dependency; install it first with: pip install yfinance
import yfinance as yf

# A single call with the tickers in one space-separated string. Judging by the
# output shown below, the result is a table indexed by Date with one column
# per (field, ticker) pair; dates a ticker has no data for show up as NaN.
data = yf.download('GOOG DOGE-USD AAPL HMC')

Output:

             Adj Close                                         Close                                    ...        Open                                        Volume                                    
                  AAPL  DOGE-USD         GOOG        HMC        AAPL  DOGE-USD         GOOG        HMC  ...        AAPL  DOGE-USD         GOOG        HMC        AAPL      DOGE-USD       GOOG        HMC
Date                                                                                                    ...                                                                                              
1980-03-17         NaN       NaN          NaN   0.718973         NaN       NaN          NaN   0.893750  ...         NaN       NaN          NaN   0.893750         NaN           NaN        NaN    26000.0
1980-03-18         NaN       NaN          NaN   0.731542         NaN       NaN          NaN   0.909375  ...         NaN       NaN          NaN   0.909375         NaN           NaN        NaN     2000.0
1980-03-19         NaN       NaN          NaN   0.724001         NaN       NaN          NaN   0.900000  ...         NaN       NaN          NaN   0.900000         NaN           NaN        NaN     2000.0
1980-03-20         NaN       NaN          NaN   0.724001         NaN       NaN          NaN   0.900000  ...         NaN       NaN          NaN   0.900000         NaN           NaN        NaN        0.0
1980-03-21         NaN       NaN          NaN   0.724001         NaN       NaN          NaN   0.900000  ...         NaN       NaN          NaN   0.900000         NaN           NaN        NaN     2000.0
...                ...       ...          ...        ...         ...       ...          ...        ...  ...         ...       ...          ...        ...         ...           ...        ...        ...
2022-02-09  176.279999  0.159123  2829.060059  30.500000  176.279999  0.159123  2829.060059  30.500000  ...  176.050003  0.158357  2816.995117  30.120001  71285000.0  7.786708e+08  1431400.0  1554600.0
2022-02-10  172.119995  0.151889  2772.050049  30.760000  172.119995  0.151889  2772.050049  30.760000  ...  174.139999  0.159145  2790.000000  31.000000  90865900.0  1.053631e+09  1650900.0  1398400.0
2022-02-11  168.639999  0.144847  2682.600098  30.459999  168.639999  0.144847  2682.600098  30.459999  ...  172.330002  0.151895  2775.000000  30.760000  98566000.0  7.767306e+08  1937700.0  1004200.0
2022-02-12         NaN  0.144405          NaN        NaN         NaN  0.144405          NaN        NaN  ...         NaN  0.144856          NaN        NaN         NaN  6.026994e+08        NaN        NaN
2022-02-13         NaN  0.153793          NaN        NaN         NaN  0.153793          NaN        NaN  ...         NaN  0.144308          NaN        NaN         NaN  1.346092e+09        NaN        NaN

[11055 rows x 24 columns]

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM