I am scraping stock names and prices from Yahoo's finance website. When the passed index variable is 1, I build a dataframe with three columns: "Name", "Code", and "Price". On later calls I want to run another loop and add a column of updated prices to that original dataframe. But when I add the column, the original data is replaced with NaN values. What do I need to do to align the indexes correctly and not disturb the original dataframe data?
class Stocks:
    """Scrape Yahoo Finance quote pages and collect the prices in one table.

    The table is kept on the class between calls, so every pass after the
    first adds its ``Price{index}`` column to the rows that already hold
    ``Name``, ``Code`` and ``Price``.  Building a new, empty table on each
    call is what made those three columns show up as NaN.
    """

    # Quote pages scraped on every pass; the order defines the row order.
    URL_LIST = [
        'https://finance.yahoo.com/quote/GOOG',
        'https://finance.yahoo.com/quote/DOGE-USD',
        'https://finance.yahoo.com/quote/AAPL',
        'https://finance.yahoo.com/quote/HMC',
        'https://finance.yahoo.com/quote/TM',
        'https://finance.yahoo.com/quote/DKS',
        'https://finance.yahoo.com/quote/SHIB-USD',
        'https://finance.yahoo.com/quote/BTC-USD',
        'https://finance.yahoo.com/quote/WMT',
        'https://finance.yahoo.com/quote/AMZN',
        'https://finance.yahoo.com/quote/NKE',
        'https://finance.yahoo.com/quote/KO',
        'https://finance.yahoo.com/quote/PEP',
        'https://finance.yahoo.com/quote/DAL',
        'https://finance.yahoo.com/quote/SAVE',
        'https://finance.yahoo.com/quote/BLL',
        'https://finance.yahoo.com/quote/KMB',
        'https://finance.yahoo.com/quote/GIS',
    ]

    # A "/" followed by an upper-case symbol, optionally hyphenated
    # (for example "/GOOG" or "/BTC-USD").  Compiled once, not per call.
    _CODE_PATTERN = re.compile(r'/([A-Z]*-[A-Z]*|[A-Z]*)')

    # CSS class strings used to locate the name and price elements.
    _NAME_CLASS = 'D(ib) Fz(18px)'
    _PRICE_CLASS = 'Fw(b) Fz(36px) Mb(-4px) D(ib)'

    # Table shared by successive Yahoo_Finance calls; None until the first pass.
    _structure = None

    def __init__(self):
        """Create an instance; the scraping API itself lives on the class."""
        # __init__ must return None -- returning self raises TypeError.

    @staticmethod
    def Stock_ABV(str):  # parameter name kept for backward compatibility
        """Return the ticker code found after the last "/" of a quote URL.

        Args:
            str: URL text such as ``'https://finance.yahoo.com/quote/GOOG'``.

        Returns:
            The symbol matched after the last "/" (``'GOOG'``), or ``''``
            when the text contains no "/" or no symbol follows the last one.
        """
        code = ''
        # Every "/" in the URL produces a match; only the last one matters.
        for found in Stocks._CODE_PATTERN.finditer(str):
            code = found.group(0)[1:]  # drop the leading "/"
        return code

    @staticmethod
    def _fetch_soup(url):
        """Download *url* and return its HTML parsed with BeautifulSoup."""
        req = requests.get(url)
        return BeautifulSoup(req.text, 'html.parser')

    @staticmethod
    def _price_of(soup):
        """Return the text of the price element of a parsed quote page."""
        return soup.find('fin-streamer', {'class': Stocks._PRICE_CLASS}).text

    @classmethod
    def Yahoo_Finance(cls, index):
        """Scrape every URL in ``URL_LIST`` and update the shared table.

        Args:
            index: Pass number.  ``1`` (re)builds the table with the columns
                ``Name``, ``Code`` and ``Price``.  Any other value adds a
                ``Price{index}`` column to the existing table.

        Returns:
            The shared ``pandas.DataFrame`` (the same object on every call).

        Raises:
            AttributeError: If an expected element is missing from a page.
        """
        if index == 1 or cls._structure is None:
            # Build the base rows.  This also covers a first call with
            # index != 1, which would otherwise have no rows to extend.
            rows = []
            for url in cls.URL_LIST:
                soup = cls._fetch_soup(url)
                name = soup.find('h1', {'class': cls._NAME_CLASS}).text
                rows.append([name, cls.Stock_ABV(url), cls._price_of(soup)])
            cls._structure = pd.DataFrame(
                rows, columns=['Name', 'Code', 'Price'])

        if index != 1:
            # One price per URL, in row order, so the list lines up with the
            # existing rows and leaves the other columns untouched.
            prices = [cls._price_of(cls._fetch_soup(url))
                      for url in cls.URL_LIST]
            cls._structure[f'Price{index}'] = prices

        # Kept from the original: show every row when the table is printed.
        pd.set_option('display.max_rows', None)
        return cls._structure

    @classmethod
    def AFK_Runner(cls):
        """Run pass 1 and then pass 2, returning the table after pass 2."""
        cls.Yahoo_Finance(1)
        return cls.Yahoo_Finance(2)
# Run the index-1 pass followed by the index-2 pass; the returned table is not stored here.
Stocks.AFK_Runner()
Do you know the yfinance package?
# Install the dependency first: pip install yfinance
import yfinance as yf
# Request all four space-separated tickers in one call; in the output shown
# below, each field (Adj Close, Close, ..., Open, Volume) has one column per ticker.
data = yf.download('GOOG DOGE-USD AAPL HMC')
Output:
Adj Close Close ... Open Volume
AAPL DOGE-USD GOOG HMC AAPL DOGE-USD GOOG HMC ... AAPL DOGE-USD GOOG HMC AAPL DOGE-USD GOOG HMC
Date ...
1980-03-17 NaN NaN NaN 0.718973 NaN NaN NaN 0.893750 ... NaN NaN NaN 0.893750 NaN NaN NaN 26000.0
1980-03-18 NaN NaN NaN 0.731542 NaN NaN NaN 0.909375 ... NaN NaN NaN 0.909375 NaN NaN NaN 2000.0
1980-03-19 NaN NaN NaN 0.724001 NaN NaN NaN 0.900000 ... NaN NaN NaN 0.900000 NaN NaN NaN 2000.0
1980-03-20 NaN NaN NaN 0.724001 NaN NaN NaN 0.900000 ... NaN NaN NaN 0.900000 NaN NaN NaN 0.0
1980-03-21 NaN NaN NaN 0.724001 NaN NaN NaN 0.900000 ... NaN NaN NaN 0.900000 NaN NaN NaN 2000.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
2022-02-09 176.279999 0.159123 2829.060059 30.500000 176.279999 0.159123 2829.060059 30.500000 ... 176.050003 0.158357 2816.995117 30.120001 71285000.0 7.786708e+08 1431400.0 1554600.0
2022-02-10 172.119995 0.151889 2772.050049 30.760000 172.119995 0.151889 2772.050049 30.760000 ... 174.139999 0.159145 2790.000000 31.000000 90865900.0 1.053631e+09 1650900.0 1398400.0
2022-02-11 168.639999 0.144847 2682.600098 30.459999 168.639999 0.144847 2682.600098 30.459999 ... 172.330002 0.151895 2775.000000 30.760000 98566000.0 7.767306e+08 1937700.0 1004200.0
2022-02-12 NaN 0.144405 NaN NaN NaN 0.144405 NaN NaN ... NaN 0.144856 NaN NaN NaN 6.026994e+08 NaN NaN
2022-02-13 NaN 0.153793 NaN NaN NaN 0.153793 NaN NaN ... NaN 0.144308 NaN NaN NaN 1.346092e+09 NaN NaN
[11055 rows x 24 columns]
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.