[英]Web Scraping Behind a Login (Authentication) in Python
from bs4 import BeautifulSoup
import requests
# Log in to GitHub with a persistent session, then print the names of the
# repositories pinned on the user's profile page.

# A Session keeps cookies between requests, so the later GET reuses whatever
# the login POST established.
session = requests.Session()
# Form fields sent with the login request.
# NOTE(review): presumably GitHub's sign-in form expects different field
# names plus a per-request CSRF token, and a different POST target - confirm
# against the live login page before relying on this request.
payload = {'_username': '[USERNAME]', '_password': '[PASSWORD]'}
# Post the payload to the site to log in
s = session.post("https://github.com/login", data=payload)
# Navigate to the profile page and scrape the data
s = session.get('https://github.com/[USERNAME]')
soup = BeautifulSoup(s.text, 'html.parser')
results = soup.find(class_='js-pinned-items-reorder-container')
# find() returns None when nothing matches; stop with a readable message
# instead of failing on `None.find_all`.
if results is None:
    raise SystemExit('Pinned-items container not found on the profile page')
job_elems = results.find_all(
    'div',
    class_='Box pinned-item-list-item d-flex p-3 width-full js-pinned-item-list-item public fork reorderable sortable-button-item',
)
for job_elem in job_elems:
    title_elem = job_elem.find('span', class_='repo')
    # Skip cards that carry no repository-name span.
    if title_elem is None:
        continue
    print(title_elem.text.strip())
這行不通。請幫忙。在上面的代碼中,“[USERNAME]”和“[PASSWORD]”分別是 GitHub 賬戶的用戶名和密碼。
試試這個代碼:
from bs4 import BeautifulSoup
import requests
# Sign in to GitHub by replaying its login form: fetch the login page, copy
# the hidden form values into the POST body, submit it, then read the
# signed-in user name back from the home page.

# Placeholder credentials; replace with the real account values.
login = 'USERNAME'
password = 'PASSWORD'
# Static form fields; the per-request hidden values are added further down.
data = {
    'login': login,
    'password': password,
    'js-webauthn-support': 'supported',
    'js-webauthn-iuvpaa-support': 'unsupported',
    'commit': 'Sign in',
}
# Hidden <input> names whose values are copied from the login page.
hidden_fields = ('timestamp_secret', 'authenticity_token', 'timestamp')

with requests.session() as sess:
    # Load the login page within the same session so its cookies and hidden
    # form values belong together.
    login_page = sess.get('https://github.com/login')
    html = BeautifulSoup(login_page.text, 'html.parser')
    # Update data
    for field_name in hidden_fields:
        field = html.find('input', {'name': field_name})
        # find() returns None for a missing input; report which one is
        # absent rather than failing with an opaque AttributeError.
        if field is None:
            raise SystemExit('Login form field %r not found' % field_name)
        data[field_name] = field.get('value')
    # Login (no custom headers are sent; requests' defaults apply)
    res = sess.post('https://github.com/session', data=data)
    # Check login
    res = sess.get('https://github.com/')
    user_meta = BeautifulSoup(res.text, 'html.parser').find(
        'meta', {'name': 'user-login'})
    # A missing tag or an empty/absent content value is treated as
    # "not signed in".
    # NOTE(review): presumably the tag is still rendered with empty content
    # for anonymous visitors - confirm against a logged-out response.
    username = user_meta.get('content') if user_meta is not None else None
    if not username:
        print('Your username or password is incorrect')
    else:
        print("You have successfully logged in as", username)
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.