I need a little help with a small project I am using to learn web scraping in Python.
from bs4 import BeautifulSoup
import urllib.request
import http.cookiejar
# Post login credentials to the Yahoo/Flickr sign-in form using a cookie-aware
# urllib opener, then print the raw body of the server's response.

# Absolute URL of the sign-in page (scheme, host, path and query string).
base_url = "https://login.yahoo.com/config/login?.src=flickrsignin&.pc=8190&.scrumb=0&.pd=c%3DH6T9XcS72e4mRnW3NpTAiU8ZkA--&.intl=in&.lang=en&mg=1&.done=https%3A%2F%2Flogin.yahoo.com%2Fconfig%2Fvalidate%3F.src%3Dflickrsignin%26.pc%3D8190%26.scrumb%3D0%26.pd%3Dc%253DJvVF95K62e6PzdPu7MBv2V8-%26.intl%3Din%26.done%3Dhttps%253A%252F%252Fwww.flickr.com%252Fsignin%252Fyahoo%252F%253Fredir%253Dhttps%25253A%25252F%25252Fwww.flickr.com%25252F"
# Host-relative target of the login request (presumably the form's `action`
# attribute -- confirm against the page's HTML).
login_action = "/config/login?.src=flickrsignin&.pc=8190&.scrumb=0&.pd=c%3DH6T9XcS72e4mRnW3NpTAiU8ZkA--&.intl=in&.lang=en&mg=1&.done=https%3A%2F%2Flogin.yahoo.com%2Fconfig%2Fvalidate%3F.src%3Dflickrsignin%26.pc%3D8190%26.scrumb%3D0%26.pd%3Dc%253DJvVF95K62e6PzdPu7MBv2V8-%26.intl%3Din%26.done%3Dhttps%253A%252F%252Fwww.flickr.com%252Fsignin%252Fyahoo%252F%253Fredir%253Dhttps%25253A%25252F%25252Fwww.flickr.com%25252F"

# The cookie jar lets the opener store and resend cookies set by the server.
cj = http.cookiejar.CookieJar()
opener = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(cj))
# Send a browser-like User-agent header with every request made by the opener.
opener.addheaders = [
    (
        'User-agent',
        (
            'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_6_7) '
            'AppleWebKit/535.1 (KHTML, like Gecko) '
            'Chrome/13.0.782.13 Safari/535.1'
        ),
    ),
]

# Form fields, URL-encoded and converted to bytes as required for a POST body.
# `urllib.parse` is available here because `import urllib.request` loads it.
# NOTE(review): if the login form carries hidden inputs (e.g. a CSRF token),
# they would have to be scraped from the page and added here -- verify.
login_data = urllib.parse.urlencode({
    'login-username': 'username',
    'login-passwd': 'password',
    'remember_me': True,
}).encode('ascii')

# `base_url` is already a complete URL, so concatenating `login_action` onto it
# yields a malformed address. Resolve the relative action against it instead.
login_url = urllib.parse.urljoin(base_url, login_action)

# Supplying a data argument makes this a POST; the `with` block guarantees the
# connection is closed once the body has been read.
with opener.open(login_url, login_data) as response:
    print(response.read())
I have tried logging in, but the output returned is just the HTML of the login page. Could anyone help me log in to this site?
Try reading more about using requests together with BeautifulSoup. In the code below, User[email] is simply the name of the username input field, and User[password] is the name of the password input field. Note, though, that the code below can only log in to a site that has no CSRF-token protection.
import requests
from requests.packages.urllib3 import add_stderr_logger
import urllib
from bs4 import BeautifulSoup
from urllib.error import HTTPError
from urllib.request import urlopen
import re, random, datetime
# Log in through a persistent requests session, then fetch a page with the
# same session and parse it with BeautifulSoup.
# NOTE(review): `url` is not defined in this snippet; it must be set to the
# target address before this code runs.

# Seed from the system's entropy/time source. Passing a datetime object is
# rejected with TypeError on Python 3.11+, where only None, int, float, str,
# bytes and bytearray are accepted.
random.seed()
# Route urllib3's debug log output to stderr so each request is visible.
add_stderr_logger()

# A Session keeps cookies between calls, so the login carries over to later
# requests.
session = requests.Session()

try:
    per_session = session.post(
        url,
        data={'User[email]': 'your_email', 'User[password]': 'your_password'},
    )
    # requests does not raise on 4xx/5xx by itself; make failures explicit.
    per_session.raise_for_status()

    # You can now combine the session with BeautifulSoup: assuming the login
    # succeeded, .get can fetch any page of your choice.
    page = session.get(url)
    page.raise_for_status()
    bsObj = BeautifulSoup(page.content, 'lxml')
except requests.RequestException as e:
    # urllib's HTTPError is never raised by requests; RequestException is the
    # base class of its connection, timeout and HTTP-status errors.
    print(e)
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.