![](/img/trans.png)
[英]python- using splinter to open and login webpage but need to save the complete webpage
[英]python - login to webpage and save as png
我有一个网站,要求进行身份验证才能访问该网站,然后再对其进行镜像。 我正在使用以下脚本-
import os
import requests
from subprocess import Popen, PIPE
from selenium import webdriver
abspath = lambda *p: os.path.abspath(os.path.join(*p))
ROOT = abspath(os.path.dirname(__file__))
def execute_command(command):
result = Popen(command, shell=True, stdout=PIPE).stdout.read()
if len(result) > 0 and not result.isspace():
raise Exception(result)
def do_screen_capturing(url, screen_path, width, height):
print "Capturing screen.."
driver = webdriver.PhantomJS()
# it save service log file in same directory
# if you want to have log file stored else where
# initialize the webdriver.PhantomJS() as
# driver = webdriver.PhantomJS(service_log_path='/var/log/phantomjs/ghostdriver.log')
driver.set_script_timeout(30)
if width and height:
driver.set_window_size(width, height)
driver.get(url)
driver.save_screenshot(screen_path)
def do_crop(params):
print "Croping captured image.."
command = [
'convert',
params['screen_path'],
'-crop', '%sx%s+0+0' % (params['width'], params['height']),
params['crop_path']
]
execute_command(' '.join(command))
def do_thumbnail(params):
print "Generating thumbnail from croped captured image.."
command = [
'convert',
params['crop_path'],
'-filter', 'Lanczos',
'-thumbnail', '%sx%s' % (params['width'], params['height']),
params['thumbnail_path']
]
execute_command(' '.join(command))
def get_screen_shot(**kwargs):
url = kwargs['url']
width = int(kwargs.get('width', 1024)) # screen width to capture
height = int(kwargs.get('height', 768)) # screen height to capture
filename = kwargs.get('filename', 'screen.png') # file name e.g. screen.png
path = kwargs.get('path', ROOT) # directory path to store screen
crop = kwargs.get('crop', False) # crop the captured screen
crop_width = int(kwargs.get('crop_width', width)) # the width of crop screen
crop_height = int(kwargs.get('crop_height', height)) # the height of crop screen
crop_replace = kwargs.get('crop_replace', False) # does crop image replace original screen capture?
thumbnail = kwargs.get('thumbnail', False) # generate thumbnail from screen, requires crop=True
thumbnail_width = int(kwargs.get('thumbnail_width', width)) # the width of thumbnail
thumbnail_height = int(kwargs.get('thumbnail_height', height)) # the height of thumbnail
thumbnail_replace = kwargs.get('thumbnail_replace', False) # does thumbnail image replace crop image?
screen_path = abspath(path, filename)
crop_path = thumbnail_path = screen_path
if thumbnail and not crop:
raise Exception, 'Thumnail generation requires crop image, set crop=True'
do_screen_capturing(url, screen_path, width, height)
if crop:
if not crop_replace:
crop_path = abspath(path, 'crop_'+filename)
params = {
'width': crop_width, 'height': crop_height,
'crop_path': crop_path, 'screen_path': screen_path}
do_crop(params)
if thumbnail:
if not thumbnail_replace:
thumbnail_path = abspath(path, 'thumbnail_'+filename)
params = {
'width': thumbnail_width, 'height': thumbnail_height,
'thumbnail_path': thumbnail_path, 'crop_path': crop_path}
do_thumbnail(params)
return screen_path, crop_path, thumbnail_path
if __name__ == '__main__':
'''
Requirements:
Install NodeJS
Using Node's package manager install phantomjs: npm -g install phantomjs
install selenium (in your virtualenv, if you are using that)
install imageMagick
add phantomjs to system path (on windows)
'''
s = requests.Session()
s.auth = ('username', 'password')
r = s.get('https://website.com:8443/path/to/site', verify=False)
url = r.text
screen_path, crop_path, thumbnail_path = get_screen_shot(
url=url, filename='test.png',
crop=True, crop_replace=False,
thumbnail=True, thumbnail_replace=False,
thumbnail_width=200, thumbnail_height=150,
)
我知道它正在认证,因为我可以打印r.status
并得到200,而r.headers
返回标头。 r.text
给出编码错误。 上面的代码没有失败,但是返回了空白图像。
这是在Windows计算机上。
编辑-如果我删除了不登录而直接点击URL的请求-
url = 'https://website.com:8443/path/to/site'
screen_path, crop_path, thumbnail_path = get_screen_shot(
url=url, filename='test.png',
crop=True, crop_replace=False,
thumbnail=True, thumbnail_replace=False,
thumbnail_width=200, thumbnail_height=150,
)
它需要登录页面的屏幕截图。 我想要做的是登录后获取页面的屏幕截图。
当页面未完全加载时,似乎调用了save_screenshot()
。
在这种情况下,您需要显式等待例如登录表单变得可见。 例:
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
driver.get(url)
WebDriverWait(driver, 10).until(
EC.visibility_of_element_located((By.ID, "myForm"))
)
driver.save_screenshot(screen_path)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.