简体   繁体   中英

Python Errno 10054 while working with Selenium and BeautifulSoup

I'm trying to download all the PDF files that can be downloaded without a login or subscription from this web page, but I get the following error:

[Errno 10054] An existing connection was forcibly closed by the remote host

How can I solve this error?

# -*- coding: utf-8 -*-

from selenium import webdriver
from bs4 import BeautifulSoup
import urllib2 as ul

def download_pdf(file_name, download_url):
    """Download ``download_url`` and save it as ``<file_name>.pdf``.

    The whole response body is read into memory before the output file is
    opened, so a failed download does not leave an empty or truncated
    ``.pdf`` behind.

    Args:
        file_name: Output path without the ``.pdf`` extension.
        download_url: URL of the document to fetch.

    Raises:
        Whatever ``ul.urlopen`` / ``read`` raise on network failure, and
        ``IOError`` if the output file cannot be written.
    """
    response = ul.urlopen(download_url)
    try:
        data = response.read()
    finally:
        # urllib2 responses are not context managers, so close explicitly.
        response.close()

    # ``with`` guarantees the file is closed even if the write fails.
    with open(file_name + ".pdf", 'wb') as pdf_file:
        pdf_file.write(data)
    print("Completed")

# Open the search results page in Chrome, then for every "PDF" link fetch the
# linked page and, when it looks like a small frame wrapper, download the
# document referenced by its second <frame>.
chrome_path = r"C:\Users\HarutakaKawamura\Desktop\bs\chromedriver_win32\chromedriver.exe"
driver = webdriver.Chrome(chrome_path)
try:
    driver.get('https://www.osapublishing.org/search.cfm?q=comsol&meta=1&cj=1&cc=1')

    # Let element lookups wait up to 10 seconds for the results to appear.
    driver.implicitly_wait(10)
    links = driver.find_elements_by_xpath("//a[contains(text(), 'PDF')]")
    titles = driver.find_elements_by_xpath("//h3[contains(@class, 'sri-title')]")

    for i, link in enumerate(links):
        href = link.get_attribute("href")
        bs = BeautifulSoup(ul.urlopen(href), 'lxml')

        # Only pages whose markup is shorter than 1000 characters are handled;
        # presumably those are the frame wrappers served for freely
        # downloadable PDFs -- TODO confirm against the site.
        if len(str(bs)) >= 1000:
            continue

        download_url = bs.findAll("frame")[1]['src']
        # Assumes the i-th title belongs to the i-th PDF link; the two XPath
        # queries are independent, so verify this pairing holds on the page.
        file_name = titles[i].find_element_by_tag_name("a").text
        download_pdf(file_name, download_url)
finally:
    # Always shut down the browser and chromedriver, even when a download
    # fails part-way through the loop.
    driver.quit()

Try retrying the download when the connection is reset:

import socket
import errno

def download_pdf(file_name, download_url, max_retries=5):
    """Download ``download_url`` to ``<file_name>.pdf``, retrying on resets.

    The request is repeated when the remote host resets the connection
    (``errno.ECONNRESET``). Any other error is re-raised immediately, and
    the reset error itself is re-raised once the retries are used up, so a
    failed download is never reported as success.

    Args:
        file_name: Output path without the ``.pdf`` extension.
        download_url: URL of the document to fetch.
        max_retries: Number of retries after the first attempt
            (the default of 5 allows six attempts in total).

    Raises:
        socket.error or URLError: On a non-reset network error, or on a
            connection reset after the last retry.
        IOError: If the output file cannot be written.
    """
    for retries_left in range(max(max_retries, 0), -1, -1):
        try:
            response = ul.urlopen(download_url)
            try:
                data = response.read()
            finally:
                # urllib2 responses are not context managers; close explicitly.
                response.close()
        except (socket.error, ul.URLError) as error:
            # A reset during urlopen() arrives wrapped in URLError, with the
            # underlying socket error stored in ``reason``; a reset during
            # read() is raised as a plain socket.error.
            cause = getattr(error, "reason", error)
            is_reset = getattr(cause, "errno", None) == errno.ECONNRESET
            if not is_reset or retries_left == 0:
                raise
        else:
            break

    # Write only after a complete download so no truncated file is left.
    with open(file_name + ".pdf", 'wb') as pdf_file:
        pdf_file.write(data)
    print("Completed")

The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM