如何在python請求中迭代多個鏈接？

Question

在這裡，我只請求一個網址，page 的響應狀態碼為 200，並且可以成功提取數據。

但當我嘗試從文件中讀取多個鏈接並逐一請求時，響應狀態碼卻是 400。

單個網址的代碼：

import requests
import lxml.html as lh
import pandas as pd
import html
from lxml import html
from bs4 import BeautifulSoup
import requests

from bs4 import BeautifulSoup
import requests
# Fetch the single page at `url` and copy a few table fields into car_data.
# cars = []  # global list for storing each car_data object
url = 'http://www.redbook.com.au/cars/research/used/details/1969-ford-mustang-mach-1-manual/SPOT-ITM-225034'
headers = {'User-Agent': 'Mozilla/5.0'}
page = requests.get(url, headers=headers)
tree = html.fromstring(page.content)

car_data = {}  # only receives the fields whose XPath query matched something

# Each query is run once; a field is stored only when the result is non-empty,
# and the first matched text node is the value that gets kept.
badge_hits = tree.xpath('//tr[td="Badge"]//following-sibling::td[2]/text()')
if badge_hits:
    badge = badge_hits[0]
    car_data["badge"] = badge

series_hits = tree.xpath('//tr[td="Series"]//following-sibling::td[2]/text()')
if series_hits:
    car_data["series"] = series_hits[0]

body_hits = tree.xpath('//tr[td="Body"]//following-sibling::td[2]/text()')
if body_hits:
    car_data["body_small"] = body_hits[0]
# cars.append(car_data)  # append it to the global list

輸出：

{'badge': 'Mach 1', 'body_small': 'Fastback', 'series': '(No Series)'}

我嘗試多個鏈接的代碼：

import requests
import lxml.html as lh
import pandas as pd
import html
from lxml import html
from bs4 import BeautifulSoup
import requests


# Attempt to run the same field extraction for every URL listed in file.txt,
# collecting one car_data dict per URL in `cars`.
cars = [] # global list holding one car_data dict per URL
f = open("file.txt",'r') # file.txt holds the links to read, one per line
# NOTE(review): f is never closed anywhere in this snippet.
# The loop below repeats the extraction for each line of the file.
# NOTE(review): iterating a text file yields every line with its trailing
# '\n' still attached, so `url` reaches requests.get() with that newline in it.
for url in f: 
    car_data={} # fresh dict for this URL
    headers = {'User-Agent':'Mozilla/5.0'}
    page = (requests.get(url, headers=headers))
    tree = html.fromstring(page.content)
    # Each field is stored only when its XPath query returns a non-empty
    # result; the first matched text node is the value that is kept.
    if tree.xpath('//tr[td="Badge"]//following-sibling::td[2]/text()'):
        badge = tree.xpath('//tr[td="Badge"]//following-sibling::td[2]/text()')[0]
        car_data["badge"] = badge
        # Printed right after the badge is set, so series/body_small are not in the dict yet.
        print(car_data)
    if tree.xpath('//tr[td="Series"]//following-sibling::td[2]/text()'):
        car_data["series"] = tree.xpath('//tr[td="Series"]//following-sibling::td[2]/text()')[0]
    if tree.xpath('//tr[td="Body"]//following-sibling::td[2]/text()'):
        car_data["body_small"] = tree.xpath('//tr[td="Body"]//following-sibling::td[2]/text()')[0]
    cars.append(car_data) # appended even when no field matched (empty dict)

file.txt：

http://www.redbook.com.au/cars/research/used/details/1969-ford-mustang-mach-1-manual/SPOT-ITM-225034
http://www.redbook.com.au/cars/research/used/details/1969-ford-falcon-gtho-phase-i-xw-manual/SPOT-ITM-222630
http://www.redbook.com.au/cars/research/used/details/1969-ford-falcon-xt-auto/SPOT-ITM-222613
http://www.redbook.com.au/cars/research/used/details/1969-ford-falcon-xt-manual/SPOT-ITM-222612

Answer 1

您需要先把文件中的每一行讀入一個列表（去掉行尾的換行符），然後再迭代該列表。直接迭代文件對象時，每一行末尾都帶有換行符，它會被一併放進 URL 中，這很可能就是返回 400 的原因。另外，使用 with open 讀取文件，讀取完成後會自動關閉文件。

# Load the URLs from file.txt into a list before making any request.
with open('file.txt') as f:
    # Strip every line so the trailing newline (or stray surrounding
    # whitespace) never ends up inside a URL, and drop blank lines so an
    # empty string is never handed to requests.get().
    urls = [line.strip() for line in f if line.strip()]
for url in urls:
    ...  # per-URL request and parsing goes here

如何在python請求中迭代多個鏈接？

問題描述

1 個解決方案

解決方案1
0 2019-08-04 02:48:58

如何在python請求中迭代多個鏈接？

問題描述

1 個解決方案

解決方案1 0 2019-08-04 02:48:58

解決方案1
0 2019-08-04 02:48:58