I'm trying to scrape the proxy IP addresses from this site: https://advanced.name/freeproxy?ddexp4attempt=2 but I'm having difficulty scraping only the `td` tags that contain the IP addresses. Here is my code:
from helium import *
from bs4 import BeautifulSoup
# Base address of the paginated proxy list; the page number is appended below.
url = 'https://advanced.name/freeproxy?ddexp4attempt=2&page='

# Launch a single headless Chrome and reuse it for every page, instead of
# starting (and never closing) a fresh browser on each iteration.
browser = start_chrome(headless=True)
try:
    for x in range(1, 10):  # pages 1 through 9
        go_to(url + str(x))
        soup = BeautifulSoup(browser.page_source, 'html.parser')
        # Each matching <div class="table-responsive"> wraps a results table.
        proxies = soup.find_all('div', {'class': 'table-responsive'})
        for ips in proxies:
            rows = ips.find_all('td')
            for ip in rows:
                print(ip.text)
finally:
    # Always shut the browser down, even if a page fails to load or parse.
    kill_browser()
You need to target the `tr` elements that have the `data-ip` attribute.
Try this:
from bs4 import BeautifulSoup
import requests
url = 'https://proxy-list.org/english/index.php'
# A timeout keeps the script from hanging forever on an unresponsive server.
pagecontent = requests.get(url, timeout=30)
# Fail loudly on an HTTP error status rather than parsing an error page.
pagecontent.raise_for_status()
# Parse the HTML body of the response that was just fetched.
soup = BeautifulSoup(pagecontent.text, 'html.parser')
def filter_tr(element):
    """Return True when *element* is a ``tr`` tag with a ``data-ip`` attribute.

    Written to be passed to ``soup.find_all`` as a filter function, which
    keeps the elements for which this returns True.

    Args:
        element: An object exposing ``name`` and ``has_attr(attribute_name)``.

    Returns:
        True for ``tr`` elements that have ``data-ip``; False otherwise.
    """
    # Guard clause: anything that is not a table row is rejected immediately.
    if element.name != 'tr':
        return False
    return bool(element.has_attr('data-ip'))
# Collect every element accepted by filter_tr (<tr> tags that have a data-ip
# attribute) and print the text content of each matching row.
trs_with_data_ip = soup.find_all(filter_tr)
for tr in trs_with_data_ip:
    print(tr.text)
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint them, please cite the site URL or the original address. For any questions, please contact: yoyou2525@163.com.