[英]Web Scraping with Selenium Python [Twitter + Instagram]
[英]Scraping Instagram Lists with selenium and python
IM 试图从 Instagram 中抓取列表,在这种情况下,是我自己的 Instagram 个人资料中的以下列表和关注者列表。 当我执行它成功登录的代码时,也成功获取了第一个列表(以下列表),但是当被附加到数组时,关注者的列表总是在用户 121 处停止。 有人可以帮我理解为什么停止附加列表吗? 我试过切换时间并使 selenium 滚动两倍,但它不能解决手头的问题,谢谢。
主文件
#imports
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import func
import re
#setting up and loading the browser
driver = webdriver.Firefox(executable_path=r'C:\Users\tuxo9\Downloads\geckodriver\geckodriver.exe')
driver.get("https://www.instagram.com/")
#log in
username = "input username here"
password = "input password here"
func.login(username, password, driver)
#navigate to following list
#more efficient way of obtaining users following page
driver.get("https://www.instagram.com/" + username)
following = driver.find_element_by_xpath("/html/body/div[1]/section/main/div/header/section/ul/li[3]/a")
following.send_keys(Keys.RETURN)
time.sleep(5)
followingList = []
quantityFollowing = driver.find_element_by_xpath("/html/body/div[1]/section/main/div/header/section/ul/li[3]/a/span").text
print(quantityFollowing)
scrollbar = driver.find_element_by_class_name("isgrP")
func.createUserList(driver, quantityFollowing, scrollbar, followingList)
followersList = []
followersQuantity = driver.find_element_by_xpath("/html/body/div[1]/section/main/div/header/section/ul/li[2]/a/span").text
followersQuantity = re.sub(",", "", followersQuantity) # error due to having comma, make function to check this and remove it
print(followersQuantity)
followers = driver.find_element_by_xpath("/html/body/div[1]/section/main/div/header/section/ul/li[2]/a")
followers.send_keys(Keys.RETURN)
scrollbar = driver.find_element_by_class_name("isgrP")
func.createUserList(driver, followersQuantity, scrollbar, followersList)
print(followingList)
print(len(followingList))
print(quantityFollowing)
print(followersList)
功能文件
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import func
def login (username, password, driver):
#accept cookie button has been pressed
cookie = driver.find_element_by_xpath("/html/body/div[4]/div/div/button[1]")
cookie.send_keys(Keys.RETURN)
#username and password details have been entered
time.sleep(5)
user = driver.find_element_by_name("username")
pwb = driver.find_element_by_name("password")
submit = driver.find_element_by_xpath("/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[3]/button")
user.clear()
user.send_keys(username)
pwb.clear()
pwb.send_keys(password)
submit.send_keys(Keys.RETURN)
time.sleep(3)
#check if details worked
#This finds the error message and uses 'elements' to store elements in array but there should only be one
check = driver.find_elements_by_id("slfErrorAlert")
#checks if the error element is there and acts on it
if check == []:
print("success")
else:
print("Details are incorrect!")
driver.close()
def createUserList(driver, quantity, scrollbar, list):
z = 1
y = 12
while len(list) != int(quantity):
if y > int(quantity):
y = int(quantity) + 1
if z > y:
z = y - 2
scrollbar.send_keys(Keys.END)
time.sleep(3)
for x in range(z, y):
name = driver.find_element_by_css_selector("li.wo9IH:nth-child("+ str(x) +") > div:nth-child(1) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > span:nth-child(1) > a:nth-child(1)")
user = name.get_attribute("title")
print(user)
list.append(user)
z = z + 11
y = y + 11
在func.py
import from selenium.common.exceptions import NoSuchElementException
然后使用try
和except NoSuchElementException
来测试元素是否存在,如果不存在,这是它滚动并使元素需要出现的错误。 例子:
for x in range(z, y):
try:
driver.find_element_by_css_selector("li.wo9IH:nth-child("+ str(x) +") > div:nth-child(1) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > span:nth-child(1) > a:nth-child(1)")
except NoSuchElementException:
scrollbar.send_keys(Keys.END)
time.sleep(3)
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.