繁体   English   中英

使用 selenium 和 python 抓取 Instagram 列表

[英]Scraping Instagram Lists with selenium and python

IM 试图从 Instagram 中抓取列表,在这种情况下,是我自己的 Instagram 个人资料中的以下列表和关注者列表。 当我执行它成功登录的代码时,也成功获取了第一个列表(以下列表),但是当被附加到数组时,关注者的列表总是在用户 121 处停止。 有人可以帮我理解为什么停止附加列表吗? 我试过切换时间并使 selenium 滚动两倍,但它不能解决手头的问题,谢谢。

主文件

#imports
import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import func
import re

#setting up and loading the browser
driver = webdriver.Firefox(executable_path=r'C:\Users\tuxo9\Downloads\geckodriver\geckodriver.exe')
driver.get("https://www.instagram.com/")

#log in
username = "input username here"
password = "input password here"
func.login(username, password, driver)

#navigate to following list

#more efficient way of obtaining users following page
driver.get("https://www.instagram.com/" + username)


following = driver.find_element_by_xpath("/html/body/div[1]/section/main/div/header/section/ul/li[3]/a")
following.send_keys(Keys.RETURN)
time.sleep(5)


followingList = []
quantityFollowing = driver.find_element_by_xpath("/html/body/div[1]/section/main/div/header/section/ul/li[3]/a/span").text
print(quantityFollowing)
scrollbar = driver.find_element_by_class_name("isgrP")


func.createUserList(driver, quantityFollowing, scrollbar, followingList)

followersList = []
followersQuantity = driver.find_element_by_xpath("/html/body/div[1]/section/main/div/header/section/ul/li[2]/a/span").text
followersQuantity = re.sub(",", "", followersQuantity) # error due to having comma, make function to check this and remove it
print(followersQuantity)
followers = driver.find_element_by_xpath("/html/body/div[1]/section/main/div/header/section/ul/li[2]/a")
followers.send_keys(Keys.RETURN)
scrollbar = driver.find_element_by_class_name("isgrP")

func.createUserList(driver, followersQuantity, scrollbar, followersList)

print(followingList)
print(len(followingList))
print(quantityFollowing)
print(followersList)

功能文件

import time
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
import func

def login (username, password, driver):
    #accept cookie button has been pressed
    cookie  = driver.find_element_by_xpath("/html/body/div[4]/div/div/button[1]")
    cookie.send_keys(Keys.RETURN)

    #username and password details have been entered
    time.sleep(5)
    user = driver.find_element_by_name("username")
    pwb = driver.find_element_by_name("password")
    submit = driver.find_element_by_xpath("/html/body/div[1]/section/main/article/div[2]/div[1]/div/form/div/div[3]/button")
    user.clear()
    user.send_keys(username)
    pwb.clear()
    pwb.send_keys(password)
    submit.send_keys(Keys.RETURN)
    time.sleep(3)

    #check if details worked

    #This finds the error message and uses 'elements' to store elements in array but there should only be one
    check = driver.find_elements_by_id("slfErrorAlert") 
    
    #checks if the error element is there and acts on it
    if check == []:
        print("success")
    else:
        print("Details are incorrect!")
        driver.close()

def createUserList(driver, quantity, scrollbar, list):
    z = 1
    y = 12

    while len(list) != int(quantity):
        if y > int(quantity):
            y = int(quantity) + 1

        if z > y:
            z = y - 2

        scrollbar.send_keys(Keys.END)
        time.sleep(3)


        for x in range(z, y):
            name = driver.find_element_by_css_selector("li.wo9IH:nth-child("+ str(x) +") > div:nth-child(1) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > span:nth-child(1) > a:nth-child(1)")
            user = name.get_attribute("title")
            print(user)
            list.append(user)
        

        z = z + 11
        y = y + 11

func.py import from selenium.common.exceptions import NoSuchElementException然后使用tryexcept NoSuchElementException来测试元素是否存在,如果不存在,这是它滚动并使元素需要出现的错误。 例子:

for x in range(z, y):
    try:
       driver.find_element_by_css_selector("li.wo9IH:nth-child("+ str(x) +") > div:nth-child(1) > div:nth-child(1) > div:nth-child(2) > div:nth-child(1) > span:nth-child(1) > a:nth-child(1)")
except NoSuchElementException:
       scrollbar.send_keys(Keys.END)
       time.sleep(3)

暂无
暂无

声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM