[英]Scraping Instagram followers page using selenium and python
我有一个关于抓取 Instagram 关注者页面的问题。我有一段代码,但它只能显示 9 个关注者。请帮帮我。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
def login(driver, username="xxxx@yahoo.com", password="xxxx"):
    """Log in to Instagram through the web login form.

    Args:
        driver: Selenium WebDriver instance used to drive the browser.
        username: Account name or e-mail address. The default is only a
            placeholder and must be replaced with real credentials.
        password: Account password. The default is only a placeholder.

    Raises:
        selenium.common.exceptions.TimeoutException: If the username field
            or the post-login "See All" link does not appear within 15 s.
        selenium.common.exceptions.NoSuchElementException: If the password
            field or the submit button cannot be located.
    """
    # Load page
    driver.get("https://www.instagram.com/accounts/login/")

    wait = WebDriverWait(driver, 15)

    # Login. The form may be injected after the initial page load, so wait
    # for the first field instead of assuming it is already in the DOM.
    username_field = wait.until(
        EC.presence_of_element_located(
            (By.XPATH, "//div/input[@name='username']")))
    username_field.send_keys(username)
    driver.find_element(
        By.XPATH, "//div/input[@name='password']").send_keys(password)
    driver.find_element(By.XPATH, "//span/button").click()

    # The presence of the "See All" link is used as the signal that the
    # login completed and the next page has rendered.
    wait.until(EC.presence_of_element_located((By.LINK_TEXT, "See All")))
def scrape_followers(driver, account, max_followers=None, scroll_timeout=5):
    """Return the follower names shown in an account's followers modal.

    The modal only renders a first batch of followers; more are loaded as
    the list is scrolled. This function keeps scrolling the last loaded
    entry into view until no new entries appear (or ``max_followers`` is
    reached) and then reads the text of every entry.

    Args:
        driver: Selenium WebDriver instance, already logged in.
        account: Instagram account name whose followers are scraped.
        max_followers: Optional upper bound on how many followers to
            collect. ``None`` (the default) scrolls until the list stops
            growing, which can take very long for large accounts.
        scroll_timeout: Seconds to wait for new entries after each scroll
            before concluding that the bottom of the list was reached.

    Returns:
        list[str]: The link text of each follower entry found.

    Raises:
        selenium.common.exceptions.NoSuchElementException: If the
            followers link is not on the account page.
        selenium.common.exceptions.TimeoutException: If the followers
            modal does not appear within 10 s.
    """
    # Imported locally so this function does not depend on an import that
    # the rest of the file may not have.
    from selenium.common.exceptions import TimeoutException

    # Load account page
    driver.get("https://www.instagram.com/{0}/".format(account))

    # Click the 'Follower(s)' link
    driver.find_element(By.PARTIAL_LINK_TEXT, "follower").click()

    # Wait for the followers modal to load
    modal_xpath = ("//div[@style='position: relative; z-index: 1;']"
                   "/div/div[2]/div/div[1]")
    WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.XPATH, modal_xpath)))

    links_xpath = ("//div[@style='position: relative; z-index: 1;']"
                   "//ul/li/div/div/div/div/a")
    followers_elems = driver.find_elements(By.XPATH, links_xpath)

    # Scroll the last loaded entry into view and wait for the list to grow.
    # A timeout without growth is treated as having reached the bottom.
    while followers_elems and (
            max_followers is None or len(followers_elems) < max_followers):
        loaded = len(followers_elems)
        driver.execute_script(
            "arguments[0].scrollIntoView();", followers_elems[-1])
        try:
            WebDriverWait(driver, scroll_timeout).until(
                lambda d: len(d.find_elements(By.XPATH, links_xpath)) > loaded)
        except TimeoutException:
            break
        # Re-query so the list holds fresh references to all entries.
        followers_elems = driver.find_elements(By.XPATH, links_xpath)

    if max_followers is not None:
        followers_elems = followers_elems[:max_followers]

    # Finally, scrape the followers
    return [e.text for e in followers_elems]
if __name__ == "__main__":
    # Run the scrape in a Firefox session. The finally clause makes sure
    # the browser is shut down even if login or scraping raises.
    driver = webdriver.Firefox()
    try:
        login(driver)
        print(scrape_followers(driver, "instagram"))
    finally:
        driver.quit()
此代码取自另一个页面。 我不明白如何向下滚动关注者页面。
您必须添加一个 for 循环,以便您可以向下滚动页面以查看关注者。 这个 for 循环可以是这样的:
# This snippet uses time.sleep and random.randint, which the original
# snippet never imported.
import random
import time

# Find the followers page: the element whose scrollTop is advanced below.
dialog = driver.find_element(
    By.XPATH, '/html/body/div[2]/div/div[2]/div/div[2]')
# Find number of followers. Thousands separators ("1,234") are stripped so
# int() does not fail; abbreviated counts such as "12.3k" are NOT handled.
allfoll = int(
    driver.find_element(By.XPATH, "//li[2]/a/span").text.replace(",", ""))
# Scroll down the page: one jump to the bottom of the dialog per iteration,
# for half as many iterations as there are followers.
for i in range(allfoll // 2):
    driver.execute_script(
        "arguments[0].scrollTop = arguments[0].scrollHeight", dialog)
    # Random 0.5-1.0 s pause between scrolls.
    time.sleep(random.randint(500, 1000) / 1000)
    print("Extract friends %", round((i / (allfoll / 2) * 100), 2),
          "from", "%100")
您可以使用 JavaScript 增大 scrollTop 的值来轻松向下滚动。重复执行这一滚动操作,直到列表中的用户数量不再变化为止。
可以使用以下函数检查用户数量是否发生了变化:
# Number of list items seen in the dialog on the previous check.
count = 0


def check_difference_in_count(driver):
    """Report whether the number of ``<li>`` items in the dialog changed.

    Counts the elements matching ``//div[@role='dialog']//li`` and compares
    the result with the module-level ``count`` from the previous call. The
    new value is stored in ``count`` so the next call compares against it.

    Takes a single ``driver`` argument, so it can be passed directly to
    ``WebDriverWait(...).until``.

    Args:
        driver: Selenium WebDriver instance showing the followers dialog.

    Returns:
        bool: True if the item count differs from the previous call,
        False otherwise.
    """
    global count
    new_count = len(
        driver.find_elements(By.XPATH, "//div[@role='dialog']//li"))
    changed = new_count != count
    count = new_count
    return changed
下面的脚本向下滚动用户容器,直到它到达底部
from selenium.common.exceptions import TimeoutException

# Keep jumping to the bottom of the user list until a scroll no longer
# changes the number of listed users.
while True:
    # scroll down: setting scrollTop far beyond the content height moves
    # the list's parent container to its bottom.
    driver.execute_script(
        "document.querySelector('div[role=dialog] ul').parentNode.scrollTop=1e100")
    try:
        WebDriverWait(driver, 5).until(check_difference_in_count)
    except TimeoutException:
        # The count did not change within 5 s: treat as end of list.
        # Only the timeout is caught, so Ctrl-C and real browser errors
        # are no longer silently swallowed.
        break
这个项目有可以正常运行的最终版本吗?
声明:本站的技术帖子网页,遵循CC BY-SA 4.0协议,如果您需要转载,请注明本站网址或者原文地址。任何问题请咨询:yoyou2525@163.com.