
Writing a script for a college sports class, keep getting the error "AttributeError: module 'scrapy' has no attribute 'spider'"

This is my code; I'm not sure what I am doing wrong here. I'd appreciate any help.

from selenium import webdriver
from bs4 import BeautifulSoup
import scrapy
from scrapy.spiders import Spider
import requests
import time
import xlsxwriter
import pandas as pd

url = 'https://www.ufc.com/athletes/all?filters%5B0%5D=status%3A23'
driver = webdriver.Chrome('/Applications/Python 3.9/chromedriver')
driver.get(url)

class WebSpider(scrapy.spider):
    name = "Web_Spider"
    allowed_domains = ['https://www.ufc.com/athletes']
    start_urls = ['https://www.ufc.com/athletes/all?filters%5B0%5D=status%3A2']

    def __init__(self):
        self.driver = driver

    def parse(self, response):
        self.driver.get(response.url)

        while True:
            next = self.drive.find_element_by_xpath('//*[@id="block-mainpagecontent"]/div/div/div[2]/div/div/ul/li/a')

            try:
                next.click()

            except:
                break

        self.driver.close()

I keep getting the error "AttributeError: module 'scrapy' has no attribute 'spider'". I'm not sure what to do here; Scrapy is installed correctly and up to date.

It's scrapy.Spider, with a capital "S".

Try now: 

from selenium import webdriver
from bs4 import BeautifulSoup
import scrapy
from scrapy.spiders import Spider
import requests
import time
import xlsxwriter
import pandas as pd

url = 'https://www.ufc.com/athletes/all?filters%5B0%5D=status%3A23'
driver = webdriver.Chrome('/Applications/Python 3.9/chromedriver')
driver.get(url)

class WebSpider(scrapy.Spider):
    name = "Web_Spider"
    allowed_domains = ['https://www.ufc.com/athletes']
    start_urls = ['https://www.ufc.com/athletes/all?filters%5B0%5D=status%3A2']

    def __init__(self):
        self.driver = driver

    def parse(self, response):
        self.driver.get(response.url)

        while True:
            # locate the pager/load-more link; note that find_element_by_xpath was
            # removed in Selenium 4 in favour of driver.find_element(By.XPATH, ...)
            next = self.driver.find_element_by_xpath('//*[@id="block-mainpagecontent"]/div/div/div[2]/div/div/ul/li/a')

            try:
                next.click()

            except:
                break

        self.driver.close()
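
For what it's worth, defining the class only fixes the AttributeError; nothing above actually starts the crawl. A minimal sketch of how it could be run from a plain script with Scrapy's CrawlerProcess (the setting shown is just an example, adjust to your project):

from scrapy.crawler import CrawlerProcess

# run the spider without the `scrapy crawl` command
process = CrawlerProcess(settings={'LOG_LEVEL': 'INFO'})  # example setting only
process.crawl(WebSpider)
process.start()  # blocks until the crawl is finished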

Depending on what you are trying to do, I wouldn't go with Selenium here, since you can fetch the data directly through the site's AJAX endpoint. Selenium will still work, but it's overkill and less efficient.

Try this:

import requests
from bs4 import BeautifulSoup
import re


url = 'https://www.ufc.com/views/ajax?_wrapper_format=drupal_ajax'
headers = {'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Mobile Safari/537.36'}

page = 1
end_of_load = False
while not end_of_load:
    # parameters for the Drupal views AJAX endpoint; 'page' is bumped each pass
    # until a request comes back with no athlete cards
    payload = {
        'view_name': 'all_athletes',
        'view_display_id': 'page',
        'view_path': '/athletes/all',
        'pager_element': '0',
        'gender': 'All',
        'page': '%s' % page}

    jsonData = requests.post(url, headers=headers, data=payload).json()
    print('Page: %s' % page)
    page += 1

    # the HTML fragment for this page sits in the 'data' field of the last
    # command in the AJAX response
    html = jsonData[-1]['data']

    soup = BeautifulSoup(html, 'html.parser')
    
    player_cards = soup.find_all('div',{'class':re.compile('.*view-mode-all-athletes-result.*')})
    if not player_cards:
        end_of_load = True
  
    else:
        for player_card in player_cards:
            name = player_card.find('span',{'class':re.compile('.*athlete__name.*')}).text.strip()
            try:
                weight_class = player_card.find('div',{'class':re.compile('.*weight-class.*')}).text.strip()
            except:
                weight_class = 'N/A'
            try:
                record = player_card.find('span',{'class':re.compile('.*athlete__record.*')}).text.strip()
            except:
                record = 'N/A'
            print('\t%s - %s\t%s' %(name,weight_class,record))
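
Since your original script already imports pandas and xlsxwriter, you could collect the rows instead of printing them and write everything to Excel once the loop finishes. A minimal sketch, assuming an athletes list accumulated inside the loop and an example output filename:

import pandas as pd

athletes = []  # assumed accumulator; append to it inside the for-loop above, e.g.
# athletes.append({'name': name, 'weight_class': weight_class, 'record': record})

# once the while-loop finishes, dump everything to a spreadsheet
df = pd.DataFrame(athletes)
df.to_excel('ufc_athletes.xlsx', index=False, engine='xlsxwriter')  # filename is just an example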
