I am trying to read and write all output from Mysql. When My spiders start scraping I want to get all URL from MySQL database so I try to make a function to read data.
readdata.py:
import mysql.connector
from mysql.connector import Error
from itemadapter import ItemAdapter
def dataReader(marketName):
try:
connection = mysql.connector.connect(host='localhost',
database='test',
user='root',
port=3306,
password='1234')
sql_select_Query = "SELECT shop_URL FROM datatable.bot_markets WHERE shop_name='"+marketName+"';"
cursor = connection.cursor()
cursor.execute(sql_select_Query)
records = cursor.fetchall()
return records
except Error as e:
print("Error reading data from MySQL table", e)
finally:
if (connection.is_connected()):
connection.close()
cursor.close()
print("MySQL connection is closed")
I want to call the this function from my spider as follow.
My Spider:
import scrapy
import re
import mysql.connector
from ..items import FirstBotItem
from scrapy.utils.project import get_project_settings
from first_bot.readdata import dataReader
class My_Spider(scrapy.Spider):
name = "My_Spider"
allowed_domains = ["quotes.toscrape.com/"]
start_urls = dataReader(name)
def parse(self, response):
location = "quotes"
for product in response.xpath('.//div[@class="product-card product-action "]'):
product_link = response.url
prices = product.xpath('.//div[@class="price-tag"]/span[@class="value"]/text()').get()
if prices != None:prices = re.sub(r"[\s]", "", prices)
title = product.xpath('.//h5[@class="title product-card-title"]/a/text()').get()
unit = product.xpath('.//div[@class="select single-select"]//i/text()').get()
if unit != None: unit = re.sub(r"[\s]", "", unit)
item = FirstBotItem()
item['LOKASYON'] = location
item['YEAR'] = 2020
item['MONTH'] = 8
yield item
I am doing something wrong with start_urls but I couldn't figure out. I am getting this error.
_set_url
raise TypeError('Request url must be str or unicode, got %s:' % type(url).__name__)
TypeError: Request url must be str or unicode, got tuple:
2020-08-24 15:46:31 [scrapy.core.engine] INFO: Closing spider (finished)
My main task is to get all URLs from the database. Because someone will write the URL on the same website and the spider will crawl automatically.
You can try to change the logic in dataReader
method from:
records = cursor.fetchall()
return records
to :
records = cursor.fetchall()
records_list = []
for rec in records:
records_list.append(rec)
return records_list
您应该在 dataReader 函数中编写return list(records)
而不是return records
。
The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.