scrapy exceptions.TypeError: 'int' object has no attribute '__getitem__'
I ran into a problem while using Scrapy to collect data into MySQL, and I hope someone can suggest a solution, thanks. pipelines.py raises a TypeError:
2013-12-06 18:07:02+0800 [-] ERROR: Unhandled error in Deferred:
2013-12-06 18:07:02+0800 [-] Unhandled Error
Traceback (most recent call last):
  File "/usr/lib/python2.7/threading.py", line 524, in __bootstrap
    self.__bootstrap_inner()
  File "/usr/lib/python2.7/threading.py", line 551, in __bootstrap_inner
    self.run()
  File "/usr/lib/python2.7/threading.py", line 504, in run
    self.__target(*self.__args, **self.__kwargs)
--- <exception caught here> ---
  File "/usr/local/lib/python2.7/dist-packages/twisted/python/threadpool.py", line 191, in _worker
    result = context.call(ctx, function, *args, **kwargs)
  File "/usr/local/lib/python2.7/dist-packages/twisted/python/context.py", line 118, in callWithContext
    return self.currentContext().callWithContext(ctx, func, *args, **kw)
  File "/usr/local/lib/python2.7/dist-packages/twisted/python/context.py", line 81, in callWithContext
    return func(*args,**kw)
  File "/usr/local/lib/python2.7/dist-packages/twisted/enterprise/adbapi.py", line 448, in _runInteraction
    result = interaction(trans, *args, **kw)
  File "/home/hugo/spider/spider/pipelines.py", line 39, in _conditional_insert
    tx.execute('INSERT INTO book_updata values (%s, %s, %s, %s, %s)', (item['name'][i], item['siteid'][i], item['page_url'][i], item['page_title'][i], time.time()))
exceptions.TypeError: 'int' object has no attribute '__getitem__'
Error: exceptions.TypeError: 'int' object has no attribute '__getitem__'
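In Python 2 this exact TypeError is raised whenever the subscript operator ([...]) is applied to a plain integer, so somewhere an int is being indexed as if it were a list or dict. A minimal, purely illustrative reproduction (not part of the pipeline code):

    value = 5            # an item field that unexpectedly holds an int
    value['name']        # TypeError: 'int' object has no attribute '__getitem__'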
And the code:
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/topics/item-pipeline.html
# -*- coding: utf-8 -*-
from scrapy import log
from twisted.enterprise import adbapi
from scrapy.http import Request
from scrapy.exceptions import DropItem
from scrapy.contrib.pipeline.images import ImagesPipeline
import time
import MySQLdb
import MySQLdb.cursors
import socket
import select
import sys
import os
import errno
class MySQLStorePipeline(object):
    def __init__(self):
        self.dbpool = adbapi.ConnectionPool('MySQLdb',
            db='test',
            user='root',
            passwd='153325',
            cursorclass=MySQLdb.cursors.DictCursor,
            charset='utf8',
            use_unicode=False
        )

    def process_item(self, item, spider):
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        return item

    def _conditional_insert(self, tx, item):
        for i in range(len(item['name'])):
            tx.execute("select * from book where name = '%s'" % (item['name'][i]))
            result = tx.fetchone()
            # (name, page_url, page_title, siteid, date)
            if result:
                for i in range(len(item['name'])):
                    tx.execute('INSERT INTO book_updata values (%s, %s, %s, %s, %s)', (item['name'][i], item['siteid'][i], item['page_url'][i], item['page_title'][i], time.time()))
                    log.msg("\n ====Old novel: %s is update!==== \n" % item['name'][i], level=log.DEBUG)
            else:
                log.msg("\n ===New novel: %s is into db==== \n" % item['name'][i], level=log.DEBUG)
                tx.execute("INSERT INTO book (name, category, page_url, page_title, author, img_url, intro, state, time) values ('%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s', '%s')" % (item['name'][i], item['category'][i], item['page_url'][i], item['page_title'][i], item['author'][i], item['img_url'][i], item['intro'][i], item['state'][i], int(time.time())))

    def handle_error(self, e):
        log.err(e)
It looks like one of the item[xxx] values used in tx.execute('INSERT INTO book_updata ...) is an int rather than a list or dict. So check the format of the item data to see where it goes wrong.
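A quick way to confirm which field is at fault is to log the type of every field before indexing it. This is a debugging sketch only, with the field names copied from the pipeline above and log being the scrapy.log module it already imports:

    def log_field_types(item):
        # The offending field will show up as <type 'int'> instead of <type 'list'>.
        for key in ('name', 'siteid', 'page_url', 'page_title', 'category',
                    'author', 'img_url', 'intro', 'state'):
            log.msg("field %r has type %s" % (key, type(item[key])), level=log.DEBUG)

Calling log_field_types(item) at the top of _conditional_insert (or process_item) shows which field carries a plain int; the fix is then to make the spider always assign a list to that field, or to normalize it before the loop, e.g. item['siteid'] = [item['siteid']] when it is not already a list.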