在spider中最後一個函數返回item時,scrapy會調用pipeline裡面的 ...
在spider中最後一個函數返回item時,scrapy會調用pipeline裡面的
process_item(self, item, spider):
函數並傳入item,spider等參數
在這裡可以將數據進行持久化儲存
我的pipeline代碼
# -*- coding: utf-8 -*-
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
import MySQLdb
import MySQLdb.cursors
from twisted.enterprise import adbapi


class MyPipeline(object):
    """Scrapy item pipeline that persists scraped items into MySQL using
    twisted's asynchronous DB connection pool.

    The class name here must match the pipeline entry registered in the
    project's settings (ITEM_PIPELINES).
    """

    def __init__(self, dbpool):
        # Shared twisted adbapi connection pool created in from_settings().
        self.dbpool = dbpool

    @classmethod
    def from_settings(cls, settings):
        """Alternate constructor called by Scrapy: build the DB pool from
        the MYSQL_* entries in the project settings."""
        dbargs = dict(
            host=settings['MYSQL_HOST'],
            db=settings['MYSQL_DBNAME'],
            port=settings['MYSQL_PORT'],
            user=settings['MYSQL_USER'],
            passwd=settings['MYSQL_PASSWD'],
            charset='utf8',
            cursorclass=MySQLdb.cursors.DictCursor,
            use_unicode=True,
        )
        dbpool = adbapi.ConnectionPool('MySQLdb', **dbargs)
        return cls(dbpool)

    def process_item(self, item, spider):
        """Called by Scrapy for every item the spider yields; schedule an
        asynchronous insert and pass the item on unchanged."""
        d = self.dbpool.runInteraction(self._do_upinsert, item, spider)
        # FIX: the original discarded the Deferred, so any database error
        # disappeared into an unhandled Deferred. Attach an errback.
        d.addErrback(self._handle_error, item, spider)
        return item

    def _handle_error(self, failure, item, spider):
        # Surface DB failures instead of silently dropping them.
        print('db error: %s' % failure)

    def _do_upinsert(self, conn, item, spider):
        """Runs in a pool thread with a cursor `conn`; insert the item only
        when every field has a non-empty value."""
        # FIX: iterating a Scrapy item yields its *field names*, which are
        # never falsy — the original check was a no-op. Check the values.
        valid = all(item[field] for field in item)
        if valid:
            # FIX: the original used Unicode smart quotes (‘sql’), which is
            # a SyntaxError.
            # NOTE(review): 'sql' is still a placeholder — replace with a
            # real parameterized statement, e.g. conn.execute(sql, params).
            result = conn.execute('sql')
            if result:
                print('added a record')
            else:
                print('failed insert into table')
代碼git地址:過幾天會上傳