1. Create the scrapy database (with a MySQL management tool or the mysql command line)
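If no management tool is at hand, the same step can also be done from Python once the MySQLdb driver from step 4 is installed. A minimal sketch, reusing the host and credentials that appear later in this post:

# create_db.py -- hypothetical helper, not part of the original post
import MySQLdb

# connect without selecting a database, then create the "scrapy" database
conn = MySQLdb.connect(host="127.0.0.1", user="root", passwd="qmf123456")
conn.cursor().execute("create database if not exists scrapy default character set utf8")
conn.close()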
2. Create the douban table in the scrapy database
mysql> create table scrapy.douban(
    id int primary key auto_increment,
    name varchar(100) NOT NULL,
    author varchar(50) NULL,
    press varchar(100) NULL,
    date varchar(30) NULL,
    page varchar(30) NULL,
    price varchar(30) NULL,
    score varchar(30) NULL,
    ISBN varchar(30) NULL,
    author_profile varchar(1500) NULL,
    content_description varchar(1500) NULL,
    link varchar(255) NULL
) default charset=utf8;
3. Point the Scrapy project at the database in pipelines.py
# -*- coding: utf-8 -*-

# Define your item pipelines here
#
# Don't forget to add your pipeline to the ITEM_PIPELINES setting
# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html

import json

from twisted.enterprise import adbapi
from scrapy import log
import MySQLdb
import MySQLdb.cursors


class DoubanPipeline(object):
    """Writes each item to a local JSON lines file."""

    def __init__(self):
        self.file = open("./books.json", "wb")

    def process_item(self, item, spider):
        # convert every field to UTF-8 before serializing
        for k in item:
            item[k] = item[k].encode("utf8")
        line = json.dumps(dict(item), ensure_ascii=False) + "\n"
        self.file.write(line)
        return item


class MySQLPipeline(object):
    """Inserts each item into the douban table via a Twisted connection pool."""

    def __init__(self):
        self.dbpool = adbapi.ConnectionPool("MySQLdb",
            db="scrapy",               # database name
            user="root",               # database user
            passwd="qmf123456",        # password
            cursorclass=MySQLdb.cursors.DictCursor,
            charset="utf8",
            use_unicode=False
        )

    def process_item(self, item, spider):
        # run the insert asynchronously in the pool's thread
        query = self.dbpool.runInteraction(self._conditional_insert, item)
        query.addErrback(self.handle_error)
        return item

    def _conditional_insert(self, tb, item):
        tb.execute(
            "insert into douban (name, author, press, date, page, price, score, ISBN, author_profile, "
            "content_description, link) values (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)",
            (item["name"], item["author"], item["press"], item["date"],
             item["page"], item["price"], item["score"], item["ISBN"],
             item["author_profile"], item["content_description"], item["link"]))
        log.msg("Item data in db: %s" % item, level=log.DEBUG)

    def handle_error(self, e):
        log.err(e)
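The pipeline above reads fields such as item["name"] and item["ISBN"], so the project's items.py has to declare an Item with exactly those field names. A minimal sketch (the class name DoubanItem is an assumption; only the field names are taken from the code above):

# items.py -- sketch; field names must match the keys used in MySQLPipeline
from scrapy.item import Item, Field

class DoubanItem(Item):
    name = Field()
    author = Field()
    press = Field()
    date = Field()
    page = Field()
    price = Field()
    score = Field()
    ISBN = Field()
    author_profile = Field()
    content_description = Field()
    link = Field()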
Then enable the two pipelines in the project's settings.py, as sketched below.
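A minimal sketch of the settings.py entry; the dotted module path douban.pipelines is an assumption and should be adjusted to the actual project package (older Scrapy releases also accept a plain list of class paths instead of a dict):

# settings.py -- sketch, module path assumed
ITEM_PIPELINES = {
    "douban.pipelines.DoubanPipeline": 300,   # JSON lines output
    "douban.pipelines.MySQLPipeline": 400,    # MySQL insert
}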
4. Install the MySQLdb driver
MySQL-python-1.2.3.win-amd64-py2.7.exe
Check that the driver installed successfully; a quick check from the Python interpreter is sketched below.
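If the import raises no ImportError, the driver is on the path; version_info is exposed by MySQL-python itself:

# check_mysqldb.py -- hypothetical sanity check, not part of the original post
import MySQLdb
print MySQLdb.version_info   # e.g. (1, 2, 3, 'final', 0) for this installer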
5. Query the database with Python's MySQLdb
import MySQLdb

conn = MySQLdb.connect(host="127.0.0.1", user="root", passwd="qmf123456", db="scrapy")
cursor = conn.cursor()
n = cursor.execute("select count(*) from douban")
for row in cursor.fetchall():
    for r in row:
        print r
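If the row count looks right, the same connection can also pull back a few stored records for a spot check; a small sketch reusing the column names from the table created in step 2:

# spot-check a few rows, then close the connection
cursor.execute("select name, author, score from douban limit 5")
for name, author, score in cursor.fetchall():
    print name, author, score
cursor.close()
conn.close()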