# Part 1: Database helper (一、数据库封装). The scraper in part 2 imports this code as the module "mysqlhelper".
import pymysql
class MysqlHelper(object):
    """Small wrapper around one pymysql connection and one cursor.

    The connection is opened in ``__init__`` and released by ``close()``
    (which ``__del__`` also calls). The default connection settings are the
    values this helper was originally hard-coded with, so ``MysqlHelper()``
    keeps connecting to the same database.

    NOTE(review): the default password is committed in source; prefer passing
    credentials in from configuration.
    """

    def __init__(self, host='127.0.0.1', port=3306, user='root',
                 password='abc123', database='py1011', charset='utf8'):
        """Open the connection and create the cursor used for all statements.

        Args:
            host: MySQL server host.
            port: MySQL server port.
            user: Login user name.
            password: Login password.
            database: Schema selected after connecting.
            charset: Connection character set.
        """
        # Define both attributes before connecting so close()/__del__ stay
        # safe even when pymysql.connect() raises.
        self.db = None
        self.cursor = None
        self.db = pymysql.connect(host=host, port=port, user=user,
                                  password=password, database=database,
                                  charset=charset)
        self.cursor = self.db.cursor()

    def execute_modify_sql(self, sql, data=None):
        """Execute one data-modifying statement and commit it.

        Args:
            sql: Statement text, with driver placeholders for the values.
            data: Values bound to the placeholders (a tuple/list, or None
                when the statement has no placeholders).

        Raises:
            Whatever the driver raises; the transaction is rolled back
            before the exception propagates.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            # Do not leave a half-applied transaction open on the connection.
            self.db.rollback()
            raise

    def close(self):
        """Close the cursor and the connection; safe to call repeatedly."""
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Clear the references first so a second call is a no-op.
        self.cursor = None
        self.db = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            if db is not None:
                db.close()

    def __del__(self):
        # Best-effort cleanup: the object may be only partly initialised, and
        # an exception escaping __del__ would only be reported as a warning.
        try:
            self.close()
        except Exception:
            pass
if __name__ == '__main__':
    # Manual smoke test: insert a single row through the helper.
    conn = MysqlHelper()
    # The trailing comma makes `data` a one-element tuple; without it the
    # parentheses are only grouping and a bare str would be passed instead.
    conn.execute_modify_sql('insert into lianjiaxinxi(title) VALUE (%s)', data=('huzeqi hehehe',))
# Part 2: Lianjia listing scraper (二、链家信息爬取)
import requests
from lxml import etree
import mysqlhelper
import re

# Paginated listing URL (hence "base_url"); %s is replaced by the page number.
base_url = 'https://bj.lianjia.com/zufang/pg%srp1/'
sql = 'INSERT INTO lianjiaxinxi (title, region, zone, meters, location, price) VALUES' \
      ' (%s, %s, %s, %s, %s, %s)'

# XPath of every stored field, relative to one listing <li>, in the same
# order as the columns named in `sql`.
FIELD_XPATHS = (
    ('title', './div[2]/h2/a'),
    ('region', './div[2]/div[1]/div[1]/a/span'),
    ('zone', './div[2]/div[1]/div[1]/span[1]/span'),
    ('meters', './div[2]/div[1]/div[1]/span[2]'),
    ('location', './div[2]/div[1]/div[1]/span[3]'),
    ('price', './/div[@class="price"]/span'),
)

# Compiled once instead of on every listing.
_LEADING_DIGITS = re.compile(r'\d+')


def leading_digits(text):
    """Return the run of digits at the start of *text*, or None if there is none."""
    if not text:
        return None
    found = _LEADING_DIGITS.match(text)
    return found.group(0) if found else None


def parse_listing(li_ele):
    """Extract one row for `sql` from a listing element.

    Args:
        li_ele: An element exposing ``xpath(path)`` that returns a list of
            nodes with a ``text`` attribute (an lxml ``<li>`` element here).

    Returns:
        A 6-tuple ``(title, region, zone, meters, location, price)`` where
        ``meters`` is reduced to its leading digits, or None when a field is
        missing or ``meters`` does not start with a number.
    """
    row = []
    for name, path in FIELD_XPATHS:
        nodes = li_ele.xpath(path)
        if not nodes:
            # Markup differs from what the XPaths expect; let the caller skip it.
            return None
        text = nodes[0].text
        print(text)
        if name == 'meters':
            text = leading_digits(text)
            if text is None:
                return None
        row.append(text)
    return tuple(row)


def scrape(pages=range(1, 4), timeout=10):
    """Fetch each listing page and insert every complete listing into MySQL.

    Args:
        pages: Iterable of page numbers substituted into `base_url`.
        timeout: Seconds allowed per HTTP request.
    """
    myhelper = mysqlhelper.MysqlHelper()
    for page in pages:
        url = base_url % page
        try:
            response = requests.get(url, timeout=timeout)
            response.raise_for_status()
        except requests.RequestException as exc:
            # One bad page should not abort the remaining pages.
            print('skipping %s: %s' % (url, exc))
            continue
        html_ele = etree.HTML(response.text)
        if html_ele is None:
            # NOTE(review): guards against a body the parser yields no tree for.
            continue
        # Each <li> under the listing <ul> is one rental entry.
        for li_ele in html_ele.xpath('//ul[@id="house-lst"]/li'):
            data = parse_listing(li_ele)
            if data is None:
                print('skipping incomplete listing on %s' % url)
                continue
            myhelper.execute_modify_sql(sql, data)


if __name__ == '__main__':
    scrape()
# Original source (原文地址): https://www.cnblogs.com/luwanhe/p/9500547.html