"""Collect product ids from competitor shop listing pages into MySQL.

Flow:
  1. Read every row of the `竞店` table. Each row supplies `地址` (a listing
     URL prefix to which the page number is appended) and `店铺名称` (the
     name of the per-shop table that stores that shop's product ids).
  2. For each shop, read the page count from the pager on page 1, then walk
     every page and collect the digits of each product's `data-id`.
  3. Insert the ids that are not yet present in the shop's table and commit.
"""
import random
import re
import time

import lxml  # noqa: F401  -- backs BeautifulSoup's "lxml" parser
import pymysql.cursors
import requests
from bs4 import BeautifulSoup

# NOTE(review): credentials are hard-coded; consider moving them to
# configuration or environment variables.
connection = pymysql.connect(host='localhost',
                             user='root',
                             password='123',
                             db='asd',
                             charset='utf8mb4',
                             cursorclass=pymysql.cursors.DictCursor)

# NOTE(review): these look like HTTP request headers ("Ancoding" is presumably
# a mangled "Accept-Encoding"), yet the script sends them as query-string
# parameters via `params=`. That is kept as-is on purpose: switching to
# `headers=` would pin Host/Referer to a single shop for every URL. Confirm
# the intent before changing it.
payload = {
    "Ancoding": "gzip, deflate, sdch, br",
    "Accept-Language": "zh-CN,zh;q=0.8",
    "Connection": "keep-alive",
    "Cookie": "hng=; uss=UIMY14A%2B04Bbq%2BqRxS6C9OzJWudsw14Q1kb5mDDqxW%2BQ3YG%2BUcpgrDRWnRQ%3D; uc3=sg2=AC4AfXCJ7XkLw0gCUD1tD9ZxhXFdweN2A6VfybWadxI%3D&nk2=&id2=&lg2=; t=3c0787f77a28e0854ef28fc360b2c555; cookie2=1c912d33e44bdb2008763748702a61f4; _tb_token_=78577371d8136; l=AiQkmjyCyPnG7qTN1Iu5fBqvdCgWvUgn; isg=AvDwL_qYXdDeegACSXGXiIOKwb7f2NSDXgsSOepBvMsepZFPkkmkE0aNixo_; pnm_cku822=; cna=T7gREcWMLDsCAavWmjBJPJpS; Hm_lvt_c478afee593a872fd45cb9a0d7a9da3b=1495496950; Hm_lpvt_c478afee593a872fd45cb9a0d7a9da3b=1495496950",
    "Host": "tanggulake.tmall.com",
    "Referer": "https://tanggulake.tmall.com/search.htm?spm=a220o.1000855.w5002-15900729481.1.b3kpys&search=y",
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36",
    "X-Requested-With": "XMLHttpRequest",
}

# Seconds to wait for a response; without a timeout a stalled server would
# block the script forever.
REQUEST_TIMEOUT = 30

# The third match of this selector holds the pager text "<current>/<total>".
PAGER_SELECTOR = "div > div > div > div > span:nth-of-type(1)"
# One <dl> per product; the product id lives in its `data-id` attribute.
ITEM_SELECTOR = "div > div > div > dl"

NON_DIGITS = re.compile(r"\D")


def _fetch_soup(url):
    """Download *url* and return its body parsed with the lxml parser."""
    response = requests.get(url, params=payload, timeout=REQUEST_TIMEOUT)
    return BeautifulSoup(response.text, "lxml")


def _page_count(soup):
    """Return the total number of listing pages announced by the pager.

    Raises IndexError or ValueError when the pager is missing or does not
    have the expected "<current>/<total>" form.
    """
    pager = soup.select(PAGER_SELECTOR)
    return int(pager[2].text.split("/")[1])


def _extract_ids(soup):
    """Return the digit-only product ids found on one listing page."""
    ids = []
    for item in soup.select(ITEM_SELECTOR):
        # `data-id` may be absent on some <dl> elements; treat that as empty.
        digits = NON_DIGITS.sub("", item.get("data-id") or "")
        if digits:
            ids.append(digits)
    return ids


def scrape_shop_ids(base_url):
    """Return the product ids from every listing page of one shop.

    *base_url* is the listing URL prefix; the 1-based page number is appended
    to it. Page 1 is downloaded once and reused for both the page count and
    its own ids.
    """
    first_page = _fetch_soup(base_url + "1")
    total_pages = _page_count(first_page)
    print(total_pages)

    ids = _extract_ids(first_page)
    for page in range(2, total_pages + 1):
        # Random 1-4 second pause between requests.
        time.sleep(random.randrange(1, 5))
        ids.extend(_extract_ids(_fetch_soup(base_url + str(page))))
    return ids


def _quote_identifier(name):
    """Backtick-quote *name* for use as a MySQL identifier."""
    return "`" + name.replace("`", "``") + "`"


def store_new_ids(conn, table_name, ids):
    """Insert the ids not yet present in *table_name*; return how many.

    The table name cannot be passed as a query parameter, so it is quoted as
    an identifier instead; the id values themselves are parameterized.
    """
    table = _quote_identifier(table_name)
    inserted = 0
    with conn.cursor() as cursor:
        cursor.execute("select id from " + table)
        # Scraped ids are strings. The column type is not visible from this
        # script, so stored values are normalized to str before comparing.
        known = {str(row["id"]) for row in cursor.fetchall()}
        insert_sql = "INSERT INTO " + table + " (`id`) VALUES (%s)"
        for item_id in ids:
            if item_id in known:
                continue
            # Remember it so a duplicate within this scrape is inserted once.
            known.add(item_id)
            cursor.execute(insert_sql, (item_id,))
            inserted += 1
    # The connection is not autocommit by default, so commit to save the rows.
    conn.commit()
    return inserted


try:
    with connection.cursor() as cursor:
        cursor.execute("select * from 竞店")
        shops = cursor.fetchall()
    # End the read transaction here so it is not held open during the
    # (slow) scraping that follows.
    connection.commit()

    for shop in shops:
        try:
            shop_ids = scrape_shop_ids(shop["地址"])
        except (requests.RequestException, IndexError, ValueError) as exc:
            # A network failure or an unexpected page layout for one shop
            # should not abort the remaining shops.
            print("skipping shop %s: %r" % (shop["店铺名称"], exc))
            continue
        store_new_ids(connection, shop["店铺名称"], shop_ids)
finally:
    connection.close()
# Time: 2024-10-06 12:27:56