目标:爬取社会信用代码
http://125.35.6.84:81/xk/#
1.首先界面是一个简单的分页查询
2.刷新一下,发现数据是ajax请求的
3.查相关参数
4.点击一条记录进去
5.发现数据也是ajax请求的
6.查看参数发现,是根据之前的ID查询的
7.开始动手:首先爬取前面数据的ID,再根据ID爬取社会信用代码
import json
import requests
import re
# Module-level state used by post1, post2 and the script entry point.
# Presumably meant to hold the total page count; nothing in the visible code
# updates this module-level value.
all_count=0
# One-element list so post1 can advance the current page number in place.
page_count=[1]
# Presumably the number of records per page: it equals the 15 that post1 sends
# as its page-size parameter. Not referenced elsewhere in the visible code.
page_size=15
# Record IDs appended by post1.
data_id=[]
# businessLicenseNumber values appended by post2.
social_credit_code=[]
def post1(url, max_pages=5, timeout=10):
    """Collect record IDs from the paginated list endpoint.

    Starting from the page number held in the module-level ``page_count[0]``,
    POST the list query to *url* one page at a time and append every record's
    ``"ID"`` to the module-level ``data_id`` list. Paging stops once page
    *max_pages* has been read or the last page reported by the server
    (``pageCount``) has been reached, whichever comes first.

    Args:
        url: List endpoint to query.
        max_pages: Highest page number to request. Defaults to 5, the limit
            this script has always used.
        timeout: Seconds to wait for each HTTP response before giving up.

    Returns:
        None. Results accumulate in ``data_id``; ``page_count[0]`` is left at
        the last page that was requested.
    """
    # The headers never change between pages, so build them once.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    }
    while True:
        current_page = page_count[0]
        print(current_page)
        # Parameter names must not carry trailing whitespace, otherwise the
        # server receives differently named (unknown) parameters.
        params = {
            'on': True,
            'page': current_page,
            'pageSize': 15,
            'productName': '',
            'conditionType': 1,
            'applyname': '',
            'applysn': '',
        }
        res = requests.post(
            url=url, params=params, headers=headers, timeout=timeout
        )
        json_data = res.json()
        for record in json_data['list']:
            # Print each ID as it is collected.
            print(record["ID"])
            data_id.append(record["ID"])
        # Total number of pages reported by the server; fall back to the
        # caller's limit when the field is missing or not a number.
        try:
            total_pages = int(json_data['pageCount'])
        except (KeyError, TypeError, ValueError):
            total_pages = max_pages
        # ">=" rather than "==" so the loop still terminates if the counter
        # starts beyond the limit or the server has fewer pages than asked.
        if current_page >= min(max_pages, total_pages):
            break
        page_count[0] += 1
    print(data_id)
def post2(url, data_id, timeout=10):
    """Fetch the social credit code for every record ID.

    For each entry of *data_id*, POST ``id=<entry>`` to *url*, read the
    ``businessLicenseNumber`` field from the JSON response, print it and
    append it to the module-level ``social_credit_code`` list.

    Args:
        url: Detail endpoint that looks a record up by its ID.
        data_id: Iterable of record IDs to look up.
        timeout: Seconds to wait for each HTTP response, so that one stalled
            request cannot hang the whole run.

    Returns:
        None. Results accumulate in ``social_credit_code``.
    """
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36"
    }
    for record_id in data_id:
        res = requests.post(
            url=url, params={'id': record_id}, headers=headers, timeout=timeout
        )
        json_data = res.json()
        code = json_data['businessLicenseNumber']
        # Print each social credit code as it is retrieved.
        print(code)
        social_credit_code.append(code)
def main():
    """Run the two scraping stages and print the collected codes."""
    list_url = "http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsList"
    detail_url = "http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsById"
    # Stage 1 fills the module-level data_id list with record IDs.
    post1(list_url)
    # Stage 2 resolves each collected ID to its businessLicenseNumber.
    post2(detail_url, data_id)
    print(social_credit_code)


if __name__ == '__main__':
    main()
原文地址:https://www.cnblogs.com/zx125/p/11399239.html
时间: 2024-11-09 03:48:22