本实例实现了抓取网易云课堂中以‘java’为关键字的搜索结果。经查看,该搜索请求的方式为 POST,返回的结果为 JSON 数据。
具体实现代码如下:
import json

import requests

# Search endpoint; the script sends it a POST request with a JSON body and
# decodes the answer as JSON.
url = 'http://study.163.com/p/search/studycourse.json'
headers = {'content-type': 'application/json'}

finalstr = ''   # formatted text of every course collected
totlePage = 0   # total number of result pages reported by the server
test = 0        # total number of courses collected


def getData(count, timeout=10):
    """Fetch one page of search results for the keyword 'java'.

    Args:
        count: Page number to request (the script asks for pages
            1 through ``totlePageCount``).
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the script forever.

    Returns:
        The decoded JSON response body.

    Raises:
        requests.RequestException: On connection failure, timeout, or a
            non-success HTTP status.
        ValueError: If the response body is not valid JSON.
    """
    payload = {
        'pageIndex': count,
        'pageSize': '50',
        'keyword': 'java',
        'searchTimeType': '-1',
        'orderType': '5',
        'priceType': '-1',
    }
    req = requests.post(url, data=json.dumps(payload), headers=headers,
                        timeout=timeout)
    # Fail loudly on an HTTP error instead of trying to parse an error page.
    req.raise_for_status()
    return json.loads(req.text)


def _format_course(course):
    """Return the text record written to the output file for one course."""
    price = course['discountPrice']
    if price is None:
        price = 0
    learners = course['learnerCount']
    if learners is None:
        learners = 0
    return (
        '标题:' + course['productName'] + '\n'
        + '发布者:' + course['provider'] + '\n'
        + '价格:' + str(price) + '\n'
        + '学习人数:' + str(learners) + '\n\n'
    )


# Fetch the first page once; it tells us both whether there are any results
# and how many pages exist, and its items are reused in the loop below.
first_page = getData(1)['result']
final = first_page['list']

if final is not None:
    totlePage = first_page['query']['totlePageCount']
    records = []
    for j in range(1, totlePage + 1):
        # Page 1 is already in hand; only later pages need a new request.
        if j > 1:
            # Treat a missing list on a later page as an empty page.
            final = getData(j)['result']['list'] or []
        for i, course in enumerate(final, start=1):
            records.append(_format_course(course))
            print('当前正在读取第' + str(j) + '页的第' + str(i) + '条数据...')
            test += 1
    finalstr = ''.join(records)

    print('正在保存。。。')
    # The context manager closes the file even if the write fails.
    with open('网易云课堂搜索java时的数据,共' + str(test) + '条.txt', 'w',
              encoding='UTF-8') as f:
        f.write(finalstr)
    print('保存完毕!共' + str(test) + '条数据')
else:
    print('没有查询结果,请换个关键词试试!')
时间: 2024-10-13 15:17:42