1. 拉勾网需要User-Agent请求头才能获取到内容
"""Fetch the Lagou job-listing page for the keyword "python".

The request carries a browser-like User-Agent header; per the accompanying
notes, Lagou does not return the page content without one.
"""
from urllib import request

url = "https://www.lagou.com/jobs/list_python%20?labelWords=&fromSearch=true&suginput="

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.94 Safari/537.36',
}

req = request.Request(url, headers=headers)

# Use the response as a context manager so the connection is always closed.
with request.urlopen(req) as resp:
    # Prints the raw response body (bytes); no decoding is applied here.
    print(resp.read())
2. JSON解析后的内容
3. 尝试获取(需要User-Agent请求头以及Referer)
"""POST a job search to Lagou's positionAjax.json endpoint and print the reply.

Both a User-Agent and a Referer header are sent; per the accompanying notes,
the endpoint requires both.
"""
from urllib import request, parse

# Requires both the User-Agent and the Referer request headers.
# The HTML listing page (its URL is what gets sent as the Referer below):
# url="https://www.lagou.com/jobs/list_python%20?labelWords=&fromSearch=true&suginput="
urlajax = "https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false"

headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.96 Safari/537.36',
    'Referer': 'https://www.lagou.com/jobs/list_python%20?labelWords=&fromSearch=true&suginput=',
}

# Form fields sent in the POST body.
data = {
    'first': 'true',
    'pn': 1,
    'kd': 'python',
}

# Request.data must be bytes, so the form is URL-encoded and then UTF-8 encoded.
req = request.Request(
    urlajax,
    headers=headers,
    data=parse.urlencode(data).encode('utf-8'),
    method='POST',
)

# Use the response as a context manager so the connection is always closed.
with request.urlopen(req) as resp:
    print(resp.read().decode('utf-8'))
原文地址:https://www.cnblogs.com/834477300j/p/9424886.html
时间: 2024-11-01 12:09:52