我,找回,密码了!
https://blog.csdn.net/weixin_42590877/article/details/99686720?tdsourcetag=s_pcqq_aiomsg
就是这个网址助我!
这是我要爬取评论的商品页面(军大衣,看着就暖和):https://item.jd.com/33919692427.html
# In[sy_final]:
#!/usr/bin/python
import requests
import json
# Request headers sent with every page fetch.  Per the original author's note,
# these headers caused a lot of trouble: the script only runs when a Referer
# header is present.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/75.0.3770.142 Safari/537.36',
    'Referer': 'https://item.jd.com/33919692427.html',
}

# Name of the JSONP callback requested in the URL; the response body is
# wrapped as ``<callback>(<json>);``.
CALLBACK = 'fetchJSON_comment98vv382'

# Comment endpoint for the product; ``%d`` is the zero-based page index.
URL_TEMPLATE = 'https://sclub.jd.com/comment/productPageComments.action?callback=fetchJSON_comment98vv382&productId=33919692427&score=0&sortType=5&page=%d&pageSize=10&isShadowSku=0&fold=1'

# Where the collected comment texts are written.
OUTPUT_PATH = "E:\\2020期末复习\\python\\军大衣前20页评论2.txt"

# Number of comment pages to fetch (the original script assumed 20).
PAGE_COUNT = 20


def build_url(page):
    """Return the comment-page URL for the zero-based index *page*."""
    return URL_TEMPLATE % page


def parse_jsonp(text, callback=CALLBACK):
    """Decode a JSONP response body into a Python object.

    The wrapper ``<callback>(`` ... ``)`` with an optional trailing ``;`` is
    removed explicitly.  ``str.strip(chars)`` is deliberately not used: it
    strips any of the given *characters*, not the wrapper as a unit, so it can
    eat into the payload.  Text that is not wrapped is parsed as plain JSON.

    Raises:
        ValueError: if the remaining text is not valid JSON (for example an
            empty response body); ``json.JSONDecodeError`` is a subclass.
    """
    body = text.strip()
    prefix = callback + '('
    if body.startswith(prefix):
        body = body[len(prefix):].rstrip().rstrip(';').rstrip()
        if body.endswith(')'):
            body = body[:-1]
    return json.loads(body)


def extract_contents(payload):
    """Return the ``content`` text of every entry under ``payload['comments']``.

    A missing or null ``comments`` key yields an empty list.
    """
    return [comment['content'] for comment in payload.get('comments') or []]


def scrape(path=OUTPUT_PATH, pages=PAGE_COUNT):
    """Fetch *pages* comment pages and write one comment per line to *path*.

    Each comment is also echoed to stdout.  The file is opened in a ``with``
    block so it is always closed (the original ``file.close`` lacked the call
    parentheses and never ran), and with an explicit UTF-8 encoding so
    characters outside the platform's default codec do not abort the run.

    Raises:
        requests.RequestException: on network failure, timeout or an HTTP
            error status.
        ValueError: if a response body cannot be decoded as JSON/JSONP.
    """
    with open(path, "w", encoding="utf-8") as out:
        for page in range(pages):
            response = requests.get(build_url(page), headers=headers, timeout=10)
            response.raise_for_status()
            for content in extract_contents(parse_jsonp(response.text)):
                out.write(content)
                # Text mode already translates '\n' to the platform line
                # ending; writing '\r\n' would give '\r\r\n' on Windows.
                out.write('\n')
                print(content)


if __name__ == "__main__":
    scrape()
原文地址:https://www.cnblogs.com/sakuraXiYue/p/12005991.html