# -*- coding: utf-8 -*-
"""Crawl the iqiyi ``get_feeds`` endpoint and append the feeds to a CSV file.

The script requests the endpoint page by page.  The ``feedId`` of the last
feed that has not been seen before is sent as the cursor of the next
request, and one ``(name, description)`` row per feed is appended to
``laozichuanqi.csv`` in the current working directory.
"""
import csv
import os
import random
import time

# NOTE(review): the query string embeds an auth cookie and device ids in
# plain text; consider loading them from the environment instead.
base_url = 'http://api-t.iqiyi.com/feed/get_feeds?authcookie=97RRnhwyZA35LUddm35Yh4x5m2Vv9DJtUOuFHAeFVinDJg17wOO5Em1Se5vGm2vqn8SoSb24&device_id=a35b964e5084125fb7dfab30205fe32b&m_device_id=969312df66b3063f0ad005ebce2181f1&agenttype=118&wallId=214740047&feedTypes=1%2C7&count=20&top=1&hasRecomFeed=1&needTotal=1&baseTvId=646605300&version=1&qypid=01010011010000000000&isVideoPage=true&tvid=646605300&categoryid=2&albumid=206289501&upOrDown=1&snsTime={snstime}&t={t}&'
cookies = {'Cookie': 'xxx'}
headers = {'User-Agent': 'xxx'}

CSV_NAME = "laozichuanqi.csv"
MAX_FOLLOW_UP_PAGES = 105      # at most this many requests after the first one
PAGE_DELAY_SECONDS = 15        # pause between two consecutive requests
REQUEST_TIMEOUT_SECONDS = 30   # never wait forever on a dead connection


def build_url(feed_id=None):
    """Return the request URL for the first page or the page after *feed_id*.

    Args:
        feed_id: cursor value appended as the ``feedId`` query parameter.
            ``None`` builds the URL of the very first request, which carries
            no ``feedId`` parameter.

    Returns:
        ``base_url`` with its ``{t}`` and ``{snstime}`` placeholders filled
        from the current time.  Follow-up URLs add random jitter to both
        values.
    """
    now = time.time()
    if feed_id is None:
        return base_url.format(t=int(now * 1000), snstime=int(now))
    template = base_url + "feedId={feedId}"
    return template.format(
        feedId=feed_id,
        t=int(now * 1000 + random.random() * 1000),
        snstime=int(now + random.random() * 100),
    )


def fetch_page(url):
    """GET *url* and return the ``data`` object of the JSON reply.

    Returns:
        The value stored under ``"data"`` in the decoded reply, or an empty
        dict when that key is missing or null.

    Raises:
        requests.RequestException: on connection errors, timeouts or an HTTP
            error status.
    """
    # Imported here so the pagination helpers stay importable (and testable)
    # on machines where the third-party ``requests`` package is missing.
    import requests

    resp = requests.get(url, headers=headers, cookies=cookies,
                        timeout=REQUEST_TIMEOUT_SECONDS)
    resp.raise_for_status()
    return resp.json().get("data") or {}


def crawl(fetch=fetch_page, max_pages=MAX_FOLLOW_UP_PAGES,
          delay=PAGE_DELAY_SECONDS):
    """Yield one ``(name, description)`` tuple per distinct feed.

    Args:
        fetch: callable taking a URL and returning the page's ``data`` dict.
        max_pages: maximum number of follow-up requests after the first one.
        delay: seconds to sleep before every follow-up request; ``0``
            disables the pause.

    Yields:
        ``(name, description)``; a missing or null field becomes ``''``.

    The crawl stops when a page has no feeds, when a follow-up page reports
    ``counts == 0``, when *max_pages* is exhausted, or when a page contains
    no feed that has not been seen before.  The last condition prevents the
    crawler from requesting and re-writing the same feeds over and over.
    """
    # NOTE(review): the API's paging semantics are not visible here.  The
    # stall check below only guarantees termination; confirm against the API
    # that ``feedId`` is the correct cursor for reaching older feeds.
    seen_ids = set()
    cursor = None
    for page_no in range(max_pages + 1):
        if page_no and delay:
            time.sleep(delay)
        url = build_url(cursor)
        data = fetch(url) or {}
        feeds = data.get('feeds') or []
        if page_no:
            # Progress diagnostics for follow-up requests.
            print(cursor)
            print(len(feeds))
            print(url)
            if data.get("counts") == 0:
                break
        if not feeds:
            break

        advanced = False
        for feed in feeds:
            feed_id = feed.get("feedId")
            if feed_id is not None:
                if feed_id in seen_ids:
                    continue  # already written; do not emit it again
                seen_ids.add(feed_id)
                # Equals "last feed of the page" whenever the page holds
                # only new feeds.
                cursor = feed_id
                advanced = True
            yield (feed.get("name") or '', feed.get("description") or '')
        if not advanced:
            # Nothing new on this page: the cursor cannot move forward.
            break


def main():
    """Run the crawl and append every row to the CSV file."""
    path = os.path.join(os.getcwd(), CSV_NAME)
    # The file is opened in append mode, so only write the header when the
    # file is new or empty; otherwise each run would add another header row.
    need_header = not os.path.exists(path) or os.path.getsize(path) == 0
    with open(path, 'a', encoding='utf-8', newline='') as csvfile:
        writer = csv.writer(csvfile)
        if need_header:
            writer.writerow(('name', 'description'))
        for name, description in crawl():
            print("{}--{}".format(name, description))
            writer.writerow((name, description))


if __name__ == "__main__":
    main()
以上代码有些问题:
例如,爬取时会陷入循环,反复抓取已经抓过的内容,无法继续往下翻页。
问题查找中……
未完待续……
时间: 2024-10-10 14:39:34