"""Download the pictures listed under the ``data`` tag on colorhub.me.

For each of the search-result pages 1-10 the script collects every picture
card, opens the card's detail page, locates the ``card-img-top`` image and
saves it in the current working directory as ``<title>.jpg``.

Run it as a script; importing the module performs no network access.
"""

import random
import re
import sys
import time
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup
from fake_useragent import UserAgent

SEARCH_URL = "https://colorhub.me/search?tag=data&page={}"
FIRST_PAGE = 1
LAST_PAGE = 10

# Without a timeout ``requests`` waits forever on a stalled connection.
REQUEST_TIMEOUT = 30  # seconds

# Pause between pictures so the site is not hammered.
MIN_DELAY = 1
MAX_DELAY = 3

# Characters that are path separators or are rejected by common file systems.
_UNSAFE_FILENAME_CHARS = re.compile(r'[\\/:*?"<>|\x00-\x1f]')


def _safe_filename(title):
    """Return *title* with characters that are unsafe in a file name replaced.

    Unsafe characters become ``_``; leading/trailing spaces and dots are
    dropped. An empty result falls back to ``"untitled"``.
    """
    cleaned = _UNSAFE_FILENAME_CHARS.sub("_", title).strip(" .")
    return cleaned or "untitled"


def _get(url, user_agent):
    """GET *url* with the given User-Agent and raise on an HTTP error status."""
    response = requests.get(
        url,
        headers={"User-Agent": user_agent},
        timeout=REQUEST_TIMEOUT,
    )
    # Fail loudly instead of parsing or saving an error page.
    response.raise_for_status()
    return response


def _iter_cards(search_soup, base_url):
    """Yield ``(detail_url, title)`` for every linked card on a result page.

    Cards without an ``<a>`` element carrying both ``href`` and ``title`` are
    skipped. Links are resolved against *base_url*, so absolute and relative
    ``href`` values both work.
    """
    for card in search_soup.find_all("div", attrs={"class": "card"}):
        link = card.find("a")
        if link is None or not link.get("href") or not link.get("title"):
            continue
        yield urljoin(base_url, link["href"]), link["title"]


def _download_picture(detail_url, title, user_agent):
    """Fetch the picture shown on *detail_url* and save it as ``<title>.jpg``.

    Raises:
        requests.RequestException: a request failed or returned an error status.
        LookupError: the detail page has no ``img.card-img-top`` with a ``src``.
        OSError: the file could not be written.
    """
    detail_soup = BeautifulSoup(_get(detail_url, user_agent).text, "lxml")
    image = detail_soup.find("img", {"class": "card-img-top"})
    if image is None or not image.get("src"):
        raise LookupError("no card-img-top image on {}".format(detail_url))

    # urljoin copes with protocol-relative ("//host/x.jpg"), absolute and
    # site-relative sources alike.
    picture_url = urljoin(detail_url, image["src"])
    picture = _get(picture_url, user_agent)

    with open(_safe_filename(title) + ".jpg", mode="wb") as picture_file:
        picture_file.write(picture.content)


def main():
    """Crawl the search pages and download every picture found on them.

    A failure on one page or one picture is reported on stderr and the crawl
    continues with the next item.
    """
    user_agents = UserAgent()  # built once; ``.random`` picks a new UA string

    for page in range(FIRST_PAGE, LAST_PAGE + 1):
        search_url = SEARCH_URL.format(page)
        try:
            search_page = _get(search_url, user_agents.random)
        except requests.RequestException as err:
            print("skipping page {}: {}".format(page, err), file=sys.stderr)
            continue
        search_soup = BeautifulSoup(search_page.text, "lxml")

        for detail_url, title in _iter_cards(search_soup, search_url):
            try:
                # One UA string is shared by the detail-page and image requests.
                _download_picture(detail_url, title, user_agents.random)
            except (requests.RequestException, LookupError, OSError) as err:
                print("failed {!r}: {}".format(title, err), file=sys.stderr)
            else:
                print(title)
            time.sleep(random.uniform(MIN_DELAY, MAX_DELAY))


if __name__ == "__main__":
    main()
请各位大虾赐教!
原文地址:https://www.cnblogs.com/zhzhang/p/11239645.html
时间: 2024-10-05 09:56:49