"""Scrape title, author and rating for the books on Douban's Top 250 list."""

import requests
from bs4 import BeautifulSoup

# Listing URL; the ``start`` query value is the offset of the first book shown.
TOP250_URL = 'https://book.douban.com/top250?start='
# Seconds to wait for a response, so a stalled connection cannot hang the crawl.
REQUEST_TIMEOUT = 10


def _fetch_soup(url):
    """Download *url* and return its HTML parsed with the lxml parser.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    # Fail here with a clear HTTP error instead of later with a confusing
    # "element not found" when the server returned an error page.
    response.raise_for_status()
    return BeautifulSoup(response.text, 'lxml')


def _first_text(soup, selector):
    """Return the text of the first element matching *selector*, or None."""
    node = soup.select_one(selector)
    return node.text if node is not None else None


def get_book(url):
    """Fetch one book page, print its details and return them.

    Args:
        url: Address of a single book's detail page.

    Returns:
        A dict with the keys ``title``, ``score`` and ``author``. A value is
        ``None`` when the corresponding element is not present on the page.

    Raises:
        requests.RequestException: if the page cannot be downloaded.
    """
    soup = _fetch_soup(url)

    title = _first_text(soup, 'h1 > span')
    score = _first_text(soup, 'strong.ll.rating_num')
    author = _first_text(soup, 'div#info > a')
    if author is not None:
        # The author link text is padded with spaces and newlines; strip them all.
        author = author.replace(" ", "").replace("\n", "")

    data = {
        'title': title,
        'score': score,
        'author': author,
    }
    print(data)
    return data


def get_all_book():
    """Walk the Top 250 listing pages and scrape every linked book.

    The listing is requested at offsets 0, 25, ..., 225; every link matched by
    ``div.pl2 > a`` on each page is passed to :func:`get_book`.

    Returns:
        A list of the dicts returned by :func:`get_book`, in crawl order.

    Raises:
        requests.RequestException: if any page cannot be downloaded.
    """
    books = []
    for start in range(0, 250, 25):
        soup = _fetch_soup(TOP250_URL + str(start))
        for anchor in soup.select('div.pl2 > a'):
            link = anchor.get('href')
            if not link:
                # An anchor without an href has nothing to fetch.
                continue
            books.append(get_book(link))
    return books


# Run the crawl only when executed as a script, so importing this module
# does not trigger network requests.
if __name__ == '__main__':
    get_all_book()
# Source article: https://www.cnblogs.com/hiss/p/8870792.html
# Timestamp: 2024-10-15 13:57:13