爬取豆瓣电影 Top250 并按评分从高到低输出,写了两个版本:
1、面向过程版本:
import requests
from pyquery import PyQuery as pq

# Base address of the Douban Top 250 listing; a page is selected with ?start=N.
url = 'https://movie.douban.com/top250'
# Collected [title_html, score_html] pairs, one per '.item' element found.
moves = []


def sec(item):
    """Return the second element (the score text) of a [title, score] pair."""
    return item[1]


def _score_as_number(item):
    """Return the pair's score as a float so the sort is numeric, not textual.

    A score that is missing (None) or not parseable as a number maps to
    -inf, which places the entry last in the descending sort instead of
    raising inside ``list.sort``.
    """
    try:
        return float(sec(item))
    except (TypeError, ValueError):
        return float('-inf')


# Offsets 0, 25, ..., 225: ten pages in steps of 25. Stopping before 250
# avoids an extra request for start=250.
for i in range(0, 250, 25):
    # A timeout keeps a stalled connection from blocking the script forever.
    content = requests.get(url + "?start=" + str(i), timeout=10)
    # Fail loudly on an HTTP error instead of silently parsing an error page.
    content.raise_for_status()
    for movie in pq(content.text).find('.item'):
        entry = pq(movie)
        moves.append([
            entry.find('.title').html(),
            entry.find('.rating_num').html(),
        ])

# Highest score first; list.sort is stable, so ties keep their page order.
moves.sort(key=_score_as_number, reverse=True)
for move in moves:
    print(move[0], move[1])
2、面向对象版本:
import requests
from pyquery import PyQuery as pq


class Douban:
    """Fetch the Douban Top 250 listing and print its movies sorted by score.

    Call ``start()`` to run the whole pipeline: build the page URLs,
    download each page, parse the movie entries into ``self.moves`` and
    print them from highest to lowest score.
    """

    def __init__(self):
        # Accumulated {'title': ..., 'score': ...} dicts, one per '.item'
        # element parsed so far.
        self.moves = []

    def geturl(self):
        """Return the list of page URLs, with start offsets 0, 25, ..., 225."""
        url = 'https://movie.douban.com/top250?start=%s'
        return [url % i for i in range(0, 250, 25)]

    def downloader(self, url):
        """Download ``url`` and return the response body as text.

        Raises:
            requests.RequestException: on a timeout, a connection failure or
                an HTTP error status.
        """
        # A timeout keeps a stalled connection from blocking forever.
        r = requests.get(url, timeout=10)
        # Fail loudly on an HTTP error instead of returning an error page
        # that would parse to zero movies.
        r.raise_for_status()
        return r.text

    def html_parser(self, page):
        """Append one title/score dict to ``self.moves`` per '.item' in ``page``."""
        for movie in pq(page).find('.item'):
            entry = pq(movie)
            self.moves.append({
                'title': entry.find('.title').html(),
                'score': entry.find('.rating_num').html(),
            })

    @staticmethod
    def _score_as_number(move):
        """Return ``move['score']`` as a float so the sort is numeric.

        A missing (None) or non-numeric score maps to -inf, which places the
        entry last in the descending sort instead of raising.
        """
        try:
            return float(move['score'])
        except (TypeError, ValueError):
            return float('-inf')

    def output(self):
        """Sort ``self.moves`` in place by score, highest first, and print it."""
        # list.sort is stable, so equal scores keep their parse order.
        self.moves.sort(key=self._score_as_number, reverse=True)
        for move in self.moves:
            print(move['title'], move['score'])

    def start(self):
        """Download and parse every page, then print the sorted result once."""
        for url in self.geturl():
            page = self.downloader(url)
            self.html_parser(page)
        self.output()


dou = Douban()
dou.start()
原文地址:https://www.cnblogs.com/babihuang/p/9085867.html
时间: 2024-10-30 09:54:08