# -*- coding: utf-8 -*-
"""Interactive console reader for the qiushibaike.com "hot" listing.

Fetches one listing page at a time, prints the text of every post matched on
it, then waits for a line of input: an empty line (just Enter) loads the next
page, ``q`` quits.

The code runs on Python 2 (``urllib2`` / ``raw_input``) and on Python 3
(``urllib.request`` / ``input``).
"""
import re
import sys
from contextlib import closing

try:  # Python 2
    import urllib2 as urlrequest
    from urllib2 import URLError
except ImportError:  # Python 3
    import urllib.request as urlrequest
    from urllib.error import URLError

try:
    read_line = raw_input  # Python 2
except NameError:
    read_line = input  # Python 3

BASE_URL = 'http://www.qiushibaike.com/hot/page/'
HEADERS = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64)'}

# Compiled once at import time instead of once per fetched page.
# re.S lets ".*?" span the newlines between the article <div> and its <span>.
ITEM_PATTERN = re.compile(
    '<div class="article block untagged.*?<div.*?clearfix">'
    '.*?<span>(.*?)</span>',
    re.S
)
LINE_BREAK = re.compile('<br/>')


def fetch_page(page):
    """Download listing page number *page* and return its body as text.

    Raises:
        URLError: if the request fails (``HTTPError`` is a subclass).
    """
    request = urlrequest.Request(BASE_URL + str(page), headers=HEADERS)
    # closing() because Python 2 response objects are not context managers;
    # it guarantees the connection is released even if read()/decode() fails.
    with closing(urlrequest.urlopen(request)) as response:
        return response.read().decode('utf-8')


def extract_jokes(content):
    """Return the post texts matched in *content*.

    Each ``<br/>`` inside a post is replaced by a newline. Returns an empty
    list when nothing matches.
    """
    return [LINE_BREAK.sub('\n', item)
            for item in ITEM_PATTERN.findall(content)]


def main():
    """Print pages one by one until the user enters ``q`` (or stdin ends)."""
    if sys.version_info[0] == 2:
        # Kept from the original script: makes implicit str/unicode
        # conversions use UTF-8 so printing works when stdout is not a
        # UTF-8 terminal. Python 3 neither needs nor supports this.
        reload(sys)
        sys.setdefaultencoding('utf-8')

    page = 1
    try:
        while True:
            for joke in extract_jokes(fetch_page(page)):
                print(joke)
                print('')  # blank separator line between posts
            try:
                answer = read_line()
            except EOFError:
                # stdin closed: treat it like an explicit quit.
                break
            if answer == 'q':
                break
            page += 1
    except URLError as e:
        # HTTPError carries both attributes; a plain URLError only 'reason'.
        if hasattr(e, 'code'):
            print(e.code)
        if hasattr(e, 'reason'):
            print(e.reason)


if __name__ == '__main__':
    main()
实现的功能比较简单:按回车键会爬取下一页的段子,输入 q 则退出。
时间: 2024-08-02 15:13:56