抓取的目标网站是 ZOL 桌面壁纸站(desk.zol.com.cn)的汽车主题栏目:
代码中下面这两条被注释掉的 print 语句用于调试,需要调试时取消注释即可:
#print tag #print attrs
#!/usr/bin/env python
# Download car wallpapers from desk.zol.com.cn.
#
# The index page (Index_url) is scanned for gallery links.  Each gallery page
# is then parsed: the image tagged id="bigImg" is saved under `path`, and the
# link tagged class="next" is followed to the following page.
#
# Runs on Python 2 (as originally written) and on Python 3.
import re

try:  # Python 2 module names, as used by the original script
    import urllib2
    import HTMLParser
except ImportError:  # Python 3: the same APIs live under new module names
    import urllib.request as urllib2
    import html.parser as HTMLParser

base = "http://desk.zol.com.cn"
path = '/home/mk/cars/'
# URL of the gallery link most recently found on the index page.  A "next"
# link equal to this value is not followed, which stops the page-by-page walk.
star = ''
Index_url = 'http://desk.zol.com.cn/qiche/'

# Same pattern as the original, written as a raw string and compiled once.
_IMAGE_NAME = re.compile(r'[0-9]*\.jpg')


def _fetch(url):
    """Return the raw body of *url*, always closing the connection."""
    response = urllib2.urlopen(url)
    # try/finally instead of `with`: Python 2 responses are not context
    # managers.
    try:
        return response.read()
    finally:
        response.close()


def _as_text(raw):
    """Return *raw* in the string type that HTMLParser.feed() accepts.

    On Python 2 ``raw`` is already a ``str`` and is returned unchanged.  On
    Python 3 it is ``bytes``; latin-1 maps every byte to one character, so
    the ASCII tag and attribute text this script matches on is preserved
    without needing to know the page's real encoding.
    """
    if isinstance(raw, str):
        return raw
    return raw.decode('latin-1')


def get_url(html):
    """Fetch the gallery page at URL *html* and run it through the parser."""
    parser = parse(False)
    parser.feed(_as_text(_fetch(html)))


def download(url):
    """Save the image at *url* into the ``path`` directory.

    The file name is the ``<digits>.jpg`` part of the URL.  A URL without
    such a part is reported and skipped before any network access; it used
    to raise AttributeError after the image had already been fetched.
    """
    match = _IMAGE_NAME.search(url)
    if match is None:
        print('skipping, no .jpg name in: %s' % url)
        return
    name = match.group()
    print('downloading: %s' % name)
    content = _fetch(url)
    # Binary mode: image bytes must not go through text-mode translation,
    # and `with` closes the file even if the write fails.
    with open(path + name, 'wb') as out:
        out.write(content)


class parse(HTMLParser.HTMLParser):
    """HTML parser that follows gallery links and downloads images.

    Index -- True when parsing the index page (follow gallery links),
             False when parsing a gallery page (download the image and
             follow the "next" link).
    """

    def __init__(self, Index):
        self.Index = Index
        # Explicit base-class call: on Python 2 HTMLParser is an old-style
        # class, so super() cannot be used.
        HTMLParser.HTMLParser.__init__(self)

    def handle_starttag(self, tag, attrs):
        """Dispatch every start tag according to the kind of page parsed."""
        if self.Index:
            self._handle_index_tag(tag, attrs)
        else:
            self._handle_gallery_tag(tag, attrs)

    def _handle_index_tag(self, tag, attrs):
        """Follow <a> tags with four attributes whose first is class="pic"."""
        global star
        if tag != 'a' or len(attrs) != 4 or attrs[0] != ('class', 'pic'):
            return
        # The second attribute's value is used as the link target
        # (presumably href -- the site markup is not visible from here).
        target = attrs[1][1]
        if target is None:  # attribute written without a value
            return
        link = base + target
        print('found a link: %s' % link)
        star = link
        get_url(link)

    def _handle_gallery_tag(self, tag, attrs):
        """Download the id="bigImg" image and follow the class="next" link."""
        if tag == 'img':
            # Length guard: a bare <img> used to raise IndexError here.
            if len(attrs) >= 2 and attrs[0] == ('id', 'bigImg'):
                image_url = attrs[1][1]
                if image_url is not None:
                    print('found a picture: %s' % image_url)
                    download(image_url)
        elif tag == 'a':
            if len(attrs) == 4 and attrs[1] == ('class', 'next'):
                target = attrs[2][1]
                if target is None:
                    return
                next_url = base + target
                print('found a link: %s' % next_url)
                # Do not re-enter the gallery's starting link.
                if star != next_url:
                    get_url(next_url)


def main():
    """Parse the index page, which triggers the whole crawl."""
    parse(True).feed(_as_text(_fetch(Index_url)))


if __name__ == '__main__':
    main()
这个脚本的用途很简单,就是抓取桌面壁纸网站上的精美壁纸……
时间: 2024-10-13 23:03:22