#!/usr/bin/env python
# coding: utf-8
"""Download every .jpg image referenced by an HTML page.

Written to run on both Python 2 and Python 3: print is always called with a
single parenthesised argument and the urllib helpers are resolved at import
time from whichever location the running interpreter provides.
"""
import re
import urllib
from contextlib import closing

try:  # Python 3 module layout
    from urllib.parse import urljoin
    from urllib.request import urlopen, urlretrieve
except ImportError:  # Python 2 module layout
    from urlparse import urljoin
    urlopen, urlretrieve = urllib.urlopen, urllib.urlretrieve

# Matches src="....jpg". [^"] keeps a match inside one attribute value; the
# previous '.+?' could start at an earlier non-jpg src on the same line and
# swallow everything up to the next '.jpg"'.
_JPG_SRC_RE = re.compile(r'src="([^"]+\.jpg)"')


def GetHtml(url):
    """Fetch *url* and return the response body.

    The body is returned undecoded, exactly as read from the response
    (``bytes`` on Python 3).
    """
    # closing() releases the connection even if read() raises, and also works
    # with Python 2 response objects, which are not context managers.
    with closing(urlopen(url)) as page:
        return page.read()


def GetImg(html, base_url=None):
    """Download every ``src="....jpg"`` image found in *html*.

    Images are saved in the current working directory as ``0.jpg``,
    ``1.jpg``, ... in the order they appear, with progress reported
    through :func:`cbx`.

    Args:
        html: Page markup, either text or the raw bytes from ``GetHtml``.
        base_url: Optional URL of the page. When given, relative and
            protocol-relative image URLs are resolved against it; when
            omitted, URLs are used exactly as written in the markup.

    Returns:
        None.
    """
    # Python 3 hands back bytes from GetHtml, which a text pattern cannot
    # search. On Python 2 bytes *is* str, so the input is left untouched.
    if isinstance(html, bytes) and not isinstance(html, str):
        html = html.decode("utf-8", "replace")

    for index, img_url in enumerate(_JPG_SRC_RE.findall(html)):
        if base_url is not None:
            img_url = urljoin(base_url, img_url)
        urlretrieve(img_url, "%s.jpg" % index, cbx)
        # The message counts finished images from 1; file names start at 0.
        print("img: %s is done!" % (index + 1))


def cbx(a, b, c):
    """Print download progress as a percentage (``urlretrieve`` reporthook).

    Args:
        a: Number of blocks transferred so far.
        b: Block size in bytes.
        c: Total size of the remote file in bytes; zero or negative when
            the size is unknown or the file is empty.
    """
    if c <= 0:
        # No meaningful percentage exists; dividing would raise
        # ZeroDivisionError (c == 0) or print a negative value (c == -1).
        return
    # The last block is usually partial, so a * b can overshoot c.
    per = min(100.0, 100.0 * a * b / c)
    print("%.2f%%" % per)


def _main():
    """Fetch the Baidu home page and download the .jpg images it references."""
    url = "http://www.baidu.com"
    GetImg(GetHtml(url), url)


if __name__ == "__main__":
    _main()
# Time: 2024-11-05 06:25:10