#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Download the C++ lesson .swf videos linked from the enet.com.cn index page.

The index page at ``vurl`` is scanned for lesson links, each lesson page is
scanned for the first ``.swf`` URL, and that file is saved into the current
working directory under a name built from the .swf directory and link title.

The script runs on both Python 2 and Python 3.
"""
import re
import urllib
from contextlib import closing

try:
    # Python 3 keeps urlopen/urlretrieve in urllib.request.
    import urllib.request as _urlrequest
except ImportError:
    # Python 2 exposes them directly on urllib.
    _urlrequest = urllib

vurl = 'http://www.enet.com.cn/eschool/video/c++/'
domain = 'http://www.enet.com.cn'

# NOTE(review): the server's real encoding is not visible from this script;
# gb18030 is only a guess used when the response declares no charset - confirm.
FALLBACK_ENCODING = 'gb18030'

# Lesson links on the index page: (site-relative path, link text).
VIDEO_LINK_RE = re.compile(
    r'href="(/eschool/video/autohtml/310/.*?.shtml)".*>(.*?)<\/a>')
# Absolute URL of the flash video embedded in a lesson page.
SWF_URL_RE = re.compile(
    r'(http:\/\/images\.enet\.com\.cn\/eschool\/c\+\+\/.*?\.swf)')
# First directory below "c++/" in a flash video URL.
SWF_DIR_RE = re.compile(r'c\+\+\/(.*?)\/.*?\.swf')


def fetchHtml(url):
    """Return the body of *url* as a native ``str``.

    The response is always closed. On Python 2 the raw byte string is
    returned unchanged; on Python 3 the bytes are decoded with the charset
    declared by the response, falling back to ``FALLBACK_ENCODING``.
    Undecodable bytes are replaced rather than raising.

    Raises:
        IOError: if the URL cannot be opened or read.
    """
    with closing(_urlrequest.urlopen(url)) as response:
        raw = response.read()
        headers = response.headers
    if isinstance(raw, str):
        # Python 2: bytes is str, which the str regex patterns accept as-is.
        return raw
    charset = headers.get_content_charset() or FALLBACK_ENCODING
    try:
        return raw.decode(charset, 'replace')
    except LookupError:
        # The server declared a charset name Python does not know.
        return raw.decode(FALLBACK_ENCODING, 'replace')


def extractVideoLinks(html):
    """Return ``(path, title)`` tuples for every lesson link found in *html*."""
    return VIDEO_LINK_RE.findall(html)


def extractSwfUrl(html):
    """Return the first flash video URL in *html*, or ``None`` if there is none."""
    match = SWF_URL_RE.search(html)
    return match.group(1) if match else None


def makeVideoName(swfUrl, title):
    """Build the local file name for a video.

    The name is ``<swf directory>_<title>.swf``. In the title, ':' is removed
    and ' ' and '.' become '_'. Path separators are also replaced with '_' so
    that text taken from a web page cannot point outside the current
    directory. When the URL has no directory below ``c++/`` the directory
    prefix is omitted instead of failing.
    """
    match = SWF_DIR_RE.search(swfUrl)
    prefix = match.group(1).replace('/', '_') + '_' if match else ''
    safeTitle = title.replace(':', '').replace(' ', '_').replace('.', '_')
    safeTitle = safeTitle.replace('/', '_').replace('\\', '_')
    return prefix + safeTitle + '.swf'


def formatProgress(done, total):
    """Return the 100-column text progress bar line for *done* of *total*."""
    # Floor division keeps the ratio an int on Python 3 as well.
    ratio = done * 100 // total if total > 0 else 100
    return ' '.join(
        ['[', '#' * ratio, ' ' * (100 - ratio), '] ', str(ratio), '%'])


def getVideoList(vurl):
    """Download every lesson video linked from the index page *vurl*.

    Each link is resolved against the module-level ``domain``. A lesson page
    with no .swf link, or one whose fetch or download fails, is reported and
    skipped so the remaining videos are still downloaded. A status line is
    printed per saved video and a progress bar line per lesson.

    Raises:
        IOError: if the index page itself cannot be fetched.
    """
    videoList = extractVideoLinks(fetchHtml(vurl))
    videoListCount = len(videoList)
    for index, (path, title) in enumerate(videoList, 1):
        pageUrl = domain + path
        try:
            swfUrl = extractSwfUrl(fetchHtml(pageUrl))
            if swfUrl is None:
                print('skipped (no .swf link found): ' + pageUrl)
            else:
                videoName = makeVideoName(swfUrl, title)
                _urlrequest.urlretrieve(swfUrl, videoName)
                print(' '.join([videoName, ' to --> ', str(index),
                                ' ==> ', str(videoListCount)]))
        except IOError as err:
            # One unreachable page or failed download must not end the batch.
            print('skipped (' + str(err) + '): ' + pageUrl)
        print(formatProgress(index, videoListCount))
    print('endding!!')


if __name__ == '__main__':
    getVideoList(vurl)
# Time: 2024-11-05 14:57:04