# -*- coding:utf8 -*-
from bs4 import BeautifulSoup
import os, sys, urllib2, urllib
import thread, threading
class downloader(threading.Thread):
    """Thread that downloads a single URL to a local file.

    Args:
        url: Address of the resource to fetch.
        name: Local file path the resource is saved to.
    """

    def __init__(self, url, name):
        # Thread.__init__ must run before ``name`` is assigned: ``name`` is a
        # Thread property whose setter expects an initialised thread.
        threading.Thread.__init__(self)
        self.url = url
        # NOTE: ``name`` is also Thread's thread-name attribute, so the
        # destination path doubles as the thread name.
        self.name = name

    def run(self):
        """Fetch ``self.url`` and write it to the path in ``self.name``."""
        # Single-argument print() behaves the same as the print statement.
        print('downling from %s' % self.url)
        urllib.urlretrieve(self.url, self.name)
# Every downloader thread started by page_loop is appended to this list.
threads=[]
def page_loop(page=1):
    """Crawl gallery pages from ``page`` onward and download every JPEG.

    Each page is fetched and parsed, and one ``downloader`` thread is started
    for every ``<img>`` whose ``src`` contains ``jpg``. Files are saved as
    ``dbmeizi/<n><ext>``, where ``<n>`` is the module-level counter ``x`` and
    ``<ext>`` is the last four characters of the image URL.

    Args:
        page: Page number to start from (any value ``int()`` accepts).

    Raises:
        SystemExit: Raised through ``sys.exit(0)`` at the first page that
            holds fewer than five ``<img>`` tags, which is treated as the
            end of the gallery.
    """
    global x
    out_dir = 'dbmeizi'
    # urlretrieve does not create missing directories, so make sure the
    # target directory exists before any download thread is started.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    # Loop instead of recursing once per page, so a long gallery cannot
    # exhaust the interpreter's recursion limit.
    while True:
        url = 'http://www.beautylegmm.com/Tiara/beautyleg-936.html?page=%s' % page
        response = urllib2.urlopen(url)
        try:
            soup = BeautifulSoup(response)
        finally:
            # Release the HTTP connection as soon as the page is parsed.
            response.close()
        my_girl = soup.find_all('img')
        # End-of-gallery check (crude heuristic): a page with fewer than
        # five images is assumed to be past the last real page.
        if len(my_girl) < 5:
            print('已经全部抓取完毕')
            sys.exit(0)
        print('开始抓取')
        for girl in my_girl:
            link = girl.get('src')
            # <img> tags without a src attribute yield None; skip them
            # instead of failing on the substring test.
            if not link or 'jpg' not in link:
                continue
            flink = 'http://www.beautylegmm.com' + link
            print(flink)
            path = out_dir + '/' + str(x) + flink[-4:]
            x = x + 1
            t = downloader(flink, path)
            threads.append(t)
            t.start()
        page = int(page) + 1
        print('开始抓取下一页')
        print('the %s page' % page)
# Running counter that page_loop uses to number the saved image files.
x = 1
# Start crawling from the default first page.
page_loop()
# Time: 2024-10-12 09:21:05