# python3
# jiandan meizi tu
"""Download .jpg images from the jandan.net "ooxx" comment pages.

Starting from the newest comment page, the script walks backwards one page
at a time, collects every ``img src="http....jpg`` address found in the page
HTML and saves each image into a local folder.
"""
import os
import random
import re
import time
import urllib.request

# One of these User-Agent values is picked at random for every request.
USER_AGENTS = ('Mozilla/4.0', 'Mozilla/4.1', 'Mozilla/4.5', 'Mozilla/5.1')

# Captures the address of an <img> whose src starts with "http" and ends in
# ".jpg".  ``[^"]`` keeps the match inside the src attribute, so a ".jpg"
# that merely appears later in the page is never glued onto another URL.
_IMG_SRC_RE = re.compile(r'img src="(http[^"]*?\.jpg)')


def _timestamp() -> str:
    """Return the local time formatted for the progress messages."""
    return time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())


def url_open(url: str, timeout: float = 30) -> bytes:
    """Fetch *url* and return the raw response body.

    Args:
        url: Address to fetch.
        timeout: Seconds to wait on the connection before giving up, so a
            stalled server cannot hang the whole run.

    Returns:
        The response body as bytes.

    Raises:
        urllib.error.URLError: If the request fails.
    """
    request = urllib.request.Request(
        url, headers={'User-Agent': random.choice(USER_AGENTS)})
    # The context manager closes the connection once the body has been read.
    with urllib.request.urlopen(request, timeout=timeout) as response:
        return response.read()


def get_current_page(url: str) -> str:
    """Return the text between ``[`` and ``]`` after the current-page marker.

    The page at *url* is expected to contain
    ``current-comment-page">[<number>]``.

    Raises:
        ValueError: If the marker or the closing ``]`` is not in the page.
    """
    html = url_open(url).decode('utf-8')
    marker = 'current-comment-page'
    marker_pos = html.find(marker)
    if marker_pos == -1:
        raise ValueError('current-comment-page marker not found in ' + url)
    # Skip the marker itself plus the three characters `">[` that follow it.
    start = marker_pos + len(marker) + 3
    end = html.find(']', start)
    if end == -1:
        raise ValueError('no closing "]" after page marker in ' + url)
    return html[start:end]


def find_imgs(url: str) -> list:
    """Return the .jpg image addresses found in the page at *url*.

    Only ``img src="http...`` attributes whose value contains ``.jpg`` are
    reported, in page order; each address is cut right after ``.jpg``.
    """
    html = url_open(url).decode('utf-8')
    return _IMG_SRC_RE.findall(html)


def save_imgs(folder: str, img_addrs) -> None:
    """Download every address in *img_addrs* into *folder*.

    Each file is named after the last path segment of its address.  The
    folder is created when missing.  An image that cannot be downloaded is
    reported and skipped so one broken link does not abort the batch.
    """
    os.makedirs(folder, exist_ok=True)
    for address in img_addrs:
        filename = address.split('/')[-1]
        # Download before opening the target so a failed request does not
        # leave an empty file behind.
        try:
            img = url_open(address)
        except OSError as err:  # URLError and timeouts are OSError subclasses
            print(_timestamp(), 'skip', address, err)
            continue
        with open(os.path.join(folder, filename), 'wb') as f:
            f.write(img)


def download_mm(folder: str = 'xx', pages: int = 300) -> None:
    """Download images from up to *pages* comment pages into *folder*.

    Pages are visited newest first; the walk stops early once page 1 has
    been processed.  The script pauses two seconds before every third page.
    """
    os.makedirs(folder, exist_ok=True)
    url = 'http://jandan.net/ooxx/'
    current_page_num = int(get_current_page(url))

    for i in range(pages):
        if current_page_num < 1:
            break  # ran out of pages before reaching the requested count
        print(_timestamp(), 'current_page_num', current_page_num)
        if i % 3 == 0:
            print(_timestamp(), "sleep 2 seconds...")
            time.sleep(2)
        # Fetch the page that was just logged, then step back one page.
        page_url = url + 'page-' + str(current_page_num) + '#comments'
        img_addrs = find_imgs(page_url)
        save_imgs(folder, img_addrs)
        current_page_num -= 1


if __name__ == '__main__':
    download_mm()
# Time: 2025-01-04 01:47:12