#coding=utf-8
"""Scrape the name list from one yw11.com listing page into ``name1.txt``.

Running the script fetches the page, appends the text of every ``<li>`` in
the first ``<ul>`` of the first ``div.listbox1_text`` to ``name1.txt`` (one
name per line, UTF-8, CRLF-terminated), and then prints how many lines the
file holds in total.
"""
import contextlib
import io
import re
import sys

try:
    import urllib2
except ImportError:
    # Python 3 moved Request/urlopen into urllib.request.
    import urllib.request as urllib2

from bs4 import BeautifulSoup


def getHtml(url):
    """Return the raw response body for *url*.

    *url* is handed straight to ``urllib2.urlopen``, so it may be either a
    URL string or a prepared ``urllib2.Request`` (the script passes the
    latter so that it can send a custom User-Agent header).
    """
    # closing() releases the connection even if read() raises.
    with contextlib.closing(urllib2.urlopen(url)) as page:
        return page.read()


url = "http://www.yw11.com/html/mi/3-85-0-1.htm"
user_agent = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_10_4) AppleWebKit/600.7.12 (KHTML, like Gecko) Version/8.0.7 Safari/600.7.12'
headers = {"User-Agent": user_agent}
request = urllib2.Request(url, headers=headers)
html = getHtml(request)

soup = BeautifulSoup(html, 'html.parser')
# Only the first matching container and its first list are scraped; the
# [0] lookups raise IndexError if the page contains no such element.
divs = soup.find_all('div', attrs={"class": "listbox1_text"})[0]
ul = divs.find_all('ul')[0]
lis = ul.find_all('li')

# Encode explicitly at the file boundary rather than changing the
# interpreter-wide default encoding. newline='' writes "\r\n" untranslated.
with io.open('name1.txt', 'a', encoding='utf-8', newline='') as f:
    # Count from 1 so the progress message states how many names have been
    # written so far.
    for index, li in enumerate(lis, start=1):
        name = li.text.lstrip()
        f.write(name + u'\r\n')
        # Single-argument print(...) behaves the same on Python 2 and 3.
        print("抓取了" + str(index) + "个名字")

# Counting lines needs no decoding, so read the raw bytes.
with io.open('name1.txt', 'rb') as f:
    lines = f.readlines()
print("当前一共有" + str(len(lines)))
# Time: 2024-10-25 19:05:33