import urllib2
import re
import MySQLdb


class LatestTest:
    """Scrape the latest posts from toutiao.io and store them in MySQL."""

    # Matches one post entry and captures three fields: the href of the
    # title link, the link text, and the text of the first <span> that
    # follows inside the "meta" div. re.S lets ".*?" span line breaks.
    _POST_PATTERN = re.compile(
        r'<div class="post">.*?class="title">.*?href="(.*?)">(.*?)</a>.*?<div class="meta">.*?<span>(.*?)</span>',
        re.S
    )

    def __init__(self):
        """Set the page URL and the browser-like request headers."""
        self.url = "https://toutiao.io/latest"
        self.UserAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.97 Safari/537.36'
        self.header = {'User-Agent': self.UserAgent}

    def getDate(self, limit=10):
        """Fetch the page and return the first ``limit`` parsed posts.

        Args:
            limit: Maximum number of posts to return. Defaults to 10, the
                cap that was previously hard-coded. Values below zero are
                treated as zero.

        Returns:
            A list of ``[url, title, email]`` lists, each field stripped of
            surrounding whitespace. "email" is simply the name this script
            gives to the third captured field (the first <span> of the
            meta div).

        Raises:
            Any exception raised by ``urllib2.urlopen`` or ``read``; network
            errors are not caught here.
        """
        request = urllib2.Request(self.url, headers=self.header)
        response = urllib2.urlopen(request)
        try:
            html = response.read()
        finally:
            # Release the underlying socket even if read() fails.
            response.close()

        matches = self._POST_PATTERN.findall(html)
        return [
            [field.strip() for field in match]
            for match in matches[:max(limit, 0)]
        ]

    def savaDateMysql(self, url, title, email):
        """Insert one ``(url, title, email)`` row into the ``content`` table.

        Errors are printed and swallowed (best-effort insert), so a failed
        row does not stop the caller. A new connection is opened and closed
        on every call.

        The table is expected to exist already. The original author's DDL
        for it was:
            create table IF NOT EXISTS content(
                id int AUTO_INCREMENT PRIMARY KEY,
                url varchar(100), title varchar(100), email varchar(100))

        Args:
            url: Link of the post.
            title: Title text of the post.
            email: Third scraped field of the post.
        """
        # Placeholders are filled in by the driver, which escapes the values.
        # Scraped text may contain quotes, so it must never be spliced into
        # the SQL string directly.
        sql = "insert into content(url,title,email) values(%s,%s,%s)"
        conn = None
        try:
            # NOTE(review): host and credentials are hard-coded; consider
            # moving them to configuration.
            conn = MySQLdb.connect('192.168.200.23', 'root', 'g6s8m3t7s', 'mysql', charset='utf8')
            cursor = conn.cursor()
            cursor.execute(sql, (url, title, email))
            conn.commit()
        except Exception as e:
            print(e)
        finally:
            # conn stays None when connect() itself failed; closing it
            # unconditionally would raise and hide the real error.
            if conn is not None:
                conn.close()


if __name__ == '__main__':
    lat = LatestTest()
    contentlist = lat.getDate()
    try:
        # Fields are already stripped by getDate().
        for url, title, email in contentlist:
            print("%s %s %s" % (url, title, email))
            lat.savaDateMysql(url, title, email)
    except Exception as e:
        print(e)
# Time: 2024-10-13 12:18:33