import urllib.request
import re as gg
import os,string,sys
import easygui as g
# Module-level list; none of the functions visible in this file read or write it.
result = []
# urllib.request.urlopen(url): fetch the source of a web page.
# urllib.request.urlretrieve(url, 'filename'): download a web resource to a local file.
def getyuanma(_url, out_path=r'C:\Users\Administrator\Desktop\python\爬虫\源码1.txt'):
    """Download a page and save its UTF-8 decoded source to a text file.

    Args:
        _url: Address handed to ``urllib.request.urlopen``.
        out_path: File the decoded page is written to; it is overwritten on
            every call. The default is the same path that ``main`` reads
            back after each download.

    Raises:
        urllib.error.URLError: If the page cannot be fetched.
        UnicodeDecodeError: If the response body is not valid UTF-8.
        OSError: If the output file cannot be opened or written.
    """
    # Use context managers so the response and the file are closed even when
    # reading, decoding or writing raises.
    with urllib.request.urlopen(_url) as response:
        page_text = response.read().decode('UTF-8')  # convert bytes to str
    with open(out_path, 'w', encoding='utf-8') as f:
        f.write(page_text)
def getQQnum(file_url):
    """Print, line by line, the address-like matches found in a saved page.

    A match is 8-11 digits, an ``@``, up to 10 ASCII letters/digits and then
    up to 4 further characters. For every line with at least one match the
    list of matches for that line is printed.

    Args:
        file_url: Path of a UTF-8 text file to scan.

    Raises:
        OSError: If the file cannot be opened.
    """
    # NOTE(review): the trailing ``.{0,4}`` uses an unescaped dot, so it
    # accepts any four characters (not just a ".com"-style suffix). Left
    # unchanged to keep the printed output identical.
    pattern = gg.compile(r'([0-9]{8,11}\@[0-9a-zA-Z]{0,10}.{0,4})')
    # ``with`` closes the file; the original left the handle open.
    with open(file_url, 'r', encoding='utf-8') as f1:
        for line in f1:
            matches = pattern.findall(line)
            if matches:
                print(matches)
def getyeshu(file_url):
    """Return the page number of the first "尾页" (last page) link in a file.

    The file is scanned line by line for ``pn=<digits>">尾页``; the digits of
    the first occurrence are returned. The pattern accepts one to three
    digits only, so larger page numbers are not recognised.

    Args:
        file_url: Path of a UTF-8 text file holding the saved page source.

    Returns:
        The captured digits as a string, or None when no line matches.

    Raises:
        OSError: If the file cannot be opened.
    """
    pattern = gg.compile(r'pn=([0-9]{1,3})\">尾页')
    # ``with`` guarantees the handle is closed on the early return as well.
    with open(file_url, 'r', encoding='utf-8') as f2:
        for line in f2:
            found = pattern.search(line)
            if found:
                return found.group(1)
    return None
def main():
    """Ask for a link, then download and scan every page of it.

    The entered address is downloaded once to find the last-page number;
    then each page ``<address>?pn=1`` .. ``<address>?pn=<last>`` is
    downloaded in turn and scanned with ``getQQnum``. Nothing is scanned
    when no last-page link is found.
    """
    # Same file that getyuanma writes by default.
    source_path = r'C:\Users\Administrator\Desktop\python\爬虫\源码1.txt'

    _url1 = g.enterbox("请输入链接地址:","输入地址")
    if not _url1:
        # enterbox yields None when the dialog is cancelled; passing that
        # (or an empty string) on to urlopen would only raise.
        return

    getyuanma(_url1)
    print(_url1)

    _num = getyeshu(source_path)
    if _num is None:
        return

    for i in range(1, int(_num) + 1):
        print(i)
        getyuanma(_url1 + r'?pn=' + str(i))
        getQQnum(source_path)
# Script entry point.
# NOTE(review): this call is unguarded, so it also runs when the file is imported.
main()