[[email protected] getpage]$ cat job.py
#coding: utf-8
#title..href...
import os
import time
import urllib.request
# Download a fixed run of consecutive job pages and save each one as
# "<OUTPUT_DIR>/<jobID>.html".
#
# The job IDs fetched are START_PAGE, START_PAGE + 1, ...,
# START_PAGE + PAGE_COUNT - 1. Any network or decoding error aborts the run,
# exactly as an unhandled exception did in the original script.
START_PAGE = 83909   # first jobID to fetch
PAGE_COUNT = 30      # number of consecutive jobIDs to fetch
BASE_URL = 'http://job.csdn.net/Job/Index?jobID='
OUTPUT_DIR = 'job'
REQUEST_DELAY = 0.1  # seconds to sleep after each page (presumably to throttle requests)

# Make sure the target directory exists so the first write cannot fail with
# FileNotFoundError.
os.makedirs(OUTPUT_DIR, exist_ok=True)

for j, page in enumerate(range(START_PAGE, START_PAGE + PAGE_COUNT)):
    url = BASE_URL + str(page)

    # Close the HTTP response as soon as the body has been read.
    with urllib.request.urlopen(url) as response:
        content = response.read().decode('utf-8')

    # Write with an explicit encoding: the text was decoded as UTF-8, and the
    # platform default encoding may be unable to represent it.
    out_path = os.path.join(OUTPUT_DIR, str(page) + '.html')
    with open(out_path, 'w', encoding='utf-8') as out_file:
        out_file.write(content)

    print('donwloading ', j, ' page:', url)
    time.sleep(REQUEST_DELAY)

# The loop has no `break`, so this always runs once every page is saved.
print('download article finished')
时间: 2024-10-05 10:33:57