#!/usr/bin/env python
# -*- coding: UTF-8 -*-
# by i3ekr
"""Reverse-IP lookup through Bing's ``ip:`` search operator.

The script queries Bing for ``ip:<address>``, collects the text of every
``<cite>`` element found on each result page into a temporary file, then
appends the de-duplicated entries to an output file (``ok.txt`` unless
``-o`` is given).
"""

import io
import optparse
import os
import re
import sys
import time

# Scratch file that collects raw hits before de-duplication.
TMP_FILE = "./tmp.txt"
# Output file used when -o/--out is not supplied.
DEFAULT_OUT_FILE = "ok.txt"
# The page count and the ``first=`` offset both assume 10 hits per page.
RESULTS_PER_PAGE = 10
# Seconds to wait for Bing before giving up on a request.
REQUEST_TIMEOUT = 10

# Captures the text of Bing's result counter, e.g. "1,234 条结果".
COUNT_PATTERN = r"<span class=\"sb_count\">(.*?)</span><span class=\"ftrB\""
# Captures the text between <cite> and </cite>.
CITE_PATTERN = r"(?<=<cite>).*?(?=</cite>)"

BANNER = """
=========================
[+] by i3ekr
[+] Blog nul1.cnblogs.com
[+] Time 2018/6/13
=========================
"""

parse = optparse.OptionParser(usage="python %prog -i '127.0.0.1'", version="%prog 1.0")
parse.add_option('-i', '--ip', action='store', dest='ip', help='ip parse...')
parse.add_option('-o', '--out', action='store', dest='out',
                 help='this parse is out result file exp:-o "/tmp/result.txt""')
# Kept from the original script; no option in this file reads this value.
parse.set_defaults(v=1.2)


def gethtml(ip, page):
    """Fetch one Bing result page for ``ip:<ip>``.

    Args:
        ip: Address to search for.
        page: Value for Bing's ``first=`` parameter (index of the first hit
            on the requested page: 1, 11, 21, ...).

    Returns:
        The decoded page body, or the string "访问错误" when the request
        fails.
    """
    # Imported here so that --help and the parsing helpers work even when
    # the third-party dependency is not installed.
    import requests

    url = ("https://www.bing.com/search?q=ip:" + ip
           + "&qs=ds&first=" + str(page) + "&FORM=PERE4")
    try:
        # .text (not .content) so the str regexes below work on Python 3;
        # the timeout keeps a stalled connection from hanging the script.
        return requests.get(url, timeout=REQUEST_TIMEOUT).text
    except requests.RequestException:
        return "访问错误"


def chongfu(src=TMP_FILE, dst=DEFAULT_OUT_FILE):
    """De-duplicate the collected entries and append them to the output file.

    Args:
        src: File with one entry per line, as written by ``geturl``.
        dst: File the unique entries are appended to, one per CRLF-terminated
            line.

    A missing ``src`` (nothing was collected) is treated as "nothing to do".
    """
    if not os.path.exists(src):
        return
    # newline="" disables newline translation so the CRLF separators written
    # by geturl are read back (and written out) unchanged on every platform.
    with io.open(src, "r", encoding="utf-8", newline="") as handle:
        entries = handle.read().splitlines()
    # Sorted for a reproducible output order; blank entries are dropped.
    unique = sorted(set(entry for entry in entries if entry))
    with io.open(dst, "a", encoding="utf-8", newline="") as handle:
        for entry in unique:
            handle.write(entry + "\r\n")


def geturl(html, tmp_path=TMP_FILE):
    """Extract every ``<cite>`` text from ``html`` and append it to a file.

    Args:
        html: Page body returned by ``gethtml``.
        tmp_path: File the entries are appended to, one per CRLF-terminated
            line.

    Returns:
        The list of extracted entries, in page order (possibly empty).
    """
    urls = re.findall(CITE_PATTERN, html)
    print(urls)
    if urls:
        # Open once for the whole page instead of once per entry.
        with io.open(tmp_path, "a", encoding="utf-8", newline="") as handle:
            for url in urls:
                handle.write(url + "\r\n")
    return urls


def result_page(html=None):
    """Work out how many result pages Bing reports for a search.

    Args:
        html: Body of the first result page. When omitted, a module-level
            ``html`` variable is used if one exists (this is how the original
            script passed the page in).

    Returns:
        The number of pages, i.e. the reported hit count divided by
        ``RESULTS_PER_PAGE`` and rounded up.

    Raises:
        SystemExit: after printing "没有与此相关的结果" when the page
            contains no parsable result counter.
    """
    if html is None:
        html = globals().get("html") or ""
    counters = re.findall(COUNT_PATTERN, html)
    # Pull out just the number so the parse does not depend on the wording
    # around it ("1,234 条结果", "1,234 results", ...).
    digits = re.search(r"\d[\d,]*", counters[0]) if counters else None
    if digits is None:
        print("没有与此相关的结果")
        sys.exit()
    count = int(digits.group().replace(",", ""))
    # Round up: a final, partially filled page still has to be fetched.
    return (count + RESULTS_PER_PAGE - 1) // RESULTS_PER_PAGE


def main(argv=None):
    """Run the lookup for the IP given on the command line.

    Args:
        argv: Argument list without the program name; defaults to
            ``sys.argv[1:]``.
    """
    options, _args = parse.parse_args(argv)
    print(BANNER)

    if not options.ip:
        parse.print_help()
        sys.exit()

    ip = options.ip
    first_page = gethtml(ip, 1)
    pages = result_page(first_page)
    try:
        for index in range(pages):
            # Bing's ``first=`` is the 1-based index of the first hit.
            offset = 1 + index * RESULTS_PER_PAGE
            # The first page was already fetched to read the counter.
            html = first_page if index == 0 else gethtml(ip, offset)
            geturl(html)
            print("第[%s]页" % (index + 1))
        chongfu(TMP_FILE, options.out or DEFAULT_OUT_FILE)
    finally:
        # Always drop the scratch file so a later run does not pick up
        # entries left behind by this one.
        if os.path.exists(TMP_FILE):
            os.remove(TMP_FILE)
    sys.exit()


if __name__ == "__main__":
    main()
# Original article: https://www.cnblogs.com/nul1/p/9189692.html
# Retrieved: 2024-10-10 07:06:32