#!/usr/bin/python # -*- coding: utf-8 -*- import requests import time import urllib2 import re import BeautifulSoup #you need to install BeautifulSoup and requests modules from http://pypi.python.org/ manuelly def main(): url = ‘http://www.the_web_site.com/login.aspx‘ headers = {‘content-type‘: ‘application/x-www-form-urlencoded‘} #use requests to keep the cookies session = requests.Session() response = session.get(url, headers=headers) #use beautifulsoup module to retrieve hidden postdata __VIEWSTATE and __EVENTVALIDATION soup = BeautifulSoup.BeautifulSoup(response.content) postdata = { ‘__VIEWSTATE‘: soup.find(‘input‘, id=‘__VIEWSTATE‘)[‘value‘], ‘__EVENTVALIDATION‘: soup.find(‘input‘, id=‘__EVENTVALIDATION‘)[‘value‘], ‘ctl00$ContentPlaceHolder1$UserName1‘: ‘username‘, ‘ctl00$ContentPlaceHolder1$Password1‘: ‘password‘, ‘ctl00$ContentPlaceHolder1$RememberMe1‘: ‘on‘, ‘ctl00$ContentPlaceHolder1$LoginButton1.x‘: ‘46‘, ‘ctl00$ContentPlaceHolder1$LoginButton1.y‘: ‘0‘ } #login to the site response = session.post(url, data=postdata, headers=headers) print response #search_file_write(response) #get the web page content output = session.get(‘http://www.the_web_site.com/the_web_page.aspx‘) print output #search_file_write(output) respHtml = output.text #print respHtml #print type(respHtml) #check the webpage whether there have something you want found_s = re.search(u‘http://www.the_web_site.com/\d{7}.aspx\?sjuser=.{14}‘, respHtml) found_p = re.search(u‘分</span>:\d{1,3}分‘, respHtml) print "found =",found_s if(found_s): #print the search result print "\033[1;32;40mThere have something !\033[0m" s_link = found_s.group(0) s_point = found_p.group(0) print "URL=",s_link print "Point=",s_point else: print "\033[1;31;40mno found.\033[0m" def search_file_write(find_url): spath = "record.txt" f = open(spath,‘a‘) f.write(u"%s \n" %find_url) f.close() if __name__=="__main__": while True: main() time.sleep(30)
# Time: 2024-10-14 14:05:04