# -*- coding: utf-8 -*-
# (The encoding declaration above must stay on the first or second line of
# the file: Python 2 rejects non-ASCII source text without it, per PEP 263.)

# 1. Cookie handling
import cookielib
import urllib2

# Keep cookies in an in-memory CookieJar and let the processor attach them to
# requests made through the opener built below.
cookie_support = urllib2.HTTPCookieProcessor(cookielib.CookieJar())
opener = urllib2.build_opener(cookie_support, urllib2.HTTPHandler)

# Install the opener globally so that plain urllib2.urlopen() calls use it.
urllib2.install_opener(opener)

# 'http://XXXX' is a placeholder URL; substitute the real target.
content = urllib2.urlopen('http://XXXX').read()
# 2. Using a proxy together with cookies
# NOTE(review): proxy_support is not defined in this excerpt; presumably a
# urllib2.ProxyHandler instance created elsewhere -- confirm before running.
# This only rebinds `opener`; it is not installed globally here.
opener = urllib2.build_opener(
    proxy_support,
    cookie_support,
    urllib2.HTTPHandler,
)
# 3. Submitting a form
import urllib

# Build the URL-encoded form body.
# NOTE(review): `fk` is not defined in this excerpt; presumably a token value
# obtained from the login page beforehand -- confirm where it comes from.
# NOTE: the non-ASCII 'login_submit' value requires the file to declare its
# source encoding when run under Python 2 (PEP 263).
postdata = urllib.urlencode({
    'username': 'XXXXX',
    'password': 'XXXXX',
    'continueURI': 'http://www.verycd.com/',
    'fk': fk,
    'login_submit': '登录',
})

# Supplying `data` makes urllib2 send the request as a POST.
req = urllib2.Request(
    url='http://secure.verycd.com/signin/*/http://www.verycd.com/',
    data=postdata,
)
result = urllib2.urlopen(req).read()
# 4. Masquerading as a browser
# Send a browser-style User-Agent header with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows; U; Windows NT 6.1; en-US; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6',
}

# Same login request as in the form example, now carrying the custom headers.
# `postdata` is the URL-encoded form body built earlier in the script.
req = urllib2.Request(
    url='http://secure.verycd.com/signin/*/http://www.verycd.com/',
    data=postdata,
    headers=headers,
)
# 5. Getting around anti-hotlinking checks
# Supply a Referer header; pass this dict as `headers` when building a Request.
headers = {
    'Referer': 'http://www.cnbeta.com/articles',
}