# -*- coding: utf-8 -*-
"""Worked examples for ``urllib2``, ``cookielib`` and ``re`` (Python 2).

Each ``egN`` section below is a small, independent demonstration that runs
at module level and performs real HTTP requests.  ``urllib2`` and
``cookielib`` exist only in Python 2; the statements are nevertheless
written in a form (``except ... as e``, single-argument ``print(...)``)
that produces the same output as the classic Python 2 spelling.
"""
import cookielib
import re
import urllib
import urllib2
from contextlib import closing
from urllib2 import Request, urlopen, URLError, HTTPError

# eg1: fetch a page directly from a URL string.
# closing() guarantees the underlying socket is released after reading.
with closing(urllib2.urlopen("http://www.baidu.com")) as response:
    html = response.read()

# eg2: fetch a page through an explicit Request object.
req = urllib2.Request("http://www.baidu.com")
with closing(urllib2.urlopen(req)) as response:
    the_page = response.read()

# eg3: send form data with POST.
url = "http://www.msdn.com"
values = {
    'name': 'Xu',
    'location': 'YJ',
    'language': 'Python',
}

data = urllib.urlencode(values)
# Passing a data argument makes urllib2 issue a POST carrying the form.
req = urllib2.Request(url, data)
with closing(urllib2.urlopen(req)) as response:
    the_page = response.read()

# eg4: send data with GET.
# (omitted)

# eg5: add a User-Agent header.
url = "http://www.msdn.com"
user_agent = 'Mozilla/4.0(compatible;MSIE 5.5;Windows NT)'
values = {
    'name': 'Xu',
    'location': 'YJ',
    'language': 'Python',
}

headers = {'User-Agent': user_agent}
data = urllib.urlencode(values)
# Send the request together with the form data and the User-Agent header.
req = urllib2.Request(url, data, headers)
with closing(urllib2.urlopen(req)) as response:
    the_page = response.read()

# eg6: catching exceptions.
# way 1: catch URLError only.  HTTPError is a subclass of URLError, so it is
# caught here as well, but a plain URLError carries no ``code`` attribute;
# each attribute is therefore checked before it is printed.
try:
    response = urllib2.urlopen(req)
except urllib2.URLError as e:
    if hasattr(e, 'reason'):
        print(e.reason)
    if hasattr(e, 'code'):
        print(e.code)  # 404 or 500...
else:
    response.close()

# way 2: catch the more specific HTTPError first, then URLError.
try:
    response = urllib2.urlopen(req)
except urllib2.HTTPError as e:
    print(e.code)  # 404 or 500...
except urllib2.URLError as e:
    print(e.reason)
else:
    response.close()

# way 3 (recommended): catch URLError and inspect which attributes exist.
try:
    response = urllib2.urlopen(req)
except urllib2.URLError as e:
    if hasattr(e, 'code'):
        print('Error code: %s' % (e.code,))
    elif hasattr(e, 'reason'):
        # The 1-tuple keeps %-formatting safe whatever type ``reason`` is.
        print('Reason: %s' % (e.reason,))
else:
    response.close()

# eg7: inspect the final URL (after any redirects) and the response headers.
old_url = "http://www.baidu.com"
req = Request(old_url)
with closing(urlopen(req)) as response:
    rel_url = response.geturl()
    info = response.info()

# eg8: collect cookies with a CookieJar-backed opener.
cookie = cookielib.CookieJar()
opener = urllib2.build_opener(urllib2.HTTPCookieProcessor(cookie))
with closing(opener.open("http://www.baidu.com")) as response:
    pass  # the request is made only to populate the cookie jar
for item in cookie:
    # NOTE(review): the original statement was truncated after ``item.``;
    # ``value`` is the presumed attribute -- confirm.
    print('%s %s' % (item.name, item.value))

# eg9: regular expressions.
pattern = re.compile(r"hello")
match1 = pattern.match("hello world")
if match1:
    print(match1.group())
else:
    print("match失败")

# Time: 2024-10-11 05:27:53