def get_content():
    """Fetch the 58.com Beijing home page and print it to standard output.

    The page is requested with a desktop-browser User-Agent header, decoded
    as UTF-8, re-encoded to the local filesystem encoding and printed.

    Returns:
        None. The page is only printed.

    Raises:
        urllib2.URLError: if the request cannot be completed.
        UnicodeDecodeError: if the response body is not valid UTF-8.
    """
    user_agent = (
        "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"
    )
    headers = {"User-Agent": user_agent}
    url = "http://bj.58.com/"
    req = urllib2.Request(url, headers=headers)

    response = urllib2.urlopen(req)
    try:
        raw_page = response.read()
    finally:
        # Release the connection even if reading the body fails.
        response.close()

    # getfilesystemencoding() may return None on Python 2 when the system
    # default encoding is in use; fall back so encode() gets a real name.
    encoding = sys.getfilesystemencoding() or sys.getdefaultencoding()

    # "replace" keeps characters the local encoding cannot represent from
    # aborting the output with UnicodeEncodeError.
    the_page = raw_page.decode("UTF-8").encode(encoding, "replace")

    # Single parenthesised argument: prints the same as the Python 2 print
    # statement while remaining parseable as a function call.
    print(the_page)
抓取网页时,乱码问题
时间: 2024-10-12 07:51:39