# Fetch the Wikipedia article on the Python programming language and print
# the text of its main content container.
from urllib.request import urlopen

from bs4 import BeautifulSoup

# Use the response as a context manager so the HTTP connection is closed as
# soon as the document has been parsed, instead of leaking it.
with urlopen("http://en.wikipedia.org/wiki/Python_(programming_language)") as html:
    bsObj = BeautifulSoup(html, "html.parser")

# find() returns None when no element matches; fail with a clear message
# rather than an AttributeError on None.get_text().
content_div = bsObj.find("div", {"id": "mw-content-text"})
if content_div is None:
    raise RuntimeError('no <div id="mw-content-text"> element found in the page')

# get_text() already yields a str, so the former bytes(..., "UTF-8") /
# .decode("UTF-8") round trip changed nothing and has been dropped.
content = content_div.get_text()
print(content)
# Download a plain-text chapter and print it as text.
from urllib.request import urlopen

# Use the response as a context manager so the connection is always closed.
with urlopen(
    "http://www.pythonscraping.com/pages/warandpeace/chapter1.txt"
) as textPage:
    # Convert the encoding with str(): read() returns raw bytes, which are
    # decoded explicitly as UTF-8. The encoding name must be in plain ASCII
    # quotes; the typographic quotes used before were a SyntaxError.
    print(str(textPage.read(), "utf-8"))
时间: 2024-11-06 18:59:45