import requests
from bs4 import BeautifulSoup

# Seconds to wait for a server response before giving up on a page, so a
# single stalled connection cannot hang the whole crawl.
REQUEST_TIMEOUT = 10


def getHTMLText(url):
    """Download *url* and return the response body decoded as UTF-8.

    Args:
        url: Address of the page to fetch.

    Returns:
        The page text, or an empty string when the request fails (network
        error, timeout, or a non-2xx HTTP status).
    """
    try:
        r = requests.get(url, timeout=REQUEST_TIMEOUT)
        r.raise_for_status()
    except requests.RequestException:
        return ''
    # Force UTF-8 rather than trusting the encoding guessed from the headers.
    r.encoding = 'utf-8'
    return r.text


def fillUnivList(ulist, html):
    """Append the page's ``<meta name="description">`` content to *ulist*.

    Args:
        ulist: List that receives the description string (mutated in place).
        html: HTML source of the page.

    Raises:
        IndexError: If *html* contains no ``description`` meta tag.
        KeyError: If the first such tag has no ``content`` attribute.
    """
    soup = BeautifulSoup(html, 'html.parser')
    meta = soup.find_all('meta', attrs={'name': 'description'})
    ulist.append(meta[0].attrs['content'])


def main():
    """Collect the description of exercise pages 0-100 and save them.

    Pages that cannot be downloaded or that lack a description are skipped.
    The collected descriptions are appended to ``100.txt`` (one per line)
    and the full list is printed.
    """
    start_url = 'http://www.runoob.com/python/python-exercise-example'
    uinfo = []
    for i in range(101):
        url = start_url + str(i) + '.html'
        html = getHTMLText(url)
        if not html:
            # Download failed; nothing to parse for this page.
            continue
        try:
            fillUnivList(uinfo, html)
        except (IndexError, KeyError):
            # Page has no usable description meta tag.
            continue

    # Open the file once; explicit UTF-8 so non-ASCII descriptions are
    # written correctly regardless of the platform's default encoding.
    with open('100.txt', 'a', encoding='utf-8') as f:
        f.writelines(line + '\n' for line in uinfo)
    print(uinfo)


if __name__ == '__main__':
    main()
# Time: 2024-12-12 11:58:34