"""Scrape the Autohome news index page.

For every article entry in the lazy-load list the script prints the title,
summary, article link and thumbnail URL, then saves the thumbnail image into
the current working directory under the last path segment of its URL.
"""

import sys
from urllib.parse import urljoin, urlsplit

import requests
from bs4 import BeautifulSoup

NEWS_URL = 'https://www.autohome.com.cn/news/'
ARTICLE_LIST_ID = 'auto-channel-lazyload-article'
# requests waits indefinitely when no timeout is given, so always pass one.
REQUEST_TIMEOUT = 10  # seconds


def _absolute_url(link):
    """Resolve *link* against ``NEWS_URL``.

    Protocol-relative links (``//host/path``) receive the ``https:`` scheme,
    site-relative links are joined onto the news URL, and links that are
    already absolute are returned unchanged.
    """
    return urljoin(NEWS_URL, link)


def _file_name_from_url(url):
    """Return the last path segment of *url*, ignoring query and fragment."""
    return urlsplit(url).path.rsplit('/', maxsplit=1)[-1]


def _download_image(src):
    """Fetch *src* and write the bytes to a file in the working directory.

    A failed download is reported on stderr and skipped so that one broken
    thumbnail does not abort the whole run.
    """
    file_name = _file_name_from_url(src)
    if not file_name:
        print('skip image (no file name in URL):', src, file=sys.stderr)
        return

    try:
        reply = requests.get(src, timeout=REQUEST_TIMEOUT)
        reply.raise_for_status()
    except requests.RequestException as exc:
        print('skip image:', src, exc, file=sys.stderr)
        return

    # Open the file only after the download succeeded, so a failed request
    # never leaves an empty file behind.
    with open(file_name, 'wb') as f:
        f.write(reply.content)


def _fetch_article_items():
    """Download the news index and return the ``<li>`` tags of the article list.

    Raises:
        requests.RequestException: the index page could not be fetched.
        RuntimeError: the article container is missing from the page.
    """
    response = requests.get(NEWS_URL, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    # The original script forces GBK decoding for this page.
    response.encoding = 'gbk'

    soup = BeautifulSoup(response.text, 'html.parser')
    container = soup.find(name='div', attrs={'id': ARTICLE_LIST_ID})
    if container is None:
        raise RuntimeError(
            'article container not found: div#' + ARTICLE_LIST_ID
        )
    return container.find_all(name='li')


def main():
    """Print every article entry and download its thumbnail image."""
    for index, item in enumerate(_fetch_article_items(), start=1):
        print('pro:', index)

        title = item.find(name='h3')
        if title is None:
            # Entries without a heading are not articles; skip them.
            continue

        summary = item.find(name='p')
        anchor = item.find(name='a')
        image = item.find(name='img')

        print(title.text)
        print(summary.text if summary is not None else '')

        href = anchor.get('href') if anchor is not None else None
        if href:
            print(_absolute_url(href))

        # Tag.get(key) is shorthand for Tag.attrs.get(key).
        src = image.get('src') if image is not None else None
        if not src:
            continue
        src = _absolute_url(src)
        print(src)

        # Request and save the thumbnail.
        _download_image(src)


if __name__ == '__main__':
    main()
# Original article: https://www.cnblogs.com/xpptt/p/11772628.html
# Time: 2024-10-02 15:26:24