#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""Walkthrough of basic web scraping with requests and BeautifulSoup.

The script runs four independent demonstrations, top to bottom:

1. Print the text of every ``class="content"`` element on qiushibaike.com.
2. Download every ``class="lazy"`` image from a 699pic.com search page into
   a ``jpg`` directory under the current working directory.
3. Print the title and URL of every ``class="lazy"`` image on another
   699pic.com search page.
4. Explore the parse tree of a cnblogs.com page (``contents``, ``children``,
   ``descendants``) and extract ftp links from a dygod.net page with a regex.

Everything runs at import time and needs network access.
"""

import os
import re
import sys

import requests
from bs4 import NavigableString
from bs4 import BeautifulSoup

# --- 1. Print the jokes on the qiushibaike front page -----------------------
res = requests.get("https://www.qiushibaike.com/")
qiushi = res.content
soup = BeautifulSoup(qiushi, "html.parser")
duanzis = soup.find_all(class_="content")
for i in duanzis:
    # First child node of the <span> inside each "content" element.
    duanzi = i.span.contents[0]
    print(duanzi)

# --- 2. Download the lazily-loaded images of a 699pic search page -----------
res = requests.get("http://699pic.com/sousuo-218808-13-1-0-0-0.html")
image = res.content
soup = BeautifulSoup(image, "html.parser")
images = soup.find_all(class_="lazy")

# Build the target directory portably and make sure it exists; otherwise
# every open() below fails and nothing is ever saved.
jpg_dir = os.path.join(os.getcwd(), "jpg")
if not os.path.isdir(jpg_dir):
    try:
        os.makedirs(jpg_dir)
    except OSError as exc:
        sys.stderr.write("cannot create %s: %s\n" % (jpg_dir, exc))

for i in images:
    original = i["data-original"]
    title = i["title"]
    try:
        # Download first so that a failed request does not leave an empty
        # file behind, and reject HTTP error pages instead of saving them
        # under a .jpg name.
        response = requests.get(original)
        response.raise_for_status()
        with open(os.path.join(jpg_dir, title + '.jpg'), 'wb') as fh:
            fh.write(response.content)
    except (OSError, requests.RequestException) as exc:
        # Best effort: report the image that failed and carry on.
        sys.stderr.write("skipped %s: %s\n" % (original, exc))

# --- 3. List title and URL of the images on another search page -------------
r = requests.get("http://699pic.com/sousuo-218808-13-1.html")
fengjing = r.content
soup = BeautifulSoup(fengjing, "html.parser")
# Find all matching tags; find_all returns a list-like result.
images = soup.find_all(class_="lazy")

for i in images:
    jpg_rl = i["data-original"]  # image URL
    title = i["title"]  # image title
    print(title)
    print(jpg_rl)
    print("")

# --- 4. Explore the parse tree of a blog page --------------------------------
r = requests.get("http://www.cnblogs.com/nicetime/")
blog = r.content
# The same document parsed with two different parsers; the lxml result is
# the one used below (requires the lxml package to be installed).
soup = BeautifulSoup(blog, "html.parser")
soup = BeautifulSoup(blog, features="lxml")
print(soup.contents[0].contents)

# Three ways of locating an element: by tag name, by class, by id.
# Only the last lookup is printed. Note that find() returns None when
# nothing matches.
tag = soup.find('div')
tag = soup.find(class_="menu-bar menu clearfix")
tag = soup.find(id="menu")
print(list(tag))

tag01 = soup.find(class_="c_b_p_desc")

# Direct children (contents / children) versus all nested nodes (descendants).
print(len(list(tag01.contents)))
print(len(list(tag01.children)))
print(len(list(tag01.descendants)))

print(tag01.contents)
print(tag01.children)
for i in tag01.children:
    print(i)

print(len(tag01.contents))

# Iterating a tag directly walks its children as well.
for i in tag01:
    print(i)

print(tag01.contents[0].string)
print(tag01.contents[1])
print(tag01.contents[1].string)

# --- 5. Extract ftp links from a GBK-encoded page ----------------------------
url = "http://www.dygod.net/html/tv/oumeitv/109673.html"
s = requests.get(url)
# Round-trip through ISO-8859-1 to get the raw bytes back from s.text and
# decode them as GBK (assumes requests decoded the body as ISO-8859-1).
print(s.text.encode("iso-8859-1").decode('gbk'))
res = re.findall('href="(.*?)">ftp', s.text)
for resi in res:
    a = resi.encode("iso-8859-1").decode('gbk')
    print(a)
# Original article: https://www.cnblogs.com/NiceTime/p/10125289.html
# Time: 2024-10-08 18:52:01