JSON 是一种由列表(数组)与字典(对象)相互嵌套而成的数据格式。Python 的 json 模块可以解析服务器返回的 JSON 数据:json.load(f) 接受一个带有 read() 方法的类文件对象(例如 urlopen 返回的响应对象),json.loads(s) 则接受字符串。
csv 模块用来读写 CSV 文件。下面的例子把几行数据写入一个 CSV 文件:
import csv

# Write a small demo table to ../files/test.csv: one header row followed by
# ten data rows (i, i + 2, i * 2) for i in 0..9.
#
# 'w+' creates the file (or truncates an existing one). newline='' is passed
# as recommended by the csv module documentation so that the writer controls
# line endings itself. The `with` block closes the file even if a write fails.
with open("../files/test.csv", 'w+', newline='') as csvFile:
    writer = csv.writer(csvFile)
    writer.writerow(('number', 'number plus 2', 'number times 2'))
    for i in range(10):
        writer.writerow((i, i + 2, i * 2))
爬取一个页面上的表格,并把它保存为 CSV 文件:
import csv
from urllib.request import urlopen
from bs4 import BeautifulSoup

# Download the Wikipedia page and parse it. The response is used as a context
# manager so the HTTP connection is closed once parsing is done.
with urlopen("http://en.wikipedia.org/wiki/Comparison_of_text_editors") as html:
    bsObj = BeautifulSoup(html, "html.parser")

# The main comparison table is currently the first table on the page
# (raises IndexError if the page contains no table with class "wikitable").
table = bsObj.findAll("table", {"class": "wikitable"})[0]
rows = table.findAll("tr")

# Write one CSV row per <tr>, taking the text of every <td>/<th> cell.
# The `with` block closes the file even if writing a row fails.
with open("files/editors.csv", 'wt', newline='', encoding='utf-8') as csvFile:
    writer = csv.writer(csvFile)
    for row in rows:
        writer.writerow([cell.get_text() for cell in row.findAll(['td', 'th'])])
时间: 2024-10-04 20:03:35