爬取指定主题的论文,并以相关度排序。
#!/usr/bin/python3
# -*- coding: utf-8 -*-
"""Crawl CNKI search results for a topic, ordered by relevance.

For each result page the script prints one line per article containing the
link text, the link target and the text of the ``span.count`` element, and
finally prints how many articles were listed in total.
"""
# linecache and random are kept from the original script; nothing below uses them.
import linecache
import random

import requests
from bs4 import BeautifulSoup

# Query-string prefix/suffix around the keyword; the page offset is appended.
SEARCH_URL_PREFIX = 'http://search.cnki.net/search.aspx?q='
SEARCH_URL_SUFFIX = '&rank=relevant&cluster=all&val=CJFDTOTAL&p='
USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'
PAGE_COUNT = 10         # number of result pages to request
PAGE_STEP = 15          # the ``p`` parameter advances by this much per page
REQUEST_TIMEOUT = 10    # seconds; without a timeout a stalled server hangs the script


def build_page_url(keywords, offset):
    """Return the search URL for *keywords* with ``p`` set to *offset*.

    The URL is rebuilt from the constant pieces on every call, so the
    template is never consumed by an earlier page.
    """
    return SEARCH_URL_PREFIX + str(keywords) + SEARCH_URL_SUFFIX + str(offset)


def parse_articles(html):
    """Extract ``(name, href, count_text)`` tuples from one result page.

    Returns an empty list when the page has no ``div.articles`` container.
    Entries without an ``<a>`` element are skipped; a missing ``span.count``
    yields an empty count string.
    """
    # Normalise <br> variants and self-closing tags before parsing,
    # exactly as the original script did.
    html = html.replace('<br>', ' ').replace('<br/>', ' ').replace('/>', '>')
    soup = BeautifulSoup(html, "html.parser")

    container = soup.find('div', class_='articles')
    if container is None:
        return []

    articles = []
    for item in container.find_all('div', class_='wz_content'):
        link = item.find('a')
        if link is None:
            continue
        count = item.find('span', class_='count')
        count_text = count.text if count is not None else ''
        articles.append((link.text, link.get('href', ''), count_text))
    return articles


def main():
    """Fetch every result page for the topic and print the articles found."""
    keywords = '通信'  # topic to search for
    headers = {'User-Agent': USER_AGENT}
    total = 0

    for page in range(PAGE_COUNT):
        page_url = build_page_url(keywords, page * PAGE_STEP)
        # Send the browser-like User-Agent that the original built but never used.
        response = requests.get(url=page_url, headers=headers,
                                timeout=REQUEST_TIMEOUT)
        for item_name, item_href, item_refer2 in parse_articles(response.text):
            print('{} {} {}\n'.format(item_name, item_href, item_refer2))
            total += 1

    print(total)


if __name__ == "__main__":
    main()
原文地址:https://www.cnblogs.com/ljy1227476113/p/10913508.html
时间: 2024-10-09 23:38:35