1. 获取每个国家的论文数量。采取的方法是写好 SQL 语句,直接用 SQL 统计数量;这种方式速度可能会比较慢。另一种方法是把 id 全部传过来,在本地做统计。
import collections
import json
import re

import pymysql

# Countries whose article counts are charted. Shared by getsqllist() and
# change_with_year() so the two can no longer drift apart.
_TOP_COUNTRIES = ['USA', 'China', 'UK', 'Germany', 'Italy', 'Japan',
                  'Canada', 'France', 'Spain', 'Australia']


def get_article_from_mysql(sql):
    """Run ``sql`` on the local MySQL server and return one scalar.

    A new connection to the ``python`` database on localhost:3306 is opened
    for the call and is always closed again, even when the query fails.

    Args:
        sql: A complete SQL statement. It is executed as-is, without bound
            parameters, so the literal ``%`` characters of the LIKE patterns
            built in this file are not treated as format placeholders.

    Returns:
        The first column of the first row of the result set.

    Raises:
        IndexError: If the statement returns no rows.
    """
    conn = pymysql.connect(
        host='localhost',
        port=3306,
        user='root',
        passwd='',
        db='python',
    )
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(sql)
            first_row = cursor.fetchone()
        finally:
            cursor.close()
    finally:
        conn.close()
    if first_row is None:
        raise IndexError('query returned no rows: %s' % sql)
    return first_row[0]


def getsqllist():
    """Build the COUNT(*) statements for every country and year.

    Returns:
        A dict that maps each LIKE pattern (``'%USA%'``, ``'%China%'``, ...)
        to a list of SQL strings, one per publication year from 1995 to 2016
        in ascending order.
    """
    # NOTE(review): this range starts at 1995 while article_each_year()
    # starts at 1996 -- confirm which start year is intended.
    years = [str(y) for y in range(1995, 2017)]
    print(years)

    str1 = 'SELECT COUNT(*) FROM alzheimer where authorinfor like '
    str2 = '&& union_kwd_str != \'\' && pub_year = '

    countrydict = {}
    for name in _TOP_COUNTRIES:
        pattern = '%' + name + '%'
        countrydict[pattern] = [
            str1 + "'" + pattern + "'" + str2 + "'" + y + "'"
            for y in years
        ]
    return countrydict


def change_with_year():
    """Query the yearly article count of each country and dump chart series.

    One JSON object per country is written to
    ``1203_topcountry_article_change_with_year.json``, each followed by a
    comma and a newline, so the file is a comma-terminated list of objects
    rather than a single JSON document. Each country's counts are also
    printed.
    """
    countrydict = getsqllist()
    with open('1203_topcountry_article_change_with_year.json', 'w') as fobj:
        for country in _TOP_COUNTRIES:
            this_year_article_num = [
                get_article_from_mysql(sql)
                for sql in countrydict['%' + country + '%']
            ]
            data = {
                'name': country,
                'type': 'line',
                'stack': 'total',
                # Kept as a string on purpose: that is what the original
                # script emitted.
                'areaStyle': '{normal: {}}',
                'data': this_year_article_num,
            }
            # Single-argument form prints the same text on Python 2 and 3.
            print('%s %s' % (country, this_year_article_num))
            fobj.write(json.dumps(data))
            fobj.write(',')
            fobj.write('\n')


def article_each_year():
    """Query the article count of every year from 1996 to 2016.

    The counts are written to ``1204_article_each_year_num.json`` as
    ``{"article_each_year": [...]}`` and printed.
    """
    str1 = 'SELECT COUNT(*) FROM alzheimer where union_kwd_str != \'\' && pub_year ='
    all_this_year_article_num = [
        get_article_from_mysql(str1 + "'" + str(y) + "'")
        for y in range(1996, 2017)
    ]
    data = {
        'article_each_year': all_this_year_article_num,
    }
    with open('1204_article_each_year_num.json', 'w') as fobj:
        fobj.write(json.dumps(data))
    print(all_this_year_article_num)


if __name__ == '__main__':
    # change_with_year() is the other entry point; call it here instead to
    # regenerate the per-country series.
    article_each_year()
2. 获得每个国家每年的论文数量后,求比例。例如:1996 年美国的比例 = 1996 年美国的论文量 / 1996 年全世界的论文量。
import json

# Series names; row i of ``l`` below belongs to country[i].
country = ['USA', 'China', 'UK', 'Germany', 'Italy', 'Japan', 'Canada',
           'France', 'Spain', 'Australia']

# Article counts per country (rows) and per year (columns, 1996..2016).
l = [
    [1, 0, 15, 10, 19, 23, 32, 26, 49, 93, 161, 348, 669, 1471, 1669, 1892, 2128, 2762, 3090, 2675, 1458],
    [0, 0, 0, 0, 0, 2, 1, 1, 5, 14, 12, 19, 29, 78, 111, 144, 351, 517, 737, 1062, 899],
    [2, 2, 1, 3, 14, 11, 10, 20, 26, 34, 45, 72, 127, 204, 265, 308, 428, 570, 697, 723, 597],
    [0, 3, 4, 7, 8, 11, 9, 8, 27, 24, 21, 33, 67, 98, 124, 149, 253, 303, 370, 358, 318],
    [0, 1, 5, 5, 3, 7, 3, 10, 12, 27, 22, 28, 44, 67, 94, 106, 187, 285, 330, 322, 280],
    [1, 3, 4, 8, 7, 10, 13, 10, 11, 18, 26, 30, 30, 61, 85, 120, 189, 256, 295, 291, 199],
    [0, 5, 7, 2, 4, 2, 6, 10, 13, 18, 21, 32, 34, 69, 96, 112, 197, 286, 340, 340, 251],
    [0, 4, 2, 6, 7, 5, 9, 10, 13, 12, 17, 17, 29, 44, 65, 63, 126, 155, 189, 209, 184],
    [0, 0, 1, 2, 1, 6, 2, 0, 5, 7, 9, 18, 14, 46, 57, 72, 111, 146, 252, 198, 173],
    [0, 0, 1, 0, 0, 1, 2, 3, 4, 13, 11, 14, 22, 45, 53, 74, 120, 162, 245, 195, 169],
]

year = [1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006,
        2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016]

# Yearly totals used as the denominator. These are fixed figures, not the
# column sums of ``l`` (the first column of ``l`` adds up to 4, not 19).
ltotal = [19, 54, 63, 84, 106, 127, 134, 171, 293, 504, 695, 1306, 2325,
          4566, 5415, 5859, 7537, 9603, 11349, 10850, 7211]


def _yearly_share(counts, totals):
    """Return counts[k] / totals[k] for every year k, rounded to 2 places."""
    # float() forces true division on Python 2 as well. A
    # ``from __future__ import division`` is only legal as the first
    # statement of a module, which this script is not within this file.
    return [round(float(count) / total, 2)
            for count, total in zip(counts, totals)]


# Maps the row index of ``l`` (and so the index into ``country``) to that
# country's share of the yearly total.
percent_dict = {}
for index, row in enumerate(l):
    percent_dict[index] = _yearly_share(row, ltotal)


def main():
    """Print the shares and write one chart series per country.

    Each series is written to ``1204_top10country_article.json`` as a JSON
    object followed by a comma and a newline, so the file is a
    comma-terminated list of objects rather than a single JSON document.
    """
    print(len(l[0]))
    print(percent_dict)
    with open('1204_top10country_article.json', 'w') as fobj:
        for index, name in enumerate(country):
            data = {
                'name': name,
                'type': 'line',
                'stack': 'total',
                # Kept as a string on purpose: that is what the original
                # script emitted.
                'areaStyle': '{normal: {}}',
                'data': percent_dict[index],
            }
            fobj.write(json.dumps(data))
            fobj.write(',')
            fobj.write('\n')


if __name__ == '__main__':
    main()
时间: 2024-11-12 21:00:12