# Author :Zcb
"""Chinese word-frequency statistics.

Segments a Chinese text file with jieba, counts how often each multi-character
word occurs, and writes every word that occurs more than ``MIN_COUNT`` times to
a report file, most frequent first.

Usage: ``python script.py [input_path [output_path]]`` -- both paths default to
the constants below.
"""
import sys
from collections import Counter

INPUT_PATH = "d:/政府工作报告.txt"
OUTPUT_PATH = "d:/统计结果.txt"

# Tokens to leave out of the statistics; adjust this set as needed.
EXCLUDE = frozenset(['“', '”', '要', '和', ',', '的', '\n', '。', '、'])

# Only words that occur MORE than this many times are reported.
MIN_COUNT = 25


def read_text(path):
    """Return the text content of *path*.

    UTF-8 (with or without BOM) is tried first; if the bytes are not valid
    UTF-8 the file is re-read as GB18030, the usual legacy encoding for
    Chinese text files on Windows.
    """
    try:
        with open(path, encoding="utf-8-sig") as f:
            return f.read()
    except UnicodeDecodeError:
        with open(path, encoding="gb18030") as f:
            return f.read()


def count_words(tokens, exclude=EXCLUDE):
    """Count the tokens worth reporting.

    Args:
        tokens: iterable of segmented words.
        exclude: collection of tokens to ignore.

    Returns:
        A ``Counter`` mapping word -> occurrences. Tokens of length 1 and
        tokens found in *exclude* are skipped.
    """
    return Counter(w for w in tokens if len(w) != 1 and w not in exclude)


def format_report(counts, min_count=MIN_COUNT):
    """Return the report lines for *counts*, most frequent word first.

    Words occurring *min_count* times or fewer are omitted. Words with equal
    counts keep the order in which they were first seen (``most_common`` uses
    a stable sort over insertion order).
    """
    return [
        f'"{word}"出现了{count}次'
        for word, count in counts.most_common()
        if count > min_count
    ]


def main(argv=None):
    """Run the word count and write the report file.

    Args:
        argv: optional list ``[input_path, output_path]``; defaults to the
            command-line arguments. Missing entries fall back to
            ``INPUT_PATH`` / ``OUTPUT_PATH``.
    """
    # Imported here so the pure helpers above stay usable without jieba.
    import jieba

    args = sys.argv[1:] if argv is None else list(argv)
    src = args[0] if args else INPUT_PATH
    dst = args[1] if len(args) > 1 else OUTPUT_PATH

    counts = count_words(jieba.lcut(read_text(src)))

    # Write straight to the file instead of temporarily replacing sys.stdout,
    # which would stay redirected if an exception occurred mid-report.
    with open(dst, "w", encoding="utf-8") as out:
        out.writelines(line + "\n" for line in format_report(counts))


if __name__ == "__main__":
    main()
# Original source: https://www.cnblogs.com/zach0812/p/11258982.html
# Time: 2024-10-07 08:41:29