1.准备工作
环境要求:Jupyter、python3.7,jieba库
2.python代码
#! python3
# -*- coding: utf-8 -*-
"""Count Chinese word frequencies in a text file using jieba segmentation."""
import codecs
from collections import Counter

import jieba


def get_words(txt):
    """Segment *txt* with jieba and print the 20 most frequent words.

    Single-character tokens and '\r\n' newline tokens are skipped.
    Each output row shows the word, a bar of '*' scaled to count/3,
    and the raw count.
    """
    seg_list = jieba.cut(txt)  # segment the text into words
    c = Counter()
    for x in seg_list:  # tally word frequencies
        # skip one-character tokens and Windows newline artifacts
        if len(x) > 1 and x != '\r\n':
            c[x] += 1
    print('常用词频度统计结果')
    for (k, v) in c.most_common(20):  # emit the top-20 words
        # pad short words to roughly align columns; bar length is count // 3
        print('%s%s %s %d' % (' ' * (5 - len(k)), k, '*' * int(v / 3), v))


if __name__ == '__main__':
    # codecs.open used for explicit utf-8 decoding (py2-era idiom kept as-is)
    with codecs.open('19d.txt', 'r', 'utf8') as f:
        txt = f.read()
    get_words(txt)
3.显示效果
input
引用
https://blog.csdn.net/onestab/article/details/78307765
原文地址:https://www.cnblogs.com/justlikecode/p/10645295.html
时间: 2024-10-13 10:23:25