一、对新时代中国特色社会主义做词频统计
# Word-frequency report: segment the document with jieba, count every token
# that is not exactly one character long, and print the most frequent ones.
from collections import Counter

import jieba

# Maximum number of (word, count) rows to print.
TOP_N = 20

# Read the whole document; the context manager closes the handle even if
# read() raises.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    txt = f.read()

# jieba.lcut returns the segmented text as a list of tokens.
words = jieba.lcut(txt)

# Single-character tokens are excluded from the tally.
counts = Counter(word for word in words if len(word) != 1)

# most_common() yields entries from most to least frequent and returns fewer
# than TOP_N rows when the text has fewer distinct words, so a short input
# no longer raises IndexError.
items = counts.most_common(TOP_N)
for word, count in items:
    # Word left-aligned in 10 columns, count right-aligned in 5.
    print("{0:<10}{1:>5}".format(word, count))
二、根据词频制作词云
# GovRptWordCloudv2.py
# Render a word-cloud image from the document: segment the text with jieba,
# join the tokens with spaces, and let WordCloud draw the result inside the
# shape given by a mask image.
import jieba
import wordcloud
from imageio import imread

# Image whose shape the cloud is drawn into.
mask = imread("dd.png")

# Read the whole document; the context manager closes the handle even if
# read() raises.
with open("新时代中国特色社会主义.txt", "r", encoding="utf-8") as f:
    t = f.read()

# Segment the text and re-join it with spaces so the tokens are
# whitespace-separated when handed to WordCloud.
ls = jieba.lcut(t)
txt = " ".join(ls)

# NOTE(review): per the wordcloud documentation, width/height are ignored
# when a mask is supplied (the mask's shape is used instead) -- confirm
# against the installed version. They are kept here to preserve the
# original call.
# NOTE(review): "simkai.ttf" is presumably a font with CJK glyphs; it must be
# resolvable from the working directory or the system font path.
w = wordcloud.WordCloud(
    font_path="simkai.ttf",
    mask=mask,
    width=1000,
    height=700,
    background_color="black",
    max_words=20,
)
w.generate(txt)
w.to_file("grwordcloud.png")
原文地址:https://www.cnblogs.com/slj-xt/p/12658666.html
时间: 2024-10-07 20:39:24