"""Build a mask-shaped word cloud from a text file and display it.

Steps, all relative to this script's directory:

1. Load stop words from ``stopwords.txt`` (one per line, UTF-8).
2. Segment ``words.txt`` with jieba and count every token that is not a
   stop word.
3. Render the counts into a word cloud constrained by ``china.jpeg``,
   using the ``msyh.ttf`` font, and save it as ``example.png``.
4. Show the cloud and the mask image in two matplotlib figures.
"""
import io
import sys
from collections import Counter
from os import path

from PIL import Image
import numpy as np
import matplotlib.pyplot as plt

from wordcloud import WordCloud
import jieba

# Python 2 only: keep the original default-encoding side effect. On
# Python 3 ``reload`` is not a builtin and ``setdefaultencoding`` does not
# exist, so running this unguarded would crash.
if sys.version_info[0] < 3:
    reload(sys)  # noqa: F821 -- builtin on Python 2
    sys.setdefaultencoding('utf-8')

# Directory containing this script; every input/output path hangs off it.
d = path.dirname(path.abspath(__file__))

# --- stop words -----------------------------------------------------------
stopWordFile = u'stopwords.txt'
# io.open decodes to unicode text on both Python 2 and 3, and the ``with``
# block closes the handle (the original leaked it).
with io.open(path.join(d, stopWordFile), encoding='utf-8') as stop_fh:
    stopWordList = {line.strip() for line in stop_fh}

# --- input text -----------------------------------------------------------
new = 'words.txt'
# newline='' disables newline translation so the text reaches jieba with
# the same characters the original raw read produced; only leading and
# trailing carriage returns are stripped, as before.
with io.open(path.join(d, new), encoding='utf-8', newline='') as text_fh:
    text = text_fh.read().strip('\r')

# --- token frequencies ----------------------------------------------------
# Counter is a dict subclass, so it can be handed to
# generate_from_frequencies exactly like the hand-built dict it replaces.
wordDict = Counter(w for w in jieba.cut(text) if w not in stopWordList)

# --- render ---------------------------------------------------------------
maskImg = 'china.jpeg'
alice_mask = np.array(Image.open(path.join(d, maskImg)))

wc = WordCloud(
    background_color='white',
    max_words=2000,
    mask=alice_mask,
    font_path=path.join(d, 'msyh.ttf'),
)
wc.generate_from_frequencies(wordDict)

wc.to_file(path.join(d, "example.png"))

# show
plt.imshow(wc, interpolation='bilinear')
plt.axis("off")
plt.figure()
plt.imshow(alice_mask, cmap=plt.cm.gray, interpolation='bilinear')
plt.axis("off")
plt.show()
# Time: 2024-11-05 14:54:17