# CalThreeKingdoms.py
"""Rank the most frequently mentioned characters in Romance of the Three Kingdoms.

Requires the third-party Chinese word-segmentation library jieba
(``pip install jieba``). The novel is read from ``threekingdoms.txt``
(UTF-8), segmented into words, filtered, and the 15 most frequent
remaining words are printed together with their counts.
"""
from collections import Counter
from typing import Iterable, List, Tuple

# High-frequency tokens that are not personal names; they are dropped
# before counting. A frozenset gives O(1) membership tests in the hot loop.
EXCLUDED_WORDS = frozenset([
    "却说", "二人", "不可", "荆州", "不能", "如此", "将军", "商议",
    "如何", "主公", "军士", "左右", "军马", "引兵", "次日", "大喜",
    "天下", "东吴", "于是", "今日", "不敢", "魏兵", "陛下", "一人",
    "都督", "人马", "不知", "汉中", "只见", "众将", "后主", "上马",
    "大叫", "蜀兵", "太守", "此人", "夫人", "先主", "后人", "背后",
    "城中", "天子", "一面", "何不", "大军", "忽报", "先生", "百姓",
    "何故", "然后", "先锋", "不如", "赶来",
])

# Alternative names / titles folded into one canonical character name.
# NOTE(review): "丞相" is attributed wholesale to "曹操"; the title may
# refer to other characters in parts of the text -- confirm this is intended.
ALIASES = {
    "丞相": "曹操",
    "玄德": "刘备",
    "玄德曰": "刘备",
    "孔明": "诸葛亮",
    "孔明曰": "诸葛亮",
    "关公": "关羽",
    "云长": "关羽",
}

# Output row layout: name left-aligned, count right-aligned, 10 columns each.
ROW_FORMAT = "{:<10}: {:>10}"


def count_names(words: Iterable[str]) -> Counter:
    """Count candidate names in an iterable of segmented words.

    Single-character tokens and tokens listed in ``EXCLUDED_WORDS`` are
    skipped. Tokens found in ``ALIASES`` are counted under their canonical
    name; every other token is counted as-is.
    """
    counts = Counter()
    for word in words:
        if len(word) == 1 or word in EXCLUDED_WORDS:
            continue
        counts[ALIASES.get(word, word)] += 1
    return counts


def top_names(words: Iterable[str], n: int = 15) -> List[Tuple[str, int]]:
    """Return up to ``n`` ``(name, count)`` pairs, most frequent first.

    Ties keep first-encountered order. If fewer than ``n`` distinct names
    exist, all of them are returned instead of raising ``IndexError``.
    """
    return count_names(words).most_common(n)


def format_row(word: str, count: int) -> str:
    """Format one output row using ``ROW_FORMAT``."""
    return ROW_FORMAT.format(word, count)


def main(path: str = "threekingdoms.txt", top_n: int = 15) -> None:
    """Read the novel at ``path``, segment it with jieba and print the ranking."""
    # Imported here so the pure counting helpers above remain importable
    # (and testable) on machines where jieba is not installed.
    import jieba

    # The context manager guarantees the file handle is closed.
    with open(path, "rt", encoding="utf-8") as source:
        txt = source.read()

    for word, count in top_names(jieba.lcut(txt), top_n):
        print(format_row(word, count))


if __name__ == "__main__":
    main()
# Original source: https://www.cnblogs.com/wumingoo1/p/10324180.html
# Retrieved: 2024-11-01 17:55:12