Individual Project - Word frequency program by HJB

using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Text.RegularExpressions;
using System.Threading.Tasks;

using System.Collections;
namespace ConsoleApplication1
{
/// <summary>
/// Mutable pair of an integer and a string. <c>Program</c> stores one instance per
/// dictionary key, using <see cref="n"/> as the occurrence count and <see cref="s"/>
/// as the spelling that is written to the report.
/// </summary>
class v
{
    /// <summary>Creates an entry holding the given number and text.</summary>
    /// <param name="nu">Initial value of <see cref="n"/>.</param>
    /// <param name="st">Initial value of <see cref="s"/>.</param>
    public v(int nu, string st)
    {
        n = nu;
        s = st;
    }

    /// <summary>Integer part of the entry (the running count).</summary>
    public int n { get; set; }

    /// <summary>String part of the entry (the stored spelling).</summary>
    public string s { get; set; }
}
/// <summary>
/// Word-frequency console tool.
/// <para>Usage:</para>
/// <para><c>program &lt;directory&gt;</c> - count single words, report every entry.</para>
/// <para><c>program -e2 &lt;directory&gt;</c> - count two-word phrases, report the top 10.</para>
/// <para><c>program -e3 &lt;directory&gt;</c> - count three-word phrases, report the top 10.</para>
/// The directory is scanned recursively for .txt, .h, .cpp and .cs files and the
/// report is written to a file inside that directory.
/// </summary>
class Program
{
    /// <summary>Name of the report file created inside the scanned directory.</summary>
    private const string ReportFileName = "黄敬博.txt";

    /// <summary>Message printed when the command line cannot be interpreted.</summary>
    private const string UsageMessage = "please input the correct file adress";

    /// <summary>Number of entries reported in the phrase modes (-e2 / -e3).</summary>
    private const int TopPhraseCount = 10;

    /// <summary>File name suffixes (case-sensitive) that are included in the scan.</summary>
    private static readonly string[] ScannedExtensions = { ".txt", ".h", ".cpp", ".cs" };

    /// <summary>A word: three letters followed by letters/digits, optionally prefixed by '_'.</summary>
    private static readonly Regex SingleWord = new Regex(
        "(\\b[a-zA-Z]{3}[A-Za-z0-9]+)|(\\b[a-zA-Z]{3})|((_[a-zA-Z]{3}[A-Za-z0-9]+)|(_[a-zA-Z]{3}))");

    /// <summary>Two words separated by exactly one whitespace character.</summary>
    private static readonly Regex TwoWords = new Regex(
        @"\b[A-Za-z]{3,}[A-Za-z0-9]*\s{1}\b[A-Za-z]{3,}[A-Za-z0-9]*");

    /// <summary>Three words, each pair separated by exactly one whitespace character.</summary>
    private static readonly Regex ThreeWords = new Regex(
        @"\b[A-Za-z]{3,}[A-Za-z0-9]*\s{1}\b[A-Za-z]{3,}[A-Za-z0-9]*\s{1}\b[A-Za-z]{3,}[A-Za-z0-9]*");

    /// <summary>Finds the separator inside a matched phrase; same class as the patterns' <c>\s</c>.</summary>
    private static readonly Regex Separator = new Regex(@"\s");

    /// <summary>
    /// Program entry point: selects the counting mode from the command line.
    /// </summary>
    /// <param name="args">
    /// Either a single directory path, or <c>-e2</c>/<c>-e3</c> followed by a directory path.
    /// </param>
    static void Main(string[] args)
    {
        // Guard first: the original indexed args[0] unconditionally and crashed without arguments.
        if (args == null || args.Length == 0)
        {
            Console.WriteLine(UsageMessage);
            return;
        }

        if (args.Length == 1)
        {
            Run(args[0], SingleWord, false, int.MaxValue);
            return;
        }

        if (args[0].Equals("-e2"))
        {
            Run(args[1], TwoWords, true, TopPhraseCount);
        }
        else if (args[0].Equals("-e3"))
        {
            Run(args[1], ThreeWords, true, TopPhraseCount);
        }
        else
        {
            Console.WriteLine(UsageMessage);
        }
    }

    /// <summary>
    /// Scans <paramref name="path"/>, counts matches of <paramref name="pattern"/> and writes the report.
    /// </summary>
    /// <param name="path">Directory to scan; the report file is created inside it.</param>
    /// <param name="pattern">Word or phrase pattern to count.</param>
    /// <param name="overlapping">
    /// <c>true</c> for phrase modes, where the next search restarts at the second word of
    /// the previous match so that consecutive phrases overlap.
    /// </param>
    /// <param name="maxEntries">Maximum number of report lines.</param>
    private static void Run(string path, Regex pattern, bool overlapping, int maxEntries)
    {
        if (!Directory.Exists(path))
        {
            // Stop here: the report lives inside the directory, so creating it would throw.
            Console.WriteLine("wrong path");
            return;
        }

        var counts = new Dictionary<string, v>();

        if (Directory.GetFiles(path).Length == 0 && Directory.GetDirectories(path).Length == 0)
        {
            // As before, an empty directory still produces an (empty) report file.
            Console.WriteLine("empty directory");
        }
        else
        {
            // NOTE(review): a report left behind by an earlier run ends in ".txt" and is
            // therefore scanned as input too - confirm whether it should be excluded.
            IEnumerable<string> files = Directory
                .GetFiles(path, "*", SearchOption.AllDirectories)
                .Where(HasScannedExtension);

            foreach (string file in files)
            {
                string text = File.ReadAllText(file);
                if (overlapping)
                {
                    CountPhrases(text, pattern, counts);
                }
                else
                {
                    CountWords(text, pattern, counts);
                }
            }
        }

        WriteReport(Path.Combine(path, ReportFileName), counts, maxEntries);
    }

    /// <summary>Returns whether the file name ends with one of the scanned extensions.</summary>
    private static bool HasScannedExtension(string fileName)
    {
        // Ordinal: file suffixes are identifiers, not linguistic text.
        return ScannedExtensions.Any(extension => fileName.EndsWith(extension, StringComparison.Ordinal));
    }

    /// <summary>Counts every non-overlapping word match in <paramref name="text"/>.</summary>
    private static void CountWords(string text, Regex pattern, Dictionary<string, v> counts)
    {
        foreach (Match match in pattern.Matches(text))
        {
            // The pattern may capture a leading underscore; it is not part of the word.
            Record(counts, match.Value.Replace("_", ""));
        }
    }

    /// <summary>
    /// Counts phrase matches in <paramref name="text"/>, letting consecutive phrases overlap.
    /// </summary>
    private static void CountPhrases(string text, Regex pattern, Dictionary<string, v> counts)
    {
        Match match = pattern.Match(text);
        while (match.Success)
        {
            string phrase = match.Value;
            Record(counts, phrase);

            // Resume at the separator that follows the first word. The separator may be any
            // whitespace (the pattern uses \s), so searching only for ' ' returned -1 for a
            // tab or newline and the scan never advanced. Math.Max keeps the scan moving
            // forward even if no separator were found.
            int separatorOffset = Math.Max(1, Separator.Match(phrase).Index);
            match = pattern.Match(text, match.Index + separatorOffset);
        }
    }

    /// <summary>
    /// Adds one occurrence of <paramref name="text"/> to <paramref name="counts"/>, keyed by its
    /// lower-case form, and keeps the spelling with the lowest character codes as the stored form.
    /// </summary>
    private static void Record(Dictionary<string, v> counts, string text)
    {
        // Invariant lower-casing keeps the key independent of the current culture.
        string key = text.ToLowerInvariant();

        v entry;
        if (counts.TryGetValue(key, out entry))
        {
            entry.n += 1;
            if (string.CompareOrdinal(text, entry.s) < 0)
            {
                entry.s = text;
            }
        }
        else
        {
            counts.Add(key, new v(1, text));
        }
    }

    /// <summary>
    /// Writes up to <paramref name="maxEntries"/> lines of "spelling TAB count", ordered by
    /// descending count and then by spelling.
    /// </summary>
    private static void WriteReport(string reportPath, Dictionary<string, v> counts, int maxEntries)
    {
        IEnumerable<v> ranked = counts.Values
            .OrderByDescending(entry => entry.n)
            .ThenBy(entry => entry.s)
            .Take(maxEntries);

        // Dispose the writer so the file is flushed and closed even if a write fails.
        using (var writer = new StreamWriter(reportPath, false))
        {
            foreach (v entry in ranked)
            {
                writer.WriteLine(entry.s + "\t" + entry.n);
            }
        }
    }
}
}

代码总共由3部分组成,每部分对应相对的mode。

mode2、3是基于mode1基础上做出的一点点改变。

这个程序主要利用了Dictionary功能。key设为string,使用时将单词的小写形式作为key。value设为一个小结构,包含一个string类型的单词和一个int类型的次数。

mode1遍历的过程为按照单个单词遍历的,mode2为两个,mode3为三个。

但mode2读取完两个单词需要往回退一个,mode3读取完要往回退两个。

若读取的单词的小写形式在dictionary中已经包含了,则value中的int值+1,然后比较value中的string与当前的单词,选择ASCII码靠前的那个保存。

最后按照要求输出。

时间: 2024-08-10 21:19:18

Individual Project - Word frequency program by HJB的相关文章

SoftwareEngineering Individual Project - Word frequency program

说实话前面c#实在没怎么学过.这次写起来感觉非常陌生,就连怎么引用名空间都忘记了.在经过恶补后还是慢慢地适应了. 1.项目预计用时: 构建并写出大概的数据结构,程序框架及模块: 30min 实现文件夹递归方问方法 :30min 实现从文件中读出符合要求的单词并统计 :2-3h 实现对单词的排序 : 1h 输出:10min 细节修改及错误排查:2-3h 程序优化: 1h 2.项目的实际用时: 构建并写出大概的数据结构,程序框架及模块: 30min 实现文件夹递归方问方法 :30min 实现从文件中

软件工程:Individual Project - Word frequency program

千辛万苦敲完了这个项目的代码,说实话真的没想到会花费这么多的时间,在现实的强烈对比下才发现自己真的是图样图森破. 1.预计用时 因为上个学期的OO课做过类似的程序,想一想觉得再做一遍so easy,于是感觉心理有谱,预计用4个小时搞定,再用最多1个小时做测试. 2.现实的残酷性 前3个小时是用来干杂务的(写宏函数,建树,写小函数,对命令行情况的分类讨论,设计整个项目的结构), 再有3个小时是找各种资料的(文件流,模板库,string类,还有忘得差不多了的二叉树遍历), 再一个3个小时是用来敲代码

Individual Project - Word frequency program

1.做这个项目之前,因为之前在OO课中做过一些项目,这钟算法也非常熟悉,因此算上单纯的词法算法,和C#语言中文件操作的学习,预计一天之内应该可以写好.2.实际上做起来时,我发现c#与之前学过的java还是有些差别的,算法非常简单,但是学习使用c#花了许多时间,零零碎碎共做了2天. 3.原本一直认为程序的最大资源使用会是单词按词频排序,但是经过算法的分析,还是单词+空格+单词这种格式的判断比较耗费时间 我的算法是这样的:读入一个文本文件的所有字符,以一个字符串形式储存.从头到尾遍历字符串,认为大小

Individual Project Records

At midnight on September 20, I finished my individual project -- a word frequency program. You can find the requirements in detail at http://www.cnblogs.com/jiel/p/3978727.html Before beginning coding, I supposed I could finish it in about 4 hours or

Word frequency analysis

Write a program that reads a file, breaks each line into words, strips whitespace and punctuation from the words, and converts them to lowercase. Modify the program to print the 20 most frequently-used words in the book. First I downloaded the e-boo

192. Word Frequency

192. Word Frequency QuestionEditorial Solution My Submissions Total Accepted: 5272 Total Submissions: 20228 Difficulty: Medium Write a bash script to calculate the frequency of each word in a text file words.txt. For simplicity sake, you may assume:

Scrutiny of Partner's individual project Code

因为队友的代码并没有完整的实现个人项目的完整功能. 已实现功能: 1.对单个单词进行词频统计 2.能够按照老师的要求的格式对制定的有效字符串进行匹配,并且输出至指定文件. 未实现: 1.对连续多个单词进行匹配以及计数 2.对已存储的单词进行排序输出. 代码优点: 1.逻辑严谨 2.格式规范优美 代码缺点: 1.注释较少,代码可读性差,建议在较难或者核心的代码语句或者函数部分配上详细注释或者为程序本神配上文档 2.模块责任分割不均,模块之间功能有少许重复. 3.使用c++进行工程项目,却并没有使用

[Bash]LeetCode192..统计词频 | Word Frequency

Write a bash script to calculate the frequency of each word in a text file words.txt. For simplicity sake, you may assume: words.txt contains only lowercase characters and space ' ' characters. Each word must consist of lowercase characters only. Wor

Word Frequency

Write a bash script to calculate the frequency of each word in a text file words.txt. For simplicity sake, you may assume: words.txt contains only lowercase characters and space ' ' characters. Each word must consist of lowercase characters only. Wor