linux实现针对文本统计字母出现的次数(所有的可打印的字符)

最近在看一些有意思的编程问题，发现算法真是一个好东西，呵呵，自己也写了一个简单的 demo。

代码具体如下：

#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <stdint.h>
#include <fcntl.h>
#include <sys/types.h>
#include <sys/stat.h>
#define bSize 255
#define bufSize 4096
/*
 * Occurrence counters, indexed by (character code - 32) so that slot 0 is
 * the space character. whash() only ever increments the slots that belong
 * to printable ASCII (codes 32..126, i.e. indices 0..94).
 */
static uint64_t  arr[bSize] = {0};
/**
 * whash - add the printable characters of a C string to the counters.
 * @s: NUL-terminated text to scan; NULL is treated as an empty string.
 *
 * Every byte in the range 0x20..0x7E increments arr[byte - 32]. All other
 * bytes (control characters such as '\n', DEL, and bytes >= 0x80) are
 * skipped: subtracting 32 from them would produce an index outside the
 * intended range, and a negative one wherever plain char is signed.
 *
 * No separate "already seen" bitmap is kept: a 64-bit mask cannot cover
 * all the counter slots, and a non-zero counter already carries that
 * information.
 */
void whash(char *s)
{
    if (s == NULL) {
        return;
    }
    /* Walk the bytes as unsigned so the range check is well defined. */
    for (const unsigned char *p = (const unsigned char *)s; *p != '\0'; p++) {
        if (*p >= 0x20 && *p <= 0x7E) {
            arr[*p - 0x20]++;
        }
    }
}
/**
 * bread - read a file and feed its contents to whash().
 * @file: path of the file to open read-only.
 *
 * The file is read until read() reports end of file, in chunks of at most
 * sizeof buf - 1 bytes so that each chunk can be NUL-terminated before it
 * is handed to whash(), which expects a C string.
 *
 * Return: 0 when the whole file was read, -1 if the file could not be
 * opened or a read failed.
 */
int bread(char *file)
{
    char buf[bufSize];
    int rc = 0;
    int fd = open(file, O_RDONLY);
    if (fd == -1) {
        return -1;
    }
    for (;;) {
        ssize_t got = read(fd, buf, sizeof buf - 1);
        if (got < 0) {
            rc = -1;
            break;
        }
        if (got == 0) {
            break;      /* end of file */
        }
        buf[got] = '\0';
        /*
         * A chunk may contain embedded NUL bytes; hand whash() each
         * NUL-delimited piece so the text after them is still counted.
         */
        for (ssize_t off = 0; off < got; off += (ssize_t)strlen(buf + off) + 1) {
            whash(buf + off);
        }
    }
    close(fd);
    return rc;
}
/**
 * main - count the characters of a file and print every non-zero counter.
 * @argc: number of command-line arguments.
 * @argv: argv[1], when present, is the file to scan; otherwise "./sry.c"
 *        is used.
 *
 * Prints the bread() result first, then one "count(c) =n" line per counter
 * slot that is non-zero; slot i corresponds to character code i + 32.
 *
 * Return: 0 on success, 1 if bread() reported a failure.
 */
int main(int argc, char **argv) {
    char *path = (argc > 1) ? argv[1] : "./sry.c";
    int rc = bread(path);
    printf("bread = %d\n", rc);
    for (int i = 0; i < bSize; i++) {
        if (arr[i] > 0) {
            /* The counters are uint64_t; cast so the value matches %llu. */
            printf("count(%c) =%llu\n", i + 32, (unsigned long long)arr[i]);
        }
    }
    return rc == 0 ? 0 : 1;
}

运行结果如下：

user@host:~/code_c_20160101/algorithm/str$ ./sry
bread = 0
count( ) =335
count(") =6
count(#) =9
count(%) =3
count(&) =2
count(') =4
count(() =23
count()) =23
count(*) =2
count(+) =9
count(,) =11
count(-) =4
count(.) =10
count(/) =3
count(0) =15
count(1) =4
count(2) =4
count(3) =2
count(4) =3
count(5) =4
count(6) =6
count(9) =1
count(;) =27
count(<) =14
count(=) =19
count(>) =9
count(D) =1
count(L) =1
count(N) =1
count(O) =2
count(R) =1
count(S) =6
count(Y) =1
count([) =7
count(\) =4
count(]) =7
count(_) =5
count(a) =28
count(b) =14
count(c) =17
count(d) =29
count(e) =48
count(f) =28
count(g) =1
count(h) =22
count(i) =61
count(k) =1
count(l) =23
count(m) =3
count(n) =41
count(o) =10
count(p) =4
count(r) =31
count(s) =33
count(t) =55
count(u) =23
count(v) =2
count(w) =4
count(x) =5
count(y) =4
count(z) =8
count({) =13
count(|) =1
count(}) =13

时间： 2024-11-11 17:09:30

linux实现针对文本统计字母出现的次数(所有的可打印的字符)的相关文章

Linux基础命令---文本统计paste

paste 将文件以行的方式合并在一起,用tab字符分隔开,将结果送到标准输出.此命令的适用范围:RedHat.RHEL.Ubuntu.CentOS.SUSE.openSUSE.Fedora. 1.语法 paste [选项] file 2.选项列表选项说明 --version 显示命令版本信息 --help 显示帮助文档 -d | --delimiters=LIST 指定间隔字符 -s | --serial 顺序的合并一个文件的多行到一行 3.实例 1)顺序合并文件,使用间隔符":

统计文本中英文字母及英文单词的次数并排序

一.读取文本中英文字母出现的次数并降序输出英文字母的百分比源码: package total; import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; import java.util.Arrays; public class Statistics_letter { public static void main(String[] args) throws IOException

用javaIO流读取文本中英文字母和英文单词的出现次数及频率

(hdu 简单题 128道)AC Me(统计一行文本中各个字母出现的次数)

题目: AC Me Time Limit: 2000/1000 MS (Java/Others) Memory Limit: 65536/32768 K (Java/Others)Total Submission(s): 13465 Accepted Submission(s): 5927 Problem Description Ignatius is doing his homework now. The teacher gives him some articles and as

linux 中的文本处理工具

文本处理工具在linux系统中文本工具有很多现在具体介绍几款如抽取文本的工具和文件三剑客文件内容:less和 cat 文件截取:head和tail 按列抽取:cut 按关键字抽取:grep egrep 首先有查看文件的cat tac cat [OPTION]... [FILE]... -E: 显示行结束符$ -n: 对显示出的每一行进行编号 -A:显示所有控制符 -b:非空行编号 -s:压缩连续的空行成一行 tac 与cat 命令一样不过是取反 more: 分页查看文件 m

Linux中关于文本查看和处理的常用命令

Linux中关于文本查看和处理的常用命令一.学习目标掌握以下命令: 查看文本: cat .tac.more.less.head.tail 文本处理:cut.sort.uniq.wc.tr 二.查看文本命令 cat : 连接并显示文本 cat [options] file,... -n :显示行号 -E :显示结束符$ more :分屏查看文件 space键: 向后翻一屏 b键:向前翻一屏 e

Linux基础之文本处理工具grep及正则表达式(附带egrep与grep的不同)

文本处理工具grep,正则表达式在Linux学习过程中很容易出现困惑与障碍的地方,这里分享下学习这方面内容的一些感受. grep Global search REgular expression and Print out the line 作用:文本搜索工具,根据用户指定的'模式(过滤条件)'对目标文本逐行进行匹配检查:打印匹配到的行: '模式':由正则表达式的元字符及文本字符所编写出的过滤条件. grep [OPTIONS] PATTERN [FILE...] grep [OPTIONS]

linux中的文本处理方法集锦

sed awk grep 这三个工具都要用到正则表达式,把常用贴出来. 1．行的匹配 [[email protected] /]# sed -n '2p' /etc/passwd 打印出第2行 [[email protected] /]# sed -n '1,3p' /etc/passwd 打印出第1到第3行 [[email protected] /]# sed -n '$p' /etc/passwd 打印出最后一行 [[email protected] /]# sed -n '/user

Linux Shell处理文本最常用的工具大盘点

导读本文将介绍Linux下使用Shell处理文本时最常用的工具:find.grep.xargs.sort.uniq.tr.cut.paste.wc.sed.awk:提供的例子和参数都是最常用和最为实用的,我对shell脚本使用的原则是命令单行书写,尽量不要超过2行:如果有更为复杂的任务需求,还是考虑python吧! find文件查找查找txt和pdf文件找txt和pdf文件 find . $ -name "*.txt" -o -name "*.pdf" $