样本:英文世界名著(1000部)
样本容量:1.4亿(包含6918个文件,366M)
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Number of letters in the English alphabet; size of every per-letter table. */
enum { LETTER_COUNT = 26 };

/*
 * Per-letter occurrence counters, case-insensitive: index 0 is 'a'/'A',
 * index 25 is 'z'/'Z'. After SortL() runs, the array is reordered by
 * descending count and the index no longer maps to a fixed letter.
 */
int CountL[LETTER_COUNT];

/*
 * Classify one character and, if it is an English letter, bump its counter.
 *
 * c: the character to test.
 * Returns 1 when c is in 'A'..'Z' or 'a'..'z' (the matching CountL slot is
 * incremented), 0 otherwise (CountL is left untouched).
 */
int CheckL(char c)
{
    if (c >= 'A' && c <= 'Z') {
        CountL[c - 'A']++;
        return 1;
    }
    if (c >= 'a' && c <= 'z') {
        CountL[c - 'a']++;
        return 1;
    }
    return 0;
}

/*
 * Read the file at filepath byte by byte and count its English letters.
 *
 * Every letter found is also accumulated into CountL via CheckL().
 * Returns the total number of letters seen. If the file cannot be opened
 * or a read error occurs, a message is printed with perror() and the
 * process exits with status 1.
 */
long LetterScan(const char *filepath)
{
    FILE *fp = fopen(filepath, "r");
    if (fp == NULL) {
        perror("Can't open!");
        exit(1);
    }

    long AllCount = 0;
    int ch; /* int, not char: must be able to hold EOF distinctly from 0xFF */

    /* Test the fgetc() result itself; feof() only turns true after a failed
     * read and never turns true on a read error. */
    while ((ch = fgetc(fp)) != EOF) {
        if (CheckL((char)ch)) {
            AllCount++;
        }
    }

    if (ferror(fp)) {
        perror("Read error");
        fclose(fp);
        exit(1);
    }

    fclose(fp);
    return AllCount;
}

/*
 * Sort CountL in place into descending order and return the matching letters.
 *
 * Returns a newly allocated array of LETTER_COUNT chars (not NUL-terminated)
 * where element i is the lowercase letter whose count now sits in CountL[i].
 * The caller owns the array and must free() it. Returns NULL if the
 * allocation fails, in which case CountL is not modified.
 *
 * The labels always start as 'a'..'z', so this is only meaningful when
 * CountL is still in alphabetical order, i.e. on the first call.
 */
char *SortL(void)
{
    char *Letter = calloc(LETTER_COUNT, sizeof *Letter);
    if (Letter == NULL) {
        return NULL;
    }

    for (int i = 0; i < LETTER_COUNT; i++) {
        Letter[i] = (char)('a' + i);
    }

    /* Bubble sort; the strict '<' keeps letters with equal counts in
     * alphabetical order. */
    for (int i = 0; i < LETTER_COUNT - 1; i++) {
        for (int j = 0; j < LETTER_COUNT - 1 - i; j++) {
            if (CountL[j] < CountL[j + 1]) {
                int t = CountL[j];
                char c = Letter[j];

                CountL[j] = CountL[j + 1];
                Letter[j] = Letter[j + 1];
                CountL[j + 1] = t;
                Letter[j + 1] = c;
            }
        }
    }

    return Letter;
}

/*
 * Rebuild the combined sample file with shell commands, count its letters,
 * and print each letter's count and relative frequency, most frequent first.
 */
int main(void)
{
    const char *FilePath = "/share/test/doc/test.txt";

    /* NOTE(review): the shell commands use paths relative to the current
     * directory while FilePath is absolute; they only refer to the same file
     * when the program is run from /share/test. */
    if (system("rm -rf doc/test.txt") != 0) {
        fprintf(stderr, "warning: could not remove doc/test.txt\n");
    }
    if (system("cat doc/*txt > doc/test.txt") != 0) {
        fprintf(stderr, "warning: could not build doc/test.txt\n");
    }

    long count = LetterScan(FilePath);

    char *Letter = SortL();
    if (Letter == NULL) {
        perror("calloc");
        return 1;
    }

    printf("测试文本包含有效英文字母总数:%ld\n", count);
    for (int i = 0; i < LETTER_COUNT; i++) {
        /* Avoid 0/0 (NaN) when the input contained no letters at all. */
        float probability = (count > 0) ? (float)CountL[i] / count : 0.0f;

        printf("%c共出现%d次,概率是%f\n", Letter[i], CountL[i], probability);
    }

    free(Letter);
    return 0;
}
26个英文字母使用频率的大样本分析,布布扣,bubuko.com
时间: 2024-10-22 05:02:43