读取文件,统计各个字母出现的频率。
源程序:
/**
 * Counts the relative frequency of each English letter (a-z, case-insensitive)
 * by reading a text file.
 *
 * @author LuoPeng
 * @time 2015.3.5
 */
public class CharacterFrequence {

    /** Number of letters tracked: 'a' through 'z'. */
    private static final int ALPHABET_SIZE = 26;

    /**
     * Reads the file line by line and computes, for each letter a-z, the share
     * of that letter among all letters found in the file. Upper-case letters are
     * folded to lower case; every other character (digits, punctuation,
     * whitespace, non-Latin letters) is ignored.
     *
     * <p>The file is decoded with the platform default charset, because the
     * reader is created without an explicit charset.
     *
     * @param filePath the path of the file to be read
     * @return an array of length 26 where index 0 is the frequency of 'a' and
     *         index 25 the frequency of 'z'; all zeros if the file contains no
     *         letters or if a read error occurs after the file was opened;
     *         {@code null} if the file could not be opened
     */
    public double[] countFrequence(String filePath) {

        double[] frequence = null;
        BufferedReader br = null;

        try {
            br = new BufferedReader(new InputStreamReader(new FileInputStream(filePath)));

            // Allocated only after the file opened successfully, so that a
            // missing file is reported to the caller as null.
            frequence = new double[ALPHABET_SIZE];
            long[] counts = new long[ALPHABET_SIZE];
            long totalCharacter = 0L;

            String line;
            while ((line = br.readLine()) != null) {
                for (int i = 0; i < line.length(); i++) {
                    // Character.toLowerCase is locale-independent, unlike
                    // String.toLowerCase(), which mis-folds 'I' under Turkish locales.
                    char c = Character.toLowerCase(line.charAt(i));
                    if (c >= 'a' && c <= 'z') {
                        counts[c - 'a']++;
                        totalCharacter++;
                    }
                }
            }

            // Skip the division when no letter was seen; otherwise 0.0/0 would
            // fill the result with NaN.
            if (totalCharacter > 0) {
                for (int i = 0; i < frequence.length; i++) {
                    frequence[i] = (double) counts[i] / totalCharacter;
                }
            }
        } catch (FileNotFoundException e) {
            e.printStackTrace();
            System.out.println("Could not find the file...");
        } catch (IOException e) {
            e.printStackTrace();
            System.out.println("Read message error...");
        } finally {
            // br is still null when opening the file failed; closing it
            // unconditionally would throw a NullPointerException.
            if (br != null) {
                try {
                    br.close();
                } catch (IOException e) {
                    e.printStackTrace();
                    System.out.println("Close IO exception...");
                }
            }
        }

        return frequence;
    }
}
统计结果:
上述两张图片的结果分别对应两篇文章《The Sorrows of Young Werther》和《Anna Karenina》:纵坐标表示各字母出现的频率,横坐标对应各个字母(1 对应 a,2 对应 b,……,26 对应 z)。
时间: 2024-11-16 14:45:02