/// <summary>
/// Guesses the text encoding of a file by inspecting its first few bytes.
/// </summary>
/// <remarks>
/// Detection order: UTF-8 BOM, UTF-16 big-endian BOM, UTF-16 little-endian BOM,
/// then a structural UTF-8 check of the sampled bytes. Anything else is reported
/// as <see cref="Encoding.Default"/> (the platform's "ANSI" encoding on .NET
/// Framework; note that on .NET Core / .NET 5+ <see cref="Encoding.Default"/> is
/// itself a UTF-8 encoding).
/// Only a 10-byte sample is examined, so a file whose first 10 bytes are plain
/// ASCII is always reported as UTF-8.
/// </remarks>
/// <param name="filePath">Path of the file to inspect.</param>
/// <returns>
/// The detected encoding, or <c>null</c> when the file holds 3 bytes or fewer
/// (too little data to decide).
/// </returns>
public static Encoding GetTextEncoding(string filePath)
{
    const int SampleSize = 10;
    byte[] buff = new byte[SampleSize];
    int read = 0;

    // Open read-only (works on read-only files) and always release the handle.
    using (FileStream fs = File.OpenRead(filePath))
    {
        // Stream.Read may return fewer bytes than requested; loop until the
        // sample is full or the end of the file is reached.
        int n;
        while (read < buff.Length && (n = fs.Read(buff, read, buff.Length - read)) > 0)
        {
            read += n;
        }
    }

    if (read <= 3)
    {
        return null;
    }

    if (buff[0] == 0xEF && buff[1] == 0xBB && buff[2] == 0xBF)
    {
        // UTF-8 byte order mark.
        return Encoding.UTF8;
    }

    if (buff[0] == 0xFE && buff[1] == 0xFF)
    {
        // UTF-16 big-endian byte order mark.
        return Encoding.BigEndianUnicode;
    }

    if (buff[0] == 0xFF && buff[1] == 0xFE)
    {
        // UTF-16 little-endian byte order mark.
        return Encoding.Unicode;
    }

    // If the sample filled the whole buffer, the file may continue and the last
    // character may have been cut in half; tolerate that. If the whole file fit
    // in the sample, a dangling lead byte is genuinely malformed UTF-8.
    bool sampleMayBeCut = read == buff.Length;
    return isUtf8(buff, read, sampleMayBeCut) ? Encoding.UTF8 : Encoding.Default;
}

/// <summary>
/// Checks whether every byte of <paramref name="buff"/> fits the UTF-8 layout.
/// A multi-byte sequence cut off by the end of the buffer is tolerated.
/// </summary>
/// <param name="buff">Bytes to examine.</param>
/// <returns><c>true</c> when the bytes are structurally valid UTF-8.</returns>
private static bool isUtf8(byte[] buff)
{
    return isUtf8(buff, buff.Length, true);
}

/// <summary>
/// Structural UTF-8 check over the first <paramref name="length"/> bytes.
/// </summary>
/// <remarks>
/// Recognized layouts:
/// <code>
/// 0XXXXXXX
/// 110XXXXX 10XXXXXX
/// 1110XXXX 10XXXXXX 10XXXXXX
/// 11110XXX 10XXXXXX 10XXXXXX 10XXXXXX
/// </code>
/// Overlong forms and surrogate code points are not rejected; this is a
/// heuristic, not a full validator.
/// </remarks>
/// <param name="buff">Bytes to examine.</param>
/// <param name="length">Number of leading bytes of <paramref name="buff"/> that hold data.</param>
/// <param name="allowTruncatedTail">
/// Whether a multi-byte sequence that runs past <paramref name="length"/> counts as valid.
/// </param>
/// <returns><c>true</c> when the examined bytes are structurally valid UTF-8.</returns>
private static bool isUtf8(byte[] buff, int length, bool allowTruncatedTail)
{
    if (length > buff.Length)
    {
        length = buff.Length;
    }

    int i = 0;
    while (i < length)
    {
        byte lead = buff[i];
        int trailing;

        if ((lead & 0x80) == 0x00)
        {
            trailing = 0; // 0xxx xxxx (ASCII)
        }
        else if ((lead & 0xE0) == 0xC0)
        {
            trailing = 1; // 110x xxxx
        }
        else if ((lead & 0xF0) == 0xE0)
        {
            trailing = 2; // 1110 xxxx
        }
        else if ((lead & 0xF8) == 0xF0)
        {
            trailing = 3; // 1111 0xxx
        }
        else
        {
            // Stray continuation byte (10xx xxxx) or an invalid lead (1111 1xxx).
            return false;
        }

        for (int k = 1; k <= trailing; k++)
        {
            if (i + k >= length)
            {
                // The sequence runs off the end of the examined bytes.
                return allowTruncatedTail;
            }

            // A continuation byte must be exactly 10xx xxxx; testing only the
            // top bit would also accept lead bytes, as found in GBK/ANSI text.
            if ((buff[i + k] & 0xC0) != 0x80)
            {
                return false;
            }
        }

        // Skip the continuation bytes that were just validated.
        i += trailing + 1;
    }

    return true;
}
参考:
http://blog.csdn.net/xt_chaoji/article/details/7345052
http://blog.csdn.net/nocml/article/details/8106068
时间: 2024-10-02 21:26:15