转自:一叶飘舟 http://blog.csdn.net/jdsjlzx/article/details/7058823
package lia.meetlucene;

import java.io.IOException;

import org.apache.lucene.index.CorruptIndexException;

/**
 * Converts text to and from a textual escape notation in which every UTF-16
 * code unit is written as a backslash, the letter {@code u}, and exactly four
 * lower-case hexadecimal digits.
 */
public class Unicode {

    /** Two-character marker (a backslash followed by 'u') that starts every escape. */
    private static final String ESCAPE_PREFIX = "\\u";

    /** Number of hexadecimal digits that follow the marker. */
    private static final int HEX_DIGITS = 4;

    /** Total length of one escape: marker plus hex digits. */
    private static final int ESCAPE_LENGTH = ESCAPE_PREFIX.length() + HEX_DIGITS;

    /**
     * Small demonstration: encodes a Chinese string, decodes a fixed escape
     * sequence, and prints the results to standard output.
     *
     * @param args ignored
     */
    public static void main(String[] args) throws CorruptIndexException, IOException {
        String s = "简介";
        String tt = gbEncoding(s);
        // Prints the escaped form of the sample string.
        System.out.println("unicodeBytes is: " + tt);
        // Prints the text that the escaped form stands for.
        System.out.println("对应的中文: " + decodeUnicode("\\u7b80\\u4ecb"));
        System.out.println("~~~~~~~~~~~~~~~~~~~~~~~~~~~~~");
        System.out.println(s.indexOf("\\"));
    }

    /**
     * Escapes every UTF-16 code unit of the input as the escape marker followed
     * by exactly four lower-case hexadecimal digits.
     *
     * <p>The hex value is always zero-padded to four digits, so one-digit and
     * three-digit values (for example a line feed, or U+0100) produce
     * well-formed escapes as well.
     *
     * @param gbString the text to escape; must not be {@code null}
     * @return the escaped text; empty when the input is empty
     * @throws NullPointerException if {@code gbString} is {@code null}
     */
    public static String gbEncoding(final String gbString) {
        final StringBuilder escaped = new StringBuilder(gbString.length() * ESCAPE_LENGTH);
        for (int i = 0; i < gbString.length(); i++) {
            final String hex = Integer.toHexString(gbString.charAt(i));
            escaped.append(ESCAPE_PREFIX);
            // Left-pad with zeros so every escape carries exactly four digits.
            for (int pad = hex.length(); pad < HEX_DIGITS; pad++) {
                escaped.append('0');
            }
            escaped.append(hex);
        }
        return escaped.toString();
    }

    /**
     * Replaces every well-formed escape (the marker followed by exactly four
     * hexadecimal digits) with the UTF-16 code unit it denotes.
     *
     * <p>Anything that is not a well-formed escape, including plain text and
     * truncated or non-hexadecimal escapes, is copied to the result unchanged,
     * so arbitrary input never causes an exception.
     *
     * @param dataStr the text to decode; must not be {@code null}
     * @return the decoded text; empty when the input is empty
     * @throws NullPointerException if {@code dataStr} is {@code null}
     */
    public static String decodeUnicode(final String dataStr) {
        final int length = dataStr.length();
        final StringBuilder decoded = new StringBuilder(length);
        int pos = 0;
        while (pos < length) {
            final int codeUnit = escapeAt(dataStr, pos);
            if (codeUnit >= 0) {
                decoded.append((char) codeUnit);
                pos += ESCAPE_LENGTH;
            } else {
                // Not an escape: keep the character as literal text.
                decoded.append(dataStr.charAt(pos));
                pos++;
            }
        }
        return decoded.toString();
    }

    /**
     * Reads the escape that starts at {@code pos}, if there is one.
     *
     * @return the code unit value (0..0xFFFF), or -1 when no well-formed
     *         escape starts at {@code pos}
     */
    private static int escapeAt(final String text, final int pos) {
        if (pos + ESCAPE_LENGTH > text.length() || !text.startsWith(ESCAPE_PREFIX, pos)) {
            return -1;
        }
        int value = 0;
        for (int i = pos + ESCAPE_PREFIX.length(); i < pos + ESCAPE_LENGTH; i++) {
            final int digit = Character.digit(text.charAt(i), 16);
            if (digit < 0) {
                return -1;
            }
            value = (value << 4) | digit;
        }
        return value;
    }
}
代码详解:
public static String decodeUnicode(final String dataStr) { int start = 0; int end = 0; final StringBuffer buffer = new StringBuffer(); while (start > -1) { end = dataStr.indexOf("\\u", start + 1); //使得第一个unicode在start~end之间,+1,+2,+3均可 System.out.println(start + "asdfasd~~~~~~~~~~~~~~~~~~~~~``" + end); // the index of the first occurrence of the specified substring, // starting at the specified index, // or -1 if there is no such occurrence. String charStr = ""; if (end == -1) { charStr = dataStr.substring(start + 2, dataStr.length()); } else { charStr = dataStr.substring(start + 2, end); } char letter = 0; if (charStr.length() == 4) { letter = (char) Integer.parseInt(charStr, 16); // 16进制parse整形字符串。 } //防止出错 buffer.append(new Character(letter).toString()); start = end; } return buffer.toString(); }
时间: 2024-12-27 14:31:43