(转载)GBK、UTF8、UNICODE编码转换

GBK、UTF8、UNICODE编码转换

 1 string GBKToUTF8(const std::string& strGBK)
 2 {
 3     int nLen = MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, NULL, 0);
 4     WCHAR * wszUTF8 = new WCHAR[nLen];
 5     MultiByteToWideChar(CP_ACP, 0, strGBK.c_str(), -1, wszUTF8, nLen);
 6
 7     nLen = WideCharToMultiByte(CP_UTF8, 0, wszUTF8, -1, NULL, 0, NULL, NULL);
 8     char * szUTF8 = new char[nLen];
 9     WideCharToMultiByte(CP_UTF8, 0, wszUTF8, -1, szUTF8, nLen, NULL, NULL);
10
11     std::string strTemp(szUTF8);
12     delete[]wszUTF8;
13     delete[]szUTF8;
14     return strTemp;
15 }
 1 string UTF8ToGBK(const std::string& strUTF8)
 2 {
 3     int nLen = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
 4     unsigned short * wszGBK = new unsigned short[nLen + 1];
 5     memset(wszGBK, 0, nLen * 2 + 2);
 6     MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, (LPWSTR)wszGBK, nLen);
 7
 8     nLen = WideCharToMultiByte(CP_ACP, 0, (LPWSTR)wszGBK, -1, NULL, 0, NULL, NULL);
 9     char *szGBK = new char[nLen + 1];
10     memset(szGBK, 0, nLen + 1);
11     WideCharToMultiByte(CP_ACP,0, (LPWSTR)wszGBK, -1, szGBK, nLen, NULL, NULL);
12
13     std::string strTemp(szGBK);
14     delete[]szGBK;
15     delete[]wszGBK;
16     return strTemp;
17 }
 1 std::string Gbk2Unicode(std::string &strValue)
 2 {
 3     std::string strReturn;
 4     unsigned char chTemp;
 5     int nLength = strValue.length()*4+1;
 6     WCHAR *pwchBuf = new WCHAR[nLength];
 7     memset(pwchBuf, 0, sizeof(WCHAR) * nLength);
 8
 9     MultiByteToWideChar(CP_ACP, 0, strValue.c_str(), -1, (LPWSTR)pwchBuf, nLength);
10
11     for (size_t i = 0; i < wcslen(pwchBuf); i++)
12     {
13         strReturn += "\\u";
14         chTemp = *((unsigned char*)pwchBuf+i*2+1);
15         if(chTemp)
16         {
17             strReturn += m_clsmyOpenssl.char_to_hex( (unsigned char)(chTemp >> 4) );
18             strReturn += m_clsmyOpenssl.char_to_hex( (unsigned char)(chTemp % 16) );
19         }
20         chTemp = *((unsigned char*)pwchBuf+i*2);
21         strReturn += m_clsmyOpenssl.char_to_hex( (unsigned char)(chTemp >> 4) );
22         strReturn += m_clsmyOpenssl.char_to_hex( (unsigned char)(chTemp % 16) );
23     }
24     delete[]pwchBuf;
25     pwchBuf = NULL;
26     return strReturn;
27 }
 1 std::string Unicode2GBK(std::string &strValue)
 2 {
 3     std::vector<std::string> vcString;
 4     MyTools::SplitString(strValue, "\\u", vcString);
 5
 6     wchar_t* pwBuf = new wchar_t[strValue.length() + 1];
 7     memset(pwBuf, 0, (strValue.length() + 1)* sizeof(wchar_t));
 8
 9     int j(0);
10
11     for(std::vector<std::string>::iterator it = vcString.begin(); it != vcString.end(); ++it)
12     {
13         if (it->empty())
14         {
15             continue;
16         }
17         unsigned short wcTmp = 0;
18         unsigned char cTmp = 0;
19
20         //因为有中文字符混合ASSCII码情况,所以条件为k < it->length()
21         for(size_t k = 0; k < it->length(); ++k)
22         {
23             cTmp = (unsigned char)(*it)[k];
24
25             if(cTmp <= ‘9‘)//0x30~0x39 即0~9
26             {
27                 wcTmp |= (cTmp & 0x0f) << (it->length() - k - 1) * 4;
28             }
29             else if(cTmp >= ‘a‘)//0x61~7a 即a~z
30             {
31                 wcTmp |= (cTmp - 0x57) << (it->length() - k - 1) * 4;
32             }
33             else//0x41~5a 即A~Z
34             {
35                 wcTmp |= (cTmp - 0x37) << (it->length() - k - 1) * 4;
36             }
37         }
38         pwBuf[j++] = (wchar_t)wcTmp;
39     }
40     char *pDst = new char[strValue.length() + 1];
41     memset(pDst, 0, (strValue.length() + 1) * sizeof(char));
42
43     WideCharToMultiByte(CP_ACP, 0, pwBuf, -1, (char*)pDst, strValue.length() - 1, NULL, NULL);
44     std::string strRet(pDst);
45
46     delete[]pwBuf;
47     pwBuf= NULL;
48
49     delete[]pDst;
50     pDst=NULL;
51
52     return strRet;
53 }

原文地址:http://www.cnblogs.com/dongsheng/p/4387069.html

原文地址:https://www.cnblogs.com/wodehao0808/p/8616807.html

时间: 2024-11-10 08:20:08

(转载)GBK、UTF8、UNICODE编码转换的相关文章

转换编码,将Unicode编码转换成可以浏览的utf-8编码

//转换编码,将Unicode编码转换成可以浏览的utf-8编码 public function unicodeDecode($name) { $pattern = '/([\w]+)|(\\\u([\w]{4}))/i'; preg_match_all($pattern, $name, $matches); if (!empty($matches)) { $name = ''; for ($j = 0; $j < count($matches[0]); $j++) { $str = $matc

Python3的unicode编码转换成中文问题

Python3的unicode编码转换成中文问题 从别的地方搬过来的,担心以后不容易搜索到,就收集过来. 我当时面临的问题是要从C++发json代码出来,用python写了个server,然后返回给C++程序,结果收到的是:httpSvrDataCbUser: {"tranNO": "0808ad498670dc996", "data": "\u65b0A1EY16", "ver": "1.0&q

C#中文和UNICODE编码转换

C#中文和UNICODE编码转换 //中文轉為UNICODE string str = "中文"; string outStr = ""; if (!string.IsNullOrEmpty(str)) { for (int i = 0; i < str.Length; i++) { //將中文轉為10進制整數,然後轉為16進制unicode outStr += "\\u" + ((int)str[i]).ToString("x&

gbk转utf-8 iconv 编码转换

linux下面有时候 字符需要进行编码转换(爬虫将gbk转为utf-8编码...),一般可以选择iconv函数. 终端下面  输入 man 3 iconv 得到  iconv函数的使用方法. 个人看习惯了,msdn文档之后感觉linux下面的文档的看的不是那么爽了. 使用iconv函数进行转码,一般使用三个函数:iconv_open  . iconv  .iconv_close三个函数. iconv_t iconv_open(const char* tocode,const char* from

Java用native2ascii命令做unicode编码转换

背景:在做Java开发的时候,常常会出现一些乱码,或者无法正确识别或读取的文件,比如常见的validator验证用的消息资源(properties)文件就需要进行Unicode重新编码.原因是java默认的编码方式为Unicode,而我们的计算机系统编码常常是GBK等编码.需要将系统的编码转换为java正确识别的编码问题就解决了. 1.native2ascii简介:native2ascii是sun java sdk提供的一个工具.用来将别的文本类文件(比如*.txt,*.ini,*.proper

ASCII,Utf8,Unicode编码下的中英文字符大小

一,测试Demo namespace 不同编码下的中英文字符大小 { class Program { static void Main(string[] args) { ShowCode(); } private static void ShowCode() { string[] strArray = { "b","abc","乙","甲乙丙丁"}; byte[] buffer; string mode, back; fore

Jmeter后置处理器解析unicode 编码转换

使用Jmeter过程中发现,从查看结果树看到部分内容需要decode,此处参考曲线救国的方法. 步骤: 添加后置处理器-BeanShell PostProcessor 在scripts处添加相关代码,之后执行即可 1 //获取响应代码Unicode编码的 2 3 String s2=new String(prev.getResponseData(),"UTF-8"); 4 //---------------以下步骤为转码过程--------------- 5 char aChar; 6

java 中文转换成Unicode编码和Unicode编码转换成中文

转自:一叶飘舟 http://blog.csdn.net/jdsjlzx/article/details/7058823 package lia.meetlucene; import java.io.IOException; import org.apache.lucene.index.CorruptIndexException; public class Unicode { public static void main(String[] args) throws CorruptIndexEx

qt中文格式GBK.UTF-8,unicode 之间的转换

QTextCodec *gbk = QTextCodec::codecForName("GB18030");QTextCodec *utf8 = QTextCodec::codecForName("UTF-8");QString g2u = gbk->toUnicode(m_pUserSpi.m_mapInstruments.find(str1.toStdString())->second->InstrumentName); QString as