C++ 字符编码转换类

记录一下C++ 编码转换的函数:

 1 #pragma once
 2 #include "afx.h"
 3
 4
 5 #define DEFAULT_CODE 0
 6 #define CHINESE_SIMPLIFIED 1
 7 #define   CHINESE_TRADITIONAL 2
 8
 9 class CChineseConvertor:
10 //public CObject
11 {
12 public:
13     CChineseConvertor(void);
14     ~CChineseConvertor(void);
15     LPSTR Big52GBKSimplified(char * szText);
16     LPSTR Big52GBKTraditional(char * szText);
17     LPSTR GBK2Big5(char * szText);
18     LPSTR GBKSimplified2GBKTraditional(char * szSimplified);
19     LPSTR GBKTraditional2GBKSimplified(char * szTraditional);
20     LPWSTR UTF82UNICODE(char*   utf8str);
21     LPSTR UNICODE2UTF8(LPCWSTR  strText);
22
23     char *m_pszUnknown;
24     // 转换到Unicode
25     LPWSTR ToUnicode(char * szSource, int nEncoding);
26     LPSTR ToMultiByte(LPCWSTR szSource, int nEncoding);
27 };
  1 #include "stdafx.h"
  2 #include "Coding.h"
  3
  4
  5
  6 CChineseConvertor::CChineseConvertor(void)
  7 {
  8   m_pszUnknown = new char[2];
  9   m_pszUnknown[0]=‘ ‘;
 10   m_pszUnknown[1]=0;
 11 }
 12
 13 CChineseConvertor::~CChineseConvertor(void)
 14 {
 15   delete[] m_pszUnknown;
 16   m_pszUnknown = NULL;
 17 }
 18
 19 //big5 to GBK_简体
 20 LPSTR CChineseConvertor::Big52GBKSimplified(char * szText)
 21 {
 22   int nLength;
 23   wchar_t *pBuffer;
 24   LPSTR pResult;
 25   int nResultLength;
 26
 27   nLength=MultiByteToWideChar(950,0,szText,strlen(szText),NULL,0);
 28   pBuffer=new wchar_t[nLength+1];
 29   MultiByteToWideChar(950,0,(LPCSTR)szText,strlen(szText),(LPWSTR)pBuffer,nLength);
 30   pBuffer[nLength]=0;
 31
 32   nResultLength=WideCharToMultiByte(936,0,pBuffer,nLength,NULL,0,m_pszUnknown,FALSE);
 33   pResult=new char[nResultLength+1];
 34   WideCharToMultiByte(936,0,(LPWSTR)pBuffer,nLength,(LPSTR)pResult,nResultLength,"  ",FALSE);
 35   pResult[nResultLength]=0;
 36
 37   return GBKTraditional2GBKSimplified(pResult);
 38
 39 }
 40
 41 //big5 to GBK_繁体
 42 LPSTR CChineseConvertor::Big52GBKTraditional(char * szText)
 43 {
 44   int nLength;
 45   wchar_t *pBuffer;
 46   LPSTR pResult;
 47   int nResultLength;
 48
 49   nLength=MultiByteToWideChar(950,0,szText,strlen(szText),NULL,0);
 50   pBuffer=new wchar_t[nLength+1];
 51   MultiByteToWideChar(950,0,(LPCSTR)szText,strlen(szText),(LPWSTR)pBuffer,nLength);
 52   pBuffer[nLength]=0;
 53
 54   nResultLength=WideCharToMultiByte(936,0,pBuffer,nLength,NULL,0,m_pszUnknown,FALSE);
 55   pResult=new char[nResultLength+1];
 56   WideCharToMultiByte(936,0,(LPWSTR)pBuffer,nLength,(LPSTR)pResult,nResultLength,"  ",FALSE);
 57   pResult[nResultLength]=0;
 58
 59   return pResult;
 60 }
 61
 62 //GBK_简体 to GBK_繁体
 63 LPSTR CChineseConvertor::GBKTraditional2GBKSimplified(char * szTraditional)
 64 {
 65   LCID dwLocale;
 66   WORD wLangID;
 67   wLangID=MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED);
 68   dwLocale=MAKELCID(wLangID,SORT_CHINESE_PRC);
 69
 70   int nLength;
 71   char *pBuffer;
 72   nLength=LCMapStringA(dwLocale,LCMAP_SIMPLIFIED_CHINESE,(LPCSTR)szTraditional,strlen(szTraditional),NULL,0);
 73   pBuffer=new char[nLength+1];
 74   pBuffer[nLength]=0;
 75   LCMapStringA(dwLocale,LCMAP_SIMPLIFIED_CHINESE,(LPCSTR)szTraditional,strlen(szTraditional),pBuffer,nLength);
 76   return pBuffer;
 77 }
 78
 79 //GBK_简体 to big5
 80 LPSTR CChineseConvertor::GBK2Big5(char * szText)
 81 {
 82   LPSTR szGBKTraditional;
 83   int nLength;
 84   wchar_t *pBuffer;
 85   LPSTR pResult;
 86   int nResultLength;
 87
 88   szGBKTraditional=GBKSimplified2GBKTraditional(szText);
 89   nLength=MultiByteToWideChar(936,0,szGBKTraditional,strlen(szGBKTraditional),NULL,0);
 90   pBuffer=new wchar_t[nLength+1];
 91   MultiByteToWideChar(936,0,(LPCSTR)szGBKTraditional,strlen(szGBKTraditional),(LPWSTR)pBuffer,nLength);
 92   pBuffer[nLength]=0;
 93
 94   nResultLength=WideCharToMultiByte(950,0,pBuffer,nLength,NULL,0,m_pszUnknown,FALSE);
 95   pResult=new char[nResultLength+1];
 96   WideCharToMultiByte(950,0,(LPWSTR)pBuffer,nLength,(LPSTR)pResult,nResultLength,"  ",FALSE);
 97   pResult[nResultLength]=0;
 98
 99   return pResult;
100 }
101
102 //将GBK的简体转换到GBK繁体
103 LPSTR CChineseConvertor::GBKSimplified2GBKTraditional(char * szSimplified)
104 {
105   LCID dwLocale;
106   WORD wLangID;
107   wLangID=MAKELANGID(LANG_CHINESE,SUBLANG_CHINESE_SIMPLIFIED);
108   dwLocale=MAKELCID(wLangID,SORT_CHINESE_PRC);
109
110   int nLength;
111   char *pBuffer;
112   nLength=LCMapStringA(dwLocale,LCMAP_TRADITIONAL_CHINESE,(LPCSTR)szSimplified,strlen(szSimplified),NULL,0);
113   pBuffer=new char[nLength+1];
114   pBuffer[nLength]=0;
115   LCMapStringA(dwLocale,LCMAP_TRADITIONAL_CHINESE,(LPCSTR)szSimplified,strlen(szSimplified),pBuffer,nLength);
116   return pBuffer;
117 }
118
119 // 转换到Unicode
120 LPWSTR CChineseConvertor::ToUnicode(char * szSource, int nEncoding)
121 {
122   int nLength;
123   wchar_t *pBuffer;
124   int nLanguage;
125
126   if(nEncoding==CHINESE_SIMPLIFIED)
127     nLanguage=936;
128   else
129     if(nEncoding==CHINESE_TRADITIONAL)
130       nLanguage=950;
131     else
132       nLanguage= CP_ACP;
133
134   nLength=MultiByteToWideChar(nLanguage,0,szSource,strlen(szSource),NULL,0);
135   pBuffer=new wchar_t[nLength+1];
136   MultiByteToWideChar(nLanguage,0,(LPCSTR)szSource,strlen(szSource),(LPWSTR)pBuffer,nLength);
137   pBuffer[nLength]=0;
138
139   return pBuffer;
140 }
141
142 //转换到多字节
143 LPSTR CChineseConvertor::ToMultiByte(LPCWSTR szSource, int nEncoding)
144 {
145   int nLength;
146   char *pBuffer;
147   int nLanguage;
148
149   if(nEncoding==CHINESE_SIMPLIFIED)
150     nLanguage=936;
151   else
152     if(nEncoding==CHINESE_TRADITIONAL)
153       nLanguage=950;
154     else
155       nLanguage= CP_ACP;
156
157   nLength=WideCharToMultiByte(nLanguage,0,szSource,wcslen(szSource),NULL,0,m_pszUnknown,FALSE);
158
159   pBuffer=new char[nLength+1];
160   WideCharToMultiByte(nLanguage,0,szSource,wcslen(szSource),pBuffer,nLength,m_pszUnknown,FALSE);
161   pBuffer[nLength]=0;
162
163   return pBuffer;
164
165 }
166
167 //UTF8转换到UNICODE
168 LPWSTR CChineseConvertor::UTF82UNICODE(char*   utf8str)
169 {
170     int nLength;
171     wchar_t *pBuffer;
172
173     nLength=MultiByteToWideChar(CP_UTF8,0,utf8str,strlen(utf8str),NULL,0);
174     pBuffer=new wchar_t[nLength+1];
175     MultiByteToWideChar(CP_UTF8,0,(LPCSTR)utf8str,strlen(utf8str),(LPWSTR)pBuffer,nLength);
176     pBuffer[nLength]=0;
177
178     return pBuffer;
179 }
180
181 //UNICODE转换到UTF8
182 LPSTR CChineseConvertor::UNICODE2UTF8(LPCWSTR  strText)
183 {
184     int len;
185     len = WideCharToMultiByte(CP_UTF8, 0, (LPCWSTR)strText, -1, NULL, 0, NULL, NULL);
186     char *szUtf8=new char[2*(len + 1)];
187     memset(szUtf8, 0, len * 2 + 2);  //UTF8最多的字节数最多是一个UINICODE字符所占字节数的两倍
188     WideCharToMultiByte (CP_UTF8, 0, (LPCWSTR)strText, -1, szUtf8, len, NULL,NULL);
189     return szUtf8;
190
191 }
时间: 2024-10-13 06:56:38

C++ 字符编码转换类的相关文章

iconv字符编码转换

转自 http://blog.csdn.net/langresser_king/article/details/7459367 iconv(http://www.gnu.org/software/libiconv/)是一个开源的字符编码转换库,可以"方便"的完成几乎所有的编码转换工作.说简单是因为,它常用的接口就三个,iconv_open  iconv   iconv_close,但是即便是只有三个接口,要想使用正确也不容易.这里把一些基本概念和使用细节记录下来,希望能成为一篇最实用的

php字符编码转换之gb2312转为utf8(转)

在php中字符编码转换我们一般会用到iconv与mb_convert_encoding进行操作,但是mb_convert_encoding在转换性能上比iconv要差很多哦.string iconv ( string in_charset, string out_charset, string str ) 注意:第二个参数,除了可以指定要转化到的编码以外,还可以增加两个后缀://TRANSLIT 和 //IGNORE,其中 //TRANSLIT 会自动将不能直接转化的字符变成一个或多个近似的字符

Windows下字符编码转换

有时候经常使用别人用Tabhost+其它的实现demo.单纯利用Tabhost该如何使用呢? 下面看例子: public class MainActivity extends TabActivity { public TabHost tabHost; @Override protected void onCreate(Bundle savedInstanceState) { super.onCreate(savedInstanceState); // 获取对象 tabHost = getTabH

java 字节流和字符流转换类InputStreamReader,OutPutStreamReader

import java.io.*; public class BufferDemo { public static void main(String[] args) throws IOException { BufferedReader buf=null; buf=new BufferedReader(new InputStreamReader(System.in)); System.out.print("Please enter a number:"); String str=buf

erlang中字符编码转换(转)

转自:http://www.thinksaas.cn/group/topic/244329/ 功能说明: erlang中对各种语言的编码支持不足,此代码是使用erlang驱动了著名的iconv编码库来对字符进行编码转换处理. 文件说明: iconv_erl.c和iconv.h 是erlang字符编码模块的driver,作用是对iconv进行封装.编译后生成iconv_erl.dll,供iconv.erl使用. iconv_makefile.win32 windows上编译iconv_erl.dl

php 字符编码转换函数 iconv mb_convert_encoding比较

在使用PHP处理字符串时,我们经常会碰到字符编码转换的问题,你碰到过iconv转换失败吗? 发现问题时,网上搜了搜,才发现iconv原来有bug,碰到一些生僻字就会无法转换,当然了配置第二个参数时,可以稍微弥补一下默认缺陷,不至于在无法转换时被截断,用法如下 iconv("UTF-8","GB2312//IGNORE",$data); 这样碰到生僻字转换失败时,它就会忽略失败,继续转换下面的内容,这算解决问题的一个办法,不过为了确保转换的成功率,我们可以用另一个转换函数(mb_convert_e

ASP中有关字符编码转换的几个有用函数

ASP中有关字符编码转换的几个有用函数 <%1.'UTF转GB---将UTF8编码文字转换为GB编码文字function UTF2GB(UTFStr) for Dig=1 to len(UTFStr)   '如果UTF8编码文字以%开头则进行转换  if mid(UTFStr,Dig,1)="%" then      'UTF8编码文字大于8则转换为汉字    if len(UTFStr) >= Dig+8 then        GBStr=GBStr & Con

python基础 字符编码转换

python2 1 #python2上所有的字符编码都需要先decode到unicode,再从unicode encode到目标编码 2 str_utf8 = "我就是我" 3 print("str_utf-8:我就是我:",str_utf8) 4 #将utf-8转换为unicode 5 str_utf8_to_unicode = str_utf8.decode("utf-8") 6 print(str_utf8_to_unicode) 7 #将

Android NDK 下的宽字符编码转换及icu库的使用(转)

原贴http://topic.csdn.net/u/20101022/16/1b2e0cec-b9d2-42ea-8d9c-4f1bb8320a54.html?r=70149216 ,看过并动手实现,记录下来以备再用. 如果是在java层,有String类可以很好的转换各种编码,在ndk下面就没有现成的公开的工具,不过可以用icu4c. ICU4C 是IBM的国际化开发组件ICU的C语言实现版本.在android系统里也有实现.ndk里面并没有公开可用的api,需要自己加载动态库来调用转换函数.