下面是一个将 Unicode 字符(码点)转换为 UTF-8 编码的 C 语言程序实现:
/*
 * utou.c - encode a single Unicode code point as UTF-8.
 *
 * Build: gcc utou.c -o utou
 *
 * Define UTOU_NO_MAIN to compile only the encoder (for example when this
 * file is pulled into a test harness that supplies its own main).
 */
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/*
 * utou - encode one Unicode code point as a UTF-8 byte sequence.
 *
 * codepoint: the value to encode. It must be a Unicode scalar value:
 *            0..0x10FFFF, excluding the surrogate range 0xD800..0xDFFF.
 * str:       destination buffer with room for at least 4 bytes.
 *
 * Writes 1 to 4 bytes to str depending on the magnitude of codepoint.
 * No terminating '\0' is appended, so a caller that wants a C string
 * must zero the buffer beforehand (as main below does) or terminate it
 * itself.
 *
 * Returns 0 on success. Returns -1 and leaves str untouched when str is
 * NULL or codepoint cannot be represented in UTF-8.
 */
int utou(unsigned int codepoint, char *str)
{
    /* unsigned char: the bytes produced below have the high bit set, which
     * does not fit a signed char without an implementation-defined
     * conversion. */
    unsigned char out[4];
    size_t len;

    if (str == NULL) {
        return -1;
    }

    /* UTF-8 stops at U+10FFFF and must not encode UTF-16 surrogates. */
    if (codepoint > 0x10FFFF ||
        (codepoint >= 0xD800 && codepoint <= 0xDFFF)) {
        return -1;
    }

    if (codepoint < 0x80) {
        /* 0xxxxxxx */
        out[0] = (unsigned char)codepoint;
        len = 1;
    } else if (codepoint < 0x800) {
        /* 110xxxxx 10xxxxxx */
        out[0] = (unsigned char)(0xC0 | (codepoint >> 6));
        out[1] = (unsigned char)(0x80 | (codepoint & 0x3F));
        len = 2;
    } else if (codepoint < 0x10000) {
        /* 1110xxxx 10xxxxxx 10xxxxxx */
        out[0] = (unsigned char)(0xE0 | (codepoint >> 12));
        out[1] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
        out[2] = (unsigned char)(0x80 | (codepoint & 0x3F));
        len = 3;
    } else {
        /* 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
        out[0] = (unsigned char)(0xF0 | (codepoint >> 18));
        out[1] = (unsigned char)(0x80 | ((codepoint >> 12) & 0x3F));
        out[2] = (unsigned char)(0x80 | ((codepoint >> 6) & 0x3F));
        out[3] = (unsigned char)(0x80 | (codepoint & 0x3F));
        len = 4;
    }

    /* memcpy, not strncpy: out holds raw bytes and is not NUL-terminated. */
    memcpy(str, out, len);
    return 0;
}

#ifndef UTOU_NO_MAIN
/*
 * main - demo: encode one code point and print the resulting UTF-8 text.
 *
 * Other values worth trying: 0x6C41 (a CJK ideograph, 3 bytes) or
 * 0x1F600 (outside the BMP, 4 bytes).
 */
int main(void)
{
    unsigned int codepoint = 0xFFFF;
    /* Zero-filled so the encoded bytes are followed by a terminator. */
    char str[16] = {0};

    if (utou(codepoint, str) != 0) {
        fprintf(stderr, "invalid code point: 0x%X\n", codepoint);
        return EXIT_FAILURE;
    }

    printf("str:%s\n", str);
    return EXIT_SUCCESS;
}
#endif /* UTOU_NO_MAIN */
时间: 2024-10-08 11:28:38