unicode编码转换 C语言
时间: 2023-10-04 12:06:01 浏览: 283
在C语言中,标准库没有直接提供这类转换函数,可以自行实现以下两个函数,在Unicode宽字符串(`wchar_t`)和UTF-8编码之间进行转换(需要包含 `<wchar.h>` 和 `<string.h>`):
1. Unicode转UTF-8:
```c
/**
 * Convert a NUL-terminated wide-character string to a NUL-terminated UTF-8
 * string.
 *
 * Each element of `unicode` is treated as a Unicode code point, except that a
 * UTF-16 surrogate pair (high surrogate immediately followed by a low
 * surrogate) is first combined into the supplementary code point it
 * represents, so UTF-16 input yields well-formed 4-byte UTF-8 rather than two
 * 3-byte surrogate encodings. Unpaired surrogates and values outside
 * U+0000..U+10FFFF are written as U+FFFD (REPLACEMENT CHARACTER).
 *
 * @param utf8     Output buffer. No bounds checking is performed; the caller
 *                 must provide at least 4 * wcslen(unicode) + 1 bytes.
 * @param unicode  NUL-terminated input string; it is only read.
 */
void UnicodeToUTF8(char* utf8, wchar_t* unicode) {
    size_t len = wcslen(unicode);
    size_t out = 0;

    for (size_t i = 0; i < len; i++) {
        /* Work on an unsigned value: where wchar_t is signed, a negative
         * element must not be mistaken for an ASCII character. */
        unsigned long cp = (unsigned long)unicode[i];

        if (cp >= 0xd800 && cp <= 0xdbff) {
            /* High surrogate: valid only when followed by a low surrogate. */
            unsigned long lo = (i + 1 < len) ? (unsigned long)unicode[i + 1] : 0;
            if (lo >= 0xdc00 && lo <= 0xdfff) {
                cp = 0x10000 + ((cp - 0xd800) << 10) + (lo - 0xdc00);
                i++; /* the low surrogate has been consumed as well */
            } else {
                cp = 0xfffd;
            }
        } else if ((cp >= 0xdc00 && cp <= 0xdfff) || cp > 0x10ffff) {
            /* Stray low surrogate or value beyond the Unicode range. */
            cp = 0xfffd;
        }

        if (cp <= 0x7f) {
            utf8[out++] = (char)cp;
        } else if (cp <= 0x7ff) {
            utf8[out++] = (char)(0xc0 | (cp >> 6));
            utf8[out++] = (char)(0x80 | (cp & 0x3f));
        } else if (cp <= 0xffff) {
            utf8[out++] = (char)(0xe0 | (cp >> 12));
            utf8[out++] = (char)(0x80 | ((cp >> 6) & 0x3f));
            utf8[out++] = (char)(0x80 | (cp & 0x3f));
        } else {
            utf8[out++] = (char)(0xf0 | (cp >> 18));
            utf8[out++] = (char)(0x80 | ((cp >> 12) & 0x3f));
            utf8[out++] = (char)(0x80 | ((cp >> 6) & 0x3f));
            utf8[out++] = (char)(0x80 | (cp & 0x3f));
        }
    }
    utf8[out] = 0;
}
```
其中,`utf8`是存放转换后的UTF-8字符串的缓冲区,`unicode`是存放以0结尾的Unicode(宽字符)字符串的缓冲区。函数不检查输出缓冲区的长度,调用者需保证`utf8`至少能容纳 `4 * wcslen(unicode) + 1` 个字节。
2. UTF-8转Unicode:
```c
/**
 * Convert a NUL-terminated UTF-8 string to a NUL-terminated wide-character
 * string.
 *
 * Code points up to U+FFFF are stored as a single wchar_t. Supplementary code
 * points (U+10000..U+10FFFF) are always stored as a UTF-16 surrogate pair
 * (two wchar_t elements), regardless of the width of wchar_t.
 *
 * Ill-formed input is never read past: a stray or invalid lead byte, a
 * sequence with a missing/invalid continuation byte (including one cut off by
 * the terminator), an overlong form, an encoded surrogate, or a value above
 * U+10FFFF each produce one U+FFFD (REPLACEMENT CHARACTER).
 *
 * @param unicode  Output buffer. No bounds checking is performed; the caller
 *                 must provide at least strlen(utf8) + 1 wchar_t elements.
 * @param utf8     NUL-terminated input string; it is only read.
 */
void UTF8ToUnicode(wchar_t* unicode, char* utf8) {
    /* Inspect the bytes as unsigned so values >= 0x80 compare as expected
     * even where plain char is signed. */
    const unsigned char *src = (const unsigned char *)utf8;
    size_t len = strlen(utf8);
    size_t out = 0;
    size_t i = 0;

    while (i < len) {
        unsigned char lead = src[i++];
        unsigned long cp;   /* code point being assembled */
        unsigned long min;  /* smallest value this sequence length may encode */
        int extra;          /* number of continuation bytes expected */

        if (lead < 0x80) {
            unicode[out++] = (wchar_t)lead;
            continue;
        } else if ((lead & 0xe0) == 0xc0) {
            cp = lead & 0x1f;
            extra = 1;
            min = 0x80;
        } else if ((lead & 0xf0) == 0xe0) {
            cp = lead & 0x0f;
            extra = 2;
            min = 0x800;
        } else if ((lead & 0xf8) == 0xf0) {
            cp = lead & 0x07;
            extra = 3;
            min = 0x10000;
        } else {
            /* Stray continuation byte or invalid lead byte (0xf8..0xff). */
            unicode[out++] = (wchar_t)0xfffd;
            continue;
        }

        /* The terminating NUL is not a continuation byte, so this check also
         * stops a truncated sequence from reading beyond the string. */
        int ok = 1;
        size_t j = i;
        for (int k = 0; k < extra; k++) {
            if ((src[j] & 0xc0) != 0x80) {
                ok = 0;
                break;
            }
            cp = (cp << 6) | (unsigned long)(src[j] & 0x3f);
            j++;
        }

        if (!ok) {
            /* Resume at the byte after the lead; it may start a valid sequence. */
            unicode[out++] = (wchar_t)0xfffd;
            continue;
        }
        i = j;

        if (cp < min || (cp >= 0xd800 && cp <= 0xdfff) || cp > 0x10ffff) {
            /* Overlong form, encoded surrogate, or beyond the Unicode range. */
            unicode[out++] = (wchar_t)0xfffd;
        } else if (cp >= 0x10000) {
            /* Subtract the offset from the whole code point before splitting
             * it into the two 10-bit surrogate payloads. */
            unsigned long u = cp - 0x10000;
            unicode[out++] = (wchar_t)(0xd800 + (u >> 10));
            unicode[out++] = (wchar_t)(0xdc00 + (u & 0x03ff));
        } else {
            unicode[out++] = (wchar_t)cp;
        }
    }
    unicode[out] = 0;
}
```
其中,`unicode`是存放转换后的Unicode(宽字符)字符串的缓冲区,`utf8`是存放以0结尾的UTF-8字符串的缓冲区。函数不检查输出缓冲区的长度,调用者需保证`unicode`至少能容纳 `strlen(utf8) + 1` 个`wchar_t`元素。
阅读全文