#ifdef 如何判断UTF8
时间: 2024-03-08 10:41:00 浏览: 18
要判断一个字符串是否为UTF-8编码,可以使用以下C语言代码:
```c
#include <stdio.h>
#include <stdbool.h>
/**
 * Check whether a NUL-terminated byte string is well-formed UTF-8.
 *
 * Validation follows the well-formed byte sequence table of RFC 3629 /
 * the Unicode Standard (Table 3-7):
 *   - overlong encodings are rejected (lead bytes C0/C1, E0 80..9F, F0 80..8F),
 *   - UTF-16 surrogates U+D800..U+DFFF are rejected (ED A0..BF),
 *   - code points above U+10FFFF are rejected (F4 90.., lead bytes F5..FF),
 *   - stray continuation bytes and truncated sequences are rejected.
 *
 * @param str  NUL-terminated string to validate; may be NULL.
 * @return true if every byte up to the terminator belongs to a well-formed
 *         UTF-8 sequence (the empty string is valid); false otherwise,
 *         including when str is NULL.
 */
bool is_utf8(const char *str) {
    if (str == NULL) {
        return false;
    }

    /* Inspect the data as unsigned bytes: plain char may be signed. */
    const unsigned char *p = (const unsigned char *)str;

    while (*p != '\0') {
        const unsigned char lead = *p;

        /* Single-byte (ASCII) character. */
        if (lead < 0x80) {
            p++;
            continue;
        }

        /*
         * Number of continuation bytes, plus the permitted range of the
         * FIRST continuation byte. That range is narrowed for a few lead
         * bytes to exclude overlong forms, surrogates and values beyond
         * U+10FFFF; all later continuation bytes must be 80..BF.
         */
        size_t trail;
        unsigned char first_min = 0x80;
        unsigned char first_max = 0xBF;

        if (lead >= 0xC2 && lead <= 0xDF) {
            /* C0 and C1 could only start overlong 2-byte forms. */
            trail = 1;
        } else if (lead >= 0xE0 && lead <= 0xEF) {
            trail = 2;
            if (lead == 0xE0) {
                first_min = 0xA0; /* below this is an overlong 3-byte form */
            } else if (lead == 0xED) {
                first_max = 0x9F; /* above this is a UTF-16 surrogate */
            }
        } else if (lead >= 0xF0 && lead <= 0xF4) {
            trail = 3;
            if (lead == 0xF0) {
                first_min = 0x90; /* below this is an overlong 4-byte form */
            } else if (lead == 0xF4) {
                first_max = 0x8F; /* above this exceeds U+10FFFF */
            }
        } else {
            /* Stray continuation byte (80..BF) or invalid lead (C0, C1, F5..FF). */
            return false;
        }

        /*
         * Bytes are examined strictly in order and we bail out on the first
         * mismatch, so a terminating NUL inside a truncated sequence stops
         * the scan before anything past the end of the string is read.
         */
        if (p[1] < first_min || p[1] > first_max) {
            return false;
        }
        for (size_t i = 2; i <= trail; i++) {
            if ((p[i] & 0xC0) != 0x80) {
                return false;
            }
        }

        p += trail + 1;
    }

    /* Empty string, or every sequence was well-formed. */
    return true;
}
/*
 * Demo driver: runs is_utf8() on six sample string literals and prints one
 * "true"/"false" line per sample. The bytes of the non-ASCII literals depend
 * on the encoding this source file is saved/compiled in (presumably UTF-8).
 */
int main(void) {
    const char *ascii_text = "Hello, world!";          /* plain ASCII */
    const char *chinese_text = "你好,世界!";          /* Chinese */
    const char *japanese_text = "こんにちは、世界!";   /* Japanese */
    const char *korean_text = "안녕하세요, 세계!";      /* Korean */
    const char *arabic_text = "مرحبا، العالم!";        /* Arabic */
    const char *supplementary_text = "𠜱𠜱𠜱";           /* CJK ideographs outside the BMP */
    bool ok;

    ok = is_utf8(ascii_text);
    printf("str1 is UTF-8: %s\n", ok ? "true" : "false");

    ok = is_utf8(chinese_text);
    printf("str2 is UTF-8: %s\n", ok ? "true" : "false");

    ok = is_utf8(japanese_text);
    printf("str3 is UTF-8: %s\n", ok ? "true" : "false");

    ok = is_utf8(korean_text);
    printf("str4 is UTF-8: %s\n", ok ? "true" : "false");

    ok = is_utf8(arabic_text);
    printf("str5 is UTF-8: %s\n", ok ? "true" : "false");

    ok = is_utf8(supplementary_text);
    printf("str6 is UTF-8: %s\n", ok ? "true" : "false");

    return 0;
}
```
以上代码中,`is_utf8()`函数用于判断一个以`'\0'`结尾的字符串是否为UTF-8编码。它从头到尾逐字节遍历字符串,根据每个起始字节判断该字符占用的字节数,并检查其后的后续字节是否符合UTF-8编码规则。如果整个字符串都满足规则(包括空字符串),则返回`true`;否则返回`false`。