Java实现的UTF-8、GBK、Unicode编码相互转换的代码
时间: 2024-02-20 09:01:30 浏览: 52
以下是Java实现的UTF-8、GBK、Unicode编码相互转换的代码:
UTF-8编码转换为Unicode编码(输入字符串的每个char存放一个UTF-8字节):
```java
/**
 * Decodes UTF-8 data that is carried one byte per {@code char} into regular Java text.
 *
 * <p>Each char of {@code utf8Str} in the range 0x00-0xFF is read as one UTF-8 byte.
 * Chars above 0xFF cannot be bytes and are copied to the output unchanged.
 *
 * <p>Malformed input never throws: an invalid lead byte, a stray continuation byte,
 * a sequence that is cut short or has a bad continuation byte, or a four-byte
 * sequence outside U+10000..U+10FFFF produces one U+FFFD for the offending char,
 * and decoding resumes at the next char.
 *
 * @param utf8Str string whose chars hold UTF-8 bytes; must not be {@code null}
 * @return the decoded text
 * @throws NullPointerException if {@code utf8Str} is {@code null}
 */
public static String utf8ToUnicode(String utf8Str) {
    final char replacement = '\uFFFD';
    final int length = utf8Str.length();
    StringBuilder sb = new StringBuilder(length);
    int i = 0;
    while (i < length) {
        char c = utf8Str.charAt(i);

        // ASCII byte, or a char that is not a byte value at all: copy as-is.
        if (c < 0x80 || c > 0xFF) {
            sb.append(c);
            i++;
            continue;
        }

        int needed; // number of continuation bytes announced by the lead byte
        int code;   // payload bits accumulated so far
        if ((c & 0xE0) == 0xC0) {
            // two-byte sequence: 110xxxxx
            needed = 1;
            code = c & 0x1F;
        } else if ((c & 0xF0) == 0xE0) {
            // three-byte sequence: 1110xxxx
            needed = 2;
            code = c & 0x0F;
        } else if ((c & 0xF8) == 0xF0) {
            // four-byte sequence: 11110xxx
            needed = 3;
            code = c & 0x07;
        } else {
            // stray continuation byte (10xxxxxx) or invalid lead (11111xxx)
            sb.append(replacement);
            i++;
            continue;
        }

        // The last continuation byte sits at index i + needed; it must exist.
        boolean valid = i + needed < length;
        for (int k = 1; valid && k <= needed; k++) {
            char next = utf8Str.charAt(i + k);
            if (next > 0xFF || (next & 0xC0) != 0x80) {
                valid = false;
            } else {
                code = (code << 6) | (next & 0x3F);
            }
        }

        // Four-byte sequences may only encode supplementary code points.
        if (valid && needed == 3 && (code < 0x10000 || code > 0x10FFFF)) {
            valid = false;
        }

        if (!valid) {
            sb.append(replacement);
            i++;
            continue;
        }

        // Appends one char for BMP values and a surrogate pair for supplementary ones.
        sb.appendCodePoint(code);
        i += needed + 1;
    }
    return sb.toString();
}
```
Unicode编码转换为UTF-8编码(输出字符串的每个char存放一个UTF-8字节):
```java
/**
 * Encodes Java text as UTF-8 and returns the result one byte per {@code char}.
 *
 * <p>Every char of the returned string has a value in 0x00-0xFF and stands for one
 * UTF-8 byte. A valid surrogate pair is combined into its supplementary code point
 * and written as four bytes. An unpaired surrogate (high or low) is written in the
 * three-byte form of its own 16-bit value rather than causing an exception.
 *
 * @param unicodeStr text to encode; must not be {@code null}
 * @return string whose chars hold the UTF-8 bytes of {@code unicodeStr}
 * @throws NullPointerException if {@code unicodeStr} is {@code null}
 */
public static String unicodeToUtf8(String unicodeStr) {
    final int length = unicodeStr.length();
    StringBuilder sb = new StringBuilder(length);
    int i = 0;
    while (i < length) {
        // Yields the combined code point for a valid surrogate pair,
        // otherwise the value of the single char at i.
        int codePoint = unicodeStr.codePointAt(i);
        if (codePoint <= 0x7F) {
            // one byte: 0xxxxxxx
            sb.append((char) codePoint);
        } else if (codePoint <= 0x7FF) {
            // two bytes: 110xxxxx 10xxxxxx
            sb.append((char) (0xC0 | (codePoint >> 6)));
            sb.append((char) (0x80 | (codePoint & 0x3F)));
        } else if (codePoint <= 0xFFFF) {
            // three bytes: 1110xxxx 10xxxxxx 10xxxxxx
            sb.append((char) (0xE0 | (codePoint >> 12)));
            sb.append((char) (0x80 | ((codePoint >> 6) & 0x3F)));
            sb.append((char) (0x80 | (codePoint & 0x3F)));
        } else {
            // four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
            sb.append((char) (0xF0 | (codePoint >> 18)));
            sb.append((char) (0x80 | ((codePoint >> 12) & 0x3F)));
            sb.append((char) (0x80 | ((codePoint >> 6) & 0x3F)));
            sb.append((char) (0x80 | (codePoint & 0x3F)));
        }
        // Advance by two chars when a surrogate pair was consumed, else by one.
        i += Character.charCount(codePoint);
    }
    return sb.toString();
}
```
GBK编码转换为Unicode编码:
```java
/**
 * Encodes text as GBK and returns the GBK codes packed one code per {@code char}.
 *
 * <p>A character that GBK encodes as a single byte becomes one char with that byte
 * value. A double-byte character becomes one char whose upper 8 bits are the lead
 * byte and whose lower 8 bits are the trail byte. This is the layout that
 * {@code unicodeToGbk} in this file reads back (chars up to 0xFF are single bytes,
 * larger chars are a lead/trail pair with the lead byte in the high half).
 *
 * <p>Characters that GBK cannot represent are replaced by the charset's default
 * replacement byte, as {@link String#getBytes(java.nio.charset.Charset)} specifies.
 *
 * @param gbkStr text to encode; must not be {@code null}
 * @return string of packed GBK codes
 * @throws NullPointerException if {@code gbkStr} is {@code null}
 * @throws java.nio.charset.UnsupportedCharsetException if the runtime has no GBK charset
 */
public static String gbkToUnicode(String gbkStr) {
    byte[] bytes = gbkStr.getBytes(java.nio.charset.Charset.forName("GBK"));
    StringBuilder sb = new StringBuilder(bytes.length);
    int i = 0;
    while (i < bytes.length) {
        int first = bytes[i] & 0xFF;
        // Values from 0x81 upward start a double-byte code; the bounds check keeps
        // a dangling lead byte at the very end from reading past the array.
        if (first >= 0x81 && i + 1 < bytes.length) {
            int second = bytes[i + 1] & 0xFF;
            sb.append((char) ((first << 8) | second));
            i += 2;
        } else {
            sb.append((char) first);
            i++;
        }
    }
    return sb.toString();
}
```
Unicode编码转换为GBK编码:
```java
/**
 * Decodes a string of packed GBK codes back into regular Java text.
 *
 * <p>Each char up to 0xFF is treated as a single-byte character and copied
 * unchanged. Each larger char is treated as a double-byte GBK code, with the lead
 * byte in its upper 8 bits and the trail byte in its lower 8 bits, and is decoded
 * with the GBK charset.
 *
 * <p>The two bytes are taken directly from the char value. Packed codes in the
 * range 0xD800-0xDFFF are therefore decoded like any other code instead of being
 * rejected as unpaired UTF-16 surrogates by an intermediate UTF-16 encoding step.
 *
 * @param unicodeStr string of packed GBK codes; must not be {@code null}
 * @return the decoded text
 * @throws NullPointerException if {@code unicodeStr} is {@code null}
 * @throws java.nio.charset.UnsupportedCharsetException if the runtime has no GBK charset
 */
public static String unicodeToGbk(String unicodeStr) {
    java.nio.charset.Charset gbk = java.nio.charset.Charset.forName("GBK");
    StringBuilder sb = new StringBuilder(unicodeStr.length());
    byte[] pair = new byte[2];
    for (int i = 0; i < unicodeStr.length(); i++) {
        char c = unicodeStr.charAt(i);
        if (c <= 0xFF) {
            // single-byte character
            sb.append(c);
        } else {
            // double-byte character: high half is the lead byte, low half the trail byte
            pair[0] = (byte) (c >> 8);
            pair[1] = (byte) c;
            sb.append(new String(pair, gbk));
        }
    }
    return sb.toString();
}
```
注意:以上代码仅供参考,可能存在一些细节问题,请在实际使用时自行测试和调整。