代码如下:
public static string getCharset (file file) {
String charset = "gbk";
byte [] first3bytes = byte baru [3];
mencoba {
boolean checked = false;
BufferedInputStream bis = baru bufferedInputStream (
FileInputStream baru (file));
bis.mark (0);
int read = bis.read (first3bytes, 0, 3);
if (baca == -1)
Return Charset;
if (first3bytes [0] == (byte) 0xff && first3bytes [1] == (byte) 0xfe) {
charset = "UTF-16LE";
diperiksa = true;
} else if (first3bytes [0] == (byte) 0xfe && first3bytes [1]
== (byte) 0xff) {
charset = "UTF-16BE";
diperiksa = true;
} else if (first3bytes [0] == (byte) 0xef && first3bytes [1]
== (byte) 0xBB
&& first3bytes [2] == (byte) 0xbf) {
charset = "UTF-8";
diperiksa = true;
}
bis.reset ();
if (! checked) {
int loc = 0;
while ((baca = bis.read ())! = -1) {
loc ++;
if (baca> = 0xf0)
merusak;
// 单独出现 bf 以下的 , 也算是 gbk
if (0x80 <= baca && baca <= 0xbf)
merusak;
if (0xc0 <= baca && baca <= 0xdf) {
baca = bis.read ();
if (0x80 <= baca && baca <= 0xbf) // 双字节 (0xc0 - 0xdf)
// (0x80 -
// 0xbf), 也可能在 GB 编码内
melanjutkan;
kalau tidak
merusak;
// 也有可能出错 , 但是几率较小
} else if (0xe0 <= baca && baca <= 0xef) {
baca = bis.read ();
if (0x80 <= baca && baca <= 0xbf) {
baca = bis.read ();
if (0x80 <= baca && baca <= 0xbf) {
charset = "UTF-8";
merusak;
} kalau tidak
merusak;
} kalau tidak
merusak;
}
}
System.out.println (loc + "" + integer.tohexstring (baca));
}
bis.close ();
} catch (Exception e) {
e.printstacktrace ();
}
Return Charset;
}