代码如下:
String statique public getCharset (fichier fichier) {
String charSet = "gbk";
octet [] first3bytes = nouveau octet [3];
essayer {
booléen vérifié = false;
BufferedInputStream bis = new BufferedInputStream (
new FileInputStream (fichier));
bis.mark (0);
int lien = bis.read (first3bytes, 0, 3);
if (lire == -1)
retour charset;
if (first3Bytes [0] == (byte) 0xff && premierBytes [1] == (byte) 0xfe) {
charset = "utf-16le";
vérifié = true;
} else if (first3Bytes [0] == (byte) 0xfe && first3bytes [1]
== (byte) 0xff) {
charSet = "UTF-16BE";
vérifié = true;
} else if (First3Bytes [0] == (BYTE) 0XEF && First3Bytes [1]
== (octet) 0xbb
&& first3bytes [2] == (byte) 0xbf) {
charSet = "UTF-8";
vérifié = true;
}
bis.reset ();
if (! vérifié) {
int loc = 0;
while ((read = bis.read ())! = -1) {
loc ++;
if (lire> = 0xf0)
casser;
// 单独出现 bf 以下的 , 也算是 gbk
if (0x80 <= read && read <= 0xbf)
casser;
if (0xc0 <= read && read <= 0xdf) {
read = bis.read ();
if (0x80 <= read && read <= 0xbf) // 双字节 (0xc0 - 0xdf)
// (0x80 -
// 0xbf), 也可能在 gb 编码内
continuer;
autre
casser;
// 也有可能出错 , 但是几率较小
} else if (0xe0 <= read && read <= 0xef) {
read = bis.read ();
if (0x80 <= read && read <= 0xbf) {
read = bis.read ();
if (0x80 <= read && read <= 0xbf) {
charSet = "UTF-8";
casser;
} autre
casser;
} autre
casser;
}
}
System.out.println (LOC + "" + Integer.tohexString (lecture));
}
bis.close ();
} catch (exception e) {
e.printStackTrace ();
}
retour charset;
}