代码如下:
public static String getCharSet (Dateidatei) {
String charset = "gbk";
byte [] first3Bytes = neues Byte [3];
versuchen {
boolean checked = false;
BufferedInputStream Bis = neuer BufferedInputStream (
neuer FileInputStream (Datei));
Bis.mark (0);
int read = Bis.Read (First3Bytes, 0, 3);
if (read == -1)
charset zurückgeben;
if (first3Bytes [0] == (byte) 0xff && first3Bytes [1] == (byte) 0xfe) {
charset = "utf-16le";
geprüft = wahr;
} else if (first3Bytes [0] == (byte) 0xfe && first3Bytes [1]
== (byte) 0xff) {
charset = "utf-16be";
geprüft = wahr;
} else if (first3Bytes [0] == (byte) 0xef && first3Bytes [1]
== (byte) 0xbb
&& first3Bytes [2] == (byte) 0xbf) {
charset = "utf-8";
geprüft = wahr;
}
Bis.reset ();
if (! Checked) {
int loc = 0;
while ((read = bis.read ())! = -1) {
loc ++;
if (read> = 0xf0)
brechen;
// 单独出现 bf 以下的 , 也算是 gbk
if (0x80 <= read && read <= 0xbf)
brechen;
if (0xc0 <= read && read <= 0xdf) {
Read = Bis.Read ();
if (0x80 <= read && read <= 0xbf) // 双字节 (0xc0 - 0xdf)
// (0x80 -
// 0xbf), 也可能在 gb 编码内
weitermachen;
anders
brechen;
// 也有可能出错 , 但是几率较小
} else if (0xe0 <= read && read <= 0xef) {
Read = Bis.Read ();
if (0x80 <= read && read <= 0xbf) {
Read = Bis.Read ();
if (0x80 <= read && read <= 0xbf) {
charset = "utf-8";
brechen;
} anders
brechen;
} anders
brechen;
}
}
System.out.println (loc + "" + integer.tohexstring (read));
}
Bis.close ();
} catch (Ausnahme e) {
E. printstacktrace ();
}
charset zurückgeben;
}