代码如下:
public static String getCharset(文件文件){
字符串charset =“ gbk”;
字节[] first3bytes = new Byte [3];
尝试 {
布尔值检查= false;
BufferedInputStream bis = new BufferedInputStream(
新的fileInputStream(file));
bis.mark(0);
int read = bis.Read(first3Bytes,0,3);
if(read == -1)
返回字符集;
if(first3bytes [0] ==(字节)0xff && first3bytes [1] ==(byte)0xfe){
charset =“ utf-16le”;
检查= true;
} else if(first3bytes [0] ==(字节)0xfe && first3bytes [1]
==(字节)0xff){
charset =“ UTF-16BE”;
检查= true;
} else if(first3bytes [0] ==(字节)0xef && first3bytes [1]
==(字节)0xbb
&& first3bytes [2] ==(字节)0xbf){
charset =“ utf-8”;
检查= true;
}
bis.reset();
如果(!检查){
int loc = 0;
while(((read = bis.read())!= -1){
loc ++;
如果(读> = 0xf0)
休息;
//单独出现bf以下的,也算是gbk
如果(0x80 <=读&&读<= 0xbf)
休息;
if(0xc0 <= read && read <= 0xdf){
read = bis.read();
if(0x80 <=读&&读<= 0xbf)//双字节(0xc0-0xdf)
//(0x80-
// 0xbf),也可能在GB编码内
继续;
别的
休息;
//也有可能出错,但是几率较小
} else if(0xe0 <= read && read <= 0xef){
read = bis.read();
if(0x80 <= read && read <= 0xbf){
read = bis.read();
if(0x80 <= read && read <= 0xbf){
charset =“ utf-8”;
休息;
} 别的
休息;
} 别的
休息;
}
}
system.out.println(loc +“” + integer.tohexstring(read));
}
bis.close();
} catch(异常E){
e.printstacktrace();
}
返回字符集;
}