代码如下:
public static string getCharset(file file){
文字列charset = "gbk";
byte [] first3bytes = new byte [3];
試す {
boolean checked = false;
BufferedInputStream bis = new BufferedInputStream(
new FileInputStream(file));
bis.mark(0);
int read = bis.read(first3bytes、0、3);
if(read == -1)
charsetを返します。
if(first3bytes [0] ==(byte)0xff && first3bytes [1] ==(byte)0xfe){
charset = "utf-16le";
checked = true;
} else if(first3bytes [0] ==(byte)0xfe && first3bytes [1]
==(byte)0xff){
charset = "utf-16be";
checked = true;
} else if(first3bytes [0] ==(byte)0xef && first3bytes [1]
==(byte)0xbb
&& first3bytes [2] ==(byte)0xbf){
charset = "utf-8";
checked = true;
}
bis.reset();
if(!checked){
int loc = 0;
while((read = bis.read())!= -1){
loc ++;
if(read> = 0xf0)
壊す;
//单独出现bf以下的、也算是gbk
if(0x80 <= read && read <= 0xbf)
壊す;
if(0xc0 <= read && read <= 0xdf){
read = bis.read();
if(0x80 <= read && read <= 0xbf)//双字节(0xc0-0xdf)
//(0x80-
// 0xbf)、gb编码内
続く;
それ以外
壊す;
//也有可能出错、但是几率较小
} else if(0xe0 <= read && read <= 0xef){
read = bis.read();
if(0x80 <= read && read <= 0xbf){
read = bis.read();
if(0x80 <= read && read <= 0xbf){
charset = "utf-8";
壊す;
} それ以外
壊す;
} それ以外
壊す;
}
}
system.out.println(loc + "" + integer.tohexstring(read));
}
bis.close();
} catch(例外e){
e.printstacktrace();
}
charsetを返します。
}