Quanto à relação entre Unicode e UTF, basta lembrar: o Unicode é uma organização e uma especificação de codificação, e em Java o termo refere-se ao UTF-16; já os UTF (UTF-8, UTF-16, UTF-32) são os formatos usados para armazenar e transmitir os caracteres, por exemplo na rede.
O código é o seguinte:
public static void main (string [] args) lança UnsupportEdEncodingingException {
Stringutil.str2all ("0 modelo de produto descrição");
Stringutil.str4all ("30000900A74EC1548B57F753CF63F08F");
}
/**
* Tente codificar seqüências numéricas hexadecimais em todos os formatos de codificação
*
* @param hexstr
* @THOWS UnsupportEdEncodingException
*/
public static void str4all (string ustr) lança UnsupportEdEncodingException {
System.out.println ("+++++++++++++++++++++++++++++++++++++++++ +++++++++++++++++++++++++++++++ ");
byte [] bs = novo byte [ustr.length ()/2];
for (int i = 0; i <bs.length; i ++) {
bs [i] = (byte) Integer.parseint (USTR.SubString (i*2, i*2+2), 16);
}
System.out.println (new String (BS, "UTF-8"));
// 16
System.out.println (New String (BS, "UTF-16");
System.out.println (new String (BS, "UTF-16LE"));
System.out.println (new String (BS, "X-UTF-16LE-BOM"));
System.out.println (new String (BS, "UTF-16BE"));
//System.out.println(new String (BS, "X-UTF-16BE-BOM");
// 32
System.out.println (new String (BS, "UTF-32"));
System.out.println (new String (BS, "UTF-32LE"));
System.out.println (new String (BS, "X-UTF-32LE-BOM"));
System.out.println (new String (BS, "UTF-32BE"));
System.out.println (new String (BS, "X-UTF-32LE-BOM"));
}
/**
* Liste todas as cordas numéricas hexadecimais decodificadas correspondentes à codificação
*
* @param ustr
* @THOWS UnsupportEdEncodingException
*/
public static void str2all (string ustr) lança UnsupportEdEncodingException {
System.out.println ("+++++++++++++++++++++++++++++++++++++++++ +++++++++++++++++++++++++++++++ ");
byte [] bs = novo byte [] {};
bs = ustr.getBytes ("utf-8");
para (byte b: bs) {
System.out.print (Integer.toHexString (B & 0XFF));
}
System.out.println ();
// 16
bs = ustr.getBytes ("utf-16");
para (byte b: bs) {
System.out.print (Integer.toHexString (B & 0XFF));
}
System.out.println ();
bs = ustr.getBytes ("utf-16le");
para (byte b: bs) {
System.out.print (Integer.toHexString (B & 0XFF));
}
System.out.println ();
bs = ustr.getBytes ("x-utf-16le-bom");
para (byte b: bs) {
System.out.print (Integer.toHexString (B & 0XFF));
}
System.out.println ();
bs = ustr.getBytes ("utf-16be");
para (byte b: bs) {
System.out.print (Integer.toHexString (B & 0XFF));
}
System.out.println ();
// bs = ustr.getbytes ("x-utf-16Be-bom");
// 32
bs = ustr.getBytes ("UTF-32");
para (byte b: bs) {
System.out.print (Integer.toHexString (B & 0XFF));
}
System.out.println ();
bs = ustr.getBytes ("UTF-32LE");
para (byte b: bs) {
System.out.print (Integer.toHexString (B & 0XFF));
}
System.out.println ();
bs = ustr.getBytes ("x-utf-32le-bom");
para (byte b: bs) {
System.out.print (Integer.toHexString (B & 0XFF));
}
System.out.println ();
bs = ustr.getBytes ("utf-32be");
para (byte b: bs) {
System.out.print (Integer.toHexString (B & 0XFF));
}
System.out.println ();
bs = ustr.getBytes ("x-utf-32le-bom");
para (byte b: bs) {
System.out.print (Integer.toHexString (B & 0XFF));
}
System.out.println ();
}
Lista de nomes de codificação (charsets) e seus aliases
A lista é a seguinte:
CHARSET US-ASCII %s
HistoricalName ASCII
# IANA Aliases
Alias ISO-IR-6
Alias ANSI_X3.4-1986
Alias ISO_646.irv:1991
Alias ASCII
Alias ISO646-US
Alias us
Alias IBM367
Alias cp367
Alias csASCII
Alias default
# Outros aliases
Alias 646 # Solaris Posix Locale
Alias ISO_646.irv:1983
Alias ANSI_X3.4-1968 # Linux Posix Locale (Redhat)
Alias ASCII7
CHARSET UTF-8 UTF_8
HistoricalName Utf8
Alias UTF8
Alias Unicode-1-1-UTF-8
CHARSET UTF-16 UTF_16
HistoricalName UTF-16
Alias UTF_16
Alias UTF16
Alias Unicode
Alias UnicodeBig
charset utf-16be utf_16be
HistoricalName UnicodeBigUnmarked
Alias UTF_16BE
Alias ISO-10646-UCS-2
Alias X-UTF-16BE
Alias UnicodeBigUnmarked
CHARSET UTF-16LE UTF_16LE
HistoricalName UnicodelittleUnMarked
Alias UTF_16LE
Alias X-UTF-16LE
Alias unicodelittleunmarked
charset x-utf-16le-BOM UTF_16LE_BOM
HistoricalName Unicodelittle
Alias Unicodelittle
CHARSET UTF-32 UTF_32
Alias UTF_32
Alias UTF32
CHARSET UTF-32LE UTF_32LE
Alias UTF_32LE
Alias X-UTF-32LE
CHARSET UTF-32BE UTF_32BE
Alias UTF_32BE
Alias X-UTF-32BE
CHARSET X-UTF-32LE-BOM UTF_32LE_BOM
Alias UTF_32LE_BOM
Alias UTF-32LE-BOM
CHARSET X-UTF-32BE-BOM UTF_32BE_BOM
Alias UTF_32BE_BOM
Alias UTF-32BE-BOM
CHARSET ISO-8859-1 %s
HistoricalName ISO8859_1
# IANA Aliases
Alias ISO-IR-100
Alias ISO_8859-1
Alias Latin1
Alias L1
Alias IBM819
Alias CP819
Alias csisolatin1
# Outros aliases
Alias 819
Alias IBM-819
Alias ISO8859_1
Alias ISO_8859-1:1987
Alias ISO_8859_1
Alias 8859_1
Alias ISO8859-1
CHARSET ISO-8859-2 %s
HistoricalName ISO8859_2
Alias ISO8859_2
Alias 8859_2
Alias ISO-IR-101
Alias ISO_8859-2
Alias ISO_8859-2:1987
Alias ISO8859-2
Alias Latin2
Alias L2
Alias IBM912
Alias IBM-912
Alias CP912
Alias 912
Alias csisolatin2
CHARSET ISO-8859-4 %s
HistoricalName ISO8859_4
Alias ISO8859_4
Alias ISO8859-4
Alias 8859_4
Alias ISO-IR-110
Alias ISO_8859-4
Alias ISO_8859-4:1988
Alias Latin4
Alias L4
Alias IBM914
Alias IBM-914
Alias CP914
Alias 914
Alias csisolatin4
CHARSET ISO-8859-5 %s
HistoricalName ISO8859_5
Alias ISO8859_5
Alias 8859_5
Alias ISO-IR-144
Alias ISO_8859-5
Alias ISO_8859-5:1988
Alias ISO8859-5
Alias cyrillic
Alias IBM915
Alias IBM-915
Alias CP915
Alias 915
Alias csisolatincyrillic
CHARSET ISO-8859-7 %s
HistoricalName ISO8859_7
Alias ISO8859_7
Alias 8859_7
Alias ISO-IR-126
Alias ISO_8859-7
Alias ISO_8859-7:1987
Alias elot_928
Alias ECMA-118
Alias greek
Alias greek8
Alias csISOLatinGreek
Alias Sun_eu_greek # Solaris 7/8 Compatibilidade
Alias IBM813
Alias IBM-813
Alias 813
Alias CP813
Alias ISO8859-7 # Solaris 9 Compatibilidade
Charset ISO-8859-9 %s
HistoricalName ISO8859_9
Alias ISO8859_9
Alias 8859_9
Alias ISO-IR-148
Alias ISO_8859-9
Alias ISO_8859-9:1989
Alias ISO8859-9
Alias Latin5
Alias L5
Alias IBM920
Alias IBM-920
Alias 920
Alias CP920
Alias csisolatin5
CHARSET ISO-8859-13 %s
HistoricalName ISO8859_13
Alias ISO8859_13
Alias 8859_13
Alias ISO_8859-13
Alias ISO8859-13
CHARSET ISO-8859-15 %s
HistoricalName ISO8859_15
# IANA Alias
Alias ISO_8859-15
# Outros aliases
Alias 8859_15
Alias ISO8859_15
Alias ISO8859-15
Alias IBM923
Alias IBM-923
Alias CP923
Alias 923
Alias Latin0
Alias Latin9
Alias L9
Alias csisolatin0
Alias csisolatin9
Alias ISO8859_15_FDIS
charset koi8-r %s
HistoricalName KOI8_R
Alias koi8_r
Alias koi8
Alias cskoi8r
CHARSET KOI8-U %s
Alias koi8_u
Charset Windows-1250 %s
HistoricalName CP1250
Alias CP1250
Alias cp5346 # euro ibm ccsid
Charset Windows-1251 %s
HistoricalName Cp1251
Alias CP1251
Alias cp5347 # euro ibm ccsid
Alias ANSI-1251 # Solaris Compatibilidade
Charset Windows-1252 %s
HistoricalName Cp1252
Alias CP1252
Alias CP5348 # Euro IBM CCSID
Charset Windows-1253 %s
HistoricalName Cp1253
Alias CP1253
Alias CP5349 # Euro IBM CCSID
Charset Windows-1254 %s
HistoricalName Cp1254
Alias CP1254
Alias CP5350 # Euro IBM CCSID
Charset Windows-1257 %s
HistoricalName Cp1257
Alias CP1257
Alias CP5353 # Euro IBM CCSID
CHARSET IBM437 %s
HistoricalName CP437
Alias CP437
Alias IBM-437
Alias 437
Alias CSPC8CodePage437
Alias Windows-437
Charset X-IBM737 %s
HistoricalName CP737
Alias CP737
Alias IBM737
Alias IBM-737
Alias 737
charset ibm775 %s
HistoricalName Cp775
Alias CP775
Alias IBM-775
Alias 775
charset ibm850 %s
HistoricalName CP850
Alias CP850
Alias IBM-850
Alias 850
Alias cspc850Multilingual
CHARSET IBM852 %s
HistoricalName CP852
Alias CP852
Alias IBM-852
Alias 852
Alias CSPCP852
CHARSET IBM855 %s
HistoricalName CP855
Alias CP855
Alias IBM-855
Alias 855
Alias CSPCP855
CHARSET IBM857 %s
HistoricalName CP857
Alias CP857
Alias IBM-857
Alias 857
Alias CSIBM857
charset ibm00858 %s
HistoricalName CP858
Alias CP858
Alias CCSID00858
Alias CP00858
Alias 858
charset ibm862 %s
HistoricalName CP862
Alias CP862
Alias IBM-862
Alias 862
Alias CSIBM862
Alias cspc862latinhebrew
charset ibm866 %s
HistoricalName CP866
Alias CP866
Alias IBM-866
Alias 866
Alias CSIBM866
Charset X-IBM874 %s
HistoricalName CP874
Alias CP874
Alias IBM874
Alias IBM-874
Alias 874