Sobre la relación entre Unicode y UTF, basta con recordar lo siguiente: Unicode es una organización y, a la vez, una especificación de codificación; en Java, «Unicode» se refiere a UTF-16 (al menos, eso es lo que se lee en la red).
El código es el siguiente:
Public static void main (string [] args) lanza no comportedEncodingException {
StringUtil.str2all ("0 Descripción del modelo del producto");
StringUtil.str4all ("30000900A74EC1548B57F753CF63F08F");
}
/**
* Intenta codificar cadenas numéricas hexadecimales en todos los formatos de codificación
*
* @param hexstr
* @throws UnsupportedEncodingException
*/
public static void str4all (String USTR) lanza UnspportedEnCodingException {
System.out.println ("+++++++++++++++++++++++++++++++++++++++++++++++++++++++ ++++++++++++++++++++++++++++++ ++++++++++ ");
byte [] bs = nuevo byte [ustr.length ()/2];
para (int i = 0; i <bs.length; i ++) {
bs [i] = (byte) integer.parseint (ustr.substring (i*2, i*2+2), 16);
}
System.out.println (nueva cadena (BS, "UTF-8"));
// 16
System.out.println (nueva cadena (BS, "UTF-16"));
System.out.println (nueva cadena (BS, "UTF-16le"));
System.out.println (nueva cadena (BS, "X-UTF-16LE-BOM"));
System.out.println (nueva cadena (BS, "UTF-16BE"));
//System.out.println(new String (BS, "X-UTF-16BE-BOM"));
// 32
System.out.println (nueva cadena (BS, "UTF-32"));
System.out.println (nueva cadena (BS, "UTF-32LE"));
System.out.println (nueva cadena (BS, "X-UTF-32LE-BOM"));
System.out.println (nueva cadena (BS, "UTF-32BE"));
System.out.println (nueva cadena (BS, "X-UTF-32LE-BOM"));
}
/**
* Enumere todas las cadenas numéricas hexadecimales decodificadas correspondientes a la codificación
*
* @param ustr
* @throws UnsupportedEncodingException
*/
public static void str2all (string ustr) lanza no comportedEncodingException {
System.out.println ("+++++++++++++++++++++++++++++++++++++++++++++++++++++++ ++++++++++++++++++++++++++++++ ++++++++++ ");
byte [] bs = new byte [] {};
bs = ustr.getbytes ("utf-8");
para (byte b: bs) {
System.out.print (integer.tohexString (b & 0xff));
}
System.out.println ();
// 16
bs = ustr.getBytes ("UTF-16");
para (byte b: bs) {
System.out.print (integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("utf-16le");
para (byte b: bs) {
System.out.print (integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("X-UTF-16LE-BOM");
para (byte b: bs) {
System.out.print (integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("utf-16be");
para (byte b: bs) {
System.out.print (integer.tohexString (b & 0xff));
}
System.out.println ();
// bs = ustr.getBytes ("X-UTF-16BE-BOM");
// 32
bs = ustr.getBytes ("UTF-32");
para (byte b: bs) {
System.out.print (integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("UTF-32LE");
para (byte b: bs) {
System.out.print (integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("X-UTF-32LE-BOM");
para (byte b: bs) {
System.out.print (integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("UTF-32BE");
para (byte b: bs) {
System.out.print (integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("X-UTF-32LE-BOM");
para (byte b: bs) {
System.out.print (integer.tohexString (b & 0xff));
}
System.out.println ();
}
Colección de nombres de codificación
La lista es la siguiente:
charset us-ascii %s
historicalName ASCII
# Alias de IANA
alias iso-ir-6
alias ANSI_X3.4-1986
Alias ISO_646.IRV: 1991
alias ascii
alias iso646-us
alias us
Alias IBM367
Alias CP367
alias csascii
alias default
# Otros alias
Alias 646 # Solaris Posix local
Alias ISO_646.IRV: 1983
alias ANSI_X3.4-1968 # Linux Posix Locale (Redhat)
alias ascii7
Charset UTF-8 UTF_8
HistoricalName UTF8
Alias UTF8
alias unicode-1-1-UTF-8
Charset UTF-16 UTF_16
HistoricalName UTF-16
alias UTF_16
Alias UTF16
alias unicode
alias unicodebig
Charset UTF-16BE UTF_16BE
historicalName UnicodeBigUnmarked
alias UTF_16BE
Alias ISO-10646-UCS-2
Alias X-UTF-16BE
alias unicodebigunmarked
Charset UTF-16LE UTF_16LE
historicalName UnicodeLittleUnmarked
alias utf_16le
alias X-UTF-16LE
alias unicodelittleunmarked
Charset X-UTF-16LE-BOM UTF_16LE_BOM
historicalName UnicodeLittle
alias unicodelittle
Charset UTF-32 UTF_32
alias UTF_32
Alias UTF32
Charset UTF-32LE UTF_32LE
alias utf_32le
alias X-UTF-32LE
Charset UTF-32BE UTF_32BE
alias UTF_32BE
alias X-UTF-32BE
Charset X-UTF-32LE-BOM UTF_32LE_BOM
alias UTF_32LE_BOM
alias UTF-32LE-BOM
Charset X-UTF-32BE-BOM UTF_32BE_BOM
alias UTF_32BE_BOM
alias UTF-32BE-BOM
Charset ISO-8859-1 %S
HistoricalName ISO8859_1
# Alias de IANA
alias iso-ir-100
Alias ISO_8859-1
Alias Latin1
Alias L1
Alias IBM819
Alias CP819
alias csisolatin1
# Otros alias
Alias 819
Alias IBM-819
alias iso8859_1
Alias ISO_8859-1: 1987
alias iso_8859_1
Alias 8859_1
Alias ISO8859-1
Charset ISO-8859-2 %S
HistoricalName ISO8859_2
alias iso8859_2
Alias 8859_2
alias iso-ir-101
alias iso_8859-2
Alias ISO_8859-2: 1987
alias iso8859-2
Alias Latin2
Alias L2
Alias IBM912
Alias IBM-912
Alias CP912
Alias 912
alias csisolatin2
Charset ISO-8859-4 %S
HistoricalName ISO8859_4
alias iso8859_4
Alias ISO8859-4
Alias 8859_4
alias iso-ir-110
alias iso_8859-4
Alias ISO_8859-4: 1988
Alias Latin4
alias l4
Alias IBM914
Alias IBM-914
Alias CP914
Alias 914
alias csisolatin4
Charset ISO-8859-5 %s
HistoricalName ISO8859_5
alias iso8859_5
Alias 8859_5
alias iso-ir-144
Alias ISO_8859-5
Alias ISO_8859-5: 1988
Alias ISO8859-5
alias cyrillic
Alias IBM915
Alias IBM-915
Alias CP915
Alias 915
alias csISOLatinCyrillic
Charset ISO-8859-7 %S
HistoricalName ISO8859_7
alias iso8859_7
Alias 8859_7
alias iso-ir-126
alias iso_8859-7
Alias ISO_8859-7: 1987
alias elot_928
Alias ECMA-118
alias greek
alias greek8
alias csisolatingreek
alias sun_eu_greek # Compatibilidad con Solaris 7/8
Alias IBM813
Alias IBM-813
Alias 813
Alias CP813
Alias ISO8859-7 # Solaris 9 Compatibilidad
Charset ISO-8859-9 %s
HistoricalName ISO8859_9
Alias ISO8859_9
Alias 8859_9
alias iso-ir-148
Alias ISO_8859-9
Alias ISO_8859-9: 1989
Alias ISO8859-9
Alias Latin5
Alias L5
Alias IBM920
Alias IBM-920
Alias 920
Alias CP920
alias csisolatin5
Charset ISO-8859-13 %s
HistoricalName ISO8859_13
alias iso8859_13
Alias 8859_13
Alias ISO_8859-13
Alias ISO8859-13
Charset ISO-8859-15 %s
HistoricalName ISO8859_15
# Alias IANA
Alias ISO_8859-15
# Otros alias
Alias 8859_15
alias iso8859_15
Alias ISO8859-15
Alias IBM923
Alias IBM-923
Alias CP923
Alias 923
Alias Latin0
Alias Latin9
Alias L9
alias csisolatin0
alias csisolatin9
Alias ISO8859_15_FDIS
charset koi8-r %s
historicalName KOI8_R
alias koi8_r
alias koi8
alias cskoi8r
charset koi8-u %s
alias koi8_u
Charset Windows-1250 %s
HistoricalName CP1250
Alias CP1250
Alias CP5346 # Euro IBM CCSID
Charset Windows-1251 %s
HistoricalName CP1251
Alias CP1251
Alias CP5347 # Euro IBM CCSID
alias ANSI-1251 # compatibilidad de solaris
Charset Windows-1252 %s
HistoricalName CP1252
Alias CP1252
Alias CP5348 # Euro IBM CCSID
Charset Windows-1253 %s
HistoricalName CP1253
Alias CP1253
Alias CP5349 # Euro IBM CCSID
Charset Windows-1254 %s
HistoricalName CP1254
Alias CP1254
Alias CP5350 # Euro IBM CCSID
Charset Windows-1257 %s
HistoricalName CP1257
Alias CP1257
Alias CP5353 # Euro IBM CCSID
charset ibm437 %s
HistoricalName CP437
Alias CP437
Alias IBM-437
Alias 437
Alias CSPC8CODEPAGE437
Alias Windows-437
Charset X-IBM737 %s
HistoricalName CP737
Alias CP737
Alias IBM737
Alias IBM-737
Alias 737
Charset IBM775 %s
HistoricalName CP775
Alias CP775
Alias IBM-775
Alias 775
Charset IBM850 %S
HistoricalName CP850
Alias CP850
Alias IBM-850
Alias 850
alias csPC850Multilingual
Charset IBM852 %S
HistoricalName CP852
Alias CP852
Alias IBM-852
Alias 852
Alias CSPCP852
charset ibm855 %s
HistoricalName CP855
Alias CP855
Alias IBM-855
Alias 855
Alias CSPCP855
charset ibm857 %s
HistoricalName CP857
Alias CP857
Alias IBM-857
Alias 857
Alias CSIBM857
Charset IBM00858 %s
HistoricalName CP858
Alias CP858
Alias CCSID00858
Alias CP00858
Alias 858
charset ibm862 %s
HistoricalName CP862
Alias CP862
Alias IBM-862
Alias 862
Alias CSIBM862
alias cspc862latinhebrew
charset ibm866 %s
HistoricalName CP866
Alias CP866
Alias IBM-866
Alias 866
Alias CSIBM866
Charset X-IBM874 %s
HistoricalName CP874
Alias CP874
Alias IBM874
Alias IBM-874
Alias 874