Concernant la relation entre Unicode et UTF, retenez simplement ceci : Unicode désigne à la fois un organisme de normalisation et une spécification de codage et, en Java, il renvoie à UTF-16 ; les formats UTF, quant à eux, sont les formes sous lesquelles les caractères Unicode sont transmis sur le réseau.
Le code est le suivant :
public static void main (String [] args) lève un peu supporté
StringUtil.str2all ("0 Description du modèle de produit");
StringUtil.str4all ("30000900a74ec1548b57f753cf63f08f");
}
/ **
* Essayez de coder les cordes numériques hexadécimales dans tous les formats de codage
*
* @param hexstr
* @Throws UnportEdenCodingException
* /
public static void Str4all (String USTR) lance unportdencodingException {
System.out.println ("++++++++++++++++++++++++++++++++++++++++++++++++ ++++++++++++++++++++++++++ +++++++++ ");
octet [] bs = nouveau octet [ustr.length () / 2];
for (int i = 0; i <bs.length; i ++) {
bs [i] = (byte) Integer.ParseInt (ustr.substring (i * 2, i * 2 + 2), 16);
}
System.out.println (new String (BS, "UTF-8"));
// 16
System.out.println (nouvelle chaîne (BS, "UTF-16"));
System.out.println (new String (BS, "UTF-16Le"));
System.out.println (new String (BS, "X-UTF-16Le-Bom"));
System.out.println (new String (BS, "UTF-16BE"));
//System.out.println(New String (BS, "X-UTF-16BE-BOM"));
// 32
System.out.println (new String (BS, "UTF-32"));
System.out.println (new String (BS, "UTF-32LE"));
System.out.println (New String (BS, "X-UTF-32LE-BOM"));
System.out.println (new String (BS, "UTF-32BE"));
System.out.println (New String (BS, "X-UTF-32LE-BOM"));
}
/ **
* Énumérez toutes les chaînes numériques hexadécimales décodées correspondant à l'encodage
*
* @param ustr
* @Throws UnportEdenCodingException
* /
public static void Str2all (String USTR) lance unportdencodingException {
System.out.println ("++++++++++++++++++++++++++++++++++++++++++++++++ ++++++++++++++++++++++++++ +++++++++ ");
octet [] bs = new byte [] {};
bs = ustr.getBytes ("utf-8");
pour (octet b: bs) {
System.out.print (Integer.tohexString (b & 0xff));
}
System.out.println ();
// 16
bs = ustr.getBytes ("utf-16");
pour (octet b: bs) {
System.out.print (Integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("utf-16Le");
pour (octet b: bs) {
System.out.print (Integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("x-utf-16le-bom");
pour (octet b: bs) {
System.out.print (Integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("UTF-16BE");
pour (octet b: bs) {
System.out.print (Integer.tohexString (b & 0xff));
}
System.out.println ();
// bs = ustr.getbytes ("x-utf-16be-bom");
// 32
bs = ustr.getBytes ("UTF-32");
pour (octet b: bs) {
System.out.print (Integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("utf-32le");
pour (octet b: bs) {
System.out.print (Integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("x-utf-32le-bom");
pour (octet b: bs) {
System.out.print (Integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("UTF-32BE");
pour (octet b: bs) {
System.out.print (Integer.tohexString (b & 0xff));
}
System.out.println ();
bs = ustr.getBytes ("x-utf-32le-bom");
pour (octet b: bs) {
System.out.print (Integer.tohexString (b & 0xff));
}
System.out.println ();
}
Liste des noms d'encodage (charsets) et de leurs alias
Le code est le suivant :
Charset US-ascii% s
HistoricalName ASCII
# Iana alias
alias iso-ir-6
alias ansi_x3.4-1986
alias iso_646.irv: 1991
alias ascii
alias iso646-us
alias us
alias ibm367
alias CP367
alias csascii
alias default
# Autres alias
Alias 646 # Solaris Posix Locale
alias iso_646.irv: 1983
Alias ANSI_X3.4-1968 # Linux Posix Locale (Redhat)
alias ascii7
Charset UTF-8 UTF_8
HistoricalName UTF8
alias utf8
alias unicode-1-1-utf-8
Charset UTF-16 UTF_16
HistoricalName UTF-16
alias utf_16
alias utf16
Alias Unicode
alias unicodebig
Charset UTF-16BE UTF_16BE
HistoricalName Unicodebigunmarked
alias utf_16be
alias iso-10646-ucs-2
alias x-utf-16be
alias unicodebigunmarked
charset utf-16le utf_16le
HistoricalName UnicodeLittleUnmarked
alias utf_16le
alias x-utf-16le
alias unicodelittleUnmarked
charset x-utf-16le-bom utf_16le_bom
HistoricalName UnicodeLittle
alias UnicodeLittle
Charset UTF-32 UTF_32
alias utf_32
alias utf32
charset utf-32le utf_32le
alias utf_32le
alias x-utf-32le
Charset UTF-32BE UTF_32BE
alias utf_32be
alias x-utf-32be
charset x-utf-32le-bom utf_32le_bom
alias utf_32le_bom
alias utf-32le-bom
Charset X-UTF-32BE-BOM UTF_32BE_BOM
alias utf_32be_bom
alias utf-32be-bom
Charset ISO-8859-1% S
HistoricalName ISO8859_1
# Iana alias
Alias ISO-IR-100
alias iso_8859-1
alias latin1
alias l1
alias ibm819
Alias CP819
alias csisolatin1
# Autres alias
Alias 819
Alias IBM-819
alias iso8859_1
alias iso_8859-1: 1987
alias iso_8859_1
Alias 8859_1
alias iso8859-1
Charset ISO-8859-2% S
HistoricalName ISO8859_2
alias iso8859_2
Alias 8859_2
alias iso-ir-101
alias iso_8859-2
alias iso_8859-2: 1987
alias iso8859-2
alias latin2
alias l2
alias ibm912
Alias IBM-912
alias cp912
alias 912
alias csisolatin2
Charset ISO-8859-4% S
HistoricalName ISO8859_4
alias iso8859_4
alias iso8859-4
alias 8859_4
Alias ISO-IR-110
alias iso_8859-4
alias iso_8859-4: 1988
alias latin4
Alias L4
alias ibm914
Alias IBM-914
alias cp914
Alias 914
alias csisolatin4
Charset ISO-8859-5% S
HistoricalName ISO8859_5
alias iso8859_5
Alias 8859_5
alias iso-ir-144
alias iso_8859-5
Alias ISO_8859-5: 1988
alias iso8859-5
alias cyrillic
alias ibm915
Alias IBM-915
alias cp915
Alias 915
alias csISOLatinCyrillic
Charset ISO-8859-7% S
HistoricalName ISO8859_7
alias iso8859_7
Alias 8859_7
alias iso-ir-126
alias iso_8859-7
alias iso_8859-7: 1987
alias elot_928
alias ecma-118
alias greek
alias greek8
alias csisolatingreek
alias sun_eu_greek # Solaris 7/8 Compatibilité
alias ibm813
Alias IBM-813
Alias 813
alias cp813
Alias ISO8859-7 # Solaris 9 Compatibilité
Charset ISO-8859-9% S
HistoricalName ISO8859_9
alias iso8859_9
Alias 8859_9
alias iso-ir-148
alias iso_8859-9
Alias ISO_8859-9: 1989
alias iso8859-9
alias latin5
alias l5
alias ibm920
Alias IBM-920
Alias 920
alias cp920
alias csisolatin5
Charset ISO-8859-13% S
HistoricalName ISO8859_13
alias iso8859_13
Alias 8859_13
alias iso_8859-13
alias iso8859-13
Charset ISO-8859-15% S
HistoricalName ISO8859_15
# Iana alias
alias iso_8859-15
# Autres alias
Alias 8859_15
alias iso8859_15
alias iso8859-15
alias ibm923
Alias IBM-923
alias cp923
Alias 923
alias latin0
alias latin9
Alias L9
alias csisolatin0
alias csisolatin9
alias iso8859_15_fdis
Charset KOI8-R% S
HistoricalName koi8_r
alias koi8_r
alias koi8
alias cskoi8r
Charset koi8-u% s
alias koi8_u
Charset Windows-1250% S
HistoricalName CP1250
Alias CP1250
Alias CP5346 # Euro IBM CCSID
Charset Windows-1251% S
HistoricalName CP1251
alias cp1251
Alias CP5347 # Euro IBM CCSID
Alias ANSI-1251 # Compatibilité Solaris
Charset Windows-1252% S
HistoricalName CP1252
alias cp1252
Alias CP5348 # Euro IBM CCSID
Charset Windows-1253% S
HistoricalName CP1253
alias cp1253
Alias CP5349 # Euro IBM CCSID
Charset Windows-1254% S
HistoricalName CP1254
Alias CP1254
Alias CP5350 # Euro IBM CCSID
Charset Windows-1257% S
HistoricalName CP1257
Alias CP1257
Alias CP5353 # Euro IBM CCSID
Charset IBM437% S
HistoricalName CP437
Alias CP437
Alias IBM-437
Alias 437
alias cspc8codepage437
Alias Windows-437
Charset X-IBM737% S
HistoricalName CP737
alias cp737
alias ibm737
Alias IBM-737
Alias 737
Charset IBM775% S
HistoricalName CP775
Alias CP775
Alias IBM-775
Alias 775
Charset IBM850% S
HistoricalName CP850
Alias CP850
Alias IBM-850
Alias 850
alias cspc850multiling
Charset IBM852% S
HistoricalName CP852
alias cp852
Alias IBM-852
Alias 852
alias cspcp852
Charset IBM855% S
HistoricalName CP855
Alias CP855
Alias IBM-855
Alias 855
alias cspcp855
Charset IBM857% S
HistoricalName CP857
Alias CP857
Alias IBM-857
Alias 857
alias csibm857
Charset IBM00858% S
HistoricalName CP858
Alias CP858
alias ccsid00858
Alias CP00858
Alias 858
Charset IBM862% S
HistoricalName CP862
alias cp862
Alias IBM-862
Alias 862
alias csibm862
Alias CSPC862Latinhebrew
Charset IBM866% S
HistoricalName CP866
alias cp866
Alias IBM-866
Alias 866
alias csibm866
Charset X-IBM874% S
HistoricalName CP874
Alias CP874
Alias IBM874
Alias IBM-874
Alias 874