1 /*======================================================================*
2 * Copyright (c) 2008, Yahoo! Inc. All rights reserved. *
4 * Licensed under the New BSD License (the "License"); you may not use *
5 * this file except in compliance with the License. Unless required *
6 * by applicable law or agreed to in writing, software distributed *
7 * under the License is distributed on an "AS IS" BASIS, WITHOUT *
8 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
9 * See the License for the specific language governing permissions and *
10 * limitations under the License. See accompanying LICENSE file. *
11 *======================================================================*/
13 package org
.lwes
.util
;
15 import java
.io
.UnsupportedEncodingException
;
16 import java
.util
.HashMap
;
19 * This is a little class to abstract the character encoding strings that Java
20 * uses into classes which can be checked at compile time.
22 * @author Kevin Scaldeferri
24 public abstract class CharacterEncoding
{
27 private static final HashMap
<String
, CharacterEncoding
> ENCODING_HASH
= new HashMap
<String
, CharacterEncoding
>();
28 private static final String CANONICAL_ASCII_NAME
= "ASCII";
29 private static final String CANONICAL_ISO_8859_1_NAME
= "ISO8859_1";
30 private static final String CANONICAL_UTF_8_NAME
= "UTF8";
31 private static final String CANONICAL_SHIFT_JIS_NAME
= "SJIS";
32 private static final String CANONICAL_EUC_JP_NAME
= "EUC_JP";
33 private static final String CANONICAL_EUC_KR_NAME
= "EUC_KR";
34 private static final String
[] ASCII_ALIASES
= { CANONICAL_ASCII_NAME
,
35 "US-ASCII", "ISO646-US" };
36 private static final String
[] ISO_8859_1_ALIASES
= {
37 CANONICAL_ISO_8859_1_NAME
, "ISO-8859-1", "ISO-LATIN-1", "8859_1" };
38 private static final String
[] UTF_8_ALIASES
= { CANONICAL_UTF_8_NAME
,
40 private static final String
[] SHIFT_JIS_ALIASES
= {
41 CANONICAL_SHIFT_JIS_NAME
, "SHIFTJIS", "SHIFT-JIS", "SHIFT_JIS" };
42 private static final String
[] EUC_JP_ALIASES
= { CANONICAL_EUC_JP_NAME
,
44 private static final String
[] EUC_KR_ALIASES
= { CANONICAL_EUC_KR_NAME
,
48 * Returns Java's canonical form of the encoding.
50 public abstract String
getEncodingString();
53 * Returns the official IANA name for the encoding. Everything except Java
56 public abstract String
getIANAString();
58 public static final CharacterEncoding ISO_8859_1
= new CharacterEncoding() {
59 public String
getEncodingString() {
63 public String
getIANAString() {
68 public static final CharacterEncoding UTF_8
= new CharacterEncoding() {
69 public String
getEncodingString() {
73 public String
getIANAString() {
78 public static final CharacterEncoding ASCII
= new CharacterEncoding() {
79 public String
getEncodingString() {
83 public String
getIANAString() {
88 public static final CharacterEncoding SHIFT_JIS
= new CharacterEncoding() {
89 public String
getEncodingString() {
93 public String
getIANAString() {
98 public static final CharacterEncoding EUC_JP
= new CharacterEncoding() {
99 public String
getEncodingString() {
103 public String
getIANAString() {
108 public static final CharacterEncoding EUC_KR
= new CharacterEncoding() {
109 public String
getEncodingString() {
113 public String
getIANAString() {
119 * This is a highly limited implementation at the moment, so don't expect
121 * @param enc the String representation of the encoding.
122 * @return CharacterEncoding
123 * @throws java.io.UnsupportedEncodingException if the encoding doesnt exist.
125 public static CharacterEncoding
getInstance(String enc
)
126 throws UnsupportedEncodingException
{
127 if (ENCODING_HASH
.containsKey(enc
.toUpperCase())) {
128 return ENCODING_HASH
.get(enc
.toUpperCase());
130 throw new UnsupportedEncodingException(enc
);
135 public int hashCode() {
136 return getEncodingString().hashCode();
139 public boolean equals(Object o
) {
140 return (o
instanceof CharacterEncoding
)
141 && getEncodingString().equals(((CharacterEncoding
) o
).getEncodingString());
146 for (i
= 0; i
< ASCII_ALIASES
.length
; i
++) {
147 ENCODING_HASH
.put(ASCII_ALIASES
[i
], ASCII
);
149 for (i
= 0; i
< ISO_8859_1_ALIASES
.length
; i
++) {
150 ENCODING_HASH
.put(ISO_8859_1_ALIASES
[i
], ISO_8859_1
);
152 for (i
= 0; i
< UTF_8_ALIASES
.length
; i
++) {
153 ENCODING_HASH
.put(UTF_8_ALIASES
[i
], UTF_8
);
155 for (i
= 0; i
< SHIFT_JIS_ALIASES
.length
; i
++) {
156 ENCODING_HASH
.put(SHIFT_JIS_ALIASES
[i
], SHIFT_JIS
);
158 for (i
= 0; i
< EUC_JP_ALIASES
.length
; i
++) {
159 ENCODING_HASH
.put(EUC_JP_ALIASES
[i
], EUC_JP
);
161 for (i
= 0; i
< EUC_KR_ALIASES
.length
; i
++) {
162 ENCODING_HASH
.put(EUC_KR_ALIASES
[i
], EUC_KR
);