changing license to BSD, assigning Yahoo copyrights where appropriate
[lwes-java.git] / src / main / java / org / lwes / util / CharacterEncoding.java
blob399104972c0485d758f5039bc6c18c65a05ef83d
1 /*======================================================================*
2 * Copyright (c) 2008, Yahoo! Inc. All rights reserved. *
3 * *
4 * Licensed under the New BSD License (the "License"); you may not use *
5 * this file except in compliance with the License. Unless required *
6 * by applicable law or agreed to in writing, software distributed *
7 * under the License is distributed on an "AS IS" BASIS, WITHOUT *
8 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
9 * See the License for the specific language governing permissions and *
10 * limitations under the License. See accompanying LICENSE file. *
11 *======================================================================*/
13 package org.lwes.util;
import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Locale;
/**
 * This is a little class to abstract the character encoding strings that Java
 * uses into classes which can be checked at compile time.
 *
 * <p>A fixed set of encodings is exposed as constants ({@link #ASCII},
 * {@link #ISO_8859_1}, {@link #UTF_8}, {@link #SHIFT_JIS}, {@link #EUC_JP},
 * {@link #EUC_KR}); {@link #getInstance(String)} maps a textual alias to one
 * of those constants.
 *
 * @author Kevin Scaldeferri
 */
public abstract class CharacterEncoding {

    // Alias table. It is filled once by the static initializer at the bottom
    // of this class and only read afterwards. Keys are upper-cased aliases.
    private static final HashMap<String, CharacterEncoding> ENCODING_HASH = new HashMap<String, CharacterEncoding>();

    private static final String CANONICAL_ASCII_NAME = "ASCII";
    private static final String CANONICAL_ISO_8859_1_NAME = "ISO8859_1";
    private static final String CANONICAL_UTF_8_NAME = "UTF8";
    private static final String CANONICAL_SHIFT_JIS_NAME = "SJIS";
    private static final String CANONICAL_EUC_JP_NAME = "EUC_JP";
    private static final String CANONICAL_EUC_KR_NAME = "EUC_KR";

    private static final String[] ASCII_ALIASES = { CANONICAL_ASCII_NAME,
            "US-ASCII", "ISO646-US" };
    private static final String[] ISO_8859_1_ALIASES = {
            CANONICAL_ISO_8859_1_NAME, "ISO-8859-1", "ISO-LATIN-1", "8859_1" };
    private static final String[] UTF_8_ALIASES = { CANONICAL_UTF_8_NAME,
            "UTF-8" };
    private static final String[] SHIFT_JIS_ALIASES = {
            CANONICAL_SHIFT_JIS_NAME, "SHIFTJIS", "SHIFT-JIS", "SHIFT_JIS" };
    private static final String[] EUC_JP_ALIASES = { CANONICAL_EUC_JP_NAME,
            "EUC-JP" };
    private static final String[] EUC_KR_ALIASES = { CANONICAL_EUC_KR_NAME,
            "EUC-KR" };

    /**
     * Returns Java's canonical form of the encoding.
     *
     * <p>This string is also what {@link #equals(Object)} and
     * {@link #hashCode()} are based on.
     *
     * @return the encoding name to hand to Java APIs
     */
    public abstract String getEncodingString();

    /**
     * Returns the official IANA name for the encoding. Everything except Java
     * expects this form.
     *
     * @return the IANA name of the encoding
     */
    public abstract String getIANAString();

    public static final CharacterEncoding ISO_8859_1 = new CharacterEncoding() {
        @Override
        public String getEncodingString() {
            return "ISO-8859-1";
        }

        @Override
        public String getIANAString() {
            return "ISO-8859-1";
        }
    };

    public static final CharacterEncoding UTF_8 = new CharacterEncoding() {
        @Override
        public String getEncodingString() {
            return "UTF-8";
        }

        @Override
        public String getIANAString() {
            return "UTF-8";
        }
    };

    public static final CharacterEncoding ASCII = new CharacterEncoding() {
        @Override
        public String getEncodingString() {
            return "ASCII";
        }

        @Override
        public String getIANAString() {
            return "US-ASCII";
        }
    };

    public static final CharacterEncoding SHIFT_JIS = new CharacterEncoding() {
        @Override
        public String getEncodingString() {
            return "SJIS";
        }

        @Override
        public String getIANAString() {
            return "Shift_JIS";
        }
    };

    public static final CharacterEncoding EUC_JP = new CharacterEncoding() {
        @Override
        public String getEncodingString() {
            return "EUC_JP";
        }

        @Override
        public String getIANAString() {
            return "EUC-JP";
        }
    };

    public static final CharacterEncoding EUC_KR = new CharacterEncoding() {
        @Override
        public String getEncodingString() {
            return "EUC_KR";
        }

        @Override
        public String getIANAString() {
            return "EUC-KR";
        }
    };

    /**
     * Looks up the encoding registered under the given name or alias. The
     * match is case-insensitive.
     *
     * <p>This is a highly limited implementation at the moment, so don't
     * expect too much from it: only the aliases listed in this class are
     * recognised.
     *
     * @param enc the String representation of the encoding.
     * @return the CharacterEncoding constant registered for {@code enc}
     * @throws java.io.UnsupportedEncodingException if the encoding doesn't exist.
     * @throws NullPointerException if {@code enc} is {@code null}
     */
    public static CharacterEncoding getInstance(String enc)
            throws UnsupportedEncodingException {
        final CharacterEncoding encoding = ENCODING_HASH.get(normalize(enc));
        if (encoding == null) {
            // The table never stores null values, so null means "unknown alias".
            throw new UnsupportedEncodingException(enc);
        }
        return encoding;
    }

    @Override
    public int hashCode() {
        return getEncodingString().hashCode();
    }

    /**
     * Two encodings are equal when their {@link #getEncodingString()} values
     * are equal.
     */
    @Override
    public boolean equals(Object o) {
        return (o instanceof CharacterEncoding)
                && getEncodingString().equals(((CharacterEncoding) o).getEncodingString());
    }

    /**
     * Produces the lookup key for an encoding name.
     *
     * <p>Locale.ROOT is used on purpose: with the default locale, a Turkish
     * or Azeri environment upper-cases 'i' to a dotted capital I, which would
     * make names such as "iso-8859-1" or "shift_jis" miss the table.
     */
    private static String normalize(String name) {
        return name.toUpperCase(Locale.ROOT);
    }

    /** Registers every alias in {@code aliases} as a name for {@code encoding}. */
    private static void register(String[] aliases, CharacterEncoding encoding) {
        for (String alias : aliases) {
            ENCODING_HASH.put(normalize(alias), encoding);
        }
    }

    // Must stay below the encoding constants: static initializers run in
    // textual order, and the constants have to be assigned before they are
    // stored in the table.
    static {
        register(ASCII_ALIASES, ASCII);
        register(ISO_8859_1_ALIASES, ISO_8859_1);
        register(UTF_8_ALIASES, UTF_8);
        register(SHIFT_JIS_ALIASES, SHIFT_JIS);
        register(EUC_JP_ALIASES, EUC_JP);
        register(EUC_KR_ALIASES, EUC_KR);
    }
}