2 * Routines for handling character sets
6 * Wireshark - Network traffic analyzer
7 * By Gerald Combs <gerald@wireshark.org>
8 * Copyright 1998 Gerald Combs
10 * This program is free software; you can redistribute it and/or
11 * modify it under the terms of the GNU General Public License
12 * as published by the Free Software Foundation; either version 2
13 * of the License, or (at your option) any later version.
15 * This program is distributed in the hope that it will be useful,
16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 * GNU General Public License for more details.
20 * You should have received a copy of the GNU General Public License
21 * along with this program; if not, write to the Free Software
22 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
32 * Wikipedia's "Character encoding" template, giving a pile of character encodings and
33 * Wikipedia pages for them:
35 * http://en.wikipedia.org/wiki/Template:Character_encoding
37 * Unicode character encoding model:
39 * http://www.unicode.org/reports/tr17/
41 * International Components for Unicode character set mapping tables:
43 * http://site.icu-project.org/charts/charset
45 * MSDN information on code pages:
47 * http://msdn.microsoft.com/en-us/library/dd317752(v=VS.85).aspx
49 * ASCII-based code pages, from IBM:
51 * http://www-01.ibm.com/software/globalization/cp/cp_cpgid.html
53 * EBCDIC code pages, from IBM:
55 * http://www-03.ibm.com/systems/i/software/globalization/codepages.html
/* ASCII/EBCDIC conversion tables from
 * http://www.room42.com/store/computer_center/code_tables.shtml
 */
62 static guint8 ASCII_translate_EBCDIC
[ 256 ] = {
63 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
64 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
65 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
66 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
67 0x40, 0x5A, 0x7F, 0x7B, 0x5B, 0x6C, 0x50, 0x7D, 0x4D,
68 0x5D, 0x5C, 0x4E, 0x6B, 0x60, 0x4B, 0x61,
69 0xF0, 0xF1, 0xF2, 0xF3, 0xF4, 0xF5, 0xF6, 0xF7, 0xF8,
70 0xF9, 0x7A, 0x5E, 0x4C, 0x7E, 0x6E, 0x6F,
71 0x7C, 0xC1, 0xC2, 0xC3, 0xC4, 0xC5, 0xC6, 0xC7, 0xC8,
72 0xC9, 0xD1, 0xD2, 0xD3, 0xD4, 0xD5, 0xD6,
73 0xD7, 0xD8, 0xD9, 0xE2, 0xE3, 0xE4, 0xE5, 0xE6, 0xE7,
74 0xE8, 0xE9, 0xAD, 0xE0, 0xBD, 0x5F, 0x6D,
75 0x7D, 0x81, 0x82, 0x83, 0x84, 0x85, 0x86, 0x87, 0x88,
76 0x89, 0x91, 0x92, 0x93, 0x94, 0x95, 0x96,
77 0x97, 0x98, 0x99, 0xA2, 0xA3, 0xA4, 0xA5, 0xA6, 0xA7,
78 0xA8, 0xA9, 0xC0, 0x6A, 0xD0, 0xA1, 0x4B,
79 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
80 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
81 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
82 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
83 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
84 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
85 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
86 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
87 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
88 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
89 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
90 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
91 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
92 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
93 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B,
94 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B, 0x4B
98 ASCII_to_EBCDIC(guint8
*buf
, guint bytes
)
105 for (i
= 0; i
< bytes
; i
++, bufptr
++) {
106 *bufptr
= ASCII_translate_EBCDIC
[*bufptr
];
111 ASCII_to_EBCDIC1(guint8 c
)
113 return ASCII_translate_EBCDIC
[c
];
117 static guint8 EBCDIC_translate_ASCII
[ 256 ] = {
118 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08,
119 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
120 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
121 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
122 0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
123 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
124 0x2E, 0x2E, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
125 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x2E, 0x3F,
126 0x20, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
127 0x2E, 0x2E, 0x2E, 0x3C, 0x28, 0x2B, 0x7C,
128 0x26, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
129 0x2E, 0x21, 0x24, 0x2A, 0x29, 0x3B, 0x5E,
130 0x2D, 0x2F, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
131 0x2E, 0x7C, 0x2C, 0x25, 0x5F, 0x3E, 0x3F,
132 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
133 0x2E, 0x3A, 0x23, 0x40, 0x27, 0x3D, 0x22,
134 0x2E, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67, 0x68,
135 0x69, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
136 0x2E, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F, 0x70, 0x71,
137 0x72, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
138 0x2E, 0x7E, 0x73, 0x74, 0x75, 0x76, 0x77, 0x78, 0x79,
139 0x7A, 0x2E, 0x2E, 0x2E, 0x5B, 0x2E, 0x2E,
140 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
141 0x2E, 0x2E, 0x2E, 0x2E, 0x5D, 0x2E, 0x2E,
142 0x7B, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47, 0x48,
143 0x49, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
144 0x7D, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F, 0x50, 0x51,
145 0x52, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
146 0x5C, 0x2E, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59,
147 0x5A, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E,
148 0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x38,
149 0x39, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E, 0x2E
153 EBCDIC_to_ASCII(guint8
*buf
, guint bytes
)
160 for (i
= 0; i
< bytes
; i
++, bufptr
++) {
161 *bufptr
= EBCDIC_translate_ASCII
[*bufptr
];
166 EBCDIC_to_ASCII1(guint8 c
)
168 return EBCDIC_translate_ASCII
[c
];