external/bsd/file/dist/src/encoding.c

   1 /*      $NetBSD: encoding.c,v 1.4 2015/01/02 21:15:32 christos Exp $    */
   2
   3 /*
   4  * Copyright (c) Ian F. Darwin 1986-1995.
   5  * Software written by Ian F. Darwin and others;
   6  * maintained 1995-present by Christos Zoulas and others.
   7  *
   8  * Redistribution and use in source and binary forms, with or without
   9  * modification, are permitted provided that the following conditions
  10  * are met:
  11  * 1. Redistributions of source code must retain the above copyright
  12  *    notice immediately at the beginning of the file, without modification,
  13  *    this list of conditions, and the following disclaimer.
  14  * 2. Redistributions in binary form must reproduce the above copyright
  15  *    notice, this list of conditions and the following disclaimer in the
  16  *    documentation and/or other materials provided with the distribution.
  17  *
  18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
  19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
  22  * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
  24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
  25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
  26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
  27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
  28  * SUCH DAMAGE.
  29  */
  30 /*
  31  * Encoding -- determine the character encoding of a text file.
  32  *
  33  * Joerg Wunsch <joerg@freebsd.org> wrote the original support for 8-bit
  34  * international characters.
  35  */
  36
  37 #include "file.h"
  38
  39 #ifndef lint
  40 #if 0
  41 FILE_RCSID("@(#)$File: encoding.c,v 1.10 2014/09/11 12:08:52 christos Exp $")
  42 #else
  43 __RCSID("$NetBSD: encoding.c,v 1.4 2015/01/02 21:15:32 christos Exp $");
  44 #endif
  45 #endif  /* lint */
  46
  47 #include "magic.h"
  48 #include <string.h>
  49 #include <memory.h>
  50 #include <stdlib.h>
  51
  52
  53 private int looks_ascii(const unsigned char *, size_t, unichar *, size_t *);
  54 private int looks_utf8_with_BOM(const unsigned char *, size_t, unichar *,
  55     size_t *);
  56 private int looks_ucs16(const unsigned char *, size_t, unichar *, size_t *);
  57 private int looks_latin1(const unsigned char *, size_t, unichar *, size_t *);
  58 private int looks_extended(const unsigned char *, size_t, unichar *, size_t *);
  59 private void from_ebcdic(const unsigned char *, size_t, unsigned char *);
  60
  61 #ifdef DEBUG_ENCODING
  62 #define DPRINTF(a) printf a
  63 #else
  64 #define DPRINTF(a)
  65 #endif
  66
  67 /*
  68  * Try to determine whether text is in some character code we can
  69  * identify.  Each of these tests, if it succeeds, will leave
  70  * the text converted into one-unichar-per-character Unicode in
  71  * ubuf, and the number of characters converted in ulen.
  72  */
  73 protected int
  74 file_encoding(struct magic_set *ms, const unsigned char *buf, size_t nbytes, unichar **ubuf, size_t *ulen, const char **code, const char **code_mime, const char **type)
  75 {
  76         size_t mlen;
  77         int rv = 1, ucs_type;
  78         unsigned char *nbuf = NULL;
  79
  80         *type = "text";
  81         *ulen = 0;
  82         *code = "unknown";
  83         *code_mime = "binary";
  84
  85         mlen = (nbytes + 1) * sizeof((*ubuf)[0]);
  86         if ((*ubuf = CAST(unichar *, calloc((size_t)1, mlen))) == NULL) {
  87                 file_oomem(ms, mlen);
  88                 goto done;
  89         }
  90         mlen = (nbytes + 1) * sizeof(nbuf[0]);
  91         if ((nbuf = CAST(unsigned char *, calloc((size_t)1, mlen))) == NULL) {
  92                 file_oomem(ms, mlen);
  93                 goto done;
  94         }
  95
  96         if (looks_ascii(buf, nbytes, *ubuf, ulen)) {
  97                 DPRINTF(("ascii %" SIZE_T_FORMAT "u\n", *ulen));
  98                 *code = "ASCII";
  99                 *code_mime = "us-ascii";
 100         } else if (looks_utf8_with_BOM(buf, nbytes, *ubuf, ulen) > 0) {
 101                 DPRINTF(("utf8/bom %" SIZE_T_FORMAT "u\n", *ulen));
 102                 *code = "UTF-8 Unicode (with BOM)";
 103                 *code_mime = "utf-8";
 104         } else if (file_looks_utf8(buf, nbytes, *ubuf, ulen) > 1) {
 105                 DPRINTF(("utf8 %" SIZE_T_FORMAT "u\n", *ulen));
 106                 *code = "UTF-8 Unicode";
 107                 *code_mime = "utf-8";
 108         } else if ((ucs_type = looks_ucs16(buf, nbytes, *ubuf, ulen)) != 0) {
 109                 if (ucs_type == 1) {
 110                         *code = "Little-endian UTF-16 Unicode";
 111                         *code_mime = "utf-16le";
 112                 } else {
 113                         *code = "Big-endian UTF-16 Unicode";
 114                         *code_mime = "utf-16be";
 115                 }
 116                 DPRINTF(("ucs16 %" SIZE_T_FORMAT "u\n", *ulen));
 117         } else if (looks_latin1(buf, nbytes, *ubuf, ulen)) {
 118                 DPRINTF(("latin1 %" SIZE_T_FORMAT "u\n", *ulen));
 119                 *code = "ISO-8859";
 120                 *code_mime = "iso-8859-1";
 121         } else if (looks_extended(buf, nbytes, *ubuf, ulen)) {
 122                 DPRINTF(("extended %" SIZE_T_FORMAT "u\n", *ulen));
 123                 *code = "Non-ISO extended-ASCII";
 124                 *code_mime = "unknown-8bit";
 125         } else {
 126                 from_ebcdic(buf, nbytes, nbuf);
 127
 128                 if (looks_ascii(nbuf, nbytes, *ubuf, ulen)) {
 129                         DPRINTF(("ebcdic %" SIZE_T_FORMAT "u\n", *ulen));
 130                         *code = "EBCDIC";
 131                         *code_mime = "ebcdic";
 132                 } else if (looks_latin1(nbuf, nbytes, *ubuf, ulen)) {
 133                         DPRINTF(("ebcdic/international %" SIZE_T_FORMAT "u\n",
 134                             *ulen));
 135                         *code = "International EBCDIC";
 136                         *code_mime = "ebcdic";
 137                 } else { /* Doesn't look like text at all */
 138                         DPRINTF(("binary\n"));
 139                         rv = 0;
 140                         *type = "binary";
 141                 }
 142         }
 143
 144  done:
 145         free(nbuf);
 146
 147         return rv;
 148 }
 149
 150 /*
 151  * This table reflects a particular philosophy about what constitutes
 152  * "text," and there is room for disagreement about it.
 153  *
 154  * Version 3.31 of the file command considered a file to be ASCII if
 155  * each of its characters was approved by either the isascii() or
 156  * isalpha() function.  On most systems, this would mean that any
 157  * file consisting only of characters in the range 0x00 ... 0x7F
 158  * would be called ASCII text, but many systems might reasonably
 159  * consider some characters outside this range to be alphabetic,
 160  * so the file command would call such characters ASCII.  It might
 161  * have been more accurate to call this "considered textual on the
 162  * local system" than "ASCII."
 163  *
 164  * It considered a file to be "International language text" if each
 165  * of its characters was either an ASCII printing character (according
 166  * to the real ASCII standard, not the above test), a character in
 167  * the range 0x80 ... 0xFF, or one of the following control characters:
 168  * backspace, tab, line feed, vertical tab, form feed, carriage return,
 169  * escape.  No attempt was made to determine the language in which files
 170  * of this type were written.
 171  *
 172  *
 173  * The table below considers a file to be ASCII if all of its characters
 174  * are either ASCII printing characters (again, according to the X3.4
 175  * standard, not isascii()) or any of the following controls: bell,
 176  * backspace, tab, line feed, form feed, carriage return, esc, nextline.
 177  *
 178  * I include bell because some programs (particularly shell scripts)
 179  * use it literally, even though it is rare in normal text.  I exclude
 180  * vertical tab because it never seems to be used in real text.  I also
 181  * include, with hesitation, the X3.64/ECMA-43 control nextline (0x85),
 182  * because that's what the dd EBCDIC->ASCII table maps the EBCDIC newline
 183  * character to.  It might be more appropriate to include it in the 8859
 184  * set instead of the ASCII set, but it's got to be included in *something*
 185  * we recognize or EBCDIC files aren't going to be considered textual.
 186  * Some old Unix source files use SO/SI (^N/^O) to shift between Greek
 187  * and Latin characters, so these should possibly be allowed.  But they
 188  * make a real mess on VT100-style displays if they're not paired properly,
 189  * so we are probably better off not calling them text.
 190  *
 191  * A file is considered to be ISO-8859 text if its characters are all
 192  * either ASCII, according to the above definition, or printing characters
 193  * from the ISO-8859 8-bit extension, characters 0xA0 ... 0xFF.
 194  *
 195  * Finally, a file is considered to be international text from some other
 196  * character code if its characters are all either ISO-8859 (according to
 197  * the above definition) or characters in the range 0x80 ... 0x9F, which
 198  * ISO-8859 considers to be control characters but the IBM PC and Macintosh
 199  * consider to be printing characters.
 200  */
 201
 202 #define F 0   /* character never appears in text */
 203 #define T 1   /* character appears in plain ASCII text */
 204 #define I 2   /* character appears in ISO-8859 text */
 205 #define X 3   /* character appears in non-ISO extended ASCII (Mac, IBM PC) */
 206
 207 private char text_chars[256] = {
 208         /*                  BEL BS HT LF    FF CR    */
 209         F, F, F, F, F, F, F, T, T, T, T, F, T, T, F, F,  /* 0x0X */
 210         /*                              ESC          */
 211         F, F, F, F, F, F, F, F, F, F, F, T, F, F, F, F,  /* 0x1X */
 212         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x2X */
 213         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x3X */
 214         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x4X */
 215         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x5X */
 216         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, T,  /* 0x6X */
 217         T, T, T, T, T, T, T, T, T, T, T, T, T, T, T, F,  /* 0x7X */
 218         /*            NEL                            */
 219         X, X, X, X, X, T, X, X, X, X, X, X, X, X, X, X,  /* 0x8X */
 220         X, X, X, X, X, X, X, X, X, X, X, X, X, X, X, X,  /* 0x9X */
 221         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xaX */
 222         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xbX */
 223         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xcX */
 224         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xdX */
 225         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I,  /* 0xeX */
 226         I, I, I, I, I, I, I, I, I, I, I, I, I, I, I, I   /* 0xfX */
 227 };
 228
 229 private int
 230 looks_ascii(const unsigned char *buf, size_t nbytes, unichar *ubuf,
 231     size_t *ulen)
 232 {
 233         size_t i;
 234
 235         *ulen = 0;
 236
 237         for (i = 0; i < nbytes; i++) {
 238                 int t = text_chars[buf[i]];
 239
 240                 if (t != T)
 241                         return 0;
 242
 243                 ubuf[(*ulen)++] = buf[i];
 244         }
 245
 246         return 1;
 247 }
 248
 249 private int
 250 looks_latin1(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
 251 {
 252         size_t i;
 253
 254         *ulen = 0;
 255
 256         for (i = 0; i < nbytes; i++) {
 257                 int t = text_chars[buf[i]];
 258
 259                 if (t != T && t != I)
 260                         return 0;
 261
 262                 ubuf[(*ulen)++] = buf[i];
 263         }
 264
 265         return 1;
 266 }
 267
 268 private int
 269 looks_extended(const unsigned char *buf, size_t nbytes, unichar *ubuf,
 270     size_t *ulen)
 271 {
 272         size_t i;
 273
 274         *ulen = 0;
 275
 276         for (i = 0; i < nbytes; i++) {
 277                 int t = text_chars[buf[i]];
 278
 279                 if (t != T && t != I && t != X)
 280                         return 0;
 281
 282                 ubuf[(*ulen)++] = buf[i];
 283         }
 284
 285         return 1;
 286 }
 287
 288 /*
 289  * Decide whether some text looks like UTF-8. Returns:
 290  *
 291  *     -1: invalid UTF-8
 292  *      0: uses odd control characters, so doesn't look like text
 293  *      1: 7-bit text
 294  *      2: definitely UTF-8 text (valid high-bit set bytes)
 295  *
 296  * If ubuf is non-NULL on entry, text is decoded into ubuf, *ulen;
 297  * ubuf must be big enough!
 298  */
 299 protected int
 300 file_looks_utf8(const unsigned char *buf, size_t nbytes, unichar *ubuf, size_t *ulen)
 301 {
 302         size_t i;
 303         int n;
 304         unichar c;
 305         int gotone = 0, ctrl = 0;
 306
 307         if (ubuf)
 308                 *ulen = 0;
 309
 310         for (i = 0; i < nbytes; i++) {
 311                 if ((buf[i] & 0x80) == 0) {        /* 0xxxxxxx is plain ASCII */
 312                         /*
 313                          * Even if the whole file is valid UTF-8 sequences,
 314                          * still reject it if it uses weird control characters.
 315                          */
 316
 317                         if (text_chars[buf[i]] != T)
 318                                 ctrl = 1;
 319
 320                         if (ubuf)
 321                                 ubuf[(*ulen)++] = buf[i];
 322                 } else if ((buf[i] & 0x40) == 0) { /* 10xxxxxx never 1st byte */
 323                         return -1;
 324                 } else {                           /* 11xxxxxx begins UTF-8 */
 325                         int following;
 326
 327                         if ((buf[i] & 0x20) == 0) {             /* 110xxxxx */
 328                                 c = buf[i] & 0x1f;
 329                                 following = 1;
 330                         } else if ((buf[i] & 0x10) == 0) {      /* 1110xxxx */
 331                                 c = buf[i] & 0x0f;
 332                                 following = 2;
 333                         } else if ((buf[i] & 0x08) == 0) {      /* 11110xxx */
 334                                 c = buf[i] & 0x07;
 335                                 following = 3;
 336                         } else if ((buf[i] & 0x04) == 0) {      /* 111110xx */
 337                                 c = buf[i] & 0x03;
 338                                 following = 4;
 339                         } else if ((buf[i] & 0x02) == 0) {      /* 1111110x */
 340                                 c = buf[i] & 0x01;
 341                                 following = 5;
 342                         } else
 343                                 return -1;
 344
 345                         for (n = 0; n < following; n++) {
 346                                 i++;
 347                                 if (i >= nbytes)
 348                                         goto done;
 349
 350                                 if ((buf[i] & 0x80) == 0 || (buf[i] & 0x40))
 351                                         return -1;
 352
 353                                 c = (c << 6) + (buf[i] & 0x3f);
 354                         }
 355
 356                         if (ubuf)
 357                                 ubuf[(*ulen)++] = c;
 358                         gotone = 1;
 359                 }
 360         }
 361 done:
 362         return ctrl ? 0 : (gotone ? 2 : 1);
 363 }
 364
 365 /*
 366  * Decide whether some text looks like UTF-8 with BOM. If there is no
 367  * BOM, return -1; otherwise return the result of looks_utf8 on the
 368  * rest of the text.
 369  */
 370 private int
 371 looks_utf8_with_BOM(const unsigned char *buf, size_t nbytes, unichar *ubuf,
 372     size_t *ulen)
 373 {
 374         if (nbytes > 3 && buf[0] == 0xef && buf[1] == 0xbb && buf[2] == 0xbf)
 375                 return file_looks_utf8(buf + 3, nbytes - 3, ubuf, ulen);
 376         else
 377                 return -1;
 378 }
 379
 380 private int
 381 looks_ucs16(const unsigned char *buf, size_t nbytes, unichar *ubuf,
 382     size_t *ulen)
 383 {
 384         int bigend;
 385         size_t i;
 386
 387         if (nbytes < 2)
 388                 return 0;
 389
 390         if (buf[0] == 0xff && buf[1] == 0xfe)
 391                 bigend = 0;
 392         else if (buf[0] == 0xfe && buf[1] == 0xff)
 393                 bigend = 1;
 394         else
 395                 return 0;
 396
 397         *ulen = 0;
 398
 399         for (i = 2; i + 1 < nbytes; i += 2) {
 400                 /* XXX fix to properly handle chars > 65536 */
 401
 402                 if (bigend)
 403                         ubuf[(*ulen)++] = buf[i + 1] + 256 * buf[i];
 404                 else
 405                         ubuf[(*ulen)++] = buf[i] + 256 * buf[i + 1];
 406
 407                 if (ubuf[*ulen - 1] == 0xfffe)
 408                         return 0;
 409                 if (ubuf[*ulen - 1] < 128 &&
 410                     text_chars[(size_t)ubuf[*ulen - 1]] != T)
 411                         return 0;
 412         }
 413
 414         return 1 + bigend;
 415 }
 416
 417 #undef F
 418 #undef T
 419 #undef I
 420 #undef X
 421
 422 /*
 423  * This table maps each EBCDIC character to an (8-bit extended) ASCII
 424  * character, as specified in the rationale for the dd(1) command in
 425  * draft 11.2 (September, 1991) of the POSIX P1003.2 standard.
 426  *
 427  * Unfortunately it does not seem to correspond exactly to any of the
 428  * five variants of EBCDIC documented in IBM's _Enterprise Systems
 429  * Architecture/390: Principles of Operation_, SA22-7201-06, Seventh
 430  * Edition, July, 1999, pp. I-1 - I-4.
 431  *
 432  * Fortunately, though, all versions of EBCDIC, including this one, agree
 433  * on most of the printing characters that also appear in (7-bit) ASCII.
 434  * Of these, only '|', '!', '~', '^', '[', and ']' are in question at all.
 435  *
 436  * Fortunately too, there is general agreement that codes 0x00 through
 437  * 0x3F represent control characters, 0x41 a nonbreaking space, and the
 438  * remainder printing characters.
 439  *
 440  * This is sufficient to allow us to identify EBCDIC text and to distinguish
 441  * between old-style and internationalized examples of text.
 442  */
 443
 444 private unsigned char ebcdic_to_ascii[] = {
 445   0,   1,   2,   3, 156,   9, 134, 127, 151, 141, 142,  11,  12,  13,  14,  15,
 446  16,  17,  18,  19, 157, 133,   8, 135,  24,  25, 146, 143,  28,  29,  30,  31,
 447 128, 129, 130, 131, 132,  10,  23,  27, 136, 137, 138, 139, 140,   5,   6,   7,
 448 144, 145,  22, 147, 148, 149, 150,   4, 152, 153, 154, 155,  20,  21, 158,  26,
 449 ' ', 160, 161, 162, 163, 164, 165, 166, 167, 168, 213, '.', '<', '(', '+', '|',
 450 '&', 169, 170, 171, 172, 173, 174, 175, 176, 177, '!', '$', '*', ')', ';', '~',
 451 '-', '/', 178, 179, 180, 181, 182, 183, 184, 185, 203, ',', '%', '_', '>', '?',
 452 186, 187, 188, 189, 190, 191, 192, 193, 194, '`', ':', '#', '@', '\'','=', '"',
 453 195, 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 196, 197, 198, 199, 200, 201,
 454 202, 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', '^', 204, 205, 206, 207, 208,
 455 209, 229, 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 210, 211, 212, '[', 214, 215,
 456 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, ']', 230, 231,
 457 '{', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 232, 233, 234, 235, 236, 237,
 458 '}', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 238, 239, 240, 241, 242, 243,
 459 '\\',159, 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', 244, 245, 246, 247, 248, 249,
 460 '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 250, 251, 252, 253, 254, 255
 461 };
 462
 463 #ifdef notdef
 464 /*
 465  * The following EBCDIC-to-ASCII table may relate more closely to reality,
 466  * or at least to modern reality.  It comes from
 467  *
 468  *   http://ftp.s390.ibm.com/products/oe/bpxqp9.html
 469  *
 470  * and maps the characters of EBCDIC code page 1047 (the code used for
 471  * Unix-derived software on IBM's 390 systems) to the corresponding
 472  * characters from ISO 8859-1.
 473  *
 474  * If this table is used instead of the above one, some of the special
 475  * cases for the NEL character can be taken out of the code.
 476  */
 477
 478 private unsigned char ebcdic_1047_to_8859[] = {
 479 0x00,0x01,0x02,0x03,0x9C,0x09,0x86,0x7F,0x97,0x8D,0x8E,0x0B,0x0C,0x0D,0x0E,0x0F,
 480 0x10,0x11,0x12,0x13,0x9D,0x0A,0x08,0x87,0x18,0x19,0x92,0x8F,0x1C,0x1D,0x1E,0x1F,
 481 0x80,0x81,0x82,0x83,0x84,0x85,0x17,0x1B,0x88,0x89,0x8A,0x8B,0x8C,0x05,0x06,0x07,
 482 0x90,0x91,0x16,0x93,0x94,0x95,0x96,0x04,0x98,0x99,0x9A,0x9B,0x14,0x15,0x9E,0x1A,
 483 0x20,0xA0,0xE2,0xE4,0xE0,0xE1,0xE3,0xE5,0xE7,0xF1,0xA2,0x2E,0x3C,0x28,0x2B,0x7C,
 484 0x26,0xE9,0xEA,0xEB,0xE8,0xED,0xEE,0xEF,0xEC,0xDF,0x21,0x24,0x2A,0x29,0x3B,0x5E,
 485 0x2D,0x2F,0xC2,0xC4,0xC0,0xC1,0xC3,0xC5,0xC7,0xD1,0xA6,0x2C,0x25,0x5F,0x3E,0x3F,
 486 0xF8,0xC9,0xCA,0xCB,0xC8,0xCD,0xCE,0xCF,0xCC,0x60,0x3A,0x23,0x40,0x27,0x3D,0x22,
 487 0xD8,0x61,0x62,0x63,0x64,0x65,0x66,0x67,0x68,0x69,0xAB,0xBB,0xF0,0xFD,0xFE,0xB1,
 488 0xB0,0x6A,0x6B,0x6C,0x6D,0x6E,0x6F,0x70,0x71,0x72,0xAA,0xBA,0xE6,0xB8,0xC6,0xA4,
 489 0xB5,0x7E,0x73,0x74,0x75,0x76,0x77,0x78,0x79,0x7A,0xA1,0xBF,0xD0,0x5B,0xDE,0xAE,
 490 0xAC,0xA3,0xA5,0xB7,0xA9,0xA7,0xB6,0xBC,0xBD,0xBE,0xDD,0xA8,0xAF,0x5D,0xB4,0xD7,
 491 0x7B,0x41,0x42,0x43,0x44,0x45,0x46,0x47,0x48,0x49,0xAD,0xF4,0xF6,0xF2,0xF3,0xF5,
 492 0x7D,0x4A,0x4B,0x4C,0x4D,0x4E,0x4F,0x50,0x51,0x52,0xB9,0xFB,0xFC,0xF9,0xFA,0xFF,
 493 0x5C,0xF7,0x53,0x54,0x55,0x56,0x57,0x58,0x59,0x5A,0xB2,0xD4,0xD6,0xD2,0xD3,0xD5,
 494 0x30,0x31,0x32,0x33,0x34,0x35,0x36,0x37,0x38,0x39,0xB3,0xDB,0xDC,0xD9,0xDA,0x9F
 495 };
 496 #endif
 497
 498 /*
 499  * Copy buf[0 ... nbytes-1] into out[], translating EBCDIC to ASCII.
 500  */
 501 private void
 502 from_ebcdic(const unsigned char *buf, size_t nbytes, unsigned char *out)
 503 {
 504         size_t i;
 505
 506         for (i = 0; i < nbytes; i++) {
 507                 out[i] = ebcdic_to_ascii[buf[i]];
 508         }
 509 }