sys/fs/udf/udf_osta.c

   1 /* $NetBSD: udf_osta.c,v 1.10 2013/08/05 17:02:54 joerg Exp $ */
   2 #if HAVE_NBTOOL_CONFIG_H
   3 #include "nbtool_config.h"
   4 #endif
   5
   6 #include <sys/cdefs.h>
   7 __KERNEL_RCSID(0, "$NetBSD: udf_osta.c,v 1.10 2013/08/05 17:02:54 joerg Exp $");
   8
   9 /*
  10  * Various routines from the OSTA 2.01 specs.  Copyrights are included with
  11  * each code segment.  Slight whitespace modifications have been made for
  12  * formatting purposes.  Typos/bugs have been fixed.
  13  *
  14  */
  15
  16 #include "udf_osta.h"
  17
  18 #ifndef _KERNEL
  19 #include <ctype.h>
  20 #endif
  21
  22 /*****************************************************************************/
  23 /***********************************************************************
  24  * OSTA compliant Unicode compression, uncompression routines.
  25  * Copyright 1995 Micro Design International, Inc.
  26  * Written by Jason M. Rinn.
  27  * Micro Design International gives permission for the free use of the
  28  * following source code.
  29  */
  30
  31 /***********************************************************************
  32  * Takes an OSTA CS0 compressed unicode name, and converts
  33  * it to Unicode.
  34  * The Unicode output will be in the byte order
  35  * that the local compiler uses for 16-bit values.
  36  * NOTE: This routine only performs error checking on the compID.
  37  * It is up to the user to ensure that the unicode buffer is large
  38  * enough, and that the compressed unicode name is correct.
  39  *
  40  * RETURN VALUE
  41  *
  42  * The number of unicode characters which were uncompressed.
  43  * A -1 is returned if the compression ID is invalid.
  44  */
  45 int
  46 udf_UncompressUnicode(
  47         int numberOfBytes,      /* (Input) number of bytes read from media. */
  48         byte *UDFCompressed,    /* (Input) bytes read from media. */
  49         unicode_t *unicode)     /* (Output) uncompressed unicode characters. */
  50 {
  51         unsigned int compID;
  52         int returnValue, unicodeIndex, byteIndex;
  53
  54         /* Use UDFCompressed to store current byte being read. */
  55         compID = UDFCompressed[0];
  56
  57         /* First check for valid compID. */
  58         if (compID != 8 && compID != 16) {
  59                 returnValue = -1;
  60         } else {
  61                 unicodeIndex = 0;
  62                 byteIndex = 1;
  63
  64                 /* Loop through all the bytes. */
  65                 while (byteIndex < numberOfBytes) {
  66                         if (compID == 16) {
  67                                 /* Move the first byte to the high bits of the
  68                                  * unicode char.
  69                                  */
  70                                 unicode[unicodeIndex] =
  71                                     UDFCompressed[byteIndex++] << 8;
  72                         } else {
  73                                 unicode[unicodeIndex] = 0;
  74                         }
  75                         if (byteIndex < numberOfBytes) {
  76                                 /*Then the next byte to the low bits. */
  77                                 unicode[unicodeIndex] |=
  78                                     UDFCompressed[byteIndex++];
  79                         }
  80                         unicodeIndex++;
  81                 }
  82                 returnValue = unicodeIndex;
  83         }
  84         return(returnValue);
  85 }
  86
  87 /***********************************************************************
  88  * DESCRIPTION:
  89  * Takes a string of unicode wide characters and returns an OSTA CS0
  90  * compressed unicode string. The unicode MUST be in the byte order of
  91  * the compiler in order to obtain correct results. Returns an error
  92  * if the compression ID is invalid.
  93  *
  94  * NOTE: This routine assumes the implementation already knows, by
  95  * the local environment, how many bits are appropriate and
  96  * therefore does no checking to test if the input characters fit
  97  * into that number of bits or not.
  98  *
  99  * RETURN VALUE
 100  *
 101  * The total number of bytes in the compressed OSTA CS0 string,
 102  * including the compression ID.
 103  * A -1 is returned if the compression ID is invalid.
 104  */
 105 int
 106 udf_CompressUnicode(
 107         int numberOfChars,      /* (Input) number of unicode characters. */
 108         int compID,             /* (Input) compression ID to be used. */
 109         unicode_t *unicode,     /* (Input) unicode characters to compress. */
 110         byte *UDFCompressed)    /* (Output) compressed string, as bytes. */
 111 {
 112         int byteIndex, unicodeIndex;
 113
 114         if (compID != 8 && compID != 16) {
 115                 byteIndex = -1; /* Unsupported compression ID ! */
 116         } else {
 117                 /* Place compression code in first byte. */
 118                 UDFCompressed[0] = compID;
 119
 120                 byteIndex = 1;
 121                 unicodeIndex = 0;
 122                 while (unicodeIndex < numberOfChars) {
 123                         if (compID == 16) {
 124                                 /* First, place the high bits of the char
 125                                  * into the byte stream.
 126                                  */
 127                                 UDFCompressed[byteIndex++] =
 128                                     (unicode[unicodeIndex] & 0xFF00) >> 8;
 129                         }
 130                         /*Then place the low bits into the stream. */
 131                         UDFCompressed[byteIndex++] =
 132                             unicode[unicodeIndex] & 0x00FF;
 133                         unicodeIndex++;
 134                 }
 135         }
 136         return(byteIndex);
 137 }
 138
 139 /*****************************************************************************/
 140 /*
 141  * CRC 010041
 142  */
 143 static unsigned short crc_table[256] = {
 144         0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50A5, 0x60C6, 0x70E7,
 145         0x8108, 0x9129, 0xA14A, 0xB16B, 0xC18C, 0xD1AD, 0xE1CE, 0xF1EF,
 146         0x1231, 0x0210, 0x3273, 0x2252, 0x52B5, 0x4294, 0x72F7, 0x62D6,
 147         0x9339, 0x8318, 0xB37B, 0xA35A, 0xD3BD, 0xC39C, 0xF3FF, 0xE3DE,
 148         0x2462, 0x3443, 0x0420, 0x1401, 0x64E6, 0x74C7, 0x44A4, 0x5485,
 149         0xA56A, 0xB54B, 0x8528, 0x9509, 0xE5EE, 0xF5CF, 0xC5AC, 0xD58D,
 150         0x3653, 0x2672, 0x1611, 0x0630, 0x76D7, 0x66F6, 0x5695, 0x46B4,
 151         0xB75B, 0xA77A, 0x9719, 0x8738, 0xF7DF, 0xE7FE, 0xD79D, 0xC7BC,
 152         0x48C4, 0x58E5, 0x6886, 0x78A7, 0x0840, 0x1861, 0x2802, 0x3823,
 153         0xC9CC, 0xD9ED, 0xE98E, 0xF9AF, 0x8948, 0x9969, 0xA90A, 0xB92B,
 154         0x5AF5, 0x4AD4, 0x7AB7, 0x6A96, 0x1A71, 0x0A50, 0x3A33, 0x2A12,
 155         0xDBFD, 0xCBDC, 0xFBBF, 0xEB9E, 0x9B79, 0x8B58, 0xBB3B, 0xAB1A,
 156         0x6CA6, 0x7C87, 0x4CE4, 0x5CC5, 0x2C22, 0x3C03, 0x0C60, 0x1C41,
 157         0xEDAE, 0xFD8F, 0xCDEC, 0xDDCD, 0xAD2A, 0xBD0B, 0x8D68, 0x9D49,
 158         0x7E97, 0x6EB6, 0x5ED5, 0x4EF4, 0x3E13, 0x2E32, 0x1E51, 0x0E70,
 159         0xFF9F, 0xEFBE, 0xDFDD, 0xCFFC, 0xBF1B, 0xAF3A, 0x9F59, 0x8F78,
 160         0x9188, 0x81A9, 0xB1CA, 0xA1EB, 0xD10C, 0xC12D, 0xF14E, 0xE16F,
 161         0x1080, 0x00A1, 0x30C2, 0x20E3, 0x5004, 0x4025, 0x7046, 0x6067,
 162         0x83B9, 0x9398, 0xA3FB, 0xB3DA, 0xC33D, 0xD31C, 0xE37F, 0xF35E,
 163         0x02B1, 0x1290, 0x22F3, 0x32D2, 0x4235, 0x5214, 0x6277, 0x7256,
 164         0xB5EA, 0xA5CB, 0x95A8, 0x8589, 0xF56E, 0xE54F, 0xD52C, 0xC50D,
 165         0x34E2, 0x24C3, 0x14A0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
 166         0xA7DB, 0xB7FA, 0x8799, 0x97B8, 0xE75F, 0xF77E, 0xC71D, 0xD73C,
 167         0x26D3, 0x36F2, 0x0691, 0x16B0, 0x6657, 0x7676, 0x4615, 0x5634,
 168         0xD94C, 0xC96D, 0xF90E, 0xE92F, 0x99C8, 0x89E9, 0xB98A, 0xA9AB,
 169         0x5844, 0x4865, 0x7806, 0x6827, 0x18C0, 0x08E1, 0x3882, 0x28A3,
 170         0xCB7D, 0xDB5C, 0xEB3F, 0xFB1E, 0x8BF9, 0x9BD8, 0xABBB, 0xBB9A,
 171         0x4A75, 0x5A54, 0x6A37, 0x7A16, 0x0AF1, 0x1AD0, 0x2AB3, 0x3A92,
 172         0xFD2E, 0xED0F, 0xDD6C, 0xCD4D, 0xBDAA, 0xAD8B, 0x9DE8, 0x8DC9,
 173         0x7C26, 0x6C07, 0x5C64, 0x4C45, 0x3CA2, 0x2C83, 0x1CE0, 0x0CC1,
 174         0xEF1F, 0xFF3E, 0xCF5D, 0xDF7C, 0xAF9B, 0xBFBA, 0x8FD9, 0x9FF8,
 175         0x6E17, 0x7E36, 0x4E55, 0x5E74, 0x2E93, 0x3EB2, 0x0ED1, 0x1EF0
 176 };
 177
 178 unsigned short
 179 udf_cksum(unsigned char *s, int n)
 180 {
 181         unsigned short crc=0;
 182
 183         while (n-- > 0)
 184                 crc = crc_table[(crc>>8 ^ *s++) & 0xff] ^ (crc<<8);
 185         return crc;
 186 }
 187
 188 /* UNICODE Checksum */
 189 unsigned short
 190 udf_unicode_cksum(unsigned short *s, int n)
 191 {
 192         unsigned short crc=0;
 193
 194         while (n-- > 0) {
 195                 /* Take high order byte first--corresponds to a big endian
 196                  * byte stream.
 197                  */
 198                 crc = crc_table[(crc>>8 ^ (*s>>8)) & 0xff] ^ (crc<<8);
 199                 crc = crc_table[(crc>>8 ^ (*s++ & 0xff)) & 0xff] ^ (crc<<8);
 200         }
 201         return crc;
 202 }
 203
 204
 205 /*
 206   * Calculates a 16-bit checksum of the Implementation Use
 207   * Extended Attribute header or Application Use Extended Attribute
 208   * header. The fields AttributeType through ImplementationIdentifier
 209   * (or ApplicationIdentifier) inclusively represent the
 210   * data covered by the checksum (48 bytes).
 211   *
 212   */
 213 uint16_t udf_ea_cksum(uint8_t *data) {
 214         uint16_t checksum = 0;
 215         int      count;
 216
 217         for (count = 0; count < 48; count++) {
 218                checksum += *data++;
 219         }
 220
 221         return checksum;
 222 }
 223
 224
 225 #ifdef MAIN
 226 unsigned char bytes[] = { 0x70, 0x6A, 0x77 };
 227
 228 main(void)
 229 {
 230         unsigned short x;
 231         x = cksum(bytes, sizeof bytes);
 232         printf("checksum: calculated=%4.4x, correct=%4.4x\en", x, 0x3299);
 233         exit(0);
 234 }
 235 #endif
 236
 237 /*****************************************************************************/
 238 /* #ifdef NEEDS_ISPRINT */
 239 /***********************************************************************
 240  * OSTA UDF compliant file name translation routine for OS/2,
 241  * Windows 95, Windows NT, Macintosh and UNIX.
 242  * Copyright 1995 Micro Design International, Inc.
 243  * Written by Jason M. Rinn.
 244  * Micro Design International gives permission for the free use of the
 245  * following source code.
 246  */
 247
 248 /***********************************************************************
 249  * To use these routines with different operating systems.
 250  *
 251  * OS/2
 252  * Define OS2
 253  * Define MAXLEN = 254
 254  *
 255  * Windows 95
 256  * Define WIN_95
 257  * Define MAXLEN = 255
 258  *
 259  * Windows NT
 260  * Define WIN_NT
 261  * Define MAXLEN = 255
 262  *
 263  * Macintosh:
 264  * Define MAC.
 265  * Define MAXLEN = 31.
 266  *
 267  * UNIX
 268  * Define UNIX.
 269  * Define MAXLEN as specified by unix version.
 270  */
 271
/* Unicode values and limits used by UDFTransName() below. */
#define ILLEGAL_CHAR_MARK       0x005F  /* '_': stands in for illegal or non-displayable chars */
#define CRC_MARK        0x0023  /* '#': precedes the four hex digits of the CRC */
#define EXT_SIZE        5       /* longest extension (chars after the period) that is kept */
#define PERIOD  0x002E  /* '.' */
#define SPACE   0x0020  /* ' ' */

/*** PROTOTYPES ***/
/* Defined further down; returns non-zero if ch is illegal for the target OS. */
int IsIllegal(unicode_t ch);
 280
 281 /* Define a function or macro which determines if a Unicode character is
 282  * printable under your implementation.
 283  */
 284
 285
 286 /* #include <stdio.h> */
 287 static int UnicodeIsPrint(unicode_t ch) {
 288         return (ch >=' ') && (ch != 127);
 289 }
 290
 291
 292 int UnicodeLength(unicode_t *string) {
 293         int length;
 294         length = 0;
 295         while (*string++) length++;
 296
 297         return length;
 298 }
 299
 300
 301 #ifdef _KERNEL
 302 static int isprint(int c) {
 303         return (c >= ' ') && (c != 127);
 304 }
 305 #endif
 306
 307
 308 /***********************************************************************
 309  * Translates a long file name to one using a MAXLEN and an illegal
 310  * char set in accord with the OSTA requirements. Assumes the name has
 311  * already been translated to Unicode.
 312  *
 313  * RETURN VALUE
 314  *
 315  * Number of unicode characters in translated name.
 316  */
 317 int UDFTransName(
 318         unicode_t *newName,     /* (Output)Translated name. Must be of length
 319                                  * MAXLEN */
 320         unicode_t *udfName,     /* (Input) Name from UDF volume.*/
 321         int udfLen)             /* (Input) Length of UDF Name. */
 322 {
 323         int Index, newIndex = 0, needsCRC = false;      /* index is shadowed */
 324         int extIndex = 0, newExtIndex = 0, hasExt = false;
 325 #if defined OS2 || defined WIN_95 || defined WIN_NT
 326         int trailIndex = 0;
 327 #endif
 328         unsigned short valueCRC;
 329         unicode_t current;
 330         const char hexChar[] = "0123456789ABCDEF";
 331
 332         for (Index = 0; Index < udfLen; Index++) {
 333                 current = udfName[Index];
 334
 335                 if (IsIllegal(current) || !UnicodeIsPrint(current)) {
 336                         needsCRC = true;
 337                         /* Replace Illegal and non-displayable chars with
 338                          * underscore.
 339                          */
 340                         current = ILLEGAL_CHAR_MARK;
 341                         /* Skip any other illegal or non-displayable
 342                          * characters.
 343                          */
 344                         while(Index+1 < udfLen && (IsIllegal(udfName[Index+1])
 345                             || !UnicodeIsPrint(udfName[Index+1]))) {
 346                                 Index++;
 347                         }
 348                 }
 349
 350                 /* Record position of extension, if one is found. */
 351                 if (current == PERIOD && (udfLen - Index -1) <= EXT_SIZE) {
 352                         if (udfLen == Index + 1) {
 353                                 /* A trailing period is NOT an extension. */
 354                                 hasExt = false;
 355                         } else {
 356                                 hasExt = true;
 357                                 extIndex = Index;
 358                                 newExtIndex = newIndex;
 359                         }
 360                 }
 361
 362 #if defined OS2 || defined WIN_95 || defined WIN_NT
 363                 /* Record position of last char which is NOT period or space. */
 364                 else if (current != PERIOD && current != SPACE) {
 365                         trailIndex = newIndex;
 366                 }
 367 #endif
 368
 369                 if (newIndex < MAXLEN) {
 370                         newName[newIndex++] = current;
 371                 } else {
 372                         needsCRC = true;
 373                 }
 374         }
 375
 376 #if defined OS2 || defined WIN_95 || defined WIN_NT
 377         /* For OS2, 95 & NT, truncate any trailing periods and\or spaces. */
 378         if (trailIndex != newIndex - 1) {
 379                 newIndex = trailIndex + 1;
 380                 needsCRC = true;
 381                 hasExt = false; /* Trailing period does not make an
 382                                  * extension. */
 383         }
 384 #endif
 385
 386         if (needsCRC) {
 387                 unicode_t ext[EXT_SIZE];
 388                 int localExtIndex = 0;
 389                 if (hasExt) {
 390                         int maxFilenameLen;
 391                         /* Translate extension, and store it in ext. */
 392                         for(Index = 0; Index<EXT_SIZE &&
 393                             extIndex + Index +1 < udfLen; Index++ ) {
 394                                 current = udfName[extIndex + Index + 1];
 395                                 if (IsIllegal(current) ||
 396                                     !UnicodeIsPrint(current)) {
 397                                         needsCRC = 1;
 398                                         /* Replace Illegal and non-displayable
 399                                          * chars with underscore.
 400                                          */
 401                                         current = ILLEGAL_CHAR_MARK;
 402                                         /* Skip any other illegal or
 403                                          * non-displayable characters.
 404                                          */
 405                                         while(Index + 1 < EXT_SIZE
 406                                             && (IsIllegal(udfName[extIndex +
 407                                             Index + 2]) ||
 408                                             !isprint(udfName[extIndex +
 409                                             Index + 2]))) {
 410                                                 Index++;
 411                                         }
 412                                 }
 413                                 ext[localExtIndex++] = current;
 414                         }
 415
 416                         /* Truncate filename to leave room for extension and
 417                          * CRC.
 418                          */
 419                         maxFilenameLen = ((MAXLEN - 5) - localExtIndex - 1);
 420                         if (newIndex > maxFilenameLen) {
 421                                 newIndex = maxFilenameLen;
 422                         } else {
 423                                 newIndex = newExtIndex;
 424                         }
 425                 } else if (newIndex > MAXLEN - 5) {
 426                         /*If no extension, make sure to leave room for CRC. */
 427                         newIndex = MAXLEN - 5;
 428                 }
 429                 newName[newIndex++] = CRC_MARK; /* Add mark for CRC. */
 430
 431                 /*Calculate CRC from original filename from FileIdentifier. */
 432                 valueCRC = udf_unicode_cksum(udfName, udfLen);
 433                 /* Convert 16-bits of CRC to hex characters. */
 434                 newName[newIndex++] = hexChar[(valueCRC & 0xf000) >> 12];
 435                 newName[newIndex++] = hexChar[(valueCRC & 0x0f00) >> 8];
 436                 newName[newIndex++] = hexChar[(valueCRC & 0x00f0) >> 4];
 437                 newName[newIndex++] = hexChar[(valueCRC & 0x000f)];
 438
 439                 /* Place a translated extension at end, if found. */
 440                 if (hasExt) {
 441                         newName[newIndex++] = PERIOD;
 442                         for (Index = 0;Index < localExtIndex ;Index++ ) {
 443                                 newName[newIndex++] = ext[Index];
 444                         }
 445                 }
 446         }
 447         return(newIndex);
 448 }
 449
 450 #if defined OS2 || defined WIN_95 || defined WIN_NT
 451 /***********************************************************************
 452  * Decides if a Unicode character matches one of a list
 453  * of ASCII characters.
 454  * Used by OS2 version of IsIllegal for readability, since all of the
 455  * illegal characters above 0x0020 are in the ASCII subset of Unicode.
 456  * Works very similarly to the standard C function strchr().
 457  *
 458  * RETURN VALUE
 459  *
 460  * Non-zero if the Unicode character is in the given ASCII string.
 461  */
 462 int UnicodeInString(
 463         unsigned char *string,  /* (Input) String to search through. */
 464         unicode_t ch)           /* (Input) Unicode char to search for. */
 465 {
 466         int found = false;
 467         while (*string != '\0' && found == false) {
 468                 /* These types should compare, since both are unsigned
 469                  * numbers. */
 470                 if (*string == ch) {
 471                         found = true;
 472                 }
 473                 string++;
 474         }
 475         return(found);
 476 }
 477 #endif /* OS2 */
 478
 479 /***********************************************************************
 480  * Decides whether the given character is illegal for a given OS.
 481  *
 482  * RETURN VALUE
 483  *
 484  * Non-zero if char is illegal.
 485  */
 486 int IsIllegal(unicode_t ch)
 487 {
 488 #ifdef MAC
 489         /* Only illegal character on the MAC is the colon. */
 490         if (ch == 0x003A) {
 491                 return(1);
 492         } else {
 493                 return(0);
 494         }
 495
 496 #elif defined UNIX
 497         /* Illegal UNIX characters are NULL and slash. */
 498         if (ch == 0x0000 || ch == 0x002F) {
 499                 return(1);
 500         } else {
 501                 return(0);
 502         }
 503
 504 #elif defined OS2 || defined WIN_95 || defined WIN_NT
 505         /* Illegal char's for OS/2 according to WARP toolkit. */
 506         if (ch < 0x0020 || UnicodeInString("\\/:*?\"<>|", ch)) {
 507                 return(1);
 508         } else {
 509                 return(0);
 510         }
 511 #endif
 512 }
 513 /* #endif*/     /* NEEDS_ISPRINT */
 514