sal/textenc/tencinfo.cxx

   1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
   2 /*
   3  * This file is part of the LibreOffice project.
   4  *
   5  * This Source Code Form is subject to the terms of the Mozilla Public
   6  * License, v. 2.0. If a copy of the MPL was not distributed with this
   7  * file, You can obtain one at http://mozilla.org/MPL/2.0/.
   8  *
   9  * This file incorporates work covered by the following license notice:
  10  *
  11  *   Licensed to the Apache Software Foundation (ASF) under one or more
  12  *   contributor license agreements. See the NOTICE file distributed
  13  *   with this work for additional information regarding copyright
  14  *   ownership. The ASF licenses this file to you under the Apache
  15  *   License, Version 2.0 (the "License"); you may not use this file
  16  *   except in compliance with the License. You may obtain a copy of
  17  *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
  18  */
  19
  20 #include <sal/config.h>
  21
  22 #include <cstddef>
  23 #include <cstring>
  24
  25 #include <rtl/tencinfo.h>
  26
  27 #include "gettextencodingdata.hxx"
  28 #include "tenchelp.hxx"
  29 #include <memory>
  30
  31 sal_Bool SAL_CALL rtl_isOctetTextEncoding(rtl_TextEncoding nEncoding)
  32 {
  33     return
  34         nEncoding > RTL_TEXTENCODING_DONTKNOW
  35         && nEncoding != 9 // RTL_TEXTENCODING_SYSTEM
  36         && nEncoding <= RTL_TEXTENCODING_MAZOVIA; // always update this!
  37 }
  38
  39 /* ======================================================================= */
  40
  41 static void Impl_toAsciiLower( const char* pName, char* pBuf )
  42 {
  43     while ( *pName )
  44     {
  45         /* A-Z */
  46         if ( (*pName >= 0x41) && (*pName <= 0x5A) )
  47             *pBuf = (*pName)+0x20;  /* toAsciiLower */
  48         else
  49             *pBuf = *pName;
  50
  51         pBuf++;
  52         pName++;
  53     }
  54
  55     *pBuf = '\0';
  56 }
  57
  58 /* ----------------------------------------------------------------------- */
  59
  60 static void Impl_toAsciiLowerAndRemoveNonAlphanumeric( const char* pName, char* pBuf )
  61 {
  62     while ( *pName )
  63     {
  64         /* A-Z */
  65         if ( (*pName >= 0x41) && (*pName <= 0x5A) )
  66         {
  67             *pBuf = (*pName)+0x20;  /* toAsciiLower */
  68             pBuf++;
  69         }
  70         /* a-z, 0-9 */
  71         else if ( ((*pName >= 0x61) && (*pName <= 0x7A)) ||
  72                   ((*pName >= 0x30) && (*pName <= 0x39)) )
  73         {
  74             *pBuf = *pName;
  75             pBuf++;
  76         }
  77
  78         pName++;
  79     }
  80
  81     *pBuf = '\0';
  82 }
  83
  84 /* ----------------------------------------------------------------------- */
  85
  86 /* pMatchStr must match with all characters in pCompStr */
  87 static bool Impl_matchString( const char* pCompStr, const char* pMatchStr )
  88 {
  89     /* We test only for end in MatchStr, because the last 0 character from */
  90     /* pCompStr is unequal a character in MatchStr, so the loop terminates */
  91     while ( *pMatchStr )
  92     {
  93         if ( *pCompStr != *pMatchStr )
  94             return false;
  95
  96         pCompStr++;
  97         pMatchStr++;
  98     }
  99
 100     return true;
 101 }
 102
 103 /* ======================================================================= */
 104
 105 namespace {
 106
 107 struct ImplStrCharsetDef
 108 {
 109     const char*             mpCharsetStr;
 110     rtl_TextEncoding        meTextEncoding;
 111 };
 112
 113 struct ImplStrFirstPartCharsetDef
 114 {
 115     const char*             mpCharsetStr;
 116     const ImplStrCharsetDef*    mpSecondPartTab;
 117 };
 118
 119 }
 120
 121 /* ======================================================================= */
 122
 123 sal_Bool SAL_CALL rtl_getTextEncodingInfo( rtl_TextEncoding eTextEncoding, rtl_TextEncodingInfo* pEncInfo )
 124 {
 125     const ImplTextEncodingData* pData;
 126
 127     pData = Impl_getTextEncodingData( eTextEncoding );
 128     if ( !pData )
 129     {
 130         /* HACK: For not implemented encoding, because not all
 131            calls handle the errors */
 132         if ( pEncInfo->StructSize < 5 )
 133             return false;
 134         pEncInfo->MinimumCharSize = 1;
 135
 136         if ( pEncInfo->StructSize < 6 )
 137             return true;
 138         pEncInfo->MaximumCharSize = 1;
 139
 140         if ( pEncInfo->StructSize < 7 )
 141             return true;
 142         pEncInfo->AverageCharSize = 1;
 143
 144         if ( pEncInfo->StructSize < 12 )
 145             return true;
 146         pEncInfo->Flags = 0;
 147
 148         return false;
 149     }
 150
 151     if ( pEncInfo->StructSize < 5 )
 152         return false;
 153     pEncInfo->MinimumCharSize = pData->mnMinCharSize;
 154
 155     if ( pEncInfo->StructSize < 6 )
 156         return true;
 157     pEncInfo->MaximumCharSize = pData->mnMaxCharSize;
 158
 159     if ( pEncInfo->StructSize < 7 )
 160         return true;
 161     pEncInfo->AverageCharSize = pData->mnAveCharSize;
 162
 163     if ( pEncInfo->StructSize < 12 )
 164         return true;
 165     pEncInfo->Flags = pData->mnInfoFlags;
 166
 167     return true;
 168 }
 169
 170 /* ======================================================================= */
 171
 172 rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromWindowsCharset( sal_uInt8 nWinCharset )
 173 {
 174     rtl_TextEncoding eTextEncoding;
 175
 176     switch ( nWinCharset )
 177     {
 178         case 0:     eTextEncoding = RTL_TEXTENCODING_MS_1252; break;    /* ANSI_CHARSET */
 179         case 2:     eTextEncoding = RTL_TEXTENCODING_SYMBOL; break;     /* SYMBOL_CHARSET */
 180         case 77:    eTextEncoding = RTL_TEXTENCODING_APPLE_ROMAN; break;/* MAC_CHARSET */
 181         case 128:   eTextEncoding = RTL_TEXTENCODING_MS_932; break;     /* SHIFTJIS_CHARSET */
 182         case 129:   eTextEncoding = RTL_TEXTENCODING_MS_949; break;     /* HANGEUL_CHARSET */
 183         case 130:   eTextEncoding = RTL_TEXTENCODING_MS_1361; break;    /* JOHAB_CHARSET */
 184         case 134:   eTextEncoding = RTL_TEXTENCODING_MS_936; break;     /* GB2312_CHARSET */
 185         case 136:   eTextEncoding = RTL_TEXTENCODING_MS_950; break;     /* CHINESEBIG5_CHARSET */
 186         case 161:   eTextEncoding = RTL_TEXTENCODING_MS_1253; break;    /* GREEK_CHARSET */
 187         case 162:   eTextEncoding = RTL_TEXTENCODING_MS_1254; break;    /* TURKISH_CHARSET */
 188         case 163:   eTextEncoding = RTL_TEXTENCODING_MS_1258; break;    /* VIETNAMESE_CHARSET !!! */
 189         case 177:   eTextEncoding = RTL_TEXTENCODING_MS_1255; break;    /* HEBREW_CHARSET */
 190         case 178:   eTextEncoding = RTL_TEXTENCODING_MS_1256; break;    /* ARABIC_CHARSET */
 191         case 186:   eTextEncoding = RTL_TEXTENCODING_MS_1257; break;    /* BALTIC_CHARSET */
 192         case 204:   eTextEncoding = RTL_TEXTENCODING_MS_1251; break;    /* RUSSIAN_CHARSET */
 193         case 222:   eTextEncoding = RTL_TEXTENCODING_MS_874; break;     /* THAI_CHARSET */
 194         case 238:   eTextEncoding = RTL_TEXTENCODING_MS_1250; break;    /* EASTEUROPE_CHARSET */
 195         case 255:   eTextEncoding = RTL_TEXTENCODING_IBM_850; break;    /* OEM_CHARSET */
 196         default:    eTextEncoding = RTL_TEXTENCODING_DONTKNOW; break;
 197     }
 198
 199     return eTextEncoding;
 200 }
 201
 202 /* ----------------------------------------------------------------------- */
 203
 204 rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromUnixCharset( const char* pUnixCharset )
 205 {
 206     /* See <ftp://ftp.x.org/pub/DOCS/registry>, section 14 ("Font Charset
 207      * (Registry and Encoding) Names").
 208      */
 209
 210     /* All Identifiers in the tables are lower case The function search */
 211     /* for the first matching string in the tables. */
 212     /* Sort order: unique (first 14, then 1), important */
 213
 214     static ImplStrCharsetDef const aUnixCharsetISOTab[] =
 215     {
 216         { "15", RTL_TEXTENCODING_ISO_8859_15 },
 217         { "14", RTL_TEXTENCODING_ISO_8859_14 },
 218         { "13", RTL_TEXTENCODING_ISO_8859_13 },
 219         { "11", RTL_TEXTENCODING_TIS_620 },
 220         { "10", RTL_TEXTENCODING_ISO_8859_10 },
 221         { "1", RTL_TEXTENCODING_ISO_8859_1 },
 222         { "2", RTL_TEXTENCODING_ISO_8859_2 },
 223         { "3", RTL_TEXTENCODING_ISO_8859_3 },
 224         { "4", RTL_TEXTENCODING_ISO_8859_4 },
 225         { "5", RTL_TEXTENCODING_ISO_8859_5 },
 226         { "6", RTL_TEXTENCODING_ISO_8859_6 },
 227         { "7", RTL_TEXTENCODING_ISO_8859_7 },
 228         { "8", RTL_TEXTENCODING_ISO_8859_8 },
 229         { "9", RTL_TEXTENCODING_ISO_8859_9 },
 230         { nullptr, RTL_TEXTENCODING_DONTKNOW }
 231     };
 232
 233     static ImplStrCharsetDef const aUnixCharsetADOBETab[] =
 234     {
 235         { "fontspecific", RTL_TEXTENCODING_SYMBOL },
 236         { nullptr, RTL_TEXTENCODING_DONTKNOW }
 237     };
 238
 239     static ImplStrCharsetDef const aUnixCharsetMSTab[] =
 240     {
 241         { "1252", RTL_TEXTENCODING_MS_1252 },
 242         { "1250", RTL_TEXTENCODING_MS_1250 },
 243         { "1251", RTL_TEXTENCODING_MS_1251 },
 244         { "1253", RTL_TEXTENCODING_MS_1253 },
 245         { "1254", RTL_TEXTENCODING_MS_1254 },
 246         { "1255", RTL_TEXTENCODING_MS_1255 },
 247         { "1256", RTL_TEXTENCODING_MS_1256 },
 248         { "1257", RTL_TEXTENCODING_MS_1257 },
 249         { "1258", RTL_TEXTENCODING_MS_1258 },
 250         { "932", RTL_TEXTENCODING_MS_932 },
 251         { "936", RTL_TEXTENCODING_MS_936 },
 252         { "949", RTL_TEXTENCODING_MS_949 },
 253         { "950", RTL_TEXTENCODING_MS_950 },
 254         { "1361", RTL_TEXTENCODING_MS_1361 },
 255         { "cp1252", RTL_TEXTENCODING_MS_1252 },
 256         { "cp1250", RTL_TEXTENCODING_MS_1250 },
 257         { "cp1251", RTL_TEXTENCODING_MS_1251 },
 258         { "cp1253", RTL_TEXTENCODING_MS_1253 },
 259         { "cp1254", RTL_TEXTENCODING_MS_1254 },
 260         { "cp1255", RTL_TEXTENCODING_MS_1255 },
 261         { "cp1256", RTL_TEXTENCODING_MS_1256 },
 262         { "cp1257", RTL_TEXTENCODING_MS_1257 },
 263         { "cp1258", RTL_TEXTENCODING_MS_1258 },
 264         { "cp932", RTL_TEXTENCODING_MS_932 },
 265         { "cp936", RTL_TEXTENCODING_MS_936 },
 266         { "cp949", RTL_TEXTENCODING_MS_949 },
 267         { "cp950", RTL_TEXTENCODING_MS_950 },
 268         { "cp1361", RTL_TEXTENCODING_MS_1361 },
 269         { nullptr, RTL_TEXTENCODING_DONTKNOW }
 270     };
 271
 272     static ImplStrCharsetDef const aUnixCharsetIBMTab[] =
 273     {
 274         { "437", RTL_TEXTENCODING_IBM_437 },
 275         { "850", RTL_TEXTENCODING_IBM_850 },
 276         { "860", RTL_TEXTENCODING_IBM_860 },
 277         { "861", RTL_TEXTENCODING_IBM_861 },
 278         { "863", RTL_TEXTENCODING_IBM_863 },
 279         { "865", RTL_TEXTENCODING_IBM_865 },
 280         { "737", RTL_TEXTENCODING_IBM_737 },
 281         { "775", RTL_TEXTENCODING_IBM_775 },
 282         { "852", RTL_TEXTENCODING_IBM_852 },
 283         { "855", RTL_TEXTENCODING_IBM_855 },
 284         { "857", RTL_TEXTENCODING_IBM_857 },
 285         { "862", RTL_TEXTENCODING_IBM_862 },
 286         { "864", RTL_TEXTENCODING_IBM_864 },
 287         { "866", RTL_TEXTENCODING_IBM_866 },
 288         { "869", RTL_TEXTENCODING_IBM_869 },
 289         { "874", RTL_TEXTENCODING_MS_874 },
 290         { "1004", RTL_TEXTENCODING_MS_1252 },
 291         { "65400", RTL_TEXTENCODING_SYMBOL },
 292         { nullptr, RTL_TEXTENCODING_DONTKNOW }
 293     };
 294
 295     static ImplStrCharsetDef const aUnixCharsetKOI8Tab[] =
 296     {
 297         { "r", RTL_TEXTENCODING_KOI8_R },
 298         { "u", RTL_TEXTENCODING_KOI8_U },
 299         { nullptr, RTL_TEXTENCODING_DONTKNOW }
 300     };
 301
 302     static ImplStrCharsetDef const aUnixCharsetJISX0208Tab[] =
 303     {
 304         { nullptr, RTL_TEXTENCODING_JIS_X_0208 }
 305     };
 306
 307     static ImplStrCharsetDef const aUnixCharsetJISX0201Tab[] =
 308     {
 309         { nullptr, RTL_TEXTENCODING_JIS_X_0201 }
 310     };
 311
 312     static ImplStrCharsetDef const aUnixCharsetJISX0212Tab[] =
 313     {
 314         { nullptr, RTL_TEXTENCODING_JIS_X_0212 }
 315     };
 316
 317     static ImplStrCharsetDef const aUnixCharsetGBTab[] =
 318     {
 319         { nullptr, RTL_TEXTENCODING_GB_2312 }
 320     };
 321
 322     static ImplStrCharsetDef const aUnixCharsetGBKTab[] =
 323     {
 324         { nullptr, RTL_TEXTENCODING_GBK }
 325     };
 326
 327     static ImplStrCharsetDef const aUnixCharsetBIG5Tab[] =
 328     {
 329         { nullptr, RTL_TEXTENCODING_BIG5 }
 330     };
 331
 332     static ImplStrCharsetDef const aUnixCharsetKSC56011987Tab[] =
 333     {
 334         { nullptr, RTL_TEXTENCODING_EUC_KR }
 335     };
 336
 337     static ImplStrCharsetDef const aUnixCharsetKSC56011992Tab[] =
 338     {
 339         { nullptr, RTL_TEXTENCODING_MS_1361 }
 340     };
 341
 342     static ImplStrCharsetDef const aUnixCharsetISO10646Tab[] =
 343     {
 344         { nullptr, RTL_TEXTENCODING_UNICODE }
 345     };
 346
 347     static ImplStrCharsetDef const aUnixCharsetUNICODETab[] =
 348     {
 349 /* Currently every Unicode Encoding is for us Unicode */
 350 /*        { "fontspecific", RTL_TEXTENCODING_UNICODE }, */
 351         { nullptr, RTL_TEXTENCODING_UNICODE }
 352     };
 353
 354     static ImplStrCharsetDef const aUnixCharsetSymbolTab[] =
 355     {
 356         { nullptr, RTL_TEXTENCODING_SYMBOL }
 357     };
 358
 359     /* See <http://cvs.freedesktop.org/xorg/xc/fonts/encodings/iso8859-11.enc?
 360        rev=1.1.1.1>: */
 361     static ImplStrCharsetDef const aUnixCharsetTIS620Tab[] =
 362     {
 363         { "0", RTL_TEXTENCODING_TIS_620 },
 364         { "2529", RTL_TEXTENCODING_TIS_620 },
 365         { "2533", RTL_TEXTENCODING_TIS_620 },
 366         { nullptr, RTL_TEXTENCODING_DONTKNOW }
 367     };
 368     static ImplStrCharsetDef const aUnixCharsetTIS6202529Tab[] =
 369     {
 370         { "1", RTL_TEXTENCODING_TIS_620 },
 371         { nullptr, RTL_TEXTENCODING_DONTKNOW }
 372     };
 373     static ImplStrCharsetDef const aUnixCharsetTIS6202533Tab[] =
 374     {
 375         { "0", RTL_TEXTENCODING_TIS_620 },
 376         { "1", RTL_TEXTENCODING_TIS_620 },
 377         { nullptr, RTL_TEXTENCODING_DONTKNOW }
 378     };
 379
 380     static ImplStrFirstPartCharsetDef const aUnixCharsetFirstPartTab[] =
 381     {
 382         { "iso8859", aUnixCharsetISOTab },
 383         { "adobe", aUnixCharsetADOBETab },
 384         { "ansi", aUnixCharsetMSTab },
 385         { "microsoft", aUnixCharsetMSTab },
 386         { "ibm", aUnixCharsetIBMTab },
 387         { "koi8", aUnixCharsetKOI8Tab },
 388         { "jisx0208", aUnixCharsetJISX0208Tab },
 389         { "jisx0208.1983", aUnixCharsetJISX0208Tab },
 390         { "jisx0201", aUnixCharsetJISX0201Tab },
 391         { "jisx0201.1976", aUnixCharsetJISX0201Tab },
 392         { "jisx0212", aUnixCharsetJISX0212Tab },
 393         { "jisx0212.1990", aUnixCharsetJISX0212Tab },
 394         { "gb2312", aUnixCharsetGBTab },
 395         { "gbk", aUnixCharsetGBKTab },
 396         { "big5", aUnixCharsetBIG5Tab },
 397         { "iso10646", aUnixCharsetISO10646Tab },
 398 /*      { "unicode", aUnixCharsetUNICODETab }, */ /* fonts contain only default chars */
 399         { "sunolcursor", aUnixCharsetSymbolTab },
 400         { "sunolglyph", aUnixCharsetSymbolTab },
 401         { "iso10646", aUnixCharsetUNICODETab },
 402         { "ksc5601.1987", aUnixCharsetKSC56011987Tab },
 403         { "ksc5601.1992", aUnixCharsetKSC56011992Tab },
 404         { "tis620.2529", aUnixCharsetTIS6202529Tab },
 405         { "tis620.2533", aUnixCharsetTIS6202533Tab },
 406         { "tis620", aUnixCharsetTIS620Tab },
 407 /*        { "sunudcja.1997",  },        */
 408 /*        { "sunudcko.1997",  },        */
 409 /*        { "sunudczh.1997",  },        */
 410 /*        { "sunudczhtw.1997",  },      */
 411         { nullptr, nullptr }
 412     };
 413
 414     rtl_TextEncoding    eEncoding = RTL_TEXTENCODING_DONTKNOW;
 415     char*           pTempBuf;
 416     sal_uInt32          nBufLen = strlen( pUnixCharset )+1;
 417     const char*     pFirstPart;
 418     const char*     pSecondPart;
 419
 420     /* Alloc Buffer and map to lower case */
 421     std::unique_ptr<char[]> pBuf(new char[nBufLen]);
 422     Impl_toAsciiLower( pUnixCharset, pBuf.get() );
 423
 424     /* Search FirstPart */
 425     pFirstPart = pBuf.get();
 426     pSecondPart = nullptr;
 427     pTempBuf = pBuf.get();
 428     while ( *pTempBuf )
 429     {
 430         if ( *pTempBuf == '-' )
 431         {
 432             *pTempBuf = '\0';
 433             pSecondPart = pTempBuf+1;
 434             break;
 435         }
 436
 437         pTempBuf++;
 438     }
 439
 440     /* found part separator */
 441     if ( pSecondPart )
 442     {
 443         /* Search for the part tab */
 444         const ImplStrFirstPartCharsetDef* pFirstPartData = aUnixCharsetFirstPartTab;
 445         while ( pFirstPartData->mpCharsetStr )
 446         {
 447             if ( Impl_matchString( pFirstPart, pFirstPartData->mpCharsetStr ) )
 448             {
 449                 /* Search for the charset in the second part tab */
 450                 const ImplStrCharsetDef* pData = pFirstPartData->mpSecondPartTab;
 451                 while ( pData->mpCharsetStr )
 452                 {
 453                     if ( Impl_matchString( pSecondPart, pData->mpCharsetStr ) )
 454                     {
 455                         break;
 456                     }
 457
 458                     pData++;
 459                 }
 460
 461                 /* use default encoding for first part */
 462                 eEncoding = pData->meTextEncoding;
 463                 break;
 464             }
 465
 466             pFirstPartData++;
 467         }
 468     }
 469
 470     return eEncoding;
 471 }
 472
 473 /* ----------------------------------------------------------------------- */
 474
 475 rtl_TextEncoding SAL_CALL rtl_getTextEncodingFromMimeCharset( const char* pMimeCharset )
 476 {
 477     /* All Identifiers are in lower case and contain only alphanumeric */
 478     /* characters. The function search for the first equal string in */
 479     /* the table. In this table are only the most used mime types. */
 480     /* Sort order: important */
 481     static ImplStrCharsetDef const aVIPMimeCharsetTab[] =
 482     {
 483         { "usascii", RTL_TEXTENCODING_ASCII_US },
 484         { "utf8", RTL_TEXTENCODING_UTF8 },
 485         { "utf7", RTL_TEXTENCODING_UTF7 },
 486         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
 487         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
 488         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
 489         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
 490         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
 491         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
 492         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
 493         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
 494         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
 495         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
 496         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
 497         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
 498         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
 499         { "iso2022jp", RTL_TEXTENCODING_ISO_2022_JP },
 500         { "iso2022jp2", RTL_TEXTENCODING_ISO_2022_JP },
 501         { "iso2022cn", RTL_TEXTENCODING_ISO_2022_CN },
 502         { "iso2022cnext", RTL_TEXTENCODING_ISO_2022_CN },
 503         { "iso2022kr", RTL_TEXTENCODING_ISO_2022_KR },
 504         { "eucjp", RTL_TEXTENCODING_EUC_JP },
 505         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
 506         { "mskanji", RTL_TEXTENCODING_MS_932 },
 507         { "gb2312", RTL_TEXTENCODING_GB_2312 },
 508         { "cngb", RTL_TEXTENCODING_GB_2312 },
 509         { "big5", RTL_TEXTENCODING_BIG5 },
 510         { "cnbig5", RTL_TEXTENCODING_BIG5 },
 511         { "cngb12345", RTL_TEXTENCODING_GBT_12345 },
 512         { "euckr", RTL_TEXTENCODING_EUC_KR },
 513         { "koi8r", RTL_TEXTENCODING_KOI8_R },
 514         { "windows1252", RTL_TEXTENCODING_MS_1252 },
 515         { "windows1250", RTL_TEXTENCODING_MS_1250 },
 516         { "windows1251", RTL_TEXTENCODING_MS_1251 },
 517         { "windows1253", RTL_TEXTENCODING_MS_1253 },
 518         { "windows1254", RTL_TEXTENCODING_MS_1254 },
 519         { "windows1255", RTL_TEXTENCODING_MS_1255 },
 520         { "windows1256", RTL_TEXTENCODING_MS_1256 },
 521         { "windows1257", RTL_TEXTENCODING_MS_1257 },
 522         { "windows1258", RTL_TEXTENCODING_MS_1258 },
 523         { nullptr, RTL_TEXTENCODING_DONTKNOW }
 524     };
 525
 526     /* All Identifiers are in lower case and contain only alphanumeric */
 527     /* characters. The function search for the first matching string in */
 528     /* the table. */
 529     /* Sort order: unique (first iso885914, then iso88591), important */
 530     static ImplStrCharsetDef const aMimeCharsetTab[] =
 531     {
 532         { "unicode11utf7", RTL_TEXTENCODING_UTF7 },
 533         { "caunicode11utf7", RTL_TEXTENCODING_UTF7 },
 534         { "iso88591windows30", RTL_TEXTENCODING_ISO_8859_1 },
 535         { "iso88591win", RTL_TEXTENCODING_MS_1252 },
 536         { "iso88592win", RTL_TEXTENCODING_MS_1250 },
 537         { "iso88599win", RTL_TEXTENCODING_MS_1254 },
 538         { "iso885915", RTL_TEXTENCODING_ISO_8859_15 },
 539         { "iso885914", RTL_TEXTENCODING_ISO_8859_14 },
 540         { "iso885913", RTL_TEXTENCODING_ISO_8859_13 },
 541         { "iso885911", RTL_TEXTENCODING_TIS_620 },
 542             /* This is no official MIME character set name, but it might be in
 543                use in Thailand. */
 544         { "iso885910", RTL_TEXTENCODING_ISO_8859_10 },
 545         { "iso88591", RTL_TEXTENCODING_ISO_8859_1 },
 546         { "iso88592", RTL_TEXTENCODING_ISO_8859_2 },
 547         { "iso88593", RTL_TEXTENCODING_ISO_8859_3 },
 548         { "iso88594", RTL_TEXTENCODING_ISO_8859_4 },
 549         { "iso88595", RTL_TEXTENCODING_ISO_8859_5 },
 550         { "iso88596", RTL_TEXTENCODING_ISO_8859_6 },
 551         { "iso88597", RTL_TEXTENCODING_ISO_8859_7 },
 552         { "iso88598", RTL_TEXTENCODING_ISO_8859_8 },
 553         { "iso88599", RTL_TEXTENCODING_ISO_8859_9 },
 554         { "isoir100", RTL_TEXTENCODING_ISO_8859_1 },
 555         { "latin1", RTL_TEXTENCODING_ISO_8859_1 },
 556         { "l1", RTL_TEXTENCODING_ISO_8859_1 },
 557         { "cp819", RTL_TEXTENCODING_ISO_8859_1 },
 558         { "ibm819", RTL_TEXTENCODING_ISO_8859_1 },
 559         { "csisolatin1", RTL_TEXTENCODING_ISO_8859_1 },
 560         { "isoir101", RTL_TEXTENCODING_ISO_8859_2 },
 561         { "latin2", RTL_TEXTENCODING_ISO_8859_2 },
 562         { "l2", RTL_TEXTENCODING_ISO_8859_2 },
 563         { "csisolatin2", RTL_TEXTENCODING_ISO_8859_2 },
 564         { "isoir109", RTL_TEXTENCODING_ISO_8859_3 },
 565         { "latin3", RTL_TEXTENCODING_ISO_8859_3 },
 566         { "l3", RTL_TEXTENCODING_ISO_8859_3 },
 567         { "csisolatin3", RTL_TEXTENCODING_ISO_8859_3 },
 568         { "isoir110", RTL_TEXTENCODING_ISO_8859_4 },
 569         { "latin4", RTL_TEXTENCODING_ISO_8859_4 },
 570         { "l4", RTL_TEXTENCODING_ISO_8859_4 },
 571         { "csisolatin4", RTL_TEXTENCODING_ISO_8859_4 },
 572         { "isoir144", RTL_TEXTENCODING_ISO_8859_5 },
 573         { "cyrillicasian", RTL_TEXTENCODING_PT154 },
 574         { "cyrillic", RTL_TEXTENCODING_ISO_8859_5 },
 575         { "csisolatincyrillic", RTL_TEXTENCODING_ISO_8859_5 },
 576         { "isoir127", RTL_TEXTENCODING_ISO_8859_6 },
 577         { "arabic", RTL_TEXTENCODING_ISO_8859_6 },
 578         { "csisolatinarabic", RTL_TEXTENCODING_ISO_8859_6 },
 579         { "ecma114", RTL_TEXTENCODING_ISO_8859_6 },
 580         { "asmo708", RTL_TEXTENCODING_ISO_8859_6 },
 581         { "isoir126", RTL_TEXTENCODING_ISO_8859_7 },
 582         { "greek", RTL_TEXTENCODING_ISO_8859_7 },
 583         { "csisolatingreek", RTL_TEXTENCODING_ISO_8859_7 },
 584         { "elot928", RTL_TEXTENCODING_ISO_8859_7 },
 585         { "ecma118", RTL_TEXTENCODING_ISO_8859_7 },
 586         { "isoir138", RTL_TEXTENCODING_ISO_8859_8 },
 587         { "hebrew", RTL_TEXTENCODING_ISO_8859_8 },
 588         { "csisolatinhebrew", RTL_TEXTENCODING_ISO_8859_8 },
 589         { "isoir148", RTL_TEXTENCODING_ISO_8859_9 },
 590         { "latin5", RTL_TEXTENCODING_ISO_8859_9 },
 591         { "l5", RTL_TEXTENCODING_ISO_8859_9 },
 592         { "csisolatin5", RTL_TEXTENCODING_ISO_8859_9 },
 593         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
 594         { "cswindows30latin1", RTL_TEXTENCODING_ISO_8859_1 },
 595         { "cswindows31latin1", RTL_TEXTENCODING_MS_1252 },
 596         { "cswindows31latin2", RTL_TEXTENCODING_MS_1250 },
 597         { "cswindows31latin5", RTL_TEXTENCODING_MS_1254 },
 598         { "iso10646us", RTL_TEXTENCODING_ASCII_US },
 599         { "iso646irv", RTL_TEXTENCODING_ASCII_US },
 600         { "cskoi8r", RTL_TEXTENCODING_KOI8_R },
 601         { "ibm437", RTL_TEXTENCODING_IBM_437 },
 602         { "cp437", RTL_TEXTENCODING_IBM_437 },
 603         { "437", RTL_TEXTENCODING_IBM_437 },
 604         { "cspc8codepage437", RTL_TEXTENCODING_IBM_437 },
 605         { "ansix34", RTL_TEXTENCODING_ASCII_US },
 606         { "ibm367", RTL_TEXTENCODING_ASCII_US },
 607         { "cp367", RTL_TEXTENCODING_ASCII_US },
 608         { "csascii", RTL_TEXTENCODING_ASCII_US },
 609         { "ibm775", RTL_TEXTENCODING_IBM_775 },
 610         { "cp775", RTL_TEXTENCODING_IBM_775 },
 611         { "cspc775baltic", RTL_TEXTENCODING_IBM_775 },
 612         { "ibm850", RTL_TEXTENCODING_IBM_850 },
 613         { "cp850", RTL_TEXTENCODING_IBM_850 },
 614         { "850", RTL_TEXTENCODING_IBM_850 },
 615         { "cspc850multilingual", RTL_TEXTENCODING_IBM_850 },
 616 /*        { "ibm851", RTL_TEXTENCODING_IBM_851 }, */
 617 /*        { "cp851", RTL_TEXTENCODING_IBM_851 }, */
 618 /*        { "851", RTL_TEXTENCODING_IBM_851 }, */
 619 /*        { "csibm851", RTL_TEXTENCODING_IBM_851 }, */
 620         { "ibm852", RTL_TEXTENCODING_IBM_852 },
 621         { "cp852", RTL_TEXTENCODING_IBM_852 },
 622         { "852", RTL_TEXTENCODING_IBM_852 },
 623         { "cspcp852", RTL_TEXTENCODING_IBM_852 },
 624         { "ibm855", RTL_TEXTENCODING_IBM_855 },
 625         { "cp855", RTL_TEXTENCODING_IBM_855 },
 626         { "855", RTL_TEXTENCODING_IBM_855 },
 627         { "csibm855", RTL_TEXTENCODING_IBM_855 },
 628         { "ibm857", RTL_TEXTENCODING_IBM_857 },
 629         { "cp857", RTL_TEXTENCODING_IBM_857 },
 630         { "857", RTL_TEXTENCODING_IBM_857 },
 631         { "csibm857", RTL_TEXTENCODING_IBM_857 },
 632         { "ibm860", RTL_TEXTENCODING_IBM_860 },
 633         { "cp860", RTL_TEXTENCODING_IBM_860 },
 634         { "860", RTL_TEXTENCODING_IBM_860 },
 635         { "csibm860", RTL_TEXTENCODING_IBM_860 },
 636         { "ibm861", RTL_TEXTENCODING_IBM_861 },
 637         { "cp861", RTL_TEXTENCODING_IBM_861 },
 638         { "861", RTL_TEXTENCODING_IBM_861 },
 639         { "csis", RTL_TEXTENCODING_IBM_861 },
 640         { "csibm861", RTL_TEXTENCODING_IBM_861 },
 641         { "ibm862", RTL_TEXTENCODING_IBM_862 },
 642         { "cp862", RTL_TEXTENCODING_IBM_862 },
 643         { "862", RTL_TEXTENCODING_IBM_862 },
 644         { "cspc862latinhebrew", RTL_TEXTENCODING_IBM_862 },
 645         { "ibm863", RTL_TEXTENCODING_IBM_863 },
 646         { "cp863", RTL_TEXTENCODING_IBM_863 },
 647         { "863", RTL_TEXTENCODING_IBM_863 },
 648         { "csibm863", RTL_TEXTENCODING_IBM_863 },
 649         { "ibm864", RTL_TEXTENCODING_IBM_864 },
 650         { "cp864", RTL_TEXTENCODING_IBM_864 },
 651         { "864", RTL_TEXTENCODING_IBM_864 },
 652         { "csibm864", RTL_TEXTENCODING_IBM_864 },
 653         { "ibm865", RTL_TEXTENCODING_IBM_865 },
 654         { "cp865", RTL_TEXTENCODING_IBM_865 },
 655         { "865", RTL_TEXTENCODING_IBM_865 },
 656         { "csibm865", RTL_TEXTENCODING_IBM_865 },
 657         { "ibm866", RTL_TEXTENCODING_IBM_866 },
 658         { "cp866", RTL_TEXTENCODING_IBM_866 },
 659         { "866", RTL_TEXTENCODING_IBM_866 },
 660         { "csibm866", RTL_TEXTENCODING_IBM_866 },
 661 /*        { "ibm868", RTL_TEXTENCODING_IBM_868 }, */
 662 /*        { "cp868", RTL_TEXTENCODING_IBM_868 }, */
 663 /*        { "cpar", RTL_TEXTENCODING_IBM_868 }, */
 664 /*        { "csibm868", RTL_TEXTENCODING_IBM_868 }, */
 665         { "ibm869", RTL_TEXTENCODING_IBM_869 },
 666         { "cp869", RTL_TEXTENCODING_IBM_869 },
 667         { "869", RTL_TEXTENCODING_IBM_869 },
 668         { "cpgr", RTL_TEXTENCODING_IBM_869 },
 669         { "csibm869", RTL_TEXTENCODING_IBM_869 },
 670         { "ibm869", RTL_TEXTENCODING_IBM_869 },
 671         { "cp869", RTL_TEXTENCODING_IBM_869 },
 672         { "869", RTL_TEXTENCODING_IBM_869 },
 673         { "cpgr", RTL_TEXTENCODING_IBM_869 },
 674         { "csibm869", RTL_TEXTENCODING_IBM_869 },
 675         { "mac", RTL_TEXTENCODING_APPLE_ROMAN },
 676         { "csmacintosh", RTL_TEXTENCODING_APPLE_ROMAN },
 677         { "shiftjis", RTL_TEXTENCODING_SHIFT_JIS },
 678         { "mskanji", RTL_TEXTENCODING_MS_932 },
 679         { "csshiftjis", RTL_TEXTENCODING_SHIFT_JIS },
 680         { "jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
 681         { "jisc62261983", RTL_TEXTENCODING_JIS_X_0208 },
 682         { "csiso87jisx0208", RTL_TEXTENCODING_JIS_X_0208 },
 683         { "isoir86", RTL_TEXTENCODING_JIS_X_0208 },
 684         { "x0208", RTL_TEXTENCODING_JIS_X_0208 },
 685         { "jisx0201", RTL_TEXTENCODING_JIS_X_0201 },
 686         { "cshalfwidthkatakana", RTL_TEXTENCODING_JIS_X_0201 },
 687         { "x0201", RTL_TEXTENCODING_JIS_X_0201 },
 688         { "jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
 689         { "csiso159jisx0212", RTL_TEXTENCODING_JIS_X_0212 },
 690         { "isoir159", RTL_TEXTENCODING_JIS_X_0208 },
 691         { "x0212", RTL_TEXTENCODING_JIS_X_0212 },
 692         { "isoir6", RTL_TEXTENCODING_ASCII_US },
 693         { "xsjis", RTL_TEXTENCODING_SHIFT_JIS },
 694         { "sjis", RTL_TEXTENCODING_SHIFT_JIS },
 695         { "ascii", RTL_TEXTENCODING_ASCII_US },
 696         { "us", RTL_TEXTENCODING_ASCII_US },
 697         { "gb180302000", RTL_TEXTENCODING_GB_18030 },
 698             /* This is no actual MIME character set name, it is only in here
 699                for backwards compatibility (before "GB18030" was officially
 700                registered with IANA, this code contained some guesses of what
 701                would become official names for GB18030). */
 702         { "gb18030", RTL_TEXTENCODING_GB_18030 },
 703         { "big5hkscs", RTL_TEXTENCODING_BIG5_HKSCS },
 704         { "tis620", RTL_TEXTENCODING_TIS_620 },
 705         { "gbk", RTL_TEXTENCODING_GBK },
 706         { "cp936", RTL_TEXTENCODING_GBK },
 707         { "ms936", RTL_TEXTENCODING_GBK },
 708         { "windows936", RTL_TEXTENCODING_GBK },
 709         { "cp874", RTL_TEXTENCODING_MS_874 },
 710             /* This is no official MIME character set name, but it might be in
 711                use in Thailand. */
 712         { "ms874", RTL_TEXTENCODING_MS_874 },
 713             /* This is no official MIME character set name, but it might be in
 714                use in Thailand. */
 715         { "windows874", RTL_TEXTENCODING_MS_874 },
 716             /* This is no official MIME character set name, but it might be in
 717                use in Thailand. */
 718         { "koi8u", RTL_TEXTENCODING_KOI8_U },
 719         { "cpis", RTL_TEXTENCODING_IBM_861 },
 720         { "ksc56011987", RTL_TEXTENCODING_MS_949 },
 721         { "isoir149", RTL_TEXTENCODING_MS_949 },
 722         { "ksc56011989", RTL_TEXTENCODING_MS_949 },
 723         { "ksc5601", RTL_TEXTENCODING_MS_949 },
 724         { "korean", RTL_TEXTENCODING_MS_949 },
 725         { "csksc56011987", RTL_TEXTENCODING_MS_949 },
 726             /* Map KS_C_5601-1987 and aliases to MS-949 instead of EUC-KR, as
 727                this character set identifier seems to be prominently used by MS
 728                to stand for KS C 5601 plus MS-949 extensions */
 729         { "latin9", RTL_TEXTENCODING_ISO_8859_15 },
 730         { "adobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
 731         { "csadobestandardencoding", RTL_TEXTENCODING_ADOBE_STANDARD },
 732         { "adobesymbolencoding", RTL_TEXTENCODING_ADOBE_SYMBOL },
 733         { "cshppsmath", RTL_TEXTENCODING_ADOBE_SYMBOL },
 734         { "ptcp154", RTL_TEXTENCODING_PT154 },
 735         { "csptcp154", RTL_TEXTENCODING_PT154 },
 736         { "pt154", RTL_TEXTENCODING_PT154 },
 737         { "cp154", RTL_TEXTENCODING_PT154 },
 738         { "xisciide", RTL_TEXTENCODING_ISCII_DEVANAGARI },
 739             /* This is not an official MIME character set name, but is in use by
 740                various windows APIs. */
 741         { nullptr, RTL_TEXTENCODING_DONTKNOW }
 742     };
 743
 744     rtl_TextEncoding            eEncoding = RTL_TEXTENCODING_DONTKNOW;
 745     const ImplStrCharsetDef*    pData = aVIPMimeCharsetTab;
 746     sal_uInt32                  nBufLen = strlen( pMimeCharset )+1;
 747
 748     /* Alloc Buffer and map to lower case and remove non alphanumeric chars */
 749     std::unique_ptr<char[]> pBuf(new char[nBufLen]);
 750     Impl_toAsciiLowerAndRemoveNonAlphanumeric( pMimeCharset, pBuf.get() );
 751
 752     /* Search for equal in the VIP table */
 753     while ( pData->mpCharsetStr )
 754     {
 755         if ( strcmp( pBuf.get(), pData->mpCharsetStr ) == 0 )
 756         {
 757             eEncoding = pData->meTextEncoding;
 758             break;
 759         }
 760
 761         pData++;
 762     }
 763
 764     /* Search for matching in the mime table */
 765     if ( eEncoding == RTL_TEXTENCODING_DONTKNOW )
 766     {
 767         pData = aMimeCharsetTab;
 768         while ( pData->mpCharsetStr )
 769         {
 770             if ( Impl_matchString( pBuf.get(), pData->mpCharsetStr ) )
 771             {
 772                 eEncoding = pData->meTextEncoding;
 773                 break;
 774             }
 775
 776             pData++;
 777         }
 778     }
 779
 780     return eEncoding;
 781 }
 782
 783 /* ======================================================================= */
 784
 785 sal_uInt8 SAL_CALL rtl_getBestWindowsCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
 786 {
 787     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
 788     if ( pData )
 789         return pData->mnBestWindowsCharset;
 790     return 1;
 791 }
 792
 793 /* ----------------------------------------------------------------------- */
 794
 795 const char* SAL_CALL rtl_getBestUnixCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding  )
 796 {
 797     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
 798     if ( pData )
 799         return pData->mpBestUnixCharset;
 800     if( eTextEncoding == RTL_TEXTENCODING_UNICODE )
 801         return "iso10646-1";
 802     return nullptr;
 803 }
 804
 805 /* ----------------------------------------------------------------------- */
 806
 807 char const * SAL_CALL rtl_getMimeCharsetFromTextEncoding(rtl_TextEncoding
 808                                                              nEncoding)
 809 {
 810     ImplTextEncodingData const * p = Impl_getTextEncodingData(nEncoding);
 811     return p && (p->mnInfoFlags & RTL_TEXTENCODING_INFO_MIME) != 0 ?
 812                p->mpBestMimeCharset : nullptr;
 813 }
 814
 815 const char* SAL_CALL rtl_getBestMimeCharsetFromTextEncoding( rtl_TextEncoding eTextEncoding )
 816 {
 817     const ImplTextEncodingData* pData = Impl_getTextEncodingData( eTextEncoding );
 818     if ( pData )
 819         return pData->mpBestMimeCharset;
 820     return nullptr;
 821 }
 822
 823 /* The following two functions are based on <http://www.sharmahd.com/tm/
 824    codepages.html>, <http://msdn.microsoft.com/workshop/author/dhtml/reference/
 825    charsets/charset4.asp>, and <http://www.iana.org/assignments/character-sets>.
 826  */
 827
 828 rtl_TextEncoding SAL_CALL
 829 rtl_getTextEncodingFromWindowsCodePage(sal_uInt32 nCodePage)
 830 {
 831     switch (nCodePage)
 832     {
 833     case 42: return RTL_TEXTENCODING_SYMBOL;
 834     case 437: return RTL_TEXTENCODING_IBM_437;
 835     case 708: return RTL_TEXTENCODING_ISO_8859_6;
 836     case 737: return RTL_TEXTENCODING_IBM_737;
 837     case 775: return RTL_TEXTENCODING_IBM_775;
 838     case 850: return RTL_TEXTENCODING_IBM_850;
 839     case 852: return RTL_TEXTENCODING_IBM_852;
 840     case 855: return RTL_TEXTENCODING_IBM_855;
 841     case 857: return RTL_TEXTENCODING_IBM_857;
 842     case 860: return RTL_TEXTENCODING_IBM_860;
 843     case 861: return RTL_TEXTENCODING_IBM_861;
 844     case 862: return RTL_TEXTENCODING_IBM_862;
 845     case 863: return RTL_TEXTENCODING_IBM_863;
 846     case 864: return RTL_TEXTENCODING_IBM_864;
 847     case 865: return RTL_TEXTENCODING_IBM_865;
 848     case 866: return RTL_TEXTENCODING_IBM_866;
 849     case 869: return RTL_TEXTENCODING_IBM_869;
 850     case 874: return RTL_TEXTENCODING_MS_874;
 851     case 932: return RTL_TEXTENCODING_MS_932;
 852     case 936: return RTL_TEXTENCODING_MS_936;
 853     case 949: return RTL_TEXTENCODING_MS_949;
 854     case 950: return RTL_TEXTENCODING_MS_950;
 855     case 1250: return RTL_TEXTENCODING_MS_1250;
 856     case 1251: return RTL_TEXTENCODING_MS_1251;
 857     case 1252: return RTL_TEXTENCODING_MS_1252;
 858     case 1253: return RTL_TEXTENCODING_MS_1253;
 859     case 1254: return RTL_TEXTENCODING_MS_1254;
 860     case 1255: return RTL_TEXTENCODING_MS_1255;
 861     case 1256: return RTL_TEXTENCODING_MS_1256;
 862     case 1257: return RTL_TEXTENCODING_MS_1257;
 863     case 1258: return RTL_TEXTENCODING_MS_1258;
 864     case 1361: return RTL_TEXTENCODING_MS_1361;
 865     case 10000: return RTL_TEXTENCODING_APPLE_ROMAN;
 866     case 10001: return RTL_TEXTENCODING_APPLE_JAPANESE;
 867     case 10002: return RTL_TEXTENCODING_APPLE_CHINTRAD;
 868     case 10003: return RTL_TEXTENCODING_APPLE_KOREAN;
 869     case 10004: return RTL_TEXTENCODING_APPLE_ARABIC;
 870     case 10005: return RTL_TEXTENCODING_APPLE_HEBREW;
 871     case 10006: return RTL_TEXTENCODING_APPLE_GREEK;
 872     case 10007: return RTL_TEXTENCODING_APPLE_CYRILLIC;
 873     case 10008: return RTL_TEXTENCODING_APPLE_CHINSIMP;
 874     case 10010: return RTL_TEXTENCODING_APPLE_ROMANIAN;
 875     case 10017: return RTL_TEXTENCODING_APPLE_UKRAINIAN;
 876     case 10029: return RTL_TEXTENCODING_APPLE_CENTEURO;
 877     case 10079: return RTL_TEXTENCODING_APPLE_ICELAND;
 878     case 10081: return RTL_TEXTENCODING_APPLE_TURKISH;
 879     case 10082: return RTL_TEXTENCODING_APPLE_CROATIAN;
 880     case 20127: return RTL_TEXTENCODING_ASCII_US;
 881     case 20866: return RTL_TEXTENCODING_KOI8_R;
 882     case 21866: return RTL_TEXTENCODING_KOI8_U;
 883     case 28591: return RTL_TEXTENCODING_ISO_8859_1;
 884     case 28592: return RTL_TEXTENCODING_ISO_8859_2;
 885     case 28593: return RTL_TEXTENCODING_ISO_8859_3;
 886     case 28594: return RTL_TEXTENCODING_ISO_8859_4;
 887     case 28595: return RTL_TEXTENCODING_ISO_8859_5;
 888     case 28596: return RTL_TEXTENCODING_ISO_8859_6;
 889     case 28597: return RTL_TEXTENCODING_ISO_8859_7;
 890     case 28598: return RTL_TEXTENCODING_ISO_8859_8;
 891     case 28599: return RTL_TEXTENCODING_ISO_8859_9;
 892     case 28605: return RTL_TEXTENCODING_ISO_8859_15;
 893     case 50220: return RTL_TEXTENCODING_ISO_2022_JP;
 894     case 50225: return RTL_TEXTENCODING_ISO_2022_KR;
 895     case 51932: return RTL_TEXTENCODING_EUC_JP;
 896     case 51936: return RTL_TEXTENCODING_EUC_CN;
 897     case 51949: return RTL_TEXTENCODING_EUC_KR;
 898     case 57002: return RTL_TEXTENCODING_ISCII_DEVANAGARI;
 899     case 65000: return RTL_TEXTENCODING_UTF7;
 900     case 65001: return RTL_TEXTENCODING_UTF8;
 901     default: return RTL_TEXTENCODING_DONTKNOW;
 902     }
 903 }
 904
 905 sal_uInt32 SAL_CALL
 906 rtl_getWindowsCodePageFromTextEncoding(rtl_TextEncoding nEncoding)
 907 {
 908     switch (nEncoding)
 909     {
 910     case RTL_TEXTENCODING_SYMBOL: return 42;
 911     case RTL_TEXTENCODING_IBM_437: return 437;
 912  /* case RTL_TEXTENCODING_ISO_8859_6: return 708; */
 913     case RTL_TEXTENCODING_IBM_737: return 737;
 914     case RTL_TEXTENCODING_IBM_775: return 775;
 915     case RTL_TEXTENCODING_IBM_850: return 850;
 916     case RTL_TEXTENCODING_IBM_852: return 852;
 917     case RTL_TEXTENCODING_IBM_855: return 855;
 918     case RTL_TEXTENCODING_IBM_857: return 857;
 919     case RTL_TEXTENCODING_IBM_860: return 860;
 920     case RTL_TEXTENCODING_IBM_861: return 861;
 921     case RTL_TEXTENCODING_IBM_862: return 862;
 922     case RTL_TEXTENCODING_IBM_863: return 863;
 923     case RTL_TEXTENCODING_IBM_864: return 864;
 924     case RTL_TEXTENCODING_IBM_865: return 865;
 925     case RTL_TEXTENCODING_IBM_866: return 866;
 926     case RTL_TEXTENCODING_IBM_869: return 869;
 927     case RTL_TEXTENCODING_MS_874: return 874;
 928     case RTL_TEXTENCODING_MS_932: return 932;
 929     case RTL_TEXTENCODING_MS_936: return 936;
 930     case RTL_TEXTENCODING_MS_949: return 949;
 931     case RTL_TEXTENCODING_MS_950: return 950;
 932     case RTL_TEXTENCODING_MS_1250: return 1250;
 933     case RTL_TEXTENCODING_MS_1251: return 1251;
 934     case RTL_TEXTENCODING_MS_1252: return 1252;
 935     case RTL_TEXTENCODING_MS_1253: return 1253;
 936     case RTL_TEXTENCODING_MS_1254: return 1254;
 937     case RTL_TEXTENCODING_MS_1255: return 1255;
 938     case RTL_TEXTENCODING_MS_1256: return 1256;
 939     case RTL_TEXTENCODING_MS_1257: return 1257;
 940     case RTL_TEXTENCODING_MS_1258: return 1258;
 941     case RTL_TEXTENCODING_MS_1361: return 1361;
 942     case RTL_TEXTENCODING_APPLE_ROMAN: return 10000;
 943     case RTL_TEXTENCODING_APPLE_JAPANESE: return 10001;
 944     case RTL_TEXTENCODING_APPLE_CHINTRAD: return 10002;
 945     case RTL_TEXTENCODING_APPLE_KOREAN: return 10003;
 946     case RTL_TEXTENCODING_APPLE_ARABIC: return 10004;
 947     case RTL_TEXTENCODING_APPLE_HEBREW: return 10005;
 948     case RTL_TEXTENCODING_APPLE_GREEK: return 10006;
 949     case RTL_TEXTENCODING_APPLE_CYRILLIC: return 10007;
 950     case RTL_TEXTENCODING_APPLE_CHINSIMP: return 10008;
 951     case RTL_TEXTENCODING_APPLE_ROMANIAN: return 10010;
 952     case RTL_TEXTENCODING_APPLE_UKRAINIAN: return 10017;
 953     case RTL_TEXTENCODING_APPLE_CENTEURO: return 10029;
 954     case RTL_TEXTENCODING_APPLE_ICELAND: return 10079;
 955     case RTL_TEXTENCODING_APPLE_TURKISH: return 10081;
 956     case RTL_TEXTENCODING_APPLE_CROATIAN: return 10082;
 957     case RTL_TEXTENCODING_ASCII_US: return 20127;
 958     case RTL_TEXTENCODING_KOI8_R: return 20866;
 959     case RTL_TEXTENCODING_KOI8_U: return 21866;
 960     case RTL_TEXTENCODING_ISO_8859_1: return 28591;
 961     case RTL_TEXTENCODING_ISO_8859_2: return 28592;
 962     case RTL_TEXTENCODING_ISO_8859_3: return 28593;
 963     case RTL_TEXTENCODING_ISO_8859_4: return 28594;
 964     case RTL_TEXTENCODING_ISO_8859_5: return 28595;
 965     case RTL_TEXTENCODING_ISO_8859_6: return 28596;
 966     case RTL_TEXTENCODING_ISO_8859_7: return 28597;
 967     case RTL_TEXTENCODING_ISO_8859_8: return 28598;
 968     case RTL_TEXTENCODING_ISO_8859_9: return 28599;
 969     case RTL_TEXTENCODING_ISO_8859_15: return 28605;
 970     case RTL_TEXTENCODING_ISO_2022_JP: return 50220;
 971     case RTL_TEXTENCODING_ISO_2022_KR: return 50225;
 972     case RTL_TEXTENCODING_EUC_JP: return 51932;
 973     case RTL_TEXTENCODING_EUC_CN: return 51936;
 974     case RTL_TEXTENCODING_EUC_KR: return 51949;
 975     case RTL_TEXTENCODING_ISCII_DEVANAGARI: return 57002;
 976     case RTL_TEXTENCODING_UTF7: return 65000;
 977     case RTL_TEXTENCODING_UTF8: return 65001;
 978     default: return 0;
 979     }
 980 }
 981
 982 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */