xbmc/input/InputCodingTableKorean.cpp

   1 /*
   2 *      Copyright (C) 2005-2015 Team Kodi
   3 *      http://kodi.tv
   4 *
   5 *  This Program is free software; you can redistribute it and/or modify
   6 *  it under the terms of the GNU General Public License as published by
   7 *  the Free Software Foundation; either version 2, or (at your option)
   8 *  any later version.
   9 *
  10 *  This Program is distributed in the hope that it will be useful,
  11 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
  12 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13 *  GNU General Public License for more details.
  14 *
  15 *  You should have received a copy of the GNU General Public License
  16 *  along with XBMC; see the file COPYING.  If not, see
  17 *  <http://www.gnu.org/licenses/>.
  18 *
  19 */
  20
  21 #include <stdlib.h>
  22 #include "InputCodingTableKorean.h"
  23 #include "utils/CharsetConverter.h"
  24
  25 CInputCodingTableKorean::CInputCodingTableKorean()
  26 {
  27 }
  28
  29 std::vector<std::wstring> CInputCodingTableKorean::GetResponse(int)
  30 {
  31   return m_words;
  32 }
  33
  34 bool CInputCodingTableKorean::GetWordListPage(const std::string& strCode, bool isFirstPage)
  35 {
  36   return false;
  37 }
  38
  39 void CInputCodingTableKorean::SetTextPrev(const std::string& strTextPrev)
  40 {
  41   m_strTextPrev = strTextPrev;
  42 }
  43
  44 int CInputCodingTableKorean::MergeCode(int choseong, int jungseong, int jongseong)
  45 {
  46   return (unsigned short) 0xAC00 + choseong * 21 * 28 + jungseong * 28 + jongseong + 1;
  47 }
  48
  49 // Reference
  50 // https://en.wikipedia.org/wiki/Hangul
  51 // http://www.theyt.net/wiki/%ED%95%9C%EC%98%81%ED%83%80%EB%B3%80%ED%99%98%EA%B8%B0
  52
  53 std::wstring CInputCodingTableKorean::InputToKorean(const std::wstring& input)
  54 {
  55   std::wstring dicEnglish = //L"rRseEfaqQtTdwWczxvgkoiOjpuPhynbml";
  56   { 0x72, 0x52, 0x73, 0x65, 0x45, 0x66, 0x61, 0x71, 0x51, 0x74, 0x54, 0x64, 0x77, 0x57, 0x63, 0x7A, 0x78, 0x76, 0x67, 0x6B, 0x6F, 0x69, 0x4F, 0x6A, 0x70, 0x75, 0x50, 0x68, 0x79, 0x6E, 0x62, 0x6D, 0x6C };
  57   std::wstring dicKorean = //L"ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎㅏㅐㅑㅒㅓㅔㅕㅖㅗㅛㅜㅠㅡㅣ";
  58   { 0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142, 0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b, 0x314c, 0x314d, 0x314e, 0x314f, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156, 0x3157, 0x315b, 0x315c, 0x3160, 0x3161, 0x3163 };
  59   std::wstring dicChoseong = //L"ㄱㄲㄴㄷㄸㄹㅁㅂㅃㅅㅆㅇㅈㅉㅊㅋㅌㅍㅎ";
  60   { 0x3131, 0x3132, 0x3134, 0x3137, 0x3138, 0x3139, 0x3141, 0x3142, 0x3143, 0x3145, 0x3146, 0x3147, 0x3148, 0x3149, 0x314a, 0x314b, 0x314c, 0x314d, 0x314e };
  61   std::wstring dicJungseong = //L"ㅏㅐㅑㅒㅓㅔㅕㅖㅗㅘㅙㅚㅛㅜㅝㅞㅟㅠㅡㅢㅣ";
  62   { 0x314f, 0x3150, 0x3151, 0x3152, 0x3153, 0x3154, 0x3155, 0x3156, 0x3157, 0x3158, 0x3159, 0x315a, 0x315b, 0x315c, 0x315d, 0x315e, 0x315f, 0x3160, 0x3161, 0x3162, 0x3163 };
  63   std::wstring dicJongseong = //L"ㄱㄲㄳㄴㄵㄶㄷㄹㄺㄻㄼㄽㄾㄿㅀㅁㅂㅄㅅㅆㅇㅈㅊㅋㅌㅍㅎ";
  64   { 0x3131, 0x3132, 0x3133, 0x3134, 0x3135, 0x3136, 0x3137, 0x3139, 0x313a, 0x313b, 0x313c, 0x313d, 0x313e, 0x313f, 0x3140, 0x3141, 0x3142, 0x3144, 0x3145, 0x3146, 0x3147, 0x3148, 0x314a, 0x314b, 0x314c, 0x314d, 0x314e };
  65
  66   std::wstring korean;
  67
  68   if (input.empty())
  69     return korean;
  70
  71   int choseong = -1, jungseong = -1, jongseong = -1;
  72
  73   for (unsigned int i = 0; i < input.size(); i++)
  74   {
  75     wchar_t ch = input.at(i);
  76     int key = dicKorean.find(ch);
  77
  78     // H/W Keyboard input with English will be changed to Korean
  79         // because H/W input in Korean is not supported.
  80     if (key == -1)
  81       key = dicEnglish.find(ch);
  82
  83     if (key == -1) // If not Korean and English
  84     {
  85       // If there is remained Korean, merge code into character
  86       if (choseong != -1) // There is choseong
  87       {
  88         if (jungseong != -1) // choseong+jungseong+(jongseong)
  89           korean += MergeCode(choseong, jungseong, jongseong);
  90         else // Only choseong
  91           korean += dicChoseong.at(choseong);
  92       }
  93       else
  94       {
  95         if (jungseong != -1) // Jungseong
  96           korean += dicJungseong.at(jungseong);
  97
  98         if (jongseong != -1) // Jongseong
  99           korean += dicJongseong.at(jongseong);
 100       }
 101       choseong = -1;
 102       jungseong = -1;
 103       jongseong = -1;
 104       korean += ch;
 105     }
 106     else if (key < 19) // If key is consonant, key could be choseong or jungseong
 107     {
 108       if (jungseong != -1)
 109       {
 110         if (choseong == -1) // Jungseong without choseong cannot have jongseong.
 111                                     // So inputted key is jungseong character, new character is begun.
 112         {
 113           korean += dicJungseong.at(jungseong);
 114           jungseong = -1;
 115           choseong = key;
 116         }
 117         else // Jungseong with choseong can have jongseong.
 118         {
 119           if (jongseong == -1) // Chongseong can have two consonant. So this is first consonant of chongseong.
 120           {
 121             jongseong = dicJongseong.find(dicKorean.at(key));
 122             if (jongseong == -1) // This consonant cannot be jongseong. ex) "ㄸ", "ㅃ", "ㅉ"
 123                         {
 124               korean += MergeCode(choseong, jungseong, jongseong);
 125               choseong = dicChoseong.find(dicKorean.at(key));
 126               jungseong = -1;
 127             }
 128           }
 129           else if (jongseong == 0 && key == 9)  // "ㄳ"
 130             jongseong = 2;
 131           else if (jongseong == 3 && key == 12) // "ㄵ"
 132             jongseong = 4;
 133           else if (jongseong == 3 && key == 18) // "ㄶ"
 134             jongseong = 5;
 135           else if (jongseong == 7 && key == 0)  // "ㄺ"
 136             jongseong = 8;
 137           else if (jongseong == 7 && key == 6)  // "ㄻ"
 138             jongseong = 9;
 139           else if (jongseong == 7 && key == 7)  // "ㄼ"
 140             jongseong = 10;
 141           else if (jongseong == 7 && key == 9)  // "ㄽ"
 142             jongseong = 11;
 143           else if (jongseong == 7 && key == 16) // "ㄾ"
 144             jongseong = 12;
 145           else if (jongseong == 7 && key == 17) // "ㄿ"
 146             jongseong = 13;
 147           else if (jongseong == 7 && key == 18) // "ㅀ"
 148             jongseong = 14;
 149           else if (jongseong == 16 && key == 9) // "ㅄ"
 150             jongseong = 17;
 151           else // Jongseong is completed. So new consonant is choseong.
 152           {
 153             korean += MergeCode(choseong, jungseong, jongseong);
 154             choseong = dicChoseong.find(dicKorean.at(key));
 155             jungseong = -1;
 156             jongseong = -1;
 157           }
 158         }
 159       }
 160       else // If there is no jungseong, new consonant can be choseong or second part of double consonant.
 161       {
 162         if (choseong == -1) // New consonant is choseong. Also it could be first part of double consonant.
 163         {
 164           if (jongseong != -1) // If choseong is already completed, new consonant is another choseong.
 165                                // So previous character has only jongseong.
 166           {
 167             korean += dicJongseong.at(jongseong);
 168             jongseong = -1;
 169           }
 170           choseong = dicChoseong.find(dicKorean.at(key));
 171         }
 172         // Find double consonant of chongseong
 173         else if (choseong == 0 && key == 9)   // "ㄳ"
 174         {
 175           choseong = -1;
 176           jongseong = 2;
 177         }
 178         else if (choseong == 2 && key == 12)  // "ㄵ"
 179         {
 180           choseong = -1;
 181           jongseong = 4;
 182         }
 183         else if (choseong == 2 && key == 18)  // "ㄶ"
 184         {
 185           choseong = -1;
 186           jongseong = 5;
 187         }
 188         else if (choseong == 5 && key == 0)   // "ㄺ"
 189         {
 190           choseong = -1;
 191           jongseong = 8;
 192         }
 193         else if (choseong == 5 && key == 6)   // "ㄻ"
 194         {
 195           choseong = -1;
 196           jongseong = 9;
 197         }
 198         else if (choseong == 5 && key == 7)   // "ㄼ"
 199         {
 200           choseong = -1;
 201           jongseong = 10;
 202         }
 203         else if (choseong == 5 && key == 9)   // "ㄽ"
 204         {
 205           choseong = -1;
 206           jongseong = 11;
 207         }
 208         else if (choseong == 5 && key == 16) // "ㄾ"
 209         {
 210           choseong = -1;
 211           jongseong = 12;
 212         }
 213         else if (choseong == 5 && key == 17) // "ㄿ"
 214         {
 215           choseong = -1;
 216           jongseong = 13;
 217         }
 218         else if (choseong == 5 && key == 18) // "ㅀ"
 219         {
 220           choseong = -1;
 221           jongseong = 14;
 222         }
 223         else if (choseong == 7 && key == 9) // "ㅄ"
 224         {
 225           choseong = -1;
 226           jongseong = 17;
 227         }
 228         else // In this case, previous character has only choseong. And new consonant is choseong.
 229         {
 230           korean += dicChoseong.at(choseong);
 231           choseong = dicChoseong.find(dicKorean.at(key));
 232         }
 233       }
 234     }
 235     else // If key is vowel, key is jungseong.
 236     {
 237       if (jongseong != -1) // If previous character has jongseong and this key is jungseong,
 238                            // actually latest vowel is not jongseong. It's choseong of new character.
 239       {
 240         // If jongseong of previous character is double consonant, we will seperate it to two vowel again.
 241         // First part of double consonant is jongseong of previous character.
 242         // Second part of double consonant is choseong of current character.
 243         int newCho;
 244         if (jongseong == 2)       // "ㄱ, ㅅ"
 245         {
 246           jongseong = 0;
 247           newCho = 9;
 248         }
 249         else if (jongseong == 4)  // "ㄴ, ㅈ"
 250         {
 251           jongseong = 3;
 252           newCho = 12;
 253         }
 254         else if (jongseong == 5)  // "ㄴ, ㅎ"
 255         {
 256           jongseong = 3;
 257           newCho = 18;
 258         }
 259         else if (jongseong == 8)  // "ㄹ, ㄱ"
 260         {
 261           jongseong = 7;
 262           newCho = 0;
 263         }
 264         else if (jongseong == 9)  // "ㄹ, ㅁ"
 265         {
 266           jongseong = 7;
 267           newCho = 6;
 268         }
 269         else if (jongseong == 10) // "ㄹ, ㅂ"
 270         {
 271           jongseong = 7;
 272           newCho = 7;
 273         }
 274         else if (jongseong == 11) // "ㄹ, ㅅ"
 275         {
 276           jongseong = 7;
 277           newCho = 9;
 278         }
 279         else if (jongseong == 12) // "ㄹ, ㅌ"
 280         {
 281           jongseong = 7;
 282           newCho = 16;
 283         }
 284         else if (jongseong == 13) // "ㄹ, ㅍ"
 285         {
 286           jongseong = 7;
 287           newCho = 17;
 288         }
 289         else if (jongseong == 14) // "ㄹ, ㅎ"
 290         {
 291           jongseong = 7;
 292           newCho = 18;
 293         }
 294         else if (jongseong == 17) // "ㅂ, ㅅ"
 295         {
 296           jongseong = 16;
 297           newCho = 9;
 298         }
 299         else // If jongseong is single consonant, previous character has no chongseong.
 300              // It's choseong of current character.
 301         {
 302           newCho = dicChoseong.find(dicJongseong.at(jongseong));
 303           jongseong = -1;
 304         }
 305         if (choseong != -1) // If previous character has choseong and jungseong.
 306           korean += MergeCode(choseong, jungseong, jongseong);
 307         else // If previous character has Jongseong only.
 308           korean += dicJongseong.at(jongseong);
 309
 310         choseong = newCho;
 311         jungseong = -1;
 312         jongseong = -1;
 313       }
 314       if (jungseong == -1) // If this key is first vowel, it's first part of jungseong.
 315       {
 316         jungseong = dicJungseong.find(dicKorean.at(key));
 317       }
 318       // If there is jungseong already, jungseong is double vowel.
 319       else if (jungseong == 8 && key == 19)   // "ㅘ"
 320         jungseong = 9;
 321       else if (jungseong == 8 && key == 20)   // "ㅙ"
 322         jungseong = 10;
 323       else if (jungseong == 8 && key == 32)   // "ㅚ"
 324         jungseong = 11;
 325       else if (jungseong == 13 && key == 23)  // "ㅝ"
 326         jungseong = 14;
 327       else if (jungseong == 13 && key == 24)  // "ㅞ"
 328         jungseong = 15;
 329       else if (jungseong == 13 && key == 32)  // "ㅟ"
 330         jungseong = 16;
 331       else if (jungseong == 18 && key == 32)  // "ㅢ"
 332         jungseong = 19;
 333       else // If two vowel cannot be double vowel.
 334       {
 335         if (choseong != -1) // Previous character is completed.
 336                             // Current character is begin with jungseong.
 337         {
 338           korean += MergeCode(choseong, jungseong, jongseong);
 339           choseong = -1;
 340         }
 341         else // Previous character has jungseon only.
 342           korean += dicJungseong.at(jungseong);
 343         jungseong = -1;
 344         korean += dicKorean.at(key);
 345       }
 346     }
 347   }
 348
 349   // Process last character
 350   if (choseong != -1)
 351   {
 352     if (jungseong != -1) // Current character has choseong and jungseong.
 353       korean += MergeCode(choseong, jungseong, jongseong);
 354     else // Current character has choseong only.
 355       korean += dicChoseong.at(choseong);
 356   }
 357   else
 358   {
 359     if (jungseong != -1)  // Current character has jungseong only
 360       korean += dicJungseong.at(jungseong);
 361     else if (jongseong != -1)  // Current character has jongseong only
 362       korean += dicJongseong.at(jongseong);
 363   }
 364
 365   return korean;
 366 }
 367
 368 std::string CInputCodingTableKorean::ConvertString(const std::string& strCode)
 369 {
 370   std::wstring input;
 371   std::string result;
 372   g_charsetConverter.utf8ToW(strCode, input);
 373   InputToKorean(input);
 374   g_charsetConverter.wToUTF8(InputToKorean(input), result);
 375   return m_strTextPrev + result;
 376 }