1 #include "romanization.hxx"
5 using utils::replace_all
;
// Romanization constructor: lists the romaji <-> kana conversion entries and
// the consonants (tsus_) that romaji_to_kana() turns into a small tsu.
// NOTE(review): the statement that stores these entries (presumably into
// table_) is not visible in this excerpt -- confirm against the full file.
//
// The conversion functions walk table_ front to back and replace every
// occurrence of each entry, so entry order matters: multi-letter romaji such
// as "kya" are listed before the shorter ones they contain ("ya", "a").
8 Romanization::Romanization ()
12 // Entry layout: { {hepburn romaji}, hiragana, katakana }
// Extended katakana: the hiragana field of these entries is the empty string
// (kana_to_romaji() skips empty kana fields).
14 { {"va"}, "", "ヴァ" }, { {"va"}, "", "ヷ" },
15 { {"tsa"}, "", "ツァ" }, { {"fa"}, "", "ファ" },
16 { {"wi"}, "", "ウィ" }, { {"vi"}, "", "ヴィ" },
17 { {"ti"}, "", "ティ" }, { {"di"}, "", "ディ" },
18 { {"fi"}, "", "フィ" }, { {"vu"}, "", "ヴ" },
19 { {"tu"}, "", "トゥ" }, { {"tyu"}, "", "テュ" },
20 { {"du"}, "", "ドゥ" }, { {"dyu"}, "", "デュ" },
21 { {"fyu"}, "", "フュ" }, { {"ye"}, "", "イェ" },
22 { {"we"}, "", "ウェ" }, { {"ve"}, "", "ヴェ" },
23 { {"she"}, "", "シェ" }, { {"je"}, "", "ジェ" },
24 { {"che"}, "", "チェ" }, { {"tse"}, "", "ツェ" },
25 // hiragana wo is necessary (the first occurrence is the one used for replacement)
26 { {"fe"}, "", "フェ" }, { {"wo"}, "を", "ウォ" },
27 { {"vo"}, "", "ヴォ" }, { {"vo"}, "", "ヺ" },
28 { {"tso"}, "", "ツォ" }, { {"fo"}, "", "フォ" },
29 { {"zi"}, "", "ズィ" }, { {"tsi"}, "", "ツィ" },
// NOTE(review): both entries on the next line are exact repeats of the "tse"
// and "tso" entries listed above -- they look redundant.
30 { {"tse"}, "", "ツェ" }, { {"tso"}, "", "ツォ" },
31 // extended katakana END
// Contracted sounds (consonant + small ya/yu/yo).
32 { {"kya"}, "きゃ", "キャ" }, { {"sha"}, "しゃ", "シャ" },
33 { {"cha"}, "ちゃ", "チャ" }, { {"nya"}, "にゃ", "ニャ" },
34 { {"hya"}, "ひゃ", "ヒャ" }, { {"mya"}, "みゃ", "ミャ" },
35 { {"rya"}, "りゃ", "リャ" }, { {"gya"}, "ぎゃ", "ギャ" },
36 { {"ja"}, "じゃ", "ジャ" }, { {"ja"}, "ぢゃ", "ヂャ" },
37 { {"bya"}, "びゃ", "ビャ" }, { {"pya"}, "ぴゃ", "ピャ" },
38 { {"kyu"}, "きゅ", "キュ" }, { {"shu"}, "しゅ", "シュ" },
39 { {"chu"}, "ちゅ", "チュ" }, { {"nyu"}, "にゅ", "ニュ" },
40 { {"hyu"}, "ひゅ", "ヒュ" }, { {"myu"}, "みゅ", "ミュ" },
41 { {"ryu"}, "りゅ", "リュ" }, { {"gyu"}, "ぎゅ", "ギュ" },
42 { {"ju"}, "じゅ", "ジュ" }, { {"ju"}, "ぢゅ", "ヂュ" },
43 { {"byu"}, "びゅ", "ビュ" }, { {"pyu"}, "ぴゅ", "ピュ" },
44 { {"kyo"}, "きょ", "キョ" }, { {"sho"}, "しょ", "ショ" },
45 { {"cho"}, "ちょ", "チョ" }, { {"nyo"}, "にょ", "ニョ" },
46 { {"hyo"}, "ひょ", "ヒョ" }, { {"myo"}, "みょ", "ミョ" },
47 { {"ryo"}, "りょ", "リョ" }, { {"gyo"}, "ぎょ", "ギョ" },
48 { {"jo"}, "じょ", "ジョ" }, { {"jo"}, "ぢょ", "ヂョ" },
49 { {"byo"}, "びょ", "ビョ" }, { {"pyo"}, "ぴょ", "ピョ" },
// Plain consonant + vowel syllables.
50 { {"ka"}, "か", "カ" }, { {"sa"}, "さ", "サ" },
51 { {"ta"}, "た", "タ" }, { {"na"}, "な", "ナ" },
52 { {"ha"}, "は", "ハ" }, { {"ma"}, "ま", "マ" },
53 { {"ya"}, "や", "ヤ" }, { {"ra"}, "ら", "ラ" },
54 { {"wa"}, "わ", "ワ" }, { {"ga"}, "が", "ガ" },
55 { {"za"}, "ざ", "ザ" }, { {"da"}, "だ", "ダ" },
56 { {"ba"}, "ば", "バ" }, { {"pa"}, "ぱ", "パ" },
57 { {"ki"}, "き", "キ" }, { {"shi"}, "し", "シ" },
58 { {"chi"}, "ち", "チ" }, { {"ni"}, "に", "ニ" },
59 { {"hi"}, "ひ", "ヒ" }, { {"mi"}, "み", "ミ" },
60 { {"ri"}, "り", "リ" }, { {"gi"}, "ぎ", "ギ" },
61 { {"ji"}, "じ", "ジ" }, { {"ji"}, "ぢ", "ヂ" },
62 { {"bi"}, "び", "ビ" }, { {"pi"}, "ぴ", "ピ" },
63 { {"ku"}, "く", "ク" }, { {"tsu"}, "つ", "ツ" },
64 { {"nu"}, "ぬ", "ヌ" }, { {"su"}, "す", "ス" },
65 { {"fu"}, "ふ", "フ" }, { {"mu"}, "む", "ム" },
66 { {"yu"}, "ゆ", "ユ" }, { {"ru"}, "る", "ル" },
67 { {"gu"}, "ぐ", "グ" }, { {"zu"}, "ず", "ズ" },
68 { {"zu"}, "づ", "ヅ" }, { {"bu"}, "ぶ", "ブ" },
69 { {"pu"}, "ぷ", "プ" }, { {"ke"}, "け", "ケ" },
70 { {"se"}, "せ", "セ" }, { {"te"}, "て", "テ" },
71 { {"ne"}, "ね", "ネ" }, { {"he"}, "へ", "ヘ" },
72 { {"me"}, "め", "メ" }, { {"re"}, "れ", "レ" },
73 { {"ge"}, "げ", "ゲ" }, { {"ze"}, "ぜ", "ゼ" },
74 { {"de"}, "で", "デ" }, { {"be"}, "べ", "ベ" },
75 { {"pe"}, "ぺ", "ペ" }, { {"ko"}, "こ", "コ" },
76 { {"so"}, "そ", "ソ" }, { {"to"}, "と", "ト" },
77 { {"no"}, "の", "ノ" }, { {"ho"}, "ほ", "ホ" },
78 { {"mo"}, "も", "モ" }, { {"yo"}, "よ", "ヨ" },
79 { {"ro"}, "ろ", "ロ" }, { {"go"}, "ご", "ゴ" },
80 { {"zo"}, "ぞ", "ゾ" }, { {"do"}, "ど", "ド" },
81 { {"bo"}, "ぼ", "ボ" }, { {"po"}, "ぽ", "ポ" },
// Syllabic n and the bare vowels come after every syllable that contains them.
83 { {"n"}, "ん", "ン" }, { {"a"}, "あ", "ア" },
84 { {"i"}, "い", "イ" }, { {"u"}, "う", "ウ" },
85 { {"e"}, "え", "エ" }, { {"o"}, "お", "オ" },
89 /// must be at the end. Needed when parsing JMdict
90 { {"we"}, "ゑ", "ヱ" }, { {"wi"}, "ゐ", "ヰ" },
// Identity entries: the same character in all three fields.
91 { {"L"}, "L", "L" }, { {"A"}, "A", "A" },
92 { {"N"}, "N", "N" }, { {"S"}, "S", "S" },
93 { {"I"}, "I", "I" }, { {"M"}, "M", "M" },
95 { {"0"}, "0", "0" }, { {"1"}, "1", "1" },
96 { {"2"}, "2", "2" }, { {"8"}, "8", "8" }
// Consonants that romaji_to_kana() replaces with a small tsu after all table
// entries have been converted (i.e. the leftover half of a doubled consonant).
98 tsus_
= {"k","s","t","n","h","m","r","w","g","d","z","b","p","c"};
// Converts hiragana to katakana: for every table_ entry, each occurrence of
// the entry's hiragana string in `str` is replaced by its katakana string.
// @param s  text to convert.
// NOTE(review): the declaration of `str` and the return statement are not
// visible in this excerpt; presumably `str` is a working copy of `s` that is
// returned after the loop -- confirm against the full file.
104 string
Romanization::hiragana_to_katakana ( const string
&s
)
// NOTE(review): the extended-katakana entries have an empty hiragana field and
// are not skipped here (kana_to_romaji() does skip empty fields) -- confirm
// that replace_all() copes with an empty search string.
107 for ( auto &elt
: table_
)
108 replace_all( str
, elt
.hiragana
, elt
.katakana
);
// Converts romaji to kana.
// @param rmj          romaji text to convert.
// @param to_hiragana  true: emit hiragana; false: emit katakana.
// Steps visible here: lower-case `str`, replace every table_ romaji with the
// selected kana, turn the leftover consonants listed in tsus_ into a small
// tsu, strip apostrophes, then special-case a string ending in two small tsu.
// NOTE(review): the declarations of `str` and `t` and the final return are not
// visible in this excerpt; presumably `str` is a working copy of `rmj` and `t`
// is tsu.size() -- confirm against the full file.
113 string
Romanization::romaji_to_kana ( const string
&rmj
, bool to_hiragana
)
// Lower-case the working string before the table lookup.
// NOTE(review): ::tolower is applied to plain char elements; for negative char
// values (non-ASCII bytes) that is undefined behaviour unless the value is
// first converted to unsigned char -- confirm inputs are ASCII-only here.
116 std::transform( str
.begin(), str
.end(), str
.begin(), ::tolower
);
// Small tsu in the requested script.
117 string tsu
= (to_hiragana
) ? tsu_h_
:tsu_k_
;
// Replace each entry's romaji (the variant selected by current_romaji_) with
// its hiragana or katakana, in table order.
120 for ( auto &elt
: table_
)
121 replace_all( str
, elt
.romaji
[current_romaji_
], (to_hiragana
) ? elt
.hiragana
:elt
.katakana
);
123 // replace double consonants with 'tsu'
124 // e.g. kocchi -> こっち (not こcち)
// Note: the loop variable `t` below is local to this loop and is distinct from
// the `t` used in the end-of-string check further down.
125 for ( auto &t
: tsus_
)
126 replace_all ( str
, t
, tsu
);
// Drop every apostrophe from the result.
131 replace_all( str
, "'", "" );
133 // remove small tsu from the end of the string
// The condition tests whether the last 2*t elements of str are two consecutive
// small tsu. substr's second argument is a count, not an end position; passing
// `s` works only because substr clamps the count to the end of the string
// (assuming `string` is std::string).
// NOTE(review): the check is measured in units of `t`, but the returned
// substring drops just one element (str.size()-1). If tsu is longer than one
// char (e.g. a multi-byte encoding) this cuts the last tsu in the middle --
// presumably `s - t` was intended; confirm.
134 size_t s
= str
.size(),
136 if ( s
> 2*t
&& str
.substr( s
- 2*t
, s
) == tsu
+tsu
)
137 return str
.substr( 0, str
.size()-1);
// Converts kana (hiragana and katakana) to romaji.
// @param k  kana text to convert.
// Steps visible here: replace every non-empty hiragana/katakana table string
// with the entry's romaji, try to strip a trailing small tsu, then replace each
// remaining small tsu with a copy of the character that follows it (which
// doubles the following consonant).
// NOTE(review): the declarations of `str` and `pos` and the end of the
// function are not visible in this excerpt; presumably `str` is a working copy
// of `k` and `pos` starts at 0 before each search loop -- confirm.
142 string
Romanization::kana_to_romaji ( const string
&k
)
// Entries may have an empty kana field (extended katakana have no hiragana),
// so empty strings are skipped rather than passed to replace_all().
// The romaji variant used is the one selected by current_romaji_.
146 for ( auto &elt
: table_
){
147 if ( elt
.hiragana
!= "" )
148 replace_all( str
, elt
.hiragana
, elt
.romaji
[current_romaji_
] );
149 if ( elt
.katakana
!= "" )
150 replace_all( str
, elt
.katakana
, elt
.romaji
[current_romaji_
]);
153 // remove 'tsu' from the end of the string
// NOTE(review): last_char holds only the final element of str (substr from
// size()-1; the count is clamped), so it can equal tsu_k_/tsu_h_ only if those
// are one element long -- if they are multi-byte this branch never fires.
// NOTE(review): last_char is computed once and not refreshed inside the
// visible loop body, so if the condition is ever true the loop keeps erasing
// until str is empty -- confirm and fix in the full file.
154 if ( str
.size() > 1 ){
155 string last_char
= str
.substr(str
.size()-1, str
.size() );
156 while( (last_char
== tsu_k_
) || (last_char
== tsu_h_
) )
157 str
.erase(str
.size()-1); // remove last character ( i.e. from (n-1) to the end )
160 // remove katakana 'tsu' ('tsu' is replaced by the next character)
// replace(pos, len, 1, ch) swaps the tsu for one copy of the character right
// after it. NOTE(review): if a tsu is the last thing in str, that character is
// the terminating '\0' of c_str(), which would then be stored in the string
// (assuming `string` is std::string) -- confirm the trailing-tsu removal above
// really prevents this case.
162 while ( (pos
= str
.find(tsu_k_
, pos
)) != string::npos
)
163 str
.replace(pos
, tsu_k_
.size(), 1, str
.c_str()[pos
+tsu_k_
.size()]);
165 // remove hiragana 'tsu' (same replacement as for the katakana one)
167 while ( (pos
= str
.find(tsu_h_
, pos
)) != string::npos
)
168 str
.replace(pos
, tsu_h_
.size(), 1, str
.c_str()[pos
+tsu_h_
.size()]);