1 #ifndef _ROMANIZATION_HXX
2 #define _ROMANIZATION_HXX
9 http://www.unicode.org/charts/unihangridindex.html
// List of strings named `romaji`.
// NOTE(review): presumably the romaji spellings belonging to one table entry
// (possibly one per romanization system) - confirm against the enclosing declaration.
23 vector
<string
> romaji
;
27 // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs#CJK_Unified_Ideographs
// True when `ucs` lies in the basic CJK Unified Ideographs block,
// U+4E00..U+9FFF (both ends included).
inline static bool is_cjk_base (unsigned int ucs)
{
    const unsigned int first = 0x4e00;
    const unsigned int last  = 0x9fff;
    return first <= ucs && ucs <= last;
}
// True when `ucs` lies in CJK Unified Ideographs Extension A,
// U+3400..U+4DBF (both ends included).
inline static bool is_cjk_a (unsigned int ucs)
{
    const unsigned int first = 0x3400;
    const unsigned int last  = 0x4dbf;
    return first <= ucs && ucs <= last;
}
// True when `ucs` lies in CJK Unified Ideographs Extension B,
// U+20000..U+2A6DF (both ends included).
inline static bool is_cjk_b (unsigned int ucs)
{
    const unsigned int first = 0x20000;
    const unsigned int last  = 0x2a6df;
    return first <= ucs && ucs <= last;
}
// True when `ucs` lies in CJK Unified Ideographs Extension C,
// U+2A700..U+2B73F (both ends included).
inline static bool is_cjk_c (unsigned int ucs)
{
    const unsigned int first = 0x2a700;
    const unsigned int last  = 0x2b73f;
    return first <= ucs && ucs <= last;
}
// True when `ucs` lies in CJK Unified Ideographs Extension D,
// U+2B740..U+2B81F (both ends included).
inline static bool is_cjk_d (unsigned int ucs)
{
    const unsigned int first = 0x2b740;
    const unsigned int last  = 0x2b81f;
    return first <= ucs && ucs <= last;
}
// range U+1B000 - U+1B0FF (Kana Supplement) contains both hiragana and katakana
// True when `ucs` lies in the Hiragana block, U+3040..U+309F (both ends included).
inline static bool is_hiragana (unsigned int ucs)
{
    const unsigned int first = 0x3040;
    const unsigned int last  = 0x309f;
    return first <= ucs && ucs <= last;
}
// True when `ucs` is a katakana code point.
//
// Recognized ranges (all bounds inclusive):
//   U+30A0..U+30FF  Katakana
//   U+31F0..U+31FF  Katakana Phonetic Extensions
//   U+FF65..U+FF9F  halfwidth katakana variants
//
// The former single range 0x30a0..0xffef was far too wide: it also matched
// every BMP CJK ideograph (U+4E00..U+9FFF), Hangul syllables, fullwidth Latin
// letters and so on, so kanji were reported as katakana.
inline static bool is_katakana (unsigned int ucs)
{
    return (ucs >= 0x30a0 && ucs <= 0x30ff)
        || (ucs >= 0x31f0 && ucs <= 0x31ff)
        || (ucs >= 0xff65 && ucs <= 0xff9f);
}
36 inline static bool is_kana (unsigned int ucs
) {return is_hiragana(ucs
) || is_katakana(ucs
);}
37 inline static bool is_cjk (unsigned int ucs
)
38 {return is_cjk_base(ucs
) || is_cjk_a(ucs
) || is_cjk_b(ucs
) || is_cjk_c(ucs
) || is_cjk_d(ucs
);}
41 string
romaji_to_kana ( const string
&rmj
, bool to_hiragana
= true );
42 inline string
romaji_to_kana ( const char *rmj
, bool to_hiragana
= true )
43 { return romaji_to_kana(string(rmj
),to_hiragana
); };
45 inline string
romaji_to_hiragana ( const char *s
)
46 { return romaji_to_kana(string(s
),true); };
47 inline string
romaji_to_hiragana ( const string
&s
)
48 { return romaji_to_kana(s
,true); };
50 inline string
romaji_to_katakana ( const char *s
)
51 { return romaji_to_kana(string(s
),false); };
52 inline string
romaji_to_katakana ( const string
&s
)
53 { return romaji_to_kana(s
,false); };
// Declaration only; takes a string by const reference and returns a string.
// NOTE(review): presumably returns `s` with hiragana replaced by katakana - confirm
// against the definition. Because it is declared `inline`, a definition must be
// visible in every translation unit that calls it.
55 inline string
hiragana_to_katakana ( const string
&s
);
// Declaration only; takes a string by const reference and returns a string.
// NOTE(review): presumably converts the kana text `k` to romaji - confirm
// against the definition, which is not part of this section.
57 string
kana_to_romaji ( const string
&k
);
59 static inline bool contains_kanji ( const char *s
)
61 for( unsigned int code
: utils::utf8_to_ints(s
) )
67 static inline bool is_kanji ( const char *s
)
69 for ( unsigned int code
: utils::utf8_to_ints(s
) )
75 static inline bool is_kanji ( int code
) { return is_cjk(code
);};
// Sequence of RmnElement entries.
// NOTE(review): presumably the romanization lookup table - confirm.
78 vector
<RmnElement
> table_
;
// NOTE(review): presumably the index of the currently selected romanization
// system (cf. romaji_names_) - confirm.
79 size_t current_romaji_
;
80 vector
<string
> romaji_names_
; // names of available romanization systems
// TODO: will be needed separately for each system
89 #endif // _ROMANIZATION_HXX