config overriding; fixed setting colors
[aoi.git] / src / romanization.hxx
blob172ed5d26908089e6e02036cd7645450a50b5d85
1 #ifndef _ROMANIZATION_HXX
2 #define _ROMANIZATION_HXX
4 #include <string>
5 #include <vector>
6 #include "utils.hxx"
/*
 * Unicode Han code chart index:
 * http://www.unicode.org/charts/unihangridindex.html
 */
13 using std::string;
14 using std::vector;
16 class Romanization
18 public:
19 Romanization();
20 ~Romanization(){};
22 struct RmnElement {
23 vector<string> romaji;
24 string hiragana;
25 string katakana;
27 // https://en.wikipedia.org/wiki/CJK_Unified_Ideographs#CJK_Unified_Ideographs
28 inline static bool is_cjk_base (unsigned int ucs) {return ucs>=0x4e00 && ucs<=0x9fff;}
29 inline static bool is_cjk_a (unsigned int ucs) {return ucs>=0x3400 && ucs<=0x4dbf;}
30 inline static bool is_cjk_b (unsigned int ucs) {return ucs>=0x20000 && ucs<=0x2a6df;}
31 inline static bool is_cjk_c (unsigned int ucs) {return ucs>=0x2a700 && ucs<=0x2b73f;}
32 inline static bool is_cjk_d (unsigned int ucs) {return ucs>=0x2b740 && ucs<=0x2b81f;}
33 // range U+1B000 - U+1B0FF is both hiragana ans katakana
34 inline static bool is_hiragana (unsigned int ucs) {return ucs>=0x3040 && ucs<=0x309f;}
35 inline static bool is_katakana (unsigned int ucs) {return ucs>=0x30a0 && ucs<=0xffef;}
36 inline static bool is_kana (unsigned int ucs) {return is_hiragana(ucs) || is_katakana(ucs);}
37 inline static bool is_cjk (unsigned int ucs)
38 {return is_cjk_base(ucs) || is_cjk_a(ucs) || is_cjk_b(ucs) || is_cjk_c(ucs) || is_cjk_d(ucs);}
41 string romaji_to_kana ( const string &rmj, bool to_hiragana = true );
42 inline string romaji_to_kana ( const char *rmj, bool to_hiragana = true )
43 { return romaji_to_kana(string(rmj),to_hiragana); };
45 inline string romaji_to_hiragana ( const char *s )
46 { return romaji_to_kana(string(s),true); };
47 inline string romaji_to_hiragana ( const string &s )
48 { return romaji_to_kana(s,true); };
50 inline string romaji_to_katakana ( const char *s )
51 { return romaji_to_kana(string(s),false); };
52 inline string romaji_to_katakana ( const string &s )
53 { return romaji_to_kana(s,false); };
55 inline string hiragana_to_katakana ( const string &s );
57 string kana_to_romaji ( const string &k );
59 static inline bool contains_kanji ( const char *s )
61 for( unsigned int code: utils::utf8_to_ints(s) )
62 if ( is_cjk(code) )
63 return true;
64 return false;
67 static inline bool is_kanji ( const char *s )
69 for ( unsigned int code: utils::utf8_to_ints(s) )
70 if ( !is_cjk(code) )
71 return false;
72 return true;
75 static inline bool is_kanji ( int code ) { return is_cjk(code);};
77 private:
78 vector<RmnElement> table_;
79 size_t current_romaji_;
80 vector<string> romaji_names_; // names of available romanization systems
81 string tsu_h_;
82 string tsu_k_;
83 // TODO: bude potreba pro kazdy system zvlast
84 vector<string> tsus_;
89 #endif // _ROMANIZATION_HXX