libtextcat/data/new_fingerprints/fpdb.conf

   1 #
   2 # A sample config file for the language models
   3 # provided with Gertjan van Noord's language guesser
   4 # (http://odur.let.rug.nl/~vannoord/TextCat/)
   5 #
   6 # Notes:
   7 # - You may consider eliminating a couple of small languages from this
   8 # list because they cause false positives with big languages and are
   9 # bad for performance. (Do you really want to recognize Drents?)
  10 # - Putting the most probable languages at the top of the list
  11 # improves performance, because this will raise the threshold for
  12 # likely candidates more quickly.
  13 #
  14
  15 # This file has been modified (for OOo, by Jocelyn MERAND joc.mer@gmail.com) to include country and encoding.
  16 # Guess strings are formed as follows: language-country-encoding (the country part may be empty, e.g. af--utf8)
  17
  18 afrikaans.lm                         af--utf8
  19 albanian.lm                          sq--utf8
  20 amharic_utf.lm                       am--utf8
  21 arabic.lm                            ar--utf8
  22 basque.lm                            eu--utf8
  23 belarus.lm                           be--utf8
  24 bosnian.lm                           bs--utf8
  25 breton.lm                            br--utf8
  26 catalan.lm                           ca--utf8
  27 chinese_simplified.lm                zh-CN-utf8
  28 chinese_traditional.lm               zh-TW-utf8
  29 croatian.lm                          hr--utf8
  30 czech.lm                             cs--utf8
  31 danish.lm                            da--utf8
  32 dutch.lm                             nl--utf8
  33 english.lm                           en--utf8
  34 esperanto.lm                         eo--utf8
  35 estonian.lm                          et--utf8
  36 finnish.lm                           fi--utf8
  37 french.lm                            fr--utf8
  38 frisian.lm                           fy--utf8
  39 georgian.lm                          ka--utf8
  40 german.lm                            de--utf8
  41 greek.lm                             el--utf8
  42 hebrew.lm                            he--utf8
  43 hindi.lm                             hi--utf8
  44 hungarian.lm                         hu--utf8
  45 icelandic.lm                         is--utf8
  46 indonesian.lm                        id--utf8
  47 irish_gaelic.lm                      ga--utf8
  48 italian.lm                           it--utf8
  49 japanese.lm                          ja--utf8
  50 korean.lm                            ko--utf8
  51 latin.lm                             la--utf8
  52 latvian.lm                           lv--utf8
  53 lithuanian.lm                        lt--utf8
  54 luxembourgish.lm                     lb--utf8
  55 malay.lm                             ms--utf8
  56 manx_gaelic.lm                       gv--utf8
  57 marathi.lm                           mr--utf8
  58 mongolian_cyrillic.lm                mn--utf8
  59 nepali.lm                            ne--utf8
  60 norwegian.lm                         nb--utf8       # Norwegian (Bokmal)
  61 persian.lm                           fa--utf8       # Farsi
  62 polish.lm                            pl--utf8
  63 portuguese.lm                        pt-PT-utf8
  64 quechua.lm                           qu--utf8
  65 romanian.lm                          ro--utf8
  66 romansh.lm                           rm--utf8
  67 russian.lm                           ru--utf8
  68 sanskrit.lm                          sa--utf8
  69 scots.lm                             sco--utf8
  70 scots_gaelic.lm                      gd--utf8
  71 serbian_ascii.lm                     sh-YU-utf8
  72 slovak_ascii.lm                      sk-SK-utf8
  73 slovenian.lm                         sl--utf8
  74 spanish.lm                           es--utf8
  75 swahili.lm                           sw--utf8
  76 swedish.lm                           sv--utf8
  77 tagalog.lm                           tl--utf8
  78 tamil.lm                             ta--utf8
  79 thai.lm                              th--utf8
  80 turkish.lm                           tr--utf8
  81 ukrainian.lm                         uk--utf8
  82 vietnamese.lm                        vi--utf8
  83 welsh.lm                             cy--utf8
  84 yiddish_utf.lm                       yi--utf8