// Alphabet used to build the plain ASCII tokens: lowercase consonants
// followed by the ten digits.  Vowels are left out to avoid stop words,
// stemming and similar text processing, and 'l' is left out because it
// is too easy to confuse with '1'.  ('y' is absent from the list too.)
private const string token_chars = "bcdfghjkmnpqrstvwxz0123456789";
// Alphabet that mixes ASCII characters with Latin characters lying
// outside the normal ISO-8859-1 range, used to exercise UTF-8 support.
// Tokens built from it *should* still stem properly.
private const string unicode_token_chars = "bĉđfğħjķłmńpqrŝſŧvwxžƀ0123456789";
// Number of token ids: valid ids are 0 .. Count - 1, and each token
// table is allocated with exactly this many entries.
public const int Count = 512;
/// <summary>
/// Returns the ASCII token stored in token_table for the given id.
/// </summary>
/// <param name="id">Token id; must satisfy 0 &lt;= id &lt; Count.</param>
/// <returns>The entry of token_table at index <paramref name="id"/>.</returns>
/// <exception cref="ArgumentOutOfRangeException">
/// <paramref name="id"/> is negative or not less than Count.  This type
/// derives from ArgumentException, so callers that catch ArgumentException
/// keep working.
/// </exception>
static public string IdToString (int id)
{
	// Reject ids outside the table, naming the offending argument and
	// value so the failure can be diagnosed from the exception alone.
	if (id < 0 || id >= Count)
		throw new ArgumentOutOfRangeException ("id", id, "Token id must be in the range [0, Count).");

	return token_table [id];
}
// Looks up str in token_table with Array.BinarySearch, stores the result
// in i, and then tests whether i falls outside the range [0, Count).
//
// NOTE(review): Array.BinarySearch expects its array to be sorted, but
// token_table is filled in token_chars order, where the letters come
// before the digits (index 18 holds "bz" and index 19 holds "b0").
// Presumably digits sort before letters under the default comparer, so
// the table is not in ascending order and lookups of some valid tokens
// may miss -- verify, and consider computing the id directly from the
// characters' positions in token_chars instead.
//
// NOTE(review): the declaration of i and the statements that follow the
// range check are not visible in this view -- confirm what is returned
// for a string that is not found.
27 static public int StringToId (string str
)
30 i
= Array
.BinarySearch (token_table
, str
);
31 if (i
< 0 || i
>= Count
)
// Shared pseudo-random number source used by GetRandom and
// GetRandomWithUnicode to choose a token id.
private static Random random = new Random ();
/// <summary>
/// Returns a pseudo-randomly chosen entry of the ASCII token table.
/// </summary>
/// <returns>The token_table entry at an index drawn from [0, Count).</returns>
static public string GetRandom ()
{
	int pick = random.Next (Count);
	return token_table [pick];
}
/// <summary>
/// Returns a pseudo-randomly chosen entry of the Unicode token table.
/// </summary>
/// <returns>The unicode_token_table entry at an index drawn from [0, Count).</returns>
static public string GetRandomWithUnicode ()
{
	int pick = random.Next (Count);
	return unicode_token_table [pick];
}
48 ///////////////////////////////////////////////////////////////////////
// Token lookup tables indexed by token id.  Both are allocated with
// Count entries by the initialization code that follows: token_table
// holds the tokens built from token_chars only, unicode_token_table the
// tokens whose second character comes from unicode_token_chars.
static private string [] token_table;
static private string [] unicode_token_table;
// Build both token tables.  Every token is two characters long and is
// derived from its id, so the tables can be filled in a single pass.
token_table = new string [Count];
unicode_token_table = new string [Count];

int ascii_radix = token_chars.Length;
int unicode_radix = unicode_token_chars.Length;

// Scratch buffer reused for every token; new string (char[]) copies
// the characters, so overwriting it afterwards is safe.
char [] pair = new char [2];

for (int id = 0; id < Count; ++id) {
	// Both tables take their leading character from the ASCII
	// alphabet, selected by the quotient of the id.
	pair [0] = token_chars [id / ascii_radix];

	// ASCII table: the trailing character is the remainder within
	// the same alphabet.
	pair [1] = token_chars [id % ascii_radix];
	token_table [id] = new string (pair);

	// Unicode table: the trailing character comes from the Unicode
	// alphabet.
	// NOTE(review): the quotient above uses token_chars.Length while
	// this remainder uses unicode_token_chars.Length -- confirm the
	// mixed radix is intended.
	pair [1] = unicode_token_chars [id % unicode_radix];
	unicode_token_table [id] = new string (pair);
}