/*
 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 */
// String markers, one per kind of token attribute.
// NOTE(review): presumably used as feature-name tags by the tagger; confirm
// against the code that consumes these macros.
// Each macro expands to a plain string literal, so it can be concatenated
// with adjacent literals at compile time.

// Context markers
#define SLASTW              "Swn" // Last word
#define WMARK               "w"   // Words
#define PMARK               "p"   // POS
#define KMARK               "k"   // Ambiguity classes
#define MMARK               "m"   // Maybe
#define MFTMARK             "f"   // Most frequent tag --> f(-1) --> f-1:NN

// Affix and character markers
#define PREFIX_MARK         "a"   // Prefixes
#define SUFFIX_MARK         "z"   // Suffixes
#define CHAR_A_MARK         "ca"  // Character, counting from the beginning of the token (starting at 1)
#define CHAR_Z_MARK         "cz"  // Character, counting from the end of the token (starting at 1)
#define LENGTH_MARK         "L"   // Token length

// Capitalization and first-character markers
#define START_CAPITAL_MARK  "SA"  // Starts with upper case
#define START_LOWER_MARK    "sa"  // Starts with lower case
#define START_NUMBER_MARK   "SN"  // Starts with a number
#define ALL_UPPER_MARK      "AA"  // All upper case
#define ALL_LOWER_MARK      "aa"  // All lower case

// Content markers
#define CONTAIN_CAP_MARK    "CA"  // Contains a capital letter
#define CONTAIN_CAPS_MARK   "CAA" // Contains several capital letters
#define CONTAIN_PERIOD_MARK "CP"  // Contains a period
#define CONTAIN_COMMA_MARK  "CC"  // Contains a comma
#define CONTAIN_NUMBER_MARK "CN"  // Contains a number
#define MULTIWORD_MARK      "MW"  // Contains underscores (multiword)