Updated source code from upstream SVN
[svmtool++.git] / src / er.cc
blobe7f4500c0dc279ad5e5211b058711b5b5ef2e526
1 /*
2 * Copyright (C) 2004 Jesus Gimenez, Lluis Marquez and Senen Moya
4 * This library is free software; you can redistribute it and/or
5 * modify it under the terms of the GNU Lesser General Public
6 * License as published by the Free Software Foundation; either
7 * version 2.1 of the License, or (at your option) any later version.
8 *
9 * This library is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 * Lesser General Public License for more details.
14 * You should have received a copy of the GNU Lesser General Public
15 * License along with this library; if not, write to the Free Software
16 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
19 #include "stdio.h"
20 #include "er.h"
22 /*****************************************************************
23 * Regular expressions
24 *****************************************************************/
/*
 * Compiled regular expressions shared by the functions in this file.
 * They are filled in by erCompRegExp() and released by erFreeRegExp().
 */

/* Number-shaped tokens (tested by erLookRegExp). */
regex_t erCard;
regex_t erCardPunct;
regex_t erCardSeps;
regex_t erCardSuffix;

/* General word-shape patterns. */
regex_t erMultiWord;
regex_t erContainNum;
regex_t erStartCap;
regex_t erStartLower;
regex_t erStartNumber;
regex_t erAllUp;
regex_t erAllLow;
regex_t erContainCap;
regex_t erContainCaps;
regex_t erContainPeriod;
regex_t erContainComma;
30 /**************************************************/
/*
 * Source text of the POSIX extended regular expressions compiled by
 * erCompRegExp().  The two bracket expressions below are shared by the
 * capitalisation patterns and joined in by string-literal concatenation.
 */
#define ER_UPPER_SET "[A-ZÇÑÁÉÍÓÚÀÈÌÒÙÄËÏÖÜ]"
#define ER_LOWER_SET "[a-zçñáéíóúàèìòùäëïöü]"

/* First character is an upper-case letter / lower-case letter / digit. */
#define ER_STARTCAP      "^" ER_UPPER_SET ".*$"
#define ER_STARTLOWER    "^" ER_LOWER_SET ".*$"
#define ER_STARTNUMBER   "^[0-9].*$"

/* Every character is upper-case / lower-case. */
#define ER_ALLUP         "^" ER_UPPER_SET "+$"
#define ER_ALLLOW        "^" ER_LOWER_SET "+$"

/* An upper-case letter after at least one other character / two upper-case letters anywhere. */
#define ER_CONTAINCAP    "^.+" ER_UPPER_SET ".*$"
#define ER_CONTAINCAPS   "^.*" ER_UPPER_SET ".*" ER_UPPER_SET ".*$"

/* Contains a period / a comma / a digit / a hyphen. */
#define ER_CONTAINPERIOD "^.*[.].*$"
#define ER_CONTAINCOMMA  "^.*[,].*$"
#define ER_CONTAINNUM    "^.*[0-9].*$"
#define ER_MULTIWORD     "^.*[-].*$"

/* Digits only / digits then punctuation / digits joined by separators / digits then a non-digit. */
#define ER_CARD          "^[0-9]+$"
#define ER_CARDPUNCT     "^[0-9]+[,!?:.]+$"
#define ER_CARDSEPS      "^[0-9]+[-,:\\/.][0-9,:\\/.-]+$"
#define ER_CARDSUFFIX    "^[0-9]+[^0-9]+.*$"
48 /**************************************************/
50 void erCompRegExp()
52 regcomp (&erCard,ER_CARD,REG_EXTENDED);
53 regcomp (&erCardPunct,ER_CARDPUNCT,REG_EXTENDED);
54 regcomp (&erCardSeps,ER_CARDSEPS,REG_EXTENDED);
55 regcomp (&erCardSuffix,ER_CARDSUFFIX,REG_EXTENDED);
57 regcomp (&erStartCap,ER_STARTCAP,REG_EXTENDED);
58 regcomp (&erStartNumber,ER_STARTNUMBER,REG_EXTENDED);
59 regcomp (&erStartLower,ER_STARTLOWER,REG_EXTENDED);
60 regcomp (&erAllUp,ER_ALLUP,REG_EXTENDED);
61 regcomp (&erAllLow,ER_ALLLOW,REG_EXTENDED);
62 regcomp (&erContainCap,ER_CONTAINCAP,REG_EXTENDED);
63 regcomp (&erContainCaps,ER_CONTAINCAPS,REG_EXTENDED);
64 regcomp (&erContainPeriod,ER_CONTAINPERIOD,REG_EXTENDED);
65 regcomp (&erContainComma,ER_CONTAINCOMMA,REG_EXTENDED);
66 regcomp (&erContainNum,ER_CONTAINNUM,REG_EXTENDED);
67 regcomp (&erMultiWord,ER_MULTIWORD,REG_EXTENDED);
70 /**************************************************/
72 void erFreeRegExp()
74 regfree(&erCard);
75 regfree(&erCardSuffix);
76 regfree(&erCardSeps);
77 regfree(&erCardPunct);
79 regfree(&erStartCap);
80 regfree(&erStartLower);
81 regfree(&erStartNumber);
82 regfree(&erAllUp);
83 regfree(&erAllLow);
84 regfree(&erContainCap);
85 regfree(&erContainCaps);
86 regfree(&erContainComma);
87 regfree(&erContainPeriod);
88 regfree(&erContainNum);
89 regfree(&erMultiWord);
92 /**************************************************/
/*
 * Tests a string against an already compiled regular expression.
 *
 * er  : pointer to a compiled regex_t, passed as void*
 * str : text to test
 *
 * Returns 1 if str matches the regular expression and 0 in any other
 * case, including when er is NULL.
 */
int erLookRegExp2(void *er,const std::string& str)
{
  /* regexec must never be handed a null expression. */
  if (er == NULL) return 0;

  regex_t *compiled = static_cast<regex_t *>(er);

  /* Only the yes/no answer is needed, so no sub-match storage is passed. */
  return (regexec(compiled, str.c_str(), 0, NULL, 0) == 0) ? 1 : 0;
}
105 /**************************************************/
107 int erLookRegExp(const std::string& m)
109 int ret=-1;
111 if (!regexec (&erCardPunct,m.c_str(),0,NULL,0)) ret=CARDPUNCT;
112 else if (!regexec (&erCardSeps,m.c_str(),0,NULL,0)) ret=CARDSEPS;
113 else if (!regexec (&erCardSuffix,m.c_str(),0,NULL,0)) ret=CARDSUFFIX;
114 else if (!regexec (&erCard,m.c_str(),0,NULL,0)) ret=CARD;
116 return ret;