2 * Modified version of StandardAnalyzer.cpp for Nepomuk mostly to optimize for filename indexing
3 * Copyright (C) 2008 Sebastian Trueg <trueg@kde.org>
5 * Based on StandardAnalyzer.cpp from the CLucene package.
6 * Copyright (C) 2003-2006 Ben van Klinken and the CLucene Team
8 * This library is free software; you can redistribute it and/or
9 * modify it under the terms of the GNU Library General Public
10 * License as published by the Free Software Foundation; either
11 * version 2 of the License, or (at your option) any later version.
13 * This library is distributed in the hope that it will be useful,
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 * Library General Public License for more details.
18 * You should have received a copy of the GNU Library General Public License
19 * along with this library; see the file COPYING.LIB. If not, write to
20 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 * Boston, MA 02110-1301, USA.
24 #include "cluceneanalyzer.h"
25 #include "clucenetokenizer.h"
26 #include "clucenefilter.h"
28 #include <CLucene/StdHeader.h>
29 #include <CLucene/util/VoidMap.h>
30 #include <CLucene/util/Reader.h>
31 #include <CLucene/analysis/AnalysisHeader.h>
32 #include <CLucene/analysis/Analyzers.h>
// Default constructor.
// Fills the member stop-word table `stopSet` by passing it, together with
// StopAnalyzer::ENGLISH_STOP_WORDS, to StopFilter::fillStopTable().
// NOTE(review): the braces and any member-initializer list of this
// constructor are not visible in this view of the file - confirm against
// the original source before editing.
39 CLuceneAnalyzer::CLuceneAnalyzer()
42 CL_NS(analysis
)::StopFilter::fillStopTable( &stopSet
,CL_NS(analysis
)::StopAnalyzer::ENGLISH_STOP_WORDS
);
// Constructor taking a caller-supplied stop-word list.
// Fills the member stop-word table `stopSet` by passing it, together with
// `stopWords`, to StopFilter::fillStopTable().
//
// @param stopWords  array of stop-word strings forwarded unchanged to
//                   fillStopTable(); presumably it must be terminated the
//                   same way as ENGLISH_STOP_WORDS - TODO confirm against
//                   the CLucene documentation.
// NOTE(review): the member-initializer list that follows the ':' is not
// visible in this view of the file.
45 CLuceneAnalyzer::CLuceneAnalyzer( const TCHAR
** stopWords
):
48 CL_NS(analysis
)::StopFilter::fillStopTable( &stopSet
,stopWords
);
// Destructor.
// NOTE(review): the body is not visible in this view of the file, so no
// cleanup behavior can be stated here.
51 CLuceneAnalyzer::~CLuceneAnalyzer()
// Builds the token stream used to analyze `reader`.
//
// The visible statements stack four stages, each wrapping the previous one:
//   1. CLuceneTokenizer  - constructed directly on `reader`
//   2. CLuceneFilter     - wraps the tokenizer
//   3. LowerCaseFilter   - wraps stage 2
//   4. StopFilter        - wraps stage 3 and is given the member `stopSet`
//
// @param fieldName  not referenced by any of the statements visible here.
// @param reader     input handed to the CLuceneTokenizer.
//
// NOTE(review): the `true` passed to each filter is presumably CLucene's
// "delete the wrapped stream" flag, i.e. each filter takes ownership of the
// stream it wraps - TODO confirm against the TokenFilter documentation.
// NOTE(review): the return statement and closing brace are not visible in
// this view; presumably the outermost filter (`ret`) is returned.
56 CL_NS(analysis
)::TokenStream
* CLuceneAnalyzer::tokenStream(const TCHAR
* fieldName
, Reader
* reader
)
// Stage 1: the tokenizer reads directly from `reader`.
58 CL_NS(analysis
)::TokenStream
* ret
= _CLNEW
CLuceneTokenizer(reader
);
// Stage 2: wrap the tokenizer in the project's own CLuceneFilter.
59 ret
= _CLNEW
CLuceneFilter(ret
,true);
// Stage 3: wrap the result in CLucene's LowerCaseFilter.
60 ret
= _CLNEW
CL_NS(analysis
)::LowerCaseFilter(ret
,true);
// Stage 4: wrap the result in CLucene's StopFilter, using `stopSet`.
61 ret
= _CLNEW
CL_NS(analysis
)::StopFilter(ret
,true, &stopSet
);