1 /* This file is part of Strigi Desktop Search
3 * Copyright (C) 2002 Matthias Witzgall <witzi@gmx.net>
4 * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
23 Include the strigi specific headers.
25 #define STRIGI_IMPORT_API
26 #include <strigi/analyzerplugin.h>
27 #include <strigi/streamthroughanalyzer.h>
28 #include <strigi/analysisresult.h>
29 #include <strigi/fieldtypes.h>
31 using namespace Strigi
;
/* Forward declaration: DviThroughAnalyzer keeps a pointer to its factory,
   and the factory class itself is defined after the analyzer. */
class DviThroughAnalyzerFactory;
40 Define a class that inherits from StreamThroughAnalyzer.
41 The only function we really need to implement is connectInputStream()
43 class STRIGI_PLUGIN_API DviThroughAnalyzer
: public StreamThroughAnalyzer
{
45 AnalysisResult
* indexable
;
46 const DviThroughAnalyzerFactory
* factory
;
47 const char* name() const {
48 return "DviThroughAnalyzer";
51 DviThroughAnalyzer(const DviThroughAnalyzerFactory
* f
) :factory(f
) {}
52 ~DviThroughAnalyzer() {}
53 void setIndexable(AnalysisResult
* i
) { indexable
= i
; }
54 InputStream
*connectInputStream(InputStream
*in
);
55 /* we only read the header so we are ready immediately */
56 bool isReadyWithStream() { return true; }
/*
 Define a factory class that provides information about the fields that an
 analyzer can extract. This has a function similar to KFilePlugin::addItemInfo.
*/
63 class STRIGI_PLUGIN_API DviThroughAnalyzerFactory
: public StreamThroughAnalyzerFactory
{
64 friend class DviThroughAnalyzer
;
66 const char* name() const {
67 return "DviThroughAnalyzer";
69 /* This is why this class is a factory. */
70 StreamThroughAnalyzer
* newInstance() const {
71 return new DviThroughAnalyzer(this);
73 void registerFields(FieldRegister
& );
75 /* define static fields that contain the field names. */
76 static const string commentFieldName
;
77 static const string pagesFieldName
;
79 /* The RegisteredField instances are used to index specific fields quickly.
80 We pass a pointer to the instance instead of a string.
82 const RegisteredField
* commentField
;
83 const RegisteredField
* pagesField
;
86 const string
DviThroughAnalyzerFactory::commentFieldName("content.comment");
87 const string
DviThroughAnalyzerFactory::pagesFieldName("document.stats.page_count");
90 Register the field names so that the StreamIndexer knows which analyzer
91 provides what information.
// Registers the two field names of this factory with the given FieldRegister
// and stores the values returned by registerField() in commentField and
// pagesField.
// NOTE(review): this listing is incomplete -- the return type line, the
// trailing arguments of both registerField() calls (everything after the
// field type) and the closing brace are not visible here. Restore them from
// the original file before building.
94 DviThroughAnalyzerFactory::registerFields(FieldRegister
& r
) {
// Comment field: registered under commentFieldName with the string type.
95 commentField
= r
.registerField(commentFieldName
, FieldRegister::stringType
,
// Page-count field: registered under pagesFieldName with the integer type.
97 pagesField
= r
.registerField(pagesFieldName
, FieldRegister::integerType
,
// Looks at the beginning of the stream and, when the first two bytes match
// the expected values, adds the comment found in the header to the analysis
// result under factory->commentField.
// NOTE(review): this listing is incomplete -- the return type, the
// declaration of `c`, any check of `nread`, the body of the "not a DVI file"
// branch and the end of the function (including the return statement) are
// not visible here. Restore them from the original file before building.
102 DviThroughAnalyzer::connectInputStream(InputStream
* in
) {
// Ask the stream for 270 bytes; `c` receives the data and `nread` the result
// of the call. 270 = 15 leading bytes + 255, the largest comment length that
// the one-byte length field read below can express.
// NOTE(review): presumably the two 270 arguments are the minimum and maximum
// read sizes -- confirm against the InputStream::read documentation.
105 int32_t nread
= in
->read(c
, 270, 270);
110 // check the magic bytes (remember: all files pass through here)
111 const unsigned char* buffer
= (const unsigned char*)c
;
// Bytes 0 and 1 must be 247 and 2 (presumably the DVI preamble opcode and
// format id -- confirm against the DVI format description).
112 if (buffer
[0] != 247 || buffer
[1] != 2) {
113 // this file is not a DVI file
// Byte 14 is used as the length of the comment, whose text starts at byte 15.
116 unsigned char bufferLength
= buffer
[14];
117 string
comment((const char*)buffer
+15, bufferLength
);
// Report the comment using the field handle stored in the factory.
118 indexable
->addValue(factory
->commentField
, comment
);
120 // TODO: extract the number of pages
121 // this is tricky because we need to get the data from the end of the stream
122 // a general purpose event driven stream implementation is required for that
/*
 For plugins, we need a way to find out which analyzer factories a plugin
 defines. One instance of AnalyzerFactoryFactory per plugin provides this
 information.
*/
132 class Factory
: public AnalyzerFactoryFactory
{
134 list
<StreamThroughAnalyzerFactory
*>
135 getStreamThroughAnalyzerFactories() const {
136 list
<StreamThroughAnalyzerFactory
*> af
;
137 af
.push_back(new DviThroughAnalyzerFactory());
143 Register the AnalyzerFactoryFactory
145 STRIGI_ANALYZER_FACTORY(Factory
)