update
[kdegraphics.git] / strigi-analyzer / dvi / dvithroughanalyzer.cpp
blob608111c50a6a4ab81ff6ebe49ff86a9e8041746d
1 /* This file is part of Strigi Desktop Search
3 * Copyright (C) 2002 Matthias Witzgall <witzi@gmx.net>
4 * Copyright (C) 2007 Jos van den Oever <jos@vandenoever.info>
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Library General Public License for more details.
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB. If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
23 Include the strigi specific headers.
25 #define STRIGI_IMPORT_API
26 #include <strigi/analyzerplugin.h>
27 #include <strigi/streamthroughanalyzer.h>
28 #include <strigi/analysisresult.h>
29 #include <strigi/fieldtypes.h>
31 using namespace Strigi;
32 using namespace std;
35 Declare the factory.
37 class DviThroughAnalyzerFactory;
40 Define a class that inherits from StreamThroughAnalyzer.
41 The only function we really need to implement is connectInputStream()
43 class STRIGI_PLUGIN_API DviThroughAnalyzer : public StreamThroughAnalyzer {
44 private:
45 AnalysisResult* indexable;
46 const DviThroughAnalyzerFactory* factory;
47 const char* name() const {
48 return "DviThroughAnalyzer";
50 public:
51 DviThroughAnalyzer(const DviThroughAnalyzerFactory* f) :factory(f) {}
52 ~DviThroughAnalyzer() {}
53 void setIndexable(AnalysisResult* i) { indexable = i; }
54 InputStream *connectInputStream(InputStream *in);
55 /* we only read the header so we are ready immediately */
56 bool isReadyWithStream() { return true; }
60 Define a factory class the provides information about the fields that an
61 analyzer can extract. This has a function similar to KFilePlugin::addItemInfo.
63 class STRIGI_PLUGIN_API DviThroughAnalyzerFactory : public StreamThroughAnalyzerFactory {
64 friend class DviThroughAnalyzer;
65 private:
66 const char* name() const {
67 return "DviThroughAnalyzer";
69 /* This is why this class is a factory. */
70 StreamThroughAnalyzer* newInstance() const {
71 return new DviThroughAnalyzer(this);
73 void registerFields(FieldRegister& );
75 /* define static fields that contain the field names. */
76 static const string commentFieldName;
77 static const string pagesFieldName;
79 /* The RegisteredField instances are used to index specific fields quickly.
80 We pass a pointer to the instance instead of a string.
82 const RegisteredField* commentField;
83 const RegisteredField* pagesField;
86 const string DviThroughAnalyzerFactory::commentFieldName("content.comment");
87 const string DviThroughAnalyzerFactory::pagesFieldName("document.stats.page_count");
90 Register the field names so that the StreamIndexer knows which analyzer
91 provides what information.
93 void
94 DviThroughAnalyzerFactory::registerFields(FieldRegister& r) {
95 commentField = r.registerField(commentFieldName, FieldRegister::stringType,
96 1, 0);
97 pagesField = r.registerField(pagesFieldName, FieldRegister::integerType,
98 1, 0);
101 InputStream*
102 DviThroughAnalyzer::connectInputStream(InputStream* in) {
103 // read the header
104 const char* c;
105 int32_t nread = in->read(c, 270, 270);
106 in->reset(0);
107 if (nread < 270) {
108 return in;
110 // check the magic bytes (remember: all files pass through here)
111 const unsigned char* buffer = (const unsigned char*)c;
112 if (buffer[0] != 247 || buffer[1] != 2) {
113 // this file is not a DVI file
114 return in;
116 unsigned char bufferLength = buffer[14];
117 string comment((const char*)buffer+15, bufferLength);
118 indexable->addValue(factory->commentField, comment);
120 // TODO: extract the number of pages
121 // this is tricky because we need to get the data from the end of the stream
122 // a general purpose event driven stream implementation is required for that
124 return in;
128 For plugins, we need to have a way to find out which plugins are defined in a
129 plugin. One instance of AnalyzerFactoryFactory per plugin profides this
130 information.
132 class Factory : public AnalyzerFactoryFactory {
133 public:
134 list<StreamThroughAnalyzerFactory*>
135 getStreamThroughAnalyzerFactories() const {
136 list<StreamThroughAnalyzerFactory*> af;
137 af.push_back(new DviThroughAnalyzerFactory());
138 return af;
/*
 Register the AnalyzerFactoryFactory.
 NOTE(review): the macro presumably comes from <strigi/analyzerplugin.h> and
 exposes Factory to the plugin loader - confirm against that header.
*/
STRIGI_ANALYZER_FACTORY(Factory)