vfs: check userland buffers before reading them.
[haiku.git] / src / add-ons / index_server / fulltext / CLuceneDataBase.cpp
blob4d306e737f5f170420ccc459055771304f213acf
1 /*
2 * Copyright 2010, Haiku.
3 * Distributed under the terms of the MIT License.
5 * Authors:
6 * based on previous work of Ankur Sethi
7 * Clemens Zeidler <haiku@clemens-zeidler.de>
8 */
10 #include "CLuceneDataBase.h"
12 #include <Directory.h>
13 #include <File.h>
14 #include <TranslatorRoster.h>
// Included unconditionally: code outside of the STRACE() macro may call
// printf() as well, so the header must not depend on the debug switch.
#include <stdio.h>

// Comment this out to silence the STRACE() debug output.
#define DEBUG_CLUCENE_DATABASE
#ifdef DEBUG_CLUCENE_DATABASE
#	define STRACE(x...) printf("FT: " x)
#else
	// Expands to exactly one (empty) statement, so that "STRACE(...);" stays
	// valid inside unbraced if/else bodies.
#	define STRACE(x...) do {} while (0)
#endif
26 using namespace lucene::document;
27 using namespace lucene::util;
30 const uint8 kCluceneTries = 10;
/*!	Converts the multibyte string \a str into a newly allocated, null
	terminated wide character string, using the current locale.

	\param str The string to convert, may be \c NULL.
	\return The converted string, which the caller has to release with
		delete[], or \c NULL if \a str is \c NULL or contains an invalid
		multibyte sequence.
*/
wchar_t* to_wchar(const char *str)
{
	if (str == NULL)
		return NULL;

	// A multibyte string of n bytes converts to at most n wide characters;
	// the additional element holds the terminating null.
	size_t size = strlen(str) + 1;
	wchar_t* wStr = new wchar_t[size];

	// mbstowcs() reports an invalid sequence as (size_t)-1
	if (mbstowcs(wStr, str, size) == (size_t)-1) {
		delete[] wStr;
		return NULL;
	}

	wStr[size - 1] = L'\0';
	return wStr;
}
46 CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath& databasePath)
48 fDataBasePath(databasePath),
49 fTempPath(databasePath),
50 fIndexWriter(NULL)
52 printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath.Path());
53 create_directory(fDataBasePath.Path(), 0755);
55 fTempPath.Append("temp_file");
59 CLuceneWriteDataBase::~CLuceneWriteDataBase()
61 // TODO: delete fTempPath file
65 status_t
66 CLuceneWriteDataBase::InitCheck()
69 return B_OK;
73 status_t
74 CLuceneWriteDataBase::AddDocument(const entry_ref& ref)
76 // check if already in the queue
77 for (unsigned int i = 0; i < fAddQueue.size(); i++) {
78 if (fAddQueue.at(i) == ref)
79 return B_OK;
81 fAddQueue.push_back(ref);
83 return B_OK;
87 status_t
88 CLuceneWriteDataBase::RemoveDocument(const entry_ref& ref)
90 // check if already in the queue
91 for (unsigned int i = 0; i < fAddQueue.size(); i++) {
92 if (fDeleteQueue.at(i) == ref)
93 return B_OK;
95 fDeleteQueue.push_back(ref);
96 return B_OK;
100 status_t
101 CLuceneWriteDataBase::Commit()
103 if (fAddQueue.size() == 0 && fDeleteQueue.size() == 0)
104 return B_OK;
105 STRACE("Commit\n");
107 _RemoveDocuments(fAddQueue);
108 _RemoveDocuments(fDeleteQueue);
109 fDeleteQueue.clear();
111 if (fAddQueue.size() == 0)
112 return B_OK;
114 fIndexWriter = _OpenIndexWriter();
115 if (fIndexWriter == NULL)
116 return B_ERROR;
118 status_t status = B_OK;
119 for (unsigned int i = 0; i < fAddQueue.size(); i++) {
120 if (!_IndexDocument(fAddQueue.at(i))) {
121 status = B_ERROR;
122 break;
126 fAddQueue.clear();
127 fIndexWriter->close();
128 delete fIndexWriter;
129 fIndexWriter = NULL;
131 return status;
135 IndexWriter*
136 CLuceneWriteDataBase::_OpenIndexWriter()
138 IndexWriter* writer = NULL;
139 for (int i = 0; i < kCluceneTries; i++) {
140 try {
141 bool createIndex = true;
142 if (IndexReader::indexExists(fDataBasePath.Path()))
143 createIndex = false;
145 writer = new IndexWriter(fDataBasePath.Path(),
146 &fStandardAnalyzer, createIndex);
147 if (writer)
148 break;
149 } catch (CLuceneError &error) {
150 STRACE("CLuceneError: _OpenIndexWriter %s\n", error.what());
151 delete writer;
152 writer = NULL;
155 return writer;
159 IndexReader*
160 CLuceneWriteDataBase::_OpenIndexReader()
162 IndexReader* reader = NULL;
164 BEntry entry(fDataBasePath.Path(), NULL);
165 if (!entry.Exists())
166 return NULL;
168 for (int i = 0; i < kCluceneTries; i++) {
169 try {
170 if (!IndexReader::indexExists(fDataBasePath.Path()))
171 return NULL;
173 reader = IndexReader::open(fDataBasePath.Path());
174 if (reader)
175 break;
176 } catch (CLuceneError &error) {
177 STRACE("CLuceneError: _OpenIndexReader %s\n", error.what());
178 delete reader;
179 reader = NULL;
183 return reader;
187 bool
188 CLuceneWriteDataBase::_RemoveDocuments(std::vector<entry_ref>& docs)
190 IndexReader *reader = NULL;
191 reader = _OpenIndexReader();
192 if (!reader)
193 return false;
194 bool status = false;
196 for (unsigned int i = 0; i < docs.size(); i++) {
197 BPath path(&docs.at(i));
198 wchar_t* wPath = to_wchar(path.Path());
199 if (wPath == NULL)
200 continue;
202 for (int i = 0; i < kCluceneTries; i++) {
203 status = _RemoveDocument(wPath, reader);
204 if (status)
205 break;
206 reader->close();
207 delete reader;
208 reader = _OpenIndexReader();
209 if (!reader) {
210 status = false;
211 break;
214 delete[] wPath;
216 if (!status)
217 break;
220 reader->close();
221 delete reader;
223 return status;
227 bool
228 CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath, IndexReader* reader)
230 try {
231 Term term(_T("path"), wPath);
232 reader->deleteDocuments(&term);
233 } catch (CLuceneError &error) {
234 STRACE("CLuceneError: deleteDocuments %s\n", error.what());
235 return false;
237 return true;
241 bool
242 CLuceneWriteDataBase::_IndexDocument(const entry_ref& ref)
244 BPath path(&ref);
246 BFile inFile, outFile;
247 inFile.SetTo(path.Path(), B_READ_ONLY);
248 if (inFile.InitCheck() != B_OK) {
249 STRACE("Can't open inFile %s\n", path.Path());
250 return false;
252 outFile.SetTo(fTempPath.Path(),
253 B_READ_WRITE | B_CREATE_FILE | B_ERASE_FILE);
254 if (outFile.InitCheck() != B_OK) {
255 STRACE("Can't open outFile %s\n", fTempPath.Path());
256 return false;
259 BTranslatorRoster* translatorRoster = BTranslatorRoster::Default();
260 if (translatorRoster->Translate(&inFile, NULL, NULL, &outFile, 'TEXT')
261 != B_OK)
262 return false;
264 inFile.Unset();
265 outFile.Unset();
267 FileReader* fileReader = new FileReader(fTempPath.Path(), "UTF-8");
268 wchar_t* wPath = to_wchar(path.Path());
269 if (wPath == NULL)
270 return false;
272 Document *document = new Document;
273 Field contentField(_T("contents"), fileReader,
274 Field::STORE_NO | Field::INDEX_TOKENIZED);
275 document->add(contentField);
276 Field pathField(_T("path"), wPath,
277 Field::STORE_YES | Field::INDEX_UNTOKENIZED);
278 document->add(pathField);
280 bool status = true;
281 for (int i = 0; i < kCluceneTries; i++) {
282 try {
283 fIndexWriter->addDocument(document);
284 STRACE("document added, retries: %i\n", i);
285 break;
286 } catch (CLuceneError &error) {
287 STRACE("CLuceneError addDocument %s\n", error.what());
288 fIndexWriter->close();
289 delete fIndexWriter;
290 fIndexWriter = _OpenIndexWriter();
291 if (fIndexWriter == NULL) {
292 status = false;
293 break;
298 if (!status)
299 delete document;
300 delete[] wPath;
301 return status;