2 * Copyright 2010, Haiku.
3 * Distributed under the terms of the MIT License.
6 * based on previous work of Ankur Sethi
7 * Clemens Zeidler <haiku@clemens-zeidler.de>
10 #include "CLuceneDataBase.h"
12 #include <Directory.h>
14 #include <TranslatorRoster.h>
// Debug tracing: with DEBUG_CLUCENE_DATABASE defined, STRACE() forwards its
// arguments to printf() behind an "FT: " prefix. The second STRACE definition
// expands to an empty statement (presumably the non-debug branch; the
// #else/#endif lines are not visible in this view).
17 #define DEBUG_CLUCENE_DATABASE
18 #ifdef DEBUG_CLUCENE_DATABASE
20 # define STRACE(x...) printf("FT: " x)
22 # define STRACE(x...) ;
// Make the CLucene document and util namespaces available unqualified.
26 using namespace lucene::document
;
27 using namespace lucene::util
;
// Upper bound for the retry loops in this file (opening the index
// writer/reader, removing documents and adding documents).
30 const uint8 kCluceneTries
= 10;
// Converts the multibyte C string str into a newly allocated, always
// NUL-terminated wide string using mbstowcs() (current C locale).
//
// Returns NULL if str is NULL or contains an invalid multibyte sequence.
// On success the caller owns the buffer and must release it with delete[].
wchar_t* to_wchar(const char *str)
{
	if (str == NULL)
		return NULL;

	// A multibyte string never converts to more wide characters than it has
	// bytes, so strlen(str) elements plus one for the terminator always
	// suffice. (The count is in wchar_t elements, not bytes.)
	const size_t capacity = strlen(str) + 1;
	wchar_t* wStr = new wchar_t[capacity];

	if (mbstowcs(wStr, str, capacity) == (size_t)-1) {
		// invalid multibyte sequence: don't leak the buffer
		delete[] wStr;
		return NULL;
	}

	// mbstowcs() only writes the terminator when it fits; it always does with
	// the capacity chosen above, but make the guarantee explicit.
	wStr[capacity - 1] = L'\0';
	return wStr;
}
// Constructor: remembers databasePath as the index directory and derives the
// temporary file path from it.
45 CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath
& databasePath
)
47 fDataBasePath(databasePath
),
48 fTempPath(databasePath
),
// Logged with plain printf(), i.e. independent of the STRACE debug switch.
51 printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath
.Path());
// Create the index directory with mode 0755; the result of
// create_directory() is not checked here.
52 create_directory(fDataBasePath
.Path(), 0755);
// fTempPath becomes <databasePath>/temp_file.
54 fTempPath
.Append("temp_file");
// Destructor. As the TODO states, the temporary file at fTempPath is not
// removed yet.
58 CLuceneWriteDataBase::~CLuceneWriteDataBase()
60 // TODO: delete fTempPath file
// Initialisation check for the write database.
// NOTE(review): the body is not visible in this view - confirm what it
// actually reports.
65 CLuceneWriteDataBase::InitCheck()
// Queues ref for indexing; nothing is written to the index here. Commit()
// later processes fAddQueue. The linear scan below looks for an entry equal
// to ref that is already queued (the statement taken on a match is not
// visible in this view).
73 CLuceneWriteDataBase::AddDocument(const entry_ref
& ref
)
75 // check if already in the queue
76 for (unsigned int i
= 0; i
< fAddQueue
.size(); i
++) {
77 if (fAddQueue
.at(i
) == ref
)
80 fAddQueue
.push_back(ref
);
87 CLuceneWriteDataBase::RemoveDocument(const entry_ref
& ref
)
89 // check if already in the queue
90 for (unsigned int i
= 0; i
< fAddQueue
.size(); i
++) {
91 if (fDeleteQueue
.at(i
) == ref
)
94 fDeleteQueue
.push_back(ref
);
// Applies the queued changes to the index: first removes documents, then
// indexes everything in fAddQueue through a freshly opened index writer.
// The statements taken by the guard conditions below are not visible in this
// view.
100 CLuceneWriteDataBase::Commit()
// Nothing queued at all.
102 if (fAddQueue
.size() == 0 && fDeleteQueue
.size() == 0)
// Entries about to be (re-)added are removed from the index as well -
// presumably so a re-indexed file does not end up with two documents;
// TODO confirm.
106 _RemoveDocuments(fAddQueue
);
107 _RemoveDocuments(fDeleteQueue
);
108 fDeleteQueue
.clear();
// Only deletions were queued.
110 if (fAddQueue
.size() == 0)
113 fIndexWriter
= _OpenIndexWriter();
// _OpenIndexWriter() starts from NULL, so NULL here means no writer could
// be opened.
114 if (fIndexWriter
== NULL
)
117 status_t status
= B_OK
;
// Index each queued entry; the handling of a failed _IndexDocument() call
// is not visible in this view.
118 for (unsigned int i
= 0; i
< fAddQueue
.size(); i
++) {
119 if (!_IndexDocument(fAddQueue
.at(i
))) {
126 fIndexWriter
->close();
// Tries up to kCluceneTries times to create an IndexWriter on the database
// directory, using fStandardAnalyzer. A CLuceneError thrown during an attempt
// is traced via STRACE.
135 CLuceneWriteDataBase::_OpenIndexWriter()
137 IndexWriter
* writer
= NULL
;
138 for (int i
= 0; i
< kCluceneTries
; i
++) {
// createIndex starts out true; the statement guarded by the indexExists()
// check is not visible here (presumably it clears the flag so an existing
// index is not recreated - TODO confirm).
140 bool createIndex
= true;
141 if (IndexReader::indexExists(fDataBasePath
.Path()))
144 writer
= new IndexWriter(fDataBasePath
.Path(),
145 &fStandardAnalyzer
, createIndex
);
148 } catch (CLuceneError
&error
) {
149 STRACE("CLuceneError: _OpenIndexWriter %s\n", error
.what());
// Tries up to kCluceneTries times to open an IndexReader on the database
// directory. A CLuceneError thrown during an attempt is traced via STRACE.
159 CLuceneWriteDataBase::_OpenIndexReader()
161 IndexReader
* reader
= NULL
;
// BEntry for the database directory; how it is used is not visible in this
// view.
163 BEntry
entry(fDataBasePath
.Path(), NULL
);
167 for (int i
= 0; i
< kCluceneTries
; i
++) {
// No index at the database path; the statement taken in that case is not
// visible here.
169 if (!IndexReader::indexExists(fDataBasePath
.Path()))
172 reader
= IndexReader::open(fDataBasePath
.Path());
175 } catch (CLuceneError
&error
) {
176 STRACE("CLuceneError: _OpenIndexReader %s\n", error
.what());
// Removes the index documents belonging to every entry_ref in docs. Each ref
// is converted to a path, the path to a wide string, and the removal is
// attempted up to kCluceneTries times through an index reader.
187 CLuceneWriteDataBase::_RemoveDocuments(std::vector
<entry_ref
>& docs
)
189 IndexReader
*reader
= NULL
;
190 reader
= _OpenIndexReader();
195 for (unsigned int i
= 0; i
< docs
.size(); i
++) {
196 BPath
path(&docs
.at(i
));
// NOTE(review): to_wchar() returns a buffer allocated with new[]; its
// release is not visible in this view - confirm wPath is delete[]d on
// every path out of this iteration.
197 wchar_t* wPath
= to_wchar(path
.Path());
// NOTE(review): this retry counter shadows the outer loop index i. That is
// legal, but easy to misread.
201 for (int i
= 0; i
< kCluceneTries
; i
++) {
202 status
= _RemoveDocument(wPath
, reader
);
// The reader is re-opened inside the retry loop (presumably after a failed
// attempt; the surrounding condition is not visible here).
207 reader
= _OpenIndexReader();
// Deletes, through reader, all index documents whose "path" term equals
// wPath. "path" is the field _IndexDocument() stores untokenized. A
// CLuceneError thrown by the deletion is traced via STRACE.
227 CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath
, IndexReader
* reader
)
230 Term
term(_T("path"), wPath
);
231 reader
->deleteDocuments(&term
);
232 } catch (CLuceneError
&error
) {
233 STRACE("CLuceneError: deleteDocuments %s\n", error
.what());
241 CLuceneWriteDataBase::_IndexDocument(const entry_ref
& ref
)
245 BFile inFile
, outFile
;
246 inFile
.SetTo(path
.Path(), B_READ_ONLY
);
247 if (inFile
.InitCheck() != B_OK
) {
248 STRACE("Can't open inFile %s\n", path
.Path());
251 outFile
.SetTo(fTempPath
.Path(),
252 B_READ_WRITE
| B_CREATE_FILE
| B_ERASE_FILE
);
253 if (outFile
.InitCheck() != B_OK
) {
254 STRACE("Can't open outFile %s\n", fTempPath
.Path());
258 BTranslatorRoster
* translatorRoster
= BTranslatorRoster::Default();
259 if (translatorRoster
->Translate(&inFile
, NULL
, NULL
, &outFile
, 'TEXT')
266 FileReader
* fileReader
= new FileReader(fTempPath
.Path(), "UTF-8");
267 wchar_t* wPath
= to_wchar(path
.Path());
271 Document
*document
= new Document
;
272 Field
contentField(_T("contents"), fileReader
,
273 Field::STORE_NO
| Field::INDEX_TOKENIZED
);
274 document
->add(contentField
);
275 Field
pathField(_T("path"), wPath
,
276 Field::STORE_YES
| Field::INDEX_UNTOKENIZED
);
277 document
->add(pathField
);
280 for (int i
= 0; i
< kCluceneTries
; i
++) {
282 fIndexWriter
->addDocument(document
);
283 STRACE("document added, retries: %i\n", i
);
285 } catch (CLuceneError
&error
) {
286 STRACE("CLuceneError addDocument %s\n", error
.what());
287 fIndexWriter
->close();
289 fIndexWriter
= _OpenIndexWriter();
290 if (fIndexWriter
== NULL
) {