2 * Copyright 2010, Haiku.
3 * Distributed under the terms of the MIT License.
6 * based on previous work of Ankur Sethi
7 * Clemens Zeidler <haiku@clemens-zeidler.de>
10 #include "CLuceneDataBase.h"
12 #include <Directory.h>
14 #include <TranslatorRoster.h>
// Debug tracing switch for this file. With DEBUG_CLUCENE_DATABASE defined the
// printf-based STRACE is meant to be used.
// NOTE(review): the #else/#endif lines separating the two STRACE definitions
// are not visible in this excerpt - confirm they are present.
17 #define DEBUG_CLUCENE_DATABASE
18 #ifdef DEBUG_CLUCENE_DATABASE
// Tracing variant: forwards its arguments to printf, prefixed with "FT: ".
20 # define STRACE(x...) printf("FT: " x)
// Silent variant: expands to an empty statement.
22 # define STRACE(x...) ;
26 using namespace lucene::document
;
27 using namespace lucene::util
;
// Loop bound for the retry loops in this file that repeat a CLucene operation
// (opening a reader/writer, removing or adding a document).
30 const uint8 kCluceneTries
= 10;
// Converts the multibyte string str into a newly allocated, NUL-terminated
// wide string using mbstowcs(), i.e. according to the current C locale.
//
// Returns NULL if str is NULL or contains a sequence mbstowcs() cannot
// convert. On success the caller owns the returned buffer and must release
// it with delete[].
wchar_t* to_wchar(const char *str)
{
	if (str == NULL)
		return NULL;

	// A multibyte string never decodes to more wide characters than it has
	// bytes, so strlen() slots plus one for the terminator always suffice.
	// This also gives the empty string a one-element buffer instead of a
	// zero-length array that could never be terminated.
	size_t size = strlen(str) + 1;
	wchar_t *wStr = new wchar_t[size];

	// mbstowcs() reports failure as (size_t)-1, so compare in that type.
	if (mbstowcs(wStr, str, size) == (size_t)-1) {
		delete[] wStr;
		return NULL;
	}

	// mbstowcs() does not write a terminator when it fills every slot;
	// terminate unconditionally so callers can rely on it.
	wStr[size - 1] = L'\0';
	return wStr;
}
// Constructs a writer for the index stored at databasePath. Both path members
// start out as copies of databasePath; fTempPath is then extended to name a
// file inside that directory.
46 CLuceneWriteDataBase::CLuceneWriteDataBase(const BPath
& databasePath
)
48 fDataBasePath(databasePath
),
49 fTempPath(databasePath
),
// NOTE(review): this is an unconditional printf, unlike the STRACE tracing
// used elsewhere in this file - confirm it is meant to stay.
52 printf("CLuceneWriteDataBase fDataBasePath %s\n", fDataBasePath
.Path());
// Create the database directory with mode 0755; the result of the call is
// not used on this line.
53 create_directory(fDataBasePath
.Path(), 0755);
// fTempPath now names "temp_file" inside the database directory;
// _IndexDocument() opens it as the translator output file.
55 fTempPath
.Append("temp_file");
// Destructor. Per the TODO below, removing the file at fTempPath is still
// outstanding.
59 CLuceneWriteDataBase::~CLuceneWriteDataBase()
61 // TODO: delete fTempPath file
// NOTE(review): only the signature of InitCheck() is visible in this excerpt;
// its body and return value could not be reviewed.
66 CLuceneWriteDataBase::InitCheck()
// Queues ref for indexing. fAddQueue is scanned first for an equal entry
// before ref is appended; the queue is processed later by Commit().
74 CLuceneWriteDataBase::AddDocument(const entry_ref
& ref
)
76 // check if already in the queue
77 for (unsigned int i
= 0; i
< fAddQueue
.size(); i
++) {
// NOTE(review): the statement executed on a match is not visible in this
// excerpt - presumably an early return so duplicates are not queued; confirm.
78 if (fAddQueue
.at(i
) == ref
)
81 fAddQueue
.push_back(ref
);
// Queues ref for removal from the index. fDeleteQueue is scanned first for an
// equal entry before ref is appended; the queue is processed later by
// Commit().
88 CLuceneWriteDataBase::RemoveDocument(const entry_ref
& ref
)
90 // check if already in the queue
// The scan must be bounded by fDeleteQueue's own size: it is the container
// indexed with at() below, and at() throws std::out_of_range for an index
// past its end. Bounding it by another queue's size either throws or leaves
// part of fDeleteQueue unchecked.
91 for (unsigned int i
= 0; i
< fDeleteQueue
.size(); i
++) {
92 if (fDeleteQueue
.at(i
) == ref
)
95 fDeleteQueue
.push_back(ref
);
// Applies the queued changes to the index: the paths in fAddQueue and
// fDeleteQueue are first removed from the index, then every entry in
// fAddQueue is indexed through an IndexWriter opened for this commit.
101 CLuceneWriteDataBase::Commit()
// Nothing queued in either direction.
103 if (fAddQueue
.size() == 0 && fDeleteQueue
.size() == 0)
// Entries queued for adding are removed as well - presumably so that a
// re-indexed file replaces its previous document instead of duplicating it;
// confirm.
107 _RemoveDocuments(fAddQueue
);
108 _RemoveDocuments(fDeleteQueue
);
109 fDeleteQueue
.clear();
// Only deletions were queued; no writer is needed.
111 if (fAddQueue
.size() == 0)
114 fIndexWriter
= _OpenIndexWriter();
115 if (fIndexWriter
== NULL
)
118 status_t status
= B_OK
;
119 for (unsigned int i
= 0; i
< fAddQueue
.size(); i
++) {
// _IndexDocument() evaluates to false on failure; how the failure is handled
// inside this branch is not visible in this excerpt.
120 if (!_IndexDocument(fAddQueue
.at(i
))) {
127 fIndexWriter
->close();
// Creates an IndexWriter on fDataBasePath using fStandardAnalyzer, looping up
// to kCluceneTries times. A CLuceneError raised by an attempt is caught and
// traced. writer starts out as NULL.
136 CLuceneWriteDataBase::_OpenIndexWriter()
138 IndexWriter
* writer
= NULL
;
139 for (int i
= 0; i
< kCluceneTries
; i
++) {
// createIndex is passed to the IndexWriter constructor below.
// NOTE(review): the statement guarded by indexExists() is not visible here -
// presumably it clears createIndex so an existing index is opened rather than
// recreated; confirm.
141 bool createIndex
= true;
142 if (IndexReader::indexExists(fDataBasePath
.Path()))
145 writer
= new IndexWriter(fDataBasePath
.Path(),
146 &fStandardAnalyzer
, createIndex
);
149 } catch (CLuceneError
&error
) {
150 STRACE("CLuceneError: _OpenIndexWriter %s\n", error
.what());
// Opens an IndexReader on fDataBasePath, looping up to kCluceneTries times.
// A CLuceneError raised by an attempt is caught and traced. reader starts out
// as NULL.
160 CLuceneWriteDataBase::_OpenIndexReader()
162 IndexReader
* reader
= NULL
;
// NOTE(review): how entry is used (e.g. an existence check on the database
// directory) is not visible in this excerpt.
164 BEntry
entry(fDataBasePath
.Path(), NULL
);
168 for (int i
= 0; i
< kCluceneTries
; i
++) {
// The statement taken when no index exists at the path is not visible here.
170 if (!IndexReader::indexExists(fDataBasePath
.Path()))
173 reader
= IndexReader::open(fDataBasePath
.Path());
176 } catch (CLuceneError
&error
) {
177 STRACE("CLuceneError: _OpenIndexReader %s\n", error
.what());
// Removes the index documents for every entry_ref in docs. Each ref is turned
// into a path, converted to a wide string and handed to _RemoveDocument(),
// which is attempted up to kCluceneTries times per entry.
188 CLuceneWriteDataBase::_RemoveDocuments(std::vector
<entry_ref
>& docs
)
190 IndexReader
*reader
= NULL
;
191 reader
= _OpenIndexReader();
196 for (unsigned int i
= 0; i
< docs
.size(); i
++) {
197 BPath
path(&docs
.at(i
));
// to_wchar() allocates the wide string with new[].
// NOTE(review): no matching delete[] is visible in this excerpt - confirm
// wPath is released on every path through the loop.
198 wchar_t* wPath
= to_wchar(path
.Path());
// NOTE(review): this inner index shadows the outer loop's i; legal, but a
// distinct name would avoid confusion.
202 for (int i
= 0; i
< kCluceneTries
; i
++) {
203 status
= _RemoveDocument(wPath
, reader
);
// A new reader is obtained here; the condition under which this happens and
// what becomes of the previous reader are not visible in this excerpt.
208 reader
= _OpenIndexReader();
// Asks reader to delete the documents whose "path" term equals wPath. A
// CLuceneError raised by the deletion is caught and traced; the value
// returned in either case is not visible in this excerpt.
228 CLuceneWriteDataBase::_RemoveDocument(wchar_t* wPath
, IndexReader
* reader
)
// "path" is the same field name _IndexDocument() uses when it stores a
// document's path.
231 Term
term(_T("path"), wPath
);
232 reader
->deleteDocuments(&term
);
233 } catch (CLuceneError
&error
) {
234 STRACE("CLuceneError: deleteDocuments %s\n", error
.what());
242 CLuceneWriteDataBase::_IndexDocument(const entry_ref
& ref
)
246 BFile inFile
, outFile
;
247 inFile
.SetTo(path
.Path(), B_READ_ONLY
);
248 if (inFile
.InitCheck() != B_OK
) {
249 STRACE("Can't open inFile %s\n", path
.Path());
252 outFile
.SetTo(fTempPath
.Path(),
253 B_READ_WRITE
| B_CREATE_FILE
| B_ERASE_FILE
);
254 if (outFile
.InitCheck() != B_OK
) {
255 STRACE("Can't open outFile %s\n", fTempPath
.Path());
259 BTranslatorRoster
* translatorRoster
= BTranslatorRoster::Default();
260 if (translatorRoster
->Translate(&inFile
, NULL
, NULL
, &outFile
, 'TEXT')
267 FileReader
* fileReader
= new FileReader(fTempPath
.Path(), "UTF-8");
268 wchar_t* wPath
= to_wchar(path
.Path());
272 Document
*document
= new Document
;
273 Field
contentField(_T("contents"), fileReader
,
274 Field::STORE_NO
| Field::INDEX_TOKENIZED
);
275 document
->add(contentField
);
276 Field
pathField(_T("path"), wPath
,
277 Field::STORE_YES
| Field::INDEX_UNTOKENIZED
);
278 document
->add(pathField
);
281 for (int i
= 0; i
< kCluceneTries
; i
++) {
283 fIndexWriter
->addDocument(document
);
284 STRACE("document added, retries: %i\n", i
);
286 } catch (CLuceneError
&error
) {
287 STRACE("CLuceneError addDocument %s\n", error
.what());
288 fIndexWriter
->close();
290 fIndexWriter
= _OpenIndexWriter();
291 if (fIndexWriter
== NULL
) {