add more spacing
[personal-kdebase.git] / runtime / nepomuk / services / strigi / indexscheduler.cpp
blob29b7d8ae80933aa18e6dc56be3ce6e2da99eeaff
1 /* This file is part of the KDE Project
2 Copyright (c) 2008 Sebastian Trueg <trueg@kde.org>
4 Parts of this file are based on code from Strigi
5 Copyright (C) 2006-2007 Jos van den Oever <jos@vandenoever.info>
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License version 2 as published by the Free Software Foundation.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public License
17 along with this library; see the file COPYING.LIB. If not, write to
18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 Boston, MA 02110-1301, USA.
22 #include "indexscheduler.h"
23 #include "config.h"
25 #include <QtCore/QMutexLocker>
26 #include <QtCore/QList>
27 #include <QtCore/QFile>
28 #include <QtCore/QFileInfo>
29 #include <QtCore/QDirIterator>
30 #include <QtCore/QDateTime>
31 #include <QtCore/QByteArray>
32 #include <QtCore/QUrl>
34 #include <KDebug>
35 #include <KTemporaryFile>
37 #include <map>
38 #include <vector>
40 #include <strigi/strigiconfig.h>
41 #include <strigi/indexwriter.h>
42 #include <strigi/indexmanager.h>
43 #include <strigi/indexreader.h>
44 #include <strigi/analysisresult.h>
45 #include <strigi/fileinputstream.h>
46 #include <strigi/analyzerconfiguration.h>
49 // FIXME: remove all files from the datastore which are in folders not supposed to be indexed
51 class StoppableConfiguration : public Strigi::AnalyzerConfiguration {
52 public:
53 StoppableConfiguration()
54 : m_stop(false) {
55 #if defined(STRIGI_IS_VERSION)
56 #if STRIGI_IS_VERSION( 0, 6, 1 )
57 setIndexArchiveContents( false );
58 #endif
59 #endif
62 bool indexMore() const {
63 return !m_stop;
66 bool addMoreText() const {
67 return !m_stop;
70 void setStop( bool s ) {
71 m_stop = s;
74 private:
75 bool m_stop;
namespace {
    /**
     * Bit flags that are OR-ed together and stored as an int next to
     * each folder path in the queue of folders to update.
     */
    enum UpdateDirFlags {
        /// No flag set; exists only to make call sites more readable.
        NoUpdateFlags = 0,

        /// The folder should be updated recursively.
        UpdateRecursive = 1 << 0,

        /// The folder was queued by the update system itself,
        /// not through a call to updateDir.
        AutoUpdateFolder = 1 << 1
    };
}
100 Nepomuk::IndexScheduler::IndexScheduler( Strigi::IndexManager* manager, QObject* parent )
101 : QThread( parent ),
102 m_suspended( false ),
103 m_stopped( false ),
104 m_indexing( false ),
105 m_indexManager( manager )
107 m_analyzerConfig = new StoppableConfiguration;
109 connect( Config::self(), SIGNAL( configChanged() ),
110 this, SLOT( readConfig() ) );
114 Nepomuk::IndexScheduler::~IndexScheduler()
116 delete m_analyzerConfig;
120 void Nepomuk::IndexScheduler::suspend()
122 if ( isRunning() ) {
123 QMutexLocker locker( &m_resumeStopMutex );
124 m_suspended = true;
129 void Nepomuk::IndexScheduler::resume()
131 if ( isRunning() ) {
132 QMutexLocker locker( &m_resumeStopMutex );
133 m_suspended = false;
134 m_resumeStopWc.wakeAll();
139 void Nepomuk::IndexScheduler::setSuspended( bool suspended )
141 if ( suspended )
142 suspend();
143 else
144 resume();
148 void Nepomuk::IndexScheduler::stop()
150 if ( isRunning() ) {
151 QMutexLocker locker( &m_resumeStopMutex );
152 m_stopped = true;
153 m_suspended = false;
154 m_analyzerConfig->setStop( true );
155 m_dirsToUpdateWc.wakeAll();
156 m_resumeStopWc.wakeAll();
161 bool Nepomuk::IndexScheduler::isSuspended() const
163 return isRunning() && m_suspended;
167 bool Nepomuk::IndexScheduler::isIndexing() const
169 return m_indexing;
173 QString Nepomuk::IndexScheduler::currentFolder() const
175 return m_currentFolder;
179 void Nepomuk::IndexScheduler::setIndexingStarted( bool started )
181 if ( started != m_indexing ) {
182 m_indexing = started;
183 if ( m_indexing )
184 emit indexingStarted();
185 else
186 emit indexingStopped();
191 void Nepomuk::IndexScheduler::run()
193 // set lowest priority for this thread
194 setPriority( QThread::IdlePriority );
196 // initialization
197 m_suspended = false;
198 m_stopped = false;
199 m_analyzerConfig->setStop( false );
200 readConfig();
202 Strigi::StreamAnalyzer analyzer( *m_analyzerConfig );
203 analyzer.setIndexWriter( *m_indexManager->indexWriter() );
205 setIndexingStarted( true );
207 // do the actual indexing
208 m_dirsToUpdate.clear();
209 foreach( const QString& f, Config::self()->folders() )
210 m_dirsToUpdate << qMakePair( f, UpdateRecursive|AutoUpdateFolder );
212 while ( 1 ) {
213 // wait for more dirs to analyze in case the initial
214 // indexing is done
215 if ( m_dirsToUpdate.isEmpty() ) {
216 setIndexingStarted( false );
218 m_dirsToUpdateMutex.lock();
219 m_dirsToUpdateWc.wait( &m_dirsToUpdateMutex );
220 m_dirsToUpdateMutex.unlock();
222 if ( !m_stopped )
223 setIndexingStarted( true );
226 // wait for resume or stop (or simply continue)
227 if ( !waitForContinue() ) {
228 break;
231 // get the next folder
232 m_dirsToUpdateMutex.lock();
233 QPair<QString, int> dir = *m_dirsToUpdate.begin();
234 m_dirsToUpdate.erase( m_dirsToUpdate.begin() );
235 m_dirsToUpdateMutex.unlock();
237 // update until stopped
238 if ( !updateDir( dir.first, &analyzer, dir.second & UpdateRecursive ) ) {
239 break;
241 m_currentFolder.clear();
244 setIndexingStarted( false );
248 // this method should be thread-safe ("should" because of the indexreader and -writer)
249 bool Nepomuk::IndexScheduler::updateDir( const QString& dir, Strigi::StreamAnalyzer* analyzer, bool recursive )
251 // kDebug() << dir << analyzer << recursive;
253 // inform interested clients
254 emit indexingFolder( dir );
256 m_currentFolder = dir;
258 // get a map of all indexed files from the dir including their stored mtime
259 std::map<std::string, time_t> filesInStore;
260 m_indexManager->indexReader()->getChildren( QFile::encodeName( dir ).data(), filesInStore );
261 std::map<std::string, time_t>::const_iterator filesInStoreEnd = filesInStore.end();
263 QList<QFileInfo> filesToIndex;
264 QList<QString> subFolders;
265 std::vector<std::string> filesToDelete;
267 // iterate over all files in the dir
268 // and select the ones we need to add or delete from the store
269 QDirIterator dirIt( dir, QDir::NoDotAndDotDot|QDir::Readable|QDir::Files|QDir::Dirs );
270 while ( dirIt.hasNext() ) {
271 QString path = dirIt.next();
273 QFileInfo fileInfo = dirIt.fileInfo();
275 bool indexFile = m_analyzerConfig->indexFile( QFile::encodeName( path ), QFile::encodeName( fileInfo.fileName() ) );
277 // check if this file is new by looking it up in the store
278 std::map<std::string, time_t>::iterator filesInStoreIt = filesInStore.find( QFile::encodeName( path ).data() );
279 bool newFile = ( filesInStoreIt == filesInStoreEnd );
281 // do we need to update? Did the file change?
282 bool fileChanged = !newFile && fileInfo.lastModified().toTime_t() != filesInStoreIt->second;
284 if ( indexFile && ( newFile || fileChanged ) )
285 filesToIndex << fileInfo;
287 if ( !newFile && ( fileChanged || !indexFile ) )
288 filesToDelete.push_back( filesInStoreIt->first );
290 // cleanup a bit for faster lookups
291 if ( !newFile )
292 filesInStore.erase( filesInStoreIt );
294 if ( indexFile && recursive && fileInfo.isDir() && !fileInfo.isSymLink() )
295 subFolders << path;
298 // all the files left in filesInStore are not in the current
299 // directory and should be deleted
300 for ( std::map<std::string, time_t>::const_iterator it = filesInStore.begin();
301 it != filesInStoreEnd; ++it ) {
302 filesToDelete.push_back( it->first );
305 // remove all files that need updating or have been removed
306 m_indexManager->indexWriter()->deleteEntries( filesToDelete );
308 // analyze all files that are new or need updating
309 foreach( const QFileInfo& file, filesToIndex ) {
311 // wait if we are suspended or return if we are stopped
312 if ( !waitForContinue() )
313 return false;
315 analyzeFile( file, analyzer );
318 // recurse into subdirs (we do this in a separate loop to always keep a proper state:
319 // compare m_currentFolder)
320 if ( recursive ) {
321 foreach( const QString& folder, subFolders ) {
322 if ( !Config::self()->excludeFolders().contains( folder ) &&
323 !updateDir( folder, analyzer, true ) )
324 return false;
328 return true;
332 void Nepomuk::IndexScheduler::analyzeFile( const QFileInfo& file, Strigi::StreamAnalyzer* analyzer )
334 // kDebug() << file.filePath();
336 Strigi::AnalysisResult analysisresult( QFile::encodeName( file.filePath() ).data(),
337 file.lastModified().toTime_t(),
338 *m_indexManager->indexWriter(),
339 *analyzer,
340 QFile::encodeName( file.path() ).data() );
341 if ( file.isFile() && !file.isSymLink() ) {
342 Strigi::FileInputStream stream( QFile::encodeName( file.filePath() ) );
343 analysisresult.index( &stream );
345 else {
346 analysisresult.index(0);
351 bool Nepomuk::IndexScheduler::waitForContinue()
353 QMutexLocker locker( &m_resumeStopMutex );
354 if ( m_suspended ) {
355 setIndexingStarted( false );
356 m_resumeStopWc.wait( &m_resumeStopMutex );
357 setIndexingStarted( true );
360 return !m_stopped;
364 void Nepomuk::IndexScheduler::updateDir( const QString& path )
366 QMutexLocker lock( &m_dirsToUpdateMutex );
367 m_dirsToUpdate << qMakePair( path, ( int )NoUpdateFlags );
368 m_dirsToUpdateWc.wakeAll();
372 void Nepomuk::IndexScheduler::updateAll()
374 QMutexLocker lock( &m_dirsToUpdateMutex );
376 // remove previously added folders to not index stuff we are not supposed to
377 // (FIXME: this does not include currently being indexed folders)
378 QSet<QPair<QString, int> >::iterator it = m_dirsToUpdate.begin();
379 while ( it != m_dirsToUpdate.end() ) {
380 if ( it->second & AutoUpdateFolder )
381 it = m_dirsToUpdate.erase( it );
382 else
383 ++it;
386 // update everything again in case the folders changed
387 foreach( const QString& f, Config::self()->folders() )
388 m_dirsToUpdate << qMakePair( f, UpdateRecursive|AutoUpdateFolder );
390 m_dirsToUpdateWc.wakeAll();
394 void Nepomuk::IndexScheduler::readConfig()
396 // load Strigi configuration
397 std::vector<std::pair<bool, std::string> > filters;
398 QStringList excludeFilters = Config::self()->excludeFilters();
399 QStringList includeFilters = Config::self()->includeFilters();
400 foreach( const QString& filter, excludeFilters ) {
401 filters.push_back( std::make_pair<bool, std::string>( false, filter.toUtf8().data() ) );
403 foreach( const QString& filter, includeFilters ) {
404 filters.push_back( std::make_pair<bool, std::string>( true, filter.toUtf8().data() ) );
406 m_analyzerConfig->setFilters(filters);
407 updateAll();
411 namespace {
412 class QDataStreamStrigiBufferedStream : public Strigi::BufferedStream<char>
414 public:
415 QDataStreamStrigiBufferedStream( QDataStream& stream )
416 : m_stream( stream ) {
419 int32_t fillBuffer( char* start, int32_t space ) {
420 int r = m_stream.readRawData( start, space );
421 if ( r == 0 ) {
422 // Strigi's API is so weird!
423 return -1;
425 else if ( r < 0 ) {
426 // Again: weird API. m_status is a protected member of StreamBaseBase (yes, 2x Base)
427 m_status = Strigi::Error;
428 return -1;
430 else {
431 return r;
435 private:
436 QDataStream& m_stream;
441 void Nepomuk::IndexScheduler::analyzeResource( const QUrl& uri, const QDateTime& modificationTime, QDataStream& data )
443 QDateTime existingMTime = QDateTime::fromTime_t( m_indexManager->indexReader()->mTime( uri.toEncoded().data() ) );
444 if ( existingMTime < modificationTime ) {
445 // remove the old data
446 std::vector<std::string> entries;
447 entries.push_back( uri.toEncoded().data() );
448 m_indexManager->indexWriter()->deleteEntries( entries );
450 // create the new
451 Strigi::StreamAnalyzer analyzer( *m_analyzerConfig );
452 analyzer.setIndexWriter( *m_indexManager->indexWriter() );
453 Strigi::AnalysisResult analysisresult( uri.toEncoded().data(),
454 modificationTime.toTime_t(),
455 *m_indexManager->indexWriter(),
456 analyzer );
457 QDataStreamStrigiBufferedStream stream( data );
458 analysisresult.index( &stream );
460 else {
461 kDebug() << uri << "up to date";
465 #include "indexscheduler.moc"