1 /* This file is part of the KDE Project
2 Copyright (c) 2008 Sebastian Trueg <trueg@kde.org>
4 Parts of this file are based on code from Strigi
5 Copyright (C) 2006-2007 Jos van den Oever <jos@vandenoever.info>
7 This library is free software; you can redistribute it and/or
8 modify it under the terms of the GNU Library General Public
9 License version 2 as published by the Free Software Foundation.
11 This library is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 Library General Public License for more details.
16 You should have received a copy of the GNU Library General Public License
17 along with this library; see the file COPYING.LIB. If not, write to
18 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 Boston, MA 02110-1301, USA.
22 #include "indexscheduler.h"
25 #include <QtCore/QMutexLocker>
26 #include <QtCore/QList>
27 #include <QtCore/QFile>
28 #include <QtCore/QFileInfo>
29 #include <QtCore/QDirIterator>
30 #include <QtCore/QDateTime>
31 #include <QtCore/QByteArray>
32 #include <QtCore/QUrl>
35 #include <KTemporaryFile>
40 #include <strigi/strigiconfig.h>
41 #include <strigi/indexwriter.h>
42 #include <strigi/indexmanager.h>
43 #include <strigi/indexreader.h>
44 #include <strigi/analysisresult.h>
45 #include <strigi/fileinputstream.h>
46 #include <strigi/analyzerconfiguration.h>
49 // FIXME: remove all files from the datastore which are in folders not supposed to be indexed
// StoppableConfiguration: a Strigi::AnalyzerConfiguration subclass that
// declares indexMore(), addMoreText() and setStop( bool ).
// Elsewhere in this file the scheduler calls setStop( true ) from stop() and
// setStop( false ) at the start of run().
// NOTE(review): the bodies of the three members are not visible in this
// listing; presumably indexMore()/addMoreText() report whether the stop flag
// is unset -- confirm against the full file.
51 class StoppableConfiguration
: public Strigi::AnalyzerConfiguration
{
53 StoppableConfiguration()
// setIndexArchiveContents( false ) is only compiled in when the Strigi headers
// define STRIGI_IS_VERSION and STRIGI_IS_VERSION( 0, 6, 1 ) evaluates to true.
55 #if defined(STRIGI_IS_VERSION)
56 #if STRIGI_IS_VERSION( 0, 6, 1 )
57 setIndexArchiveContents( false );
62 bool indexMore() const {
66 bool addMoreText() const {
// Parameter s: the new value of the stop switch (true is passed by stop(),
// false by run()).
70 void setStop( bool s
) {
// Flag values stored as the int half of each (folder, flags) pair queued in
// m_dirsToUpdate. run() and updateAll() queue the configured folders with
// UpdateRecursive|AutoUpdateFolder; run() passes ( flags & UpdateRecursive )
// as the "recursive" argument of updateDir(), and updateAll() tests
// AutoUpdateFolder when pruning the queue.
// NOTE(review): the enum header and the "no flags" enumerator (used as
// NoUpdateFlags by updateDir( path )) are not visible in this listing.
82 * No flags, only used to make code more readable
87 * The folder should be updated recursive
89 UpdateRecursive
= 0x1,
92 * The folder has been scheduled to update by the
93 * update system, not by a call to updateDir
95 AutoUpdateFolder
= 0x2
// Constructs the scheduler around an existing Strigi index manager.
//   manager - stored in m_indexManager; the visible code never deletes it.
//   parent  - NOTE(review): presumably forwarded to the base-class constructor;
//             that initialiser is not visible in this listing.
// The scheduler starts out not suspended.
100 Nepomuk::IndexScheduler::IndexScheduler( Strigi::IndexManager
* manager
, QObject
* parent
)
102 m_suspended( false ),
105 m_indexManager( manager
)
// Heap-allocated analyzer configuration; released again in ~IndexScheduler().
107 m_analyzerConfig
= new StoppableConfiguration
;
// Re-read the filter configuration whenever Config reports a change.
109 connect( Config::self(), SIGNAL( configChanged() ),
110 this, SLOT( readConfig() ) );
// Destructor: frees the StoppableConfiguration allocated by the constructor.
// m_indexManager is not deleted here.
114 Nepomuk::IndexScheduler::~IndexScheduler()
116 delete m_analyzerConfig
;
// Requests that indexing be paused. Holds m_resumeStopMutex (the mutex that
// waitForContinue() waits with) for the rest of the call.
// NOTE(review): the statements that actually change the suspended state are
// not visible in this listing -- confirm against the full file.
120 void Nepomuk::IndexScheduler::suspend()
123 QMutexLocker
locker( &m_resumeStopMutex
);
// Lets a paused scheduler continue: takes m_resumeStopMutex and wakes every
// thread blocked on m_resumeStopWc (the condition waitForContinue() waits on).
// NOTE(review): the statement clearing the suspended state is not visible in
// this listing.
129 void Nepomuk::IndexScheduler::resume()
132 QMutexLocker
locker( &m_resumeStopMutex
);
134 m_resumeStopWc
.wakeAll();
// Convenience setter taking the desired suspended state as a bool.
// NOTE(review): the body is not visible in this listing; presumably it
// dispatches to suspend() or resume() depending on `suspended` -- confirm.
139 void Nepomuk::IndexScheduler::setSuspended( bool suspended
)
// Asks the indexing thread to terminate.
// Takes m_resumeStopMutex, flips the analyzer configuration's stop switch and
// then wakes both wait conditions, so a thread blocked in run() (waiting on
// m_dirsToUpdateWc for new folders) or in waitForContinue() (waiting on
// m_resumeStopWc while suspended) gets a chance to proceed.
// NOTE(review): any additional "stopped" member set here is not visible in
// this listing.
148 void Nepomuk::IndexScheduler::stop()
151 QMutexLocker
locker( &m_resumeStopMutex
);
154 m_analyzerConfig
->setStop( true );
155 m_dirsToUpdateWc
.wakeAll();
156 m_resumeStopWc
.wakeAll();
// Returns true only while isRunning() holds and m_suspended is set; a
// scheduler that is not running is never reported as suspended.
161 bool Nepomuk::IndexScheduler::isSuspended() const
163 return isRunning() && m_suspended
;
// Reports whether indexing is currently in progress.
// NOTE(review): the body is not visible in this listing; presumably it returns
// m_indexing, the flag maintained by setIndexingStarted() -- confirm.
167 bool Nepomuk::IndexScheduler::isIndexing() const
// Returns a copy of m_currentFolder, which updateDir() sets to the folder it
// is processing and run() clears again afterwards.
// NOTE(review): no lock is taken here although m_currentFolder is written
// from run()/updateDir() -- confirm which threads call this accessor.
173 QString
Nepomuk::IndexScheduler::currentFolder() const
175 return m_currentFolder
;
// Records the indexing state and notifies listeners.
//   started - the new state.
// Nothing happens when `started` equals the current m_indexing, so repeated
// calls with the same value do not re-emit a signal.
179 void Nepomuk::IndexScheduler::setIndexingStarted( bool started
)
181 if ( started
!= m_indexing
) {
182 m_indexing
= started
;
// NOTE(review): the branch choosing between the two signals is not visible in
// this listing; presumably indexingStarted() is emitted when the new state is
// true and indexingStopped() otherwise -- confirm.
184 emit
indexingStarted();
186 emit
indexingStopped();
// Thread entry point: indexes the configured folders, then keeps serving
// folders queued through updateDir( path ) / updateAll().
// NOTE(review): the loop header and the break/return statements are not
// visible in this listing; the comments below describe only the visible
// statements.
191 void Nepomuk::IndexScheduler::run()
193 // set lowest priority for this thread
194 setPriority( QThread::IdlePriority
);
// Clear a stop request left over from an earlier stop() call.
199 m_analyzerConfig
->setStop( false );
// One analyzer for the whole run, writing through the index manager's writer.
202 Strigi::StreamAnalyzer
analyzer( *m_analyzerConfig
);
203 analyzer
.setIndexWriter( *m_indexManager
->indexWriter() );
205 setIndexingStarted( true );
207 // do the actual indexing
// Start from a fresh queue holding every configured folder, marked as
// recursive and as scheduled automatically.
// NOTE(review): in the lines visible here the queue is cleared, filled and
// tested with isEmpty() without m_dirsToUpdateMutex being held, whereas
// updateDir( path ) and updateAll() modify it under that mutex -- confirm.
208 m_dirsToUpdate
.clear();
209 foreach( const QString
& f
, Config::self()->folders() )
210 m_dirsToUpdate
<< qMakePair( f
, UpdateRecursive
|AutoUpdateFolder
);
213 // wait for more dirs to analyze in case the initial
// Queue empty: report "not indexing" and block until updateDir( path ),
// updateAll() or stop() wakes m_dirsToUpdateWc.
// NOTE(review): the wait is not visibly wrapped in a loop re-checking the
// queue, so a wake-up that arrives before the wait starts may be missed --
// confirm against the full file.
215 if ( m_dirsToUpdate
.isEmpty() ) {
216 setIndexingStarted( false );
218 m_dirsToUpdateMutex
.lock();
219 m_dirsToUpdateWc
.wait( &m_dirsToUpdateMutex
);
220 m_dirsToUpdateMutex
.unlock();
223 setIndexingStarted( true );
226 // wait for resume or stop (or simply continue)
227 if ( !waitForContinue() ) {
231 // get the next folder
// Take a copy of one queued (folder, flags) pair and remove it from the queue
// while holding the queue mutex.
232 m_dirsToUpdateMutex
.lock();
233 QPair
<QString
, int> dir
= *m_dirsToUpdate
.begin();
234 m_dirsToUpdate
.erase( m_dirsToUpdate
.begin() );
235 m_dirsToUpdateMutex
.unlock();
237 // update until stopped
// Only the UpdateRecursive bit of the stored flags is passed on, as the
// "recursive" argument.
238 if ( !updateDir( dir
.first
, &analyzer
, dir
.second
& UpdateRecursive
) ) {
// No folder is being processed any more.
241 m_currentFolder
.clear();
244 setIndexingStarted( false );
// Brings the index up to date for one folder.
//   dir       - folder to scan.
//   analyzer  - analyzer handed on to analyzeFile() for every file to index.
//   recursive - when true, sub-folders are collected and processed as well.
// Returns bool; run() treats a false result as "stopped".
// NOTE(review): the return statements are not visible in this listing --
// confirm the exact conditions against the full file.
248 // this method should be thread-safe ("should" because of the indexreader and -writer)
249 bool Nepomuk::IndexScheduler::updateDir( const QString
& dir
, Strigi::StreamAnalyzer
* analyzer
, bool recursive
)
251 // kDebug() << dir << analyzer << recursive;
253 // inform interested clients
254 emit
indexingFolder( dir
);
256 m_currentFolder
= dir
;
258 // get a map of all indexed files from the dir including their stored mtime
259 std::map
<std::string
, time_t> filesInStore
;
260 m_indexManager
->indexReader()->getChildren( QFile::encodeName( dir
).data(), filesInStore
);
// end() of a std::map stays valid across the erase() calls below, so it can be
// cached once here.
261 std::map
<std::string
, time_t>::const_iterator filesInStoreEnd
= filesInStore
.end();
// Work lists filled by the scan below.
263 QList
<QFileInfo
> filesToIndex
;
264 QList
<QString
> subFolders
;
265 std::vector
<std::string
> filesToDelete
;
267 // iterate over all files in the dir
268 // and select the ones we need to add or delete from the store
269 QDirIterator
dirIt( dir
, QDir::NoDotAndDotDot
|QDir::Readable
|QDir::Files
|QDir::Dirs
);
270 while ( dirIt
.hasNext() ) {
271 QString path
= dirIt
.next();
273 QFileInfo fileInfo
= dirIt
.fileInfo();
// Ask the analyzer configuration (i.e. the include/exclude filters) whether
// this entry should be indexed at all.
275 bool indexFile
= m_analyzerConfig
->indexFile( QFile::encodeName( path
), QFile::encodeName( fileInfo
.fileName() ) );
277 // check if this file is new by looking it up in the store
278 std::map
<std::string
, time_t>::iterator filesInStoreIt
= filesInStore
.find( QFile::encodeName( path
).data() );
279 bool newFile
= ( filesInStoreIt
== filesInStoreEnd
);
281 // do we need to update? Did the file change?
// The stored mtime is only read when the entry exists (short-circuit on
// !newFile).
282 bool fileChanged
= !newFile
&& fileInfo
.lastModified().toTime_t() != filesInStoreIt
->second
;
// New or modified entries that pass the filters get (re-)indexed.
284 if ( indexFile
&& ( newFile
|| fileChanged
) )
285 filesToIndex
<< fileInfo
;
// Known entries that changed or no longer pass the filters are removed from
// the store first.
287 if ( !newFile
&& ( fileChanged
|| !indexFile
) )
288 filesToDelete
.push_back( filesInStoreIt
->first
);
290 // cleanup a bit for faster lookups
// NOTE(review): erasing is only valid when the entry was found; the guard on
// this statement is not visible in this listing -- confirm it exists.
292 filesInStore
.erase( filesInStoreIt
);
// Sub-folders are only collected for recursive updates; symbolic links are
// not followed.
// NOTE(review): the statement executed under this condition is not visible
// here; presumably it appends the path to subFolders.
294 if ( indexFile
&& recursive
&& fileInfo
.isDir() && !fileInfo
.isSymLink() )
298 // all the files left in filesInStore are not in the current
299 // directory and should be deleted
300 for ( std::map
<std::string
, time_t>::const_iterator it
= filesInStore
.begin();
301 it
!= filesInStoreEnd
; ++it
) {
302 filesToDelete
.push_back( it
->first
);
305 // remove all files that need updating or have been removed
306 m_indexManager
->indexWriter()->deleteEntries( filesToDelete
);
308 // analyze all files that are new or need updating
309 foreach( const QFileInfo
& file
, filesToIndex
) {
311 // wait if we are suspended or return if we are stopped
312 if ( !waitForContinue() )
315 analyzeFile( file
, analyzer
);
318 // recurse into subdirs (we do this in a separate loop to always keep a proper state:
319 // compare m_currentFolder)
// Folders listed in Config::excludeFolders() are skipped; because of the
// short-circuit &&, updateDir() is not called for them.
321 foreach( const QString
& folder
, subFolders
) {
322 if ( !Config::self()->excludeFolders().contains( folder
) &&
323 !updateDir( folder
, analyzer
, true ) )
// Indexes a single file system entry.
//   file     - entry to index; its path, parent path and modification time are
//              handed to Strigi.
//   analyzer - the stream analyzer in use.
// NOTE(review): one constructor argument of the AnalysisResult (between the
// index writer and the parent path) is not visible in this listing;
// presumably it is *analyzer -- confirm.
332 void Nepomuk::IndexScheduler::analyzeFile( const QFileInfo
& file
, Strigi::StreamAnalyzer
* analyzer
)
334 // kDebug() << file.filePath();
336 Strigi::AnalysisResult
analysisresult( QFile::encodeName( file
.filePath() ).data(),
337 file
.lastModified().toTime_t(),
338 *m_indexManager
->indexWriter(),
340 QFile::encodeName( file
.path() ).data() );
// Regular files that are not symbolic links are analyzed through a file input
// stream.
341 if ( file
.isFile() && !file
.isSymLink() ) {
342 Strigi::FileInputStream
stream( QFile::encodeName( file
.filePath() ) );
343 analysisresult
.index( &stream
);
// NOTE(review): presumably the else branch (its keyword is not visible here):
// everything else is indexed without a content stream.
346 analysisresult
.index(0);
// Checkpoint called by run() and updateDir() between units of work.
// Holds m_resumeStopMutex; while waiting, the scheduler reports "not indexing"
// and blocks on m_resumeStopWc, which resume() and stop() wake.
// Returns bool; callers treat false as "stop".
// NOTE(review): the condition guarding the wait and the return statement are
// not visible in this listing -- confirm against the full file.
351 bool Nepomuk::IndexScheduler::waitForContinue()
353 QMutexLocker
locker( &m_resumeStopMutex
);
355 setIndexingStarted( false );
356 m_resumeStopWc
.wait( &m_resumeStopMutex
);
357 setIndexingStarted( true );
// Queues a single folder for updating.
//   path - folder to add to m_dirsToUpdate.
// The entry is stored with NoUpdateFlags, i.e. without the UpdateRecursive and
// AutoUpdateFolder bits that run() and updateAll() use for configured folders.
// The queue mutex is held while inserting, then the indexing thread (which may
// be waiting in run()) is woken.
364 void Nepomuk::IndexScheduler::updateDir( const QString
& path
)
366 QMutexLocker
lock( &m_dirsToUpdateMutex
);
367 m_dirsToUpdate
<< qMakePair( path
, ( int )NoUpdateFlags
);
368 m_dirsToUpdateWc
.wakeAll();
// Re-queues every configured folder for a recursive update.
// Holds m_dirsToUpdateMutex for the whole call. Entries queued explicitly via
// updateDir( path ) carry no AutoUpdateFolder bit and are therefore kept.
372 void Nepomuk::IndexScheduler::updateAll()
374 QMutexLocker
lock( &m_dirsToUpdateMutex
);
376 // remove previously added folders to not index stuff we are not supposed to
377 // (FIXME: this does not include currently being indexed folders)
378 QSet
<QPair
<QString
, int> >::iterator it
= m_dirsToUpdate
.begin();
379 while ( it
!= m_dirsToUpdate
.end() ) {
// erase() returns the iterator to continue with, so it is assigned back.
// NOTE(review): the advance of `it` for entries that are kept is not visible
// in this listing -- confirm it exists.
380 if ( it
->second
& AutoUpdateFolder
)
381 it
= m_dirsToUpdate
.erase( it
);
386 // update everything again in case the folders changed
387 foreach( const QString
& f
, Config::self()->folders() )
388 m_dirsToUpdate
<< qMakePair( f
, UpdateRecursive
|AutoUpdateFolder
);
// Wake the indexing thread in case it is waiting for new folders in run().
390 m_dirsToUpdateWc
.wakeAll();
// Slot connected to Config::configChanged() in the constructor.
// Translates the configured exclude and include filters into the
// (bool, pattern) list passed to m_analyzerConfig->setFilters(): the bool is
// false for exclude patterns and true for include patterns, and all exclude
// entries are appended before the include entries.
394 void Nepomuk::IndexScheduler::readConfig()
396 // load Strigi configuration
397 std::vector
<std::pair
<bool, std::string
> > filters
;
398 QStringList excludeFilters
= Config::self()->excludeFilters();
399 QStringList includeFilters
= Config::self()->includeFilters();
// Patterns are handed over as UTF-8 encoded std::string.
400 foreach( const QString
& filter
, excludeFilters
) {
401 filters
.push_back( std::make_pair
<bool, std::string
>( false, filter
.toUtf8().data() ) );
403 foreach( const QString
& filter
, includeFilters
) {
404 filters
.push_back( std::make_pair
<bool, std::string
>( true, filter
.toUtf8().data() ) );
406 m_analyzerConfig
->setFilters(filters
);
// Adapter that lets Strigi read from a QDataStream: a
// Strigi::BufferedStream<char> whose fillBuffer() pulls raw bytes from the
// wrapped stream. Used by analyzeResource().
// The QDataStream is held by reference, so it must outlive this object.
412 class QDataStreamStrigiBufferedStream
: public Strigi::BufferedStream
<char>
415 QDataStreamStrigiBufferedStream( QDataStream
& stream
)
416 : m_stream( stream
) {
// Reads up to `space` bytes into the buffer at `start` via
// QDataStream::readRawData().
// NOTE(review): the conditions on `r` and the return statements are not
// visible in this listing; presumably the error status below is set when
// readRawData() reports a failure -- confirm.
419 int32_t fillBuffer( char* start
, int32_t space
) {
420 int r
= m_stream
.readRawData( start
, space
);
422 // Strigi's API is so weird!
426 // Again: weird API. m_status is a protected member of StreamBaseBase (yes, 2x Base)
427 m_status
= Strigi::Error
;
// The wrapped stream (not owned).
436 QDataStream
& m_stream
;
// Indexes a resource whose content arrives through a QDataStream rather than
// from a local file.
//   uri              - identifier of the resource; its encoded form is used as
//                      the key in the index.
//   modificationTime - modification time of the supplied data.
//   data             - stream delivering the resource content.
// The data is only analyzed when the mtime stored in the index is older than
// modificationTime.
441 void Nepomuk::IndexScheduler::analyzeResource( const QUrl
& uri
, const QDateTime
& modificationTime
, QDataStream
& data
)
443 QDateTime existingMTime
= QDateTime::fromTime_t( m_indexManager
->indexReader()->mTime( uri
.toEncoded().data() ) );
444 if ( existingMTime
< modificationTime
) {
445 // remove the old data
446 std::vector
<std::string
> entries
;
447 entries
.push_back( uri
.toEncoded().data() );
448 m_indexManager
->indexWriter()->deleteEntries( entries
);
// A dedicated analyzer for this call, sharing the scheduler's configuration
// and the index manager's writer.
451 Strigi::StreamAnalyzer
analyzer( *m_analyzerConfig
);
452 analyzer
.setIndexWriter( *m_indexManager
->indexWriter() );
// NOTE(review): the remaining constructor arguments of the AnalysisResult are
// not visible in this listing.
453 Strigi::AnalysisResult
analysisresult( uri
.toEncoded().data(),
454 modificationTime
.toTime_t(),
455 *m_indexManager
->indexWriter(),
// Feed the QDataStream to Strigi through the adapter class defined above.
457 QDataStreamStrigiBufferedStream
stream( data
);
458 analysisresult
.index( &stream
);
// NOTE(review): presumably the else branch (its keyword is not visible here):
// the stored data is not older than modificationTime, so nothing is indexed.
461 kDebug() << uri
<< "up to date";
465 #include "indexscheduler.moc"