2 This file is part of the Nepomuk KDE project.
3 Copyright (C) 2007 Sebastian Trueg <trueg@kde.org>
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License version 2 as published by the Free Software Foundation.
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public License
15 along with this library; see the file COPYING.LIB. If not, write to
16 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 Boston, MA 02110-1301, USA.
20 #include "searchthread.h"
24 #include <Nepomuk/ResourceManager>
25 #include <Nepomuk/Resource>
26 #include <Nepomuk/Types/Property>
27 #include <Nepomuk/Types/Class>
28 #include <Nepomuk/Types/Literal>
30 #include <Soprano/Version>
31 #include <Soprano/Model>
32 #include <Soprano/QueryResultIterator>
33 #include <Soprano/Node>
34 #include <Soprano/Statement>
35 #include <Soprano/LiteralValue>
36 #include <Soprano/StatementIterator>
37 #include <Soprano/Vocabulary/RDF>
38 #include <Soprano/Vocabulary/RDFS>
39 #include <Soprano/Vocabulary/NRL>
40 #include <Soprano/Vocabulary/NAO>
41 #include <Soprano/Vocabulary/XMLSchema>
42 #include <Soprano/Vocabulary/OWL>
46 #include <QtCore/QTime>
50 // FIXME: With our cutoff score we might miss results that are hit multiple times and thus, would get their
53 #warning Make query optimization methods return an invalid term if the query cannot be resolved and handle this as no results
55 using namespace Soprano
;
/**
 * The maximum number of resources that are matched in resolveValues when converting
 * an equals or contains term.
 */
const int MAX_RESOURCES = 4;
65 void mergeInResult( QHash
<QUrl
, Nepomuk::Search::Result
>& results
, const Nepomuk::Search::Result
& resource
) {
66 QHash
<QUrl
, Nepomuk::Search::Result
>::iterator old
= results
.find( resource
.resourceUri() );
67 if ( old
== results
.end() ) {
68 results
.insert( resource
.resourceUri(), resource
);
71 // FIXME: how do we join the scores properly? Is adding a good idea? It can certainly not be multiplication!
72 Nepomuk::Search::Result
& result
= *old
;
73 result
.setScore( result
.score() + resource
.score() );
77 void mergeInResults( QHash
<QUrl
, Nepomuk::Search::Result
>& results
, const QHash
<QUrl
, Nepomuk::Search::Result
>& otherResults
) {
78 for ( QHash
<QUrl
, Nepomuk::Search::Result
>::const_iterator it
= otherResults
.constBegin();
79 it
!= otherResults
.constEnd(); ++it
) {
80 mergeInResult( results
, it
.value() );
84 // This is a copy of Soprano::Index::IndexFilterModel::encodeStringForLuceneQuery
85 // which we do not use to prevent linking to sopranoindex
86 QString
luceneQueryEscape( const QString
& s
) {
87 /* Chars to escape: + - && || ! ( ) { } [ ] ^ " ~ : \ */
89 static QRegExp
rx( "([\\-" + QRegExp::escape( "+&|!(){}[]^\"~:\\" ) + "])" );
91 es
.replace( rx
, "\\\\1" );
95 QString
luceneQueryEscape( const QUrl
& s
) {
96 return luceneQueryEscape( QString::fromAscii( s
.toEncoded() ) );
99 QString
createLuceneLiteralQuery( const QString
& escaped
) {
100 if ( escaped
.contains( QRegExp( "\\s" ) ) ) {
101 return "\"" + escaped
+ "\"";
108 QString
createLuceneQuery( const Nepomuk::Search::SearchNode
& node
) {
109 if ( node
.term
.type() == Nepomuk::Search::Term::LiteralTerm
) {
110 return createLuceneLiteralQuery( luceneQueryEscape( node
.term
.value().toString() ) );
112 else if ( node
.term
.type() == Nepomuk::Search::Term::ComparisonTerm
) {
113 return luceneQueryEscape( node
.term
.property() ) + ':' + createLuceneLiteralQuery( luceneQueryEscape( node
.term
.subTerms().first().value().toString() ) );
116 Q_ASSERT( node
.term
.type() == Nepomuk::Search::Term::AndTerm
||
117 node
.term
.type() == Nepomuk::Search::Term::OrTerm
);
120 foreach( const Nepomuk::Search::SearchNode
& n
, node
.subNodes
) {
121 sq
+= createLuceneQuery( n
);
123 if ( node
.term
.type() == Nepomuk::Search::Term::AndTerm
) {
124 return " ( " + sq
.join( " AND " ) + " ) ";
127 return " ( " + sq
.join( " OR " ) + " ) ";
132 QString
comparatorString( Nepomuk::Search::Term::Comparator c
) {
134 case Nepomuk::Search::Term::Contains
:
136 case Nepomuk::Search::Term::Equal
:
138 case Nepomuk::Search::Term::Greater
:
140 case Nepomuk::Search::Term::Smaller
:
142 case Nepomuk::Search::Term::GreaterOrEqual
:
144 case Nepomuk::Search::Term::SmallerOrEqual
:
152 bool isNumberLiteralValue( const Soprano::LiteralValue
& value
) {
153 return value
.isInt() || value
.isInt64() || value
.isUnsignedInt() || value
.isUnsignedInt64() || value
.isDouble();
157 QString
createGraphPattern( const Nepomuk::Search::SearchNode
& node
, int& varCnt
, const QString
& varName
= QString( "?r" ) )
159 switch( node
.term
.type() ) {
160 case Nepomuk::Search::Term::ComparisonTerm
: {
162 Nepomuk::Search::Term
subTerm( node
.term
.subTerms().first() );
165 // is the subterm (we only support one ATM) a final term (no further subterms)
166 // -> actually match the literal or resource
168 if ( subTerm
.type() == Nepomuk::Search::Term::ResourceTerm
||
169 subTerm
.type() == Nepomuk::Search::Term::LiteralTerm
) {
170 if( node
.term
.comparator() != Nepomuk::Search::Term::Equal
) {
171 // For numbers there is no need for quotes + this way we can handle all the xsd decimal types
172 // FIXME: it may be necessary to escape stuff
173 QString filter
= QString( "?var%1 %2 " )
175 .arg( comparatorString( node
.term
.comparator() ) );
176 if ( isNumberLiteralValue( subTerm
.value() ) ) {
177 filter
+= subTerm
.value().toString();
180 Nepomuk::Types::Property
prop( node
.term
.property() );
181 filter
+= QString( "\"%1\"" ).arg( subTerm
.value().toString() );
182 if ( prop
.literalRangeType().dataTypeUri().isValid() )
183 filter
+= QString( "^^<%1>" ).arg( prop
.literalRangeType().dataTypeUri().toString() );
186 return QString( "%1 <%2> ?var%3 . FILTER(%4) . " )
188 .arg( QString::fromAscii( node
.term
.property().toEncoded() ) )
193 if ( subTerm
.type() == Nepomuk::Search::Term::ResourceTerm
) {
194 return QString( "%1 <%2> <%3> . " )
196 .arg( QString::fromAscii( node
.term
.property().toEncoded() ) )
197 .arg( QString::fromAscii( subTerm
.resource().toEncoded() ) );
199 else if ( Nepomuk::Types::Property( node
.term
.property() ).range().isValid() ) {
200 return QString( "%7 <%1> ?x . { ?x <%2> \"%3\"^^<%4> . } UNION { ?x <%5> \"%3\"^^<%4>. } UNION { ?x <%6> \"%3\"^^<%4> . }" )
201 .arg( QString::fromAscii( node
.term
.property().toEncoded() ) )
202 .arg( Soprano::Vocabulary::RDFS::label().toString() )
203 .arg( subTerm
.value().toString() )
204 .arg( Soprano::Vocabulary::XMLSchema::string().toString() )
205 .arg( Soprano::Vocabulary::NAO::prefLabel().toString() )
206 .arg( Soprano::Vocabulary::NAO::identifier().toString() )
210 return QString( "%1 <%2> \"%3\"^^<%4> . " )
212 .arg( QString::fromAscii( node
.term
.property().toEncoded() ) )
213 .arg( subTerm
.value().toString() )
214 .arg( Nepomuk::Types::Property( node
.term
.property() ).literalRangeType().dataTypeUri().toString() );
220 // Is the subterm not final, i.e. has further subterms
221 // -> combine graph pattern with subterm graph pattern
224 QString bridgeVarName
= QString( "?var%1" ).arg( ++varCnt
);
225 return QString( "%1 <%2> %3 . " )
227 .arg( QString::fromAscii( node
.term
.property().toEncoded() ) )
228 .arg( bridgeVarName
)
229 + createGraphPattern( node
.subNodes
.first(), varCnt
, bridgeVarName
);
233 case Nepomuk::Search::Term::AndTerm
: {
235 foreach( const Nepomuk::Search::SearchNode
& n
, node
.subNodes
) {
236 s
+= createGraphPattern( n
, varCnt
);
242 case Nepomuk::Search::Term::OrTerm
: {
244 foreach( const Nepomuk::Search::SearchNode
& n
, node
.subNodes
) {
245 s
+= createGraphPattern( n
, varCnt
);
247 Q_ASSERT( !s
.isEmpty() );
248 return "{ " + s
.join( " } UNION { " ) + " } ";
252 Q_ASSERT_X( 0, "createGraphPattern", "unsupported Term type" );
260 Nepomuk::Search::SearchThread::SearchThread( QObject
* parent
)
266 Nepomuk::Search::SearchThread::~SearchThread()
271 void Nepomuk::Search::SearchThread::query( const Query
& term
, double cutOffScore
)
277 kDebug() << term
<< cutOffScore
;
281 m_cutOffScore
= cutOffScore
;
288 void Nepomuk::Search::SearchThread::cancel()
295 void Nepomuk::Search::SearchThread::run()
300 if ( m_searchTerm
.type() == Query::PlainQuery
) {
301 kDebug() << "Plain Query: " << m_searchTerm
;
302 Term t
= resolveFields( m_searchTerm
.term() );
303 kDebug() << "Fields resolved:" << t
;
304 t
= resolveValues( t
);
305 kDebug() << "Values resolved:" << t
;
307 kDebug() << "Optimized query:" << t
;
309 search( splitLuceneSparql( t
) /*optimize( resolveValues( resolveFields( m_searchTerm ) ) )*/, 1.0, true );
312 // FIXME: once we have the Soprano query API it should be simple to add the requestProperties here
313 // for now we do it the hacky way
314 QString query
= m_searchTerm
.sparqlQuery();
315 int pos
= query
.indexOf( QLatin1String( "where" ) );
317 query
.insert( pos
, buildRequestPropertyVariableList() + ' ' );
318 pos
= query
.lastIndexOf( '}' );
320 query
.insert( pos
, ' ' + buildRequestPropertyPatterns() + ' ' );
324 sparqlQuery( query
, 1.0, true );
327 kDebug() << time
.elapsed();
331 Nepomuk::Search::Term
Nepomuk::Search::SearchThread::resolveFields( const Term
& term
)
333 switch( term
.type() ) {
337 newTerm
.setType( term
.type() );
338 QList
<Term
> terms
= term
.subTerms();
339 foreach( const Term
& t
, terms
) {
340 if ( m_canceled
) break;
341 newTerm
.addSubTerm( resolveFields( t
) );
347 case Term::ComparisonTerm
: {
348 Term
newTerm( term
);
349 Term subTerm
= term
.subTerms().first();
350 if ( subTerm
.type() != Term::LiteralTerm
&&
351 subTerm
.type() != Term::ResourceTerm
) {
352 newTerm
.setSubTerms( QList
<Term
>() << resolveFields( subTerm
) );
355 if ( !newTerm
.property().isValid() ) {
356 // FIXME: use the score of the field search as boost factors
357 QList
<QUrl
> properties
= matchFieldName( term
.field() );
358 if ( properties
.count() > 0 ) {
359 if ( properties
.count() == 1 ) {
360 newTerm
.setProperty( properties
.first() );
365 orTerm
.setType( Term::OrTerm
);
366 foreach( const QUrl
& property
, properties
) {
368 t
.setProperty( property
);
369 orTerm
.addSubTerm( t
);
375 kDebug() << "Failed to resolve field" << term
.field() << "to any property!";
387 // precondition: resolveFields needs to be run before this one as it only touches properties
388 Nepomuk::Search::Term
Nepomuk::Search::SearchThread::resolveValues( const Term
& term
)
390 switch( term
.type() ) {
394 newTerm
.setType( term
.type() );
395 QList
<Term
> terms
= term
.subTerms();
396 foreach( const Term
& t
, terms
) {
397 if ( m_canceled
) break;
398 newTerm
.addSubTerm( resolveValues( t
) );
404 case Term::ComparisonTerm
: {
405 // FIXME: we could also handle this via lucene for literals but what is better?
406 // with lucene we have the additional work of getting the requestProperties
408 // FIXME: handle subqueries
411 // ComparisonTerm Terms can contain subterms that again. We do not support
412 // arbitrary subterms but only comparator terms. Here we will only resolve the
413 // last one since all others will be handled in a single SPARQL query.
// Also, non-contains comparators are handled in the SPARQL query as well.
417 // Thus, in the end we only resolve literal contains terms.
419 if ( term
.comparator() == Term::Contains
&&
420 term
.subTerms().first().type() == Term::LiteralTerm
) {
422 Q_ASSERT ( term
.property().isValid() );
424 // we only need to augment terms that have a property with
425 // a non-literal range. These will never hit in a lucene query
427 Nepomuk::Types::Property
prop( term
.property() );
428 if ( prop
.range().isValid() ) {
431 orTerm
.setType( Term::OrTerm
);
433 // FIXME: cache the results as it is very well possible that we search the same multiple times
434 // if resolveFields did create an OR term
436 // rdfs:label has a higher priority than any other property
437 // TODO: without being able to query the resource type simple searching for term.value() is waaaaay to slow
438 //QString query = QString( "%1:\"%2\"^4 \"%2\"" )
439 QString query
= QString( "%1:\"%2\" OR %3:\"%2\" OR %4:\"%2\"" )
440 .arg( luceneQueryEscape( Soprano::Vocabulary::RDFS::label() ) )
441 .arg( term
.subTerms().first().value().toString() )
442 .arg( luceneQueryEscape( Soprano::Vocabulary::NAO::prefLabel() ) )
443 .arg( luceneQueryEscape( Soprano::Vocabulary::NAO::identifier() ) );
444 Soprano::QueryResultIterator hits
= ResourceManager::instance()->mainModel()->executeQuery( query
,
445 Soprano::Query::QueryLanguageUser
,
448 while ( hits
.next() ) {
449 if ( m_canceled
) break;
451 // FIXME: use the lucene score as boost factor
452 QUrl hit
= hits
.binding( 0 ).uri();
453 if ( prop
.range().uri() == Soprano::Vocabulary::RDFS::Resource() ||
454 Nepomuk::Resource( hit
).hasType( prop
.range().uri() ) ) {
455 orTerm
.addSubTerm( Term( term
.property(), hit
) );
456 if ( orTerm
.subTerms().count() == MAX_RESOURCES
) {
462 if ( orTerm
.subTerms().count() == 1 ) {
463 return orTerm
.subTerms().first();
465 else if ( orTerm
.subTerms().count() ) {
469 kDebug() << "Failed to match value" << term
.subTerms().first().value() << "to any possible resource.";
474 // nothing to do here
479 // non-literal term or non-contains term -> handled in SPARQL query
481 Term
newTerm( term
);
482 newTerm
.setSubTerms( QList
<Term
>() << resolveValues( term
.subTerms().first() ) );
493 Nepomuk::Search::Term
Nepomuk::Search::SearchThread::optimize( const Term
& term
)
495 switch( term
.type() ) {
498 QList
<Term
> subTerms
= term
.subTerms();
499 QList
<Term
> newSubTerms
;
500 QList
<Term
>::const_iterator
end( subTerms
.constEnd() );
501 for ( QList
<Term
>::const_iterator it
= subTerms
.constBegin();
504 Term ot
= optimize( t
);
505 if ( ot
.type() == term
.type() ) {
506 newSubTerms
+= ot
.subTerms();
513 newTerm
.setType( term
.type() );
514 newTerm
.setSubTerms( newSubTerms
);
524 Nepomuk::Search::SearchNode
Nepomuk::Search::SearchThread::splitLuceneSparql( const Term
& term
)
526 // Goal: separate the terms into 2 groups: literal and resource which are
527 // merged with only one AND or OR action. Is that possible?
529 // For now we will do this (our query lang does not handle nested queries anyway)
530 // LiteralTerm -> one lucene, no sparql
531 // ComparisonTerm -> one lucene, no sparql (resource contains will be resolved to equality above)
532 // AndTerm -> divide all subterms and create two "small" AND terms
533 // OrTerm -> divide all subterms and create two "small" OR terms
535 switch( term
.type() ) {
536 case Term::LiteralTerm
:
537 return SearchNode( term
, SearchNode::Lucene
);
539 case Term::ComparisonTerm
:
540 if ( term
.comparator() == Term::Contains
&&
541 term
.subTerms().first().type() == Term::LiteralTerm
) {
// no need for subnodes here - we only use the subterm's value
543 return SearchNode( term
, SearchNode::Lucene
);
546 // all subnodes are resolved and can be handled in a SPARQL query
547 SearchNode
node( term
, SearchNode::Sparql
);
548 node
.subNodes
+= splitLuceneSparql( term
.subTerms().first() );
554 QList
<Term
> subTerms
= term
.subTerms();
555 QList
<SearchNode
> luceneNodes
, sparqlNodes
, unknownNodes
;
557 QList
<Term
>::const_iterator
end( subTerms
.constEnd() );
558 for ( QList
<Term
>::const_iterator it
= subTerms
.constBegin();
560 SearchNode node
= splitLuceneSparql( *it
);
561 if ( node
.type
== SearchNode::Lucene
) {
564 else if ( node
.type
== SearchNode::Sparql
) {
568 unknownNodes
+= node
;
572 if ( luceneNodes
.count() && !sparqlNodes
.count() && !unknownNodes
.count() ) {
573 return SearchNode( term
, SearchNode::Lucene
, luceneNodes
);
575 else if ( !luceneNodes
.count() && sparqlNodes
.count() && !unknownNodes
.count() ) {
576 return SearchNode( term
, SearchNode::Sparql
, sparqlNodes
);
578 else if ( !luceneNodes
.count() && !sparqlNodes
.count() && unknownNodes
.count() ) {
579 return SearchNode( term
, SearchNode::Unknown
, unknownNodes
);
583 newTerm
.setType( term
.type() );
584 SearchNode
andNode( newTerm
);
585 if ( luceneNodes
.count() )
586 andNode
.subNodes
+= SearchNode( term
, SearchNode::Lucene
, luceneNodes
);
587 if ( sparqlNodes
.count() )
588 andNode
.subNodes
+= SearchNode( term
, SearchNode::Sparql
, sparqlNodes
);
589 if ( unknownNodes
.count() )
590 andNode
.subNodes
+= SearchNode( term
, SearchNode::Unknown
, unknownNodes
);
596 // Q_ASSERT_X( 0, "splitLuceneSparql", "invalid term" );
597 return SearchNode( Term() );
602 QHash
<QUrl
, Nepomuk::Search::Result
> Nepomuk::Search::SearchThread::search( const SearchNode
& node
, double baseScore
, bool reportResults
)
604 if ( node
.type
== SearchNode::Lucene
) {
605 return luceneQuery( createLuceneQuery( node
), baseScore
, reportResults
);
607 else if ( node
.type
== SearchNode::Sparql
) {
608 return sparqlQuery( createSparqlQuery( node
), baseScore
, reportResults
);
610 else if ( node
.term
.type() == Term::AndTerm
) {
611 return andSearch( node
.subNodes
, baseScore
, reportResults
);
614 return orSearch( node
.subNodes
, baseScore
, reportResults
);
619 QHash
<QUrl
, Nepomuk::Search::Result
> Nepomuk::Search::SearchThread::andSearch( const QList
<SearchNode
>& nodes
, double baseScore
, bool reportResults
)
621 QHash
<QUrl
, Result
> results
;
623 foreach( const SearchNode
& node
, nodes
) {
624 if ( m_canceled
) break;
625 // FIXME: the search will restrict the number of results to maxResults although
626 // after the merge we might have less
627 QHash
<QUrl
, Result
> termResults
= search( node
, baseScore
, false );
629 results
= termResults
;
633 // intersect the results
634 // FIXME: sort by score
635 QHash
<QUrl
, Result
>::iterator it
= results
.begin();
636 while ( it
!= results
.end() ) {
637 if ( m_canceled
) break;
638 QHash
<QUrl
, Result
>::const_iterator termIt
= termResults
.constFind( it
.key() );
639 if ( termIt
!= termResults
.constEnd() ) {
641 it
.value().setScore( it
.value().score() + termIt
.value().score() );
645 it
= results
.erase( it
);
651 if ( reportResults
) {
652 for ( QHash
<QUrl
, Result
>::const_iterator it
= results
.constBegin();
653 it
!= results
.constEnd(); ++it
) {
654 if ( m_canceled
) break;
655 if ( m_searchTerm
.limit() > 0 && m_numResults
>= m_searchTerm
.limit() ) {
660 emit
newResult( it
.value() );
669 QHash
<QUrl
, Nepomuk::Search::Result
> Nepomuk::Search::SearchThread::orSearch( const QList
<SearchNode
>& nodes
, double baseScore
, bool reportResults
)
671 QHash
<QUrl
, Result
> results
;
672 foreach( const SearchNode
& node
, nodes
) {
673 if ( m_canceled
) break;
674 // FIXME: sort by score, ie. use the maxResults results with the highest score
675 mergeInResults( results
, search( node
, baseScore
, reportResults
) );
677 if ( reportResults
) {
678 for ( QHash
<QUrl
, Result
>::const_iterator it
= results
.constBegin();
679 it
!= results
.constEnd(); ++it
) {
680 if ( m_canceled
) break;
681 if ( m_searchTerm
.limit() > 0 && m_numResults
>= m_searchTerm
.limit() ) {
686 emit
newResult( it
.value() );
694 QList
<QUrl
> Nepomuk::Search::SearchThread::matchFieldName( const QString
& field
)
700 // Step 1: see if we have a direct match to a predicate label
701 // there is no need in selecting unused properties
702 QString query
= QString( "select distinct ?p where { "
704 "?p <%3> \"%4\"^^<%5> . "
706 .arg( Soprano::Vocabulary::RDF::type().toString() )
707 .arg( Soprano::Vocabulary::RDF::Property().toString() )
708 .arg( Soprano::Vocabulary::RDFS::label().toString() )
710 .arg( Soprano::Vocabulary::XMLSchema::string().toString() );
711 kDebug() << "Direct match query:" << query
;
713 Soprano::QueryResultIterator labelHits
= ResourceManager::instance()->mainModel()->executeQuery( query
,
714 Soprano::Query::QueryLanguageSparql
);
716 while ( labelHits
.next() ) {
717 results
<< labelHits
.binding( "p" ).uri();
718 kDebug() << "Found direct match" << labelHits
.binding( "p" ).uri();
721 if ( results
.isEmpty() ) {
722 // FIXME: how about we have two repositories: one for the ontologies and one for the data.
723 // I don't think there will be relations between the RDF or Xesam ontology and some
725 // Because then queries like the one we are doing here will be more performant since
726 // we do not search the data itself and do not have to filter
727 // BUT: What about inference?
729 query
= QString( "select ?p where { "
732 "FILTER(REGEX(STR(?label),'%4','i')) . }" )
733 .arg( Soprano::Vocabulary::RDF::type().toString() )
734 .arg( Soprano::Vocabulary::RDF::Property().toString() )
735 .arg( Soprano::Vocabulary::RDFS::label().toString() )
737 kDebug() << "Indirect hit query:" << query
;
738 labelHits
= ResourceManager::instance()->mainModel()->executeQuery( query
,
739 Soprano::Query::QueryLanguageSparql
);
741 while ( labelHits
.next() ) {
742 results
<< labelHits
.binding( "p" ).uri();
743 kDebug() << "Found indirect match by label" << labelHits
.binding( "p" ).uri();
748 if ( results
.isEmpty() ) {
749 query
= QString( "select ?p where { "
751 "FILTER(REGEX(STR(?p),'%3','i')) . }" )
752 .arg( Soprano::Vocabulary::RDF::type().toString() )
753 .arg( Soprano::Vocabulary::RDF::Property().toString() )
755 kDebug() << "Indirect hit query:" << query
;
756 labelHits
= ResourceManager::instance()->mainModel()->executeQuery( query
,
757 Soprano::Query::QueryLanguageSparql
);
759 while ( labelHits
.next() ) {
760 results
<< labelHits
.binding( "p" ).uri();
761 kDebug() << "Found indirect match by name" << labelHits
.binding( "p" ).uri();
770 QString
Nepomuk::Search::SearchThread::createSparqlQuery( const Nepomuk::Search::SearchNode
& node
)
773 return QString( "select distinct ?r %1 where { graph ?g { ?r a ?type . } . ?g a <%2> . %3 %4 }" )
774 .arg( buildRequestPropertyVariableList() )
775 .arg( Soprano::Vocabulary::NRL::InstanceBase().toString() )
776 .arg( createGraphPattern( node
, varCnt
) )
777 .arg( buildRequestPropertyPatterns() );
781 QHash
<QUrl
, Nepomuk::Search::Result
> Nepomuk::Search::SearchThread::sparqlQuery( const QString
& query
, double baseScore
, bool reportResults
)
785 QHash
<QUrl
, Result
> results
;
787 Soprano::QueryResultIterator hits
= ResourceManager::instance()->mainModel()->executeQuery( query
, Soprano::Query::QueryLanguageSparql
);
788 while ( hits
.next() ) {
789 if ( m_canceled
) break;
791 Result result
= extractResult( hits
);
792 result
.setScore( baseScore
);
794 kDebug() << "Found result:" << result
.resourceUri();
796 // these are actual direct hits and we can report them right away
797 if ( reportResults
) {
798 if ( m_searchTerm
.limit() > 0 && m_numResults
>= m_searchTerm
.limit() ) {
803 emit
newResult( result
);
807 results
.insert( result
.resourceUri(), result
);
814 QHash
<QUrl
, Nepomuk::Search::Result
> Nepomuk::Search::SearchThread::luceneQuery( const QString
& query
, double baseScore
, bool reportResults
)
816 QString
finalQuery( query
);
// if Soprano is 2.1.64 or newer the storage service forces the indexing of rdf:type which means that
// we can query it via lucene queries
// normally for completeness we would have to exclude all the owl and nrl properties but that would make
// for way too long queries and this should cover most cases anyway
// since we do not have inference we even need to check subclasses
823 #if SOPRANO_IS_VERSION(2,1,64)
824 finalQuery
+= QString(" AND NOT %1:%2 AND NOT %1:%3 AND NOT %1:%4 AND NOT %1:%5 AND NOT %1:%6 AND NOT %1:%7")
825 .arg( luceneQueryEscape(Soprano::Vocabulary::RDF::type()) )
826 .arg( luceneQueryEscape(Soprano::Vocabulary::RDF::Property()) )
827 .arg( luceneQueryEscape(Soprano::Vocabulary::RDFS::Class()) )
828 .arg( luceneQueryEscape(Soprano::Vocabulary::OWL::Class()) )
829 .arg( luceneQueryEscape(Soprano::Vocabulary::NRL::InstanceBase()) )
830 .arg( luceneQueryEscape(Soprano::Vocabulary::NRL::Ontology()) )
831 .arg( luceneQueryEscape(Soprano::Vocabulary::NRL::KnowledgeBase()) );
834 kDebug() << finalQuery
;
836 Soprano::QueryResultIterator hits
= ResourceManager::instance()->mainModel()->executeQuery( finalQuery
,
837 Soprano::Query::QueryLanguageUser
,
839 QHash
<QUrl
, Result
> results
;
841 while ( hits
.next() ) {
842 if ( m_canceled
) break;
844 QUrl hitUri
= hits
.binding( 0 ).uri();
845 double hitScore
= hits
.binding( 1 ).literal().toDouble() * baseScore
;
847 if ( hitScore
>= cutOffScore() ) {
848 Result
result( hitUri
, hitScore
);
850 if ( !m_searchTerm
.requestProperties().isEmpty() ) {
851 // FIXME: when merging with results from sparqlQuery there is no need to fetch them twice!
852 fetchRequestPropertiesForResource( result
);
855 // these are actual direct hits and we can report them right away
856 if ( reportResults
) {
857 if ( m_searchTerm
.limit() > 0 && m_numResults
>= m_searchTerm
.limit() ) {
862 kDebug() << "direct hit:" << hitUri
<< hitScore
;
863 emit
newResult( result
);
867 results
.insert( hitUri
, result
);
870 kDebug() << "Score too low:" << hitUri
<< hitScore
;
878 QString
Nepomuk::Search::SearchThread::buildRequestPropertyVariableList() const
880 int numRequestProperties
= m_searchTerm
.requestProperties().count();
882 for ( int i
= 1; i
<= numRequestProperties
; ++i
) {
883 s
+= QString( "?reqProp%1 " ).arg( i
);
889 QString
Nepomuk::Search::SearchThread::buildRequestPropertyPatterns() const
891 QList
<Query::RequestProperty
> requestProperties
= m_searchTerm
.requestProperties();
894 foreach ( const Query::RequestProperty
& rp
, requestProperties
) {
899 s
+= QString( "?r <%1> ?reqProp%2 . " ).arg( QString::fromAscii( rp
.first
.toEncoded() ) ).arg( i
++ );
909 Nepomuk::Search::Result
Nepomuk::Search::SearchThread::extractResult( const Soprano::QueryResultIterator
& it
) const
911 Result
result( it
.binding( 0 ).uri() );
914 QList
<Query::RequestProperty
> requestProperties
= m_searchTerm
.requestProperties();
915 foreach ( const Query::RequestProperty
& rp
, requestProperties
) {
916 result
.addRequestProperty( rp
.first
, it
.binding( QString("reqProp%1").arg( i
++ ) ) );
919 // score will be set above
924 void Nepomuk::Search::SearchThread::fetchRequestPropertiesForResource( Result
& result
)
926 QString q
= QString( "select distinct %1 where { %2 }" )
927 .arg( buildRequestPropertyVariableList() )
928 .arg( buildRequestPropertyPatterns().replace( "?r ", '<' + QString::fromAscii( result
.resourceUri().toEncoded() ) + "> " ) );
930 Soprano::QueryResultIterator reqPropHits
= ResourceManager::instance()->mainModel()->executeQuery( q
, Soprano::Query::QueryLanguageSparql
);
931 if ( reqPropHits
.next() ) {
933 QList
<Query::RequestProperty
> requestProperties
= m_searchTerm
.requestProperties();
934 foreach ( const Query::RequestProperty
& rp
, requestProperties
) {
935 result
.addRequestProperty( rp
.first
, reqPropHits
.binding( QString("reqProp%1").arg( i
++ ) ) );
940 #include "searchthread.moc"