2 This file is part of the Nepomuk KDE project.
3 Copyright (C) 2007 Sebastian Trueg <trueg@kde.org>
5 This library is free software; you can redistribute it and/or
6 modify it under the terms of the GNU Library General Public
7 License version 2 as published by the Free Software Foundation.
9 This library is distributed in the hope that it will be useful,
10 but WITHOUT ANY WARRANTY; without even the implied warranty of
11 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
12 Library General Public License for more details.
14 You should have received a copy of the GNU Library General Public License
15 along with this library; see the file COPYING.LIB. If not, write to
16 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
17 Boston, MA 02110-1301, USA.
20 #include "queryparser.h"
24 #include <QtCore/QRegExp>
25 #include <QtCore/QSet>
32 * select distinct ?r ?p ?x ?label ?comment where { { ?r ?p ?x . } UNION { ?r ?p ?r2 . ?r2 ?p2 ?x . } . FILTER(isLiteral(?x)) . FILTER REGEX(STR(?p),'hastag','i') . FILTER REGEX(STR(?x),'nepomuk','i') . OPTIONAL { { ?r <http://www.w3.org/2000/01/rdf-schema#label> ?label } UNION { ?r <http://www.semanticdesktop.org/ontologies/2007/08/15/nao#prefLabel> ?label . } UNION { ?r <http://freedesktop.org/standards/xesam/1.0/core#name> ?label . } . ?r <http://www.w3.org/2000/01/rdf-schema#comment> ?comment . } . }
36 // a field differs from a plain term in that it does never allow comparators
37 QString
s_fieldNamePattern( "([^\\s\"':=<>]+|(?:([\"'])[^\"':=<>]+\\%1))" );
38 QString
s_plainTermPattern( "([^\\s\"':=<>]+|(?:([\"'])[^\"']+\\%1))" );
39 QString
s_inExclusionPattern( "([\\+\\-]?)" );
40 QString
s_uriPattern( "<([^<>]+)>" );
41 QString
s_comparatorPattern( "(:|\\<=|\\>=|=|\\<|\\>)" );
43 // match a simple search text
44 // captures: 1 - The optional + or - sign (may be empty)
45 // 2 - the search text (including optional paranthesis)
46 QRegExp
s_plainTermRx( s_inExclusionPattern
+ s_plainTermPattern
.arg( 3 ) );
48 // match a field search term: fieldname + relation (:, =, etc) + search text with optional paranthesis
49 // captures: 1 - The optional + or - sign (may be empty)
52 // 4 - search text (including optional paranthesis)
53 QRegExp
s_fieldRx( s_inExclusionPattern
+ s_fieldNamePattern
.arg( 3 ) + s_comparatorPattern
+ s_plainTermPattern
.arg( 6 ) );
55 // match a property URI search term: property URI + relation (:, =, etc) + search text with optional paranthesis
56 // captures: 1 - The optional + or - sign (may be empty)
59 // 4 - search text (including optional paranthesis)
60 QRegExp
s_propertyRx( s_inExclusionPattern
+ s_uriPattern
+ s_comparatorPattern
+ s_plainTermPattern
.arg( 5 ) );
62 // match a property URI search term: property URI + relation (:, =, etc) + resource URI
63 // captures: 1 - The optional + or - sign (may be empty)
66 QRegExp
s_resourceRx( s_inExclusionPattern
+ s_uriPattern
+ "(?::|=)" + s_uriPattern
);
68 QRegExp
s_fieldFieldRx( s_inExclusionPattern
+ s_fieldNamePattern
.arg( 3 ) + s_comparatorPattern
+ "\\(" + s_fieldNamePattern
.arg( 6 ) + s_comparatorPattern
+ s_plainTermPattern
.arg( 9 ) + "\\)" );
// Translates the textual relation s of a query term into a Term::Comparator:
//   ":"  -> Contains          ">"  -> Greater          "<"  -> Smaller
//   ">=" -> GreaterOrEqual    "<=" -> SmallerOrEqual
// Any relation not handled by a branch is reported via kDebug() and treated
// as Equal.
// NOTE(review): presumably the leading branch (returning Equal) handles "=",
// the only comparator of s_comparatorPattern without an "else if" here - confirm.
70 Nepomuk::Search::Term::Comparator
fieldTypeRelationFromString( const QString
& s
) {
72 return Nepomuk::Search::Term::Equal
;
74 else if ( s
== ":" ) {
75 return Nepomuk::Search::Term::Contains
;
77 else if ( s
== ">" ) {
78 return Nepomuk::Search::Term::Greater
;
80 else if ( s
== "<" ) {
81 return Nepomuk::Search::Term::Smaller
;
83 else if ( s
== ">=" ) {
84 return Nepomuk::Search::Term::GreaterOrEqual
;
86 else if ( s
== "<=" ) {
87 return Nepomuk::Search::Term::SmallerOrEqual
;
// Fallback: log the unsupported relation and behave like "equal".
90 kDebug() << "FIXME: Unsupported relation:" << s
;
91 return Nepomuk::Search::Term::Equal
;
// Removes the first and the last character of s: mid( 1 ) drops the first
// character, left( length - 2 ) then keeps everything but the last one.
// NOTE(review): callers also pass captures that may be unquoted (e.g. capture 2
// of s_plainTermRx); presumably this return is guarded by a check for a leading
// quote character - confirm.
95 QString
stripQuotes( const QString
& s
) {
98 return s
.mid( 1 ).left( s
.length()-2 );
// Builds a QUrl from the string s.
// NOTE(review): only the special case below is described; how any other input
// is converted should be confirmed.
105 QUrl
tryToBeIntelligentAboutParsingUrl( const QString
& s
) {
// A '%' without any '/' is presumably taken as a sign that s is already
// percent-encoded, so its ASCII bytes are handed to QUrl::fromEncoded().
106 if ( s
.contains( '%' ) && !s
.contains( '/' ) ) {
107 return QUrl::fromEncoded( s
.toAscii() );
// Converts s into a Soprano::LiteralValue, preferring a numeric value: s is
// parsed as an int first and as a double second; b receives the success flag
// of toInt() / toDouble().
// NOTE(review): presumably each return below only happens when b is true and a
// non-numeric s ends up as a string literal - confirm.
114 Soprano::LiteralValue
createLiteral( const QString
& s
) {
// First attempt: integer literal.
116 int i
= s
.toInt( &b
);
118 return Soprano::LiteralValue( i
);
// Second attempt: floating point literal.
119 double d
= s
.toDouble( &b
);
121 return Soprano::LiteralValue( d
);
// Convenience entry point: hands query to parse() of the object `parser` and
// returns the resulting Query.
// NOTE(review): presumably `parser` is a local QueryParser instance - confirm.
127 Nepomuk::Search::Query
Nepomuk::Search::QueryParser::parseQuery( const QString
& query
)
130 return parser
.parse( query
);
// Private data of QueryParser; the parser reaches it through its d pointer.
134 class Nepomuk::Search::QueryParser::Private
// Keywords recognized as the boolean AND operator (stored lower-cased by the
// QueryParser constructor).
137 QSet
<QString
> andKeywords
;
// Keywords recognized as the boolean OR operator (stored lower-cased by the
// QueryParser constructor).
138 QSet
<QString
> orKeywords
;
// Constructs the parser and fills the keyword sets of d. Each translated
// keyword list is split at spaces (empty parts are skipped) and every entry is
// inserted lower-cased, so translators may provide several variants per operator.
142 Nepomuk::Search::QueryParser::QueryParser()
// Boolean AND keywords.
145 QString andListStr
= i18nc( "Boolean AND keyword in desktop search strings. You can add several variants separated by spaces, e.g. retain the English one alongside the translation; keywords are not case sensitive. Make sure there is no conflict with the OR keyword.", "and" );
146 foreach ( const QString
&andKeyword
, andListStr
.split( " ", QString::SkipEmptyParts
) ) {
147 d
->andKeywords
.insert( andKeyword
.toLower() );
// Boolean OR keywords.
149 QString orListStr
= i18nc( "Boolean OR keyword in desktop search strings. You can add several variants separated by spaces, e.g. retain the English one alongside the translation; keywords are not case sensitive. Make sure there is no conflict with the AND keyword.", "or" );
150 foreach ( const QString
&orKeyword
, orListStr
.split( " ", QString::SkipEmptyParts
) ) {
151 d
->orKeywords
.insert( orKeyword
.toLower() );
156 Nepomuk::Search::QueryParser::~QueryParser()
162 Nepomuk::Search::Query
Nepomuk::Search::QueryParser::parse( const QString
& query
)
164 // TODO: a "real" parser which can handle all of the Xesam user language
165 // This one for example does not handle nesting at all.
169 bool inOrBlock
= false;
170 bool inAndBlock
= false;
173 while ( pos
< query
.length() ) {
175 while ( pos
< query
.length() && query
[pos
].isSpace() ) {
176 kDebug() << "Skipping space at" << pos
;
182 if ( pos
< query
.length() ) {
183 if ( s_resourceRx
.indexIn( query
, pos
) == pos
) {
184 // FIXME: honour the +-
185 kDebug() << "matched resource term at" << pos
<< s_resourceRx
.cap( 0 );
186 term
= Term( tryToBeIntelligentAboutParsingUrl( s_resourceRx
.cap( 2 ) ),
187 tryToBeIntelligentAboutParsingUrl( s_resourceRx
.cap( 3 ) ) );
188 pos
+= s_resourceRx
.matchedLength();
190 else if ( s_propertyRx
.indexIn( query
, pos
) == pos
) {
191 // FIXME: honour the +-
192 kDebug() << "matched property term at" << pos
<< s_propertyRx
.cap( 0 );
193 term
.setProperty( tryToBeIntelligentAboutParsingUrl( s_propertyRx
.cap( 2 ) ) );
194 term
.addSubTerm( Term( createLiteral( stripQuotes( s_propertyRx
.cap( 4 ) ) ) ) );
195 QString comparator
= s_propertyRx
.cap( 3 );
196 term
.setType( Term::ComparisonTerm
);
197 term
.setComparator( fieldTypeRelationFromString( comparator
) );
198 pos
+= s_propertyRx
.matchedLength();
200 else if ( s_fieldFieldRx
.indexIn( query
, pos
) == pos
) {
201 kDebug() << "matched field field term at" << pos
202 << s_fieldFieldRx
.cap( 0 )
203 << s_fieldFieldRx
.cap( 2 )
204 << s_fieldFieldRx
.cap( 4 )
205 << s_fieldFieldRx
.cap( 5 )
206 << s_fieldFieldRx
.cap( 7 )
207 << s_fieldFieldRx
.cap( 8 );
208 term
.setField( stripQuotes( s_fieldFieldRx
.cap( 2 ) ) );
209 QString comparator
= s_fieldFieldRx
.cap( 4 );
210 term
.setType( Term::ComparisonTerm
);
211 term
.setComparator( fieldTypeRelationFromString( comparator
) );
212 term
.addSubTerm( Term( stripQuotes( s_fieldFieldRx
.cap( 5 ) ), s_fieldFieldRx
.cap( 8 ), fieldTypeRelationFromString( s_fieldFieldRx
.cap( 7 ) ) ) );
213 pos
+= s_fieldFieldRx
.matchedLength();
215 else if ( s_fieldRx
.indexIn( query
, pos
) == pos
) {
216 // FIXME: honour the +-
217 kDebug() << "matched field term at" << pos
<< s_fieldRx
.cap( 0 ) << s_fieldRx
.cap( 2 ) << s_fieldRx
.cap( 4 ) << s_fieldRx
.cap( 5 );
218 term
.setField( stripQuotes( s_fieldRx
.cap( 2 ) ) );
219 term
.addSubTerm( Term( createLiteral( stripQuotes( s_fieldRx
.cap( 5 ) ) ) ) );
220 QString comparator
= s_fieldRx
.cap( 4 );
221 term
.setType( Term::ComparisonTerm
);
222 term
.setComparator( fieldTypeRelationFromString( comparator
) );
223 pos
+= s_fieldRx
.matchedLength();
225 else if ( s_plainTermRx
.indexIn( query
, pos
) == pos
) {
226 // FIXME: honour the +-
227 QString value
= stripQuotes( s_plainTermRx
.cap( 2 ) );
228 if ( d
->orKeywords
.contains( value
.toLower() ) ) {
231 else if ( d
->andKeywords
.contains( value
.toLower() ) ) {
235 kDebug() << "matched literal at" << pos
<< value
;
236 term
= Term( Soprano::LiteralValue( value
) );
238 pos
+= s_plainTermRx
.matchedLength();
241 kDebug() << "Invalid query at" << pos
<< query
;
245 if ( term
.isValid() ) {
246 if ( inOrBlock
&& !terms
.isEmpty() ) {
248 orTerm
.setType( Term::OrTerm
);
249 orTerm
.addSubTerm( terms
.takeLast() );
250 orTerm
.addSubTerm( term
);
251 terms
.append( orTerm
);
253 else if ( inAndBlock
&& !terms
.isEmpty() ) {
255 andTerm
.setType( Term::AndTerm
);
256 andTerm
.addSubTerm( terms
.takeLast() );
257 andTerm
.addSubTerm( term
);
258 terms
.append( andTerm
);
261 terms
.append( term
);
267 if ( terms
.count() == 1 ) {
270 else if ( terms
.count() > 0 ) {
272 t
.setType( Term::AndTerm
);
273 t
.setSubTerms( terms
);