// not quite so much needs to be delayed to the init() function
// [personal-kdebase.git] / workspace / libs / nepomukquery / queryparser.cpp
// blob 78cfbbf824c8b637e24bbf584b82ac06309de619
/*
   This file is part of the Nepomuk KDE project.
   Copyright (C) 2007 Sebastian Trueg <trueg@kde.org>

   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public
   License version 2 as published by the Free Software Foundation.

   This library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public License
   along with this library; see the file COPYING.LIB.  If not, write to
   the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
   Boston, MA 02110-1301, USA.
 */
20 #include "queryparser.h"
21 #include "query.h"
22 #include "term.h"
24 #include <QtCore/QRegExp>
25 #include <QtCore/QSet>
27 #include <KDebug>
28 #include <KLocale>
/* Advanced queries:
 * select distinct ?r ?p ?x ?label ?comment where { { ?r ?p ?x . } UNION { ?r ?p ?r2 . ?r2 ?p2 ?x . } . FILTER(isLiteral(?x)) . FILTER REGEX(STR(?p),'hastag','i') . FILTER REGEX(STR(?x),'nepomuk','i') . OPTIONAL { { ?r <http://www.w3.org/2000/01/rdf-schema#label> ?label } UNION { ?r <http://www.semanticdesktop.org/ontologies/2007/08/15/nao#prefLabel> ?label . } UNION { ?r <http://freedesktop.org/standards/xesam/1.0/core#name> ?label . } . ?r <http://www.w3.org/2000/01/rdf-schema#comment> ?comment . } . }
 */
35 namespace {
36 // a field differs from a plain term in that it does never allow comparators
37 QString s_fieldNamePattern( "([^\\s\"':=<>]+|(?:([\"'])[^\"':=<>]+\\%1))" );
38 QString s_plainTermPattern( "([^\\s\"':=<>]+|(?:([\"'])[^\"']+\\%1))" );
39 QString s_inExclusionPattern( "([\\+\\-]?)" );
40 QString s_uriPattern( "<([^<>]+)>" );
41 QString s_comparatorPattern( "(:|\\<=|\\>=|=|\\<|\\>)" );
43 // match a simple search text
44 // captures: 1 - The optional + or - sign (may be empty)
45 // 2 - the search text (including optional paranthesis)
46 QRegExp s_plainTermRx( s_inExclusionPattern + s_plainTermPattern.arg( 3 ) );
48 // match a field search term: fieldname + relation (:, =, etc) + search text with optional paranthesis
49 // captures: 1 - The optional + or - sign (may be empty)
50 // 2 - fieldname
51 // 3 - relation
52 // 4 - search text (including optional paranthesis)
53 QRegExp s_fieldRx( s_inExclusionPattern + s_fieldNamePattern.arg( 3 ) + s_comparatorPattern + s_plainTermPattern.arg( 6 ) );
55 // match a property URI search term: property URI + relation (:, =, etc) + search text with optional paranthesis
56 // captures: 1 - The optional + or - sign (may be empty)
57 // 2 - property URI
58 // 3 - relation
59 // 4 - search text (including optional paranthesis)
60 QRegExp s_propertyRx( s_inExclusionPattern + s_uriPattern + s_comparatorPattern + s_plainTermPattern.arg( 5 ) );
62 // match a property URI search term: property URI + relation (:, =, etc) + resource URI
63 // captures: 1 - The optional + or - sign (may be empty)
64 // 2 - property URI
65 // 3 - resource URI
66 QRegExp s_resourceRx( s_inExclusionPattern + s_uriPattern + "(?::|=)" + s_uriPattern );
68 QRegExp s_fieldFieldRx( s_inExclusionPattern + s_fieldNamePattern.arg( 3 ) + s_comparatorPattern + "\\(" + s_fieldNamePattern.arg( 6 ) + s_comparatorPattern + s_plainTermPattern.arg( 9 ) + "\\)" );
70 Nepomuk::Search::Term::Comparator fieldTypeRelationFromString( const QString& s ) {
71 if ( s == "=" ) {
72 return Nepomuk::Search::Term::Equal;
74 else if ( s == ":" ) {
75 return Nepomuk::Search::Term::Contains;
77 else if ( s == ">" ) {
78 return Nepomuk::Search::Term::Greater;
80 else if ( s == "<" ) {
81 return Nepomuk::Search::Term::Smaller;
83 else if ( s == ">=" ) {
84 return Nepomuk::Search::Term::GreaterOrEqual;
86 else if ( s == "<=" ) {
87 return Nepomuk::Search::Term::SmallerOrEqual;
89 else {
90 kDebug() << "FIXME: Unsupported relation:" << s;
91 return Nepomuk::Search::Term::Equal;
95 QString stripQuotes( const QString& s ) {
96 if ( s[0] == '\'' ||
97 s[0] == '\"' ) {
98 return s.mid( 1 ).left( s.length()-2 );
100 else {
101 return s;
105 QUrl tryToBeIntelligentAboutParsingUrl( const QString& s ) {
106 if ( s.contains( '%' ) && !s.contains( '/' ) ) {
107 return QUrl::fromEncoded( s.toAscii() );
109 else {
110 return QUrl( s );
114 Soprano::LiteralValue createLiteral( const QString& s ) {
115 bool b = false;
116 int i = s.toInt( &b );
117 if ( b )
118 return Soprano::LiteralValue( i );
119 double d = s.toDouble( &b );
120 if ( b )
121 return Soprano::LiteralValue( d );
122 return s;
127 Nepomuk::Search::Query Nepomuk::Search::QueryParser::parseQuery( const QString& query )
129 QueryParser parser;
130 return parser.parse( query );
134 class Nepomuk::Search::QueryParser::Private
136 public:
137 QSet<QString> andKeywords;
138 QSet<QString> orKeywords;
142 Nepomuk::Search::QueryParser::QueryParser()
143 : d( new Private() )
145 QString andListStr = i18nc( "Boolean AND keyword in desktop search strings. You can add several variants separated by spaces, e.g. retain the English one alongside the translation; keywords are not case sensitive. Make sure there is no conflict with the OR keyword.", "and" );
146 foreach ( const QString &andKeyword, andListStr.split( " ", QString::SkipEmptyParts ) ) {
147 d->andKeywords.insert( andKeyword.toLower() );
149 QString orListStr = i18nc( "Boolean OR keyword in desktop search strings. You can add several variants separated by spaces, e.g. retain the English one alongside the translation; keywords are not case sensitive. Make sure there is no conflict with the AND keyword.", "or" );
150 foreach ( const QString &orKeyword, orListStr.split( " ", QString::SkipEmptyParts ) ) {
151 d->orKeywords.insert( orKeyword.toLower() );
156 Nepomuk::Search::QueryParser::~QueryParser()
158 delete d;
162 Nepomuk::Search::Query Nepomuk::Search::QueryParser::parse( const QString& query )
164 // TODO: a "real" parser which can handle all of the Xesam user language
165 // This one for example does not handle nesting at all.
167 QList<Term> terms;
169 bool inOrBlock = false;
170 bool inAndBlock = false;
172 int pos = 0;
173 while ( pos < query.length() ) {
174 // skip whitespace
175 while ( pos < query.length() && query[pos].isSpace() ) {
176 kDebug() << "Skipping space at" << pos;
177 ++pos;
180 Term term;
182 if ( pos < query.length() ) {
183 if ( s_resourceRx.indexIn( query, pos ) == pos ) {
184 // FIXME: honour the +-
185 kDebug() << "matched resource term at" << pos << s_resourceRx.cap( 0 );
186 term = Term( tryToBeIntelligentAboutParsingUrl( s_resourceRx.cap( 2 ) ),
187 tryToBeIntelligentAboutParsingUrl( s_resourceRx.cap( 3 ) ) );
188 pos += s_resourceRx.matchedLength();
190 else if ( s_propertyRx.indexIn( query, pos ) == pos ) {
191 // FIXME: honour the +-
192 kDebug() << "matched property term at" << pos << s_propertyRx.cap( 0 );
193 term.setProperty( tryToBeIntelligentAboutParsingUrl( s_propertyRx.cap( 2 ) ) );
194 term.addSubTerm( Term( createLiteral( stripQuotes( s_propertyRx.cap( 4 ) ) ) ) );
195 QString comparator = s_propertyRx.cap( 3 );
196 term.setType( Term::ComparisonTerm );
197 term.setComparator( fieldTypeRelationFromString( comparator ) );
198 pos += s_propertyRx.matchedLength();
200 else if ( s_fieldFieldRx.indexIn( query, pos ) == pos ) {
201 kDebug() << "matched field field term at" << pos
202 << s_fieldFieldRx.cap( 0 )
203 << s_fieldFieldRx.cap( 2 )
204 << s_fieldFieldRx.cap( 4 )
205 << s_fieldFieldRx.cap( 5 )
206 << s_fieldFieldRx.cap( 7 )
207 << s_fieldFieldRx.cap( 8 );
208 term.setField( stripQuotes( s_fieldFieldRx.cap( 2 ) ) );
209 QString comparator = s_fieldFieldRx.cap( 4 );
210 term.setType( Term::ComparisonTerm );
211 term.setComparator( fieldTypeRelationFromString( comparator ) );
212 term.addSubTerm( Term( stripQuotes( s_fieldFieldRx.cap( 5 ) ), s_fieldFieldRx.cap( 8 ), fieldTypeRelationFromString( s_fieldFieldRx.cap( 7 ) ) ) );
213 pos += s_fieldFieldRx.matchedLength();
215 else if ( s_fieldRx.indexIn( query, pos ) == pos ) {
216 // FIXME: honour the +-
217 kDebug() << "matched field term at" << pos << s_fieldRx.cap( 0 ) << s_fieldRx.cap( 2 ) << s_fieldRx.cap( 4 ) << s_fieldRx.cap( 5 );
218 term.setField( stripQuotes( s_fieldRx.cap( 2 ) ) );
219 term.addSubTerm( Term( createLiteral( stripQuotes( s_fieldRx.cap( 5 ) ) ) ) );
220 QString comparator = s_fieldRx.cap( 4 );
221 term.setType( Term::ComparisonTerm );
222 term.setComparator( fieldTypeRelationFromString( comparator ) );
223 pos += s_fieldRx.matchedLength();
225 else if ( s_plainTermRx.indexIn( query, pos ) == pos ) {
226 // FIXME: honour the +-
227 QString value = stripQuotes( s_plainTermRx.cap( 2 ) );
228 if ( d->orKeywords.contains( value.toLower() ) ) {
229 inOrBlock = true;
231 else if ( d->andKeywords.contains( value.toLower() ) ) {
232 inAndBlock = true;
234 else {
235 kDebug() << "matched literal at" << pos << value;
236 term = Term( Soprano::LiteralValue( value ) );
238 pos += s_plainTermRx.matchedLength();
240 else {
241 kDebug() << "Invalid query at" << pos << query;
242 return Term();
245 if ( term.isValid() ) {
246 if ( inOrBlock && !terms.isEmpty() ) {
247 Term orTerm;
248 orTerm.setType( Term::OrTerm );
249 orTerm.addSubTerm( terms.takeLast() );
250 orTerm.addSubTerm( term );
251 terms.append( orTerm );
253 else if ( inAndBlock && !terms.isEmpty() ) {
254 Term andTerm;
255 andTerm.setType( Term::AndTerm );
256 andTerm.addSubTerm( terms.takeLast() );
257 andTerm.addSubTerm( term );
258 terms.append( andTerm );
260 else {
261 terms.append( term );
267 if ( terms.count() == 1 ) {
268 return terms[0];
270 else if ( terms.count() > 0 ) {
271 Term t;
272 t.setType( Term::AndTerm );
273 t.setSubTerms( terms );
274 return t;
276 else {
277 return Term();