beagled/Lucene.Net/QueryParser/QueryParser.jj

   1 /**
   2  * Copyright 2004 The Apache Software Foundation
   3  *
   4  * Licensed under the Apache License, Version 2.0 (the "License");
   5  * you may not use this file except in compliance with the License.
   6  * You may obtain a copy of the License at
   7  *
   8  *     http://www.apache.org/licenses/LICENSE-2.0
   9  *
  10  * Unless required by applicable law or agreed to in writing, software
  11  * distributed under the License is distributed on an "AS IS" BASIS,
  12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  13  * See the License for the specific language governing permissions and
  14  * limitations under the License.
  15  */
  16
  17 options {
  18   STATIC=false;
  19   JAVA_UNICODE_ESCAPE=true;
  20   USER_CHAR_STREAM=true;
  21 }
  22
  23 PARSER_BEGIN(QueryParser)
  24
  25 package org.apache.lucene.queryParser;
  26
  27 import java.util.Vector;
  28 import java.io.*;
  29 import java.text.*;
  30 import java.util.*;
  31 import org.apache.lucene.index.Term;
  32 import org.apache.lucene.analysis.*;
  33 import org.apache.lucene.document.*;
  34 import org.apache.lucene.search.*;
  35 import org.apache.lucene.util.Parameter;
  36
  37 /**
  38  * This class is generated by JavaCC.  The most important method is
  39  * {@link #parse(String)}.
  40  *
  41  * The syntax for query strings is as follows:
  42  * A Query is a series of clauses.
  43  * A clause may be prefixed by:
  44  * <ul>
  45  * <li> a plus (<code>+</code>) or a minus (<code>-</code>) sign, indicating
  46  * that the clause is required or prohibited respectively; or
  47  * <li> a term followed by a colon, indicating the field to be searched.
  48  * This enables one to construct queries which search multiple fields.
  49  * </ul>
  50  *
  51  * A clause may be either:
  52  * <ul>
  53  * <li> a term, indicating all the documents that contain this term; or
  54  * <li> a nested query, enclosed in parentheses.  Note that this may be used
  55  * with a <code>+</code>/<code>-</code> prefix to require any of a set of
  56  * terms.
  57  * </ul>
  58  *
  59  * Thus, in BNF, the query grammar is:
  60  * <pre>
  61  *   Query  ::= ( Clause )*
  62  *   Clause ::= ["+", "-"] [&lt;TERM&gt; ":"] ( &lt;TERM&gt; | "(" Query ")" )
  63  * </pre>
  64  *
  65  * <p>
  66  * Examples of appropriately formatted queries can be found in the <a
  67  * href="http://lucene.apache.org/java/docs/queryparsersyntax.html">query syntax
  68  * documentation</a>.
  69  * </p>
  70  *
  71  * <p>Note that QueryParser is <em>not</em> thread-safe.</p>
  72  *
  73  * @author Brian Goetz
  74  * @author Peter Halacsy
  75  * @author Tatu Saloranta
  76  */
  77
  78 public class QueryParser {
  79
  // Conjunction codes: returned by the Conjunction() grammar rule and
  // interpreted by addClause().
  private static final int CONJ_NONE   = 0;
  private static final int CONJ_AND    = 1;
  private static final int CONJ_OR     = 2;

  // Modifier codes: returned by the Modifiers() grammar rule and
  // interpreted by addClause(). MOD_NOT marks a prohibited clause,
  // MOD_REQ a required one.
  private static final int MOD_NONE    = 0;
  private static final int MOD_NOT     = 10;
  private static final int MOD_REQ     = 11;

  /** @deprecated use {@link #OR_OPERATOR} instead */
  public static final int DEFAULT_OPERATOR_OR  = 0;
  /** @deprecated use {@link #AND_OPERATOR} instead */
  public static final int DEFAULT_OPERATOR_AND = 1;

  // make it possible to call setDefaultOperator() without accessing
  // the nested class:
  /** Alternative form of QueryParser.Operator.AND */
  public static final Operator AND_OPERATOR = Operator.AND;
  /** Alternative form of QueryParser.Operator.OR */
  public static final Operator OR_OPERATOR = Operator.OR;

  /** The actual operator that parser uses to combine query terms */
  private Operator operator = OR_OPERATOR;

  // When true, wildcard, prefix, fuzzy and range terms are lower-cased
  // before the query object is built.
  boolean lowercaseExpandedTerms = true;

  // Analyzer used by getFieldQuery() to tokenize query text.
  Analyzer analyzer;
  // Default field, passed to the Query() rule by parse(String).
  String field;
  // Slop applied to phrase queries built by getFieldQuery().
  int phraseSlop = 0;
  // Minimum similarity used for fuzzy terms that carry no explicit value.
  float fuzzyMinSim = FuzzyQuery.defaultMinSimilarity;
  // Prefix length handed to every FuzzyQuery built by getFuzzyQuery().
  int fuzzyPrefixLength = FuzzyQuery.defaultPrefixLength;
  // Locale used by getRangeQuery() when trying to read range ends as dates.
  Locale locale = Locale.getDefault();
 111
 112   /** The default operator for parsing queries.
 113    * Use {@link QueryParser#setDefaultOperator} to change it.
 114    */
 115   static public final class Operator extends Parameter {
 116     private Operator(String name) {
 117       super(name);
 118     }
 119     static public final Operator OR = new Operator("OR");
 120     static public final Operator AND = new Operator("AND");
 121   }
 122
 123   /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
 124    *  @param query  the query string to be parsed.
 125    *  @param field  the default field for query terms.
 126    *  @param analyzer   used to find terms in the query text.
 127    *  @throws ParseException if the parsing fails
 128    *
 129    *  @deprecated Use an instance of QueryParser and the {@link #parse(String)} method instead.
 130    */
 131   static public Query parse(String query, String field, Analyzer analyzer)
 132        throws ParseException {
 133     QueryParser parser = new QueryParser(field, analyzer);
 134     return parser.parse(query);
 135   }
 136
 137   /** Constructs a query parser.
 138    *  @param f  the default field for query terms.
 139    *  @param a   used to find terms in the query text.
 140    */
 141   public QueryParser(String f, Analyzer a) {
 142     this(new FastCharStream(new StringReader("")));
 143     analyzer = a;
 144     field = f;
 145   }
 146
 147   /** Parses a query string, returning a {@link org.apache.lucene.search.Query}.
 148    *  @param query  the query string to be parsed.
 149    *  @throws ParseException if the parsing fails
 150    */
 151   public Query parse(String query) throws ParseException {
 152     ReInit(new FastCharStream(new StringReader(query)));
 153     try {
 154       return Query(field);
 155     }
 156     catch (TokenMgrError tme) {
 157       throw new ParseException(tme.getMessage());
 158     }
 159     catch (BooleanQuery.TooManyClauses tmc) {
 160       throw new ParseException("Too many boolean clauses");
 161     }
 162   }
 163
 164    /**
 165    * @return Returns the analyzer.
 166    */
 167   public Analyzer getAnalyzer() {
 168     return analyzer;
 169   }
 170
 171   /**
 172    * @return Returns the field.
 173    */
 174   public String getField() {
 175     return field;
 176   }
 177
 178    /**
 179    * Get the minimal similarity for fuzzy queries.
 180    */
 181   public float getFuzzyMinSim() {
 182       return fuzzyMinSim;
 183   }
 184
 185   /**
 186    * Set the minimum similarity for fuzzy queries.
 187    * Default is 0.5f.
 188    */
 189   public void setFuzzyMinSim(float fuzzyMinSim) {
 190       this.fuzzyMinSim = fuzzyMinSim;
 191   }
 192
 193    /**
 194    * Get the prefix length for fuzzy queries.
 195    * @return Returns the fuzzyPrefixLength.
 196    */
 197   public int getFuzzyPrefixLength() {
 198     return fuzzyPrefixLength;
 199   }
 200
 201   /**
 202    * Set the prefix length for fuzzy queries. Default is 0.
 203    * @param fuzzyPrefixLength The fuzzyPrefixLength to set.
 204    */
 205   public void setFuzzyPrefixLength(int fuzzyPrefixLength) {
 206     this.fuzzyPrefixLength = fuzzyPrefixLength;
 207   }
 208
 209   /**
 210    * Sets the default slop for phrases.  If zero, then exact phrase matches
 211    * are required.  Default value is zero.
 212    */
 213   public void setPhraseSlop(int phraseSlop) {
 214     this.phraseSlop = phraseSlop;
 215   }
 216
 217   /**
 218    * Gets the default slop for phrases.
 219    */
 220   public int getPhraseSlop() {
 221     return phraseSlop;
 222   }
 223
 224   /**
 225    * Sets the boolean operator of the QueryParser.
 226    * In default mode (<code>DEFAULT_OPERATOR_OR</code>) terms without any modifiers
 227    * are considered optional: for example <code>capital of Hungary</code> is equal to
 228    * <code>capital OR of OR Hungary</code>.<br/>
 229    * In <code>DEFAULT_OPERATOR_AND</code> terms are considered to be in conjuction: the
 230    * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
 231    * @deprecated use {@link #setDefaultOperator(QueryParser.Operator)} instead
 232    */
 233   public void setOperator(int op) {
 234     if (op == DEFAULT_OPERATOR_AND)
 235       this.operator = AND_OPERATOR;
 236     else if (op == DEFAULT_OPERATOR_OR)
 237       this.operator = OR_OPERATOR;
 238     else
 239       throw new IllegalArgumentException("Unknown operator " + op);
 240   }
 241
 242   /**
 243    * Sets the boolean operator of the QueryParser.
 244    * In default mode (<code>OR_OPERATOR</code>) terms without any modifiers
 245    * are considered optional: for example <code>capital of Hungary</code> is equal to
 246    * <code>capital OR of OR Hungary</code>.<br/>
 247    * In <code>AND_OPERATOR</code> mode terms are considered to be in conjuction: the
 248    * above mentioned query is parsed as <code>capital AND of AND Hungary</code>
 249    */
 250   public void setDefaultOperator(Operator op) {
 251     this.operator = op;
 252   }
 253
 254   /**
 255    * Gets implicit operator setting, which will be either DEFAULT_OPERATOR_AND
 256    * or DEFAULT_OPERATOR_OR.
 257    * @deprecated use {@link #getDefaultOperator()} instead
 258    */
 259   public int getOperator() {
 260     if(operator == AND_OPERATOR)
 261       return DEFAULT_OPERATOR_AND;
 262     else if(operator == OR_OPERATOR)
 263       return DEFAULT_OPERATOR_OR;
 264     else
 265       throw new IllegalStateException("Unknown operator " + operator);
 266   }
 267
 268   /**
 269    * Gets implicit operator setting, which will be either AND_OPERATOR
 270    * or OR_OPERATOR.
 271    */
 272   public Operator getDefaultOperator() {
 273     return operator;
 274   }
 275
 276   /**
 277    * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
 278    * lower-cased or not.  Default is <code>true</code>.
 279    * @deprecated use {@link #setLowercaseExpandedTerms(boolean)} instead
 280    */
 281   public void setLowercaseWildcardTerms(boolean lowercaseExpandedTerms) {
 282     this.lowercaseExpandedTerms = lowercaseExpandedTerms;
 283   }
 284
 285   /**
 286    * Whether terms of wildcard, prefix, fuzzy and range queries are to be automatically
 287    * lower-cased or not.  Default is <code>true</code>.
 288    */
 289   public void setLowercaseExpandedTerms(boolean lowercaseExpandedTerms) {
 290     this.lowercaseExpandedTerms = lowercaseExpandedTerms;
 291   }
 292
 293   /**
 294    * @deprecated use {@link #getLowercaseExpandedTerms()} instead
 295    */
 296   public boolean getLowercaseWildcardTerms() {
 297     return lowercaseExpandedTerms;
 298   }
 299
 300   /**
 301    * @see #setLowercaseExpandedTerms(boolean)
 302    */
 303   public boolean getLowercaseExpandedTerms() {
 304     return lowercaseExpandedTerms;
 305   }
 306
 307   /**
 308    * Set locale used by date range parsing.
 309    */
 310   public void setLocale(Locale locale) {
 311     this.locale = locale;
 312   }
 313
 314   /**
 315    * Returns current locale, allowing access by subclasses.
 316    */
 317   public Locale getLocale() {
 318     return locale;
 319   }
 320
 321   protected void addClause(Vector clauses, int conj, int mods, Query q) {
 322     boolean required, prohibited;
 323
 324     // If this term is introduced by AND, make the preceding term required,
 325     // unless it's already prohibited
 326     if (clauses.size() > 0 && conj == CONJ_AND) {
 327       BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
 328       if (!c.isProhibited())
 329         c.setOccur(BooleanClause.Occur.MUST);
 330     }
 331
 332     if (clauses.size() > 0 && operator == AND_OPERATOR && conj == CONJ_OR) {
 333       // If this term is introduced by OR, make the preceding term optional,
 334       // unless it's prohibited (that means we leave -a OR b but +a OR b-->a OR b)
 335       // notice if the input is a OR b, first term is parsed as required; without
 336       // this modification a OR b would parsed as +a OR b
 337       BooleanClause c = (BooleanClause) clauses.elementAt(clauses.size()-1);
 338       if (!c.isProhibited())
 339         c.setOccur(BooleanClause.Occur.SHOULD);
 340     }
 341
 342     // We might have been passed a null query; the term might have been
 343     // filtered away by the analyzer.
 344     if (q == null)
 345       return;
 346
 347     if (operator == OR_OPERATOR) {
 348       // We set REQUIRED if we're introduced by AND or +; PROHIBITED if
 349       // introduced by NOT or -; make sure not to set both.
 350       prohibited = (mods == MOD_NOT);
 351       required = (mods == MOD_REQ);
 352       if (conj == CONJ_AND && !prohibited) {
 353         required = true;
 354       }
 355     } else {
 356       // We set PROHIBITED if we're introduced by NOT or -; We set REQUIRED
 357       // if not PROHIBITED and not introduced by OR
 358       prohibited = (mods == MOD_NOT);
 359       required   = (!prohibited && conj != CONJ_OR);
 360     }
 361     if (required && !prohibited)
 362       clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST));
 363     else if (!required && !prohibited)
 364       clauses.addElement(new BooleanClause(q, BooleanClause.Occur.SHOULD));
 365     else if (!required && prohibited)
 366       clauses.addElement(new BooleanClause(q, BooleanClause.Occur.MUST_NOT));
 367     else
 368       throw new RuntimeException("Clause cannot be both required and prohibited");
 369   }
 370
 371   /**
 372    * Note that parameter analyzer is ignored. Calls inside the parser always
 373    * use class member analyzer.
 374    *
 375    * @exception ParseException throw in overridden method to disallow
 376    * @deprecated use {@link #getFieldQuery(String, String)}
 377    */
 378   protected Query getFieldQuery(String field,
 379                                                     Analyzer analyzer,
 380                                                     String queryText)  throws ParseException {
 381     return getFieldQuery(field, queryText);
 382   }
 383
 384   /**
 385    * @exception ParseException throw in overridden method to disallow
 386    */
 387   protected Query getFieldQuery(String field, String queryText)  throws ParseException {
 388     // Use the analyzer to get all the tokens, and then build a TermQuery,
 389     // PhraseQuery, or nothing based on the term count
 390
 391     TokenStream source = analyzer.tokenStream(field, new StringReader(queryText));
 392     Vector v = new Vector();
 393     org.apache.lucene.analysis.Token t;
 394     int positionCount = 0;
 395     boolean severalTokensAtSamePosition = false;
 396
 397     while (true) {
 398       try {
 399         t = source.next();
 400       }
 401       catch (IOException e) {
 402         t = null;
 403       }
 404       if (t == null)
 405         break;
 406       v.addElement(t);
 407       if (t.getPositionIncrement() != 0)
 408         positionCount += t.getPositionIncrement();
 409       else
 410         severalTokensAtSamePosition = true;
 411     }
 412     try {
 413       source.close();
 414     }
 415     catch (IOException e) {
 416       // ignore
 417     }
 418
 419     if (v.size() == 0)
 420       return null;
 421     else if (v.size() == 1) {
 422       t = (org.apache.lucene.analysis.Token) v.elementAt(0);
 423       return new TermQuery(new Term(field, t.termText()));
 424     } else {
 425       if (severalTokensAtSamePosition) {
 426         if (positionCount == 1) {
 427           // no phrase query:
 428           BooleanQuery q = new BooleanQuery(true);
 429           for (int i = 0; i < v.size(); i++) {
 430             t = (org.apache.lucene.analysis.Token) v.elementAt(i);
 431             TermQuery currentQuery = new TermQuery(
 432                 new Term(field, t.termText()));
 433             q.add(currentQuery, BooleanClause.Occur.SHOULD);
 434           }
 435           return q;
 436         }
 437         else {
 438           // phrase query:
 439           MultiPhraseQuery mpq = new MultiPhraseQuery();
 440           List multiTerms = new ArrayList();
 441           for (int i = 0; i < v.size(); i++) {
 442             t = (org.apache.lucene.analysis.Token) v.elementAt(i);
 443             if (t.getPositionIncrement() == 1 && multiTerms.size() > 0) {
 444               mpq.add((Term[])multiTerms.toArray(new Term[0]));
 445               multiTerms.clear();
 446             }
 447             multiTerms.add(new Term(field, t.termText()));
 448           }
 449           mpq.add((Term[])multiTerms.toArray(new Term[0]));
 450           return mpq;
 451         }
 452       }
 453       else {
 454         PhraseQuery q = new PhraseQuery();
 455         q.setSlop(phraseSlop);
 456         for (int i = 0; i < v.size(); i++) {
 457           q.add(new Term(field, ((org.apache.lucene.analysis.Token)
 458               v.elementAt(i)).termText()));
 459
 460         }
 461         return q;
 462       }
 463     }
 464   }
 465
 466   /**
 467    * Note that parameter analyzer is ignored. Calls inside the parser always
 468    * use class member analyzer.
 469    *
 470    * @exception ParseException throw in overridden method to disallow
 471    * @deprecated use {@link #getFieldQuery(String, String, int)}
 472    */
 473   protected Query getFieldQuery(String field,
 474                                                     Analyzer analyzer,
 475                                                     String queryText,
 476                                                     int slop) throws ParseException {
 477     return getFieldQuery(field, queryText, slop);
 478   }
 479
 480   /**
 481    * Base implementation delegates to {@link #getFieldQuery(String,String)}.
 482    * This method may be overridden, for example, to return
 483    * a SpanNearQuery instead of a PhraseQuery.
 484    *
 485    * @exception ParseException throw in overridden method to disallow
 486    */
 487   protected Query getFieldQuery(String field, String queryText, int slop)
 488         throws ParseException {
 489     Query query = getFieldQuery(field, queryText);
 490
 491     if (query instanceof PhraseQuery) {
 492       ((PhraseQuery) query).setSlop(slop);
 493     }
 494     if (query instanceof MultiPhraseQuery) {
 495       ((MultiPhraseQuery) query).setSlop(slop);
 496     }
 497
 498     return query;
 499   }
 500
 501   /**
 502    * Note that parameter analyzer is ignored. Calls inside the parser always
 503    * use class member analyzer.
 504    *
 505    * @exception ParseException throw in overridden method to disallow
 506    * @deprecated use {@link #getRangeQuery(String, String, String, boolean)}
 507    */
 508   protected Query getRangeQuery(String field,
 509       Analyzer analyzer,
 510       String part1,
 511       String part2,
 512       boolean inclusive) throws ParseException {
 513     return getRangeQuery(field, part1, part2, inclusive);
 514   }
 515
 516   /**
 517    * @exception ParseException throw in overridden method to disallow
 518    */
 519   protected Query getRangeQuery(String field,
 520                                 String part1,
 521                                 String part2,
 522                                 boolean inclusive) throws ParseException
 523   {
 524     if (lowercaseExpandedTerms) {
 525       part1 = part1.toLowerCase();
 526       part2 = part2.toLowerCase();
 527     }
 528     try {
 529       DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT, locale);
 530       df.setLenient(true);
 531       Date d1 = df.parse(part1);
 532       Date d2 = df.parse(part2);
 533       part1 = DateField.dateToString(d1);
 534       part2 = DateField.dateToString(d2);
 535     }
 536     catch (Exception e) { }
 537
 538     return new RangeQuery(new Term(field, part1),
 539                           new Term(field, part2),
 540                           inclusive);
 541   }
 542
 543   /**
 544    * Factory method for generating query, given a set of clauses.
 545    * By default creates a boolean query composed of clauses passed in.
 546    *
 547    * Can be overridden by extending classes, to modify query being
 548    * returned.
 549    *
 550    * @param clauses Vector that contains {@link BooleanClause} instances
 551    *    to join.
 552    *
 553    * @return Resulting {@link Query} object.
 554    * @exception ParseException throw in overridden method to disallow
 555    */
 556   protected Query getBooleanQuery(Vector clauses) throws ParseException {
 557     return getBooleanQuery(clauses, false);
 558   }
 559
 560   /**
 561    * Factory method for generating query, given a set of clauses.
 562    * By default creates a boolean query composed of clauses passed in.
 563    *
 564    * Can be overridden by extending classes, to modify query being
 565    * returned.
 566    *
 567    * @param clauses Vector that contains {@link BooleanClause} instances
 568    *    to join.
 569    * @param disableCoord true if coord scoring should be disabled.
 570    *
 571    * @return Resulting {@link Query} object.
 572    * @exception ParseException throw in overridden method to disallow
 573    */
 574   protected Query getBooleanQuery(Vector clauses, boolean disableCoord)
 575     throws ParseException
 576   {
 577     BooleanQuery query = new BooleanQuery(disableCoord);
 578     for (int i = 0; i < clauses.size(); i++) {
 579   query.add((BooleanClause)clauses.elementAt(i));
 580     }
 581     return query;
 582   }
 583
 584   /**
 585    * Factory method for generating a query. Called when parser
 586    * parses an input term token that contains one or more wildcard
 587    * characters (? and *), but is not a prefix term token (one
 588    * that has just a single * character at the end)
 589    *<p>
 590    * Depending on settings, prefix term may be lower-cased
 591    * automatically. It will not go through the default Analyzer,
 592    * however, since normal Analyzers are unlikely to work properly
 593    * with wildcard templates.
 594    *<p>
 595    * Can be overridden by extending classes, to provide custom handling for
 596    * wildcard queries, which may be necessary due to missing analyzer calls.
 597    *
 598    * @param field Name of the field query will use.
 599    * @param termStr Term token that contains one or more wild card
 600    *   characters (? or *), but is not simple prefix term
 601    *
 602    * @return Resulting {@link Query} built for the term
 603    * @exception ParseException throw in overridden method to disallow
 604    */
 605   protected Query getWildcardQuery(String field, String termStr) throws ParseException
 606   {
 607     if (lowercaseExpandedTerms) {
 608       termStr = termStr.toLowerCase();
 609     }
 610     Term t = new Term(field, termStr);
 611     return new WildcardQuery(t);
 612   }
 613
 614   /**
 615    * Factory method for generating a query (similar to
 616    * {@link #getWildcardQuery}). Called when parser parses an input term
 617    * token that uses prefix notation; that is, contains a single '*' wildcard
 618    * character as its last character. Since this is a special case
 619    * of generic wildcard term, and such a query can be optimized easily,
 620    * this usually results in a different query object.
 621    *<p>
 622    * Depending on settings, a prefix term may be lower-cased
 623    * automatically. It will not go through the default Analyzer,
 624    * however, since normal Analyzers are unlikely to work properly
 625    * with wildcard templates.
 626    *<p>
 627    * Can be overridden by extending classes, to provide custom handling for
 628    * wild card queries, which may be necessary due to missing analyzer calls.
 629    *
 630    * @param field Name of the field query will use.
 631    * @param termStr Term token to use for building term for the query
 632    *    (<b>without</b> trailing '*' character!)
 633    *
 634    * @return Resulting {@link Query} built for the term
 635    * @exception ParseException throw in overridden method to disallow
 636    */
 637   protected Query getPrefixQuery(String field, String termStr) throws ParseException
 638   {
 639     if (lowercaseExpandedTerms) {
 640       termStr = termStr.toLowerCase();
 641     }
 642     Term t = new Term(field, termStr);
 643     return new PrefixQuery(t);
 644   }
 645
 646  /**
 647    * @deprecated use {@link #getFuzzyQuery(String, String, float)}
 648    */
 649   protected Query getFuzzyQuery(String field, String termStr) throws ParseException {
 650     return getFuzzyQuery(field, termStr, fuzzyMinSim);
 651   }
 652
 653    /**
 654    * Factory method for generating a query (similar to
 655    * {@link #getWildcardQuery}). Called when parser parses
 656    * an input term token that has the fuzzy suffix (~) appended.
 657    *
 658    * @param field Name of the field query will use.
 659    * @param termStr Term token to use for building term for the query
 660    *
 661    * @return Resulting {@link Query} built for the term
 662    * @exception ParseException throw in overridden method to disallow
 663    */
 664   protected Query getFuzzyQuery(String field, String termStr, float minSimilarity) throws ParseException
 665   {
 666     if (lowercaseExpandedTerms) {
 667       termStr = termStr.toLowerCase();
 668     }
 669     Term t = new Term(field, termStr);
 670     return new FuzzyQuery(t, minSimilarity, fuzzyPrefixLength);
 671   }
 672
 673   /**
 674    * Returns a String where the escape char has been
 675    * removed, or kept only once if there was a double escape.
 676    */
 677   private String discardEscapeChar(String input) {
 678     char[] caSource = input.toCharArray();
 679     char[] caDest = new char[caSource.length];
 680     int j = 0;
 681     for (int i = 0; i < caSource.length; i++) {
 682       if ((caSource[i] != '\\') || (i > 0 && caSource[i-1] == '\\')) {
 683         caDest[j++]=caSource[i];
 684       }
 685     }
 686     return new String(caDest, 0, j);
 687   }
 688
 689   /**
 690    * Returns a String where those characters that QueryParser
 691    * expects to be escaped are escaped by a preceding <code>\</code>.
 692    */
 693   public static String escape(String s) {
 694     StringBuffer sb = new StringBuffer();
 695     for (int i = 0; i < s.length(); i++) {
 696       char c = s.charAt(i);
 697       // NOTE: keep this in sync with _ESCAPED_CHAR below!
 698       if (c == '\\' || c == '+' || c == '-' || c == '!' || c == '(' || c == ')' || c == ':'
 699         || c == '^' || c == '[' || c == ']' || c == '\"' || c == '{' || c == '}' || c == '~'
 700         || c == '*' || c == '?') {
 701         sb.append('\\');
 702       }
 703       sb.append(c);
 704     }
 705     return sb.toString();
 706   }
 707
 708   /**
 709    * Command line tool to test QueryParser, using {@link org.apache.lucene.analysis.SimpleAnalyzer}.
 710    * Usage:<br>
 711    * <code>java org.apache.lucene.queryParser.QueryParser &lt;input&gt;</code>
 712    */
 713   public static void main(String[] args) throws Exception {
 714     if (args.length == 0) {
 715       System.out.println("Usage: java org.apache.lucene.queryParser.QueryParser <input>");
 716       System.exit(0);
 717     }
 718     QueryParser qp = new QueryParser("field",
 719                            new org.apache.lucene.analysis.SimpleAnalyzer());
 720     Query q = qp.parse(args[0]);
 721     System.out.println(q.toString("field"));
 722   }
 723 }
 724
 725 PARSER_END(QueryParser)
 726
 727 /* ***************** */
 728 /* Token Definitions */
 729 /* ***************** */
 730
// Private (#) helper expressions, declared for every lexical state (<*>).
// They are building blocks for the token definitions below and are never
// delivered to the parser as tokens of their own.
<*> TOKEN : {
  <#_NUM_CHAR:   ["0"-"9"] >
// NOTE: keep this in sync with escape(String) above!
| <#_ESCAPED_CHAR: "\\" [ "\\", "+", "-", "!", "(", ")", ":", "^",
                          "[", "]", "\"", "{", "}", "~", "*", "?" ] >
// A term may start with any character that is neither whitespace nor query
// syntax, or with an escaped syntax character.
| <#_TERM_START_CHAR: ( ~[ " ", "\t", "\n", "\r", "+", "-", "!", "(", ")", ":", "^",
                           "[", "]", "\"", "{", "}", "~", "*", "?" ]
                       | <_ESCAPED_CHAR> ) >
// Inside a term, unescaped "-" and "+" are allowed as well.
| <#_TERM_CHAR: ( <_TERM_START_CHAR> | <_ESCAPED_CHAR> | "-" | "+" ) >
| <#_WHITESPACE: ( " " | "\t" | "\n" | "\r") >
}
 742
// Whitespace separates tokens and is discarded in the DEFAULT, RangeIn and
// RangeEx states. The Boost state is not listed here.
<DEFAULT, RangeIn, RangeEx> SKIP : {
  <<_WHITESPACE>>
}
 746
 747 // OG: to support prefix queries:
 748 // http://issues.apache.org/bugzilla/show_bug.cgi?id=12137
 749 // Change from:
 750 //
 751 // | <WILDTERM:  <_TERM_START_CHAR>
 752 //              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
 753 // To:
 754 //
 755 // (<_TERM_START_CHAR> | [ "*", "?" ]) (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
 756
// Tokens of the main query syntax. Three of them switch the lexical state:
// "^" enters Boost, "[" enters RangeIn and "{" enters RangeEx.
<DEFAULT> TOKEN : {
  <AND:       ("AND" | "&&") >
| <OR:        ("OR" | "||") >
| <NOT:       ("NOT" | "!") >
| <PLUS:      "+" >
| <MINUS:     "-" >
| <LPAREN:    "(" >
| <RPAREN:    ")" >
| <COLON:     ":" >
| <CARAT:     "^" > : Boost
// A quoted phrase needs at least one character between the quotes and
// cannot itself contain a double quote.
| <QUOTED:     "\"" (~["\""])+ "\"">
| <TERM:      <_TERM_START_CHAR> (<_TERM_CHAR>)*  >
// "~" optionally followed by a number such as 0.7.
| <FUZZY_SLOP:     "~" ( (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? )? >
// A term with a single trailing "*".
| <PREFIXTERM:  <_TERM_START_CHAR> (<_TERM_CHAR>)* "*" >
// A term with "*" or "?" anywhere after its first character.
| <WILDTERM:  <_TERM_START_CHAR>
              (<_TERM_CHAR> | ( [ "*", "?" ] ))* >
| <RANGEIN_START: "[" > : RangeIn
| <RANGEEX_START: "{" > : RangeEx
}
 776
// Entered after "^": the only token accepted is an integer or decimal
// number, after which the lexer returns to the DEFAULT state.
<Boost> TOKEN : {
<NUMBER:    (<_NUM_CHAR>)+ ( "." (<_NUM_CHAR>)+ )? > : DEFAULT
}
 780
// Tokens inside an inclusive range, i.e. after "[". "]" closes the range
// and returns the lexer to the DEFAULT state.
<RangeIn> TOKEN : {
<RANGEIN_TO: "TO">
| <RANGEIN_END: "]"> : DEFAULT
| <RANGEIN_QUOTED: "\"" (~["\""])+ "\"">
// Any run of characters other than a space or the closing bracket.
| <RANGEIN_GOOP: (~[ " ", "]" ])+ >
}
 787
// Tokens inside an exclusive range, i.e. after "{". "}" closes the range
// and returns the lexer to the DEFAULT state.
<RangeEx> TOKEN : {
<RANGEEX_TO: "TO">
| <RANGEEX_END: "}"> : DEFAULT
| <RANGEEX_QUOTED: "\"" (~["\""])+ "\"">
// Any run of characters other than a space or the closing brace.
| <RANGEEX_GOOP: (~[ " ", "}" ])+ >
}
 794
 795 // *   Query  ::= ( Clause )*
 796 // *   Clause ::= ["+", "-"] [<TERM> ":"] ( <TERM> | "(" Query ")" )
 797
 798 int Conjunction() : {
 799   int ret = CONJ_NONE;
 800 }
 801 {
 802   [
 803     <AND> { ret = CONJ_AND; }
 804     | <OR>  { ret = CONJ_OR; }
 805   ]
 806   { return ret; }
 807 }
 808
 809 int Modifiers() : {
 810   int ret = MOD_NONE;
 811 }
 812 {
 813   [
 814      <PLUS> { ret = MOD_REQ; }
 815      | <MINUS> { ret = MOD_NOT; }
 816      | <NOT> { ret = MOD_NOT; }
 817   ]
 818   { return ret; }
 819 }
 820
 821 Query Query(String field) :
 822 {
 823   Vector clauses = new Vector();
 824   Query q, firstQuery=null;
 825   int conj, mods;
 826 }
 827 {
 828   mods=Modifiers() q=Clause(field)
 829   {
 830     addClause(clauses, CONJ_NONE, mods, q);
 831     if (mods == MOD_NONE)
 832         firstQuery=q;
 833   }
 834   (
 835     conj=Conjunction() mods=Modifiers() q=Clause(field)
 836     { addClause(clauses, conj, mods, q); }
 837   )*
 838     {
 839       if (clauses.size() == 1 && firstQuery != null)
 840         return firstQuery;
 841       else {
 842   return getBooleanQuery(clauses);
 843       }
 844     }
 845 }
 846
 847 Query Clause(String field) : {
 848   Query q;
 849   Token fieldToken=null, boost=null;
 850 }
 851 {
 852   [
 853     LOOKAHEAD(2)
 854     fieldToken=<TERM> <COLON> {
 855       field=discardEscapeChar(fieldToken.image);
 856     }
 857   ]
 858
 859   (
 860    q=Term(field)
 861    | <LPAREN> q=Query(field) <RPAREN> (<CARAT> boost=<NUMBER>)?
 862
 863   )
 864     {
 865       if (boost != null) {
 866         float f = (float)1.0;
 867   try {
 868     f = Float.valueOf(boost.image).floatValue();
 869           q.setBoost(f);
 870   } catch (Exception ignored) { }
 871       }
 872       return q;
 873     }
 874 }
 875
 876
 877 Query Term(String field) : {
 878   Token term, boost=null, fuzzySlop=null, goop1, goop2;
 879   boolean prefix = false;
 880   boolean wildcard = false;
 881   boolean fuzzy = false;
 882   boolean rangein = false;
 883   Query q;
 884 }
 885 {
 886   (
 887      (
 888        term=<TERM>
 889        | term=<PREFIXTERM> { prefix=true; }
 890        | term=<WILDTERM> { wildcard=true; }
 891        | term=<NUMBER>
 892      )
 893      [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ]
 894      [ <CARAT> boost=<NUMBER> [ fuzzySlop=<FUZZY_SLOP> { fuzzy=true; } ] ]
 895      {
 896        String termImage=discardEscapeChar(term.image);
 897        if (wildcard) {
 898        q = getWildcardQuery(field, termImage);
 899        } else if (prefix) {
 900          q = getPrefixQuery(field,
 901            discardEscapeChar(term.image.substring
 902           (0, term.image.length()-1)));
 903        } else if (fuzzy) {
 904           float fms = fuzzyMinSim;
 905           try {
 906             fms = Float.valueOf(fuzzySlop.image.substring(1)).floatValue();
 907           } catch (Exception ignored) { }
 908          if(fms < 0.0f || fms > 1.0f){
 909            throw new ParseException("Minimum similarity for a FuzzyQuery has to be between 0.0f and 1.0f !");
 910          }
 911          if(fms == fuzzyMinSim)
 912            q = getFuzzyQuery(field, termImage);
 913          else
 914            q = getFuzzyQuery(field, termImage, fms);
 915        } else {
 916          q = getFieldQuery(field, analyzer, termImage);
 917        }
 918      }
 919      | ( <RANGEIN_START> ( goop1=<RANGEIN_GOOP>|goop1=<RANGEIN_QUOTED> )
 920          [ <RANGEIN_TO> ] ( goop2=<RANGEIN_GOOP>|goop2=<RANGEIN_QUOTED> )
 921          <RANGEIN_END> )
 922        [ <CARAT> boost=<NUMBER> ]
 923         {
 924           if (goop1.kind == RANGEIN_QUOTED) {
 925             goop1.image = goop1.image.substring(1, goop1.image.length()-1);
 926           } else {
 927             goop1.image = discardEscapeChar(goop1.image);
 928           }
 929           if (goop2.kind == RANGEIN_QUOTED) {
 930             goop2.image = goop2.image.substring(1, goop2.image.length()-1);
 931       } else {
 932         goop2.image = discardEscapeChar(goop2.image);
 933       }
 934           q = getRangeQuery(field, analyzer, goop1.image, goop2.image, true);
 935         }
 936      | ( <RANGEEX_START> ( goop1=<RANGEEX_GOOP>|goop1=<RANGEEX_QUOTED> )
 937          [ <RANGEEX_TO> ] ( goop2=<RANGEEX_GOOP>|goop2=<RANGEEX_QUOTED> )
 938          <RANGEEX_END> )
 939        [ <CARAT> boost=<NUMBER> ]
 940         {
 941           if (goop1.kind == RANGEEX_QUOTED) {
 942             goop1.image = goop1.image.substring(1, goop1.image.length()-1);
 943           } else {
 944             goop1.image = discardEscapeChar(goop1.image);
 945           }
 946           if (goop2.kind == RANGEEX_QUOTED) {
 947             goop2.image = goop2.image.substring(1, goop2.image.length()-1);
 948       } else {
 949         goop2.image = discardEscapeChar(goop2.image);
 950       }
 951
 952           q = getRangeQuery(field, analyzer, goop1.image, goop2.image, false);
 953         }
 954      | term=<QUOTED>
 955        [ fuzzySlop=<FUZZY_SLOP> ]
 956        [ <CARAT> boost=<NUMBER> ]
 957        {
 958          int s = phraseSlop;
 959
 960          if (fuzzySlop != null) {
 961            try {
 962              s = Float.valueOf(fuzzySlop.image.substring(1)).intValue();
 963            }
 964            catch (Exception ignored) { }
 965          }
 966          q = getFieldQuery(field, analyzer, term.image.substring(1, term.image.length()-1), s);
 967        }
 968   )
 969   {
 970     if (boost != null) {
 971       float f = (float) 1.0;
 972       try {
 973         f = Float.valueOf(boost.image).floatValue();
 974       }
 975       catch (Exception ignored) {
 976     /* Should this be handled somehow? (defaults to "no boost", if
 977      * boost number is invalid)
 978      */
 979       }
 980
 981       // avoid boosting null queries, such as those caused by stop words
 982       if (q != null) {
 983         q.setBoost(f);
 984       }
 985     }
 986     return q;
 987   }
 988 }