2 * Copyright 2004 The Apache Software Foundation
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
8 * http://www.apache.org/licenses/LICENSE-2.0
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
17 using IndexReader
= Lucene
.Net
.Index
.IndexReader
;
18 using Hits
= Lucene
.Net
.Search
.Hits
;
19 using Similarity
= Lucene
.Net
.Search
.Similarity
;
20 using Parameter
= Lucene
.Net
.Util
.Parameter
;
21 namespace Lucene
.Net
.Documents
24 /// <summary>A field is a section of a Document. Each field has two parts, a name and a
25 /// value. Values may be free text, provided as a String or as a Reader, or they
26 /// may be atomic keywords, which are not further processed. Such keywords may
27 /// be used to represent dates, urls, etc. Fields are optionally stored in the
28 /// index, so that they may be returned with hits on the document.
32 public sealed class Field
// Name of the field; stays "body" until a constructor assigns one.
private string name = "body";

// The one and only data object for all the different kinds of field value
// (string, reader or binary).
private object fieldsData;

// Term vector options. They rely on the CLR default of false.
private bool storeTermVector;
private bool storeOffsetWithTermVector;
private bool storePositionWithTermVector;

// Storage and indexing flags. Only the two that default to true are
// initialised explicitly; the others start out false.
private bool isStored;
private bool isIndexed = true;
private bool isTokenized = true;
private bool isBinary;
private bool isCompressed;

// Boost factor for hits on this field.
private float boost = 1.0f;
/// <summary>Typed constants describing whether, and in which form, a field value
/// is stored in the index.</summary>
public sealed class Store : Parameter
{
    /// <summary>Store the original field value in the index in a compressed form.
    /// This is useful for long documents and for binary valued fields.</summary>
    public static readonly Store COMPRESS = new Store("COMPRESS");

    /// <summary>Store the original field value in the index. This is useful for
    /// short texts, like a document's title, which should be displayed with the
    /// results. The value is kept in its original form, i.e. no analyzer is
    /// applied to it beforehand.</summary>
    public static readonly Store YES = new Store("YES");

    /// <summary>Do not store the field value in the index.</summary>
    public static readonly Store NO = new Store("NO");

    // Only the constants declared above are ever created.
    internal Store(string name) : base(name)
    {
    }
}
/// <summary>Typed constants describing whether, and how, a field value is
/// indexed.</summary>
public sealed class Index : Parameter
{
    /// <summary>Do not index the field value. The field can thus not be searched,
    /// but its contents can still be accessed provided it is stored
    /// (see <c>Field.Store</c>).</summary>
    public static readonly Index NO = new Index("NO");

    /// <summary>Index the field's value so it can be searched. An Analyzer will be
    /// used to tokenize and possibly further normalize the text before its terms
    /// are stored in the index. This is useful for common text.</summary>
    public static readonly Index TOKENIZED = new Index("TOKENIZED");

    /// <summary>Index the field's value without using an Analyzer, so it can be
    /// searched. As no analyzer is used, the value is stored as a single term.
    /// This is useful for unique ids like product numbers.</summary>
    public static readonly Index UN_TOKENIZED = new Index("UN_TOKENIZED");

    // Only the constants declared above are ever created.
    internal Index(string name) : base(name)
    {
    }
}
/// <summary>Typed constants describing whether term vectors are stored for a
/// field and which extra information accompanies them.</summary>
public sealed class TermVector : Parameter
{
    /// <summary>Do not store term vectors.</summary>
    public static readonly TermVector NO = new TermVector("NO");

    /// <summary>Store the term vectors of each document. A term vector is a list
    /// of the document's terms and their number of occurrences in that
    /// document.</summary>
    public static readonly TermVector YES = new TermVector("YES");

    /// <summary>Store the term vector plus token position information.</summary>
    /// <seealso cref="YES"/>
    public static readonly TermVector WITH_POSITIONS = new TermVector("WITH_POSITIONS");

    /// <summary>Store the term vector plus token offset information.</summary>
    /// <seealso cref="YES"/>
    public static readonly TermVector WITH_OFFSETS = new TermVector("WITH_OFFSETS");

    /// <summary>Store the term vector plus token position and offset
    /// information.</summary>
    /// <seealso cref="YES"/>
    /// <seealso cref="WITH_POSITIONS"/>
    /// <seealso cref="WITH_OFFSETS"/>
    public static readonly TermVector WITH_POSITIONS_OFFSETS = new TermVector("WITH_POSITIONS_OFFSETS");

    // Only the constants declared above are ever created.
    internal TermVector(string name) : base(name)
    {
    }
}
143 /// <summary>Sets the boost factor hits on this field. This value will be
144 /// multiplied into the score of all hits on this field of this
147 /// <p>The boost is multiplied by {@link Document#GetBoost()} of the document
148 /// containing this field. If a document has multiple fields with the same
149 /// name, all such values are multiplied together. This product is then
150 /// multiplied by the value {@link Similarity#LengthNorm(String,int)}, and
151 /// rounded by {@link Similarity#EncodeNorm(float)} before it is stored in the
152 /// index. One should attempt to ensure that this product does not overflow
153 /// the range of that encoding.
156 /// <seealso cref="Document#SetBoost(float)">
158 /// <seealso cref="Similarity#LengthNorm(String, int)">
160 /// <seealso cref="Similarity#EncodeNorm(float)">
162 public void SetBoost(float boost
)
167 /// <summary>Returns the boost factor for hits for this field.
169 /// <p>The default value is 1.0.
171 /// <p>Note: this value is not stored directly with the document in the index.
172 /// Documents returned from {@link IndexReader#Document(int)} and
173 /// {@link Hits#Doc(int)} may thus not have the same value present as when
174 /// this field was indexed.
177 /// <seealso cref="#SetBoost(float)">
179 public float GetBoost()
/// <summary>Builds a string-valued field that is stored and indexed but not
/// tokenized. Useful for non-text fields, e.g. a date or a url.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The string value of the field</param>
/// <returns>The newly created field</returns>
/// <deprecated> use <c>Field(name, value, Field.Store.YES, Field.Index.UN_TOKENIZED)</c> instead </deprecated>
public static Field Keyword(string name, string value_Renamed)
{
    const bool stored = true;
    const bool indexed = true;
    const bool tokenized = false;

    return new Field(name, value_Renamed, stored, indexed, tokenized);
}
/// <summary>Builds a string-valued field that is neither tokenized nor indexed,
/// but is stored in the index, for return with hits.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The string value of the field</param>
/// <returns>The newly created field</returns>
/// <deprecated> use <c>Field(name, value, Field.Store.YES, Field.Index.NO)</c> instead </deprecated>
public static Field UnIndexed(string name, string value_Renamed)
{
    const bool stored = true;
    const bool indexed = false;
    const bool tokenized = false;

    return new Field(name, value_Renamed, stored, indexed, tokenized);
}
/// <summary>Builds a string-valued field that is tokenized, indexed and stored
/// in the index, for return with hits. Useful for short text fields, like
/// "title" or "subject". No term vector is stored for this field.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The string value of the field</param>
/// <returns>The newly created field</returns>
/// <deprecated> use <c>Field(name, value, Field.Store.YES, Field.Index.TOKENIZED)</c> instead </deprecated>
public static Field Text(string name, string value_Renamed)
{
    // Same as the three-argument overload with term vectors switched off.
    const bool withTermVector = false;
    return Text(name, value_Renamed, withTermVector);
}
/// <summary>Builds a date-valued field that is stored and indexed but not
/// tokenized. The date is converted to a string with
/// <c>DateField.DateToString</c> before the field is created.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The date value of the field</param>
/// <returns>The newly created field</returns>
/// <deprecated> use <c>Field(name, value, Field.Store.YES, Field.Index.UN_TOKENIZED)</c> instead </deprecated>
public static Field Keyword(string name, System.DateTime value_Renamed)
{
    string dateAsText = DateField.DateToString(value_Renamed);

    // Flags are (store, index, token).
    return new Field(name, dateAsText, true, true, false);
}
/// <summary>Builds a string-valued field that is tokenized, indexed and stored
/// in the index, for return with hits. Useful for short text fields, like
/// "title" or "subject".</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The string value of the field</param>
/// <param name="storeTermVector">true if a term vector should be stored</param>
/// <returns>The newly created field</returns>
/// <deprecated> use <c>Field(name, value, Field.Store.YES, Field.Index.TOKENIZED, storeTermVector)</c> instead </deprecated>
public static Field Text(string name, string value_Renamed, bool storeTermVector)
{
    const bool stored = true;
    const bool indexed = true;
    const bool tokenized = true;

    return new Field(name, value_Renamed, stored, indexed, tokenized, storeTermVector);
}
/// <summary>Builds a string-valued field that is tokenized and indexed, but not
/// stored in the index. No term vector is stored for this field.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The string value of the field</param>
/// <returns>The newly created field</returns>
/// <deprecated> use <c>Field(name, value, Field.Store.NO, Field.Index.TOKENIZED)</c> instead </deprecated>
public static Field UnStored(string name, string value_Renamed)
{
    // Same as the three-argument overload with term vectors switched off.
    const bool withTermVector = false;
    return UnStored(name, value_Renamed, withTermVector);
}
/// <summary>Builds a string-valued field that is tokenized and indexed, but not
/// stored in the index.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The string value of the field</param>
/// <param name="storeTermVector">true if a term vector should be stored</param>
/// <returns>The newly created field</returns>
/// <deprecated> use <c>Field(name, value, Field.Store.NO, Field.Index.TOKENIZED, storeTermVector)</c> instead </deprecated>
public static Field UnStored(string name, string value_Renamed, bool storeTermVector)
{
    const bool stored = false;
    const bool indexed = true;
    const bool tokenized = true;

    return new Field(name, value_Renamed, stored, indexed, tokenized, storeTermVector);
}
/// <summary>Builds a reader-valued field that is tokenized and indexed, but not
/// stored in the index verbatim. Useful for longer text fields, like "body".
/// No term vector is stored for this field.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The reader supplying the field content</param>
/// <returns>The newly created field</returns>
/// <deprecated> use <c>Field(name, value)</c> instead </deprecated>
public static Field Text(string name, System.IO.TextReader value_Renamed)
{
    // Same as the three-argument overload with term vectors switched off.
    const bool withTermVector = false;
    return Text(name, value_Renamed, withTermVector);
}
/// <summary>Builds a reader-valued field that is tokenized and indexed, but not
/// stored in the index verbatim. Useful for longer text fields.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The reader supplying the field content</param>
/// <param name="storeTermVector">true if a term vector should be stored</param>
/// <returns>The newly created field</returns>
/// <deprecated> use <c>Field(name, value, storeTermVector)</c> with a <c>Field.TermVector</c> argument instead </deprecated>
public static Field Text(string name, System.IO.TextReader value_Renamed, bool storeTermVector)
{
    Field readerField = new Field(name, value_Renamed);

    // Only the plain term vector flag is overridden; the position and offset
    // flags keep whatever the constructor set.
    readerField.storeTermVector = storeTermVector;
    return readerField;
}
288 /// <summary>Returns the name of the field as an interned string.
289 /// For example "date", "title", "body", ...
291 public System
.String
Name()
/// <summary>The value of the field as a string, or null when the field does not
/// hold a string. If null, the reader value or binary value is used. Exactly
/// one of StringValue(), ReaderValue() and BinaryValue() must be set.</summary>
/// <returns>The string value, or null</returns>
public string StringValue()
{
    if (fieldsData is string)
    {
        return (string) fieldsData;
    }
    return null;
}
/// <summary>The value of the field as a reader, or null when the field does not
/// hold a reader. If null, the string value or binary value is used. Exactly
/// one of StringValue(), ReaderValue() and BinaryValue() must be set.</summary>
/// <returns>The reader value, or null</returns>
public System.IO.TextReader ReaderValue()
{
    System.IO.TextReader reader = fieldsData as System.IO.TextReader;
    return reader;
}
/// <summary>The value of the field as a byte array, or null when the field does
/// not hold binary data. If null, the reader or string value is used. Exactly
/// one of StringValue(), ReaderValue() and BinaryValue() must be set.</summary>
/// <returns>The binary value, or null</returns>
public byte[] BinaryValue()
{
    byte[] bytes = fieldsData as byte[];
    return bytes;
}
/// <summary>Creates a field from its name, its string value and the way it is
/// saved in the index. Term vectors are not stored.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The string to process</param>
/// <param name="store">Whether the value should be stored in the index</param>
/// <param name="index">Whether the field should be indexed and, if so, whether
/// it should be tokenized before indexing</param>
/// <exception cref="System.NullReferenceException">if name or value is null</exception>
/// <exception cref="System.ArgumentException">if the field is neither stored nor indexed</exception>
public Field(string name, string value_Renamed, Store store, Index index)
    : this(name, value_Renamed, store, index, TermVector.NO)
{
    // All work is done by the five-argument constructor.
}
/// <summary>Creates a field from its name, its string value and the way it is
/// saved in the index.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The string to process</param>
/// <param name="store">Whether the value should be stored in the index</param>
/// <param name="index">Whether the field should be indexed and, if so, whether
/// it should be tokenized before indexing</param>
/// <param name="termVector">Whether a term vector should be stored</param>
/// <exception cref="System.NullReferenceException">if name or value is null</exception>
/// <exception cref="System.ArgumentException">if the field is neither stored
/// nor indexed, if it is not indexed but a term vector is requested, or if
/// store, index or termVector is not one of the known constants</exception>
public Field(string name, string value_Renamed, Store store, Index index, TermVector termVector)
{
    if (name == null)
    {
        throw new System.NullReferenceException("name cannot be null");
    }
    if (value_Renamed == null)
    {
        throw new System.NullReferenceException("value cannot be null");
    }

    // Combinations that make no sense are rejected before any state is touched.
    bool notIndexed = (index == Index.NO);
    if (notIndexed && store == Store.NO)
    {
        throw new System.ArgumentException("it doesn't make sense to have a field that " + "is neither indexed nor stored");
    }
    if (notIndexed && termVector != TermVector.NO)
    {
        throw new System.ArgumentException("cannot store term vector information " + "for a field that is not indexed");
    }

    this.name = string.Intern(name); // field names are interned
    this.fieldsData = value_Renamed;

    // Store option: YES and COMPRESS both store the value, COMPRESS also compresses it.
    bool compressed = (store == Store.COMPRESS);
    bool stored = compressed || (store == Store.YES);
    if (!stored && store != Store.NO)
    {
        throw new System.ArgumentException("unknown store parameter " + store);
    }
    this.isStored = stored;
    this.isCompressed = compressed;

    // Index option: TOKENIZED and UN_TOKENIZED both index, only TOKENIZED tokenizes.
    bool tokenized = (index == Index.TOKENIZED);
    bool indexed = tokenized || (index == Index.UN_TOKENIZED);
    if (!indexed && !notIndexed)
    {
        throw new System.ArgumentException("unknown index parameter " + index);
    }
    this.isIndexed = indexed;
    this.isTokenized = tokenized;

    this.isBinary = false;

    SetStoreTermVector(termVector);
}
/// <summary>Creates a tokenized and indexed field that is not stored. Term
/// vectors are not stored either.</summary>
/// <param name="name">The name of the field</param>
/// <param name="reader">The reader with the content</param>
/// <exception cref="System.NullReferenceException">if name or reader is null</exception>
public Field(string name, System.IO.TextReader reader)
    : this(name, reader, TermVector.NO)
{
    // All work is done by the three-argument constructor.
}
/// <summary>Creates a tokenized and indexed field that is not stored,
/// optionally storing term vectors.</summary>
/// <param name="name">The name of the field</param>
/// <param name="reader">The reader with the content</param>
/// <param name="termVector">Whether a term vector should be stored</param>
/// <exception cref="System.NullReferenceException">if name or reader is null</exception>
/// <exception cref="System.ArgumentException">if termVector is not one of the
/// known constants</exception>
public Field(string name, System.IO.TextReader reader, TermVector termVector)
{
    if (name == null)
    {
        throw new System.NullReferenceException("name cannot be null");
    }
    if (reader == null)
    {
        throw new System.NullReferenceException("reader cannot be null");
    }

    this.name = string.Intern(name); // field names are interned
    this.fieldsData = reader;

    // Reader-valued fields are always indexed and tokenized, never stored.
    this.isStored = this.isCompressed = this.isBinary = false;
    this.isIndexed = this.isTokenized = true;

    SetStoreTermVector(termVector);
}
/// <summary>Creates a field from all parameters except <c>storeTermVector</c>,
/// which is set to <c>false</c>.</summary>
/// <param name="name">The name of the field</param>
/// <param name="string_Renamed">The string to process</param>
/// <param name="store">true if the field should store the string</param>
/// <param name="index">true if the field should be indexed</param>
/// <param name="token">true if the field should be tokenized</param>
/// <deprecated> use <c>Field(name, value, Field.Store, Field.Index)</c> instead </deprecated>
public Field(string name, string string_Renamed, bool store, bool index, bool token)
    : this(name, string_Renamed, store, index, token, false)
{
    // All work is done by the six-argument constructor.
}
/// <summary>Creates a stored field with a binary value. Optionally the value
/// may be compressed. The field is neither indexed nor tokenized and has no
/// term vector.</summary>
/// <param name="name">The name of the field</param>
/// <param name="value_Renamed">The binary value</param>
/// <param name="store">How the value should be stored (compressed or not)</param>
/// <exception cref="System.ArgumentException">if name or value is null, if
/// store is <c>Store.NO</c>, or if store is not one of the known constants</exception>
public Field(string name, byte[] value_Renamed, Store store)
{
    if (name == null)
    {
        throw new System.ArgumentException("name cannot be null");
    }
    if (value_Renamed == null)
    {
        throw new System.ArgumentException("value cannot be null");
    }

    this.name = string.Intern(name);
    this.fieldsData = value_Renamed;

    // A binary value has to be stored; only YES and COMPRESS are acceptable.
    if (store == Store.NO)
    {
        throw new System.ArgumentException("binary values can't be unstored");
    }
    bool compressed = (store == Store.COMPRESS);
    if (!compressed && store != Store.YES)
    {
        throw new System.ArgumentException("unknown store parameter " + store);
    }
    this.isStored = true;
    this.isCompressed = compressed;

    this.isIndexed = this.isTokenized = false;
    this.isBinary = true;

    SetStoreTermVector(TermVector.NO);
}
/// <summary>Creates a string-valued field from individual boolean flags.</summary>
/// <param name="name">The name of the field</param>
/// <param name="string_Renamed">The string to process</param>
/// <param name="store">true if the field should store the string</param>
/// <param name="index">true if the field should be indexed</param>
/// <param name="token">true if the field should be tokenized</param>
/// <param name="storeTermVector">true if the term vector info should be stored</param>
/// <exception cref="System.NullReferenceException">if name or value is null</exception>
/// <exception cref="System.ArgumentException">if a term vector is requested
/// for a field that is not indexed</exception>
/// <deprecated> use <c>Field(name, value, Field.Store, Field.Index, Field.TermVector)</c> instead </deprecated>
public Field(string name, string string_Renamed, bool store, bool index, bool token, bool storeTermVector)
{
    if (name == null)
    {
        throw new System.NullReferenceException("name cannot be null");
    }
    if (string_Renamed == null)
    {
        throw new System.NullReferenceException("value cannot be null");
    }
    if (storeTermVector && !index)
    {
        throw new System.ArgumentException("cannot store a term vector for fields that are not indexed");
    }

    this.name = string.Intern(name); // field names are interned
    this.fieldsData = string_Renamed;

    // The flags are taken over as given. The compression, binary, position and
    // offset flags are not assigned by this constructor.
    this.isStored = store;
    this.isIndexed = index;
    this.isTokenized = token;
    this.storeTermVector = storeTermVector;
}
/// <summary>Translates a <c>TermVector</c> constant into the three term vector
/// flags of this field.</summary>
/// <param name="termVector">One of the <c>TermVector</c> constants</param>
/// <exception cref="System.ArgumentException">if termVector is not one of the
/// known constants; the flags are left untouched in that case</exception>
private void SetStoreTermVector(TermVector termVector)
{
    bool both = (termVector == TermVector.WITH_POSITIONS_OFFSETS);
    bool positions = both || (termVector == TermVector.WITH_POSITIONS);
    bool offsets = both || (termVector == TermVector.WITH_OFFSETS);
    bool enabled = positions || offsets || (termVector == TermVector.YES);

    // Anything that is neither an "on" variant nor NO is not a known constant.
    if (!enabled && termVector != TermVector.NO)
    {
        throw new System.ArgumentException("unknown termVector parameter " + termVector);
    }

    this.storeTermVector = enabled;
    this.storePositionWithTermVector = positions;
    this.storeOffsetWithTermVector = offsets;
}
594 /// <summary>True iff the value of the field is to be stored in the index for return
595 /// with search hits. It is an error for this to be true if a field is
598 public bool IsStored()
603 /// <summary>True iff the value of the field is to be indexed, so that it may be
606 public bool IsIndexed()
611 /// <summary>True iff the value of the field should be tokenized as text prior to
612 /// indexing. Un-tokenized fields are indexed as a single word and may not be
615 public bool IsTokenized()
620 /// <summary>True if the value of the field is stored and compressed within the index </summary>
621 public bool IsCompressed()
/// <summary>True iff the term or terms used to index this field are stored as a
/// term vector, available from <c>IndexReader.GetTermFreqVector(int, String)</c>.
/// Term vectors do not provide access to the original content of the field,
/// only to the terms used to index it. If the original content must be
/// preserved, use the <c>stored</c> attribute instead.</summary>
/// <returns>The current value of the term vector flag</returns>
public bool IsTermVectorStored()
{
    return this.storeTermVector;
}
/// <summary>True iff terms are stored as a term vector together with their
/// offsets (start and end position in the source text).</summary>
/// <returns>The current value of the term vector offset flag</returns>
public bool IsStoreOffsetWithTermVector()
{
    return this.storeOffsetWithTermVector;
}
/// <summary>True iff terms are stored as a term vector together with their
/// token positions.</summary>
/// <returns>The current value of the term vector position flag</returns>
public bool IsStorePositionWithTermVector()
{
    return this.storePositionWithTermVector;
}
654 /// <summary>True iff the value of the field is stored as binary </summary>
655 public bool IsBinary()
660 /// <summary>Prints a Field for human consumption. </summary>
661 public override System
.String
ToString()
663 System
.Text
.StringBuilder result
= new System
.Text
.StringBuilder();
666 result
.Append("stored");
668 result
.Append("/compressed");
670 result
.Append("/uncompressed");
674 if (result
.Length
> 0)
676 result
.Append("indexed");
680 if (result
.Length
> 0)
682 result
.Append("tokenized");
686 if (result
.Length
> 0)
688 result
.Append("termVector");
690 if (storeOffsetWithTermVector
)
692 if (result
.Length
> 0)
694 result
.Append("termVectorOffsets");
696 if (storePositionWithTermVector
)
698 if (result
.Length
> 0)
700 result
.Append("termVectorPosition");
704 if (result
.Length
> 0)
706 result
.Append("binary");
713 if (fieldsData
!= null)
715 result
.Append(fieldsData
);
719 return result
.ToString();