/*
 * Copyright 2004 The Apache Software Foundation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
17 using IndexReader
= Lucene
.Net
.Index
.IndexReader
;
18 using Hits
= Lucene
.Net
.Search
.Hits
;
19 using Similarity
= Lucene
.Net
.Search
.Similarity
;
20 namespace Lucene
.Net
.Documents
/// <summary>A Field is a section of a Document. Each Field has two parts, a name and a
/// value. Values may be free text, provided as a String or as a Reader, or they
/// may be atomic keywords, which are not further processed. Such keywords may
/// be used to represent dates, URLs, etc. Fields are optionally stored in the
/// index, so that they may be returned with hits on the document.
/// </summary>
31 public sealed class Field
// Name of the Field. The constructors overwrite this default with an
// interned copy of the name supplied by the caller.
private string name = "body";

// String value; stays null (the CLR default) unless a String-valued
// constructor assigns it.
private string stringValue;

// Whether a term vector is kept for this Field; false unless set.
private bool storeTermVector;

// Reader value; stays null (the CLR default) unless the Reader-valued
// constructor assigns it.
private System.IO.TextReader readerValue;

// Storage/indexing flags. The Reader-valued constructor never touches them,
// so a Reader-valued Field keeps exactly these defaults:
// not stored, indexed, tokenized.
private bool isStored;
private bool isIndexed = true;
private bool isTokenized = true;

// Score multiplier applied to hits on this Field.
private float boost = 1.0f;
/// <summary>Sets the boost factor for hits on this Field. This value will be
/// multiplied into the score of all hits on this Field of this document.
/// </summary>
/// <remarks>
/// <para>The boost is multiplied by <see cref="Document.GetBoost()"/> of the document
/// containing this Field. If a document has multiple fields with the same
/// name, all such values are multiplied together. This product is then
/// multiplied by the value <see cref="Similarity.LengthNorm(string,int)"/>, and
/// rounded by <see cref="Similarity.EncodeNorm(float)"/> before it is stored in the
/// index. One should attempt to ensure that this product does not overflow
/// the range of that encoding.</para>
/// </remarks>
/// <param name="boost">The new boost factor for this Field.</param>
/// <seealso cref="Document.SetBoost(float)"/>
/// <seealso cref="Similarity.LengthNorm(string,int)"/>
/// <seealso cref="Similarity.EncodeNorm(float)"/>
public void SetBoost(float boost)
{
    // The parameter shadows the field, so qualify the field with "this".
    this.boost = boost;
}
/// <summary>Returns the boost factor for hits on this Field.
/// </summary>
/// <remarks>
/// <para>The default value is 1.0.</para>
/// <para>Note: this value is not stored directly with the document in the index.
/// Documents returned from <see cref="IndexReader.Document(int)"/> and
/// <see cref="Hits.Doc(int)"/> may thus not have the same value present as when
/// this Field was indexed.</para>
/// </remarks>
/// <returns>The boost factor currently held by this Field.</returns>
/// <seealso cref="SetBoost(float)"/>
public float GetBoost()
{
    return this.boost;
}
/// <summary>Builds a String-valued Field that is stored and indexed but not
/// tokenized. Useful for non-text fields, e.g. date or url.
/// </summary>
/// <param name="name">The name of the Field.</param>
/// <param name="value_Renamed">The string value of the Field.</param>
/// <returns>The newly constructed Field.</returns>
public static Field Keyword(System.String name, System.String value_Renamed)
{
    const bool store = true;
    const bool index = true;
    const bool token = false;

    return new Field(name, value_Renamed, store, index, token);
}
/// <summary>Builds a String-valued Field that is neither tokenized nor indexed,
/// but is stored in the index, for return with hits.
/// </summary>
/// <param name="name">The name of the Field.</param>
/// <param name="value_Renamed">The string value of the Field.</param>
/// <returns>The newly constructed Field.</returns>
public static Field UnIndexed(System.String name, System.String value_Renamed)
{
    const bool store = true;
    const bool index = false;
    const bool token = false;

    return new Field(name, value_Renamed, store, index, token);
}
/// <summary>Builds a String-valued Field that is tokenized, indexed and stored
/// in the index, for return with hits. Useful for short text fields, like
/// "title" or "subject". No term vector is stored for this Field.
/// </summary>
/// <param name="name">The name of the Field.</param>
/// <param name="value_Renamed">The string value of the Field.</param>
/// <returns>The newly constructed Field.</returns>
public static Field Text(System.String name, System.String value_Renamed)
{
    // Delegate to the overload that takes the term-vector flag explicitly.
    const bool withTermVector = false;
    return Text(name, value_Renamed, withTermVector);
}
/// <summary>Builds a Date-valued Field that is indexed and stored in the index,
/// for return with hits, but is not tokenized.
/// </summary>
/// <param name="name">The name of the Field.</param>
/// <param name="value_Renamed">The date; it is converted to a string with
/// <c>DateField.DateToString</c> before being handed to the Field.</param>
/// <returns>The newly constructed Field.</returns>
public static Field Keyword(System.String name, System.DateTime value_Renamed)
{
    System.String dateText = DateField.DateToString(value_Renamed);
    return new Field(name, dateText, true, true, false);
}
/// <summary>Builds a String-valued Field that is tokenized, indexed and stored
/// in the index, for return with hits. Useful for short text fields, like
/// "title" or "subject".
/// </summary>
/// <param name="name">The name of the Field.</param>
/// <param name="value_Renamed">The string value of the Field.</param>
/// <param name="storeTermVector">true if a term vector should be stored for the Field.</param>
/// <returns>The newly constructed Field.</returns>
public static Field Text(System.String name, System.String value_Renamed, bool storeTermVector)
{
    const bool store = true;
    const bool index = true;
    const bool token = true;

    return new Field(name, value_Renamed, store, index, token, storeTermVector);
}
/// <summary>Builds a String-valued Field that is tokenized and indexed, but is
/// not stored in the index. No term vector is stored for this Field.
/// </summary>
/// <param name="name">The name of the Field.</param>
/// <param name="value_Renamed">The string value of the Field.</param>
/// <returns>The newly constructed Field.</returns>
public static Field UnStored(System.String name, System.String value_Renamed)
{
    // Delegate to the overload that takes the term-vector flag explicitly.
    const bool withTermVector = false;
    return UnStored(name, value_Renamed, withTermVector);
}
/// <summary>Builds a String-valued Field that is tokenized and indexed, but is
/// not stored in the index.
/// </summary>
/// <param name="name">The name of the Field.</param>
/// <param name="value_Renamed">The string value of the Field.</param>
/// <param name="storeTermVector">true if a term vector should be stored for the Field.</param>
/// <returns>The newly constructed Field.</returns>
public static Field UnStored(System.String name, System.String value_Renamed, bool storeTermVector)
{
    const bool store = false;
    const bool index = true;
    const bool token = true;

    return new Field(name, value_Renamed, store, index, token, storeTermVector);
}
/// <summary>Builds a Reader-valued Field that is tokenized and indexed, but is
/// not stored in the index verbatim. Useful for longer text fields, like
/// "body". No term vector is stored for this Field.
/// </summary>
/// <param name="name">The name of the Field.</param>
/// <param name="value_Renamed">The reader supplying the text of the Field.</param>
/// <returns>The newly constructed Field.</returns>
public static Field Text(System.String name, System.IO.TextReader value_Renamed)
{
    // Delegate to the overload that takes the term-vector flag explicitly.
    const bool withTermVector = false;
    return Text(name, value_Renamed, withTermVector);
}
/// <summary>Constructs a Reader-valued Field that is tokenized and indexed, but is
/// not stored in the index verbatim. Useful for longer text fields, like
/// "body".
/// </summary>
/// <param name="name">The name of the Field.</param>
/// <param name="value_Renamed">The reader supplying the text of the Field.</param>
/// <param name="storeTermVector">true if a term vector should be stored for the Field.</param>
/// <returns>The newly constructed Field.</returns>
public static Field Text(System.String name, System.IO.TextReader value_Renamed, bool storeTermVector)
{
    Field field = new Field(name, value_Renamed);

    // The Reader constructor has no term-vector parameter, so the flag is
    // applied to the freshly built instance afterwards.
    field.storeTermVector = storeTermVector;

    return field;
}
/// <summary>The name of the Field (e.g., "date", "subject", "title", or "body")
/// as an interned string.
/// </summary>
/// <returns>The Field name; the constructors intern it before storing it.</returns>
public System.String Name()
{
    return this.name;
}
/// <summary>The value of the Field as a String, or null. If null, the Reader value
/// is used. Exactly one of <see cref="StringValue()"/> and
/// <see cref="ReaderValue()"/> must be set.
/// </summary>
/// <returns>The string value, or null for a Reader-valued Field.</returns>
public System.String StringValue()
{
    return this.stringValue;
}
/// <summary>The value of the Field as a Reader, or null. If null, the String value
/// is used. Exactly one of <see cref="StringValue()"/> and
/// <see cref="ReaderValue()"/> must be set.
/// </summary>
/// <returns>The reader value, or null for a String-valued Field.</returns>
public System.IO.TextReader ReaderValue()
{
    return this.readerValue;
}
/// <summary>Creates a Field from every setting except <code>storeTermVector</code>,
/// which is passed on as <code>false</code>.
/// </summary>
/// <param name="name">The name of the Field.</param>
/// <param name="string_Renamed">The string to process.</param>
/// <param name="store">true if the Field should store the string.</param>
/// <param name="index">true if the Field should be indexed.</param>
/// <param name="token">true if the Field should be tokenized.</param>
public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token)
    : this(name, string_Renamed, store, index, token, false)
{
    // All work happens in the six-argument constructor.
}
/// <summary>Creates a String-valued Field with every setting given explicitly.
/// </summary>
/// <param name="name">The name of the Field; it is interned before being stored.</param>
/// <param name="string_Renamed">The string to process.</param>
/// <param name="store">true if the Field should store the string.</param>
/// <param name="index">true if the Field should be indexed.</param>
/// <param name="token">true if the Field should be tokenized.</param>
/// <param name="storeTermVector">true if we should store the Term Vector info.</param>
/// <exception cref="System.ArgumentException">
/// <paramref name="name"/> or <paramref name="string_Renamed"/> is null, or
/// <paramref name="storeTermVector"/> is true while <paramref name="index"/> is false.
/// </exception>
public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token, bool storeTermVector)
{
    if (name == null)
    {
        throw new System.ArgumentException("name cannot be null");
    }
    if (string_Renamed == null)
    {
        throw new System.ArgumentException("value cannot be null");
    }
    if (!index && storeTermVector)
    {
        throw new System.ArgumentException("cannot store a term vector for fields that are not indexed.");
    }

    // Fully qualified so the call does not depend on a "using System;" directive.
    this.name = System.String.Intern(name); // Field names are interned
    this.stringValue = string_Renamed;
    this.isStored = store;
    this.isIndexed = index;
    this.isTokenized = token;
    this.storeTermVector = storeTermVector;
}
/// <summary>Creates a Reader-valued Field. Only the name and the reader are
/// assigned; the stored/indexed/tokenized/term-vector flags keep their field
/// initializer values.
/// </summary>
/// <param name="name">The name of the Field; it is interned before being stored.</param>
/// <param name="reader">The reader supplying the text of the Field.</param>
/// <exception cref="System.ArgumentException">
/// <paramref name="name"/> or <paramref name="reader"/> is null.
/// </exception>
internal Field(System.String name, System.IO.TextReader reader)
{
    if (name == null)
    {
        throw new System.ArgumentException("name cannot be null");
    }
    if (reader == null)
    {
        throw new System.ArgumentException("value cannot be null");
    }

    // Fully qualified so the call does not depend on a "using System;" directive.
    this.name = System.String.Intern(name); // Field names are interned
    this.readerValue = reader;
}
/// <summary>True iff the value of the Field is to be stored in the index for return
/// with search hits. It is an error for this to be true if a Field is
/// Reader-valued.
/// </summary>
/// <returns>The stored flag of this Field.</returns>
public bool IsStored()
{
    return this.isStored;
}
/// <summary>True iff the value of the Field is to be indexed, so that it may be
/// searched on.
/// </summary>
/// <returns>The indexed flag of this Field.</returns>
public bool IsIndexed()
{
    return this.isIndexed;
}
/// <summary>True iff the value of the Field should be tokenized as text prior to
/// indexing. Un-tokenized fields are indexed as a single word and may not be
/// Reader-valued.
/// </summary>
/// <returns>The tokenized flag of this Field.</returns>
public bool IsTokenized()
{
    return this.isTokenized;
}
/// <summary>Reports whether the term or terms used to index this Field are kept
/// as a term vector, available from
/// <see cref="IndexReader.GetTermFreqVector(int,string)"/>.
/// A term vector does not give access to the original content of the Field,
/// only to the terms used to index it. If the original content must be
/// preserved, use the <code>stored</code> attribute instead.
/// </summary>
/// <returns>true iff a term vector is stored for this Field.</returns>
/// <seealso cref="IndexReader.GetTermFreqVector(int,string)"/>
public bool IsTermVectorStored()
{
    return this.storeTermVector;
}
/// <summary>Renders the Field for human consumption. The label is chosen from
/// the stored/indexed/tokenized flags: "Keyword", "Unindexed", "Text" or
/// "UnStored". Any other combination falls back to the base implementation.
/// </summary>
/// <returns>A short description of the Field.</returns>
public override System.String ToString()
{
    if (isStored)
    {
        if (!isTokenized)
        {
            // Stored and untokenized: the indexed flag alone picks the label.
            System.String label = isIndexed ? "Keyword<" : "Unindexed<";
            return label + name + ":" + stringValue + ">";
        }

        if (isIndexed && stringValue != null)
        {
            return "Text<" + name + ":" + stringValue + ">";
        }
    }
    else if (isIndexed && isTokenized)
    {
        // Not stored: Reader-valued text if a reader is present, otherwise UnStored.
        if (readerValue != null)
        {
            return "Text<" + name + ":" + readerValue + ">";
        }

        return "UnStored<" + name + ">";
    }

    return base.ToString();
}