Thumbnail file hits. Based on a patch from D Bera
[beagle.git] / beagled / Lucene.Net / Document / Field.cs
blob187f62126f34036d2b9925cfe6513f6892e3514e
1 /*
2 * Copyright 2004 The Apache Software Foundation
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
16 using System;
17 using IndexReader = Lucene.Net.Index.IndexReader;
18 using Hits = Lucene.Net.Search.Hits;
19 using Similarity = Lucene.Net.Search.Similarity;
20 namespace Lucene.Net.Documents
/// <summary>A Field is a section of a Document.  Each Field has two parts, a name and a
/// value.  Values may be free text, provided as a String or as a Reader, or they
/// may be atomic keywords, which are not further processed.  Such keywords may
/// be used to represent dates, urls, etc.  Fields are optionally stored in the
/// index, so that they may be returned with hits on the document.
/// </summary>
[Serializable]
public sealed class Field
{
    // The initializers below are the state of a Reader-valued field: the
    // (name, reader) constructor assigns only the name and the reader, so such
    // fields stay not stored, indexed and tokenized.
    private System.String name = "body";
    private System.String stringValue = null;
    private bool storeTermVector = false;
    private System.IO.TextReader readerValue = null;
    private bool isStored = false;
    private bool isIndexed = true;
    private bool isTokenized = true;

    private float boost = 1.0f;

    /// <summary>Sets the boost factor for hits on this Field.  This value will be
    /// multiplied into the score of all hits on this Field of this
    /// document.
    ///
    /// <para>The boost is multiplied by <see cref="Document.GetBoost()"/> of the document
    /// containing this Field.  If a document has multiple fields with the same
    /// name, all such values are multiplied together.  This product is then
    /// multiplied by the value <see cref="Similarity.LengthNorm(System.String, int)"/>, and
    /// rounded by <see cref="Similarity.EncodeNorm(float)"/> before it is stored in the
    /// index.  One should attempt to ensure that this product does not overflow
    /// the range of that encoding.</para>
    /// </summary>
    /// <param name="boost">The new boost factor for this Field.</param>
    /// <seealso cref="Document.SetBoost(float)"/>
    /// <seealso cref="Similarity.LengthNorm(System.String, int)"/>
    /// <seealso cref="Similarity.EncodeNorm(float)"/>
    public void SetBoost(float boost)
    {
        this.boost = boost;
    }

    /// <summary>Returns the boost factor for hits on this Field.
    ///
    /// <para>The default value is 1.0.</para>
    ///
    /// <para>Note: this value is not stored directly with the document in the index.
    /// Documents returned from <see cref="IndexReader.Document(int)"/> and
    /// <see cref="Hits.Doc(int)"/> may thus not have the same value present as when
    /// this Field was indexed.</para>
    /// </summary>
    /// <returns>The boost factor last set on this Field.</returns>
    /// <seealso cref="SetBoost(float)"/>
    public float GetBoost()
    {
        return boost;
    }

    /// <summary>Constructs a String-valued Field that is not tokenized, but is indexed
    /// and stored.  Useful for non-text fields, e.g. date or url.
    /// </summary>
    public static Field Keyword(System.String name, System.String value_Renamed)
    {
        return new Field(name, value_Renamed, true, true, false);
    }

    /// <summary>Constructs a String-valued Field that is not tokenized nor indexed,
    /// but is stored in the index, for return with hits.
    /// </summary>
    public static Field UnIndexed(System.String name, System.String value_Renamed)
    {
        return new Field(name, value_Renamed, true, false, false);
    }

    /// <summary>Constructs a String-valued Field that is tokenized and indexed,
    /// and is stored in the index, for return with hits.  Useful for short text
    /// fields, like "title" or "subject".  Term vector will not be stored for this Field.
    /// </summary>
    public static Field Text(System.String name, System.String value_Renamed)
    {
        return Text(name, value_Renamed, false);
    }

    /// <summary>Constructs a Date-valued Field that is not tokenized and is indexed,
    /// and stored in the index, for return with hits.  The date is converted to
    /// its string form with <c>DateField.DateToString</c>.
    /// </summary>
    public static Field Keyword(System.String name, System.DateTime value_Renamed)
    {
        return new Field(name, DateField.DateToString(value_Renamed), true, true, false);
    }

    /// <summary>Constructs a String-valued Field that is tokenized and indexed,
    /// and is stored in the index, for return with hits.  Useful for short text
    /// fields, like "title" or "subject".
    /// </summary>
    /// <param name="storeTermVector">true if the term vector should be stored</param>
    public static Field Text(System.String name, System.String value_Renamed, bool storeTermVector)
    {
        return new Field(name, value_Renamed, true, true, true, storeTermVector);
    }

    /// <summary>Constructs a String-valued Field that is tokenized and indexed,
    /// but that is not stored in the index.  Term vector will not be stored for this Field.
    /// </summary>
    public static Field UnStored(System.String name, System.String value_Renamed)
    {
        return UnStored(name, value_Renamed, false);
    }

    /// <summary>Constructs a String-valued Field that is tokenized and indexed,
    /// but that is not stored in the index.
    /// </summary>
    /// <param name="storeTermVector">true if the term vector should be stored</param>
    public static Field UnStored(System.String name, System.String value_Renamed, bool storeTermVector)
    {
        return new Field(name, value_Renamed, false, true, true, storeTermVector);
    }

    /// <summary>Constructs a Reader-valued Field that is tokenized and indexed, but is
    /// not stored in the index verbatim.  Useful for longer text fields, like
    /// "body".  Term vector will not be stored for this Field.
    /// </summary>
    public static Field Text(System.String name, System.IO.TextReader value_Renamed)
    {
        return Text(name, value_Renamed, false);
    }

    /// <summary>Constructs a Reader-valued Field that is tokenized and indexed, but is
    /// not stored in the index verbatim.  Useful for longer text fields, like
    /// "body".
    /// </summary>
    /// <param name="storeTermVector">true if the term vector should be stored</param>
    public static Field Text(System.String name, System.IO.TextReader value_Renamed, bool storeTermVector)
    {
        Field f = new Field(name, value_Renamed);
        // The reader constructor has no term-vector parameter, so set it here.
        f.storeTermVector = storeTermVector;
        return f;
    }

    /// <summary>The name of the Field (e.g., "date", "subject", "title", or "body")
    /// as an interned string.
    /// </summary>
    public System.String Name()
    {
        return name;
    }

    /// <summary>The value of the Field as a String, or null.  If null, the Reader value
    /// is used.  Exactly one of StringValue() and ReaderValue() must be set.
    /// </summary>
    public System.String StringValue()
    {
        return stringValue;
    }

    /// <summary>The value of the Field as a Reader, or null.  If null, the String value
    /// is used.  Exactly one of StringValue() and ReaderValue() must be set.
    /// </summary>
    public System.IO.TextReader ReaderValue()
    {
        return readerValue;
    }

    /// <summary>Create a Field by specifying all parameters except for <c>storeTermVector</c>,
    /// which is set to <c>false</c>.
    /// </summary>
    /// <param name="name">The name of the Field</param>
    /// <param name="string_Renamed">The string to process</param>
    /// <param name="store">true if the Field should store the string</param>
    /// <param name="index">true if the Field should be indexed</param>
    /// <param name="token">true if the Field should be tokenized</param>
    /// <exception cref="System.ArgumentNullException">name or string_Renamed is null</exception>
    public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token)
        : this(name, string_Renamed, store, index, token, false)
    {
    }

    /// <summary>Create a String-valued Field by specifying all parameters.</summary>
    /// <param name="name">The name of the Field</param>
    /// <param name="string_Renamed">The string to process</param>
    /// <param name="store">true if the Field should store the string</param>
    /// <param name="index">true if the Field should be indexed</param>
    /// <param name="token">true if the Field should be tokenized</param>
    /// <param name="storeTermVector">true if we should store the Term Vector info</param>
    /// <exception cref="System.ArgumentNullException">name or string_Renamed is null</exception>
    /// <exception cref="System.ArgumentException">storeTermVector is true but index is false</exception>
    public Field(System.String name, System.String string_Renamed, bool store, bool index, bool token, bool storeTermVector)
    {
        // ArgumentNullException derives from ArgumentException, so callers that
        // catch ArgumentException keep working.
        if (name == null)
        {
            throw new System.ArgumentNullException("name", "name cannot be null");
        }
        if (string_Renamed == null)
        {
            throw new System.ArgumentNullException("string_Renamed", "value cannot be null");
        }
        if (!index && storeTermVector)
        {
            throw new System.ArgumentException("cannot store a term vector for fields that are not indexed.");
        }

        this.name = String.Intern(name); // Field names are interned
        this.stringValue = string_Renamed;
        this.isStored = store;
        this.isIndexed = index;
        this.isTokenized = token;
        this.storeTermVector = storeTermVector;
    }

    /// <summary>Create a Reader-valued Field.  Only the name and the reader are
    /// assigned; the remaining flags keep their field-initializer defaults
    /// (not stored, indexed, tokenized, no term vector).
    /// </summary>
    /// <param name="name">The name of the Field</param>
    /// <param name="reader">The reader supplying the value</param>
    /// <exception cref="System.ArgumentNullException">name or reader is null</exception>
    internal Field(System.String name, System.IO.TextReader reader)
    {
        if (name == null)
        {
            throw new System.ArgumentNullException("name", "name cannot be null");
        }
        if (reader == null)
        {
            throw new System.ArgumentNullException("reader", "value cannot be null");
        }

        this.name = String.Intern(name); // Field names are interned
        this.readerValue = reader;
    }

    /// <summary>True iff the value of the Field is to be stored in the index for return
    /// with search hits.  It is an error for this to be true if a Field is
    /// Reader-valued.
    /// </summary>
    public bool IsStored()
    {
        return isStored;
    }

    /// <summary>True iff the value of the Field is to be indexed, so that it may be
    /// searched on.
    /// </summary>
    public bool IsIndexed()
    {
        return isIndexed;
    }

    /// <summary>True iff the value of the Field should be tokenized as text prior to
    /// indexing.  Un-tokenized fields are indexed as a single word and may not be
    /// Reader-valued.
    /// </summary>
    public bool IsTokenized()
    {
        return isTokenized;
    }

    /// <summary>True iff the term or terms used to index this Field are stored as a term
    /// vector, available from <see cref="IndexReader.GetTermFreqVector(int, System.String)"/>.
    /// These methods do not provide access to the original content of the Field,
    /// only to terms used to index it.  If the original content must be
    /// preserved, use the <c>stored</c> attribute instead.
    /// </summary>
    /// <seealso cref="IndexReader.GetTermFreqVector(int, System.String)"/>
    public bool IsTermVectorStored()
    {
        return storeTermVector;
    }

    /// <summary>Prints a Field for human consumption.</summary>
    /// <returns>A label derived from the stored/indexed/tokenized flags, followed by
    /// the name and, where available, the value; falls back to the default
    /// object representation for any other flag combination.</returns>
    public override System.String ToString()
    {
        if (isStored && isIndexed && !isTokenized)
        {
            return "Keyword<" + name + ":" + stringValue + ">";
        }
        else if (isStored && !isIndexed && !isTokenized)
        {
            return "Unindexed<" + name + ":" + stringValue + ">";
        }
        else if (isStored && isIndexed && isTokenized && stringValue != null)
        {
            return "Text<" + name + ":" + stringValue + ">";
        }
        else if (!isStored && isIndexed && isTokenized && readerValue != null)
        {
            // Concatenation uses the reader's own ToString(); its content is not read.
            return "Text<" + name + ":" + readerValue + ">";
        }
        else if (!isStored && isIndexed && isTokenized)
        {
            return "UnStored<" + name + ">";
        }
        else
        {
            return base.ToString();
        }
    }
}