1 # :title:Ruby Xapian bindings
2 # =Ruby Xapian bindings
4 # Original version by Paul Legato (plegato@nks.net), 4/20/06.
6 # Copyright (C) 2006 Networked Knowledge Systems, Inc.
7 # Copyright (C) 2008,2011,2019 Olly Betts
8 # Copyright (C) 2010 Richard Boulton
10 # This program is free software; you can redistribute it and/or
11 # modify it under the terms of the GNU General Public License as
12 # published by the Free Software Foundation; either version 2 of the
13 # License, or (at your option) any later version.
15 # This program is distributed in the hope that it will be useful,
16 # but WITHOUT ANY WARRANTY; without even the implied warranty of
17 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 # GNU General Public License for more details.
20 # You should have received a copy of the GNU General Public License
21 # along with this program; if not, write to the Free Software
22 # Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
25 # ==Underscore methods
26 # Note: Methods whose names start with an underscore character _ are internal
27 # methods from the C++ API. Their functionality is not accessible in a
28 # Ruby-friendly way, so this file provides wrapper code to make it easier to
29 # use them from a Ruby programming idiom. Most are also dangerous insofar as
30 # misusing them can cause your program to segfault. In particular, all of
31 # Xapian's *Iterator classes are wrapped into nice Ruby-friendly Arrays.
33 # It should never be necessary to use any method whose name starts with an
34 # underscore from user-level code. Make sure you are _VERY_ certain that you
35 # know exactly what you're doing if you do use one of these methods. Beware.
36 # You've been warned...
41 ######## load the SWIG-generated library
45 # Iterate over the range delimited by a pair of dangerous iterators (a start
46 # and an end; "dangerous" meaning they can cause segfaults if used improperly).
47 # If block_given? then the results are fed to it one by one, otherwise the
48 # results are returned as an Array.
49 # Users should never need to use this method.
51 # wrapper is a lambda that returns some appropriate Ruby object to wrap the
52 # results from the underlying Iterator
# Shared driver used by every iterator wrapper in this file.
#   dangerousStart, dangerousEnd - iterator pair; looping continues until the
#                                  current item reports equals(dangerousEnd).
#   wrapper                      - callable; wrapper.call(item) produces the
#                                  Ruby-side object for the current position.
# NOTE(review): the statements that initialise `item`, advance it, and select
# between the two loops below are presumably on adjacent lines -- confirm
# before modifying the loop logic.
53 def _safelyIterate(dangerousStart, dangerousEnd, wrapper) #:nodoc:
# Block form: each wrapped item is yielded to the caller's block.
56 while not item.equals(dangerousEnd) do
57 yield wrapper.call(item)
# Array form: each wrapped item is appended to retval.
62 while not item.equals(dangerousEnd) do
63 retval.push(wrapper.call(item))
# Makes the method callable on the module itself; the wrappers in this file
# invoke it as Xapian._safelyIterate(...).
69 module_function :_safelyIterate
72 ### safe Ruby wrapper for the dangerous C++ Xapian::TermIterator class
# Plain Ruby value object; all three fields are readable and writable.
74 attr_accessor :term, :wdf, :termfreq
# wdf and termfreq are optional and default to nil; callers in this file omit
# termfreq when the underlying iterator does not provide it.
76 def initialize(term, wdf=nil, termfreq=nil)
# Equality: other must be a Xapian::Term whose term, wdf and termfreq all match.
83 return other.is_a?(Xapian::Term) && other.term == @term && other.wdf == @wdf && other.termfreq == @termfreq
87 ### Ruby wrapper for a Match, i.e. a Xapian::MSetIterator (Match Set) in C++.
88 # it's no longer an iterator in the Ruby version, but we want to preserve its
90 # (MSetIterator is not dangerous, but it is inconvenient to use from a Ruby
91 # idiom, so we wrap it..)
# Plain Ruby value object holding the per-match fields copied out of the iterator.
93 attr_accessor :docid, :document, :rank, :weight, :collapse_count, :percent
# All six fields are required positional arguments.
95 def initialize(docid, document, rank, weight, collapse_count, percent)
100 @collapse_count = collapse_count
# Equality: other must be a Xapian::Match with matching docid, rank, weight,
# collapse_count and percent. Note that `document` is NOT part of the comparison.
105 return other.is_a?(Xapian::Match) && other.docid == @docid && other.rank == @rank &&
106 other.weight == @weight && other.collapse_count == @collapse_count && other.percent == @percent
108 end # class Xapian::Match
110 # Ruby wrapper for an ExpandTerm, i.e. a Xapian::ESetIterator in C++
111 # Not dangerous, but inconvenient to use from a Ruby programming idiom, so we
# Plain Ruby value object pairing an expansion term's name with its weight.
113 class Xapian::ExpandTerm
114 attr_accessor :name, :weight
# Both fields are required positional arguments.
116 def initialize(name, weight)
# Equality: other must be a Xapian::ExpandTerm with the same name and weight.
122 return other.is_a?(Xapian::ExpandTerm) && other.name == @name && other.weight == @weight
125 end # Xapian::ExpandTerm
127 # Ruby wrapper for Xapian::ValueIterator
# Plain Ruby value object; all three fields are readable and writable.
129 attr_accessor :value, :valueno, :docid
# All three fields are required positional arguments.
131 def initialize(value, valueno, docid)
# Equality: other must be a Xapian::Value with the same value, valueno and docid.
138 return other.is_a?(Xapian::Value) && other.value == @value && other.valueno == @valueno && other.docid == @docid
143 # {Xapian::Document C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Document.html]
144 # for methods not specific to Ruby.
146 # Extend Xapian::Document with a nice wrapper for its nasty input_iterators
# Reopens the SWIG-generated Xapian::Document to add iterator-free accessors.
147 class Xapian::Document
# Term list: walks _dangerous_termlist_begin()/_end() and converts each item to
# a Xapian::Term built from item.term and item.wdf only (termfreq is not passed,
# so it keeps Xapian::Term's nil default).
149 Xapian._safelyIterate(self._dangerous_termlist_begin(),
150 self._dangerous_termlist_end(),
152 |item| Xapian::Term.new(item.term, item.wdf)
# Value list: walks _dangerous_values_begin()/_end() and converts each item to
# a Xapian::Value whose first argument is item.value.
158 Xapian._safelyIterate(self._dangerous_values_begin(),
159 self._dangerous_values_end(),
161 |item| Xapian::Value.new(item.value,
168 end # class Xapian::Document
171 # {Xapian::Query C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Query.html]
172 # for methods not specific to Ruby.
174 # Extend Xapian::Query with a nice wrapper for its dangerous iterators
# Query terms: walks _dangerous_terms_begin()/_end(); each item becomes a
# Xapian::Term carrying only term and wdf.
177 # termfreq is not supported by TermIterators from Queries
178 Xapian._safelyIterate(self._dangerous_terms_begin(),
179 self._dangerous_terms_end(),
181 |item| Xapian::Term.new(item.term, item.wdf)
# Same conversion as above, but over the _dangerous_unique_terms_begin()/_end()
# iterator pair. Accepts an optional block (presumably forwarded to
# Xapian._safelyIterate -- verify).
186 def unique_terms(&block)
187 # termfreq is not supported by TermIterators from Queries
188 Xapian._safelyIterate(self._dangerous_unique_terms_begin(),
189 self._dangerous_unique_terms_end(),
191 |item| Xapian::Term.new(item.term, item.wdf)
198 # {Xapian::Enquire C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Enquire.html]
199 # for methods not specific to Ruby.
201 # Extend Xapian::Enquire with a nice wrapper for its dangerous iterators
# Reopens the SWIG-generated Xapian::Enquire to add an iterator-free accessor.
202 class Xapian::Enquire
203 # Get matching terms for some document.
204 # document can be either a Xapian::DocID or a Xapian::MSetIterator
# The same `document` argument is handed to both the begin and the end call.
# Each item is converted to a Xapian::Term carrying term and wdf (no termfreq).
205 def matching_terms(document, &block)
206 Xapian._safelyIterate(self._dangerous_matching_terms_begin(document),
207 self._dangerous_matching_terms_end(document),
208 lambda { |item| Xapian::Term.new(item.term, item.wdf) },
211 end # Xapian::Enquire
214 # {Xapian::MSet C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1MSet.html]
215 # for methods not specific to Ruby.
217 # MSetIterators are not dangerous, just inconvenient to use within a Ruby
218 # programming idiom. So we wrap them.
# Walks the match set starting at _begin(); each item is copied into a
# Xapian::Match (docid, document, rank, weight, collapse_count, percent) so the
# caller never handles the underlying iterator.
221 Xapian._safelyIterate(self._begin(),
223 lambda { |item| Xapian::Match.new(item.docid, item.document, item.rank, item.weight, item.collapse_count, item.percent) },
229 # {Xapian::ESet C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1ESet.html]
230 # for methods not specific to Ruby.
232 # ESetIterators are not dangerous, just inconvenient to use within a Ruby
233 # programming idiom. So we wrap them.
236 # note: in the ExpandTerm wrapper, we implicitly rename
237 # ESetIterator#term() (defined in xapian-headers.i) to ExpandTerm#name()
# Walks the expand set starting at _begin(); item.term becomes the ExpandTerm's
# name and item.weight its weight.
238 Xapian._safelyIterate(self._begin(),
240 lambda { |item| Xapian::ExpandTerm.new(item.term, item.weight) },
247 # Wrapper for the C++ class Xapian::PostingIterator
# Plain Ruby value object; all three fields are readable and writable.
248 class Xapian::Posting
249 attr_accessor :docid, :doclength, :wdf
# All three fields are required positional arguments.
251 def initialize(docid, doclength, wdf)
253 @doclength = doclength
# Equality: other must be a Xapian::Posting with matching docid and doclength;
# the expression continues past the trailing && (presumably comparing wdf -- verify).
258 return other.is_a?(Xapian::Posting) && other.docid == @docid && other.doclength == @doclength &&
261 end # Xapian::Posting
264 # {Xapian::Database C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1Database.html]
265 # for methods not specific to Ruby.
267 # Wrap some dangerous iterators.
# Reopens the SWIG-generated Xapian::Database to add iterator-free accessors.
# Every method below accepts an optional block and delegates to
# Xapian._safelyIterate with a begin/end iterator pair and a conversion lambda.
268 class Xapian::Database
269 # Returns an Array of all Xapian::Terms for this database.
# pref defaults to '' and is passed to both the begin and end calls.
# Each Term is built with wdf fixed at 0 and termfreq taken from the iterator.
270 def allterms(pref = '', &block)
271 Xapian._safelyIterate(self._dangerous_allterms_begin(pref),
272 self._dangerous_allterms_end(pref),
273 lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
277 # Returns an Array of all metadata keys for this database.
# pref defaults to ''. Each element is the raw item.term, not a Xapian::Term.
278 def metadata_keys(pref = '', &block)
279 Xapian._safelyIterate(self._dangerous_metadata_keys_begin(pref),
280 self._dangerous_metadata_keys_end(pref),
281 lambda { |item| item.term },
285 # Returns an Array of Xapian::Postings for the given term.
# Each Posting carries the item's docid, doclength and wdf.
287 def postlist(term, &block)
288 Xapian._safelyIterate(self._dangerous_postlist_begin(term),
289 self._dangerous_postlist_end(term),
290 lambda { |item| Xapian::Posting.new(item.docid, item.doclength, item.wdf) },
294 # Returns an Array of Terms for the given docid.
# Unlike the other Term-producing wrappers in this file, all three Term fields
# (term, wdf, termfreq) are populated from the iterator here.
295 def termlist(docid, &block)
296 Xapian._safelyIterate(self._dangerous_termlist_begin(docid),
297 self._dangerous_termlist_end(docid),
298 lambda { |item| Xapian::Term.new(item.term, item.wdf, item.termfreq) },
300 end # termlist(docid)
302 # Returns an Array of term positions for the given term (a String)
303 # in the given docid.
# Each element is the raw item.termpos value.
304 def positionlist(docid, term, &block)
305 Xapian._safelyIterate(self._dangerous_positionlist_begin(docid, term),
306 self._dangerous_positionlist_end(docid, term),
307 lambda { |item| item.termpos },
311 # Returns an Array of Xapian::Value objects for the given slot in the
# The Value's valueno is the `slot` argument itself, not a field read from the
# iterator; value and docid come from the iterator item.
313 def valuestream(slot, &block)
314 Xapian._safelyIterate(self._dangerous_valuestream_begin(slot),
315 self._dangerous_valuestream_end(slot),
316 lambda { |item| Xapian::Value.new(item.value, slot, item.docid) },
318 end # valuestream(slot)
320 # Returns an Array of Xapian::Term objects for the spelling dictionary.
# Each Term is built with wdf fixed at 0 and termfreq taken from the iterator.
321 def spellings(&block)
322 Xapian._safelyIterate(self._dangerous_spellings_begin(),
323 self._dangerous_spellings_end(),
324 lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
328 # Returns an Array of synonyms of the given term.
# Each element is the raw item.term, not a Xapian::Term.
329 def synonyms(term, &block)
330 Xapian._safelyIterate(self._dangerous_synonyms_begin(term),
331 self._dangerous_synonyms_end(term),
332 lambda { |item| item.term },
336 # Returns an Array of terms with synonyms.
# Each element is the raw item.term, not a Xapian::Term.
337 def synonym_keys(&block)
338 Xapian._safelyIterate(self._dangerous_synonym_keys_begin(),
339 self._dangerous_synonym_keys_end(),
340 lambda { |item| item.term },
343 end # Xapian::Database
346 # {Xapian::ValueCountMatchSpy C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1ValueCountMatchSpy.html]
347 # for methods not specific to Ruby.
349 # Wrap some dangerous iterators.
# Reopens the SWIG-generated Xapian::ValueCountMatchSpy to add iterator-free
# accessors. Both wrappers represent each value as a Xapian::Term whose `term`
# is the value, whose wdf is fixed at 0, and whose termfreq comes from the iterator.
350 class Xapian::ValueCountMatchSpy
351 # Returns an Array of all the values seen, in alphabetical order
353 Xapian._safelyIterate(self._dangerous_values_begin(),
354 self._dangerous_values_end(),
355 lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
359 # Returns an Array of the top values seen, by frequency
# maxvalues is passed unchanged to both the begin and end calls.
360 def top_values(maxvalues, &block)
361 Xapian._safelyIterate(self._dangerous_top_values_begin(maxvalues),
362 self._dangerous_top_values_end(maxvalues),
363 lambda { |item| Xapian::Term.new(item.term, 0, item.termfreq) },
366 end # Xapian::ValueCountMatchSpy
369 # {Xapian::LatLongCoords C++ API documentation}[https://xapian.org/docs/apidoc/html/classXapian_1_1LatLongCoords.html]
370 # for methods not specific to Ruby.
372 # Wrap some dangerous iterators.
# Reopens the SWIG-generated Xapian::LatLongCoords to add an iterator-free accessor.
373 class Xapian::LatLongCoords
374 # Returns an Array holding the get_coord() result of each element, iterating from _begin()
376 Xapian._safelyIterate(self._begin(),
378 lambda { |item| item.get_coord() },
381 end # Xapian::LatLongCoords
# Reopens the SWIG-generated Xapian::QueryParser to add iterator-free accessors.
# Both wrappers return the raw item.term for each element, not a Xapian::Term.
383 class Xapian::QueryParser
384 # Returns an Array of all words in the query ignored as stopwords.
386 Xapian._safelyIterate(self._dangerous_stoplist_begin(),
387 self._dangerous_stoplist_end(),
388 lambda { |item| item.term },
392 # Returns an Array of all words in the query which stem to a given term.
# term is passed unchanged to both the begin and end calls.
393 def unstem(term, &block)
394 Xapian._safelyIterate(self._dangerous_unstem_begin(term),
395 self._dangerous_unstem_end(term),
396 lambda { |item| item.term },
399 end # Xapian::QueryParser
401 # Compatibility wrapping for Xapian::BAD_VALUENO (wrapped as a constant since
402 # xapian-bindings 1.4.10).
# Singleton method so that the older call form Xapian::BAD_VALUENO() keeps working.
403 def Xapian::BAD_VALUENO()
# Written without parentheses or arguments, Xapian::BAD_VALUENO is parsed as a
# constant lookup rather than a method call, so this returns the constant and
# does not recurse.
404 return Xapian::BAD_VALUENO