# Index implementation built on AbstractIndex that works directly on a
# Ferret::Index::Index instance (kept in @ferret_index and exposed through the
# ferret_index reader). MoreLikeThis::IndexMethods is mixed in.
# NOTE(review): this listing carries embedded line numbers with gaps; lines
# absent from the listing (e.g. every closing `end`) are not documented here.
3 class LocalIndex < AbstractIndex
4 include MoreLikeThis::IndexMethods
6 # the 'real' Ferret Index instance
7 attr_reader :ferret_index
# Sets up the index for the given configuration: triggers rebuild_index when
# no "segments" file exists below aaf_configuration[:index_dir], then opens a
# Ferret::Index::Index with the aaf_configuration[:ferret] options and stores
# it in @ferret_index.
# NOTE(review): the listing numbers jump from 9 to 11, so one statement
# directly after the signature is not visible here (presumably a call to
# super -- confirm against the complete file).
9 def initialize(aaf_configuration)
11 rebuild_index unless File.file? "#{aaf_configuration[:index_dir]}/segments"
12 @ferret_index = Ferret::Index::Index.new(aaf_configuration[:ferret])
# Rebuilds the Ferret index from the records of the given models.
#
# models - array (possibly nested) of entries that respond to constantize,
#          presumably class-name strings; the list is flattened and
#          de-duplicated before each entry is constantized.
#
# Visible steps: build a Ferret::Index::FieldInfos containing the :id field
# (plus :class_name when aaf_configuration[:store_class_name] is set) and
# every field from each model's aaf_configuration[:ferret_fields]; open a
# Ferret::Index::Index with :auto_flush disabled; read each model's records in
# limit/offset batches; finally close and clear all cached multi indexes.
# NOTE(review): the listing numbers have gaps inside this method. The
# continuation lines of the two multi-line constructor calls, the
# initialisation of `fields` and `batch_size`, and the body of the innermost
# loop are not visible, so this description covers the visible lines only.
15 def rebuild_index(models = [])
16 logger.debug "rebuild index: #{models.join ' '}"
17 models = models.flatten.uniq.map(&:constantize)
18 # default attributes for fields
19 fi = Ferret::Index::FieldInfos.new(:store => :no,
24 fi.add_field(:id, :store => :yes, :index => :untokenized)
26 if aaf_configuration[:store_class_name]
27 fi.add_field(:class_name, :store => :yes, :index => :untokenized)
29 # collect field options from all models
31 models.each do |model|
32 fields.update(model.aaf_configuration[:ferret_fields])
34 logger.debug("class #{aaf_configuration[:class_name]}: fields for index: #{fields.keys.join(',')}")
35 fields.each_pair do |field, options|
# per-field options override the :store / :index defaults given here
36 fi.add_field(field, { :store => :no,
37 :index => :yes }.update(options))
39 index = Ferret::Index::Index.new(aaf_configuration[:ferret].dup.update(:auto_flush => false,
42 # TODO make configurable through options
44 models.each do |model|
45 # index in batches of 1000 to limit memory consumption (fixes #24)
# NOTE(review): step includes the end value, so when model.count is an exact
# multiple of batch_size (or zero) one extra query with an empty result runs.
47 0.step(model.count, batch_size) do |i|
48 model.find(:all, :limit => batch_size, :offset => i).each do |rec|
54 logger.debug("Created Ferret index in: #{aaf_configuration[:index_dir]}")
58 # close combined index readers, just in case
59 # this seems to fix a strange test failure that seems to relate to a
60 # multi_index looking at an old version of the content_base index.
# NOTE(review): the block parameter `index` has the same name as the local
# assigned above; on Ruby 1.8 block parameters overwrite such locals.
61 ActsAsFerret::multi_indexes.each_pair do |key, index|
62 # puts "#{key} -- #{self.name}"
63 # TODO only close those where necessary (watch inheritance, where
64 # self.name is base class of a class where key is made from)
65 index.close #if key =~ /#{self.name}/
67 ActsAsFerret::multi_indexes.clear
# Parses +query+ via ferret_index.process_query while holding the index's
# synchronize lock, after first invoking the index's ensure_reader_open
# (called through send, so it is presumably not public on the index).
# NOTE(review): the listing numbers skip 73 and 77-79, so the second half of
# the work-around comment and whatever follows the assignment to
# original_query are not visible here.
70 # parses the given query string
71 def process_query(query)
72 # work around ferret bug in #process_query (doesn't ensure the
74 ferret_index.synchronize do
75 ferret_index.send(:ensure_reader_open)
76 original_query = ferret_index.process_query(query)
# Returns the number of documents in the local Ferret index matching +query+.
#
# query   - handed unchanged to ferret_index.search
# options - search options hash, handed on unchanged (defaults to {})
#
# Returns the total_hits value of the search result.
def total_hits(query, options = {})
  ferret_index.search(query, options).total_hits
end
# Runs +query+ against the local index via search_each and reports every hit
# as model name / record id / score.
#
# The visible lines show two reporting styles: yielding (model, id, score)
# per hit, and appending a { :model, :id, :score } hash to `result`.
# Returns the total hit count when a block is given, otherwise
# [total_hits, result].
# NOTE(review): the listing numbers have gaps here; the initialisation of
# `result` and the condition choosing between yield and `result <<`
# (presumably block_given?) are not visible.
84 def find_id_by_contents(query, options = {}, &block)
86 #logger.debug "query: #{ferret_index.process_query query}"
87 total_hits = ferret_index.search_each(query, options) do |hit, score|
88 doc = ferret_index[hit]
# model name: the stored :class_name field when class names are stored in the
# index, otherwise the class name from the configuration
89 model = aaf_configuration[:store_class_name] ? doc[:class_name] : aaf_configuration[:class_name]
91 yield model, doc[:id], score
93 result << { :model => model, :id => doc[:id], :score => score }
96 #logger.debug "id_score_model array: #{result.inspect}"
97 return block_given? ? total_hits : [total_hits, result]
# Searches across the indexes of several models through the MultiIndex
# returned by multi_index(models).
#
# The visible lines either yield (class_name, id, score) per hit or append a
# { :model, :id, :score } hash to `result`. Returns the total hit count when
# a block is given, otherwise [total_hits, result].
# NOTE(review): the listing numbers have gaps here; the initialisation of
# `result`, the assignment of `doc` and the condition choosing between yield
# and `result <<` are not visible.
100 def id_multi_search(query, models, options = {})
# map! replaces the entries of the caller's array in place
101 models.map!(&:constantize)
102 searcher = multi_index(models)
104 total_hits = searcher.search_each(query, options) do |hit, score|
107 yield doc[:class_name], doc[:id], score
109 result << { :model => doc[:class_name], :id => doc[:id], :score => score }
112 return block_given? ? total_hits : [ total_hits, result ]
115 ######################################
116 # methods working on a single record
117 # called from instance_methods, here to simplify interfacing with the
118 # remote ferret server
119 # TODO having to pass id and class_name around like this isn't nice
120 ######################################
# NOTE(review): the listing numbers skip 124, so the `def` line of this
# method is not visible; the method name and signature are unknown from here.
# Visible behaviour: unless +record+ is a Hash or a Ferret::Document it is
# converted with to_doc, and the result is appended to ferret_index.
122 # add record to index
123 # record may be the full AR object, a Ferret document instance or a Hash
125 record = record.to_doc unless Hash === record || Ferret::Document === record
126 ferret_index << record
# Deletes a record from the index.
#
# id         - primary key of the record to delete
# class_name - handed to query_for_record together with id
#
# The query built by query_for_record is passed to ferret_index.query_delete;
# the value returned by that call is returned.
def remove(id, class_name)
  ferret_index.query_delete(query_for_record(id, class_name))
end
# Collects highlighted excerpts for one record and returns at most
# options[:num_excerpts] of them (nil entries removed, nested arrays
# flattened).
#
# options - missing keys are filled in with :num_excerpts => 2,
#           :pre_tag => '<em>', :post_tag => '</em>'. The hash is modified in
#           place (reverse_merge!, and :field is assigned per field below),
#           so the caller's hash changes.
#
# The document is located with document_number while holding the index's
# synchronize lock. The visible lines show one highlight call with the given
# options and one call per entry of aaf_configuration[:ferret_fields],
# skipping fields configured with :store => :no or :highlight => :no.
# NOTE(review): the listing numbers have gaps here; the initialisation of
# `highlights` and the condition separating the single call from the
# per-field loop (presumably whether options[:field] is set) are not visible.
135 # highlight search terms for the record with the given id.
136 def highlight(id, class_name, query, options = {})
137 options.reverse_merge! :num_excerpts => 2, :pre_tag => '<em>', :post_tag => '</em>'
139 ferret_index.synchronize do
140 doc_num = document_number(id, class_name)
142 highlights << ferret_index.highlight(query, doc_num, options)
144 query = process_query(query) # process only once
145 aaf_configuration[:ferret_fields].each_pair do |field, config|
146 next if config[:store] == :no || config[:highlight] == :no
147 options[:field] = field
148 highlights << ferret_index.highlight(query, doc_num, options)
152 return highlights.compact.flatten[0..options[:num_excerpts]-1]
# Retrieves the Ferret document number of the record with the given id.
#
# id         - primary key of the record; also interpolated into the error
#              message
# class_name - handed to query_for_record together with id
#
# Returns the +doc+ value of the first hit when the lookup query matches
# exactly one document.
# Raises RuntimeError when the query matches no document or more than one.
def document_number(id, class_name)
  hits = ferret_index.search(query_for_record(id, class_name))
  return hits.hits.first.doc if hits.total_hits == 1

  raise "cannot determine document number from primary key: #{id}"
end
# Builds a Ferret query matching only the record with the given id.
#
# id         - primary key; converted with to_s and matched against the :id
#              field
# class_name - only needs to be given in case of a shared index
#              configuration; this implementation accepts it but does not
#              use it
#
# Returns a Ferret::Search::TermQuery on the :id field.
def query_for_record(id, class_name = nil)
  Ferret::Search::TermQuery.new(:id, id.to_s)
end
# Returns a MultiIndex instance operating on a MultiReader for the given
# model classes.
#
# model_classes - array of classes responding to +name+. The array itself is
#                 left untouched; a copy sorted by name is used.
#
# Instances are cached in ActsAsFerret.multi_indexes under a key made of the
# concatenated, name-sorted class names, so the same set of classes yields
# the same MultiIndex regardless of the order in which they are given.
def multi_index(model_classes)
  # sort a copy: the caller's array must keep its order
  sorted_classes = model_classes.sort_by(&:name)
  key = sorted_classes.map(&:name).join
  ActsAsFerret.multi_indexes[key] ||= begin
    multi_config = aaf_configuration[:ferret].dup
    # we don't want the default field list of *this* class for multi_searching
    multi_config.delete(:default_field)
    MultiIndex.new(sorted_classes, multi_config)
  end
end