1 module ActsAsFerret #:nodoc:
3 # This module defines the acts_as_ferret method and is included into
# Always answers false.
# NOTE(review): presumably queried by the Rails dependency reloading to decide
# whether this module may be reloaded - confirm against the Rails version in use.
8 def reloadable?; false end
10 # declares a class as ferret-searchable.
13 # fields:: names all fields to include in the index. If not given,
14 # all attributes of the class will be indexed. You may also give
15 # symbols pointing to instance methods of your model here, i.e.
16 # to retrieve and index data from a related model.
18 # additional_fields:: names fields to include in the index, in addition
19 # to those derived from the db schema. Use if you want
20 # to add custom fields derived from methods to the db
21 # fields (which will be picked by aaf). This option will
22 # be ignored when the fields option is given, in that
23 # case additional fields get specified there.
25 # index_dir:: declares the directory where to put the index for this class.
26 # The default is RAILS_ROOT/index/RAILS_ENV/CLASSNAME.
27 # The index directory will be created if it doesn't exist.
29 # single_index:: set this to true to let this class use a Ferret
30 # index that is shared by all classes having :single_index set to true.
31 # :store_class_name is set to true implicitly, as well as index_dir, so
32 # don't bother setting these when using this option. The shared index
33 # will be located in index/<RAILS_ENV>/shared .
35 # store_class_name:: to make search across multiple models (with either
36 # single_index or the multi_search method) useful, set
37 # this to true. The model class name will be stored in a keyword field
40 # reindex_batch_size:: reindexing is done in batches of this size, default is 1000
41 # mysql_fast_batches:: set this to false to disable the faster mysql batching
42 # algorithm if this model uses a non-integer primary key named
45 # raise_drb_errors:: Set this to true if you want aaf to raise Exceptions
46 # in case the DRb server cannot be reached (in other words - behave like
47 # versions up to 0.4.3). Defaults to false so DRb exceptions
48 # are logged but not raised. Be sure to set up some
49 # monitoring so you still detect when your DRb server died for
52 # ferret:: Hash of Options that directly influence the way the Ferret engine works. You
53 # can use most of the options the Ferret::I class accepts here, too. Among the
56 # or_default:: whether query terms are required by
57 # default (the default, false), or not (true)
59 # analyzer:: the analyzer to use for query parsing (default: nil,
60 # which means the ferret StandardAnalyzer gets used)
62 # default_field:: use to set one or more fields that are searched for query terms
63 # that don't have an explicit field list. This list should *not*
64 # contain any untokenized fields. If it does, you're asking
65 # for trouble (i.e. not getting results for queries having
66 # stop words in them). Aaf by default initializes the default field
67 # list to contain all tokenized fields. If you use :single_index => true,
68 # you really should set this option specifying your default field
69 # list (which should be equal in all your classes sharing the index).
70 # Otherwise you might get incorrect search results and you won't get
71 # any lazy loading of stored field data.
73 # For downwards compatibility reasons you can also specify the Ferret options in the
75 def acts_as_ferret(options={}, ferret_options={})
# Overview of the steps visible below: decide between remote and local
# index, mix in the plugin modules, register the after_create /
# after_update / after_destroy callbacks, build aaf_configuration from the
# defaults plus the given options, register the indexed fields, determine
# the default search fields and finally make sure the index exists.
#
# :remote defaults to true; only a missing (nil) value is replaced, so an
# explicit false passed by the caller is kept.
77 options[:remote] = true if options[:remote].nil?
79 # force local mode if running *inside* the Ferret server - somewhere the
80 # real indexing has to be done after all :-)
81 # Usually the automatic detection of server mode works fine, however if you
82 # require your model classes in environment.rb they will get loaded before the
83 # DRb server is started, so this code is executed too early and detection won't
84 # work. In this case you'll get endless loops resulting in "stack level too deep"
86 # To get around this, start the DRb server with the environment variable
87 # FERRET_USE_LOCAL_INDEX set to '1'.
88 logger.debug "Asked for a remote server ? #{options[:remote].inspect}, ENV[\"FERRET_USE_LOCAL_INDEX\"] is #{ENV["FERRET_USE_LOCAL_INDEX"].inspect}, looks like we are#{ActsAsFerret::Remote::Server.running || ENV['FERRET_USE_LOCAL_INDEX'] ? '' : ' not'} the server"
# drop :remote altogether when local mode is forced through the environment
# variable or when the server reports itself as running
89 options.delete(:remote) if ENV["FERRET_USE_LOCAL_INDEX"] || ActsAsFerret::Remote::Server.running
# any remaining truthy :remote value that does not match /^druby/ is replaced
# by the URI obtained from the server configuration
91 if options[:remote] && options[:remote] !~ /^druby/
92 # read server location from config/ferret_server.yml
# NOTE(review): the rescue modifier turns any StandardError raised while
# reading the configuration into a nil :remote value - confirm that this
# silent fallback is intended.
93 options[:remote] = ActsAsFerret::Remote::Config.new.uri rescue nil
97 logger.info "Will use remote index server which should be available at #{options[:remote]}"
99 logger.info "Will use local index."
# the shared index class methods are only mixed in on request
104 extend SharedIndexClassMethods if options[:single_index]
106 include InstanceMethods
107 include MoreLikeThis::InstanceMethods
# register ferret_create / ferret_update / ferret_destroy as callbacks
110 after_create :ferret_create
111 after_update :ferret_update
112 after_destroy :ferret_destroy
114 cattr_accessor :aaf_configuration
# default settings; the caller's options are merged over them further down
117 self.aaf_configuration = {
118 :index_dir => "#{ActsAsFerret::index_dir}/#{self.name.underscore}",
119 :store_class_name => false,
120 :name => self.table_name,
121 :class_name => self.name,
122 :single_index => false,
123 :reindex_batch_size => 1000,
124 :ferret => {}, # Ferret config Hash
125 :ferret_fields => {}, # list of indexed fields that will be filled later
126 :enabled => true, # used for class-wide disabling of Ferret
127 :mysql_fast_batches => true, # turn off to disable the faster, id based batching mechanism for MySQL
128 :raise_drb_errors => false # handle DRb connection errors by default
131 # merge aaf options with args
132 aaf_configuration.update(options) if options.is_a?(Hash)
133 # apply appropriate settings for shared index
134 if aaf_configuration[:single_index]
135 aaf_configuration[:index_dir] = "#{ActsAsFerret::index_dir}/shared"
136 aaf_configuration[:store_class_name] = true
139 # set ferret default options
140 aaf_configuration[:ferret].reverse_merge!( :or_default => false,
141 :handle_parse_errors => true,
142 :default_field => nil # will be set later on
143 #:max_clauses => 512,
144 #:analyzer => Ferret::Analysis::StandardAnalyzer.new,
145 # :wild_card_downcase => true
148 # merge ferret options with those from second parameter hash
149 aaf_configuration[:ferret].update(ferret_options) if ferret_options.is_a?(Hash)
# local index only: make sure the directory exists, remember it as the base
# directory and switch :index_dir to whatever find_last_index_version
# returns for it
151 unless options[:remote]
152 ActsAsFerret::ensure_directory aaf_configuration[:index_dir]
153 aaf_configuration[:index_base_dir] = aaf_configuration[:index_dir]
154 aaf_configuration[:index_dir] = find_last_index_version(aaf_configuration[:index_dir])
155 logger.debug "using index in #{aaf_configuration[:index_dir]}"
158 # these properties are somewhat vital to the plugin and shouldn't
159 # be overwritten by the user:
160 aaf_configuration[:ferret].update(
161 :key => (aaf_configuration[:single_index] ? [:id, :class_name] : :id),
162 :path => aaf_configuration[:index_dir],
163 :auto_flush => true, # slower but more secure in terms of locking problems TODO disable when running in drb mode?
164 :create_if_missing => true
167 if aaf_configuration[:fields]
168 add_fields(aaf_configuration[:fields])
# a model instance is created here just to enumerate its attribute names
170 add_fields(self.new.attributes.keys.map { |k| k.to_sym })
171 add_fields(aaf_configuration[:additional_fields])
174 # now that all fields have been added, we can initialize the default
175 # field list to be used by the query parser.
176 # It will include all content fields *not* marked as :untokenized.
177 # This fixes the otherwise failing CommentTest#test_stopwords. Basically
178 # this means that by default only tokenized fields (which all fields are
179 # by default) will be searched. If you want to search inside the contents
180 # of an untokenized field, you'll have to explicitly specify it in your
183 # Unfortunately this is not very useful with a shared index (see
184 # http://projects.jkraemer.net/acts_as_ferret/ticket/85)
185 # You should consider specifying the default field list to search for as
186 # part of the ferret_options hash in your call to acts_as_ferret.
# ||= keeps a :default_field the caller configured; the list is only
# computed when none was given
187 aaf_configuration[:ferret][:default_field] ||= if aaf_configuration[:single_index]
188 logger.warn "You really should set the acts_as_ferret :default_field option when using a shared index!"
191 aaf_configuration[:ferret_fields].keys.select do |f|
192 aaf_configuration[:ferret_fields][f][:index] != :untokenized
195 logger.info "default field list: #{aaf_configuration[:ferret][:default_field].inspect}"
198 aaf_index.ensure_index_exists
205 # find the most recent version of an index
#
# basedir:: directory whose entries are examined.
#
# An entry counts as an index version when it is a directory whose name
# consists of digits (optionally followed by "_" and further digits) and
# which contains a file called 'segments'. The returned path is basedir
# joined with the last element of that list.
# NOTE(review): this relies on the list being ordered so that the newest
# version comes last - confirm the ordering step.
206 def find_last_index_version(basedir)
207 # check for versioned index
208 versions = Dir.entries(basedir).select do |f|
209 dir = File.join(basedir, f)
210 File.directory?(dir) && File.file?(File.join(dir, 'segments')) && f =~ /^\d+(_\d+)?$/
213 # select latest version
215 File.join basedir, versions.last
# field::   name of the field; the generated instance method is named
#           "<field>_to_ferret".
# options:: per-field Ferret options. The hash is stored in
#           aaf_configuration[:ferret_fields][field] and is modified in
#           place (delete / reverse_merge!), so callers see the changes.
#           A Symbol given as :boost is removed from the options and handed
#           to content_for_field_name as dynamic boost instead.
222 # helper that defines a method that adds the given field to a ferret
224 def define_to_field_method(field, options = {})
225 if options[:boost].is_a?(Symbol)
226 dynamic_boost = options[:boost]
227 options.delete :boost
# defaults only fill in keys the caller did not set
229 options.reverse_merge!( :store => :no,
232 :term_vector => :with_positions_offsets,
# a field that is not indexed gets no term vector either
235 options[:term_vector] = :no if options[:index] == :no
236 aaf_configuration[:ferret_fields][field] = options
# the block below closes over field and dynamic_boost
238 define_method("#{field}_to_ferret".to_sym) do
240 val = content_for_field_name(field, dynamic_boost)
# $! holds the exception that was raised while fetching the value
242 logger.warn("Error retrieving value for field #{field}: #{$!}")
245 logger.debug("Adding field #{field} with value '#{val}' to index")
# Registers index fields by calling define_to_field_method for each entry.
#
# field_config:: either a Hash mapping field names to their option hashes,
#                or any object responding to :each that yields field names
#                (those fields are registered without explicit options).
#                A value that is neither (e.g. nil) matches none of the two
#                branches shown here.
250 def add_fields(field_config)
251 if field_config.is_a? Hash
252 field_config.each_pair do |key,val|
253 define_to_field_method(key,val)
255 elsif field_config.respond_to?(:each)
256 field_config.each do |field|
257 define_to_field_method(field)