7 # This PerFieldAnalyzer is a workaround to a memory leak in
8 # ferret 0.11.4. It does basically the same as the original
9 # Ferret::Analysis::PerFieldAnalyzer, but without the leak :)
11 # http://ferret.davebalmain.com/api/classes/Ferret/Analysis/PerFieldAnalyzer.html
13 # Thanks to Ben from omdb.org for tracking this down and creating this
15 # You can read more about the issue there:
16 # http://blog.omdb-beta.org/2007/7/29/tracking-down-a-memory-leak-in-ferret-0-11-4
# Analyzer that picks a token stream per field: fields registered through
# add_field use their own analyzer, every other field uses the default one.
# NOTE(review): the initialization of @analyzers and the closing `end`
# lines are not visible in this view of the file - confirm against the
# complete source.
17 class PerFieldAnalyzer < ::Ferret::Analysis::Analyzer
# default_analyzer:: analyzer used by token_stream for any field that has no
#                    analyzer of its own; a fresh StandardAnalyzer when omitted.
18 def initialize( default_analyzer = StandardAnalyzer.new )
20 @default_analyzer = default_analyzer
# Registers +analyzer+ as the analyzer for +field+. Registering the same
# field again replaces the earlier entry (plain key assignment).
23 def add_field( field, analyzer )
24 @analyzers[field] = analyzer
# Returns the token stream for +string+ in +field+, delegating to the
# analyzer registered for that exact key, or to the default analyzer when
# no entry exists for it.
28 def token_stream(field, string)
29 @analyzers.has_key?(field) ? @analyzers[field].token_stream(field, string) :
30 @default_analyzer.token_stream(field, string)
# Reader/writer pairs for the batch size and the logger. The indexing
# methods in this class pass @batch_size to the model's record iterators
# and write their progress messages to logger.
36 attr_accessor :batch_size, :logger
# Indexes every model in +models+ by calling index_model on each one, and
# finishes by calling ActsAsFerret::close_multi_indexes.
# NOTE(review): the statements between the loop and close_multi_indexes are
# not visible in this view - confirm what happens to the index in between.
38 def index_models(models)
39 models.each { |model| index_model model }
43 ActsAsFerret::close_multi_indexes
# Indexes all records of +model+ into this index.
#
# Builds an ActsAsFerret::BulkIndexer bound to this index (:index => self)
# with :reindex => true, logs the model name at info level, then hands
# every (records, offset) batch yielded by
# model.records_for_rebuild(@batch_size) to the bulk indexer.
46 def index_model(model)
47 bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
48 :model => model, :index => self, :reindex => true)
49 logger.info "reindexing model #{model.name}"
51 model.records_for_rebuild(@batch_size) do |records, offset|
52 bulk_indexer.index_records(records, offset)
# Indexes the records of +model+ identified by +ids+ in batches.
#
# model::   must respond to records_for_bulk_index(ids, batch_size),
#           yielding (records, offset) pairs.
# ids::     collection of record ids; ids.size is reported to the bulk
#           indexer as :total.
# options:: hash; :optimize defaults to true when the caller did not set it.
#           reverse_merge! changes the hash that was passed in.
56 def bulk_index(model, ids, options = {})
57 options.reverse_merge! :optimize => true
# Remember the current auto-flush setting; it is written back on the last
# line of this method.
# NOTE(review): no ensure/rescue is visible in this view, so the restore
# may be skipped if indexing raises - confirm against the full source.
58 orig_flush = @auto_flush
60 bulk_indexer = ActsAsFerret::BulkIndexer.new(:batch_size => @batch_size, :logger => logger,
61 :model => model, :index => self, :total => ids.size)
62 model.records_for_bulk_index(ids, @batch_size) do |records, offset|
63 logger.debug "#{model} bulk indexing #{records.size} at #{offset}"
64 bulk_indexer.index_records(records, offset)
66 logger.info 'finishing bulk index...'
69 logger.info 'optimizing...'
72 @auto_flush = orig_flush
77 # add marshalling support to SortFields
78 class Search::SortField
# Marshal load hook: rebuilds a SortField from its string form.
#
# Returns one of the predefined DOC_ID / DOC_ID_REV / SCORE / SCORE_REV
# constants for the special markers, or a new SortField for strings of the
# form "name:<type>" with an optional trailing "!" meaning reverse order.
# Raises RuntimeError ("invalid value: ...") for anything else.
#
# NOTE(review): the `when ... :` separator is only accepted by Ruby 1.8;
# Ruby 1.9+ requires `then` or a newline. The enclosing case/end lines are
# not visible in this view, so the code is left untouched here.
83 def self._load(string)
# Branch order matters: the "!" (reverse) patterns come first because the
# unanchored /<DOC(_ID)?>/ would also match the reversed markers.
85 when /<DOC(_ID)?>!/ : Ferret::Search::SortField::DOC_ID_REV
86 when /<DOC(_ID)?>/ : Ferret::Search::SortField::DOC_ID
87 when '<SCORE>!' : Ferret::Search::SortField::SCORE_REV
88 when '<SCORE>' : Ferret::Search::SortField::SCORE
# $1 = field name, $2 = sort type, $3 = "!" when the order is reversed.
89 when /^(\w+):<(\w+)>(!)?$/ : new($1.to_sym, :type => $2.to_sym, :reverse => !$3.nil?)
90 else raise "invalid value: #{string}"
95 # add marshalling support to Sort
# Marshal load hook: rebuilds a sort from a string of the form "Sort[...]".
#
# The text inside the brackets is split on commas, every non-blank entry is
# turned into a sort field through Ferret::Search::SortField._load, and the
# resulting list is passed to new. Raises RuntimeError ("invalid value: ...")
# when +string+ does not have the "Sort[...]" shape.
101 def self._load(string)
102 # we exclude the last <DOC> sorting as it is appended by new anyway
103 if string =~ /^Sort\[(.*?)(<DOC>(!)?)?\]$/
104 sort_fields = $1.split(',').map do |value|
# Blank entries produce nil here (the `unless` modifier) and are removed by
# compact below. blank? is not core Ruby - presumably ActiveSupport's.
106 Ferret::Search::SortField._load value unless value.blank?
108 new sort_fields.compact
110 raise "invalid value: #{string}"