1 # = PStore -- Transactional File Storage for Ruby Objects
5 # documentation by Kev Jackson and James Edward Gray II
6 # improved by Hongli Lai
8 # See PStore for documentation.
16 # PStore implements a file based persistence mechanism based on a Hash. User
17 # code can store hierarchies of Ruby objects (values) into the data store file
18 # by name (keys). An object hierarchy may be just a single object. User code
19 # may later read values back from the data store or even update data, as needed.
21 # The transactional behavior ensures that any changes succeed or fail together.
22 # This can be used to ensure that the data store is not left in a transitory
23 # state, where some values were updated but others were not.
25 # Behind the scenes, Ruby objects are stored to the data store file with
26 # Marshal. That carries the usual limitations. Proc objects cannot be
27 # marshalled, for example.
33 # # a mock wiki object...
35 # def initialize( page_name, author, contents )
36 # @page_name = page_name
37 # @revisions = Array.new
39 # add_revision(author, contents)
42 # attr_reader :page_name
44 # def add_revision( author, contents )
45 # @revisions << { :created => Time.now,
47 # :contents => contents }
50 # def wiki_page_references
51 # [@page_name] + @revisions.last[:contents].scan(/\b(?:[A-Z]+[a-z]+){2,}/)
57 # # create a new page...
58 # home_page = WikiPage.new( "HomePage", "James Edward Gray II",
59 # "A page about the JoysOfDocumentation..." )
61 # # then we want to update page data and the index together, or not at all...
62 # wiki = PStore.new("wiki_pages.pstore")
63 # wiki.transaction do # begin transaction; do all of this or none of it
65 # wiki[home_page.page_name] = home_page
66 # # ensure that an index has been created...
67 # wiki[:wiki_index] ||= Array.new
68 # # update wiki index...
69 # wiki[:wiki_index].push(*home_page.wiki_page_references)
70 # end # commit changes to wiki data store file
72 # ### Some time later... ###
75 # wiki.transaction(true) do # begin read-only transaction, no changes allowed
76 # wiki.roots.each do |data_root_name|
78 # p wiki[data_root_name]
82 # == Transaction modes
84 # By default, file integrity is only ensured as long as the operating system
85 # (and the underlying hardware) doesn't raise any unexpected I/O errors. If an
86 # I/O error occurs while PStore is writing to its file, then the file will become corrupted.
89 # You can prevent this by setting <em>pstore.ultra_safe = true</em>.
90 # However, this results in a minor performance loss, and only works on platforms
91 # that support atomic file renames. Please consult the documentation for
92 # +ultra_safe+ for details.
94 # Needless to say, if you're storing valuable data with PStore, then you should
95 # back up the PStore files from time to time.
97 binmode = defined?(File::BINARY) ? File::BINARY : 0
98 RDWR_ACCESS = File::RDWR | File::CREAT | binmode
99 RD_ACCESS = File::RDONLY | binmode
100 WR_ACCESS = File::WRONLY | File::CREAT | File::TRUNC | binmode
102 # The error type raised by all PStore methods.
103 class Error < StandardError
106 # Whether PStore should do its best to prevent file corruptions, even when under
107 # unlikely-to-occur error conditions such as out-of-space conditions and other
108 # unusual OS filesystem errors. Setting this flag comes at a price, in the form
109 # of a performance loss.
111 # This flag only has effect on platforms on which file renames are atomic (e.g.
112 # all POSIX platforms: Linux, MacOS X, FreeBSD, etc). The default value is false.
113 attr_accessor :ultra_safe
116 # To construct a PStore object, pass in the _file_ path where you would like
117 # the data to be stored.
119 # PStore objects are always reentrant. But if _thread_safe_ is set to true,
120 # then it will become thread-safe at the cost of a minor performance hit.
122 def initialize(file, thread_safe = false)
123 dir = File::dirname(file)
124 unless File::directory? dir
125 raise PStore::Error, format("directory %s does not exist", dir)
127 if File::exist? file and not File::readable? file
128 raise PStore::Error, format("file %s not readable", file)
137 @lock = DummyMutex.new
141 # Raises PStore::Error if the calling code is not in a PStore#transaction.
143 raise PStore::Error, "not in transaction" unless @transaction
146 # Raises PStore::Error if the calling code is not in a PStore#transaction or
147 # if the code is in a read-only PStore#transaction.
149 def in_transaction_wr()
151 raise PStore::Error, "in read-only transaction" if @rdonly
153 private :in_transaction, :in_transaction_wr
156 # Retrieves a value from the PStore file data, by _name_. The hierarchy of
157 # Ruby objects stored under that root _name_ will be returned.
159 # *WARNING*: This method is only valid in a PStore#transaction. It will
160 # raise PStore::Error if called at any other time.
167 # This method is just like PStore#[], save that you may also provide a
168 # _default_ value for the object. In the event the specified _name_ is not
169 # found in the data store, your _default_ will be returned instead. If you do
170 # not specify a default, PStore::Error will be raised if the object is not found.
173 # *WARNING*: This method is only valid in a PStore#transaction. It will
174 # raise PStore::Error if called at any other time.
176 def fetch(name, default=PStore::Error)
178 unless @table.key? name
179 if default == PStore::Error
180 raise PStore::Error, format("undefined root name `%s'", name)
188 # Stores an individual Ruby object or a hierarchy of Ruby objects in the data
189 # store file under the root _name_. Assigning to a _name_ already in the data
190 # store clobbers the old data.
196 # store = PStore.new("data_file.pstore")
197 # store.transaction do # begin transaction
198 # # load some data into the store...
199 # store[:single_object] = "My data..."
200 # store[:obj_hierarchy] = { "Kev Jackson" => ["rational.rb", "pstore.rb"],
201 # "James Gray" => ["erb.rb", "pstore.rb"] }
202 # end # commit changes to data store file
204 # *WARNING*: This method is only valid in a PStore#transaction and it cannot
205 # be read-only. It will raise PStore::Error if called at any other time.
212 # Removes an object hierarchy from the data store, by _name_.
214 # *WARNING*: This method is only valid in a PStore#transaction and it cannot
215 # be read-only. It will raise PStore::Error if called at any other time.
223 # Returns the names of all object hierarchies currently in the store.
225 # *WARNING*: This method is only valid in a PStore#transaction. It will
226 # raise PStore::Error if called at any other time.
233 # Returns true if the supplied _name_ is currently in the data store.
235 # *WARNING*: This method is only valid in a PStore#transaction. It will
236 # raise PStore::Error if called at any other time.
242 # Returns the path to the data store file.
248 # Ends the current PStore#transaction, committing any changes to the data store.
255 # store = PStore.new("data_file.pstore")
256 # store.transaction do # begin transaction
257 # # load some data into the store...
261 # store.commit # end transaction here, committing changes
263 # store[:three] = 3 # this change is never reached
266 # *WARNING*: This method is only valid in a PStore#transaction. It will
267 # raise PStore::Error if called at any other time.
272 throw :pstore_abort_transaction
275 # Ends the current PStore#transaction, discarding any changes to the data store.
282 # store = PStore.new("data_file.pstore")
283 # store.transaction do # begin transaction
284 # store[:one] = 1 # this change is not applied, see below...
285 # store[:two] = 2 # this change is not applied, see below...
287 # store.abort # end transaction here, discard all changes
289 # store[:three] = 3 # this change is never reached
292 # *WARNING*: This method is only valid in a PStore#transaction. It will
293 # raise PStore::Error if called at any other time.
298 throw :pstore_abort_transaction
302 # Opens a new transaction for the data store. Code executed inside a block
303 # passed to this method may read and write data to and from the data store
306 # At the end of the block, changes are committed to the data store
307 # automatically. You may exit the transaction early with a call to either
308 # PStore#commit or PStore#abort. See those methods for details about how
309 # changes are handled. Raising an uncaught Exception in the block is
310 # equivalent to calling PStore#abort.
312 # If _read_only_ is set to +true+, you will only be allowed to read from the
313 # data store during the transaction and any attempts to change the data will
314 # raise a PStore::Error.
316 # Note that PStore does not support nested transactions.
318 def transaction(read_only = false, &block) # :yields: pstore
320 raise PStore::Error, "nested transaction" if @transaction
325 file = open_and_lock_file(@filename, read_only)
328 @table, checksum, original_data_size = load_data(file, read_only)
330 catch(:pstore_abort_transaction) do
334 if !@abort && !read_only
335 save_data(checksum, original_data_size, file)
338 file.close if !file.closed?
341 # This can only occur if read_only == true.
343 catch(:pstore_abort_transaction) do
354 # Constant for relieving Ruby's garbage collector.
356 EMPTY_MARSHAL_DATA = Marshal.dump({})
357 EMPTY_MARSHAL_CHECKSUM = Digest::MD5.digest(EMPTY_MARSHAL_DATA)
366 # Open the specified filename (either in read-only mode or in
367 # read-write mode) and lock it for reading or writing.
369 # The opened File object will be returned. If _read_only_ is true,
370 # and the file does not exist, then nil will be returned.
372 # All exceptions are propagated.
374 def open_and_lock_file(filename, read_only)
377 file = File.new(filename, RD_ACCESS)
379 file.flock(File::LOCK_SH)
389 file = File.new(filename, RDWR_ACCESS)
390 file.flock(File::LOCK_EX)
395 # Load the given PStore file.
396 # If +read_only+ is true, the unmarshalled Hash will be returned.
397 # If +read_only+ is false, a 3-tuple will be returned: the unmarshalled
398 # Hash, an MD5 checksum of the data, and the size of the data.
399 def load_data(file, read_only)
403 if !table.is_a?(Hash)
404 raise Error, "PStore file seems to be corrupted."
407 # This seems to be a newly-created file.
414 # This seems to be a newly-created file.
416 checksum = empty_marshal_checksum
417 size = empty_marshal_data.size
420 checksum = Digest::MD5.digest(data)
422 if !table.is_a?(Hash)
423 raise Error, "PStore file seems to be corrupted."
426 data.replace(EMPTY_STRING)
427 [table, checksum, size]
# Detect a Windows build of Ruby from the platform string. Borland-compiled
# builds identify themselves as "bccwin" (e.g. i386-bccwin32); the earlier
# /bbcwin/ pattern was a typo and did not match that platform name.
# The result is nil on non-Windows platforms, or the match offset otherwise.
is_windows = RUBY_PLATFORM =~ /mswin/ ||
             RUBY_PLATFORM =~ /mingw/ ||
             RUBY_PLATFORM =~ /bccwin/ ||
             RUBY_PLATFORM =~ /wince/
436 self.class.__send__(:define_method, :on_windows?) do
442 # Check whether Marshal.dump supports the 'canonical' option. This option
443 # makes sure that Marshal.dump always dumps data structures in the same order.
444 # This is important because otherwise, the checksums that we generate may differ.
445 def marshal_dump_supports_canonical_option?
447 Marshal.dump(nil, -1, true)
452 self.class.__send__(:define_method, :marshal_dump_supports_canonical_option?) do
458 def save_data(original_checksum, original_file_size, file)
459 # We only want to save the new data if the size or checksum has changed.
460 # This results in fewer filesystem calls, which is good for performance.
461 if marshal_dump_supports_canonical_option?
462 new_data = Marshal.dump(@table, -1, true)
464 new_data = dump(@table)
466 new_checksum = Digest::MD5.digest(new_data)
468 if new_data.size != original_file_size || new_checksum != original_checksum
469 if @ultra_safe && !on_windows?
470 # Windows doesn't support atomic file renames.
471 save_data_with_atomic_file_rename_strategy(new_data, file)
473 save_data_with_fast_strategy(new_data, file)
477 new_data.replace(EMPTY_STRING)
480 def save_data_with_atomic_file_rename_strategy(data, file)
481 temp_filename = "#{@filename}.tmp.#{Process.pid}.#{rand 1000000}"
482 temp_file = File.new(temp_filename, WR_ACCESS)
484 temp_file.flock(File::LOCK_EX)
485 temp_file.write(data)
487 File.rename(temp_filename, @filename)
489 File.unlink(temp_file) rescue nil
496 def save_data_with_fast_strategy(data, file)
503 # This method is just a wrapper around Marshal.dump
504 # to allow subclass overriding used in YAML::Store.
505 def dump(table) # :nodoc:
509 # This method is just a wrapper around Marshal.load
510 # to allow subclass overriding used in YAML::Store.
511 def load(content) # :nodoc:
512 Marshal::load(content)
515 def empty_marshal_data
518 def empty_marshal_checksum
519 EMPTY_MARSHAL_CHECKSUM
526 db = PStore.new("/tmp/foo")
529 ary = db["root"] = [1,2,3,4]
540 db.transaction(true) do