# Copyright (c) 2012 The Chromium Authors. All rights reserved.
# Use of this source code is governed by a BSD-style license that can be
# found in the LICENSE file.

import logging
import posixpath

from file_system import FileSystem, StatInfo, FileNotFoundError
from future import All, Future
from path_util import AssertIsDirectory, IsDirectory, ToDirectory
from third_party.json_schema_compiler.memoize import memoize


class CachingFileSystem(FileSystem):
  '''FileSystem which implements a caching layer on top of |file_system|. If
  |fail_on_miss| is True then cache misses throw a FileNotFoundError rather than
  falling back onto the underlying FileSystem.

  Three object stores back the cache, keyed by category: 'stat' (directory
  path -> directory stat), 'read' (path -> (data, version)) and 'walk'
  (root -> (dirs, files, version)).
  '''
  def __init__(self, file_system, object_store_creator, fail_on_miss=False):
    '''Wraps |file_system| and creates the stat/read/walk object stores via
    |object_store_creator|. |fail_on_miss| selects whether a cache miss raises
    a FileNotFoundError instead of consulting |file_system|.
    '''
    self._file_system = file_system
    self._fail_on_miss = fail_on_miss

    def create_object_store(category, start_empty=True):
      # The category is namespaced by the wrapped file system's identity so
      # that different file systems do not share cache entries.
      # NOTE(review): the leading positional argument to Create() was not
      # recoverable from the damaged source; passing the owning class is
      # assumed - confirm against object_store_creator.Create().
      return object_store_creator.Create(
          CachingFileSystem,
          category='%s/%s' % (file_system.GetIdentity(), category),
          start_empty=start_empty)

    # We only start the stat cache empty if |fail_on_miss| is False, i.e. if
    # we're NOT running on a live instance and we can afford to fall back onto
    # the underlying FileSystem impl.
    self._stat_cache = create_object_store('stat', start_empty=not fail_on_miss)
    self._read_cache = create_object_store('read', start_empty=False)
    self._walk_cache = create_object_store('walk', start_empty=False)

  # NOTE(review): the method name was lost in the damaged source; |Refresh| is
  # inferred from the call it delegates to.
  def Refresh(self):
    '''Delegates to the underlying file system's Refresh().
    '''
    return self._file_system.Refresh()

  def StatAsync(self, path):
    '''Stats the directory given, or if a file is given, stats the file's parent
    directory to get info about the file.

    Returns a Future. Raises (through the Future) a FileNotFoundError if
    |path| is a file with no entry in its parent's stat, or if the stat is not
    cached and |fail_on_miss| was set.
    '''
    # Always stat the parent directory, since it will have the stat of the child
    # anyway, and this gives us an entire directory's stat info at once.
    dir_path, file_path = posixpath.split(path)
    dir_path = ToDirectory(dir_path)

    def make_stat_info(dir_stat):
      '''Converts a dir stat into the correct resulting StatInfo; if the Stat
      was for a file, the StatInfo should just contain that file.
      '''
      if path == dir_path:
        # Was a directory stat; the dir stat is already the answer.
        return dir_stat
      # Was a file stat. Extract that file.
      file_version = dir_stat.child_versions.get(file_path)
      if file_version is None:
        raise FileNotFoundError('No stat found for %s in %s (found %s)' %
                                (path, dir_path, dir_stat.child_versions))
      return StatInfo(file_version)

    def raise_cache_miss(path):
      raise FileNotFoundError('Got cache miss when trying to stat %s' % path)

    dir_stat = self._stat_cache.Get(dir_path).Get()
    if dir_stat is not None:
      return Future(callback=lambda: make_stat_info(dir_stat))

    if self._fail_on_miss:
      logging.warning('Bailing on stat cache miss for %s on %s',
                      dir_path, self.GetIdentity())
      return Future(callback=lambda: raise_cache_miss(dir_path))

    def cache_and_convert(dir_stat):
      assert dir_stat is not None  # should have raised a FileNotFoundError
      # We only ever need to cache the dir stat.
      self._stat_cache.Set(dir_path, dir_stat)
      return make_stat_info(dir_stat)
    return self._MemoizedStatAsyncFromFileSystem(dir_path).Then(
        cache_and_convert)

  # NOTE(review): the decorator line was lost in the damaged source; @memoize
  # is inferred from the method name, its docstring and the file-level import.
  @memoize
  def _MemoizedStatAsyncFromFileSystem(self, dir_path):
    '''This is a simple wrapper to memoize Futures to directory stats, since
    StatAsync makes heavy use of it. Only cache directories so that the
    memoized cache doesn't blow up.
    '''
    assert IsDirectory(dir_path)
    return self._file_system.StatAsync(dir_path)

  def Read(self, paths, skip_not_found=False):
    '''Reads a list of files. If a file is cached and it is not out of
    date, it is returned. Otherwise, the file is retrieved from the file system.

    Returns a Future of a path -> data mapping. |skip_not_found| is forwarded
    to the underlying file system's Read().
    '''
    # Files which aren't found are cached in the read object store as
    # (path, None, None). This is to prevent re-reads of files we know
    # do not exist.
    cached_read_values = self._read_cache.GetMulti(paths).Get()
    cached_stat_values = self._stat_cache.GetMulti(paths).Get()

    # Populate a map of paths to Futures to their stat. They may have already
    # been cached in which case their Future will already have been constructed
    # with a value.
    stat_futures = {}

    # NOTE(review): only the isinstance() test survived in the damaged source.
    # Mapping a FileNotFoundError to None is inferred from the later
    # |stat_futures[path].Get() is None| check; other errors are re-raised.
    def handle(error):
      if isinstance(error, FileNotFoundError):
        return None
      raise error

    for path in paths:
      stat_value = cached_stat_values.get(path)
      if stat_value is None:
        stat_future = self.StatAsync(path)
        if skip_not_found:
          stat_future = stat_future.Then(lambda x: x, handle)
      else:
        stat_future = Future(value=stat_value)
      stat_futures[path] = stat_future

    # Filter only the cached data which is up to date by comparing to the latest
    # stat. The cached read data includes the cached version. Remove it for
    # the result returned to callers. |version| == None implies a non-existent
    # file, so skip it.
    up_to_date_data = {
        path: data
        for path, (data, version) in cached_read_values.items()
        if version is not None and stat_futures[path].Get().version == version
    }

    if skip_not_found:
      # Filter out paths which we know do not exist, i.e. if |path| is in
      # |cached_read_values| *and* has a None version, then it doesn't exist.
      # See the above declaration of |cached_read_values| for more information.
      paths = [path for path in paths
               if cached_read_values.get(path, (None, True))[1]]

    remaining_paths = set(paths) - set(up_to_date_data)
    if not remaining_paths:
      # Everything was cached and up-to-date.
      return Future(value=up_to_date_data)

    def raise_cache_miss(paths):
      raise FileNotFoundError('Got cache miss when trying to stat %s' % paths)

    if self._fail_on_miss:
      # Not allowed to fall back onto the underlying file system, so the
      # returned Future reports the miss instead of reading.
      logging.warning('Read cache miss for %s on %s',
                      remaining_paths, self.GetIdentity())
      return Future(callback=lambda: raise_cache_miss(remaining_paths))

    def update_caches(new_results):
      # Update the cache. This is a path -> (data, version) mapping.
      self._read_cache.SetMulti({
          path: (new_result, stat_futures[path].Get().version)
          for path, new_result in new_results.items()
      })
      # Update the read cache to include files that weren't found, to prevent
      # constantly trying to read a file we now know doesn't exist.
      self._read_cache.SetMulti({
          path: (None, None)
          for path in paths
          if stat_futures[path].Get() is None
      })
      # Callers get the freshly read data merged with what was already cached.
      new_results.update(up_to_date_data)
      return new_results

    # Read in the values that were uncached or old.
    return self._file_system.Read(
        remaining_paths, skip_not_found=skip_not_found).Then(update_caches)

  def GetCommitID(self):
    '''Delegates to the underlying file system's GetCommitID().
    '''
    return self._file_system.GetCommitID()

  def GetPreviousCommitID(self):
    '''Delegates to the underlying file system's GetPreviousCommitID().
    '''
    return self._file_system.GetPreviousCommitID()

  def Walk(self, root, depth=-1):
    '''Overrides FileSystem.Walk() to provide caching functionality.

    The underlying file system's Walk() is given a |file_lister| which serves
    directory listings from the walk cache when the cached version matches the
    current stat of the directory.
    '''
    def file_lister(root):
      res, root_stat = All((self._walk_cache.Get(root),
                            self.StatAsync(root))).Get()

      if res and res[2] == root_stat.version:
        dirs, files = res[0], res[1]
      else:
        # Wasn't cached, or not up to date.
        # NOTE(review): the partitioning below was lost in the damaged source
        # and is reconstructed; it assumes ReadSingle() on a directory yields
        # child names and that IsDirectory() identifies the sub-directories.
        dirs, files = [], []
        for f in self.ReadSingle(root).Get():
          if IsDirectory(f):
            dirs.append(f)
          else:
            files.append(f)
        # Update the cache. This is a root -> (dirs, files, version) mapping.
        self._walk_cache.Set(root, (dirs, files, root_stat.version))
      return dirs, files
    return self._file_system.Walk(root, depth=depth, file_lister=file_lister)

  def GetIdentity(self):
    '''Delegates to the underlying file system's GetIdentity().
    '''
    return self._file_system.GetIdentity()

  def GetVersion(self):
    '''Delegates to the underlying file system's GetVersion().
    '''
    return self._file_system.GetVersion()

  # NOTE(review): the method name was lost in the damaged source; |__repr__| is
  # inferred from the string this builds.
  def __repr__(self):
    return '%s of <%s>' % (type(self).__name__, repr(self._file_system))