3 # Thomas Nagy, 2017-2018 (ita)
6 A system for fast partial rebuilds
8 Creating a large amount of task objects up front can take some time.
9 By making a few assumptions, it is possible to avoid creating
10 task objects for targets that are already up-to-date.
12 On a silly benchmark the gain observed for 1M tasks can be 5m->10s
13 for a single file change.
18 opt.load('fast_partial')
21 * Start with a clean build (run "waf distclean" after enabling)
22 * Mostly for C/C++/Fortran targets with link tasks (object-only targets are not handled)
23 * Try it in the folder generated by utils/genbench.py
24 * For full project builds: no --targets and no pruning from subfolders
25 * The installation phase is ignored
26 * `use=` dependencies are specified up front even across build groups
27 * Task generator source files are not obtained from globs
29 Implementation details:
30 * The first layer obtains file timestamps to recalculate file hashes only
31 when necessary (similar to md5_tstamp); the timestamps are then stored
32 in a dedicated pickle file
33 * A second layer associates each task generator to a file set to help
34 detecting changes. Task generators are to create their tasks only when
35 the related files have been modified. A specific db file is created
36 to store such data (5m -> 1m10)
37 * A third layer binds build context proxies onto task generators, replacing
38 the default context. While loading data for the full build uses more memory
39 (4GB -> 9GB), partial builds are then much faster (1m10 -> 13s)
40 * A fourth layer enables a 2-level cache on file signatures to
41 reduce the size of the main pickle file (13s -> 10s)
import os

import waflib.Node
from waflib import Build, Context, Errors, Logs, Task, TaskGen, Utils
from waflib.TaskGen import after_method, feature, taskgen_method
# Task generator staleness states; DONE/DIRTY/NEEDED are read by is_stale,
# store_tstamps and compute_needed_tgs below.
DONE = 0
DIRTY = 1
NEEDED = 2

# Features whose object files may be skipped on partial builds (libraries and
# programs with a link task); object-only targets are not skippable.
SKIPPABLE = ['cshlib', 'cxxshlib', 'cstlib', 'cxxstlib', 'cprogram', 'cxxprogram']

# Name of the pickle file holding file timestamps and per-tg dependency lists
TSTAMP_DB = '.wafpickle_tstamp_db_file'

# Build data attributes saved/restored per task generator by bld_proxy
SAVED_ATTRS = 'root node_sigs task_sigs imp_sigs raw_deps node_deps'.split()
class bld_proxy(object):
	"""
	A per-task-generator proxy over the real build context.

	Attribute writes and deletions are forwarded to the wrapped build context;
	reads fall back to it only when the proxy itself lacks the attribute.
	Each proxy owns a private Node class and a private pickle file (suffix
	``store_key``) so that build data can be loaded/stored per task generator.
	"""
	def __init__(self, bld):
		object.__setattr__(self, 'bld', bld)

		# dedicated Node subclass so that unpickled nodes bind to this proxy
		object.__setattr__(self, 'node_class', type('Nod3', (waflib.Node.Node,), {}))
		self.node_class.__module__ = 'waflib.Node'
		self.node_class.ctx = self

		object.__setattr__(self, 'root', self.node_class('', None))
		for x in SAVED_ATTRS:
			if x != 'root':
				object.__setattr__(self, x, {})

		self.fix_nodes()

	def __setattr__(self, name, value):
		# forward attribute writes to the wrapped build context
		bld = object.__getattribute__(self, 'bld')
		setattr(bld, name, value)

	def __delattr__(self, name):
		# forward attribute deletions to the wrapped build context
		bld = object.__getattribute__(self, 'bld')
		delattr(bld, name)

	def __getattribute__(self, name):
		# read from the proxy first, then fall back to the wrapped context
		try:
			return object.__getattribute__(self, name)
		except AttributeError:
			bld = object.__getattribute__(self, 'bld')
			return getattr(bld, name)

	def __call__(self, *k, **kw):
		return self.bld(*k, **kw)

	def fix_nodes(self):
		# rebind the main directory nodes onto this proxy's node hierarchy
		for x in ('srcnode', 'path', 'bldnode'):
			node = self.root.find_dir(getattr(self.bld, x).abspath())
			object.__setattr__(self, x, node)

	def set_key(self, store_key):
		# suffix identifying this task generator's pickle file
		object.__setattr__(self, 'store_key', store_key)

	def fix_tg_path(self, *tgs):
		# changing Node objects on task generators is possible
		# yet, all Node objects must belong to the same parent
		for tg in tgs:
			tg.path = self.root.make_node(tg.path.abspath())

	def restore(self):
		"""Load the build data bound to this proxy from its pickle file, if any."""
		dbfn = os.path.join(self.variant_dir, Context.DBFILE + self.store_key)
		Logs.debug('rev_use: reading %s', dbfn)
		try:
			data = Utils.readf(dbfn, 'rb')
		except (EnvironmentError, EOFError):
			# handle missing file/empty file
			Logs.debug('rev_use: Could not load the build cache %s (missing)', dbfn)
		else:
			try:
				waflib.Node.pickle_lock.acquire()
				# unpickle nodes as instances of this proxy's node class
				waflib.Node.Nod3 = self.node_class
				try:
					data = Build.cPickle.loads(data)
				except Exception as e:
					Logs.debug('rev_use: Could not pickle the build cache %s: %r', dbfn, e)
				else:
					for x in SAVED_ATTRS:
						object.__setattr__(self, x, data.get(x, {}))
			finally:
				waflib.Node.pickle_lock.release()
		self.fix_nodes()

	def store(self):
		"""Persist this proxy's build data to its own pickle file (atomic rename)."""
		data = {}
		for x in Build.SAVED_ATTRS:
			data[x] = getattr(self, x)
		db = os.path.join(self.variant_dir, Context.DBFILE + self.store_key)

		with waflib.Node.pickle_lock:
			waflib.Node.Nod3 = self.node_class
			try:
				x = Build.cPickle.dumps(data, Build.PROTOCOL)
			except Build.cPickle.PicklingError:
				root = data['root']
				for node_deps in data['node_deps'].values():
					for idx, node in enumerate(node_deps):
						# there may be more cross-context Node objects to fix,
						# but this should be the main source
						node_deps[idx] = root.find_node(node.abspath())
				x = Build.cPickle.dumps(data, Build.PROTOCOL)

		Logs.debug('rev_use: storing %s', db)
		Utils.writef(db + '.tmp', x, m='wb')
		try:
			# preserve the original file ownership when overwriting
			st = os.stat(db)
			os.remove(db)
			if not Utils.is_win32:
				os.chown(db + '.tmp', st.st_uid, st.st_gid)
		except (AttributeError, OSError):
			pass
		os.rename(db + '.tmp', db)
class bld(Build.BuildContext):
	"""
	Build context that posts only the task generators affected by a change.

	After each build the timestamps of all files involved in tasks are recorded
	(see store_tstamps); on the next run, task generators whose files are
	unchanged are not posted at all (see compute_needed_tgs/post_group).
	"""
	def __init__(self, **kw):
		super(bld, self).__init__(**kw)
		# global (non-persistent) file signature cache shared by all tgs
		self.hashes_md5_tstamp = {}

	def __call__(self, *k, **kw):
		# this is one way of doing it, one could use a task generator method too
		bld = kw['bld'] = bld_proxy(self)
		ret = TaskGen.task_gen(*k, **kw)
		self.task_gen_cache_names = {}
		self.add_to_group(ret, group=kw.get('group'))
		ret.bld = bld
		bld.set_key(ret.path.abspath().replace(os.sep, '') + str(ret.idx))
		return ret

	def is_dirty(self):
		return True

	def store_tstamps(self):
		# Called after a build is finished
		# For each task generator, record all files involved in task objects
		# optimization: done only if there was something built
		do_store = False
		try:
			f_deps = self.f_deps
		except AttributeError:
			f_deps = self.f_deps = {}
			self.f_tstamps = {}

		allfiles = set()
		for g in self.groups:
			for tg in g:
				try:
					staleness = tg.staleness
				except AttributeError:
					staleness = DIRTY

				if staleness != DIRTY:
					# DONE case: there was nothing built
					# NEEDED case: the tg was brought in because of 'use' propagation
					# but nothing really changed for them, there may be incomplete
					# tasks (object files) and in this case it is best to let the next build
					# figure out if an input/output file changed
					continue

				do_cache = False
				for tsk in tg.tasks:
					if tsk.hasrun == Task.SUCCESS:
						do_cache = True
					elif tsk.hasrun == Task.SKIPPED:
						pass
					else:
						# one failed task, clear the cache for this tg
						do_cache = False
						try:
							del f_deps[(tg.path.abspath(), tg.idx)]
						except KeyError:
							pass
						else:
							# just store the new state because there is a change
							do_store = True

						# skip the rest because there is no valid cache possible
						break
				else:
					if not do_cache:
						# all skipped, but is there anything in cache?
						try:
							f_deps[(tg.path.abspath(), tg.idx)]
						except KeyError:
							# probably cleared because a wscript file changed
							do_cache = True

					if do_cache:
						# there was a rebuild, store the data structure too
						tg.bld.store()

						# all tasks skipped but no cache
						# or a successful task build
						do_store = True
						st = set()
						for tsk in tg.tasks:
							st.update(tsk.inputs)
							st.update(self.node_deps.get(tsk.uid(), []))

						# TODO do last/when loading the tgs?
						lst = []
						for k in ('wscript', 'wscript_build'):
							n = tg.path.find_node(k)
							if n:
								lst.append(n.abspath())

						lst.extend(sorted(x.abspath() for x in st))
						allfiles.update(lst)

						# needed for computing the sig
						f_deps[(tg.path.abspath(), tg.idx)] = lst

		for x in allfiles:
			# f_tstamps has everything, while md5_tstamp can be relatively empty on partial builds
			self.f_tstamps[x] = self.hashes_md5_tstamp[x][0]

		if do_store:
			dbfn = os.path.join(self.variant_dir, TSTAMP_DB)
			Logs.debug('rev_use: storing %s', dbfn)
			dbfn_tmp = dbfn + '.tmp'
			x = Build.cPickle.dumps([self.f_tstamps, f_deps], Build.PROTOCOL)
			Utils.writef(dbfn_tmp, x, m='wb')
			os.rename(dbfn_tmp, dbfn)
			Logs.debug('rev_use: stored %s', dbfn)

	def store(self):
		self.store_tstamps()
		if self.producer.dirty:
			Build.BuildContext.store(self)

	def compute_needed_tgs(self):
		# assume the 'use' keys are not modified during the build phase
		dbfn = os.path.join(self.variant_dir, TSTAMP_DB)
		Logs.debug('rev_use: Loading %s', dbfn)
		try:
			data = Utils.readf(dbfn, 'rb')
		except (EnvironmentError, EOFError):
			Logs.debug('rev_use: Could not load the build cache %s (missing)', dbfn)
			self.f_deps = {}
			self.f_tstamps = {}
		else:
			try:
				self.f_tstamps, self.f_deps = Build.cPickle.loads(data)
			except Exception as e:
				Logs.debug('rev_use: Could not pickle the build cache %s: %r', dbfn, e)
				self.f_deps = {}
				self.f_tstamps = {}
			else:
				Logs.debug('rev_use: Loaded %s', dbfn)

		# 1. obtain task generators that contain rebuilds
		# 2. obtain the 'use' graph and its dual
		stales = set()
		reverse_use_map = Utils.defaultdict(list)
		use_map = Utils.defaultdict(list)

		for g in self.groups:
			for tg in g:
				if tg.is_stale():
					stales.add(tg)

				try:
					lst = tg.use = Utils.to_list(tg.use)
				except AttributeError:
					pass
				else:
					for x in lst:
						try:
							xtg = self.get_tgen_by_name(x)
						except Errors.WafError:
							pass
						else:
							use_map[tg].append(xtg)
							reverse_use_map[xtg].append(tg)

		Logs.debug('rev_use: found %r stale tgs', len(stales))

		# 3. dfs to post downstream tg as stale
		visited = set()
		def mark_down(tg):
			if tg in visited:
				return
			visited.add(tg)
			Logs.debug('rev_use: marking down %r as stale', tg.name)
			tg.staleness = DIRTY
			for x in reverse_use_map[tg]:
				mark_down(x)
		for tg in stales:
			mark_down(tg)

		# 4. dfs to find ancestors tg to mark as needed
		self.needed_tgs = needed_tgs = set()
		def mark_needed(tg):
			if tg in needed_tgs:
				return
			needed_tgs.add(tg)
			if tg.staleness == DONE:
				Logs.debug('rev_use: marking up %r as needed', tg.name)
				tg.staleness = NEEDED
			for x in use_map[tg]:
				mark_needed(x)
		for tg in visited:
			mark_needed(tg)

		# so we have the whole tg trees to post in the set "needed"
		# load their build trees
		for tg in needed_tgs:
			tg.bld.restore()
			tg.bld.fix_tg_path(tg)

		# the stale ones should be fully build, while the needed ones
		# may skip a few tasks, see create_compiled_task and apply_link_after below
		Logs.debug('rev_use: amount of needed task gens: %r', len(needed_tgs))

	def post_group(self):
		# assumption: we can ignore the folder/subfolders cuts
		def tgpost(tg):
			try:
				f = tg.post
			except AttributeError:
				pass
			else:
				f()

		if not self.targets or self.targets == '*':
			for tg in self.groups[self.current_group]:
				# this can cut quite a lot of tg objects
				if tg in self.needed_tgs:
					tgpost(tg)
		else:
			# default implementation: pass self explicitly (unbound call)
			return Build.BuildContext.post_group(self)

	def get_build_iterator(self):
		if not self.targets or self.targets == '*':
			# full-build pruning only; --targets uses the default code path
			self.compute_needed_tgs()
		return Build.BuildContext.get_build_iterator(self)
@taskgen_method
def is_stale(self):
	"""
	Return True when this task generator must be posted (its cached
	dependency files changed or no usable cache exists); set ``staleness``
	to DIRTY or DONE accordingly. Assumes no source globs.
	"""
	self.staleness = DIRTY

	# 1. the case of always stale targets
	if getattr(self, 'always_stale', False):
		return True

	# 2. check if the db file exists
	db = os.path.join(self.bld.variant_dir, Context.DBFILE)
	try:
		dbstat = os.stat(db).st_mtime
	except OSError:
		# fixed: the original format string had a %r with no argument
		Logs.debug('rev_use: must post %r because this is a clean build', self.name)
		return True

	# 3.a check if the configuration exists
	cache_node = self.bld.bldnode.find_node('c4che/build.config.py')
	if not cache_node:
		return True

	# 3.b check if the configuration changed
	if os.stat(cache_node.abspath()).st_mtime > dbstat:
		Logs.debug('rev_use: must post %r because the configuration has changed', self.name)
		return True

	# 3.c any tstamp data?
	try:
		f_deps = self.bld.f_deps
	except AttributeError:
		Logs.debug('rev_use: must post %r because there is no f_deps', self.name)
		return True

	# 4. check if this is the first build (no cache)
	try:
		lst = f_deps[(self.path.abspath(), self.idx)]
	except KeyError:
		Logs.debug('rev_use: must post %r because there it has no cached data', self.name)
		return True

	try:
		cache = self.bld.cache_tstamp_rev_use
	except AttributeError:
		cache = self.bld.cache_tstamp_rev_use = {}

	# 5. check the timestamp of each dependency files listed is unchanged
	f_tstamps = self.bld.f_tstamps
	for x in lst:
		try:
			old_ts = f_tstamps[x]
		except KeyError:
			Logs.debug('rev_use: must post %r because %r is not in cache', self.name, x)
			return True

		try:
			try:
				ts = cache[x]
			except KeyError:
				ts = cache[x] = os.stat(x).st_mtime
		except OSError:
			# stale cache entry: the file disappeared
			del f_deps[(self.path.abspath(), self.idx)]
			Logs.debug('rev_use: must post %r because %r does not exist anymore', self.name, x)
			return True
		else:
			if ts != old_ts:
				Logs.debug('rev_use: must post %r because the timestamp on %r changed %r %r', self.name, x, old_ts, ts)
				return True

	self.staleness = DONE
	return False
@taskgen_method
def create_compiled_task(self, name, node):
	"""
	Create a compilation task for *node*, or return None to skip the
	creation of object files when the task generator is only NEEDED.
	"""
	# skip the creation of object files
	# assumption: object-only targets are not skippable
	if self.staleness == NEEDED:
		# only libraries/programs can skip object files
		for x in SKIPPABLE:
			if x in self.features:
				return None

	out = '%s.%d.o' % (node.name, self.idx)
	task = self.create_task(name, node, node.parent.find_or_declare(out))
	try:
		self.compiled_tasks.append(task)
	except AttributeError:
		self.compiled_tasks = [task]
	return task
# NOTE(review): the @feature line was lost in the corrupted source; without it
# @after_method alone never registers the method — restored per its usage.
@feature('c', 'cxx', 'd', 'fc', 'asm')
@after_method('apply_link')
def apply_link_after(self):
	"""Mark all tasks of a merely NEEDED task generator as already run."""
	# cprogram/cxxprogram might be unnecessary
	if self.staleness != NEEDED:
		return
	for tsk in self.tasks:
		tsk.hasrun = Task.SKIPPED
def path_from(self, node):
	"""Compute a relative path, converting *node* first when it belongs to
	a different (proxy) context so both nodes share the same root."""
	# handle nodes of distinct types
	if node.ctx is not self.ctx:
		node = self.ctx.root.make_node(node.abspath())
	return self.default_path_from(node)
# keep the original implementation reachable, then monkey-patch Node
waflib.Node.Node.default_path_from = waflib.Node.Node.path_from
waflib.Node.Node.path_from = path_from
def h_file(self):
	"""Return the signature of this Node's file, using a 2-layer cache.

	Similar to md5_tstamp.py, but with a 2-layer cache:
	* global_cache on the build context, common for all task generators
	  (not persistent)
	* local_cache on the build context proxy, one per task generator
	  (persistent, meant for partial builds)

	Assumes all calls are made from a single thread.
	"""
	filename = self.abspath()
	st = os.stat(filename)

	global_cache = self.ctx.bld.hashes_md5_tstamp
	local_cache = self.ctx.hashes_md5_tstamp

	if filename in global_cache:
		# value already calculated in this build
		cval = global_cache[filename]

		# the value in global cache is assumed to be calculated once
		# reverifying it could cause task generators
		# to get distinct tstamp values, thus missing rebuilds
		local_cache[filename] = cval
		return cval[1]

	if filename in local_cache:
		cval = local_cache[filename]
		if cval[0] == st.st_mtime:
			# correct value from a previous build
			# put it in the global cache
			global_cache[filename] = cval
			return cval[1]

	ret = Utils.h_file(filename)
	local_cache[filename] = global_cache[filename] = (st.st_mtime, ret)
	return ret
waflib.Node.Node.h_file = h_file