ctdb-daemon: Use ctdb_parse_node_address() in ctdbd
[samba4-gss.git] / third_party / waf / waflib / extras / fast_partial.py
blob90a94723bb85f8c5db89f0d6cfb3ef0ced7ddb64
1 #! /usr/bin/env python
2 # encoding: utf-8
3 # Thomas Nagy, 2017-2018 (ita)
5 """
6 A system for fast partial rebuilds
8 Creating a large amount of task objects up front can take some time.
By making a few assumptions, it is possible to avoid creating and posting
task objects for targets that are already up-to-date.
12 On a silly benchmark the gain observed for 1M tasks can be 5m->10s
13 for a single file change.
15 Usage::
17 def options(opt):
18 opt.load('fast_partial')
20 Assumptions:
21 * Start with a clean build (run "waf distclean" after enabling)
22 * Mostly for C/C++/Fortran targets with link tasks (object-only targets are not handled)
23 try it in the folder generated by utils/genbench.py
24 * For full project builds: no --targets and no pruning from subfolders
25 * The installation phase is ignored
26 * `use=` dependencies are specified up front even across build groups
27 * Task generator source files are not obtained from globs
29 Implementation details:
30 * The first layer obtains file timestamps to recalculate file hashes only
31 when necessary (similar to md5_tstamp); the timestamps are then stored
32 in a dedicated pickle file
33 * A second layer associates each task generator to a file set to help
34 detecting changes. Task generators are to create their tasks only when
35 the related files have been modified. A specific db file is created
36 to store such data (5m -> 1m10)
37 * A third layer binds build context proxies onto task generators, replacing
38 the default context. While loading data for the full build uses more memory
39 (4GB -> 9GB), partial builds are then much faster (1m10 -> 13s)
40 * A fourth layer enables a 2-level cache on file signatures to
41 reduce the size of the main pickle file (13s -> 10s)
42 """
44 import os
45 from waflib import Build, Context, Errors, Logs, Task, TaskGen, Utils
46 from waflib.TaskGen import feature, after_method, taskgen_method
47 import waflib.Node
# Task generator staleness states
DONE = 0     # nothing changed, nothing to post
DIRTY = 1    # inputs changed, the task generator must be posted
NEEDED = 2   # pulled in through 'use' propagation; object tasks may be skipped

# Features whose object-file creation can be skipped when the target is
# merely NEEDED (link-only participation); object-only targets are excluded
SKIPPABLE = ['cshlib', 'cxxshlib', 'cstlib', 'cxxstlib', 'cprogram', 'cxxprogram']

# Name of the pickle file holding the timestamp/dependency cache
TSTAMP_DB = '.wafpickle_tstamp_db_file'

# Build context attributes persisted in each per-generator db file
SAVED_ATTRS = 'root node_sigs task_sigs imp_sigs raw_deps node_deps'.split()
class bld_proxy(object):
	"""
	A build context proxy bound to a single task generator.

	Each proxy owns a private Node class and private copies of the
	SAVED_ATTRS data structures so that per-generator build data can be
	pickled to, and restored from, a dedicated db file. Attribute reads
	prefer the proxy and fall back to the real build context; attribute
	writes and deletions are forwarded to the real context.
	"""
	def __init__(self, bld):
		object.__setattr__(self, 'bld', bld)

		# a dedicated Node subclass so that unpickling does not mix
		# nodes across proxies/contexts
		object.__setattr__(self, 'node_class', type('Nod3', (waflib.Node.Node,), {}))
		self.node_class.__module__ = 'waflib.Node'
		self.node_class.ctx = self

		object.__setattr__(self, 'root', self.node_class('', None))
		for x in SAVED_ATTRS:
			if x != 'root':
				object.__setattr__(self, x, {})

		self.fix_nodes()

	def __setattr__(self, name, value):
		# forward writes to the real build context
		bld = object.__getattribute__(self, 'bld')
		setattr(bld, name, value)

	def __delattr__(self, name):
		# forward deletions to the real build context
		bld = object.__getattribute__(self, 'bld')
		delattr(bld, name)

	def __getattribute__(self, name):
		# reads prefer the proxy, then fall back to the real context
		try:
			return object.__getattribute__(self, name)
		except AttributeError:
			bld = object.__getattribute__(self, 'bld')
			return getattr(bld, name)

	def __call__(self, *k, **kw):
		return self.bld(*k, **kw)

	def fix_nodes(self):
		# re-create the essential directory nodes in this proxy's
		# private node tree
		for x in ('srcnode', 'path', 'bldnode'):
			node = self.root.find_dir(getattr(self.bld, x).abspath())
			object.__setattr__(self, x, node)

	def set_key(self, store_key):
		# suffix of the db file identifying this proxy's task generator
		object.__setattr__(self, 'store_key', store_key)

	def fix_tg_path(self, *tgs):
		# changing Node objects on task generators is possible
		# yet, all Node objects must belong to the same parent
		for tg in tgs:
			tg.path = self.root.make_node(tg.path.abspath())

	def restore(self):
		"""Load the pickled build data for this proxy, if present."""
		dbfn = os.path.join(self.variant_dir, Context.DBFILE + self.store_key)
		Logs.debug('rev_use: reading %s', dbfn)
		try:
			data = Utils.readf(dbfn, 'rb')
		except (EnvironmentError, EOFError):
			# handle missing file/empty file
			Logs.debug('rev_use: Could not load the build cache %s (missing)', dbfn)
		else:
			# use the lock as a context manager, consistent with store()
			with waflib.Node.pickle_lock:
				waflib.Node.Nod3 = self.node_class
				try:
					data = Build.cPickle.loads(data)
				except Exception as e:
					Logs.debug('rev_use: Could not pickle the build cache %s: %r', dbfn, e)
				else:
					for x in SAVED_ATTRS:
						object.__setattr__(self, x, data.get(x, {}))
		self.fix_nodes()

	def store(self):
		"""Pickle this proxy's build data to its dedicated db file."""
		data = {}
		for x in Build.SAVED_ATTRS:
			data[x] = getattr(self, x)
		db = os.path.join(self.variant_dir, Context.DBFILE + self.store_key)

		with waflib.Node.pickle_lock:
			waflib.Node.Nod3 = self.node_class
			try:
				x = Build.cPickle.dumps(data, Build.PROTOCOL)
			except Build.cPickle.PicklingError:
				# cross-context Node objects cannot be pickled: remap
				# them into this proxy's node tree and retry
				root = data['root']
				for node_deps in data['node_deps'].values():
					for idx, node in enumerate(node_deps):
						# there may be more cross-context Node objects to fix,
						# but this should be the main source
						node_deps[idx] = root.find_node(node.abspath())
				x = Build.cPickle.dumps(data, Build.PROTOCOL)

		# write to a temporary file first, then rename atomically,
		# preserving ownership where possible
		Logs.debug('rev_use: storing %s', db)
		Utils.writef(db + '.tmp', x, m='wb')
		try:
			st = os.stat(db)
			os.remove(db)
			if not Utils.is_win32:
				os.chown(db + '.tmp', st.st_uid, st.st_gid)
		except (AttributeError, OSError):
			pass
		os.rename(db + '.tmp', db)
class bld(Build.BuildContext):
	"""
	Build context maintaining a timestamp/dependency cache and the 'use'
	graph required to decide which task generators need posting at all.
	"""
	def __init__(self, **kw):
		super(bld, self).__init__(**kw)
		# per-build file hash cache shared by all task generators:
		# filename -> (timestamp, hash)
		self.hashes_md5_tstamp = {}

	def __call__(self, *k, **kw):
		"""Create a task generator bound to a dedicated context proxy."""
		# this is one way of doing it, one could use a task generator method too
		bld = kw['bld'] = bld_proxy(self)
		ret = TaskGen.task_gen(*k, **kw)
		self.task_gen_cache_names = {}
		self.add_to_group(ret, group=kw.get('group'))
		ret.bld = bld
		bld.set_key(ret.path.abspath().replace(os.sep, '') + str(ret.idx))
		return ret

	def is_dirty(self):
		# the main db is always considered dirty, see store() below
		return True

	def store_tstamps(self):
		"""
		Record, for each rebuilt task generator, the files involved in
		its task objects along with their timestamps.

		Called after a build is finished; the data is persisted only if
		something was actually built (optimization).
		"""
		do_store = False
		try:
			f_deps = self.f_deps
		except AttributeError:
			f_deps = self.f_deps = {}
			self.f_tstamps = {}

		allfiles = set()
		for g in self.groups:
			for tg in g:
				try:
					staleness = tg.staleness
				except AttributeError:
					staleness = DIRTY

				if staleness != DIRTY:
					# DONE case: there was nothing built
					# NEEDED case: the tg was brought in because of 'use' propagation
					# but nothing really changed for them, there may be incomplete
					# tasks (object files) and in this case it is best to let the next build
					# figure out if an input/output file changed
					continue

				do_cache = False
				for tsk in tg.tasks:
					if tsk.hasrun == Task.SUCCESS:
						do_cache = True
						pass
					elif tsk.hasrun == Task.SKIPPED:
						pass
					else:
						# one failed task, clear the cache for this tg
						try:
							del f_deps[(tg.path.abspath(), tg.idx)]
						except KeyError:
							pass
						else:
							# just store the new state because there is a change
							do_store = True

						# skip the rest because there is no valid cache possible
						break
				else:
					if not do_cache:
						# all skipped, but is there anything in cache?
						try:
							f_deps[(tg.path.abspath(), tg.idx)]
						except KeyError:
							# probably cleared because a wscript file changed
							# store it
							do_cache = True

					if do_cache:
						# there was a rebuild, store the data structure too
						tg.bld.store()

						# all tasks skipped but no cache
						# or a successful task build
						do_store = True
						st = set()
						for tsk in tg.tasks:
							st.update(tsk.inputs)
							st.update(self.node_deps.get(tsk.uid(), []))

						# TODO do last/when loading the tgs?
						lst = []
						for k in ('wscript', 'wscript_build'):
							n = tg.path.find_node(k)
							if n:
								n.get_bld_sig()
								lst.append(n.abspath())

						lst.extend(sorted(x.abspath() for x in st))
						allfiles.update(lst)
						f_deps[(tg.path.abspath(), tg.idx)] = lst

		for x in allfiles:
			# f_tstamps has everything, while md5_tstamp can be relatively empty on partial builds
			self.f_tstamps[x] = self.hashes_md5_tstamp[x][0]

		if do_store:
			dbfn = os.path.join(self.variant_dir, TSTAMP_DB)
			Logs.debug('rev_use: storing %s', dbfn)
			dbfn_tmp = dbfn + '.tmp'
			x = Build.cPickle.dumps([self.f_tstamps, f_deps], Build.PROTOCOL)
			Utils.writef(dbfn_tmp, x, m='wb')
			os.rename(dbfn_tmp, dbfn)
			Logs.debug('rev_use: stored %s', dbfn)

	def store(self):
		# persist the timestamp cache, then the main db if anything ran
		self.store_tstamps()
		if self.producer.dirty:
			Build.BuildContext.store(self)

	def compute_needed_tgs(self):
		"""
		Load the timestamp cache, then mark every task generator as
		DONE, DIRTY or NEEDED by propagating staleness along the 'use'
		graph, and restore the build trees of the needed ones.
		"""
		# assume the 'use' keys are not modified during the build phase
		dbfn = os.path.join(self.variant_dir, TSTAMP_DB)
		Logs.debug('rev_use: Loading %s', dbfn)
		try:
			data = Utils.readf(dbfn, 'rb')
		except (EnvironmentError, EOFError):
			Logs.debug('rev_use: Could not load the build cache %s (missing)', dbfn)
			self.f_deps = {}
			self.f_tstamps = {}
		else:
			try:
				self.f_tstamps, self.f_deps = Build.cPickle.loads(data)
			except Exception as e:
				Logs.debug('rev_use: Could not pickle the build cache %s: %r', dbfn, e)
				self.f_deps = {}
				self.f_tstamps = {}
			else:
				Logs.debug('rev_use: Loaded %s', dbfn)

		# 1. obtain task generators that contain rebuilds
		# 2. obtain the 'use' graph and its dual
		stales = set()
		reverse_use_map = Utils.defaultdict(list)
		use_map = Utils.defaultdict(list)

		for g in self.groups:
			for tg in g:
				if tg.is_stale():
					stales.add(tg)

				try:
					lst = tg.use = Utils.to_list(tg.use)
				except AttributeError:
					pass
				else:
					for x in lst:
						try:
							xtg = self.get_tgen_by_name(x)
						except Errors.WafError:
							pass
						else:
							use_map[tg].append(xtg)
							reverse_use_map[xtg].append(tg)

		Logs.debug('rev_use: found %r stale tgs', len(stales))

		# 3. dfs to post downstream tg as stale
		visited = set()
		def mark_down(tg):
			if tg in visited:
				return
			visited.add(tg)
			Logs.debug('rev_use: marking down %r as stale', tg.name)
			tg.staleness = DIRTY
			for x in reverse_use_map[tg]:
				mark_down(x)
		for tg in stales:
			mark_down(tg)

		# 4. dfs to find ancestors tg to mark as needed
		self.needed_tgs = needed_tgs = set()
		def mark_needed(tg):
			if tg in needed_tgs:
				return
			needed_tgs.add(tg)
			if tg.staleness == DONE:
				Logs.debug('rev_use: marking up %r as needed', tg.name)
				tg.staleness = NEEDED
			for x in use_map[tg]:
				mark_needed(x)
		for xx in visited:
			mark_needed(xx)

		# so we have the whole tg trees to post in the set "needed"
		# load their build trees
		for tg in needed_tgs:
			tg.bld.restore()
			tg.bld.fix_tg_path(tg)

		# the stale ones should be fully build, while the needed ones
		# may skip a few tasks, see create_compiled_task and apply_link_after below
		Logs.debug('rev_use: amount of needed task gens: %r', len(needed_tgs))

	def post_group(self):
		"""Post only the needed task generators of the current group."""
		# assumption: we can ignore the folder/subfolders cuts
		def tgpost(tg):
			try:
				f = tg.post
			except AttributeError:
				pass
			else:
				f()

		if not self.targets or self.targets == '*':
			for tg in self.groups[self.current_group]:
				# this can cut quite a lot of tg objects
				if tg in self.needed_tgs:
					tgpost(tg)
		else:
			# default implementation; the unbound base-class method
			# requires the context instance as its argument
			return Build.BuildContext.post_group(self)

	def get_build_iterator(self):
		# compute the staleness graph before iterating over the groups
		if not self.targets or self.targets == '*':
			self.compute_needed_tgs()
		return Build.BuildContext.get_build_iterator(self)
@taskgen_method
def is_stale(self):
	"""
	Tell whether this task generator must be posted, setting
	``self.staleness`` to DIRTY or DONE as a side effect.

	:return: True if the task generator must create its tasks
	:rtype: bool
	"""
	# assume no globs
	self.staleness = DIRTY

	# 1. the case of always stale targets
	if getattr(self, 'always_stale', False):
		return True

	# 2. check if the db file exists
	db = os.path.join(self.bld.variant_dir, Context.DBFILE)
	try:
		dbstat = os.stat(db).st_mtime
	except OSError:
		# pass self.name so that the %r placeholder is filled
		Logs.debug('rev_use: must post %r because this is a clean build', self.name)
		return True

	# 3.a check if the configuration exists
	cache_node = self.bld.bldnode.find_node('c4che/build.config.py')
	if not cache_node:
		return True

	# 3.b check if the configuration changed
	if os.stat(cache_node.abspath()).st_mtime > dbstat:
		Logs.debug('rev_use: must post %r because the configuration has changed', self.name)
		return True

	# 3.c any tstamp data?
	try:
		f_deps = self.bld.f_deps
	except AttributeError:
		Logs.debug('rev_use: must post %r because there is no f_deps', self.name)
		return True

	# 4. check if this is the first build (no cache)
	try:
		lst = f_deps[(self.path.abspath(), self.idx)]
	except KeyError:
		Logs.debug('rev_use: must post %r because there it has no cached data', self.name)
		return True

	try:
		cache = self.bld.cache_tstamp_rev_use
	except AttributeError:
		cache = self.bld.cache_tstamp_rev_use = {}

	# 5. check the timestamp of each dependency files listed is unchanged
	f_tstamps = self.bld.f_tstamps
	for x in lst:
		try:
			old_ts = f_tstamps[x]
		except KeyError:
			Logs.debug('rev_use: must post %r because %r is not in cache', self.name, x)
			return True

		try:
			try:
				ts = cache[x]
			except KeyError:
				ts = cache[x] = os.stat(x).st_mtime
		except OSError:
			del f_deps[(self.path.abspath(), self.idx)]
			Logs.debug('rev_use: must post %r because %r does not exist anymore', self.name, x)
			return True
		else:
			if ts != old_ts:
				Logs.debug('rev_use: must post %r because the timestamp on %r changed %r %r', self.name, x, old_ts, ts)
				return True

	self.staleness = DONE
	return False
@taskgen_method
def create_compiled_task(self, name, node):
	"""
	Create a compilation task for *node*, or return None when the task
	generator is merely NEEDED and carries a skippable link feature
	(libraries/programs may omit object files; object-only targets may not).
	"""
	if self.staleness == NEEDED and any(feat in self.features for feat in SKIPPABLE):
		# only libraries/programs can skip object files
		return None

	out = '%s.%d.o' % (node.name, self.idx)
	task = self.create_task(name, node, node.parent.find_or_declare(out))
	try:
		self.compiled_tasks.append(task)
	except AttributeError:
		self.compiled_tasks = [task]
	return task
@feature(*SKIPPABLE)
@after_method('apply_link')
def apply_link_after(self):
	"""
	Mark every task of a merely NEEDED target as skipped, so that only
	genuinely stale targets perform work (cprogram/cxxprogram might be
	unnecessary).
	"""
	if self.staleness == NEEDED:
		for task in self.tasks:
			task.hasrun = Task.SKIPPED
def path_from(self, node):
	"""
	Replacement for :py:meth:`waflib.Node.Node.path_from` that tolerates
	nodes belonging to a different context (proxy vs real build context):
	such nodes are first re-created in the current context's node tree
	before delegating to the original implementation.
	"""
	# handle nodes of distinct types
	if node.ctx is not self.ctx:
		node = self.ctx.root.make_node(node.abspath())
	return self.default_path_from(node)
# keep the original implementation around, then monkey-patch the Node class
waflib.Node.Node.default_path_from = waflib.Node.Node.path_from
waflib.Node.Node.path_from = path_from
def h_file(self):
	"""
	Hash a file node, replacing :py:meth:`waflib.Node.Node.h_file`.

	Similar to md5_tstamp.py, but with a 2-layer cache:
	- global_cache: on the build context, common to all task generators
	- local_cache: on the build context proxy (one per task generator)

	The global cache is not persistent; the local cache is persistent
	and meant for partial builds.

	Assumes all calls are made from a single thread.
	"""
	filename = self.abspath()
	st = os.stat(filename)

	global_cache = self.ctx.bld.hashes_md5_tstamp
	local_cache = self.ctx.hashes_md5_tstamp

	if filename in global_cache:
		# value already calculated in this build
		cval = global_cache[filename]

		# the value in global cache is assumed to be calculated once
		# reverifying it could cause task generators
		# to get distinct tstamp values, thus missing rebuilds
		local_cache[filename] = cval
		return cval[1]

	if filename in local_cache:
		cval = local_cache[filename]
		if cval[0] == st.st_mtime:
			# correct value from a previous build
			# put it in the global cache
			global_cache[filename] = cval
			return cval[1]

	# cache miss (or timestamp changed): hash the file and cache the
	# (timestamp, hash) pair in both layers
	ret = Utils.h_file(filename)
	local_cache[filename] = global_cache[filename] = (st.st_mtime, ret)
	return ret
# monkey-patch the Node class so all signature computations use this cache
waflib.Node.Node.h_file = h_file