final class DivinerGenerateWorkflow extends DivinerWorkflow {

  private $atomCache;

  protected function didConstruct() {
    $this
      ->setName('generate')
      ->setSynopsis(pht('Generate documentation.'))
      ->setArguments(array(
        array(
          'name' => 'clean',
          'help' => pht('Clear the caches before generating documentation.'),
        ),
        array(
          'name' => 'book',
          'param' => 'path',
          'help' => pht('Path to a Diviner book configuration.'),
        ),
        array(
          'name' => 'publisher',
          'param' => 'class',
          'help' => pht('Specify a subclass of %s.', 'DivinerPublisher'),
          'default' => 'DivinerLivePublisher',
        ),
        array(
          'name' => 'repository',
          'param' => 'identifier',
          'help' => pht('Repository that the documentation belongs to.'),
        ),
      ));
  }
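
  // Usage illustration only (the book path below is hypothetical): this
  // workflow is driven from the `diviner` CLI, using the flags defined above,
  // roughly like:
  //
  //   $ ./bin/diviner generate
  //   $ ./bin/diviner generate --book src/docs/book/project.book --clean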
  protected function getAtomCache() {
    if (!$this->atomCache) {
      $book_root = $this->getConfig('root');
      $book_name = $this->getConfig('name');
      $cache_directory = $book_root.'/.divinercache/'.$book_name;
      $this->atomCache = new DivinerAtomCache($cache_directory);
    }
    return $this->atomCache;
  }
  protected function log($message) {
    $console = PhutilConsole::getConsole();
    $console->writeErr($message."\n");
  }
  public function execute(PhutilArgumentParser $args) {
    $book = $args->getArg('book');
    if ($book) {
      $books = array($book);
    } else {
      $cwd = getcwd();
      $this->log(pht('FINDING DOCUMENTATION BOOKS'));
      $books = id(new FileFinder($cwd))
        ->withSuffix('book')
        ->find();

      if (!$books) {
        throw new PhutilArgumentUsageException(
          pht(
            "There are no Diviner '%s' files anywhere beneath the current ".
            "directory. Use '%s' to specify a documentation book to generate.",
            '.book',
            '--book'));
      }

      $this->log(pht('Found %s book(s).', phutil_count($books)));
    }

    foreach ($books as $book) {
      $short_name = basename($book);

      $this->log(pht('Generating book "%s"...', $short_name));
      $this->generateBook($book, $args);
      $this->log(pht('Completed generation of "%s".', $short_name)."\n");
    }
  }
  private function generateBook($book, PhutilArgumentParser $args) {
    $this->atomCache = null;

    $this->readBookConfiguration($book);

    if ($args->getArg('clean')) {
      $this->log(pht('CLEARING CACHES'));
      $this->getAtomCache()->delete();
      $this->log(pht('Done.')."\n");
    }

    // The major challenge of documentation generation is one of dependency
    // management. When regenerating documentation, we want to do the smallest
    // amount of work we can, so that regenerating documentation after minor
    // changes is quick.
    //
    // In the first stage, we find all the direct changes to source code since
    // the last run. This stage relies on two data structures:
    //
    //   - File Hash Map: `map<file_hash, node_hash>`
    //   - Atom Map: `map<node_hash, true>`
    //
    // First, we hash all the source files in the project to detect any which
    // have changed since the previous run (i.e., their hash is not present in
    // the File Hash Map). If a file's content hash appears in the map, it has
    // not changed, so we don't need to reparse it.
    //
    // We break the contents of each file into "atoms", which represent a unit
    // of source code (like a function, method, class or file). Each atom has
    // a "node hash" based on the content of the atom: if a function
    // definition changes, the node hash of the atom changes too. The primary
    // output of the atom cache is a list of node hashes which exist in the
    // project. This is the Atom Map. The node hash depends only on the
    // definition of the atom and the atomizer implementation. It ends with an
    // "N", for "node".
    //
    // (We need the Atom Map in addition to the File Hash Map because each
    // file may have several atoms in it (e.g., multiple functions, or a class
    // and its methods). The File Hash Map contains an exhaustive list of all
    // atoms with type "file", but not child atoms of those top-level atoms.)
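    //
    // As a purely illustrative sketch (the hashes and path below are
    // invented, not real cache contents), the two maps might look like this
    // after a run over a small project:
    //
    //   File Hash Map:
    //     "c1a9...07bF" => "4e2d...91aN"   // src/example.php => its file atom
    //   Atom Map:
    //     "4e2d...91aN" => true            // the file atom
    //     "7b3f...c22N" => true            // a function atom inside that file
    //
    // File content hashes end with an "F" (see findFilesInProject() below);
    // node hashes end with an "N", as described above.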
    //
    // We now know which atoms exist, and can compare the Atom Map to some
    // existing cache to figure out what has changed. However, this isn't
    // sufficient to figure out which documentation actually needs to be
    // regenerated, because atoms depend on other atoms. For example, if `B
    // extends A` and the definition for `A` changes, we need to regenerate
    // the documentation in `B`. Similarly, if `X` links to `Y` and `Y`
    // changes, we should regenerate `X`. (In both these cases, the
    // documentation for the connected atom may not actually change, but in
    // some cases it will, and the extra work we need to do is generally very
    // small compared to the size of the project.)
    //
    // To figure out which other nodes have changed, we compute a "graph hash"
    // for each node. This hash combines the "node hash" with the node hashes
    // of connected nodes. Our primary output is a list of graph hashes, which
    // a documentation generator can use to easily determine what work needs
    // to be done by comparing the list with a list of cached graph hashes,
    // then generating documentation for new hashes and deleting documentation
    // for missing hashes. The graph hash ends with a "G", for "graph".
    //
    // In this stage, we rely on three data structures:
    //
    //   - Symbol Map: `map<node_hash, symbol_hash>`
    //   - Edge Map: `map<node_hash, list<symbol_hash>>`
    //   - Graph Map: `map<node_hash, graph_hash>`
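    //
    // Again purely as an illustration (invented hashes), a single node might
    // be represented in these maps as:
    //
    //   Symbol Map:  "4e2d...91aN" => "a80c...f53S"
    //   Edge Map:    "4e2d...91aN" => array("a80c...f53S", "d217...6e0S")
    //   Graph Map:   "4e2d...91aN" => "09fb...b44G"
    //
    // ...where the edge list holds the symbol hashes of atoms this node
    // refers to, including its own symbol (see getEdges() below).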
    //
    // Calculating the graph hash requires several steps, because we need to
    // figure out which nodes an atom is attached to. The atom contains
    // symbolic references to other nodes by name (e.g., `extends SomeClass`)
    // in the form of @{class:DivinerAtomRefs}. We can also build a symbolic
    // reference for any atom from the atom itself. Each
    // @{class:DivinerAtomRef} generates a symbol hash, which ends with an
    // "S", for "symbol".
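    //
    // (Illustrative aside: a reference essentially captures "which book,
    // which named thing", so two atoms that refer to `SomeClass` by name end
    // up pointing at the same "...S" symbol hash, even before we know which
    // node hash currently defines `SomeClass`.)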
    //
    // First, we update the symbol map. We remove (and mark dirty) any symbols
    // associated with node hashes which no longer exist (e.g., old/dead
    // nodes). Second, we add (and mark dirty) any symbols associated with new
    // nodes. We also add edges defined by new nodes to the graph.
    //
    // We initialize a list of dirty nodes to the list of new nodes, then find
    // all nodes connected to dirty symbols and add them to the dirty node
    // list. This list now contains every node with a new or changed graph
    // hash.
    //
    // We walk the dirty list and compute the new graph hashes, adding them
    // to the graph hash map. This Graph Map can then be passed to an actual
    // documentation generator, which can compare the graph hashes to a list
    // of already-generated graph hashes and easily assess which documents
    // need to be regenerated and which can be deleted.
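    //
    // As a concrete (and, again, invented) trace: suppose only `A` changes
    // and `B extends A`. Hashing the files shows one new file hash, so `A`'s
    // file is re-atomized and `A` gets a new node hash. The old node hash
    // disappears from the Atom Map, so its symbol is deleted and marked
    // dirty; the new node hash adds the same symbol back and marks it dirty
    // again. Following the Edge Map from that dirty symbol reaches `B`, so
    // both `A` and `B` land in the dirty node list and get their graph hashes
    // recomputed, while the rest of the project is untouched.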

    $this->buildAtomCache();
    $this->buildGraphCache();

    $publisher_class = $args->getArg('publisher');
    $symbols = id(new PhutilSymbolLoader())
      ->setName($publisher_class)
      ->setConcreteOnly(true)
      ->setAncestorClass('DivinerPublisher')
      ->selectAndLoadSymbols();
    if (!$symbols) {
      throw new PhutilArgumentUsageException(
        pht(
          "Publisher class '%s' must be a concrete subclass of %s.",
          $publisher_class,
          'DivinerPublisher'));
    }
    $publisher = newv($publisher_class, array());

    $identifier = $args->getArg('repository');
    if (strlen($identifier)) {
      $repository = id(new PhabricatorRepositoryQuery())
        ->setViewer(PhabricatorUser::getOmnipotentUser())
        ->withIdentifiers(array($identifier))
        ->executeOne();
      if (!$repository) {
        throw new PhutilArgumentUsageException(
          pht(
            'Repository "%s" does not exist.',
            $identifier));
      }
      $publisher->setRepositoryPHID($repository->getPHID());
    }

    $this->publishDocumentation($args->getArg('clean'), $publisher);
  }

/* -(  Atom Cache  )--------------------------------------------------------- */

  private function buildAtomCache() {
    $this->log(pht('BUILDING ATOM CACHE'));

    $file_hashes = $this->findFilesInProject();
    $this->log(
      pht(
        'Found %s file(s) in project.',
        phutil_count($file_hashes)));

    $this->deleteDeadAtoms($file_hashes);
    $atomize = $this->getFilesToAtomize($file_hashes);
    $this->log(
      pht(
        'Found %s unatomized, uncached file(s).',
        phutil_count($atomize)));

    $file_atomizers = $this->getAtomizersForFiles($atomize);
    $this->log(
      pht(
        'Found %s file(s) to atomize.',
        phutil_count($file_atomizers)));

    $futures = $this->buildAtomizerFutures($file_atomizers);
    $this->log(
      pht(
        'Atomizing %s file(s).',
        phutil_count($file_atomizers)));

    if ($futures) {
      $this->resolveAtomizerFutures($futures, $file_hashes);
      $this->log(pht('Atomization complete.'));
    } else {
      $this->log(pht('Atom cache is up to date, no files to atomize.'));
    }

    $this->log(pht('Writing atom cache.'));
    $this->getAtomCache()->saveAtoms();
    $this->log(pht('Done.')."\n");
  }

  private function getAtomizersForFiles(array $files) {
    $rules = $this->getRules();
    $exclude = $this->getExclude();
    $atomizers = array();

    foreach ($files as $file) {
      foreach ($exclude as $pattern) {
        if (preg_match($pattern, $file)) {
          continue 2;
        }
      }

      foreach ($rules as $rule => $atomizer) {
        $ok = preg_match($rule, $file);
        if ($ok === false) {
          throw new Exception(
            pht("Rule '%s' is not a valid regular expression.", $rule));
        }
        if ($ok) {
          $atomizers[$file] = $atomizer;
          continue 2;
        }
      }
    }

    return $atomizers;
  }

  private function getRules() {
    return $this->getConfig('rules', array(
      '/\\.diviner$/' => 'DivinerArticleAtomizer',
      '/\\.php$/' => 'DivinerPHPAtomizer',
    ));
  }

  private function getExclude() {
    $exclude = (array)$this->getConfig('exclude', array());
    return $exclude;
  }

  private function findFilesInProject() {
    $raw_hashes = id(new FileFinder($this->getConfig('root')))
      ->excludePath('*/.*')
      ->setGenerateChecksums(true)
      ->find();

    $version = $this->getDivinerAtomWorldVersion();

    $file_hashes = array();
    foreach ($raw_hashes as $file => $md5_hash) {
      $rel_file = Filesystem::readablePath($file, $this->getConfig('root'));
      // We want the hash to change if the file moves or Diviner gets updated,
      // not just if the file content changes. Derive a hash from everything
      // we care about.
      $file_hashes[$rel_file] = md5("{$rel_file}\0{$md5_hash}\0{$version}").'F';
    }

    return $file_hashes;
  }

  private function deleteDeadAtoms(array $file_hashes) {
    $atom_cache = $this->getAtomCache();

    $hash_to_file = array_flip($file_hashes);
    foreach ($atom_cache->getFileHashMap() as $hash => $atom) {
      if (empty($hash_to_file[$hash])) {
        $atom_cache->deleteFileHash($hash);
      }
    }
  }

  private function getFilesToAtomize(array $file_hashes) {
    $atom_cache = $this->getAtomCache();

    $atomize = array();
    foreach ($file_hashes as $file => $hash) {
      if (!$atom_cache->fileHashExists($hash)) {
        $atomize[] = $file;
      }
    }

    return $atomize;
  }

  private function buildAtomizerFutures(array $file_atomizers) {
    $atomizers = array();
    foreach ($file_atomizers as $file => $atomizer) {
      $atomizers[$atomizer][] = $file;
    }

    $root = dirname(phutil_get_library_root('phabricator'));
    $config_root = $this->getConfig('root');

    $bar = id(new PhutilConsoleProgressBar())
      ->setTotal(count($file_atomizers));

    $futures = array();
    foreach ($atomizers as $class => $files) {
      foreach (array_chunk($files, 32) as $chunk) {
        $future = new ExecFuture(
          '%s atomize --ugly --book %s --atomizer %s -- %Ls',
          $root.'/bin/diviner',
          $this->getBookConfigPath(),
          $class,
          $chunk);
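        // Illustration only (the paths and file names below are
        // hypothetical): each chunk becomes a subprocess roughly like
        //
        //   /path/to/phabricator/bin/diviner atomize --ugly \
        //     --book path/to/project/.book \
        //     --atomizer DivinerPHPAtomizer \
        //     -- src/a.php src/b.php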
        $future->setCWD($config_root);

        $futures[] = $future;

        $bar->update(count($chunk));
      }
    }
    $bar->done();

    return $futures;
  }

  private function resolveAtomizerFutures(array $futures, array $file_hashes) {
    assert_instances_of($futures, 'Future');

    $atom_cache = $this->getAtomCache();
    $bar = id(new PhutilConsoleProgressBar())
      ->setTotal(count($futures));
    $futures = id(new FutureIterator($futures))
      ->limit(4);
    foreach ($futures as $key => $future) {
      try {
        $atoms = $future->resolveJSON();

        foreach ($atoms as $atom) {
          if ($atom['type'] == DivinerAtom::TYPE_FILE) {
            $file_hash = $file_hashes[$atom['file']];
            $atom_cache->addFileHash($file_hash, $atom['hash']);
          }
          $atom_cache->addAtom($atom);
        }
      } catch (Exception $e) {
        phlog($e);
      }

      $bar->update(1);
    }
    $bar->done();
  }

  /**
   * Get a global version number, which changes whenever any atom or atomizer
   * implementation changes in a way which is not backward-compatible.
   */
  private function getDivinerAtomWorldVersion() {
    $version = array();
    $version['atom'] = DivinerAtom::getAtomSerializationVersion();
    $version['rules'] = $this->getRules();

    $atomizers = id(new PhutilClassMapQuery())
      ->setAncestorClass('DivinerAtomizer')
      ->execute();

    $atomizer_versions = array();
    foreach ($atomizers as $atomizer) {
      $name = get_class($atomizer);
      $atomizer_versions[$name] = call_user_func(
        array(
          $name,
          'getAtomizerVersion',
        ));
    }

    ksort($atomizer_versions);
    $version['atomizers'] = $atomizer_versions;

    return md5(serialize($version));
  }

/* -(  Graph Cache  )-------------------------------------------------------- */

  private function buildGraphCache() {
    $this->log(pht('BUILDING GRAPH CACHE'));

    $atom_cache = $this->getAtomCache();
    $symbol_map = $atom_cache->getSymbolMap();
    $atoms = $atom_cache->getAtomMap();

    $dirty_symbols = array();
    $dirty_nhashes = array();

    $del_atoms = array_diff_key($symbol_map, $atoms);
    $this->log(
      pht(
        'Found %s obsolete atom(s) in graph.',
        phutil_count($del_atoms)));

    foreach ($del_atoms as $nhash => $shash) {
      $atom_cache->deleteSymbol($nhash);
      $dirty_symbols[$shash] = true;

      $atom_cache->deleteEdges($nhash);
      $atom_cache->deleteGraph($nhash);
    }

    $new_atoms = array_diff_key($atoms, $symbol_map);
    $this->log(
      pht(
        'Found %s new atom(s) in graph.',
        phutil_count($new_atoms)));

    foreach ($new_atoms as $nhash => $ignored) {
      $shash = $this->computeSymbolHash($nhash);
      $atom_cache->addSymbol($nhash, $shash);
      $dirty_symbols[$shash] = true;

      $atom_cache->addEdges($nhash, $this->getEdges($nhash));

      $dirty_nhashes[$nhash] = true;
    }

    $this->log(pht('Propagating changes through the graph.'));

    // Find all the nodes which point at a dirty node, and dirty them. Then
    // find all the nodes which point at those nodes and dirty them, and so
    // on. (This is slightly overkill since we probably don't need to
    // propagate dirtiness across documentation "links" between symbols, but
    // we do want to propagate it across "extends", and we suffer only a
    // little bit of collateral damage by over-dirtying as long as the
    // documentation isn't too well-connected.)

    $symbol_stack = array_keys($dirty_symbols);
    while ($symbol_stack) {
      $symbol_hash = array_pop($symbol_stack);

      foreach ($atom_cache->getEdgesWithDestination($symbol_hash) as $edge) {
        $dirty_nhashes[$edge] = true;
        $src_hash = $this->computeSymbolHash($edge);
        if (empty($dirty_symbols[$src_hash])) {
          $dirty_symbols[$src_hash] = true;
          $symbol_stack[] = $src_hash;
        }
      }
    }

    $this->log(
      pht(
        'Found %s affected atom(s).',
        phutil_count($dirty_nhashes)));

    foreach ($dirty_nhashes as $nhash => $ignored) {
      $atom_cache->addGraph($nhash, $this->computeGraphHash($nhash));
    }

    $this->log(pht('Writing graph cache.'));

    $atom_cache->saveGraph();
    $atom_cache->saveEdges();
    $atom_cache->saveSymbols();

    $this->log(pht('Done.')."\n");
  }

  private function computeSymbolHash($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    if (!$atom) {
      throw new Exception(
        pht("No such atom with node hash '%s'!", $node_hash));
    }

    $ref = DivinerAtomRef::newFromDictionary($atom['ref']);
    return $ref->toHash();
  }

  private function getEdges($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    $refs = array();

    // Make the atom depend on its own symbol, so that all atoms with the same
    // symbol are dirtied (e.g., if a codebase defines the function `f()`
    // several times, all of them should be dirtied when one is dirtied).
    $refs[DivinerAtomRef::newFromDictionary($atom)->toHash()] = true;

    foreach (array_merge($atom['extends'], $atom['links']) as $ref_dict) {
      $ref = DivinerAtomRef::newFromDictionary($ref_dict);
      if ($ref->getBook() == $atom['book']) {
        $refs[$ref->toHash()] = true;
      }
    }

    return array_keys($refs);
  }

  private function computeGraphHash($node_hash) {
    $atom_cache = $this->getAtomCache();
    $atom = $atom_cache->getAtom($node_hash);

    $edges = $this->getEdges($node_hash);

    $inputs = array(
      'atomHash' => $atom['hash'],
      'edges' => $edges,
    );

    return md5(serialize($inputs)).'G';
  }

  private function publishDocumentation($clean, DivinerPublisher $publisher) {
    $atom_cache = $this->getAtomCache();
    $graph_map = $atom_cache->getGraphMap();

    $this->log(pht('PUBLISHING DOCUMENTATION'));

    $publisher
      ->setDropCaches($clean)
      ->setConfig($this->getAllConfig())
      ->setAtomCache($atom_cache)
      ->setRenderer(new DivinerDefaultRenderer())
      ->publishAtoms(array_values($graph_map));

    $this->log(pht('Done.'));
  }

}