Correct a parameter order swap in "diffusion.historyquery" for Mercurial
[phabricator.git] / src / applications / search / management / PhabricatorSearchManagementNgramsWorkflow.php
blob3d873788490e14d59db2d6f4d271c56a8cbbc55d
1 <?php
/**
 * Management workflow registered under the name "ngrams".
 *
 * Depending on the flags given, it either empties the common ngrams table
 * of every Ferret engine (`--reset`), or recomputes which ngrams are common
 * (`--threshold`) and records them in that table.
 */
final class PhabricatorSearchManagementNgramsWorkflow
  extends PhabricatorSearchManagementWorkflow {

  /**
   * Declare the workflow name, its synopsis, and the two flags it accepts.
   */
  protected function didConstruct() {
    $synopsis = pht(
      'Recompute common ngrams. This is an advanced workflow that '.
      'can harm search quality if used improperly.');

    $reset_spec = array(
      'name' => 'reset',
      'help' => pht('Reset all common ngram records.'),
    );

    $threshold_spec = array(
      'name' => 'threshold',
      'param' => 'threshold',
      'help' => pht(
        'Prune ngrams present in more than this fraction of '.
        'documents. Provide a value between 0.0 and 1.0.'),
    );

    $this
      ->setName('ngrams')
      ->setSynopsis($synopsis)
      ->setArguments(
        array(
          $reset_spec,
          $threshold_spec,
        ));
  }

  /**
   * Run the workflow for every class found by the class map query for
   * "PhabricatorFerretInterface".
   *
   * Exactly one of `--reset` and `--threshold` must be provided. The
   * threshold must be numeric and strictly between 0 and 1.
   *
   * @param PhutilArgumentParser Parsed command line arguments.
   * @return void
   * @throws PhutilArgumentUsageException When the flags are missing,
   *   combined, or the threshold is not a number inside (0, 1).
   */
  public function execute(PhutilArgumentParser $args) {
    // Document types with fewer documents than this are skipped entirely
    // when computing common ngrams.
    $min_documents = 4096;

    $reset_requested = $args->getArg('reset');
    $threshold = $args->getArg('threshold');
    $has_threshold = ($threshold !== null);

    if ($reset_requested) {
      if ($has_threshold) {
        throw new PhutilArgumentUsageException(
          pht('Specify either --reset or --threshold, not both.'));
      }
    } else {
      if (!$has_threshold) {
        throw new PhutilArgumentUsageException(
          pht('Specify either --reset or --threshold.'));
      }

      if (!is_numeric($threshold)) {
        throw new PhutilArgumentUsageException(
          pht('Specify a numeric threshold between 0 and 1.'));
      }

      $threshold = (double)$threshold;
      if ($threshold <= 0 || $threshold >= 1) {
        throw new PhutilArgumentUsageException(
          pht('Threshold must be greater than 0.0 and less than 1.0.'));
      }
    }

    $class_query = id(new PhutilClassMapQuery())
      ->setAncestorClass('PhabricatorFerretInterface');
    $ferret_objects = $class_query->execute();

    foreach ($ferret_objects as $ferret_object) {
      $engine = $ferret_object->newFerretEngine();
      $conn = $ferret_object->establishConnection('w');
      $type_name = get_class($ferret_object);

      if ($reset_requested) {
        $message = pht(
          'Resetting common ngrams for "%s".',
          $type_name);
        echo tsprintf("%s\n", $message);

        // Reset mode only clears the table; nothing is recomputed.
        queryfx(
          $conn,
          'DELETE FROM %T',
          $engine->getCommonNgramsTableName());
        continue;
      }

      $count_row = queryfx_one(
        $conn,
        'SELECT COUNT(*) N FROM %T',
        $engine->getDocumentTableName());
      $document_count = $count_row['N'];

      if ($document_count < $min_documents) {
        $message = pht(
          'Too few documents of type "%s" for any ngrams to be common.',
          $type_name);
        echo tsprintf("%s\n", $message);
        continue;
      }

      // An ngram is treated as common once its row count in the ngrams
      // table reaches this number (the threshold fraction of all documents,
      // rounded up).
      $min_frequency = (int)ceil($document_count * $threshold);
      $frequent_rows = queryfx_all(
        $conn,
        'SELECT ngram, COUNT(*) N FROM %T
          GROUP BY ngram
          HAVING N >= %d',
        $engine->getNgramsTableName(),
        $min_frequency);

      if (!$frequent_rows) {
        $message = pht(
          'No new common ngrams exist for "%s".',
          $type_name);
        echo tsprintf("%s\n", $message);
        continue;
      }

      // Each common ngram is inserted with needsCollection set to 1.
      $value_tuples = array();
      foreach ($frequent_rows as $row) {
        $value_tuples[] = qsprintf(
          $conn,
          '(%s, 1)',
          $row['ngram']);
      }

      // INSERT IGNORE leaves ngrams that are already recorded untouched.
      $chunks = PhabricatorLiskDAO::chunkSQL($value_tuples);
      foreach ($chunks as $chunk) {
        queryfx(
          $conn,
          'INSERT IGNORE INTO %T (ngram, needsCollection)
            VALUES %LQ',
          $engine->getCommonNgramsTableName(),
          $chunk);
      }

      $message = pht(
        'Updated common ngrams for "%s".',
        $type_name);
      echo tsprintf("%s\n", $message);
    }
  }

}