3 final class PhabricatorSearchManagementNgramsWorkflow
4 extends PhabricatorSearchManagementWorkflow
{
6 protected function didConstruct() {
11 'Recompute common ngrams. This is an advanced workflow that '.
12 'can harm search quality if used improperly.'))
17 'help' => pht('Reset all common ngram records.'),
20 'name' => 'threshold',
21 'param' => 'threshold',
23 'Prune ngrams present in more than this fraction of '.
24 'documents. Provide a value between 0.0 and 1.0.'),
29 public function execute(PhutilArgumentParser
$args) {
30 $min_documents = 4096;
32 $is_reset = $args->getArg('reset');
33 $threshold = $args->getArg('threshold');
35 if ($is_reset && $threshold !== null) {
36 throw new PhutilArgumentUsageException(
37 pht('Specify either --reset or --threshold, not both.'));
40 if (!$is_reset && $threshold === null) {
41 throw new PhutilArgumentUsageException(
42 pht('Specify either --reset or --threshold.'));
46 if (!is_numeric($threshold)) {
47 throw new PhutilArgumentUsageException(
48 pht('Specify a numeric threshold between 0 and 1.'));
51 $threshold = (double)$threshold;
52 if ($threshold <= 0 ||
$threshold >= 1) {
53 throw new PhutilArgumentUsageException(
54 pht('Threshold must be greater than 0.0 and less than 1.0.'));
58 $all_objects = id(new PhutilClassMapQuery())
59 ->setAncestorClass('PhabricatorFerretInterface')
62 foreach ($all_objects as $object) {
63 $engine = $object->newFerretEngine();
64 $conn = $object->establishConnection('w');
65 $display_name = get_class($object);
71 'Resetting common ngrams for "%s".',
77 $engine->getCommonNgramsTableName());
81 $document_count = queryfx_one(
83 'SELECT COUNT(*) N FROM %T',
84 $engine->getDocumentTableName());
85 $document_count = $document_count['N'];
87 if ($document_count < $min_documents) {
91 'Too few documents of type "%s" for any ngrams to be common.',
96 $min_frequency = (int)ceil($document_count * $threshold);
97 $common_ngrams = queryfx_all(
99 'SELECT ngram, COUNT(*) N FROM %T
102 $engine->getNgramsTableName(),
105 if (!$common_ngrams) {
109 'No new common ngrams exist for "%s".',
115 foreach ($common_ngrams as $ngram) {
122 foreach (PhabricatorLiskDAO
::chunkSQL($sql) as $chunk) {
125 'INSERT IGNORE INTO %T (ngram, needsCollection)
127 $engine->getCommonNgramsTableName(),
134 'Updated common ngrams for "%s".',