Correct a parameter order swap in "diffusion.historyquery" for Mercurial
[phabricator.git] / src / applications / search / engineextension / PhabricatorFerretFulltextEngineExtension.php
blobe97fab2c5374b9572f363cb6be4a490ef303a85f
1 <?php
3 final class PhabricatorFerretFulltextEngineExtension
4 extends PhabricatorFulltextEngineExtension {
6 const EXTENSIONKEY = 'ferret';
/**
 * Get the human-readable name of this fulltext engine extension.
 *
 * @return string Translated display name produced by pht().
 */
public function getExtensionName() {
  $display_name = pht('Ferret Fulltext Engine');

  return $display_name;
}
/**
 * Decide whether this extension applies to an object.
 *
 * @param mixed $object Candidate object.
 * @return bool True only when the object implements
 *   PhabricatorFerretInterface.
 */
public function shouldIndexFulltextObject($object) {
  if ($object instanceof PhabricatorFerretInterface) {
    return true;
  }

  return false;
}
/**
 * Write the Ferret index rows for a single object.
 *
 * Steps, in order:
 *   - read open/closed, owner and author relationships from the document;
 *   - expand the document fields with virtual "core" (title + body) and
 *     "all" fields;
 *   - build raw, term and normalized (stemmed) corpora per field key;
 *   - derive ngrams from all corpora and drop those listed in the engine's
 *     common ngrams table;
 *   - inside a transaction on `$object`, insert or update the document row
 *     and synchronize the stored field and ngram rows.
 *
 * @param object $object Object being indexed. This method calls
 *   newFerretEngine(), establishConnection(), getPHID() and the
 *   open/kill/saveTransaction() methods on it.
 * @param PhabricatorSearchAbstractDocument $document Abstract document
 *   supplying relationships, fields and created/modified epochs.
 * @return void
 * @throws Exception Any failure during the write is rethrown after the
 *   transaction has been killed.
 */
public function indexFulltextObject(
  $object,
  PhabricatorSearchAbstractDocument $document) {

  $engine = $object->newFerretEngine();

  $is_closed = 0;
  $author_phid = null;
  $owner_phid = null;
  foreach ($document->getRelationshipData() as $relationship) {
    list($related_type, $related_phid) = $relationship;
    switch ($related_type) {
      case PhabricatorSearchRelationship::RELATIONSHIP_OPEN:
        $is_closed = 0;
        break;
      case PhabricatorSearchRelationship::RELATIONSHIP_CLOSED:
        $is_closed = 1;
        break;
      case PhabricatorSearchRelationship::RELATIONSHIP_OWNER:
        $owner_phid = $related_phid;
        break;
      case PhabricatorSearchRelationship::RELATIONSHIP_UNOWNED:
        $owner_phid = null;
        break;
      case PhabricatorSearchRelationship::RELATIONSHIP_AUTHOR:
        $author_phid = $related_phid;
        break;
    }
  }

  $stemmer = $engine->newStemmer();

  // Copy all of the "title" and "body" fields to create new "core" fields.
  // This allows users to search "in title or body" with the "core:" prefix.
  // Every field is additionally copied into the "all" field.
  $document_fields = $document->getFieldData();
  $virtual_fields = array();
  foreach ($document_fields as $field) {
    $virtual_fields[] = $field;

    list($key, $raw_corpus) = $field;
    switch ($key) {
      case PhabricatorSearchDocumentFieldType::FIELD_TITLE:
      case PhabricatorSearchDocumentFieldType::FIELD_BODY:
        $virtual_fields[] = array(
          PhabricatorSearchDocumentFieldType::FIELD_CORE,
          $raw_corpus,
        );
        break;
    }

    $virtual_fields[] = array(
      PhabricatorSearchDocumentFieldType::FIELD_ALL,
      $raw_corpus,
    );
  }

  $empty_template = array(
    'raw' => array(),
    'term' => array(),
    'normal' => array(),
  );

  $ferret_corpus_map = array();

  foreach ($virtual_fields as $field) {
    list($key, $raw_corpus) = $field;

    // Skip fields with no text. The explicit null test avoids passing null
    // to strlen(), which is deprecated as of PHP 8.1.
    if ($raw_corpus === null || !strlen($raw_corpus)) {
      continue;
    }

    $term_corpus = $engine->newTermsCorpus($raw_corpus);

    $normal_corpus = $stemmer->stemCorpus($raw_corpus);
    $normal_corpus = $engine->newTermsCorpus($normal_corpus);

    if (!isset($ferret_corpus_map[$key])) {
      $ferret_corpus_map[$key] = $empty_template;
    }

    $ferret_corpus_map[$key]['raw'][] = $raw_corpus;
    $ferret_corpus_map[$key]['term'][] = $term_corpus;
    $ferret_corpus_map[$key]['normal'][] = $normal_corpus;
  }

  // Collapse the per-key corpus lists into one row per field key, and
  // collect every non-empty corpus as input for ngram extraction.
  $ferret_fields = array();
  $ngrams_source = array();
  foreach ($ferret_corpus_map as $key => $fields) {
    $raw_corpus = $fields['raw'];
    $raw_corpus = implode("\n", $raw_corpus);
    if (strlen($raw_corpus)) {
      $ngrams_source[] = $raw_corpus;
    }

    $normal_corpus = $fields['normal'];
    $normal_corpus = implode("\n", $normal_corpus);
    if (strlen($normal_corpus)) {
      $ngrams_source[] = $normal_corpus;
    }

    $term_corpus = $fields['term'];
    $term_corpus = implode("\n", $term_corpus);
    if (strlen($term_corpus)) {
      $ngrams_source[] = $term_corpus;
    }

    $ferret_fields[] = array(
      'fieldKey' => $key,
      'rawCorpus' => $raw_corpus,
      'termCorpus' => $term_corpus,
      'normalCorpus' => $normal_corpus,
    );
  }
  $ngrams_source = implode("\n", $ngrams_source);

  $ngram_engine = new PhabricatorSearchNgramEngine();
  $ngrams = $ngram_engine->getTermNgramsFromString($ngrams_source);

  $conn = $object->establishConnection('w');

  // Drop any ngram that appears in the common ngrams table.
  if ($ngrams) {
    $common = queryfx_all(
      $conn,
      'SELECT ngram FROM %T WHERE ngram IN (%Ls)',
      $engine->getCommonNgramsTableName(),
      $ngrams);
    $common = ipull($common, 'ngram', 'ngram');

    foreach ($ngrams as $key => $ngram) {
      if (isset($common[$ngram])) {
        unset($ngrams[$key]);
        continue;
      }

      // NOTE: MySQL discards trailing whitespace in CHAR(X) columns.
      $trimmed_ngram = rtrim($ngram, ' ');
      if (isset($common[$trimmed_ngram])) {
        unset($ngrams[$key]);
        continue;
      }
    }
  }

  $object->openTransaction();

  try {
    // See T13587. If this document already exists in the index, we try to
    // update the existing rows to avoid leaving the ngrams table heavily
    // fragmented.

    $old_document = queryfx_one(
      $conn,
      'SELECT id FROM %T WHERE objectPHID = %s',
      $engine->getDocumentTableName(),
      $object->getPHID());
    if ($old_document) {
      $old_document_id = (int)$old_document['id'];
    } else {
      $old_document_id = null;
    }

    if ($old_document_id === null) {
      queryfx(
        $conn,
        'INSERT INTO %T (objectPHID, isClosed, epochCreated, epochModified,
          authorPHID, ownerPHID) VALUES (%s, %d, %d, %d, %ns, %ns)',
        $engine->getDocumentTableName(),
        $object->getPHID(),
        $is_closed,
        $document->getDocumentCreated(),
        $document->getDocumentModified(),
        $author_phid,
        $owner_phid);
      $document_id = $conn->getInsertID();

      $is_new = true;
    } else {
      $document_id = $old_document_id;
      queryfx(
        $conn,
        'UPDATE %T
          SET
            isClosed = %d,
            epochCreated = %d,
            epochModified = %d,
            authorPHID = %ns,
            ownerPHID = %ns
          WHERE id = %d',
        $engine->getDocumentTableName(),
        $is_closed,
        $document->getDocumentCreated(),
        $document->getDocumentModified(),
        $author_phid,
        $owner_phid,
        $document_id);

      $is_new = false;
    }

    $this->updateStoredFields(
      $conn,
      $is_new,
      $document_id,
      $engine,
      $ferret_fields);

    $this->updateStoredNgrams(
      $conn,
      $is_new,
      $document_id,
      $engine,
      $ngrams);

  } catch (Exception $ex) {
    $object->killTransaction();
    throw $ex;
  } catch (Throwable $ex) {
    // NOTE(review): the separate Throwable clause presumably exists so that
    // PHP 7+ Error instances also roll back while the Exception clause
    // still works on older runtimes; confirm before merging the two.
    $object->killTransaction();
    throw $ex;
  }

  $object->saveTransaction();
}
/**
 * Synchronize the stored field rows of a document with a new field list.
 *
 * Rows are matched by "fieldKey". Stored rows whose key is absent from the
 * new list are deleted, rows whose corpus columns differ are updated in
 * place, and keys that are not stored yet are inserted. Rows whose three
 * corpus columns are identical are left untouched.
 *
 * @param AphrontDatabaseConnection $conn Connection used for all queries.
 * @param bool $is_new When true, the lookup of existing rows is skipped.
 * @param int $document_id ID of the owning document row.
 * @param PhabricatorFerretEngine $engine Supplies the field table name.
 * @param array $new_fields List of rows with "fieldKey", "rawCorpus",
 *   "termCorpus" and "normalCorpus" keys.
 * @return void
 */
private function updateStoredFields(
  AphrontDatabaseConnection $conn,
  $is_new,
  $document_id,
  PhabricatorFerretEngine $engine,
  $new_fields) {

  $stored_rows = array();
  if (!$is_new) {
    $stored_rows = queryfx_all(
      $conn,
      'SELECT * FROM %T WHERE documentID = %d',
      $engine->getFieldTableName(),
      $document_id);
  }

  $old_by_key = ipull($stored_rows, null, 'fieldKey');
  $new_by_key = ipull($new_fields, null, 'fieldKey');

  // Stored rows whose field key no longer appears in the new field list.
  $stale_rows = array();
  foreach ($old_by_key as $field_key => $stored_row) {
    if (isset($new_by_key[$field_key])) {
      continue;
    }
    $stale_rows[] = $stored_row;
  }

  // Columns which decide whether a stored row needs to be rewritten.
  $corpus_columns = array(
    'rawCorpus',
    'termCorpus',
    'normalCorpus',
  );

  $fresh_rows = array();
  $changed_rows = array();
  foreach ($new_by_key as $field_key => $candidate) {
    if (!isset($old_by_key[$field_key])) {
      $fresh_rows[] = $candidate;
      continue;
    }

    $stored_row = $old_by_key[$field_key];

    $differs = false;
    foreach ($corpus_columns as $column) {
      if ($stored_row[$column] !== $candidate[$column]) {
        $differs = true;
        break;
      }
    }

    if ($differs) {
      // Reuse the stored row ID so the row is updated in place.
      $candidate['id'] = $stored_row['id'];
      $changed_rows[] = $candidate;
    }
  }

  if ($stale_rows) {
    queryfx(
      $conn,
      'DELETE FROM %T WHERE id IN (%Ld)',
      $engine->getFieldTableName(),
      ipull($stale_rows, 'id'));
  }

  foreach ($changed_rows as $changed_row) {
    queryfx(
      $conn,
      'UPDATE %T
        SET
          rawCorpus = %s,
          termCorpus = %s,
          normalCorpus = %s
        WHERE id = %d',
      $engine->getFieldTableName(),
      $changed_row['rawCorpus'],
      $changed_row['termCorpus'],
      $changed_row['normalCorpus'],
      $changed_row['id']);
  }

  foreach ($fresh_rows as $fresh_row) {
    queryfx(
      $conn,
      'INSERT INTO %T (documentID, fieldKey, rawCorpus, termCorpus,
        normalCorpus) VALUES (%d, %s, %s, %s, %s)',
      $engine->getFieldTableName(),
      $document_id,
      $fresh_row['fieldKey'],
      $fresh_row['rawCorpus'],
      $fresh_row['termCorpus'],
      $fresh_row['normalCorpus']);
  }
}
/**
 * Synchronize the stored ngram rows of a document with a new ngram list.
 *
 * Stored ngrams that are not in the new list are deleted and new ngrams
 * that are not stored yet are inserted; matching rows are left alone. Both
 * the deletes and the inserts are issued in chunks produced by
 * PhabricatorLiskDAO::chunkSQL().
 *
 * @param AphrontDatabaseConnection $conn Connection used for all queries.
 * @param bool $is_new When true, the lookup of existing rows is skipped.
 * @param int $document_id ID of the owning document row.
 * @param PhabricatorFerretEngine $engine Supplies the ngrams table name.
 * @param array $new_ngrams List of ngram strings the document should have.
 * @return void
 */
private function updateStoredNgrams(
  AphrontDatabaseConnection $conn,
  $is_new,
  $document_id,
  PhabricatorFerretEngine $engine,
  $new_ngrams) {

  $stored_rows = array();
  if (!$is_new) {
    $stored_rows = queryfx_all(
      $conn,
      'SELECT id, ngram FROM %T WHERE documentID = %d',
      $engine->getNgramsTableName(),
      $document_id);
  }

  $id_by_ngram = ipull($stored_rows, 'id', 'ngram');
  $wanted = array_fuse($new_ngrams);

  // NOTE: MySQL discards trailing whitespace in CHAR(X) columns, so a
  // stored ngram is also compared against the new ngram which has one
  // extra trailing space, and a new ngram is also compared in its
  // right-trimmed form.

  $stale_ids = array();
  foreach ($id_by_ngram as $stored_ngram => $row_id) {
    $still_wanted =
      isset($wanted[$stored_ngram]) ||
      isset($wanted[$stored_ngram.' ']);

    if (!$still_wanted) {
      $stale_ids[] = $row_id;
    }
  }

  $missing_ngrams = array();
  foreach ($wanted as $candidate) {
    $already_stored =
      isset($id_by_ngram[$candidate]) ||
      isset($id_by_ngram[rtrim($candidate, ' ')]);

    if (!$already_stored) {
      $missing_ngrams[] = $candidate;
    }
  }

  if ($stale_ids) {
    $id_sql = array();
    foreach ($stale_ids as $row_id) {
      $id_sql[] = qsprintf($conn, '%d', $row_id);
    }

    foreach (PhabricatorLiskDAO::chunkSQL($id_sql) as $id_chunk) {
      queryfx(
        $conn,
        'DELETE FROM %T WHERE id IN (%LQ)',
        $engine->getNgramsTableName(),
        $id_chunk);
    }
  }

  if ($missing_ngrams) {
    $value_sql = array();
    foreach ($missing_ngrams as $candidate) {
      $value_sql[] = qsprintf(
        $conn,
        '(%d, %s)',
        $document_id,
        $candidate);
    }

    foreach (PhabricatorLiskDAO::chunkSQL($value_sql) as $value_chunk) {
      queryfx(
        $conn,
        'INSERT INTO %T (documentID, ngram) VALUES %LQ',
        $engine->getNgramsTableName(),
        $value_chunk);
    }
  }
}
/**
 * Build the search functions contributed by this extension.
 *
 * One FerretConfigurableSearchFunction is created per entry below, in the
 * listed order, pairing a function name with a document field key.
 *
 * @return array List of FerretConfigurableSearchFunction objects.
 */
public function newFerretSearchFunctions() {
  $field_key_by_function = array(
    'all' => PhabricatorSearchDocumentFieldType::FIELD_ALL,
    'title' => PhabricatorSearchDocumentFieldType::FIELD_TITLE,
    'body' => PhabricatorSearchDocumentFieldType::FIELD_BODY,
    'core' => PhabricatorSearchDocumentFieldType::FIELD_CORE,
    'comment' => PhabricatorSearchDocumentFieldType::FIELD_COMMENT,
  );

  $functions = array();
  foreach ($field_key_by_function as $function_name => $field_key) {
    $functions[] = id(new FerretConfigurableSearchFunction())
      ->setFerretFunctionName($function_name)
      ->setFerretFieldKey($field_key);
  }

  return $functions;
}