/**
 * Fulltext engine extension identified by the key 'ferret'.
 *
 * For objects implementing PhabricatorFerretInterface, it writes the search
 * document, its per-field corpora, and its ngrams into the tables named by
 * the object's Ferret engine.
 */
3 final class PhabricatorFerretFulltextEngineExtension
4 extends PhabricatorFulltextEngineExtension
{
6 const EXTENSIONKEY
= 'ferret';
/**
 * Return the human-readable name of this extension, passed through pht().
 *
 * @return string Display name.
 */
9 public function getExtensionName() {
10 return pht('Ferret Fulltext Engine');
/**
 * Report whether an object should be indexed by this extension.
 *
 * @param mixed $object Candidate object.
 * @return bool True only when $object implements PhabricatorFerretInterface.
 */
14 public function shouldIndexFulltextObject($object) {
15 return ($object instanceof PhabricatorFerretInterface
);
/**
 * Index a search document into the Ferret tables of the object's engine.
 *
 * Builds raw, term, and stemmed ("normal") corpora for each field key,
 * derives ngrams from the combined text, removes ngrams listed in the
 * common-ngrams table, and then writes the document row, its fields, and
 * its ngrams between openTransaction() and saveTransaction(). If an
 * Exception or Throwable is caught, killTransaction() is called.
 *
 * The object being indexed is referenced as $object in the body: it supplies
 * newFerretEngine(), establishConnection('w'), and the transaction methods.
 *
 * @param PhabricatorSearchAbstractDocument $document Document providing the
 *   PHID, relationship data, field data, and created/modified epochs.
 */
19 public function indexFulltextObject(
21 PhabricatorSearchAbstractDocument
$document) {
23 $phid = $document->getPHID();
24 $engine = $object->newFerretEngine();
// Walk the document's relationships; each entry unpacks into a
// (type, PHID) pair. The owner and author PHIDs are captured from the
// OWNER and AUTHOR relationship types.
29 foreach ($document->getRelationshipData() as $relationship) {
30 list($related_type, $related_phid) = $relationship;
31 switch ($related_type) {
32 case PhabricatorSearchRelationship
::RELATIONSHIP_OPEN
:
35 case PhabricatorSearchRelationship
::RELATIONSHIP_CLOSED
:
38 case PhabricatorSearchRelationship
::RELATIONSHIP_OWNER
:
39 $owner_phid = $related_phid;
41 case PhabricatorSearchRelationship
::RELATIONSHIP_UNOWNED
:
44 case PhabricatorSearchRelationship
::RELATIONSHIP_AUTHOR
:
45 $author_phid = $related_phid;
50 $stemmer = $engine->newStemmer();
52 // Copy all of the "title" and "body" fields to create new "core" fields.
53 // This allows users to search "in title or body" with the "core:" prefix.
54 $document_fields = $document->getFieldData();
55 $virtual_fields = array();
56 foreach ($document_fields as $field) {
57 $virtual_fields[] = $field;
59 list($key, $raw_corpus) = $field;
61 case PhabricatorSearchDocumentFieldType
::FIELD_TITLE
:
62 case PhabricatorSearchDocumentFieldType
::FIELD_BODY
:
63 $virtual_fields[] = array(
64 PhabricatorSearchDocumentFieldType
::FIELD_CORE
,
// An entry under the FIELD_ALL key is appended as well.
70 $virtual_fields[] = array(
71 PhabricatorSearchDocumentFieldType
::FIELD_ALL
,
// Accumulator template; a copy is installed for each field key the first
// time that key is seen in the loop below.
76 $empty_template = array(
82 $ferret_corpus_map = array();
// Group the corpora by field key. For each field the raw text is kept
// alongside a terms corpus of the raw text and a terms corpus of the
// stemmed text ("normal").
84 foreach ($virtual_fields as $field) {
85 list($key, $raw_corpus) = $field;
86 if (!strlen($raw_corpus)) {
90 $term_corpus = $engine->newTermsCorpus($raw_corpus);
92 $normal_corpus = $stemmer->stemCorpus($raw_corpus);
93 $normal_corpus = $engine->newTermsCorpus($normal_corpus);
95 if (!isset($ferret_corpus_map[$key])) {
96 $ferret_corpus_map[$key] = $empty_template;
99 $ferret_corpus_map[$key]['raw'][] = $raw_corpus;
100 $ferret_corpus_map[$key]['term'][] = $term_corpus;
101 $ferret_corpus_map[$key]['normal'][] = $normal_corpus;
// Collapse each key's variants into newline-joined strings. Every
// non-empty variant also contributes to the text ngrams are derived from.
104 $ferret_fields = array();
105 $ngrams_source = array();
106 foreach ($ferret_corpus_map as $key => $fields) {
107 $raw_corpus = $fields['raw'];
108 $raw_corpus = implode("\n", $raw_corpus);
109 if (strlen($raw_corpus)) {
110 $ngrams_source[] = $raw_corpus;
113 $normal_corpus = $fields['normal'];
114 $normal_corpus = implode("\n", $normal_corpus);
115 if (strlen($normal_corpus)) {
116 $ngrams_source[] = $normal_corpus;
119 $term_corpus = $fields['term'];
120 $term_corpus = implode("\n", $term_corpus);
121 if (strlen($term_corpus)) {
122 $ngrams_source[] = $term_corpus;
125 $ferret_fields[] = array(
127 'rawCorpus' => $raw_corpus,
128 'termCorpus' => $term_corpus,
129 'normalCorpus' => $normal_corpus,
132 $ngrams_source = implode("\n", $ngrams_source);
// Extract term ngrams from the combined corpus text.
134 $ngram_engine = new PhabricatorSearchNgramEngine();
135 $ngrams = $ngram_engine->getTermNgramsFromString($ngrams_source);
137 $conn = $object->establishConnection('w');
// Find which of these ngrams are listed in the common-ngrams table and
// remove them from $ngrams, matching both the exact ngram and the ngram
// with trailing spaces trimmed.
140 $common = queryfx_all(
142 'SELECT ngram FROM %T WHERE ngram IN (%Ls)',
143 $engine->getCommonNgramsTableName(),
145 $common = ipull($common, 'ngram', 'ngram');
147 foreach ($ngrams as $key => $ngram) {
148 if (isset($common[$ngram])) {
149 unset($ngrams[$key]);
153 // NOTE: MySQL discards trailing whitespace in CHAR(X) columns.
154 $trimmed_ngram = rtrim($ngram, ' ');
155 if (isset($common[$trimmed_ngram])) {
156 unset($ngrams[$key]);
// All writes below happen between openTransaction() and saveTransaction().
162 $object->openTransaction();
165 // See T13587. If this document already exists in the index, we try to
166 // update the existing rows to avoid leaving the ngrams table heavily
169 $old_document = queryfx_one(
171 'SELECT id FROM %T WHERE objectPHID = %s',
172 $engine->getDocumentTableName(),
175 $old_document_id = (int)$old_document['id'];
177 $old_document_id = null;
// No existing row: insert a new document row and use its insert ID.
180 if ($old_document_id === null) {
183 'INSERT INTO %T (objectPHID, isClosed, epochCreated, epochModified,
184 authorPHID, ownerPHID) VALUES (%s, %d, %d, %d, %ns, %ns)',
185 $engine->getDocumentTableName(),
188 $document->getDocumentCreated(),
189 $document->getDocumentModified(),
192 $document_id = $conn->getInsertID();
// Existing row: keep its id. NOTE(review): the statement receiving the
// arguments below is presumably an UPDATE of that row -- confirm.
196 $document_id = $old_document_id;
207 $engine->getDocumentTableName(),
209 $document->getDocumentCreated(),
210 $document->getDocumentModified(),
// Synchronize the stored field rows, then the stored ngram rows.
218 $this->updateStoredFields(
225 $this->updateStoredNgrams(
// Both Exception and Throwable are caught so the transaction is killed on
// any failure.
232 } catch (Exception
$ex) {
233 $object->killTransaction();
235 } catch (Throwable
$ex) {
236 $object->killTransaction();
240 $object->saveTransaction();
/**
 * Synchronize the field rows stored for a document with a new set of fields.
 *
 * Existing rows are read from the engine's field table by documentID, both
 * the stored and the new fields are indexed by 'fieldKey', and the
 * difference is applied: stored rows with no matching new field are deleted
 * by id, new fields with no stored row are inserted, and fields present on
 * both sides are compared column-by-column using $compare_keys.
 *
 * @param AphrontDatabaseConnection $conn Connection the queries run on.
 * @param PhabricatorFerretEngine $engine Engine supplying the field table
 *   name.
 */
243 private function updateStoredFields(
244 AphrontDatabaseConnection
$conn,
247 PhabricatorFerretEngine
$engine,
251 $old_fields = queryfx_all(
253 'SELECT * FROM %T WHERE documentID = %d',
254 $engine->getFieldTableName(),
257 $old_fields = array();
// Index both sets by field key so they can be compared key-by-key.
260 $old_fields = ipull($old_fields, null, 'fieldKey');
261 $new_fields = ipull($new_fields, null, 'fieldKey');
263 $delete_rows = array();
264 $insert_rows = array();
265 $update_rows = array();
// Stored fields with no counterpart in the new set are queued for deletion.
267 foreach ($old_fields as $field_key => $old_field) {
268 if (!isset($new_fields[$field_key])) {
269 $delete_rows[] = $old_field;
273 $compare_keys = array(
// New fields with no stored row are queued for insertion. Fields present
// on both sides are compared with strict inequality on each compare key;
// rows queued for update carry over the id of the stored row.
279 foreach ($new_fields as $field_key => $new_field) {
280 if (!isset($old_fields[$field_key])) {
281 $insert_rows[] = $new_field;
285 $old_field = $old_fields[$field_key];
288 foreach ($compare_keys as $compare_key) {
289 if ($old_field[$compare_key] !== $new_field[$compare_key]) {
299 $new_field['id'] = $old_field['id'];
300 $update_rows[] = $new_field;
306 'DELETE FROM %T WHERE id IN (%Ld)',
307 $engine->getFieldTableName(),
308 ipull($delete_rows, 'id'));
// Queued updates are applied one row at a time.
311 foreach ($update_rows as $update_row) {
320 $engine->getFieldTableName(),
321 $update_row['rawCorpus'],
322 $update_row['termCorpus'],
323 $update_row['normalCorpus'],
// Queued inserts are applied one row at a time.
327 foreach ($insert_rows as $insert_row) {
330 'INSERT INTO %T (documentID, fieldKey, rawCorpus, termCorpus,
331 normalCorpus) VALUES (%d, %s, %s, %s, %s)',
332 $engine->getFieldTableName(),
334 $insert_row['fieldKey'],
335 $insert_row['rawCorpus'],
336 $insert_row['termCorpus'],
337 $insert_row['normalCorpus']);
/**
 * Synchronize the ngram rows stored for a document with a new set of ngrams.
 *
 * Existing (id, ngram) rows are read from the engine's ngrams table by
 * documentID and compared against $new_ngrams. Ngram rows are then deleted
 * by id and inserted as (documentID, ngram) pairs, with both statements
 * issued per chunk returned by PhabricatorLiskDAO::chunkSQL().
 *
 * @param AphrontDatabaseConnection $conn Connection the queries run on.
 * @param PhabricatorFerretEngine $engine Engine supplying the ngrams table
 *   name.
 */
341 private function updateStoredNgrams(
342 AphrontDatabaseConnection
$conn,
345 PhabricatorFerretEngine
$engine,
349 $old_ngrams = array();
351 $old_ngrams = queryfx_all(
353 'SELECT id, ngram FROM %T WHERE documentID = %d',
354 $engine->getNgramsTableName(),
// Map each stored ngram to its row id, and key the new ngrams by their own
// value so membership can be tested with isset().
358 $old_ngrams = ipull($old_ngrams, 'id', 'ngram');
359 $new_ngrams = array_fuse($new_ngrams);
361 $delete_ids = array();
362 $insert_ngrams = array();
364 // NOTE: MySQL discards trailing whitespace in CHAR(X) columns.
// Each stored ngram is looked up in the new set both as-is and with one
// trailing space appended. NOTE(review): stored ngrams matching neither
// form are presumably the ones collected in $delete_ids -- confirm.
366 foreach ($old_ngrams as $ngram => $id) {
367 if (isset($new_ngrams[$ngram])) {
371 $untrimmed_ngram = $ngram.' ';
372 if (isset($new_ngrams[$untrimmed_ngram])) {
// Each new ngram is looked up among the stored ones both as-is and with
// trailing spaces trimmed. NOTE(review): presumably only ngrams matching
// neither form reach the $insert_ngrams append -- confirm.
379 foreach ($new_ngrams as $ngram) {
380 if (isset($old_ngrams[$ngram])) {
384 $trimmed_ngram = rtrim($ngram, ' ');
385 if (isset($old_ngrams[$trimmed_ngram])) {
389 $insert_ngrams[] = $ngram;
394 foreach ($delete_ids as $id) {
// Deletes are issued once per chunk returned by chunkSQL().
401 foreach (PhabricatorLiskDAO
::chunkSQL($sql) as $chunk) {
404 'DELETE FROM %T WHERE id IN (%LQ)',
405 $engine->getNgramsTableName(),
410 if ($insert_ngrams) {
412 foreach ($insert_ngrams as $ngram) {
// Inserts are likewise issued once per chunk.
420 foreach (PhabricatorLiskDAO
::chunkSQL($sql) as $chunk) {
423 'INSERT INTO %T (documentID, ngram) VALUES %LQ',
424 $engine->getNgramsTableName(),
/**
 * Build the search functions this extension provides.
 *
 * Each FerretConfigurableSearchFunction pairs a function name with a
 * PhabricatorSearchDocumentFieldType field key: 'all' => FIELD_ALL,
 * 'title' => FIELD_TITLE, 'body' => FIELD_BODY, 'core' => FIELD_CORE, and
 * 'comment' => FIELD_COMMENT.
 *
 * NOTE(review): presumably these are returned together as a list -- confirm.
 */
430 public function newFerretSearchFunctions() {
432 id(new FerretConfigurableSearchFunction())
433 ->setFerretFunctionName('all')
434 ->setFerretFieldKey(PhabricatorSearchDocumentFieldType
::FIELD_ALL
),
435 id(new FerretConfigurableSearchFunction())
436 ->setFerretFunctionName('title')
437 ->setFerretFieldKey(PhabricatorSearchDocumentFieldType
::FIELD_TITLE
),
438 id(new FerretConfigurableSearchFunction())
439 ->setFerretFunctionName('body')
440 ->setFerretFieldKey(PhabricatorSearchDocumentFieldType
::FIELD_BODY
),
441 id(new FerretConfigurableSearchFunction())
442 ->setFerretFunctionName('core')
443 ->setFerretFieldKey(PhabricatorSearchDocumentFieldType
::FIELD_CORE
),
444 id(new FerretConfigurableSearchFunction())
445 ->setFerretFunctionName('comment')
446 ->setFerretFieldKey(PhabricatorSearchDocumentFieldType
::FIELD_COMMENT
),