3 class PhabricatorElasticFulltextStorageEngine
4 extends PhabricatorFulltextStorageEngine
{
// Attaches the search service to this engine and caches three settings read
// from the service's config array:
//   - 'path' (default '/phabricator'), stored in $this->index with every '/'
//     removed;
//   - 'timeout' (default 15), stored in $this->timeout;
//   - 'version' (default 5), cast to int and stored in $this->version.
// NOTE(review): this listing is a partial extraction (original line numbers
// are embedded and some lines are absent); the end of this method, including
// whatever it returns, is not visible here.
10 public function setService(PhabricatorSearchService
$service) {
11 $this->service
= $service;
12 $config = $service->getConfig();
13 $index = idx($config, 'path', '/phabricator');
// Strip slashes so the configured path can be used as a bare index name.
14 $this->index
= str_replace('/', '', $index);
15 $this->timeout
= idx($config, 'timeout', 15);
// The version is compared numerically elsewhere in this class, hence the cast.
16 $this->version
= (int)idx($config, 'version', 5);
/**
 * Name of the backend this storage engine talks to.
 *
 * @return string Always the literal 'elasticsearch'.
 */
public function getEngineIdentifier() {
  $identifier = 'elasticsearch';

  return $identifier;
}
/**
 * Pick the document key that holds the last-modified time.
 *
 * The key depends on the configured version: versions below 2 use
 * '_timestamp', everything else uses 'lastModified'.
 *
 * @return string Field name for the modification timestamp.
 */
public function getTimestampField() {
  if ($this->version < 2) {
    return '_timestamp';
  }

  return 'lastModified';
}
// Selects the mapping type used for text fields; the choice hinges on whether
// the configured version is at least 5. getIndexConfiguration() uses the
// result as the 'type' of every text property.
// NOTE(review): the two values this expression chooses between are not
// visible in this listing.
29 public function getTextFieldType() {
30 return $this->version
>= 5
/**
 * Create a host object bound to this engine.
 *
 * @return PhabricatorElasticsearchHost New host constructed with this engine.
 */
public function getHostType() {
  $host = new PhabricatorElasticsearchHost($this);

  return $host;
}
/**
 * Ask the attached service for any host that has the 'read' role.
 *
 * @return mixed Whatever the service's getAnyHostForRole('read') returns.
 */
public function getHostForRead() {
  $service = $this->getService();

  return $service->getAnyHostForRole('read');
}
/**
 * Ask the attached service for any host that has the 'write' role.
 *
 * @return mixed Whatever the service's getAnyHostForRole('write') returns.
 */
public function getHostForWrite() {
  $service = $this->getService();

  return $service->getAnyHostForRole('write');
}
// Overrides the request timeout that setService() read from the config.
// executeRequest() applies the value to each HTTP future when it is truthy.
// NOTE(review): the lines after the assignment (including any return value)
// are not visible in this listing.
46 public function setTimeout($timeout) {
47 $this->timeout
= $timeout;
/**
 * Read back the request timeout currently stored on this engine.
 *
 * @return mixed The stored timeout value.
 */
public function getTimeout() {
  $timeout = $this->timeout;

  return $timeout;
}
/**
 * List the distinct values of the constants declared on a class.
 *
 * @param string|object $class Class name (or instance) to reflect on.
 * @return array Constant values with duplicates removed. The first occurrence
 *               of each value is kept, so the resulting integer keys may
 *               have gaps.
 */
public function getTypeConstants($class) {
  $reflection = new ReflectionClass($class);
  $values = array_values($reflection->getConstants());

  return array_unique($values);
}
// Builds an index document from $doc and sends it to the write host with a
// PUT request.
// NOTE(review): this listing is a partial extraction (embedded line numbers,
// missing lines); the comments below describe only the visible statements.
61 public function reindexAbstractDocument(
62 PhabricatorSearchAbstractDocument
$doc) {
// Indexing goes through the host returned by getHostForWrite().
64 $host = $this->getHostForWrite();
66 $type = $doc->getDocumentType();
67 $phid = $doc->getPHID();
// Handle lookup for the document PHID as the omnipotent user. How $handle is
// used afterwards is not visible here.
68 $handle = id(new PhabricatorHandleQuery())
69 ->setViewer(PhabricatorUser
::getOmnipotentUser())
70 ->withPHIDs(array($phid))
// The key for the modification time depends on the configured version.
73 $timestamp_key = $this->getTimestampField();
76 'title' => $doc->getDocumentTitle(),
77 'dateCreated' => $doc->getDocumentCreated(),
78 $timestamp_key => $doc->getDocumentModified(),
// Each field entry is a (name, corpus, aux) triple. Corpus values are
// collected into a list keyed by field name.
81 foreach ($doc->getFieldData() as $field) {
82 list($field_name, $corpus, $aux) = $field;
83 if (!isset($spec[$field_name])) {
84 $spec[$field_name] = array($corpus);
86 $spec[$field_name][] = $corpus;
// NOTE(review): the condition guarding this append of $aux is not visible.
89 $spec[$field_name][] = $aux;
// Each relationship entry is (name, related PHID, type, time). PHIDs are
// collected per relationship name; the time is written to "<name>_ts" and is
// overwritten by every later entry with the same name.
93 foreach ($doc->getRelationshipData() as $field) {
94 list($field_name, $related_phid, $rtype, $time) = $field;
95 if (!isset($spec[$field_name])) {
96 $spec[$field_name] = array($related_phid);
98 $spec[$field_name][] = $related_phid;
101 $spec[$field_name.'_ts'] = $time;
// Store the assembled spec at "/<type>/<phid>/".
105 $this->executeRequest($host, "/{$type}/{$phid}/", $spec, 'PUT');
// Translates a saved query into the request body sent to the search endpoint:
// a bool query assembled with PhabricatorElasticsearchQueryBuilder, an
// optional sort, and 'from'/'size' paging.
// NOTE(review): this listing is a partial extraction (embedded line numbers,
// missing lines); the comments below describe only the visible statements.
108 private function buildSpec(PhabricatorSavedQuery
$query) {
109 $q = new PhabricatorElasticsearchQueryBuilder('bool');
110 $query_string = $query->getParameter('query');
// Text clauses are only added when a non-empty query string was supplied.
111 if (strlen($query_string)) {
// NOTE(review): $fields is not referenced in the visible part of this method.
112 $fields = $this->getTypeConstants('PhabricatorSearchDocumentFieldType');
114 // Build a simple_query_string query over all fields that must match all
115 // of the words in the search string.
116 $q->addMustClause(array(
117 'simple_query_string' => array(
118 'query' => $query_string,
// The ".*" suffix is appended to the title, body and comment field names.
// Presumably this targets every sub-field of each; confirm against the
// mapping.
120 PhabricatorSearchDocumentFieldType
::FIELD_TITLE
.'.*',
121 PhabricatorSearchDocumentFieldType
::FIELD_BODY
.'.*',
122 PhabricatorSearchDocumentFieldType
::FIELD_COMMENT
.'.*',
124 'default_operator' => 'AND',
128 // This second query clause is "SHOULD" so it only affects ranking of
129 // documents which already matched the Must clause. This amplifies the
130 // score of documents which have an exact match on title, body
132 $q->addShouldClause(array(
133 'simple_query_string' => array(
134 'query' => $query_string,
// "^N" suffixes weight the fields: title 4, body 3, comment 1.2.
137 PhabricatorSearchDocumentFieldType
::FIELD_TITLE
.'^4',
138 PhabricatorSearchDocumentFieldType
::FIELD_BODY
.'^3',
139 PhabricatorSearchDocumentFieldType
::FIELD_COMMENT
.'^1.2',
141 'analyzer' => 'english_exact',
142 'default_operator' => 'and',
// The 'exclude' parameter is passed as a single value to a filter clause.
// NOTE(review): the guard around this clause and the filter type are not
// visible.
148 $exclude = $query->getParameter('exclude');
150 $q->addFilterClause(array(
153 'values' => array($exclude),
// Relationship name => list of PHIDs requested for it. Non-empty lists become
// terms clauses further down.
159 $relationship_map = array(
160 PhabricatorSearchRelationship
::RELATIONSHIP_AUTHOR
=>
161 $query->getParameter('authorPHIDs', array()),
162 PhabricatorSearchRelationship
::RELATIONSHIP_SUBSCRIBER
=>
163 $query->getParameter('subscriberPHIDs', array()),
164 PhabricatorSearchRelationship
::RELATIONSHIP_PROJECT
=>
165 $query->getParameter('projectPHIDs', array()),
166 PhabricatorSearchRelationship
::RELATIONSHIP_REPOSITORY
=>
167 $query->getParameter('repositoryPHIDs', array()),
170 $statuses = $query->getParameter('statuses', array());
171 $statuses = array_fuse($statuses);
173 $rel_open = PhabricatorSearchRelationship
::RELATIONSHIP_OPEN
;
174 $rel_closed = PhabricatorSearchRelationship
::RELATIONSHIP_CLOSED
;
175 $rel_unowned = PhabricatorSearchRelationship
::RELATIONSHIP_UNOWNED
;
177 $include_open = !empty($statuses[$rel_open]);
178 $include_closed = !empty($statuses[$rel_closed]);
// A status constraint is added only when exactly one of open/closed is
// requested; asking for both or neither adds no status clause.
180 if ($include_open && !$include_closed) {
181 $q->addExistsClause($rel_open);
182 } else if (!$include_open && $include_closed) {
183 $q->addExistsClause($rel_closed);
186 if ($query->getParameter('withUnowned')) {
187 $q->addExistsClause($rel_unowned);
190 $rel_owner = PhabricatorSearchRelationship
::RELATIONSHIP_OWNER
;
// 'withAnyOwner' requires the owner field to exist; 'ownerPHIDs' restricts it
// to specific owners.
// NOTE(review): whether these two are mutually exclusive (if/else) is not
// visible here.
191 if ($query->getParameter('withAnyOwner')) {
192 $q->addExistsClause($rel_owner);
194 $owner_phids = $query->getParameter('ownerPHIDs', array());
195 if (count($owner_phids)) {
196 $q->addTermsClause($rel_owner, $owner_phids);
200 foreach ($relationship_map as $field => $phids) {
201 if (is_array($phids) && !empty($phids)) {
202 $q->addTermsClause($field, $phids);
// With no 'must' clause so far, fall back to matching every document.
206 if (!$q->getClauseCount('must')) {
207 $q->addMustClause(array('match_all' => array('boost' => 1 )));
213 'bool' => $q->toArray(),
// Without a query string, results are ordered newest-created first.
218 if (!$query->getParameter('query')) {
219 $spec['sort'] = array(
220 array('dateCreated' => 'desc'),
224 $offset = (int)$query->getParameter('offset', 0);
225 $limit = (int)$query->getParameter('limit', 101);
// Reject pages reaching past 10000 results. Presumably this mirrors
// Elasticsearch's default result window; TODO confirm.
226 if ($offset +
$limit > 10000) {
227 throw new Exception(pht(
228 'Query offset is too large. offset+limit=%s (max=%s)',
232 $spec['from'] = $offset;
233 $spec['size'] = $limit;
// Runs a saved query against the 'read' hosts of the attached service,
// trying each host in turn, and throws a PhutilAggregateException if it
// falls through to the end.
// NOTE(review): this listing is a partial extraction; the return statement,
// the body of the catch block and the default for $types are not fully
// visible.
238 public function executeSearch(PhabricatorSavedQuery
$query) {
239 $types = $query->getParameter('types');
// NOTE(review): only the argument of this call is visible. It appears to
// supply document types when the query names none; confirm against the full
// file.
242 PhabricatorSearchApplicationSearchEngine
::getIndexableDocumentTypes());
245 // Don't use '/_search' for the case that there is something
246 // else in the index (for example if 'phabricator' is only an alias to
247 // some bigger index). Use '/$types/_search' instead.
248 $uri = '/'.implode(',', $types).'/_search';
250 $spec = $this->buildSpec($query);
251 $exceptions = array();
253 foreach ($this->service
->getAllHostsForRole('read') as $host) {
255 $response = $this->executeRequest($host, $uri, $spec);
// The PHIDs are the '_id' values of the returned hits.
256 $phids = ipull($response['hits']['hits'], '_id');
258 } catch (Exception
$e) {
262 throw new PhutilAggregateException(pht('All Fulltext Search hosts failed:'),
// Checks whether the search index exists on a host. The request used differs
// by configured version (>= 5, >= 2, older).
// NOTE(review): this listing is a partial extraction; the URIs, the guard
// around the default host and the catch-block results are not visible.
266 public function indexExists(PhabricatorElasticsearchHost
$host = null) {
// Presumably used only when no $host was passed; the guard is not visible.
268 $host = $this->getHostForRead();
271 if ($this->version
>= 5) {
273 $res = $this->executeRequest($host, $uri, array());
// NOTE(review): this key is hard-coded to 'phabricator', while
// getIndexStats() and indexIsSane() key the response by $this->index. If the
// 'path' setting names a different index this check looks at the wrong key.
// Confirm and consider using $this->index.
274 return isset($res['indices']['phabricator']);
275 } else if ($this->version
>= 2) {
// Any non-empty response counts as "exists".
280 return (bool)$this->executeRequest($host, $uri, array());
281 } catch (HTTPFutureHTTPResponseStatus
$e) {
// An HTTP 404 gets dedicated handling; the outcome is not visible here.
282 if ($e->getStatusCode() == 404) {
// Builds the index configuration: a 'settings' section (replica expansion
// plus analysis filters and analyzers) and, per indexable document type, a
// 'mappings' section describing text fields, relationship fields and dates.
// NOTE(review): this listing is a partial extraction; many array lines
// (filter 'type' entries, sub-field names, relationship field types, the
// return statement) are not visible.
289 private function getIndexConfiguration() {
291 $data['settings'] = array(
293 'auto_expand_replicas' => '0-2',
// Token filter definitions.
296 'english_stop' => array(
298 'stopwords' => '_english_',
300 'english_stemmer' => array(
302 'language' => 'english',
304 'english_possessive_stemmer' => array(
306 'language' => 'possessive_english',
// Analyzer definitions. These names are referenced by the field mappings
// below and by buildSpec() ('english_exact').
310 'english_exact' => array(
311 'tokenizer' => 'standard',
312 'filter' => array('lowercase'),
314 'letter_stop' => array(
315 'tokenizer' => 'letter',
316 'filter' => array('lowercase', 'english_stop'),
318 'english_stem' => array(
319 'tokenizer' => 'standard',
321 'english_possessive_stemmer',
// Field and relationship names come from the constants declared on the two
// type classes.
332 $fields = $this->getTypeConstants('PhabricatorSearchDocumentFieldType');
333 $relationships = $this->getTypeConstants('PhabricatorSearchRelationship');
335 $doc_types = array_keys(
336 PhabricatorSearchApplicationSearchEngine
::getIndexableDocumentTypes());
// Text mapping type, chosen by getTextFieldType() from the configured version.
338 $text_type = $this->getTextFieldType();
// One property map is built per document type.
340 foreach ($doc_types as $type) {
341 $properties = array();
342 foreach ($fields as $field) {
343 // Use the custom analyzer for the corpus of text
344 $properties[$field] = array(
345 'type' => $text_type,
// Three further text definitions follow, each with its own analyzer.
// NOTE(review): the keys they are stored under are not visible; they look
// like sub-fields of $field, so confirm.
348 'type' => $text_type,
349 'analyzer' => 'english_exact',
350 'search_analyzer' => 'english',
351 'search_quote_analyzer' => 'english_exact',
354 'type' => $text_type,
355 'analyzer' => 'letter_stop',
358 'type' => $text_type,
359 'analyzer' => 'english_stem',
// Relationship fields are mapped differently below version 5 (which uses
// 'index' => 'not_analyzed') than otherwise (which sets 'doc_values' to
// false).
365 if ($this->version
< 5) {
366 foreach ($relationships as $rel) {
367 $properties[$rel] = array(
369 'index' => 'not_analyzed',
370 'include_in_all' => false,
372 $properties[$rel.'_ts'] = array(
374 'include_in_all' => false,
378 foreach ($relationships as $rel) {
379 $properties[$rel] = array(
381 'include_in_all' => false,
382 'doc_values' => false,
384 $properties[$rel.'_ts'] = array(
386 'include_in_all' => false,
391 // Ensure we have dateCreated since the default query requires it
392 $properties['dateCreated']['type'] = 'date';
393 $properties['lastModified']['type'] = 'date';
395 $data['mappings'][$type]['properties'] = $properties;
// Compares the index's live settings and mappings on a host with the
// configuration this engine would create.
// NOTE(review): this listing is a partial extraction; the guard around the
// default host and every return statement are not visible.
400 public function indexIsSane(PhabricatorElasticsearchHost
$host = null) {
// Presumably used only when no $host was passed; the guard is not visible.
402 $host = $this->getHostForRead();
// A missing index is handled before any comparison; the result returned in
// that case is not visible.
404 if (!$this->indexExists($host)) {
407 $cur_mapping = $this->executeRequest($host, '/_mapping/', array());
408 $cur_settings = $this->executeRequest($host, '/_settings/', array());
// Both responses are keyed by index name; merge this index's settings and
// mappings into one array for comparison.
409 $actual = array_merge($cur_settings[$this->index
],
410 $cur_mapping[$this->index
]);
412 $res = $this->check($actual, $this->getIndexConfiguration());
417 * Recursively check if two Elasticsearch configuration arrays are equal
420 * @param $required array
// Walks $required key by key and compares it against $actual, recursing into
// nested arrays and extending $path with ".<key>" at each level.
// NOTE(review): this listing is a partial extraction; none of the return
// statements are visible, so the exact result for each case cannot be stated
// from here.
423 private function check($actual, $required, $path = '') {
424 foreach ($required as $key => $value) {
// Key missing from the live configuration.
425 if (!array_key_exists($key, $actual)) {
426 if ($key === '_all') {
427 // The _all field never comes back so we just have to assume it
// Required value is an array: the live value must be an array too, and the
// nested arrays are compared recursively.
433 if (is_array($value)) {
434 if (!is_array($actual[$key])) {
437 if (!$this->check($actual[$key], $value, $path.'.'.$key)) {
// Scalar values are normalized on both sides, then compared loosely (!=).
443 $actual[$key] = self
::normalizeConfigValue($actual[$key]);
444 $value = self
::normalizeConfigValue($value);
445 if ($actual[$key] != $value) {
453 * Normalize a config value for comparison. Elasticsearch accepts all kinds
454 * of config values but it tends to throw back 'true' for true and 'false' for
455 * false so we normalize everything. Sometimes, oddly, it'll throw back false
458 * @param mixed $value config value
459 * @return mixed value normalized
// Normalizes a config value before comparison, with separate branches for
// boolean true and boolean false.
// NOTE(review): the values produced by each branch and the final return are
// not visible in this listing. The preceding docblock suggests the string
// forms 'true'/'false'; confirm against the full file.
461 private static function normalizeConfigValue($value) {
462 if ($value === true) {
464 } else if ($value === false) {
/**
 * (Re)create the search index on a write host.
 *
 * If indexExists() reports an index, it is deleted first. The configuration
 * from getIndexConfiguration() is then PUT to "/". Note that indexExists()
 * is called without a host argument.
 *
 * @return void
 */
public function initIndex() {
  $write_host = $this->getHostForWrite();

  $already_present = $this->indexExists();
  if ($already_present) {
    $this->executeRequest($write_host, '/', array(), 'DELETE');
  }

  $configuration = $this->getIndexConfiguration();
  $this->executeRequest($write_host, '/', $configuration, 'PUT');
}
// Fetches statistics for this engine's index from a host and returns them as
// a labelled map (query total, document count, deleted count, storage used).
// NOTE(review): this listing is a partial extraction; the stats URI, the
// handling for versions below 2, the guard around the default host and three
// of the four labels are not visible.
479 public function getIndexStats(PhabricatorElasticsearchHost
$host = null) {
// Versions below 2 take a separate path whose body is not visible.
480 if ($this->version
< 2) {
// Presumably used only when no $host was passed; the guard is not visible.
484 $host = $this->getHostForRead();
488 $res = $this->executeRequest($host, $uri, array());
// The response is keyed by index name under 'indices'.
489 $stats = $res['indices'][$this->index
];
492 idxv($stats, array('primaries', 'search', 'query_total')),
494 idxv($stats, array('total', 'docs', 'count')),
496 idxv($stats, array('total', 'docs', 'deleted')),
// The store size is read in bytes and formatted for display.
497 pht('Storage Used') =>
498 phutil_format_bytes(idxv($stats,
499 array('total', 'store', 'size_in_bytes'))),
503 private function executeRequest(PhabricatorElasticsearchHost
$host, $path,
504 array $data, $method = 'GET') {
506 $uri = $host->getURI($path);
507 $data = phutil_json_encode($data);
508 $future = new HTTPSFuture($uri, $data);
509 $future->addHeader('Content-Type', 'application/json');
511 if ($method != 'GET') {
512 $future->setMethod($method);
514 if ($this->getTimeout()) {
515 $future->setTimeout($this->getTimeout());
518 list($body) = $future->resolvex();
519 } catch (HTTPFutureResponseStatus
$ex) {
520 if ($ex->isTimeout() ||
(int)$ex->getStatusCode() > 499) {
521 $host->didHealthCheck(false);
526 if ($method != 'GET') {
531 $data = phutil_json_decode($body);
532 $host->didHealthCheck(true);
534 } catch (PhutilJSONParserException
$ex) {
535 $host->didHealthCheck(false);
536 throw new PhutilProxyException(
537 pht('Elasticsearch server returned invalid JSON!'),