chrome/browser/ui/app_list/search/tokenized_string_match.cc

   1 // Copyright 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "chrome/browser/ui/app_list/search/tokenized_string_match.h"
   6
   7 #include "chrome/browser/ui/app_list/search/tokenized_string_char_iterator.h"
   8
   9 namespace app_list {
  10
  11 namespace {
  12
  13 // The factors below are applied when the current char of query matches
  14 // the current char of the text to be matched. Different factors are chosen
  15 // based on where the match happens. kIsPrefixMultiplier is used when the
  16 // matched portion is a prefix of both the query and the text, which implies
  17 // that the matched chars are at the same position in query and text. This is
  18 // the most preferred case thus it has the highest score. When the current char
  19 // of the query and the text does not match, the algorithm moves to the next
  20 // token in the text and try to match from there. kIsFrontOfWordMultipler will
  21 // be used if the first char of the token matches the current char of the query.
  22 // Otherwise, the match is considered as weak and kIsWeakHitMultiplier is
  23 // used.
  24 // Examples:
  25 //   Suppose the text to be matched is 'Google Chrome'.
  26 //   Query 'go' would yield kIsPrefixMultiplier for each char.
  27 //   Query 'gc' would use kIsPrefixMultiplier for 'g' and
  28 //       kIsFrontOfWordMultipler for 'c'.
  29 //   Query 'ch' would use kIsFrontOfWordMultipler for 'c' and
  30 //       kIsWeakHitMultiplier for 'h'.
  31 const double kIsPrefixMultiplier = 1.0;
  32 const double kIsFrontOfWordMultipler = 0.8;
  33 const double kIsWeakHitMultiplier = 0.6;
  34
  35 // A relevance score that represents no match.
  36 const double kNoMatchScore = 0.0;
  37
  38 }  // namespace
  39
  40 TokenizedStringMatch::TokenizedStringMatch()
  41     : relevance_(kNoMatchScore) {}
  42
  43 TokenizedStringMatch::~TokenizedStringMatch() {}
  44
  45 bool TokenizedStringMatch::Calculate(const TokenizedString& query,
  46                                      const TokenizedString& text) {
  47   relevance_ = kNoMatchScore;
  48   hits_.clear();
  49
  50   ui::Range hit = ui::Range::InvalidRange();
  51
  52   TokenizedStringCharIterator query_iter(query);
  53   TokenizedStringCharIterator text_iter(text);
  54
  55   while (!query_iter.end() && !text_iter.end()) {
  56     if (query_iter.Get() == text_iter.Get()) {
  57       if (query_iter.GetArrayPos() == text_iter.GetArrayPos())
  58         relevance_ += kIsPrefixMultiplier;
  59       else if (text_iter.IsFirstCharOfToken())
  60         relevance_ += kIsFrontOfWordMultipler;
  61       else
  62         relevance_ += kIsWeakHitMultiplier;
  63
  64       if (!hit.IsValid())
  65         hit.set_start(text_iter.GetArrayPos());
  66       hit.set_end(text_iter.GetArrayPos() + text_iter.GetCharSize());
  67
  68       query_iter.NextChar();
  69       text_iter.NextChar();
  70     } else {
  71       if (hit.IsValid()) {
  72         hits_.push_back(hit);
  73         hit = ui::Range::InvalidRange();
  74       }
  75
  76       text_iter.NextToken();
  77     }
  78   }
  79
  80   // No match if query is not fully consumed.
  81   if (!query_iter.end()) {
  82     relevance_ = kNoMatchScore;
  83     hits_.clear();
  84     return false;
  85   }
  86
  87   if (hit.IsValid())
  88     hits_.push_back(hit);
  89
  90   // Using length() for normalizing is not 100% correct but should be good
  91   // enough compared with using real char count of the text.
  92   if (text.text().length())
  93     relevance_ /= text.text().length();
  94
  95   return relevance_ > kNoMatchScore;
  96 }
  97
  98 bool TokenizedStringMatch::Calculate(const string16& query,
  99                                      const string16& text) {
 100   const TokenizedString tokenized_query(query);
 101   const TokenizedString tokenized_text(text);
 102   return Calculate(tokenized_query, tokenized_text);
 103 }
 104
 105 }  // namespace app_list