// Repository: chromium-blink-merge.git (merge of the Chromium and Blink git repositories)
// Path:       content/renderer/android/email_detector.cc
// Blob:       1c335ee117b243234658905f8d8351283057d4ff
1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/android/email_detector.h"
7 #include "base/logging.h"
8 #include "base/memory/scoped_ptr.h"
9 #include "base/strings/utf_string_conversions.h"
10 #include "content/public/renderer/android_content_detection_prefixes.h"
11 #include "net/base/escape.h"
12 #include "third_party/icu/source/i18n/unicode/regex.h"
namespace {

// Maximum length of an email address. Returned to callers through
// EmailDetector::GetMaximumContentLength().
const size_t kMaximumEmailLength = 254;

// Regex to match email addresses.
// This is more specific than RFC 2822 (uncommon special characters are
// disallowed) in order to avoid false positives.
// Delimiters are word boundaries to allow punctuation, quote marks etc. around
// the address.
// The character classes are written in upper case only; FindContent() compiles
// the pattern with UREGEX_CASE_INSENSITIVE, so lower-case input also matches.
// NOTE(review): the trailing "{2,6}" only accepts top-level domains of two to
// six letters, so addresses under longer TLDs are not detected -- confirm
// whether that is still intended.
const char kEmailRegex[] = "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,6}\\b";

}  // anonymous namespace
28 namespace content {
30 EmailDetector::EmailDetector() {
33 size_t EmailDetector::GetMaximumContentLength() {
34 return kMaximumEmailLength;
37 GURL EmailDetector::GetIntentURL(const std::string& content_text) {
38 if (content_text.empty())
39 return GURL();
41 return GURL(kEmailPrefix +
42 net::EscapeQueryParamValue(content_text, true));
45 bool EmailDetector::FindContent(const base::string16::const_iterator& begin,
46 const base::string16::const_iterator& end,
47 size_t* start_pos,
48 size_t* end_pos,
49 std::string* content_text) {
50 base::string16 utf16_input = base::string16(begin, end);
51 icu::UnicodeString pattern(kEmailRegex);
52 icu::UnicodeString input(utf16_input.data(), utf16_input.length());
53 UErrorCode status = U_ZERO_ERROR;
54 scoped_ptr<icu::RegexMatcher> matcher(
55 new icu::RegexMatcher(pattern,
56 input,
57 UREGEX_CASE_INSENSITIVE,
58 status));
59 if (matcher->find()) {
60 *start_pos = matcher->start(status);
61 DCHECK(U_SUCCESS(status));
62 *end_pos = matcher->end(status);
63 DCHECK(U_SUCCESS(status));
64 icu::UnicodeString content_ustr(matcher->group(status));
65 DCHECK(U_SUCCESS(status));
66 base::UTF16ToUTF8(content_ustr.getBuffer(), content_ustr.length(),
67 content_text);
68 return true;
71 return false;
74 } // namespace content