1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "content/renderer/android/email_detector.h"
7 #include "base/logging.h"
8 #include "base/memory/scoped_ptr.h"
9 #include "base/strings/utf_string_conversions.h"
10 #include "content/public/renderer/android_content_detection_prefixes.h"
11 #include "net/base/escape.h"
12 #include "third_party/icu/source/i18n/unicode/regex.h"
16 // Maximum length of an email address.
// This value is what EmailDetector::GetMaximumContentLength() returns.
// NOTE(review): the unit (UTF-16 code units vs. bytes) is not stated in this
// file — confirm against the callers of GetMaximumContentLength().
17 const size_t kMaximumEmailLength
= 254;
19 // Regex to match email addresses.
20 // This is more specific than RFC 2822 (uncommon special characters are
21 // disallowed) in order to avoid false positives.
22 // Delimiters are word boundaries to allow punctuation, quote marks etc. around
// the address.
// Pattern structure, left to right: a local part of one or more characters
// from [A-Z0-9._%+-], an '@', a domain of one or more characters from
// [A-Z0-9.-], a literal '.', and a final label of 2 to 6 letters.
// The pattern lists only upper-case ranges; the matcher in
// EmailDetector::FindContent() is created with UREGEX_CASE_INSENSITIVE.
24 const char kEmailRegex
[] = "\\b[A-Z0-9._%+-]+@[A-Z0-9.-]+\\.[A-Z]{2,6}\\b";
26 } // anonymous namespace
// Default constructor; takes no arguments.
// NOTE(review): the body and closing brace are not visible in this view.
30 EmailDetector::EmailDetector() {
// Returns the upper bound on the length of content this detector deals with,
// which is the file-local constant kMaximumEmailLength.
33 size_t EmailDetector::GetMaximumContentLength() {
34 return kMaximumEmailLength
;
// Builds the intent URL for a detected email address.
//
// For non-empty |content_text| the result is a GURL made of kEmailPrefix
// followed by |content_text| escaped with net::EscapeQueryParamValue, whose
// second argument is passed as true.
// NOTE(review): empty |content_text| is special-cased by the check below, but
// the statement it guards is not visible in this view — presumably it returns
// an empty GURL; confirm.
37 GURL
EmailDetector::GetIntentURL(const std::string
& content_text
) {
38 if (content_text
.empty())
41 return GURL(kEmailPrefix
+
42 net::EscapeQueryParamValue(content_text
, true));
// Searches the UTF-16 range [|begin|, |end|) for a match of kEmailRegex using
// an ICU RegexMatcher created with UREGEX_CASE_INSENSITIVE.
//
// When find() succeeds, the match's start and end offsets (as reported by
// RegexMatcher::start() and RegexMatcher::end()) are written to |*start_pos|
// and |*end_pos|, and the matched text is converted from UTF-16 to UTF-8.
// The ICU status after start(), end() and group() is verified with DCHECK
// only.
//
// NOTE(review): parts of this function are not visible in this view — the
// declarations of |start_pos| and |end_pos|, the remaining RegexMatcher
// constructor arguments, the destination of the UTF-8 conversion (presumably
// |content_text|) and the return statements. Presumably the function returns
// true when a match was found and false otherwise; confirm.
45 bool EmailDetector::FindContent(const base::string16::const_iterator
& begin
,
46 const base::string16::const_iterator
& end
,
49 std::string
* content_text
) {
// Materialize the iterator range as a string16, then wrap it and the pattern
// in ICU UnicodeStrings for the matcher.
50 base::string16 utf16_input
= base::string16(begin
, end
);
51 icu::UnicodeString
pattern(kEmailRegex
);
52 icu::UnicodeString
input(utf16_input
.data(), utf16_input
.length());
53 UErrorCode status
= U_ZERO_ERROR
;
54 scoped_ptr
<icu::RegexMatcher
> matcher(
55 new icu::RegexMatcher(pattern
,
57 UREGEX_CASE_INSENSITIVE
,
// Look for a match; on success report its bounds and its text.
59 if (matcher
->find()) {
60 *start_pos
= matcher
->start(status
);
61 DCHECK(U_SUCCESS(status
));
62 *end_pos
= matcher
->end(status
);
63 DCHECK(U_SUCCESS(status
));
64 icu::UnicodeString
content_ustr(matcher
->group(status
));
65 DCHECK(U_SUCCESS(status
));
66 base::UTF16ToUTF8(content_ustr
.getBuffer(), content_ustr
.length(),
74 } // namespace content