Move parseFontFaceDescriptor to CSSPropertyParser.cpp
[chromium-blink-merge.git] / third_party / WebKit / Source / core / editing / spellcheck / TextCheckingHelper.cpp
blob10ec32134f29413470dc2baea4968a5f93518200
1 /*
2 * Copyright (C) 2006, 2007 Apple Inc. All rights reserved.
3 * Copyright (C) 2008 Nokia Corporation and/or its subsidiary(-ies)
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
14 * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
15 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
17 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE COMPUTER, INC. OR
18 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
24 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 #include "config.h"
28 #include "core/editing/spellcheck/TextCheckingHelper.h"
30 #include "core/dom/Document.h"
31 #include "core/dom/Range.h"
32 #include "core/editing/VisiblePosition.h"
33 #include "core/editing/VisibleUnits.h"
34 #include "core/editing/iterators/CharacterIterator.h"
35 #include "core/editing/iterators/WordAwareIterator.h"
36 #include "core/editing/markers/DocumentMarkerController.h"
37 #include "core/frame/LocalFrame.h"
38 #include "core/frame/Settings.h"
39 #include "core/page/SpellCheckerClient.h"
40 #include "platform/text/TextBreakIterator.h"
41 #include "platform/text/TextCheckerClient.h"
43 namespace blink {
45 static void findBadGrammars(TextCheckerClient& client, const UChar* text, int start, int length, Vector<TextCheckingResult>& results)
47 int checkLocation = start;
48 int checkLength = length;
50 while (0 < checkLength) {
51 int badGrammarLocation = -1;
52 int badGrammarLength = 0;
53 Vector<GrammarDetail> badGrammarDetails;
54 client.checkGrammarOfString(String(text + checkLocation, checkLength), badGrammarDetails, &badGrammarLocation, &badGrammarLength);
55 if (!badGrammarLength)
56 break;
57 ASSERT(0 <= badGrammarLocation && badGrammarLocation <= checkLength);
58 ASSERT(0 < badGrammarLength && badGrammarLocation + badGrammarLength <= checkLength);
59 TextCheckingResult badGrammar;
60 badGrammar.decoration = TextDecorationTypeGrammar;
61 badGrammar.location = checkLocation + badGrammarLocation;
62 badGrammar.length = badGrammarLength;
63 badGrammar.details.swap(badGrammarDetails);
64 results.append(badGrammar);
66 checkLocation += (badGrammarLocation + badGrammarLength);
67 checkLength -= (badGrammarLocation + badGrammarLength);
71 static void findMisspellings(TextCheckerClient& client, const UChar* text, int start, int length, Vector<TextCheckingResult>& results)
73 TextBreakIterator* iterator = wordBreakIterator(text + start, length);
74 if (!iterator)
75 return;
76 int wordStart = iterator->current();
77 while (0 <= wordStart) {
78 int wordEnd = iterator->next();
79 if (wordEnd < 0)
80 break;
81 int wordLength = wordEnd - wordStart;
82 int misspellingLocation = -1;
83 int misspellingLength = 0;
84 client.checkSpellingOfString(String(text + start + wordStart, wordLength), &misspellingLocation, &misspellingLength);
85 if (0 < misspellingLength) {
86 ASSERT(0 <= misspellingLocation && misspellingLocation <= wordLength);
87 ASSERT(0 < misspellingLength && misspellingLocation + misspellingLength <= wordLength);
88 TextCheckingResult misspelling;
89 misspelling.decoration = TextDecorationTypeSpelling;
90 misspelling.location = start + wordStart + misspellingLocation;
91 misspelling.length = misspellingLength;
92 misspelling.replacement = client.getAutoCorrectSuggestionForMisspelledWord(String(text + misspelling.location, misspelling.length));
93 results.append(misspelling);
96 wordStart = wordEnd;
100 EphemeralRange expandRangeToSentenceBoundary(const EphemeralRange& range)
102 return EphemeralRange(startOfSentence(createVisiblePosition(range.startPosition())).deepEquivalent(), endOfSentence(createVisiblePosition(range.endPosition())).deepEquivalent());
105 static EphemeralRange expandToParagraphBoundary(const EphemeralRange& range)
107 return EphemeralRange(startOfParagraph(createVisiblePosition(range.startPosition())).deepEquivalent(), endOfParagraph(createVisiblePosition(range.endPosition())).deepEquivalent());
110 TextCheckingParagraph::TextCheckingParagraph(const EphemeralRange& checkingRange)
111 : m_checkingRange(checkingRange)
112 , m_checkingStart(-1)
113 , m_checkingEnd(-1)
114 , m_checkingLength(-1)
118 TextCheckingParagraph::TextCheckingParagraph(const EphemeralRange& checkingRange, const EphemeralRange& paragraphRange)
119 : m_checkingRange(checkingRange)
120 , m_paragraphRange(paragraphRange)
121 , m_checkingStart(-1)
122 , m_checkingEnd(-1)
123 , m_checkingLength(-1)
127 TextCheckingParagraph::TextCheckingParagraph(PassRefPtrWillBeRawPtr<Range> checkingRange, PassRefPtrWillBeRawPtr<Range> paragraphRange)
128 : m_checkingRange(checkingRange.get())
129 , m_paragraphRange(paragraphRange.get())
130 , m_checkingStart(-1)
131 , m_checkingEnd(-1)
132 , m_checkingLength(-1)
136 TextCheckingParagraph::~TextCheckingParagraph()
140 void TextCheckingParagraph::expandRangeToNextEnd()
142 ASSERT(m_checkingRange.isNotNull());
143 setParagraphRange(EphemeralRange(paragraphRange().startPosition(), endOfParagraph(startOfNextParagraph(createVisiblePosition(paragraphRange().startPosition()))).deepEquivalent()));
144 invalidateParagraphRangeValues();
147 void TextCheckingParagraph::invalidateParagraphRangeValues()
149 m_checkingStart = m_checkingEnd = -1;
150 m_offsetAsRange = EphemeralRange();
151 m_text = String();
154 int TextCheckingParagraph::rangeLength() const
156 ASSERT(m_checkingRange.isNotNull());
157 return TextIterator::rangeLength(paragraphRange().startPosition(), paragraphRange().endPosition());
160 EphemeralRange TextCheckingParagraph::paragraphRange() const
162 ASSERT(m_checkingRange.isNotNull());
163 if (m_paragraphRange.isNull())
164 m_paragraphRange = expandToParagraphBoundary(checkingRange());
165 return m_paragraphRange;
168 void TextCheckingParagraph::setParagraphRange(const EphemeralRange& range)
170 m_paragraphRange = range;
173 EphemeralRange TextCheckingParagraph::subrange(int characterOffset, int characterCount) const
175 ASSERT(m_checkingRange.isNotNull());
176 return calculateCharacterSubrange(paragraphRange(), characterOffset, characterCount);
179 int TextCheckingParagraph::offsetTo(const Position& position) const
181 ASSERT(m_checkingRange.isNotNull());
182 return TextIterator::rangeLength(offsetAsRange().startPosition(), position);
185 bool TextCheckingParagraph::isEmpty() const
187 // Both predicates should have same result, but we check both just to be sure.
188 // We need to investigate to remove this redundancy.
189 return isRangeEmpty() || isTextEmpty();
192 EphemeralRange TextCheckingParagraph::offsetAsRange() const
194 ASSERT(m_checkingRange.isNotNull());
195 if (m_offsetAsRange.isNull())
196 m_offsetAsRange = EphemeralRange(paragraphRange().startPosition(), checkingRange().startPosition());
198 return m_offsetAsRange;
201 const String& TextCheckingParagraph::text() const
203 ASSERT(m_checkingRange.isNotNull());
204 if (m_text.isEmpty())
205 m_text = plainText(paragraphRange());
206 return m_text;
209 int TextCheckingParagraph::checkingStart() const
211 ASSERT(m_checkingRange.isNotNull());
212 if (m_checkingStart == -1)
213 m_checkingStart = TextIterator::rangeLength(offsetAsRange().startPosition(), offsetAsRange().endPosition());
214 return m_checkingStart;
217 int TextCheckingParagraph::checkingEnd() const
219 ASSERT(m_checkingRange.isNotNull());
220 if (m_checkingEnd == -1)
221 m_checkingEnd = checkingStart() + TextIterator::rangeLength(checkingRange().startPosition(), checkingRange().endPosition());
222 return m_checkingEnd;
225 int TextCheckingParagraph::checkingLength() const
227 ASSERT(m_checkingRange.isNotNull());
228 if (-1 == m_checkingLength)
229 m_checkingLength = TextIterator::rangeLength(checkingRange().startPosition(), checkingRange().endPosition());
230 return m_checkingLength;
233 TextCheckingHelper::TextCheckingHelper(SpellCheckerClient& client, const Position& start, const Position& end)
234 : m_client(&client)
235 , m_start(start)
236 , m_end(end)
240 TextCheckingHelper::~TextCheckingHelper()
244 String TextCheckingHelper::findFirstMisspelling(int& firstMisspellingOffset, bool markAll)
246 WordAwareIterator it(m_start, m_end);
247 firstMisspellingOffset = 0;
249 String firstMisspelling;
250 int currentChunkOffset = 0;
252 while (!it.atEnd()) {
253 int length = it.length();
255 // Skip some work for one-space-char hunks
256 if (!(length == 1 && it.characterAt(0) == ' ')) {
258 int misspellingLocation = -1;
259 int misspellingLength = 0;
260 m_client->textChecker().checkSpellingOfString(it.substring(0, length), &misspellingLocation, &misspellingLength);
262 // 5490627 shows that there was some code path here where the String constructor below crashes.
263 // We don't know exactly what combination of bad input caused this, so we're making this much
264 // more robust against bad input on release builds.
265 ASSERT(misspellingLength >= 0);
266 ASSERT(misspellingLocation >= -1);
267 ASSERT(!misspellingLength || misspellingLocation >= 0);
268 ASSERT(misspellingLocation < length);
269 ASSERT(misspellingLength <= length);
270 ASSERT(misspellingLocation + misspellingLength <= length);
272 if (misspellingLocation >= 0 && misspellingLength > 0 && misspellingLocation < length && misspellingLength <= length && misspellingLocation + misspellingLength <= length) {
274 // Compute range of misspelled word
275 const EphemeralRange misspellingRange = calculateCharacterSubrange(EphemeralRange(m_start, m_end), currentChunkOffset + misspellingLocation, misspellingLength);
277 // Remember first-encountered misspelling and its offset.
278 if (!firstMisspelling) {
279 firstMisspellingOffset = currentChunkOffset + misspellingLocation;
280 firstMisspelling = it.substring(misspellingLocation, misspellingLength);
283 // Store marker for misspelled word.
284 misspellingRange.document().markers().addMarker(misspellingRange.startPosition(), misspellingRange.endPosition(), DocumentMarker::Spelling);
286 // Bail out if we're marking only the first misspelling, and not all instances.
287 if (!markAll)
288 break;
292 currentChunkOffset += length;
293 it.advance();
296 return firstMisspelling;
299 String TextCheckingHelper::findFirstMisspellingOrBadGrammar(bool checkGrammar, bool& outIsSpelling, int& outFirstFoundOffset, GrammarDetail& outGrammarDetail)
301 if (!unifiedTextCheckerEnabled())
302 return "";
304 String firstFoundItem;
305 String misspelledWord;
306 String badGrammarPhrase;
308 // Initialize out parameters; these will be updated if we find something to return.
309 outIsSpelling = true;
310 outFirstFoundOffset = 0;
311 outGrammarDetail.location = -1;
312 outGrammarDetail.length = 0;
313 outGrammarDetail.guesses.clear();
314 outGrammarDetail.userDescription = "";
316 // Expand the search range to encompass entire paragraphs, since text checking needs that much context.
317 // Determine the character offset from the start of the paragraph to the start of the original search range,
318 // since we will want to ignore results in this area.
319 Position paragraphStart = startOfParagraph(createVisiblePosition(m_start)).toParentAnchoredPosition();
320 Position paragraphEnd = m_end;
321 int totalRangeLength = TextIterator::rangeLength(paragraphStart, paragraphEnd);
322 paragraphEnd = endOfParagraph(createVisiblePosition(m_start)).toParentAnchoredPosition();
324 int rangeStartOffset = TextIterator::rangeLength(paragraphStart, m_start);
325 int totalLengthProcessed = 0;
327 bool firstIteration = true;
328 bool lastIteration = false;
329 while (totalLengthProcessed < totalRangeLength) {
330 // Iterate through the search range by paragraphs, checking each one for spelling and grammar.
331 int currentLength = TextIterator::rangeLength(paragraphStart, paragraphEnd);
332 int currentStartOffset = firstIteration ? rangeStartOffset : 0;
333 int currentEndOffset = currentLength;
334 if (inSameParagraph(createVisiblePosition(paragraphStart), createVisiblePosition(m_end))) {
335 // Determine the character offset from the end of the original search range to the end of the paragraph,
336 // since we will want to ignore results in this area.
337 currentEndOffset = TextIterator::rangeLength(paragraphStart, m_end);
338 lastIteration = true;
340 if (currentStartOffset < currentEndOffset) {
341 String paragraphString = plainText(EphemeralRange(paragraphStart, paragraphEnd));
342 if (paragraphString.length() > 0) {
343 bool foundGrammar = false;
344 int spellingLocation = 0;
345 int grammarPhraseLocation = 0;
346 int grammarDetailLocation = 0;
347 unsigned grammarDetailIndex = 0;
349 Vector<TextCheckingResult> results;
350 TextCheckingTypeMask checkingTypes = checkGrammar ? (TextCheckingTypeSpelling | TextCheckingTypeGrammar) : TextCheckingTypeSpelling;
351 checkTextOfParagraph(m_client->textChecker(), paragraphString, checkingTypes, results);
353 for (unsigned i = 0; i < results.size(); i++) {
354 const TextCheckingResult* result = &results[i];
355 if (result->decoration == TextDecorationTypeSpelling && result->location >= currentStartOffset && result->location + result->length <= currentEndOffset) {
356 ASSERT(result->length > 0 && result->location >= 0);
357 spellingLocation = result->location;
358 misspelledWord = paragraphString.substring(result->location, result->length);
359 ASSERT(misspelledWord.length());
360 break;
362 if (checkGrammar && result->decoration == TextDecorationTypeGrammar && result->location < currentEndOffset && result->location + result->length > currentStartOffset) {
363 ASSERT(result->length > 0 && result->location >= 0);
364 // We can't stop after the first grammar result, since there might still be a spelling result after
365 // it begins but before the first detail in it, but we can stop if we find a second grammar result.
366 if (foundGrammar)
367 break;
368 for (unsigned j = 0; j < result->details.size(); j++) {
369 const GrammarDetail* detail = &result->details[j];
370 ASSERT(detail->length > 0 && detail->location >= 0);
371 if (result->location + detail->location >= currentStartOffset && result->location + detail->location + detail->length <= currentEndOffset && (!foundGrammar || result->location + detail->location < grammarDetailLocation)) {
372 grammarDetailIndex = j;
373 grammarDetailLocation = result->location + detail->location;
374 foundGrammar = true;
377 if (foundGrammar) {
378 grammarPhraseLocation = result->location;
379 outGrammarDetail = result->details[grammarDetailIndex];
380 badGrammarPhrase = paragraphString.substring(result->location, result->length);
381 ASSERT(badGrammarPhrase.length());
386 if (!misspelledWord.isEmpty() && (!checkGrammar || badGrammarPhrase.isEmpty() || spellingLocation <= grammarDetailLocation)) {
387 int spellingOffset = spellingLocation - currentStartOffset;
388 if (!firstIteration)
389 spellingOffset += TextIterator::rangeLength(m_start, paragraphStart);
390 outIsSpelling = true;
391 outFirstFoundOffset = spellingOffset;
392 firstFoundItem = misspelledWord;
393 break;
395 if (checkGrammar && !badGrammarPhrase.isEmpty()) {
396 int grammarPhraseOffset = grammarPhraseLocation - currentStartOffset;
397 if (!firstIteration)
398 grammarPhraseOffset += TextIterator::rangeLength(m_start, paragraphStart);
399 outIsSpelling = false;
400 outFirstFoundOffset = grammarPhraseOffset;
401 firstFoundItem = badGrammarPhrase;
402 break;
406 if (lastIteration || totalLengthProcessed + currentLength >= totalRangeLength)
407 break;
408 VisiblePosition newParagraphStart = startOfNextParagraph(createVisiblePosition(paragraphEnd));
409 paragraphStart = newParagraphStart.toParentAnchoredPosition();
410 paragraphEnd = endOfParagraph(newParagraphStart).toParentAnchoredPosition();
411 firstIteration = false;
412 totalLengthProcessed += currentLength;
414 return firstFoundItem;
417 int TextCheckingHelper::findFirstGrammarDetail(const Vector<GrammarDetail>& grammarDetails, int badGrammarPhraseLocation, int startOffset, int endOffset, bool markAll) const
419 // Found some bad grammar. Find the earliest detail range that starts in our search range (if any).
420 // Optionally add a DocumentMarker for each detail in the range.
421 int earliestDetailLocationSoFar = -1;
422 int earliestDetailIndex = -1;
423 for (unsigned i = 0; i < grammarDetails.size(); i++) {
424 const GrammarDetail* detail = &grammarDetails[i];
425 ASSERT(detail->length > 0 && detail->location >= 0);
427 int detailStartOffsetInParagraph = badGrammarPhraseLocation + detail->location;
429 // Skip this detail if it starts before the original search range
430 if (detailStartOffsetInParagraph < startOffset)
431 continue;
433 // Skip this detail if it starts after the original search range
434 if (detailStartOffsetInParagraph >= endOffset)
435 continue;
437 if (markAll) {
438 const EphemeralRange badGrammarRange = calculateCharacterSubrange(EphemeralRange(m_start, m_end), badGrammarPhraseLocation - startOffset + detail->location, detail->length);
439 badGrammarRange.document().markers().addMarker(badGrammarRange.startPosition(), badGrammarRange.endPosition(), DocumentMarker::Grammar, detail->userDescription);
442 // Remember this detail only if it's earlier than our current candidate (the details aren't in a guaranteed order)
443 if (earliestDetailIndex < 0 || earliestDetailLocationSoFar > detail->location) {
444 earliestDetailIndex = i;
445 earliestDetailLocationSoFar = detail->location;
449 return earliestDetailIndex;
452 String TextCheckingHelper::findFirstBadGrammar(GrammarDetail& outGrammarDetail, int& outGrammarPhraseOffset, bool markAll)
454 // Initialize out parameters; these will be updated if we find something to return.
455 outGrammarDetail.location = -1;
456 outGrammarDetail.length = 0;
457 outGrammarDetail.guesses.clear();
458 outGrammarDetail.userDescription = "";
459 outGrammarPhraseOffset = 0;
461 String firstBadGrammarPhrase;
463 // Expand the search range to encompass entire paragraphs, since grammar checking needs that much context.
464 // Determine the character offset from the start of the paragraph to the start of the original search range,
465 // since we will want to ignore results in this area.
466 TextCheckingParagraph paragraph(EphemeralRange(m_start, m_end));
468 // Start checking from beginning of paragraph, but skip past results that occur before the start of the original search range.
469 int startOffset = 0;
470 while (startOffset < paragraph.checkingEnd()) {
471 Vector<GrammarDetail> grammarDetails;
472 int badGrammarPhraseLocation = -1;
473 int badGrammarPhraseLength = 0;
474 m_client->textChecker().checkGrammarOfString(paragraph.textSubstring(startOffset), grammarDetails, &badGrammarPhraseLocation, &badGrammarPhraseLength);
476 if (!badGrammarPhraseLength) {
477 ASSERT(badGrammarPhraseLocation == -1);
478 return String();
481 ASSERT(badGrammarPhraseLocation >= 0);
482 badGrammarPhraseLocation += startOffset;
485 // Found some bad grammar. Find the earliest detail range that starts in our search range (if any).
486 int badGrammarIndex = findFirstGrammarDetail(grammarDetails, badGrammarPhraseLocation, paragraph.checkingStart(), paragraph.checkingEnd(), markAll);
487 if (badGrammarIndex >= 0) {
488 ASSERT(static_cast<unsigned>(badGrammarIndex) < grammarDetails.size());
489 outGrammarDetail = grammarDetails[badGrammarIndex];
492 // If we found a detail in range, then we have found the first bad phrase (unless we found one earlier but
493 // kept going so we could mark all instances).
494 if (badGrammarIndex >= 0 && firstBadGrammarPhrase.isEmpty()) {
495 outGrammarPhraseOffset = badGrammarPhraseLocation - paragraph.checkingStart();
496 firstBadGrammarPhrase = paragraph.textSubstring(badGrammarPhraseLocation, badGrammarPhraseLength);
498 // Found one. We're done now, unless we're marking each instance.
499 if (!markAll)
500 break;
503 // These results were all between the start of the paragraph and the start of the search range; look
504 // beyond this phrase.
505 startOffset = badGrammarPhraseLocation + badGrammarPhraseLength;
508 return firstBadGrammarPhrase;
511 bool TextCheckingHelper::markAllMisspellings()
513 // Use the "markAll" feature of findFirstMisspelling. Ignore the return value and the "out parameter";
514 // all we need to do is mark every instance.
515 int ignoredOffset;
516 return findFirstMisspelling(ignoredOffset, true).isEmpty();
519 void TextCheckingHelper::markAllBadGrammar()
521 // Use the "markAll" feature of findFirstBadGrammar. Ignore the return value and "out parameters"; all we need to
522 // do is mark every instance.
523 GrammarDetail ignoredGrammarDetail;
524 int ignoredOffset;
525 findFirstBadGrammar(ignoredGrammarDetail, ignoredOffset, true);
528 bool TextCheckingHelper::unifiedTextCheckerEnabled() const
530 ASSERT(m_start.isNotNull());
531 Document& doc = m_start.computeContainerNode()->document();
532 return blink::unifiedTextCheckerEnabled(doc.frame());
535 void checkTextOfParagraph(TextCheckerClient& client, const String& text, TextCheckingTypeMask checkingTypes, Vector<TextCheckingResult>& results)
537 Vector<UChar> characters;
538 text.appendTo(characters);
539 unsigned length = text.length();
541 Vector<TextCheckingResult> spellingResult;
542 if (checkingTypes & TextCheckingTypeSpelling)
543 findMisspellings(client, characters.data(), 0, length, spellingResult);
545 Vector<TextCheckingResult> grammarResult;
546 if (checkingTypes & TextCheckingTypeGrammar) {
547 // Only checks grammartical error before the first misspellings
548 int grammarCheckLength = length;
549 for (const auto& spelling : spellingResult) {
550 if (spelling.location < grammarCheckLength)
551 grammarCheckLength = spelling.location;
554 findBadGrammars(client, characters.data(), 0, grammarCheckLength, grammarResult);
557 if (grammarResult.size())
558 results.swap(grammarResult);
560 if (spellingResult.size()) {
561 if (results.isEmpty())
562 results.swap(spellingResult);
563 else
564 results.appendVector(spellingResult);
568 bool unifiedTextCheckerEnabled(const LocalFrame* frame)
570 if (!frame)
571 return false;
573 const Settings* settings = frame->settings();
574 if (!settings)
575 return false;
577 return settings->unifiedTextCheckerEnabled();