1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is inline spellchecker code.
17 * The Initial Developer of the Original Code is Google Inc.
18 * Portions created by the Initial Developer are Copyright (C) 2004-2006
19 * the Initial Developer. All Rights Reserved.
22 * Brett Wilson <brettw@gmail.com> (original author)
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
39 #include "nsIDOMDocument.h"
40 #include "nsIDOMDocumentRange.h"
41 #include "nsIDOMViewCSS.h"
42 #include "nsIDocument.h"
45 #include "nsIUGenCategory.h"
47 //#define DEBUG_SPELLCHECK
53 * This class extracts text from the DOM and builds it into a single string.
54 * The string includes whitespace breaks wherever non-inline elements begin
55 * and end. This string is broken into "real words", following somewhat
56 * complex rules; for example substrings that look like URLs or
57 * email addresses are treated as single words, but otherwise many kinds of
58 * punctuation are treated as word separators. GetNextWord provides a way
59 * to iterate over these "real words".
61 * The basic operation is:
63 * 1. Call Init with the weak pointer to the editor that you're using.
64 * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop
65 * at the word boundary after that. If SetEnd is not called, we'll stop
66 * at the end of the document's root element.
67 * 3. Call SetPosition to initialize the current position inside the
68 * previously given range.
69 * 4. Call GetNextWord over and over until it returns false.
72 class mozInlineSpellWordUtil
// Constructs a NodeOffset from a DOM node pointer and an integer offset,
// storing them in mNode and mOffset respectively. The body is empty.
79 NodeOffset(nsIDOMNode
* aNode
, PRInt32 aOffset
) :
80 mNode(aNode
), mOffset(aOffset
) {}
// Default constructor. The visible initializers set mSoftBegin and mSoftEnd
// to (nsnull, 0), mNextWordIndex to -1, and mSoftTextValid to PR_FALSE.
// NOTE(review): the start of the initializer list is not visible in this
// excerpt; other members may be initialized there.
83 mozInlineSpellWordUtil()
85 mSoftBegin(nsnull
, 0), mSoftEnd(nsnull
, 0),
86 mNextWordIndex(-1), mSoftTextValid(PR_FALSE
) {}
// First step of the usage sequence in the class comment: initializes this
// object from a weak pointer to the editor in use. The members commented as
// "cached stuff for the editor" are documented as being set here.
// Returns an nsresult status code.
88 nsresult
Init(nsWeakPtr aWeakEditor
);
// Sets where spellchecking should stop. Per the class comment, checking stops
// at the word boundary after this point; if SetEnd is never called, checking
// stops at the end of the document's root element.
// aEndNode/aEndOffset: DOM position of the requested end point.
// Returns an nsresult status code.
90 nsresult
SetEnd(nsIDOMNode
* aEndNode
, PRInt32 aEndOffset
);
92 // sets the current position, this should be inside the range. If we are in
93 // the middle of a word, we'll move to its start.
// aNode/aOffset: DOM position to make current.
// Returns an nsresult status code.
94 nsresult
SetPosition(nsIDOMNode
* aNode
, PRInt32 aOffset
);
96 // Given a point inside or immediately following a word, this returns the
97 // DOM range that exactly encloses that word's characters. The current
98 // position will be at the end of the word. This will find the previous
99 // word if the current position is space, so if you care that the point is
100 // inside the word, you should check the range.
102 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
103 // before you actually generate the range you are interested in and iterate
// aWordNode/aWordOffset: DOM point inside or immediately following the word.
// aRange: out parameter for the DOM range enclosing the word.
// Returns an nsresult status code.
105 nsresult
GetRangeForWord(nsIDOMNode
* aWordNode
, PRInt32 aWordOffset
,
106 nsIDOMRange
** aRange
);
108 // Moves to the next word in the range, and retrieves its text and range.
109 // An empty word and a NULL range are returned when we are done checking.
110 // aSkipChecking will be set if the word is "special" and shouldn't be
111 // checked (e.g., an email address).
// aText: receives the word's text (empty when checking is done).
// aRange: receives the word's range (NULL when checking is done).
// aSkipChecking: set when the word is "special" and should not be checked.
// Returns an nsresult status code.
112 nsresult
GetNextWord(nsAString
& aText
, nsIDOMRange
** aRange
,
113 PRBool
* aSkipChecking
);
115 // Call to normalize some punctuation. This function takes an autostring
116 // so we can access characters directly.
// Static helper: needs no instance and returns nothing. aWord is taken by
// non-const reference.
// NOTE(review): presumably aWord is normalized in place -- confirm against
// the implementation.
117 static void NormalizeWord(nsSubstring
& aWord
);
// Accessor: returns the document-range interface held in mDOMDocumentRange
// as a raw pointer. Const; does not modify the object.
119 nsIDOMDocumentRange
* GetDocumentRange() const { return mDOMDocumentRange
; }
// Accessor: returns the document held in mDocument as a raw pointer.
// Const; does not modify the object.
120 nsIDocument
* GetDocument() const { return mDocument
; }
// Accessor: returns the mRootNode pointer.
// NOTE(review): unlike GetDocumentRange()/GetDocument(), this getter is not
// declared const even though it only reads a member.
121 nsIDOMNode
* GetRootNode() { return mRootNode
; }
// Accessor: returns the interface held in mCategories as a raw pointer.
// NOTE(review): unlike GetDocumentRange()/GetDocument(), this getter is not
// declared const even though it only reads a member.
122 nsIUGenCategory
* GetCategories() { return mCategories
; }
126 // cached stuff for the editor, set by Init
// Returned by GetDocumentRange().
127 nsCOMPtr
<nsIDOMDocumentRange
> mDOMDocumentRange
;
// Returned by GetDocument().
128 nsCOMPtr
<nsIDocument
> mDocument
;
// CSS view interface; no use of it is visible in this excerpt.
129 nsCOMPtr
<nsIDOMViewCSS
> mCSSView
;
// Returned by GetCategories().
130 nsCOMPtr
<nsIUGenCategory
> mCategories
;
132 // range to check, see SetRange
// Raw pointer to the root node; exposed through GetRootNode().
133 nsIDOMNode
* mRootNode
;
// Beginning of the soft range; the constructor initializes it to (nsnull, 0).
// NOTE(review): the constructor also initializes an mSoftEnd member whose
// declaration is not visible in this excerpt.
134 NodeOffset mSoftBegin
;
137 // DOM text covering the soft range, with newlines added at block boundaries
139 // A list of where we extracted text from, ordered by mSoftTextOffset. A given
140 // DOM node appears at most once in this list.
// One entry of the DOM-to-soft-text mapping: pairs a DOM position with an
// offset into the soft text.
141 struct DOMTextMapping
{
// DOM position the text was extracted from.
142 NodeOffset mNodeOffset
;
// Corresponding offset into the soft text; the mapping list is ordered by it.
143 PRInt32 mSoftTextOffset
;
// Stores aNodeOffset and aSoftTextOffset in the matching members.
// NOTE(review): the remainder of the initializer list (including where
// aLength is stored) and the struct's closing brace are not visible in this
// excerpt.
146 DOMTextMapping(NodeOffset aNodeOffset
, PRInt32 aSoftTextOffset
, PRInt32 aLength
)
147 : mNodeOffset(aNodeOffset
), mSoftTextOffset(aSoftTextOffset
),
// Array of DOMTextMapping entries; BuildSoftText() is documented as building
// it together with mSoftText.
150 nsTArray
<DOMTextMapping
> mSoftTextDOMMapping
;
152 // A list of the "real words" in mSoftText, ordered by mSoftTextOffset
// NOTE(review): these are fields of RealWord (they match that constructor's
// initializer list); the struct header and the mLength declaration are not
// visible in this excerpt.
// Offset of the word's first character within the soft text.
154 PRInt32 mSoftTextOffset
;
// Set from the constructor's aCheckable argument.
156 PRPackedBool mCheckableWord
;
// Constructs a RealWord, storing aOffset in mSoftTextOffset, aLength in
// mLength and aCheckable in mCheckableWord. The body is empty.
158 RealWord(PRInt32 aOffset
, PRInt32 aLength
, PRPackedBool aCheckable
)
159 : mSoftTextOffset(aOffset
), mLength(aLength
), mCheckableWord(aCheckable
) {}
// Returns mSoftTextOffset + mLength, i.e. the soft-text offset one past the
// word's last character. Const; does not modify the word.
160 PRInt32
EndOffset() const { return mSoftTextOffset
+ mLength
; }
// Array of RealWord entries; BuildRealWords() is documented as building it.
162 nsTArray
<RealWord
> mRealWords
;
// Initialized to -1 by the constructor.
// NOTE(review): presumably the index into mRealWords of the word the next
// GetNextWord call returns -- confirm against the implementation.
163 PRInt32 mNextWordIndex
;
// Initialized to PR_FALSE by the constructor and reset to PR_FALSE by
// InvalidateWords().
165 PRPackedBool mSoftTextValid
;
// Sets mSoftTextValid to PR_FALSE; no other state is touched here.
167 void InvalidateWords() { mSoftTextValid
= PR_FALSE
; }
// Takes a DOM position (aNodeOffset) and returns a PRInt32.
// NOTE(review): presumably the inverse of MapSoftTextOffsetToDOMPosition,
// returning an offset into mSoftText; the value returned for positions that
// cannot be mapped is not visible here -- confirm against the implementation.
170 PRInt32
MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset
);
171 // Map an offset into mSoftText to a DOM position. Note that two DOM positions
172 // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb
173 // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,
174 // aHint controls which position we return ... if aHint is HINT_END
175 // then the position indicates the END of a range so we return (A,4). Otherwise
176 // the position indicates the START of a range so we return (B,0).
// Disambiguation hint for offsets that fall exactly on a boundary:
// HINT_BEGIN treats the offset as the start of a range or word, HINT_END
// treats it as the end.
177 enum DOMMapHint
{ HINT_BEGIN
, HINT_END
};
// Returns the DOM position (NodeOffset) for aSoftTextOffset.
// NOTE(review): the rest of this declaration's parameter list is not visible
// in this excerpt; the comment above it refers to a hint parameter.
178 NodeOffset
MapSoftTextOffsetToDOMPosition(PRInt32 aSoftTextOffset
,
180 // Finds the index of the real word containing aSoftTextOffset, or -1 if none
181 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
182 // later word (favouring the assumption that it's the BEGINning of a word),
183 // otherwise return the earlier word (assuming it's the END of a word).
184 // If aSearchForward is true, then if we don't find a word at the given
185 // position, search forward until we do find a word and return that (if found).
// aSoftTextOffset: offset to look up.
// aHint: tie-breaker when the offset is exactly between two words.
// aSearchForward: if true and no word is at the offset, search forward.
// Returns the index of the word found, or -1 if none.
186 PRInt32
FindRealWordContaining(PRInt32 aSoftTextOffset
, DOMMapHint aHint
,
187 PRBool aSearchForward
);
189 // build mSoftText and mSoftTextDOMMapping
// Takes no arguments and returns nothing; results are stored in members.
190 void BuildSoftText();
191 // Build mRealWords array
// Takes no arguments and returns nothing; results are stored in members.
192 void BuildRealWords();
// Declaration only; takes a start and an end offset and returns nothing.
// NOTE(review): presumably aStart/aEnd are offsets into mSoftText delimiting
// a DOM word to be split into real words -- confirm against the
// implementation.
194 void SplitDOMWord(PRInt32 aStart
, PRInt32 aEnd
);
196 // Convenience functions, object must be initialized
// Takes two DOM positions (aBegin, aEnd) and an nsIDOMRange** out parameter.
// NOTE(review): presumably creates a range spanning aBegin..aEnd and returns
// it through aRange -- confirm against the implementation.
// Returns an nsresult status code.
197 nsresult
MakeRange(NodeOffset aBegin
, NodeOffset aEnd
, nsIDOMRange
** aRange
);
// Takes a RealWord by const reference and an nsIDOMRange** out parameter.
// NOTE(review): presumably returns through aRange the DOM range covering
// aWord's characters -- confirm against the implementation.
// Returns an nsresult status code.
198 nsresult
MakeRangeForWord(const RealWord
& aWord
, nsIDOMRange
** aRange
);