Bug 452317 - FeedConverter.js: QueryInterface should throw NS_ERROR_NO_INTERFACE...
[wine-gecko.git] / extensions / spellcheck / src / mozInlineSpellWordUtil.h
blob9591f8f530eaee5280d6cc1953130333c21c0969
1 /* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is inline spellchecker code.
17 * The Initial Developer of the Original Code is Google Inc.
18 * Portions created by the Initial Developer are Copyright (C) 2004-2006
19 * the Initial Developer. All Rights Reserved.
21 * Contributor(s):
22 * Brett Wilson <brettw@gmail.com> (original author)
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 #include "nsCOMPtr.h"
39 #include "nsIDOMDocument.h"
40 #include "nsIDOMDocumentRange.h"
41 #include "nsIDOMViewCSS.h"
42 #include "nsIDocument.h"
43 #include "nsString.h"
44 #include "nsTArray.h"
45 #include "nsIUGenCategory.h"
47 //#define DEBUG_SPELLCHECK
49 class nsIDOMRange;
50 class nsIDOMNode;
52 /**
53 * This class extracts text from the DOM and builds it into a single string.
54 * The string includes whitespace breaks wherever non-inline elements begin
55 * and end. This string is broken into "real words", following somewhat
56 * complex rules; for example substrings that look like URLs or
57 * email addresses are treated as single words, but otherwise many kinds of
58 * punctuation are treated as word separators. GetNextWord provides a way
59 * to iterate over these "real words".
61 * The basic operation is:
63 * 1. Call Init with the weak pointer to the editor that you're using.
64 * 2. Call SetEnd to set where you want to stop spellchecking. We'll stop
65 * at the word boundary after that. If SetEnd is not called, we'll stop
66 * at the end of the document's root element.
67 * 3. Call SetPosition to initialize the current position inside the
68 * previously given range.
69 * 4. Call GetNextWord over and over until it returns false.
72 class mozInlineSpellWordUtil
74 public:
75 struct NodeOffset {
76 nsIDOMNode* mNode;
77 PRInt32 mOffset;
79 NodeOffset(nsIDOMNode* aNode, PRInt32 aOffset) :
80 mNode(aNode), mOffset(aOffset) {}
83 mozInlineSpellWordUtil()
84 : mRootNode(nsnull),
85 mSoftBegin(nsnull, 0), mSoftEnd(nsnull, 0),
86 mNextWordIndex(-1), mSoftTextValid(PR_FALSE) {}
88 nsresult Init(nsWeakPtr aWeakEditor);
90 nsresult SetEnd(nsIDOMNode* aEndNode, PRInt32 aEndOffset);
92 // Sets the current position; this should be inside the range. If we are in
93 // the middle of a word, we'll move to its start.
94 nsresult SetPosition(nsIDOMNode* aNode, PRInt32 aOffset);
96 // Given a point inside or immediately following a word, this returns the
97 // DOM range that exactly encloses that word's characters. The current
98 // position will be at the end of the word. This will find the previous
99 // word if the current position is space, so if you care that the point is
100 // inside the word, you should check the range.
102 // THIS CHANGES THE CURRENT POSITION AND RANGE. It is designed to be called
103 // before you actually generate the range you are interested in and iterate
104 // the words in it.
105 nsresult GetRangeForWord(nsIDOMNode* aWordNode, PRInt32 aWordOffset,
106 nsIDOMRange** aRange);
108 // Moves to the next word in the range, and retrieves its text and range.
109 // An empty word and a NULL range are returned when we are done checking.
110 // aSkipChecking will be set if the word is "special" and shouldn't be
111 // checked (e.g., an email address).
112 nsresult GetNextWord(nsAString& aText, nsIDOMRange** aRange,
113 PRBool* aSkipChecking);
115 // Call to normalize some punctuation. This function takes a non-const
116 // nsSubstring reference so we can access characters directly.
117 static void NormalizeWord(nsSubstring& aWord);
119 nsIDOMDocumentRange* GetDocumentRange() const { return mDOMDocumentRange; }
120 nsIDocument* GetDocument() const { return mDocument; }
121 nsIDOMNode* GetRootNode() { return mRootNode; }
122 nsIUGenCategory* GetCategories() { return mCategories; }
124 private:
126 // cached stuff for the editor, set by Init
127 nsCOMPtr<nsIDOMDocumentRange> mDOMDocumentRange;
128 nsCOMPtr<nsIDocument> mDocument;
129 nsCOMPtr<nsIDOMViewCSS> mCSSView;
130 nsCOMPtr<nsIUGenCategory> mCategories;
132 // range to check, see SetRange
133 nsIDOMNode* mRootNode;
134 NodeOffset mSoftBegin;
135 NodeOffset mSoftEnd;
137 // DOM text covering the soft range, with newlines added at block boundaries
138 nsString mSoftText;
139 // A list of where we extracted text from, ordered by mSoftTextOffset. A given
140 // DOM node appears at most once in this list.
141 struct DOMTextMapping {
142 NodeOffset mNodeOffset;
143 PRInt32 mSoftTextOffset;
144 PRInt32 mLength;
146 DOMTextMapping(NodeOffset aNodeOffset, PRInt32 aSoftTextOffset, PRInt32 aLength)
147 : mNodeOffset(aNodeOffset), mSoftTextOffset(aSoftTextOffset),
148 mLength(aLength) {}
150 nsTArray<DOMTextMapping> mSoftTextDOMMapping;
152 // A list of the "real words" in mSoftText, ordered by mSoftTextOffset
153 struct RealWord {
154 PRInt32 mSoftTextOffset;
155 PRInt32 mLength;
156 PRPackedBool mCheckableWord;
158 RealWord(PRInt32 aOffset, PRInt32 aLength, PRPackedBool aCheckable)
159 : mSoftTextOffset(aOffset), mLength(aLength), mCheckableWord(aCheckable) {}
160 PRInt32 EndOffset() const { return mSoftTextOffset + mLength; }
162 nsTArray<RealWord> mRealWords;
163 PRInt32 mNextWordIndex;
165 PRPackedBool mSoftTextValid;
167 void InvalidateWords() { mSoftTextValid = PR_FALSE; }
168 void EnsureWords();
170 PRInt32 MapDOMPositionToSoftTextOffset(NodeOffset aNodeOffset);
171 // Map an offset into mSoftText to a DOM position. Note that two DOM positions
172 // can map to the same mSoftText offset, e.g. given nodes A=aaaa and B=bbbb
173 // forming aaaabbbb, (A,4) and (B,0) give the same string offset. So,
174 // aHint controls which position we return ... if aHint is HINT_END
175 // then the position indicates the END of a range so we return (A,4). Otherwise
176 // the position indicates the START of a range so we return (B,0).
177 enum DOMMapHint { HINT_BEGIN, HINT_END };
178 NodeOffset MapSoftTextOffsetToDOMPosition(PRInt32 aSoftTextOffset,
179 DOMMapHint aHint);
180 // Finds the index of the real word containing aSoftTextOffset, or -1 if none.
181 // If it's exactly between two words, then if aHint is HINT_BEGIN, return the
182 // later word (favouring the assumption that it's the BEGINning of a word),
183 // otherwise return the earlier word (assuming it's the END of a word).
184 // If aSearchForward is true, then if we don't find a word at the given
185 // position, search forward until we do find a word and return that (if found).
186 PRInt32 FindRealWordContaining(PRInt32 aSoftTextOffset, DOMMapHint aHint,
187 PRBool aSearchForward);
189 // build mSoftText and mSoftTextDOMMapping
190 void BuildSoftText();
191 // Build mRealWords array
192 void BuildRealWords();
194 void SplitDOMWord(PRInt32 aStart, PRInt32 aEnd);
196 // Convenience functions, object must be initialized
197 nsresult MakeRange(NodeOffset aBegin, NodeOffset aEnd, nsIDOMRange** aRange);
198 nsresult MakeRangeForWord(const RealWord& aWord, nsIDOMRange** aRange);