Bug 460926 A11y hierachy is broken on Ubuntu 8.10 (GNOME 2.24), r=Evan.Yan sr=roc
[wine-gecko.git] / netwerk / streamconv / converters / mozTXTToHTMLConv.h
blobd64e4476ee7187787bef19aac1bf8c635201bb54
1 /* -*- Mode: C; tab-width: 2; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is the Mozilla Text to HTML converter code.
17 * The Initial Developer of the Original Code is
18 * Ben Bucksch <http://www.bucksch.org>.
19 * Portions created by the Initial Developer are Copyright (C) 1999, 2000
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either the GNU General Public License Version 2 or later (the "GPL"), or
26 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 /**
39 Description: Currently only functions to enhance plain text with HTML tags. See mozITXTToHTMLConv. Stream conversion is defunct.
42 #ifndef _mozTXTToHTMLConv_h__
43 #define _mozTXTToHTMLConv_h__
45 #include "mozITXTToHTMLConv.h"
46 #include "nsIIOService.h"
47 #include "nsString.h"
48 #include "nsCOMPtr.h"
51 class mozTXTToHTMLConv : public mozITXTToHTMLConv
55 //////////////////////////////////////////////////////////
56 public:
57 //////////////////////////////////////////////////////////
59 mozTXTToHTMLConv();
60 virtual ~mozTXTToHTMLConv();
61 NS_DECL_ISUPPORTS
63 NS_DECL_MOZITXTTOHTMLCONV
64 NS_DECL_NSIREQUESTOBSERVER
65 NS_DECL_NSISTREAMLISTENER
66 NS_DECL_NSISTREAMCONVERTER
68 /**
69 see mozITXTToHTMLConv::ScanTXT
71 void ScanTXT(const PRUnichar * aInString, PRInt32 aInStringLength, PRUint32 whattodo, nsString& aOutString);
73 /**
74 see mozITXTToHTMLConv::ScanHTML. We will modify aInString potentially...
76 void ScanHTML(nsString& aInString, PRUint32 whattodo, nsString &aOutString);
78 /**
79 see mozITXTToHTMLConv::CiteLevelTXT
81 PRInt32 CiteLevelTXT(const PRUnichar * line,PRUint32& logLineStart);
84 //////////////////////////////////////////////////////////
85 protected:
86 //////////////////////////////////////////////////////////
87 nsCOMPtr<nsIIOService> mIOService; // for performance reasons, cache the netwerk service...
88 /**
89 Completes<ul>
90 <li>Case 1: mailto: "mozilla@bucksch.org" -> "mailto:mozilla@bucksch.org"
91 <li>Case 2: http: "www.mozilla.org" -> "http://www.mozilla.org"
92 <li>Case 3: ftp: "ftp.mozilla.org" -> "ftp://ftp.mozilla.org"
93 </ul>
94 It does not check whether the resulting URL is valid.
95 @param aInString (in): abbreviated URL (aInLength: its length)
96 @param pos (in): position of "@" (case 1) or first "." (case 2 and 3)
97 @param aOutString (out): Completed URL at success and empty string at failure
99 void CompleteAbbreviatedURL(const PRUnichar * aInString, PRInt32 aInLength,
100 const PRUint32 pos, nsString& aOutString);
103 //////////////////////////////////////////////////////////
104 private:
105 //////////////////////////////////////////////////////////
107 enum LIMTYPE // kind of limitation checked before/after rep; see ItMatchesDelimited
109 LT_IGNORE, // limitation not checked
110 LT_DELIMITER, // not alphanumeric and not rep[0]. End of text is also ok.
111 LT_ALPHA, // alpha char
112 LT_DIGIT // NOTE(review): presumably a digit char, by analogy with LT_ALPHA -- confirm
116 @param text (in): the string to search through.<p>
117 If before = IGNORE,<br>
118 rep is compared starting at the first char of text (text[0]),<br>
119 else starting at the second char of text (text[1]).
120 Chars after "after"-delimiter are ignored.
121 @param rep (in): the string to look for
122 @param aRepLen (in): the number of bytes in the string to look for
123 @param before (in): limitation before rep
124 @param after (in): limitation after rep
125 @return true, if rep is found and limitation spec is met or rep is empty
127 PRBool ItMatchesDelimited(const PRUnichar * aInString, PRInt32 aInLength,
128 const PRUnichar * rep, PRInt32 aRepLen, LIMTYPE before, LIMTYPE after);
131 @param see ItMatchesDelimited
132 @return Number of ItMatchesDelimited matches in text
134 PRUint32 NumberOfMatches(const PRUnichar * aInString, PRInt32 aInStringLength,
135 const PRUnichar* rep, PRInt32 aRepLen, LIMTYPE before, LIMTYPE after);
138 Currently only changes "<", ">" and "&". All others stay as they are.<p>
139 "Char" in function name to avoid side effects with nsString(ch)
140 constructors.
141 @param ch (in)
142 @param aStringToAppendto (out) - the string to append the escaped
143 string to.
144 @param inAttribute (in) - will escape quotes, too (which is
145 only needed for attribute values)
147 void EscapeChar(const PRUnichar ch, nsString& aStringToAppendto,
148 PRBool inAttribute);
151 See EscapeChar. Escapes the string in place.
153 void EscapeStr(nsString& aInString, PRBool inAttribute);
156 Currently only reverts "<", ">" and "&". All others stay as they are.<p>
157 @param aInString (in) HTML string
158 @param aStartPos (in) start index into the buffer
159 @param aLength (in) length of the buffer
160 @param aOutString (out) unescaped buffer
162 void UnescapeStr(const PRUnichar * aInString, PRInt32 aStartPos,
163 PRInt32 aLength, nsString& aOutString);
166 <em>Note</em>: I use different strategies to pass context between the
167 functions (full text and pos vs. cut text and col0, glyphTextLen vs.
168 replaceBefore/-After). It makes some sense, but is hard to understand
169 (maintain) :-(.
173 <p><em>Note:</em> replaceBefore + replaceAfter + 1 (for char at pos) chars
174 in text should be replaced by outputHTML.</p>
175 <p><em>Note:</em> This function should be able to process a URL on multiple
176 lines, but currently, ScanForURLs is called for every line, so it can't.</p>
177 @param text (in): includes possibly a URL
178 @param pos (in): position in text, where either ":", "." or "@" are found
179 @param whathasbeendone (in): What the calling ScanTXT did/has to do with the
180 (not-linkified) text, i.e. usually the "whattodo" parameter.
181 (Needed to calculate replaceBefore.) NOT what will be done with
182 the content of the link.
183 @param outputHTML (out): URL with HTML-a tag
184 @param replaceBefore (out): Number of chars of URL before pos
185 @param replaceAfter (out): Number of chars of URL after pos
186 @return URL found
188 PRBool FindURL(const PRUnichar * aInString, PRInt32 aInLength, const PRUint32 pos,
189 const PRUint32 whathasbeendone,
190 nsString& outputHTML, PRInt32& replaceBefore, PRInt32& replaceAfter);
192 enum modetype {
193 unknown,
194 RFC1738, /* Check, if RFC1738, APPENDIX compliant,
195 like "<URL:http://www.mozilla.org>". */
196 RFC2396E, /* RFC2396, APPENDIX E allows anglebrackets (like
197 "<http://www.mozilla.org>") (without "URL:") or
198 quotation marks (like ""http://www.mozilla.org"").
199 Also allow email addresses without scheme,
200 e.g. "<mozilla@bucksch.org>" */
201 freetext, /* assume heading scheme
202 with "[a-zA-Z][a-zA-Z0-9+\-\.]*:" like "news:"
203 (see RFC2396, Section 3.1).
204 Certain characters (see code) or any whitespace
205 (including linebreaks) end the URL.
206 Other certain (punctuation) characters (see code)
207 at the end are stripped off. */
208 abbreviated /* Similar to freetext, but without scheme, e.g.
209 "www.mozilla.org", "ftp.mozilla.org" and
210 "mozilla@bucksch.org". */
211 /* RFC1738 and RFC2396E type URLs may use multiple lines,
212 whitespace is stripped. Special characters like ")" stay intact.*/
216 * @param text (in), pos (in): see FindURL
217 * @param check (in): Start must conform to this mode
218 * @param start (out): Position in text, where URL (including brackets or
219 * similar) starts
220 * @return |check|-conform start has been found
222 PRBool FindURLStart(const PRUnichar * aInString, PRInt32 aInLength, const PRUint32 pos,
223 const modetype check, PRUint32& start);
226 * @param text (in), pos (in): see FindURL
227 * @param check (in): End must conform to this mode
228 * @param start (in): see FindURLStart
229 * @param end (out): Similar to |start| param of FindURLStart
230 * @return |check|-conform end has been found
232 PRBool FindURLEnd(const PRUnichar * aInString, PRInt32 aInStringLength, const PRUint32 pos,
233 const modetype check, const PRUint32 start, PRUint32& end);
236 * @param text (in), pos (in), whathasbeendone (in): see FindURL
237 * @param check (in): Current mode
238 * @param start (in), end (in): see FindURLEnd
239 * @param txtURL (out): Guessed (raw) URL.
240 * Without whitespace, but not completed.
241 * @param desc (out): Link as shown to the user, but already escaped.
242 * Should be placed between the <a> and </a> tags.
243 * @param replaceBefore(out), replaceAfter (out): see FindURL
245 void CalculateURLBoundaries(const PRUnichar * aInString, PRInt32 aInStringLength,
246 const PRUint32 pos, const PRUint32 whathasbeendone,
247 const modetype check, const PRUint32 start, const PRUint32 end,
248 nsString& txtURL, nsString& desc,
249 PRInt32& replaceBefore, PRInt32& replaceAfter);
252 * @param txtURL (in), desc (in): see CalculateURLBoundaries
253 * @param outputHTML (out): see FindURL
254 * @return A valid URL could be found (and creation of HTML successful)
256 PRBool CheckURLAndCreateHTML(
257 const nsString& txtURL, const nsString& desc, const modetype mode,
258 nsString& outputHTML);
261 @param text (in): line of text possibly with tagTXT.<p>
262 if col0 is true,
263 starting with tagTXT<br>
264 else
265 starting one char before tagTXT
266 @param col0 (in): tagTXT is on the beginning of the line (or paragraph).
267 openTags must be 0 then.
268 @param tagTXT (in): Tag in plaintext to search for, e.g. "*"
269 @param aTagTxtLen (in): length of tagTXT.
270 @param tagHTML (in): HTML-Tag to replace tagTXT with,
271 without "<" and ">", e.g. "strong"
272 @param attributeHTML (in): HTML-attribute to add to opening tagHTML,
273 e.g. "class=txt_star"
274 @param aOutputString: string to APPEND the converted html into
275 @param openTags (in/out): Number of currently open tags of type tagHTML
276 @return Conversion succeeded
278 PRBool StructPhraseHit(const PRUnichar * aInString, PRInt32 aInStringLength, PRBool col0,
279 const PRUnichar* tagTXT,
280 PRInt32 aTagTxtLen,
281 const char* tagHTML, const char* attributeHTML,
282 nsString& aOutputString, PRUint32& openTags);
285 @param text (in), col0 (in): see GlyphHit
286 @param tagTXT (in): Smiley, see also StructPhraseHit
287 @param imageName (in): the basename of the file that contains the image for this smiley
288 @param outputHTML (out): new string containing the html for the smiley
289 @param glyphTextLen (out): see GlyphHit
291 PRBool
292 SmilyHit(const PRUnichar * aInString, PRInt32 aLength, PRBool col0,
293 const char* tagTXT, const char* imageName,
294 nsString& outputHTML, PRInt32& glyphTextLen);
297 Checks, if we can replace some chars at the start of line with prettier HTML
298 code.<p>
299 If success is reported, replace the first glyphTextLen chars with the HTML appended to aOutString
301 @param text (in): line of text possibly with Glyph.<p>
302 If col0 is true,
303 starting with Glyph <br><!-- (br not part of text) -->
304 else
305 starting one char before Glyph
306 @param col0 (in): text starts at the beginning of the line (or paragraph)
307 @param aOutString (out): APPENDS html for the glyph to this string
308 @param glyphTextLen (out): Length of original text to replace
309 @return see StructPhraseHit
311 PRBool GlyphHit(const PRUnichar * aInString, PRInt32 aInLength, PRBool col0,
312 nsString& aOutString, PRInt32& glyphTextLen);
315 Check if a given url should be linkified.
316 @param aURL (in): URL to be checked.
318 PRBool ShouldLinkify(const nsCString& aURL);
321 // It is said that Win32 and Mac don't like static const members
322 const PRInt32 mozTXTToHTMLConv_lastMode = 4; // NOTE(review): presumably the value of the last modetype enumerator (abbreviated) -- confirm
323 // Needed (only) by mozTXTToHTMLConv::FindURL
324 const PRInt32 mozTXTToHTMLConv_numberOfModes = 4; // ditto; unknown not counted
326 #endif