Follow-on fix for bug 457825. Use sheet principal for agent and user sheets. r=dbaron...
[wine-gecko.git] / intl / chardet / src / nsMetaCharsetObserver.cpp
bloba44ad6a77a9cc6ce0c31fcdc17df2696a19f6119
1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
13 * License.
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1999
20 * the Initial Developer. All Rights Reserved.
22 * Contributor(s):
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
37 #include "nsDeque.h"
38 #include "nsICharsetAlias.h"
39 #include "nsMetaCharsetObserver.h"
40 #include "nsIMetaCharsetService.h"
41 #include "nsIElementObserver.h"
42 #include "nsIObserver.h"
43 #include "nsIObserverService.h"
44 #include "nsISupports.h"
45 #include "nsCRT.h"
46 #include "nsIParser.h"
47 #include "pratom.h"
48 #include "nsCharDetDll.h"
49 #include "nsIServiceManager.h"
50 #include "nsObserverBase.h"
51 #include "nsWeakReference.h"
52 #include "nsIParserService.h"
53 #include "nsParserCIID.h"
54 #include "nsMetaCharsetCID.h"
55 #include "nsReadableUtils.h"
56 #include "nsUnicharUtils.h"
58 static NS_DEFINE_CID(kCharsetAliasCID, NS_CHARSETALIAS_CID);
60 static const eHTMLTags gWatchTags[] =
61 { eHTMLTag_meta,
62 eHTMLTag_unknown
65 //-------------------------------------------------------------------------
66 nsMetaCharsetObserver::nsMetaCharsetObserver()
68 bMetaCharsetObserverStarted = PR_FALSE;
69 nsresult res;
70 mAlias = nsnull;
71 nsCOMPtr<nsICharsetAlias> calias(do_GetService(kCharsetAliasCID, &res));
72 if(NS_SUCCEEDED(res)) {
73 mAlias = calias;
76 //-------------------------------------------------------------------------
77 nsMetaCharsetObserver::~nsMetaCharsetObserver()
81 //-------------------------------------------------------------------------
82 NS_IMPL_ADDREF ( nsMetaCharsetObserver )
83 NS_IMPL_RELEASE ( nsMetaCharsetObserver )
85 // Use the new scheme
86 NS_IMPL_QUERY_INTERFACE4(nsMetaCharsetObserver,
87 nsIElementObserver,
88 nsIObserver,
89 nsIMetaCharsetService,
90 nsISupportsWeakReference)
92 //-------------------------------------------------------------------------
93 NS_IMETHODIMP nsMetaCharsetObserver::Notify(
94 PRUint32 aDocumentID,
95 const PRUnichar* aTag,
96 PRUint32 numOfAttributes,
97 const PRUnichar* nameArray[],
98 const PRUnichar* valueArray[])
101 if(!nsDependentString(aTag).LowerCaseEqualsLiteral("meta"))
102 return NS_ERROR_ILLEGAL_VALUE;
103 else
104 return Notify(aDocumentID, numOfAttributes, nameArray, valueArray);
106 //-------------------------------------------------------------------------
107 NS_IMETHODIMP nsMetaCharsetObserver::Notify(
108 PRUint32 aDocumentID,
109 eHTMLTags aTag,
110 PRUint32 numOfAttributes,
111 const PRUnichar* nameArray[],
112 const PRUnichar* valueArray[])
114 if(eHTMLTag_meta != aTag)
115 return NS_ERROR_ILLEGAL_VALUE;
116 else
117 return Notify(aDocumentID, numOfAttributes, nameArray, valueArray);
120 NS_IMETHODIMP nsMetaCharsetObserver::Notify(
121 PRUint32 aDocumentID,
122 PRUint32 numOfAttributes,
123 const PRUnichar* nameArray[],
124 const PRUnichar* valueArray[])
126 nsDeque keys(0);
127 nsDeque values(0);
128 PRUint32 i;
129 for(i=0;i<numOfAttributes;i++)
131 keys.Push((void*)nameArray[i]);
132 values.Push((void*)valueArray[i]);
134 return NS_OK;//Notify((nsISupports*)aDocumentID, &keys, &values);
136 NS_IMETHODIMP nsMetaCharsetObserver::Notify(
137 nsISupports* aWebShell,
138 nsISupports* aChannel,
139 const PRUnichar* aTag,
140 const nsStringArray* keys,
141 const nsStringArray* values,
142 const PRUint32 aFlags)
144 nsresult result = NS_OK;
145 // bug 125317 - document.write content is already an unicode content.
146 if (!(aFlags & nsIElementObserver::IS_DOCUMENT_WRITE)) {
147 if(!nsDependentString(aTag).LowerCaseEqualsLiteral("meta")) {
148 result = NS_ERROR_ILLEGAL_VALUE;
150 else {
151 result = Notify(aWebShell, aChannel, keys, values);
154 return result;
// True when ch is one of the characters that may surround an attribute name
// or value and must be skipped before comparison: space, tab, CR or LF.
// '\t' was missing although the callers are documented as skipping
// "ws/tab/cr/lf"; '\b' is still accepted so existing behavior is a strict
// subset of the new one.  The argument is parenthesized so that expressions
// such as *p can be passed safely; note that it is evaluated more than once.
#define IS_SPACE_CHARS(ch) ((ch) == ' ' || (ch) == '\t' || (ch) == '\b' || \
                            (ch) == '\r' || (ch) == '\n')
159 NS_IMETHODIMP nsMetaCharsetObserver::Notify(
160 nsISupports* aWebShell,
161 nsISupports* aChannel,
162 const nsStringArray* keys,
163 const nsStringArray* values)
165 NS_PRECONDITION(keys!=nsnull && values!=nsnull,"Need key-value pair");
167 PRInt32 numOfAttributes = keys->Count();
168 NS_ASSERTION( numOfAttributes == values->Count(), "size mismatch");
169 nsresult res=NS_OK;
170 #ifdef DEBUG
172 PRUnichar Uxcommand[]={'X','_','C','O','M','M','A','N','D','\0'};
173 PRUnichar UcharsetSource[]={'c','h','a','r','s','e','t','S','o','u','r','c','e','\0'};
174 PRUnichar Ucharset[]={'c','h','a','r','s','e','t','\0'};
176 NS_ASSERTION(numOfAttributes >= 3, "should have at least 3 private attribute");
177 NS_ASSERTION(0==nsCRT::strcmp(Uxcommand,(keys->StringAt(numOfAttributes-1))->get()),"last name should be 'X_COMMAND'" );
178 NS_ASSERTION(0==nsCRT::strcmp(UcharsetSource,(keys->StringAt(numOfAttributes-2))->get()),"2nd last name should be 'charsetSource'" );
179 NS_ASSERTION(0==nsCRT::strcmp(Ucharset,(keys->StringAt(numOfAttributes-3))->get()),"3rd last name should be 'charset'" );
181 #endif
182 NS_ASSERTION(mAlias, "Didn't get nsICharsetAlias in constructor");
184 if(nsnull == mAlias)
185 return NS_ERROR_ABORT;
187 // we need at least 5 - HTTP-EQUIV, CONTENT and 3 private
188 if(numOfAttributes >= 5 )
190 const PRUnichar *charset = (values->StringAt(numOfAttributes-3))->get();
191 const PRUnichar *source = (values->StringAt(numOfAttributes-2))->get();
192 PRInt32 err;
193 nsAutoString srcStr(source);
194 PRInt32 src = srcStr.ToInteger(&err);
195 // if we cannot convert the string into PRInt32, return error
196 NS_ASSERTION(NS_SUCCEEDED(err), "cannot get charset source");
197 if(NS_FAILED(err))
198 return NS_ERROR_ILLEGAL_VALUE;
200 if(kCharsetFromMetaTag <= src)
201 return NS_OK; // current charset has higher priority. don't bother to do the following
203 PRInt32 i;
204 const PRUnichar *httpEquivValue=nsnull;
205 const PRUnichar *contentValue=nsnull;
206 const PRUnichar *charsetValue=nsnull;
208 for(i=0;i<(numOfAttributes-3);i++)
210 const PRUnichar *keyStr;
211 keyStr = (keys->StringAt(i))->get();
213 //Change 3.190 in nsHTMLTokens.cpp allow ws/tab/cr/lf exist before
214 // and after text value, this need to be skipped before comparison
215 while(IS_SPACE_CHARS(*keyStr))
216 keyStr++;
218 if(Substring(keyStr, keyStr+10).LowerCaseEqualsLiteral("http-equiv"))
219 httpEquivValue = values->StringAt(i)->get();
220 else if(Substring(keyStr, keyStr+7).LowerCaseEqualsLiteral("content"))
221 contentValue = values->StringAt(i)->get();
222 else if (Substring(keyStr, keyStr+7).LowerCaseEqualsLiteral("charset"))
223 charsetValue = values->StringAt(i)->get();
225 NS_NAMED_LITERAL_STRING(contenttype, "Content-Type");
226 NS_NAMED_LITERAL_STRING(texthtml, "text/html");
228 if(nsnull == httpEquivValue || nsnull == contentValue)
229 return NS_OK;
231 while(IS_SPACE_CHARS(*httpEquivValue))
232 ++httpEquivValue;
233 // skip opening quote
234 if (*httpEquivValue == '\'' || *httpEquivValue == '\"')
235 ++httpEquivValue;
237 while(IS_SPACE_CHARS(*contentValue))
238 ++contentValue;
239 // skip opening quote
240 if (*contentValue == '\'' || *contentValue == '\"')
241 ++contentValue;
244 Substring(httpEquivValue,
245 httpEquivValue+contenttype.Length()).Equals(contenttype,
246 nsCaseInsensitiveStringComparator())
248 Substring(contentValue,
249 contentValue+texthtml.Length()).Equals(texthtml,
250 nsCaseInsensitiveStringComparator())
254 nsCAutoString newCharset;
256 if (nsnull == charsetValue)
258 nsAutoString contentPart1(contentValue+9); // after "text/html"
259 PRInt32 start = contentPart1.RFind("charset=", PR_TRUE ) ;
260 PRInt32 end = contentPart1.Length();
261 if(kNotFound != start)
263 start += 8; // 8 = "charset=".length
264 while (start < end && contentPart1.CharAt(start) == PRUnichar(' '))
265 ++start;
266 if (start < end) {
267 end = contentPart1.FindCharInSet("\'\"; ", start);
268 if(kNotFound == end )
269 end = contentPart1.Length();
270 NS_ASSERTION(end>=start, "wrong index");
271 LossyCopyUTF16toASCII(Substring(contentPart1, start, end-start),
272 newCharset);
276 else
278 LossyCopyUTF16toASCII(nsDependentString(charsetValue), newCharset);
281 nsCAutoString charsetString; charsetString.AssignWithConversion(charset);
283 if (!newCharset.IsEmpty())
285 if(! newCharset.Equals(charsetString, nsCaseInsensitiveCStringComparator()))
287 PRBool same = PR_FALSE;
288 nsresult res2 = mAlias->Equals( newCharset, charsetString , &same);
289 if(NS_SUCCEEDED(res2) && (! same))
291 nsCAutoString preferred;
292 res2 = mAlias->GetPreferred(newCharset, preferred);
293 if(NS_SUCCEEDED(res2))
295 // following charset should have been detected by parser
296 if (!preferred.EqualsLiteral("UTF-16") &&
297 !preferred.EqualsLiteral("UTF-16BE") &&
298 !preferred.EqualsLiteral("UTF-16LE") &&
299 !preferred.EqualsLiteral("UTF-32BE") &&
300 !preferred.EqualsLiteral("UTF-32LE")) {
301 // Propagate the error message so that the parser can
302 // shutdown correctly. - Ref. Bug 96440
303 res = NotifyWebShell(aWebShell,
304 aChannel,
305 preferred.get(),
306 kCharsetFromMetaTag);
308 } // if(NS_SUCCEEDED(res)
311 else {
312 res = NS_HTMLPARSER_VALID_META_CHARSET;
313 } // if EqualIgnoreCase
314 } // if !newCharset.IsEmpty()
315 } // if
317 else
319 nsAutoString compatCharset;
320 if (NS_SUCCEEDED(GetCharsetFromCompatibilityTag(keys, values, compatCharset)))
322 if (!compatCharset.IsEmpty()) {
323 res = NotifyWebShell(aWebShell,
324 aChannel,
325 NS_ConvertUTF16toUTF8(compatCharset).get(),
326 kCharsetFromMetaTag);
330 return res;
333 //-------------------------------------------------------------------------
334 NS_IMETHODIMP nsMetaCharsetObserver::GetCharsetFromCompatibilityTag(
335 const nsStringArray* keys,
336 const nsStringArray* values,
337 nsAString& aCharset)
339 if (!mAlias)
340 return NS_ERROR_ABORT;
342 aCharset.Truncate(0);
343 nsresult res = NS_OK;
346 // support for non standard case for compatibility
347 // e.g. <META charset="ISO-8859-1">
348 PRInt32 numOfAttributes = keys->Count();
349 if ((numOfAttributes >= 3) &&
350 (keys->StringAt(0)->LowerCaseEqualsLiteral("charset")))
352 nsAutoString srcStr((values->StringAt(numOfAttributes-2))->get());
353 PRInt32 err;
354 PRInt32 src = srcStr.ToInteger(&err);
355 // if we cannot convert the string into PRInt32, return error
356 if (NS_FAILED(err))
357 return NS_ERROR_ILLEGAL_VALUE;
359 // current charset have a lower priority
360 if (kCharsetFromMetaTag > src)
362 nsCAutoString newCharset;
363 newCharset.AssignWithConversion(values->StringAt(0)->get());
365 nsCAutoString preferred;
366 res = mAlias->GetPreferred(newCharset,
367 preferred);
368 if (NS_SUCCEEDED(res))
370 // compare against the current charset,
371 // also some charsets which should have been found in
372 // the BOM detection.
373 nsString* currentCharset = values->StringAt(numOfAttributes-3);
374 if (!preferred.Equals(NS_LossyConvertUTF16toASCII(*currentCharset)) &&
375 !preferred.EqualsLiteral("UTF-16") &&
376 !preferred.EqualsLiteral("UTF-16BE") &&
377 !preferred.EqualsLiteral("UTF-16LE") &&
378 !preferred.EqualsLiteral("UTF-32BE") &&
379 !preferred.EqualsLiteral("UTF-32LE"))
380 AppendASCIItoUTF16(preferred, aCharset);
385 return res;
388 //-------------------------------------------------------------------------
389 NS_IMETHODIMP nsMetaCharsetObserver::Observe(nsISupports *aSubject,
390 const char *aTopic,
391 const PRUnichar *aData)
393 nsresult rv = NS_OK;
394 if (!nsCRT::strcmp(aTopic, "parser-service-start")) {
395 rv = Start();
397 return rv;
400 //-------------------------------------------------------------------------
401 NS_IMETHODIMP nsMetaCharsetObserver::Start()
403 nsresult rv = NS_OK;
405 if (bMetaCharsetObserverStarted == PR_FALSE) {
406 bMetaCharsetObserverStarted = PR_TRUE;
408 nsCOMPtr<nsIParserService> parserService(do_GetService(NS_PARSERSERVICE_CONTRACTID, &rv));
410 if (NS_FAILED(rv))
411 return rv;
413 rv = parserService->RegisterObserver(this,
414 NS_LITERAL_STRING("text/html"),
415 gWatchTags);
418 return rv;
420 //-------------------------------------------------------------------------
421 NS_IMETHODIMP nsMetaCharsetObserver::End()
423 nsresult rv = NS_OK;
424 if (bMetaCharsetObserverStarted == PR_TRUE) {
425 bMetaCharsetObserverStarted = PR_FALSE;
427 nsCOMPtr<nsIParserService> parserService(do_GetService(NS_PARSERSERVICE_CONTRACTID, &rv));
429 if (NS_FAILED(rv))
430 return rv;
432 rv = parserService->UnregisterObserver(this, NS_LITERAL_STRING("text/html"));
434 return rv;
436 //==========================================================================