1 /* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
2 /* ***** BEGIN LICENSE BLOCK *****
3 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 * The contents of this file are subject to the Mozilla Public License Version
6 * 1.1 (the "License"); you may not use this file except in compliance with
7 * the License. You may obtain a copy of the License at
8 * http://www.mozilla.org/MPL/
10 * Software distributed under the License is distributed on an "AS IS" basis,
11 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
12 * for the specific language governing rights and limitations under the
15 * The Original Code is mozilla.org code.
17 * The Initial Developer of the Original Code is
18 * Netscape Communications Corporation.
19 * Portions created by the Initial Developer are Copyright (C) 1999
20 * the Initial Developer. All Rights Reserved.
24 * Alternatively, the contents of this file may be used under the terms of
25 * either of the GNU General Public License Version 2 or later (the "GPL"),
26 * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
27 * in which case the provisions of the GPL or the LGPL are applicable instead
28 * of those above. If you wish to allow use of your version of this file only
29 * under the terms of either the GPL or the LGPL, and not to allow others to
30 * use your version of this file under the terms of the MPL, indicate your
31 * decision by deleting the provisions above and replace them with the notice
32 * and other provisions required by the GPL or the LGPL. If you do not delete
33 * the provisions above, a recipient may use your version of this file under
34 * the terms of any one of the MPL, the GPL or the LGPL.
36 * ***** END LICENSE BLOCK ***** */
38 #include "nsICharsetAlias.h"
39 #include "nsMetaCharsetObserver.h"
40 #include "nsIMetaCharsetService.h"
41 #include "nsIElementObserver.h"
42 #include "nsIObserver.h"
43 #include "nsIObserverService.h"
44 #include "nsISupports.h"
46 #include "nsIParser.h"
48 #include "nsCharDetDll.h"
49 #include "nsIServiceManager.h"
50 #include "nsObserverBase.h"
51 #include "nsWeakReference.h"
52 #include "nsIParserService.h"
53 #include "nsParserCIID.h"
54 #include "nsMetaCharsetCID.h"
55 #include "nsReadableUtils.h"
56 #include "nsUnicharUtils.h"
// Class ID passed to do_GetService() in the constructor to obtain the
// nsICharsetAlias service.
58 static NS_DEFINE_CID(kCharsetAliasCID
, NS_CHARSETALIAS_CID
);
// File-local array of eHTMLTags values; the initializer is not visible in this
// excerpt. NOTE(review): presumably the tags this observer registers for --
// confirm against the RegisterObserver() call in Start().
60 static const eHTMLTags gWatchTags
[] =
65 //-------------------------------------------------------------------------
// Constructor: marks the observer as not started and requests the
// nsICharsetAlias service identified by kCharsetAliasCID. What happens when
// the lookup succeeds is not visible in this excerpt.
66 nsMetaCharsetObserver::nsMetaCharsetObserver()
68 bMetaCharsetObserverStarted
= PR_FALSE
;
// 'res' receives the status of the service lookup (its declaration is not
// visible in this excerpt).
71 nsCOMPtr
<nsICharsetAlias
> calias(do_GetService(kCharsetAliasCID
, &res
));
72 if(NS_SUCCEEDED(res
)) {
76 //-------------------------------------------------------------------------
// Destructor; its body is not visible in this excerpt.
77 nsMetaCharsetObserver::~nsMetaCharsetObserver()
81 //-------------------------------------------------------------------------
// XPCOM boilerplate for nsMetaCharsetObserver: AddRef/Release implementations
// followed by a QueryInterface implementation generated for four interfaces.
// Only nsIMetaCharsetService and nsISupportsWeakReference are visible in this
// excerpt; the other two interface names are on lines not shown here.
82 NS_IMPL_ADDREF ( nsMetaCharsetObserver
)
83 NS_IMPL_RELEASE ( nsMetaCharsetObserver
)
86 NS_IMPL_QUERY_INTERFACE4(nsMetaCharsetObserver
,
89 nsIMetaCharsetService
,
90 nsISupportsWeakReference
)
92 //-------------------------------------------------------------------------
// Notify overload taking the tag as a string.
//   aTag            - tag name; must equal "meta" (compared via
//                     LowerCaseEqualsLiteral, i.e. ignoring case).
//   numOfAttributes - number of entries in nameArray / valueArray.
//   nameArray       - attribute names.
//   valueArray      - attribute values.
// The first parameter (aDocumentID, used in the forwarding call below) is
// declared on a line not visible in this excerpt.
// Returns NS_ERROR_ILLEGAL_VALUE for any other tag; otherwise returns the
// result of the Notify(aDocumentID, numOfAttributes, nameArray, valueArray)
// overload.
93 NS_IMETHODIMP
nsMetaCharsetObserver::Notify(
95 const PRUnichar
* aTag
,
96 PRUint32 numOfAttributes
,
97 const PRUnichar
* nameArray
[],
98 const PRUnichar
* valueArray
[])
// Reject everything that is not a META element.
101 if(!nsDependentString(aTag
).LowerCaseEqualsLiteral("meta"))
102 return NS_ERROR_ILLEGAL_VALUE
;
104 return Notify(aDocumentID
, numOfAttributes
, nameArray
, valueArray
);
106 //-------------------------------------------------------------------------
// Notify overload taking the tag as an enum value.
//   aDocumentID     - document identifier, passed through unchanged.
//   numOfAttributes - number of entries in nameArray / valueArray.
//   nameArray       - attribute names.
//   valueArray      - attribute values.
// The aTag parameter compared below is declared on a line not visible in this
// excerpt.
// Returns NS_ERROR_ILLEGAL_VALUE unless aTag is eHTMLTag_meta; otherwise
// returns the result of the Notify(aDocumentID, numOfAttributes, nameArray,
// valueArray) overload.
107 NS_IMETHODIMP
nsMetaCharsetObserver::Notify(
108 PRUint32 aDocumentID
,
110 PRUint32 numOfAttributes
,
111 const PRUnichar
* nameArray
[],
112 const PRUnichar
* valueArray
[])
// Reject everything that is not a META element.
114 if(eHTMLTag_meta
!= aTag
)
115 return NS_ERROR_ILLEGAL_VALUE
;
117 return Notify(aDocumentID
, numOfAttributes
, nameArray
, valueArray
);
// Notify overload taking raw name/value arrays.
//   aDocumentID     - document identifier (only referenced by the
//                     commented-out call at the end).
//   numOfAttributes - number of entries in nameArray / valueArray.
//   nameArray       - attribute names.
//   valueArray      - attribute values.
// Pushes every name and value pointer into the local 'keys' and 'values'
// containers (their declarations are not visible in this excerpt) and then
// returns NS_OK. The forwarding call that would consume those containers is
// commented out, so in the visible code the collected attributes are not
// passed on.
120 NS_IMETHODIMP
nsMetaCharsetObserver::Notify(
121 PRUint32 aDocumentID
,
122 PRUint32 numOfAttributes
,
123 const PRUnichar
* nameArray
[],
124 const PRUnichar
* valueArray
[])
// Copy the attribute pointers (not the strings) into keys / values.
129 for(i
=0;i
<numOfAttributes
;i
++)
131 keys
.Push((void*)nameArray
[i
]);
132 values
.Push((void*)valueArray
[i
]);
// Always succeeds; the original forwarding call is kept only as a comment.
134 return NS_OK
;//Notify((nsISupports*)aDocumentID, &keys, &values);
// Notify overload used with string arrays and observer flags.
//   aWebShell - passed through to the keys/values overload.
//   aChannel  - passed through to the keys/values overload.
//   aTag      - tag name; compared against "meta" ignoring case.
//   keys      - attribute names.
//   values    - attribute values.
//   aFlags    - observer flags; nsIElementObserver::IS_DOCUMENT_WRITE
//               suppresses the tag check and forwarding below.
// 'result' starts as NS_OK. When IS_DOCUMENT_WRITE is not set, a non-META tag
// sets it to NS_ERROR_ILLEGAL_VALUE, and the visible code also assigns it the
// result of Notify(aWebShell, aChannel, keys, values).
// NOTE(review): the lines between those two assignments are not visible in
// this excerpt; presumably the forwarding call sits in an else branch --
// confirm against the full file. The final return is likewise not visible.
136 NS_IMETHODIMP
nsMetaCharsetObserver::Notify(
137 nsISupports
* aWebShell
,
138 nsISupports
* aChannel
,
139 const PRUnichar
* aTag
,
140 const nsStringArray
* keys
,
141 const nsStringArray
* values
,
142 const PRUint32 aFlags
)
144 nsresult result
= NS_OK
;
145 // bug 125317 - content written by document.write is already Unicode.
146 if (!(aFlags
& nsIElementObserver::IS_DOCUMENT_WRITE
)) {
147 if(!nsDependentString(aTag
).LowerCaseEqualsLiteral("meta")) {
148 result
= NS_ERROR_ILLEGAL_VALUE
;
151 result
= Notify(aWebShell
, aChannel
, keys
, values
);
// True when ch is one of the characters that may surround an attribute name
// or value and must be skipped before comparison: space, horizontal tab,
// carriage return or line feed. (The previous definition tested '\b',
// backspace, so tabs were never skipped.)
// The argument is parenthesized so compound expressions are handled correctly;
// it is still evaluated more than once, so pass only side-effect-free
// expressions such as *ptr.
#define IS_SPACE_CHARS(ch) ((ch) == ' ' || (ch) == '\t' || (ch) == '\r' || (ch) == '\n')
// Core META handling: inspects the attributes of one element and, when they
// declare a charset different from the current one, reports it through
// NotifyWebShell() with source kCharsetFromMetaTag.
//   aWebShell - passed to NotifyWebShell().
//   aChannel  - second parameter; no use of it is visible in this excerpt.
//   keys      - attribute names; must not be null.
//   values    - attribute values; must not be null and is expected to have
//               the same Count() as keys.
// The last three attributes are private ones appended by the caller, asserted
// below to be named 'charset', 'charsetSource' and 'X_COMMAND' (in that
// order); their values carry the current charset and its source.
// Visible return values: NS_ERROR_ABORT and NS_ERROR_ILLEGAL_VALUE (the
// conditions guarding them are on lines not visible here) and NS_OK when the
// current charset source is already at least kCharsetFromMetaTag. 'res' is
// also assigned the NotifyWebShell() result or
// NS_HTMLPARSER_VALID_META_CHARSET; the final return is not visible.
159 NS_IMETHODIMP
nsMetaCharsetObserver::Notify(
160 nsISupports
* aWebShell
,
161 nsISupports
* aChannel
,
162 const nsStringArray
* keys
,
163 const nsStringArray
* values
)
165 NS_PRECONDITION(keys
!=nsnull
&& values
!=nsnull
,"Need key-value pair");
167 PRInt32 numOfAttributes
= keys
->Count();
168 NS_ASSERTION( numOfAttributes
== values
->Count(), "size mismatch");
// Expected names of the three trailing private attributes, spelled out as
// NUL-terminated PRUnichar arrays for the nsCRT::strcmp checks below.
172 PRUnichar Uxcommand
[]={'X','_','C','O','M','M','A','N','D','\0'};
173 PRUnichar UcharsetSource
[]={'c','h','a','r','s','e','t','S','o','u','r','c','e','\0'};
174 PRUnichar Ucharset
[]={'c','h','a','r','s','e','t','\0'};
176 NS_ASSERTION(numOfAttributes
>= 3, "should have at least 3 private attribute");
177 NS_ASSERTION(0==nsCRT::strcmp(Uxcommand
,(keys
->StringAt(numOfAttributes
-1))->get()),"last name should be 'X_COMMAND'" );
178 NS_ASSERTION(0==nsCRT::strcmp(UcharsetSource
,(keys
->StringAt(numOfAttributes
-2))->get()),"2nd last name should be 'charsetSource'" );
179 NS_ASSERTION(0==nsCRT::strcmp(Ucharset
,(keys
->StringAt(numOfAttributes
-3))->get()),"3rd last name should be 'charset'" );
// mAlias is needed further down for Equals() / GetPreferred().
// NOTE(review): the condition guarding the NS_ERROR_ABORT return is not
// visible in this excerpt; presumably it tests mAlias -- confirm.
182 NS_ASSERTION(mAlias
, "Didn't get nsICharsetAlias in constructor");
185 return NS_ERROR_ABORT
;
187 // we need at least 5 - HTTP-EQUIV, CONTENT and 3 private
188 if(numOfAttributes
>= 5 )
// Current charset and its source, read from the values of the private
// 'charset' and 'charsetSource' attributes.
190 const PRUnichar
*charset
= (values
->StringAt(numOfAttributes
-3))->get();
191 const PRUnichar
*source
= (values
->StringAt(numOfAttributes
-2))->get();
193 nsAutoString
srcStr(source
);
194 PRInt32 src
= srcStr
.ToInteger(&err
);
195 // if we cannot convert the string into PRInt32, return error
196 NS_ASSERTION(NS_SUCCEEDED(err
), "cannot get charset source");
198 return NS_ERROR_ILLEGAL_VALUE
;
200 if(kCharsetFromMetaTag
<= src
)
201 return NS_OK
; // current charset has higher priority. don't bother to do the following
// Values of the http-equiv, content and charset attributes, if present.
204 const PRUnichar
*httpEquivValue
=nsnull
;
205 const PRUnichar
*contentValue
=nsnull
;
206 const PRUnichar
*charsetValue
=nsnull
;
// Scan only the real attributes; the three private ones at the end are skipped.
208 for(i
=0;i
<(numOfAttributes
-3);i
++)
210 const PRUnichar
*keyStr
;
211 keyStr
= (keys
->StringAt(i
))->get();
213 //Change 3.190 in nsHTMLTokens.cpp allow ws/tab/cr/lf exist before
214 // and after text value, this need to be skipped before comparison
215 while(IS_SPACE_CHARS(*keyStr
))
// NOTE(review): each Substring() below spans a fixed number of characters
// (10 or 7) starting at keyStr; no check that the key is at least that long is
// visible in this excerpt -- confirm shorter keys cannot read past the buffer.
218 if(Substring(keyStr
, keyStr
+10).LowerCaseEqualsLiteral("http-equiv"))
219 httpEquivValue
= values
->StringAt(i
)->get();
220 else if(Substring(keyStr
, keyStr
+7).LowerCaseEqualsLiteral("content"))
221 contentValue
= values
->StringAt(i
)->get();
222 else if (Substring(keyStr
, keyStr
+7).LowerCaseEqualsLiteral("charset"))
223 charsetValue
= values
->StringAt(i
)->get();
225 NS_NAMED_LITERAL_STRING(contenttype
, "Content-Type");
226 NS_NAMED_LITERAL_STRING(texthtml
, "text/html");
// Both http-equiv and content are needed from here on; the statement executed
// when either is missing is not visible in this excerpt.
228 if(nsnull
== httpEquivValue
|| nsnull
== contentValue
)
231 while(IS_SPACE_CHARS(*httpEquivValue
))
233 // skip opening quote
234 if (*httpEquivValue
== '\'' || *httpEquivValue
== '\"')
237 while(IS_SPACE_CHARS(*contentValue
))
239 // skip opening quote
240 if (*contentValue
== '\'' || *contentValue
== '\"')
// Case-insensitive prefix checks: http-equiv must start with "Content-Type"
// and content must start with "text/html".
244 Substring(httpEquivValue
,
245 httpEquivValue
+contenttype
.Length()).Equals(contenttype
,
246 nsCaseInsensitiveStringComparator())
248 Substring(contentValue
,
249 contentValue
+texthtml
.Length()).Equals(texthtml
,
250 nsCaseInsensitiveStringComparator())
254 nsCAutoString newCharset
;
// Without an explicit charset attribute, extract the charset from the
// "charset=" part of the content value.
256 if (nsnull
== charsetValue
)
// NOTE(review): the +9 assumes contentValue holds at least the nine
// characters of "text/html" -- confirm this is only reached after the prefix
// check above has matched.
258 nsAutoString
contentPart1(contentValue
+9); // after "text/html"
259 PRInt32 start
= contentPart1
.RFind("charset=", PR_TRUE
) ;
260 PRInt32 end
= contentPart1
.Length();
261 if(kNotFound
!= start
)
263 start
+= 8; // 8 = "charset=".length
264 while (start
< end
&& contentPart1
.CharAt(start
) == PRUnichar(' '))
// The charset name ends at the first quote, semicolon or space, or at the end
// of the string if none of those is found.
267 end
= contentPart1
.FindCharInSet("\'\"; ", start
);
268 if(kNotFound
== end
)
269 end
= contentPart1
.Length();
270 NS_ASSERTION(end
>=start
, "wrong index");
271 LossyCopyUTF16toASCII(Substring(contentPart1
, start
, end
-start
),
// Otherwise the explicit charset attribute value is used as-is.
278 LossyCopyUTF16toASCII(nsDependentString(charsetValue
), newCharset
);
281 nsCAutoString charsetString
; charsetString
.AssignWithConversion(charset
);
283 if (!newCharset
.IsEmpty())
// Only act when the declared charset differs from the current one, first by
// case-insensitive name and then through the alias service.
285 if(! newCharset
.Equals(charsetString
, nsCaseInsensitiveCStringComparator()))
287 PRBool same
= PR_FALSE
;
288 nsresult res2
= mAlias
->Equals( newCharset
, charsetString
, &same
);
289 if(NS_SUCCEEDED(res2
) && (! same
))
291 nsCAutoString preferred
;
292 res2
= mAlias
->GetPreferred(newCharset
, preferred
);
293 if(NS_SUCCEEDED(res2
))
295 // following charset should have been detected by parser
296 if (!preferred
.EqualsLiteral("UTF-16") &&
297 !preferred
.EqualsLiteral("UTF-16BE") &&
298 !preferred
.EqualsLiteral("UTF-16LE") &&
299 !preferred
.EqualsLiteral("UTF-32") &&
300 !preferred
.EqualsLiteral("UTF-32BE") &&
301 !preferred
.EqualsLiteral("UTF-32LE")) {
302 // Propagate the error message so that the parser can
303 // shutdown correctly. - Ref. Bug 96440
304 res
= NotifyWebShell(aWebShell
,
307 kCharsetFromMetaTag
);
309 } // if(NS_SUCCEEDED(res)
313 res
= NS_HTMLPARSER_VALID_META_CHARSET
;
314 } // if EqualIgnoreCase
315 } // if !newCharset.IsEmpty()
// Handling of the non-standard <META charset="..."> form via
// GetCharsetFromCompatibilityTag(). NOTE(review): the branch structure that
// leads here is not visible in this excerpt -- confirm when this path runs.
320 nsAutoString compatCharset
;
321 if (NS_SUCCEEDED(GetCharsetFromCompatibilityTag(keys
, values
, compatCharset
)))
323 if (!compatCharset
.IsEmpty()) {
324 res
= NotifyWebShell(aWebShell
,
326 NS_ConvertUTF16toUTF8(compatCharset
).get(),
327 kCharsetFromMetaTag
);
334 //-------------------------------------------------------------------------
// Extracts a charset from the non-standard <META charset="..."> form.
//   keys   - attribute names; the first one is tested against "charset".
//   values - attribute values; the last three are the private
//            charset / charsetSource / X_COMMAND values.
// A third parameter, aCharset (the output string), is declared on a line not
// visible in this excerpt. It is truncated first and then receives the
// preferred charset name only when all of the following hold: there are at
// least three attributes, the first key equals "charset" ignoring case, the
// current charset source is lower than kCharsetFromMetaTag, the alias lookup
// succeeds, and the preferred name is neither the current charset nor one of
// the UTF-16 / UTF-32 names.
// Visible return values: NS_ERROR_ABORT and NS_ERROR_ILLEGAL_VALUE (their
// guarding conditions are on lines not visible here); the final return is not
// visible either.
335 NS_IMETHODIMP
nsMetaCharsetObserver::GetCharsetFromCompatibilityTag(
336 const nsStringArray
* keys
,
337 const nsStringArray
* values
,
341 return NS_ERROR_ABORT
;
// Start from an empty result so callers can test IsEmpty().
343 aCharset
.Truncate(0);
344 nsresult res
= NS_OK
;
347 // support for non standard case for compatibility
348 // e.g. <META charset="ISO-8859-1">
349 PRInt32 numOfAttributes
= keys
->Count();
350 if ((numOfAttributes
>= 3) &&
351 (keys
->StringAt(0)->LowerCaseEqualsLiteral("charset")))
// Source of the current charset, taken from the second-to-last value.
353 nsAutoString
srcStr((values
->StringAt(numOfAttributes
-2))->get());
355 PRInt32 src
= srcStr
.ToInteger(&err
);
356 // if we cannot convert the string into PRInt32, return error
358 return NS_ERROR_ILLEGAL_VALUE
;
360 // current charset have a lower priority
361 if (kCharsetFromMetaTag
> src
)
363 nsCAutoString newCharset
;
364 newCharset
.AssignWithConversion(values
->StringAt(0)->get());
366 nsCAutoString preferred
;
367 res
= mAlias
->GetPreferred(newCharset
,
369 if (NS_SUCCEEDED(res
))
371 // compare against the current charset,
372 // also some charsets which should have been found in
373 // the BOM detection.
374 nsString
* currentCharset
= values
->StringAt(numOfAttributes
-3);
375 if (!preferred
.Equals(NS_LossyConvertUTF16toASCII(*currentCharset
)) &&
376 !preferred
.EqualsLiteral("UTF-16") &&
377 !preferred
.EqualsLiteral("UTF-16BE") &&
378 !preferred
.EqualsLiteral("UTF-16LE") &&
379 !preferred
.EqualsLiteral("UTF-32") &&
380 !preferred
.EqualsLiteral("UTF-32BE") &&
381 !preferred
.EqualsLiteral("UTF-32LE"))
382 AppendASCIItoUTF16(preferred
, aCharset
);
390 //-------------------------------------------------------------------------
// nsIObserver callback.
//   aSubject - notification subject; no use of it is visible in this excerpt.
//   aData    - notification data; no use of it is visible in this excerpt.
// The aTopic parameter tested below is declared on a line not visible here.
// The visible code only checks whether aTopic equals "parser-service-start";
// the action taken on a match and the return value are not visible.
391 NS_IMETHODIMP
nsMetaCharsetObserver::Observe(nsISupports
*aSubject
,
393 const PRUnichar
*aData
)
// nsCRT::strcmp() returns 0 on equality, hence the negation.
396 if (!nsCRT::strcmp(aTopic
, "parser-service-start")) {
402 //-------------------------------------------------------------------------
// Starts observing: if the observer has not been started yet, marks it as
// started, obtains the parser service and registers this object as an
// observer for "text/html". The remaining RegisterObserver() arguments, the
// handling of 'rv' and the return statement are not visible in this excerpt.
403 NS_IMETHODIMP
nsMetaCharsetObserver::Start()
// The flag is set before the service lookup, so the visible code does not
// attempt registration again on a later call.
407 if (bMetaCharsetObserverStarted
== PR_FALSE
) {
408 bMetaCharsetObserverStarted
= PR_TRUE
;
410 nsCOMPtr
<nsIParserService
> parserService(do_GetService(NS_PARSERSERVICE_CONTRACTID
, &rv
));
415 rv
= parserService
->RegisterObserver(this,
416 NS_LITERAL_STRING("text/html"),
422 //-------------------------------------------------------------------------
// Stops observing: if the observer is currently started, marks it as not
// started, obtains the parser service and unregisters this object for
// "text/html". The handling of 'rv' and the return statement are not visible
// in this excerpt.
423 NS_IMETHODIMP
nsMetaCharsetObserver::End()
// The flag is cleared before the service lookup.
426 if (bMetaCharsetObserverStarted
== PR_TRUE
) {
427 bMetaCharsetObserverStarted
= PR_FALSE
;
429 nsCOMPtr
<nsIParserService
> parserService(do_GetService(NS_PARSERSERVICE_CONTRACTID
, &rv
));
434 rv
= parserService
->UnregisterObserver(this, NS_LITERAL_STRING("text/html"));
438 //==========================================================================