Merge commit 'catalyst/MOODLE_19_STABLE' into mdl19-linuxchix
[moodle-linuxchix.git] / lib / editor / htmlarea / plugins / GetHtml / get-html.js
blobc9d74b25e52a0a30a316dd85aebae3c0a70d3917
1 /**
2   * Based on XML_Utility functions submitted by troels_kn.
3   * credit also to adios, who helped with reg exps:
4   * http://www.sitepoint.com/forums/showthread.php?t=201052
5   * 
6   * A replacement for HTMLArea.getHTML
7   *
8   * Features:
9   *   - Generates XHTML code
10   *   - Much faster than HTMLArea.getHTML
11   *   - Eliminates the hacks to accommodate browser quirks
12   *   - Returns correct code for Flash objects and scripts
13   *   - Formats html in an indented, readable format in html mode
14   *   - Preserves script and pre formatting
15   *   - Preserves formatting in comments
16   *   - Removes contenteditable from body tag in full-page mode
17   *   - Supports only7BitPrintablesInURLs config option
18   *   - Supports htmlRemoveTags config option
19   */
20   
/**
  * GetHtml plugin constructor.
  *
  * Only records the editor instance it is given; the actual work of this
  * plugin is done by the HTMLArea.getHTML / HTMLArea.prototype.cleanHTML
  * definitions installed further down in this file.
  *
  * @param {Object} editor the editor instance this plugin is attached to
  */
function GetHtml(editor) {
    this.editor = editor;
}
/**
  * Static descriptive metadata for the GetHtml plugin
  * (name, version, author and license).
  */
GetHtml._pluginInfo = {
    name          : "GetHtml",
    version       : "1.0",
    developer     : "Nelson Bright",
    developer_url : "http://www.brightworkweb.com/",
    license       : "htmlArea"
};
/**
  * Shared table of the regular expressions used by cleanHTML(), indent()
  * and getHTML(). Entries are referenced BY POSITION (c[0] .. c[22]), so the
  * order must never change; the index of each entry is repeated in the
  * leading comment.
  *
  * The entries are plain regex literals: the previous
  * `new RegExp().compile(/.../)` form relied on the deprecated
  * RegExp.prototype.compile and produced exactly the same source and flags.
  *
  * Most entries carry the /g flag and are therefore stateful when used with
  * test()/exec(); String.prototype.replace/match reset that state.
  */
HTMLArea.RegExpCache = [
/*00*/  /<\s*\/?([^\s\/>]+)[\s*\/>]/gi,//lowercase tags
/*01*/  /(\S*\s*=\s*)?_moz[^=>]*(=\s*[^>]*)?/gi,//strip _moz attributes
/*02*/  /\s*=\s*(([^'"][^>\s]*)([>\s])|"([^"]+)"|'([^']+)')/g,// find attributes
/*03*/  /\/>/g,//strip singlet terminators
        // older, looser variant of entry 04, kept for reference:
        // /<(br|hr|img|input|link|meta|param|embed)([^>]*)>/g
/*04*/  /<(br|hr|img|input|link|meta|param|embed|area)((\s*\S*="[^"]*")*)>/g,//terminate singlet tags
/*05*/  /(checked|compact|declare|defer|disabled|ismap|multiple|no(href|resize|shade|wrap)|readonly|selected)([\s>])/gi,//expand singlet attributes
/*06*/  /(="[^']*)'([^'"]*")/,//check quote nesting
/*07*/  /&(?=[^<]*>)/g,//expand query ampersands
/*08*/  /<\s+/g,//strip tagstart whitespace
/*09*/  /\s+(\/)?>/g,//trim whitespace
/*10*/  /\s{2,}/g,//trim extra whitespace
/*11*/  /\s+([^=\s]+)(="[^"]+")/g,// lowercase attribute names
/*12*/  /(\S*\s*=\s*)?contenteditable[^=>]*(=\s*[^>\s\/]*)?/gi,//strip contenteditable
/*13*/  /((href|src)=")([^\s]*)"/g, //find href and src for stripBaseHref()
/*14*/  /<\/?(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|br|hr|img|embed|param|pre|script|html|head|body|meta|link|title|area)[^>]*>/g,//any tag that indent() puts on its own line
/*15*/  /<\/(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|html|head|body|script)( [^>]*)?>/g,//blocklevel closing tag
/*16*/  /<(div|p|h[1-6]|table|tr|td|th|ul|ol|li|blockquote|object|html|head|body|script)( [^>]*)?>/g,//blocklevel opening tag
/*17*/  /<(br|hr|img|embed|param|pre|meta|link|title|area)[^>]*>/g,//singlet tag
/*18*/  /(^|<\/(pre|script)>)(\s|[^\s])*?(<(pre|script)[^>]*>|$)/g,//find content NOT inside pre and script tags
/*19*/  /(<pre[^>]*>)(\s|[^\s])*?(<\/pre>)/g,//find content inside pre tags
/*20*/  /(^|<!--(\s|\S)*?-->)((\s|\S)*?)(?=<!--(\s|\S)*?-->|$)/g,//find content NOT inside comments
/*21*/  /\S*=""/g, //find empty attributes
/*22*/  /<!--[\s\S]*?-->|<\?[\s\S]*?\?>|<[^>]*>/g //find all tags, including comments and php
];
/**
  * Cleans HTML into well-formed XHTML.
  *
  * Runs the markup through the HTMLArea.RegExpCache substitutions: lowercases
  * tag and attribute names, strips _moz* and contenteditable attributes,
  * quotes attribute values, drops empty attributes, expands minimized
  * attributes (checked -> checked="checked"), self-terminates singlet tags
  * and collapses superfluous whitespace.
  *
  * Afterwards every href/src value is
  *   - passed through this.stripBaseURL() when HTMLArea.is_ie is set, and
  *   - has its characters outside the printable 7-bit range escape()d when
  *     this.config.only7BitPrintablesInURLs is set.
  *
  * @param {string} sHtml markup to clean (getHTML() passes one tag at a time)
  * @return {string} the cleaned markup
  */
HTMLArea.prototype.cleanHTML = function(sHtml) {
    var c = HTMLArea.RegExpCache;
    var self = this;

    sHtml = sHtml.
        replace(c[0], function(str) { return str.toLowerCase(); } ).//lowercase tags/attribute names
        replace(c[1], ' ').//strip _moz attributes
        replace(c[12], ' ').//strip contenteditable
        replace(c[2], '="$2$4$5"$3').//add attribute quotes
        replace(c[21], ' ').//strip empty attributes
        replace(c[11], function(str, p1, p2) { return ' '+p1.toLowerCase()+p2; }).//lowercase attribute names
        replace(c[3], '>').//strip singlet terminators
        replace(c[9], '$1>').//trim whitespace
        replace(c[5], '$1="$1"$3').//expand singlet attributes
        replace(c[4], '<$1$2 />').//terminate singlet tags
        replace(c[6], '$1$2').//check quote nesting
//      replace(c[7], '&amp;').//expand query ampersands (currently disabled)
        replace(c[8], '<').//strip tagstart whitespace
        replace(c[10], ' ');//trim extra whitespace

    // The URL rewrites below use replacer functions on purpose: a replacement
    // *string* built from RegExp.$3 would hold only the FIRST matched URL and
    // be applied to every href/src in sHtml, and any "$" inside a URL would be
    // interpreted as a substitution pattern.
    if (HTMLArea.is_ie) {
        sHtml = sHtml.replace(c[13], function(match, prefix, attr, url) {
            return prefix + self.stripBaseURL(url) + '"';
        });
    }
    if (this.config.only7BitPrintablesInURLs) {
        sHtml = sHtml.replace(c[13], function(match, prefix, attr, url) {
            var safeUrl = url.replace(/([^!-~]+)/g, function(chr) { return escape(chr); });
            return prefix + safeUrl + '"';
        });
    }
    return sHtml;
};
/**
  * Prettifies html by inserting line breaks before tags and indenting
  * block-level tags.
  *
  * Content inside <pre>/<script> elements and inside comments is left
  * untouched. Everywhere else whitespace runs are collapsed to one space and
  * each tag matched by RegExpCache[14] is moved to its own line:
  *   - block-level opening tag: emitted at the current indent, then the
  *     indent grows by one unit;
  *   - block-level closing tag: the indent shrinks by one unit first;
  *   - singlet tag: emitted at the current indent.
  *
  * The running indent state is kept in HTMLArea.__nindent / __sindent /
  * __sindentChar, which are reset on every call.
  *
  * @param {string} s           html to format
  * @param {string} sindentChar one indentation unit; defaults to two spaces
  * @return {string} the formatted html, without a leading newline and
  *                  without spaces at the end of lines
  */
HTMLArea.indent = function(s, sindentChar) {
    HTMLArea.__nindent = 0;
    HTMLArea.__sindent = "";
    HTMLArea.__sindentChar = (typeof sindentChar == "undefined") ? "  " : sindentChar;
    var c = HTMLArea.RegExpCache;

    // Puts one tag on its own line and updates the indent state.
    // String.match is used (not RegExp.test) because the cached regexes are
    // global and test() would carry lastIndex over between calls.
    var placeTag = function(tag) {
        if (tag.match(c[16])) {
            // blocklevel opening tag - emit, then increase indent
            var line = "\n" + HTMLArea.__sindent + tag;
            HTMLArea.__sindent += HTMLArea.__sindentChar;
            ++HTMLArea.__nindent;
            return line;
        }
        if (tag.match(c[15])) {
            // blocklevel closing tag - decrease indent, then emit
            --HTMLArea.__nindent;
            HTMLArea.__sindent = "";
            for (var i = HTMLArea.__nindent; i > 0; --i) {
                HTMLArea.__sindent += HTMLArea.__sindentChar;
            }
            return "\n" + HTMLArea.__sindent + tag;
        }
        if (tag.match(c[17])) {
            // singlet tag
            return "\n" + HTMLArea.__sindent + tag;
        }
        return tag; // not expected: c[14] only matches tags covered above
    };

    if (HTMLArea.is_gecko) { //moz changes returns into <br> inside <pre> tags
        s = s.replace(c[19], function(pre) { return pre.replace(/<br \/>/g, "\n"); });
    }

    s = s.replace(c[18], function(chunk) { //skip pre and script tags
        return chunk.replace(c[20], function(st, comment, unused, text) { //exclude comments
            // `comment` is the (possibly empty) comment preceding `text`; it
            // is passed through verbatim so its formatting is preserved.
            var formatted = text.
                replace(/[\n\r]/gi, " ").
                replace(/\s+/gi, " ").
                replace(c[14], placeTag);
            return comment + formatted;
        });
    });

    // Drop the newline emitted in front of the very first tag, then ALWAYS
    // strip spaces at end of lines (previously skipped whenever the result
    // started with a newline, i.e. in the common case).
    if (s.charAt(0) == "\n") {
        s = s.substring(1, s.length);
    }
    return s.replace(/ *\n/g, '\n');
};
/**
  * Serializes a DOM node to XHTML by reading its innerHTML and passing every
  * tag through editor.cleanHTML(), one tag at a time. Comments and
  * processing instructions (<!-- ... -->, <? ... ?>) are returned untouched.
  *
  * @param {Node}    root       node to serialize: an element, or a document
  *                             fragment (nodeType 11)
  * @param {boolean} outputRoot when true the root element's own start and
  *                             end tags (with its specified attributes) are
  *                             emitted around the content; ignored for
  *                             document fragments
  * @param {Object}  editor     editor instance; supplies cleanHTML(), config
  *                             (htmlRemoveTags) and, for an <html> root, _doc
  * @return {string} the serialized markup
  */
HTMLArea.getHTML = function(root, outputRoot, editor) {
    var html = "";
    var c = HTMLArea.RegExpCache;

    if (root.nodeType == 11) {//document fragment
        //we can't get innerHTML from the root (type 11) node, so we
        //copy all the child nodes into a new div and get innerHTML from the div
        var div = document.createElement("div");
        var holder = root.insertBefore(div, root.firstChild);
        for (var node = holder.nextSibling; node; node = node.nextSibling) {
            holder.appendChild(node.cloneNode(true));
        }
        var fragmentHtml = holder.innerHTML;
        // take the scratch div out again so the caller's fragment is left
        // exactly as it was handed in
        root.removeChild(holder);
        html += fragmentHtml.replace(c[22], function(tag) {
            if (/^<[!\?]/.test(tag)) {
                return tag; //skip comments and php tags
            }
            return editor.cleanHTML(tag);
        });
    } else {
        var root_tag = (root.nodeType == 1) ? root.tagName.toLowerCase() : '';
        if (outputRoot) { //only happens with <html> tag in fullpage mode
            html += "<" + root_tag;
            var attrs = root.attributes; // strangely, this doesn't work in moz
            for (var i = 0; i < attrs.length; ++i) {
                var a = attrs.item(i);
                if (!a.specified) {
                    continue;
                }
                var name = a.nodeName.toLowerCase();
                // escape & and " so a value containing them cannot break out
                // of the double-quoted attribute
                var value = String(a.nodeValue).replace(/&/g, '&amp;').replace(/"/g, '&quot;');
                html += " " + name + '="' + value + '"';
            }
            html += ">";
        }
        var innerhtml;
        if (root_tag == "html") {
            innerhtml = editor._doc.documentElement.innerHTML;
        } else {
            innerhtml = root.innerHTML;
        }
        //pass tags to cleanHTML() one at a time
        //includes support for htmlRemoveTags config option
        html += innerhtml.replace(c[22], function(tag) {
            if (/^<[!\?]/.test(tag)) {
                return tag; //skip comments and php tags
            }
            var removeTags = editor.config.htmlRemoveTags;
            // the tag is tested with its leading "<" stripped (closing tags
            // keep their "</")
            if (removeTags && removeTags.test(tag.replace(/<([^\s>\/]+)/, '$1'))) {
                return '';
            }
            return editor.cleanHTML(tag);
        });
        //IE drops  all </li> tags in a list except the last one
        if (HTMLArea.is_ie) {
            html = html.replace(/<li( [^>]*)?>/g, '</li><li$1>').
                replace(/(<(ul|ol)[^>]*>)[\s\n]*<\/li>/g, '$1').
                replace(/<\/li>([\s\n]*<\/li>)+/g, '<\/li>');
        }
        if (HTMLArea.is_gecko) {
            html = html.replace(/(.*)<br \/>\n$/, '$1'). //strip trailing <br> added by moz
                replace(/^\n(.*)/, '$1'); //strip leading newline added by moz
        }
        if (outputRoot) {
            html += "</" + root_tag + ">";
        }
        // HTMLArea.indent(html) is not applied here; see bug #6106
    }
    return html;
};
//override (hack) outwardHtml() to handle onclick suppression
//
// The previous outwardHtml() implementation is kept as _origOutwardHtml and
// is still invoked by the override, after the design-mode guard has been
// stripped from onclick handlers.
HTMLArea.prototype._origOutwardHtml = HTMLArea.prototype.outwardHtml;

/**
  * Turns every
  *   onclick="try{if(document.designMode ...) return false;}catch(e){} window.open(
  * back into a plain
  *   onclick="window.open(
  * and then delegates to the original outwardHtml().
  *
  * @param {string} html markup to convert
  * @return {string} whatever the original outwardHtml() returns for the
  *                  rewritten markup
  */
HTMLArea.prototype.outwardHtml = function(html) {
    var guarded = "onclick=\"try{if(document.designMode && document.designMode == 'on') return false;}catch(e){} window.open(";
    var plain = "onclick=\"window.open(";
    // split/join rewrites ALL occurrences; String.replace with a string
    // pattern would only rewrite the first one.
    html = html.split(guarded).join(plain);
    html = this._origOutwardHtml(html);
    return html;
};