MDL-11082 Improved groups upgrade performance 1.8x -> 1.9; thanks Eloy for telling...
[moodle-pu.git] / mod / wiki / ewiki / plugins / moodle / f_fixhtml.php
blobdcf343c82df2f5c86c86f7aa3a1e45411fa7606e
1 <?php // $Id$
3 /*
4 This filter plugin implements minimal html tag balancing, and can also
5 convert ewiki_page() output into (hopefully) valid xhtml. It just works
6 around some markup problems found in ewiki and that may arise from Wiki
7 markup abuse; it however provides no fix for <ul> inside <ul> or even
8 <h2> inside <p> problems (this should rather be fixed in the ewiki_format
9 function). So the following code is not meant to fix every possible HTML file,
10 and it certainly won't make valid HTML files out of random binary data.
11 For full HTML spec conformance you should rather use W3C tidy (via
12 your web server's "Filter" directive).
13 */
#-- emit XHTML style markup (standalone tags get a trailing " /")
define('EWIKI_XHTML', 1);

#-- hook the balancer into the "page_final" plugin list
$ewiki_plugins['page_final'][] = 'ewiki_html_tag_balancer';
/*
   Minimal html tag balancer: walks through all "<...>" sequences of $html,
   keeps a stack of the currently open elements and rewrites the markup so
   that every opened element gets closed again, in the right order.

   $html   string, passed by reference and replaced by the balanced markup

   Behaviour:
    - standalone tags (img, br, hr, input, meta, link) are never put on the
      stack; in XHTML mode (EWIKI_XHTML, assumed on when the constant is
      not defined) they get a trailing " /"
    - html comments are copied verbatim; an unterminated comment ends the
      scan and the remaining text is copied unchanged
    - declarations and processing instructions (<!DOCTYPE ...>, <?xml ...?>,
      <![CDATA[...) are passed through and never balanced
    - a closing tag without a matching open element is dropped
    - closing a block element (p, div, ul, td, table, tr) also closes
      everything that was opened inside of it
    - closing any other element closes the elements opened inside of it,
      then the element itself, and reopens the inner ones afterwards
      (reopened tags lose their attributes)
    - elements still open at the end of the input are closed there
*/
function ewiki_html_tag_balancer(&$html) {

   #-- vars
   $xhtml = defined("EWIKI_XHTML") ? (bool) EWIKI_XHTML : true;
   $html_standalone = array(
      "img", "br", "hr",
      "input", "meta", "link",
   );
   $close_opened_when = array(
      "p", "div", "ul", "td", "table", "tr",
   );

   #-- walk through all tags
   $html = (string) $html;
   $tree = array();
   $len = strlen($html);
   $done = "";
   $pos = 0;
   # every pass moves $pos forward (or breaks), so no iteration budget is needed
   while ($pos < $len) {

      #-- search next tag
      $l = strpos($html, "<", $pos);
      $r = ($l === false) ? false : strpos($html, ">", $l);
      if (($l === false) || ($r === false)) {
         # no further complete tag, copy the remaining text
         $done .= substr($html, $pos);
         break;
      }

      #-- copy plain text part
      $done .= substr($html, $pos, $l - $pos);
      $pos = $r + 1;
      $tag = (string) substr($html, $l + 1, $r - $l - 1);

      #-- split into name and attributes
      # (done by hand, strtok() would clobber the global tokenizer state)
      $inner = ltrim($tag, " \t\r\n");
      $nlen = strcspn($inner, " \t\r\n");
      $tname = strtolower((string) substr($inner, 0, $nlen));
      $tattr = (string) substr($inner, $nlen + 1);
      if ($tattr !== "") {
         $tattr = " $tattr";
      }

      // attribute checking could go here
      // (here we just assume good output from ewiki core)

      #-- html comment
      if (substr($tname, 0, 3) == "!--") {
         $end = strpos($html, "-->", $l + 4);
         if ($end === false) {
            # unterminated comment: keep the rest untouched and stop
            $done .= substr($html, $l);
            break;
         }
         $done .= substr($html, $l, $end - $l + 3);
         $pos = $end + 3;
         continue;
      }

      #-- no tag name at all ("<>", "< >"), keep it as it is
      if ($tname === "") {
         $done .= "<$tag>";
         continue;
      }

      #-- opening tag?
      if ($tname[0] != "/") {

         #-- cdata
         if ($tname == '![cdata[') {
            // Needs to be uppercase for XHTML compliance
            $tag = strtoupper($tname) . $tattr;
         }
         #-- other declarations and processing instructions
         else if (($tname[0] == "!") || ($tname[0] == "?")) {
            # passed through verbatim, these never get a closing tag
         }
         else {
            #-- "<br/>" style: slash glued to the tag name
            $selfclosed = false;
            if (substr($tname, -1) == "/") {
               $tname = rtrim($tname, "/");
               $selfclosed = true;
            }

            #-- standalone tag
            if (in_array($tname, $html_standalone, true)) {
               $tattr = rtrim(rtrim($tattr, "/"));
               if ($xhtml) {
                  $tattr .= " /";
               }
            }
            #-- explicitly self-closed element, nothing to balance
            else if ($selfclosed) {
               $tattr .= " /";
            }
            #-- normal tag
            else {
               $tree[] = $tname;
            }

            $tag = "$tname$tattr";
         }

         $done .= "<$tag>";
         continue;
      }

      #-- closing tag
      $tname = (string) substr($tname, 1);

      #-- find the innermost open element of that name
      $found = -1;
      for ($i = count($tree) - 1; $i >= 0; $i--) {
         if ($tree[$i] === $tname) {
            $found = $i;
            break;
         }
      }
      if ($found < 0) {
         continue;   // nothing to close, ignore closing tag
      }

      #-- close everything opened inside of it, then the element itself
      $inside = array_splice($tree, $found + 1);
      array_pop($tree);
      foreach (array_reverse($inside) as $open) {
         $done .= "</$open>";
      }
      $done .= "</$tname>";

      #-- reopen the inner elements, unless a block element was closed
      if (!in_array($tname, $close_opened_when, true)) {
         foreach ($inside as $open) {
            $done .= "<$open>";
            $tree[] = $open;
         }
      }
   }

   #-- close still open tags
   while ($tree) {
      $done .= "</" . array_pop($tree) . ">";
   }

   #-- copy back changes
   $html = $done;
}