4 This filter plugin implements minimal html tag balancing, and can also
5 convert ewiki_page() output into (hopefully) valid XHTML. It only works
6 around some markup problems found in ewiki or arising from Wiki markup
7 abuse; however, it provides no fix for <ul> inside <ul> or even <h2>
8 inside <p> problems (these should rather be fixed in the ewiki_format
9 function). So the following code is not meant to fix every possible HTML
10 file, and it certainly won't make valid HTML files out of random binary
11 data. For full HTML spec conformance you should rather use W3C tidy (by
12 using your web server's "Filter" directive).
16 define("EWIKI_XHTML", 1);  // defines the constant EWIKI_XHTML as 1 (presumably switches ewiki to XHTML output -- confirm against the ewiki core)
17 $ewiki_plugins["page_final"][] = "ewiki_html_tag_balancer";  // appends the balancer's function name to the "page_final" plugin list
20 function ewiki_html_tag_balancer(&$html) {
23 $html_standalone = array(
25 "input", "meta", "link",
28 "a", "abbr", "acronym", "address", "applet", "area", "b", "base",
29 "basefont", "bdo", "big", "blockquote", "body", "br", "button",
30 "caption", "center", "cite", "code", "col", "colgroup", "dd", "del",
31 "dfn", "dir", "div", "dl", "dt", "em", "fieldset", "font", "form",
32 "h1", "h2", "h3", "h4", "h5", "h6", "head", "hr", "html", "i",
33 "iframe", "img", "input", "ins", "kbd", "label", "legend", "li",
34 "link", "map", "menu", "meta", "noframes", "noscript", "object", "ol",
35 "optgroup", "option", "p", "param", "pre", "q", "s", "samp", "script",
36 "select", "small", "span", "strike", "strong", "style", "sub", "sup",
37 "table", "tbody", "td", "textarea", "tfoot", "th", "thead", "title",
38 "tr", "tt", "u", "ul", "var",
39 #-- H2.0 "nextid", "listing", "xmp", "plaintext",
40 #-- H3.2 "frame", "frameset",
41 #-- X1.1 "rb", "rbc", "rp", "rt", "rtc", "ruby",
43 $close_opened_when = array(
44 "p", "div", "ul", "td", "table", "tr",
47 $html_tags = array_merge( (array) $html_tags, array(
48 "bgsound", "embed", "layer", "multicol", "nobr", "noembed",
52 #-- walk through all tags
57 $loop = (int)$len / 3;
58 while (($pos < $len) && $loop--) {
61 $l = strpos($html, "<", $pos);
62 $r = strpos($html, ">", $l);
63 if (($l===false) or ($r===false)) {
65 $done .= substr($html, $pos);
69 #-- copy plain text part
71 $done .= substr($html, $pos, $l-$pos);
75 #-- analyze current html tag
78 $tag = substr($html, $l +
1, $r - $l - 1);
80 #-- split into name and attributes
81 $tname = strtolower(strtok($tag, " \t\n>")); // LOWERCASING not needed here really
82 ($tattr = strtok(">")) && ($tattr = " $tattr");
84 // attribute checking could go here
85 // (here we just assume good output from ewiki core)
89 if (substr($tname, 0, 3) == "!--") {
90 $r = strpos($html, "-->", $l+
4);
92 $done .= substr($html, $l, $r-$l+
3);
97 elseif ($tname[0] != "/") {
100 if($tname=='![cdata[') {
101 $tname = strtoupper($tname); // Needs to be uppercase for XHTML compliance
102 // LEAVE THE POOR THING ALONE!
105 else if (in_array($tname, $html_standalone)) {
106 $tattr = rtrim(rtrim($tattr, "/"));
113 if (in_array($tname, $html_tags)) {
117 #$tattr .= " class=\"$tname\"";
120 array_push($tree, $tname);
123 $tag = "$tname$tattr";
127 $tname = substr($tname, 1);
129 if (!in_array($tname, $html_tags)) {
133 #-- check if this is allowed
135 continue; // ignore closing tag
137 $last = array_pop($tree);
138 if ($last != $tname) {
140 #-- close until last opened block element
141 if (in_array($tname, $close_opened_when)) {
145 while (($last = array_pop($tree)) && ($last!=$tname));
147 #-- close last, close current, reopen last
149 array_push($tree, $last);
150 $done .= "</$last></$tname><$last>";
158 #-- readd closing-slash to tag name
166 #-- close still open tags
167 while ($tree && ($last = array_pop($tree))) {
171 #-- copy back changes