// Declare the Output.CommentScriptContents directive. The arguments are the
// namespace, the directive name, true, the type string 'bool' and the help
// text (true is presumably the default value -- confirm against
// HTMLPurifier_ConfigSchema::define()).
HTMLPurifier_ConfigSchema::define(
    'Output',
    'CommentScriptContents',
    true,
    'bool',
    'Determines whether or not HTML Purifier should attempt to fix up the '
    . 'contents of script tags for legacy browsers with comments. '
    . 'This directive was available since 2.0.0.'
);

// Register Core.CommentScriptContents as an alias pointing at the
// Output.CommentScriptContents directive declared above.
HTMLPurifier_ConfigSchema::defineAlias(
    'Core', 'CommentScriptContents',
    'Output', 'CommentScriptContents'
);
// Declares the Output.TidyFormat directive: the define() call below passes
// 'Output', 'TidyFormat', false, 'bool' and a heredoc help text describing
// optional Tidy post-processing of the generated output.
// NOTE(review): in this copy the heredoc never reaches a closing HTML marker
// and the help text has gaps (e.g. the sentence ending "if Tidy is not" is
// cut off) -- restore the statement from a pristine copy before editing it.
11 // extension constraints could be factored into ConfigSchema
12 HTMLPurifier_ConfigSchema
::define(
13 'Output', 'TidyFormat', false, 'bool', <<<HTML
15 Determines whether or not to run Tidy on the final output for pretty
16 formatting reasons, such as indentation and wrap.
19 This can greatly improve readability for editors who are hand-editing
20 the HTML, but is by no means necessary
as HTML Purifier has already
21 fixed all major errors the HTML may have had
. Tidy is a non
-default
22 extension
, and this directive will silently fail
if Tidy is not
26 If you are looking to make the overall look of your page
's source
27 better, I recommend running Tidy on the entire page rather than just
28 user-content (after all, the indentation relative to the containing
29 blocks will be incorrect).
32 This directive was available since 1.1.1.
// Register Core.TidyFormat as an alias pointing at Output.TidyFormat.
HTMLPurifier_ConfigSchema::defineAlias(
    'Core', 'TidyFormat',
    'Output', 'TidyFormat'
);
// Declares the Output.Newline directive: define() receives 'Output',
// 'Newline', null, 'string/null' and a help text explaining that a null
// value means the system's newline type is auto-detected.
// NOTE(review): the string arguments are broken across lines in this copy
// and the closing quote/parenthesis of the call is not visible -- confirm
// the exact help text against a pristine copy before editing it.
38 HTMLPurifier_ConfigSchema::define('Output
', 'Newline
', null, 'string/null', '
40 Newline
string to format
final output with
. If left
null, HTML Purifier
41 will auto
-detect the
default newline type of the system
and use that
;
42 you can manually override it here
. Remember
, \r\n is Windows
, \r
43 is Mac
, and \n is Unix
. This directive was available since
2.0.1.
48 * Generates HTML from tokens.
49 * @todo Refactor interface so that configuration/context is determined
50 * upon instantiation, no need for messy generateFromTokens() calls
52 class HTMLPurifier_Generator
56 * Bool cache of %HTML.XHTML
62 * Bool cache of %Output.CommentScriptContents
65 var $_scriptFix = false;
68 * Cache of HTMLDefinition
74 * Generates HTML from an array of tokens.
75 * @param $tokens Array of HTMLPurifier_Token
76 * @param $config HTMLPurifier_Config object
77 * @return Generated HTML
// Builds one HTML string from a list of tokens: each token is rendered with
// generateFromToken() and appended to $html, the result is optionally passed
// through Tidy, and finally every "\n" is replaced by the configured newline.
// A falsy $config is replaced by HTMLPurifier_Config::createDefault(); an
// empty/falsy $tokens list short-circuits to ''.
// NOTE(review): $context is accepted by reference but is not used in any line
// visible here.
// NOTE(review): several lines of this method are missing from this copy (the
// initialisation of $html, closing braces, part of the Tidy branch and the
// final return) -- compare with a pristine copy before changing the logic.
79 function generateFromTokens($tokens, $config, &$context) {
81 if (!$config) $config = HTMLPurifier_Config::createDefault();
82 $this->_scriptFix = $config->get('Output
', 'CommentScriptContents
');
// Cache the HTML definition and its doctype's xml flag on the instance; the
// per-token helpers read $this->_def and $this->_xhtml.
84 $this->_def = $config->getHTMLDefinition();
85 $this->_xhtml = $this->_def->doctype->xml;
87 if (!$tokens) return '';
// NOTE(review): the special case below only tests the token name and that the
// token two positions ahead is an end token; it does not test that
// $tokens[$i] itself is a start token, so a closing script token followed by
// "<anything><end tag>" would also match -- confirm whether that sequence can
// reach this method.
88 for ($i = 0, $size = count($tokens); $i < $size; $i++) {
89 if ($this->_scriptFix && $tokens[$i]->name === 'script
'
90 && $i + 2 < $size && $tokens[$i+2]->type == 'end
') {
91 // script special case
92 // the contents of the script block must be ONE token
94 $html .= $this->generateFromToken($tokens[$i++]);
95 $html .= $this->generateScriptFromToken($tokens[$i++]);
96 // We're not going to
do this
: it wouldn
't be valid anyway
97 //while ($tokens[$i]->name != 'script
') {
98 // $html .= $this->generateScriptFromToken($tokens[$i++]);
101 $html .= $this->generateFromToken($tokens[$i]);
// Tidy post-processing runs only when %Output.TidyFormat is enabled and the
// tidy extension is loaded.
103 if ($config->get('Output
', 'TidyFormat
') && extension_loaded('tidy
')) {
105 $tidy_options = array(
107 'output
-xhtml
' => $this->_xhtml,
108 'show
-body
-only
' => true,
109 'indent
-spaces
' => 2,
// On PHP < 5 the procedural tidy_* functions are used; the $tidy->... calls
// further down presumably belong to the else branch for newer PHP versions
// (the branch header and the creation of $tidy are not visible here).
112 if (version_compare(PHP_VERSION, '5', '<')) {
113 tidy_set_encoding('utf8
');
114 foreach ($tidy_options as $key => $value) {
115 tidy_setopt($key, $value);
117 tidy_parse_string($html);
119 $html = tidy_get_output();
122 $tidy->parseString($html, $tidy_options, 'utf8
');
123 $tidy->cleanRepair();
124 $html = (string) $tidy;
127 // normalize newlines to system
128 $nl = $config->get('Output
', 'Newline
');
129 if ($nl === null) $nl = PHP_EOL;
130 $html = str_replace("\n", $nl, $html);
/**
 * Renders a single token as an HTML string.
 *
 * Start and empty tags are serialised together with their attributes (an
 * empty tag additionally gets " /" before the closing bracket when XHTML
 * output is active), end tags become "</name>", and text tokens are passed
 * through escape(). A token that has no type yields an empty string.
 *
 * @param $token HTMLPurifier_Token object.
 * @return Generated HTML
 */
function generateFromToken($token) {
    if (!isset($token->type)) {
        return '';
    }

    // switch compares loosely, exactly like the == checks it replaces.
    switch ($token->type) {
        case 'start':
            $attributes = $this->generateAttributes($token->attr, $token->name);
            $separator  = $attributes ? ' ' : '';
            return '<' . $token->name . $separator . $attributes . '>';

        case 'end':
            return '</' . $token->name . '>';

        case 'empty':
            $attributes = $this->generateAttributes($token->attr, $token->name);
            $separator  = $attributes ? ' ' : '';
            $selfClose  = $this->_xhtml ? ' /' : '';
            return '<' . $token->name . $separator . $attributes . $selfClose . '>';

        case 'text':
            return $this->escape($token->data);
    }

    // Any other token type produces no output.
    return '';
}
/**
 * Special case processor for the contents of script tags.
 *
 * A text token is trimmed and wrapped between a comment/CDATA opening line
 * and a matching closing line; a token of any other type is simply handed
 * to generateFromToken().
 *
 * @warning This runs into problems if there's already a literal
 *          --> somewhere inside the script contents.
 * @param $token HTMLPurifier_Token object.
 * @return Generated HTML
 */
function generateScriptFromToken($token) {
    if ($token->type == 'text') {
        // Remove a "//" sitting at the very end of the script (plus any
        // whitespace after it) before the text is trimmed and wrapped.
        // Wrapper technique: <http://lachy.id.au/log/2005/05/script-comments>
        $script = trim(preg_replace('#//\s*$#', '', $token->data));
        $pieces = array('<!--//--><![CDATA[//><!--', $script, '//--><!]]>');
        return implode("\n", $pieces);
    }

    return $this->generateFromToken($token);
}
178 * Generates attribute declarations from attribute array.
179 * @param $assoc_array_of_attributes Attribute array
180 * @return Generated HTML fragment for insertion.
// Builds the attribute portion of a tag from a key => value array; $element
// is used as the key into $this->_def->info to look up attribute definitions.
// Each ordinary pair is appended to $html as key="<escaped value>" followed
// by a single space.
// NOTE(review): the initialisation of $html, the body of the "minimized"
// branch and the return statement are not visible in this copy -- compare
// with a pristine copy before changing the logic.
182 function generateAttributes($assoc_array_of_attributes, $element) {
184 foreach ($assoc_array_of_attributes as $key => $value) {
185 if (!$this->_xhtml
) {
186 // remove namespaced attributes
187 if (strpos($key, ':') !== false) continue;
// Attributes whose definition for this element carries a non-empty
// "minimized" flag take a separate branch (only when not emitting XHTML).
188 if (!empty($this->_def
->info
[$element]->attr
[$key]->minimized
)) {
// Default form: key="value" with the value run through escape().
193 $html .= $key.'="'.$this->escape($value).'" ';
/**
 * Escapes raw text data.
 *
 * Delegates to htmlspecialchars() in ENT_COMPAT mode with the UTF-8
 * charset, so double quotes are converted while single quotes are left
 * untouched.
 *
 * @param $string String data to escape for HTML.
 * @return String escaped data.
 */
function escape($string) {
    $quoteStyle = ENT_COMPAT;
    $charset    = 'UTF-8';
    return htmlspecialchars($string, $quoteStyle, $charset);
}