MDL-11082 Improved groups upgrade performance 1.8x -> 1.9; thanks Eloy for telling...
[moodle-pu.git] / lib / htmlpurifier / HTMLPurifier / EntityParser.php
blob25472413504a97aae3e5c9467804f325b6ef7865
1 <?php
3 require_once 'HTMLPurifier/EntityLookup.php';
4 require_once 'HTMLPurifier/Encoder.php';
6 // If we want to implement error collecting here, we'll need to use some
7 // sort of global data (probably trigger_error), because it's impossible to
8 // pass $config or $context to the callback functions.
/**
 * Handles referencing and dereferencing character entities.
 *
 * Entities are split into two groups: the "special" ones, whose decoded
 * form is one of the characters listed in $_special_dec2str, and everything
 * else. Each group has its own substitute*() entry point so that callers
 * can decode them at different stages.
 */
class HTMLPurifier_EntityParser
{

    /**
     * Reference to entity lookup table.
     * Lazily populated from HTMLPurifier_EntityLookup::instance() the first
     * time a named, non-special entity is encountered.
     * @protected
     */
    var $_entity_lookup;

    /**
     * Callback regex string for parsing entities.
     * Capture groups: 1. hex digits, 2. decimal digits (leading zeros
     * stripped), 3. entity name (XML style). The trailing semicolon is
     * optional.
     * @protected
     */
    var $_substituteEntitiesRegex =
'/&(?:[#]x([a-fA-F0-9]+)|[#]0*(\d+)|([A-Za-z_:][A-Za-z0-9.\-_:]*));?/';
//     1. hex             2. dec      3. string (XML style)

    /**
     * Decimal to parsed string conversion table for special entities.
     * @protected
     */
    var $_special_dec2str =
            array(
                    34 => '"',
                    38 => '&',
                    39 => "'",
                    60 => '<',
                    62 => '>'
            );

    /**
     * Stripped entity names to decimal conversion table for special entities.
     * Values are keys of $_special_dec2str.
     * @protected
     */
    var $_special_ent2dec =
            array(
                    'quot' => 34,
                    'amp'  => 38,
                    'lt'   => 60,
                    'gt'   => 62
            );

    /**
     * Substitutes non-special entities with their parsed equivalents.
     * Special entities (see $_special_dec2str / $_special_ent2dec) are left
     * untouched so that they can be decoded later.
     *
     * @protected
     * @param $string String to have non-special entities parsed.
     * @returns Parsed string.
     */
    function substituteNonSpecialEntities($string) {
        // it will try to detect missing semicolons, but don't rely on it
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'nonSpecialEntityCallback'),
            $string
            );
    }

    /**
     * Callback function for substituteNonSpecialEntities() that does the work.
     *
     * @warning Though this is public in order to let the callback happen,
     *          calling it directly is not recommended.
     * @param $matches  PCRE matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @returns Replacement string.
     */
    function nonSpecialEntityCallback($matches) {
        // replaces all but the special entities
        $entity = $matches[0];
        // The regex guarantees at least two characters ("&" plus one more),
        // and at least three for numeric entities, so these offsets exist.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            $is_hex = ($entity[2] === 'x');
            $code = $is_hex ? hexdec($matches[1]) : (int) $matches[2];

            // abort for special characters
            if (isset($this->_special_dec2str[$code])) return $entity;

            return HTMLPurifier_Encoder::unichr($code);
        } else {
            // named special entities are left for substituteSpecialEntities()
            if (isset($this->_special_ent2dec[$matches[3]])) return $entity;
            if (!$this->_entity_lookup) {
                $this->_entity_lookup = HTMLPurifier_EntityLookup::instance();
            }
            if (isset($this->_entity_lookup->table[$matches[3]])) {
                return $this->_entity_lookup->table[$matches[3]];
            } else {
                // unknown name: leave the text exactly as it was
                return $entity;
            }
        }
    }

    /**
     * Substitutes only special entities with their parsed equivalents.
     *
     * @notice We try to avoid calling this function because otherwise, it
     * would have to be called a lot (for every parsed section).
     *
     * @protected
     * @param $string String to have special entities parsed.
     * @returns Parsed string.
     */
    function substituteSpecialEntities($string) {
        return preg_replace_callback(
            $this->_substituteEntitiesRegex,
            array($this, 'specialEntityCallback'),
            $string);
    }

    /**
     * Callback function for substituteSpecialEntities() that does the work.
     *
     * This callback has same syntax as nonSpecialEntityCallback().
     *
     * @warning Though this is public in order to let the callback happen,
     *          calling it directly is not recommended.
     * @param $matches  PCRE-style matches array, with 0 the entire match, and
     *                  either index 1, 2 or 3 set with a hex value, dec value,
     *                  or string (respectively).
     * @returns Replacement string.
     */
    function specialEntityCallback($matches) {
        $entity = $matches[0];
        // See nonSpecialEntityCallback() for why these offsets always exist.
        $is_num = ($entity[1] === '#');
        if ($is_num) {
            $is_hex = ($entity[2] === 'x');
            $int = $is_hex ? hexdec($matches[1]) : (int) $matches[2];
            return isset($this->_special_dec2str[$int]) ?
                $this->_special_dec2str[$int] :
                $entity;
        } else {
            // Map name -> code point -> character. Returning the code point
            // itself would insert its decimal digits (e.g. "34") into the
            // output instead of the character it stands for.
            return isset($this->_special_ent2dec[$matches[3]]) ?
                $this->_special_dec2str[$this->_special_ent2dec[$matches[3]]] :
                $entity;
        }
    }

}