MDL-15942 - separate data escaped for database entry from unescaped data
[moodle-linuxchix.git] / lib / filterlib.php
blob94c410c2c941f71016044da3d3c22191151f49b1
1 <?php // $Id$
2 // Contains special functions that are particularly useful to filters
/**
 * A small value object that defines a phrase and the instructions for how
 * to process it. Filters can create an array of these and pass it to the
 * filter_phrases() function below.
 *
 * The plain properties are the caller's input; the work_* properties are a
 * cache filled in by filter_phrases() the first time the object is processed
 * (work_calculated tells whether that has already happened).
 **/
class filterobject {
    /** @var string the phrase to look for in the text */
    public $phrase;
    /** @var string markup emitted right before each match */
    public $hreftagbegin;
    /** @var string markup emitted right after each match */
    public $hreftagend;
    /** @var bool true to match the phrase case sensitively */
    public $casesensitive;
    /** @var bool true to skip matches directly preceded or followed by a word character */
    public $fullmatch;
    /** @var string|null text to output instead of the matched phrase (NULL keeps the match) */
    public $replacementphrase;
    /** @var string tag-stripped, protected and regexp-quoted version of $phrase */
    public $work_phrase;
    /** @var string protected version of $hreftagbegin */
    public $work_hreftagbegin;
    /** @var string version of $hreftagend actually used for the replacement */
    public $work_hreftagend;
    /** @var bool normalised version of $casesensitive */
    public $work_casesensitive;
    /** @var bool normalised version of $fullmatch */
    public $work_fullmatch;
    /** @var string replacement used by preg_replace ('$1' when no replacement phrase is given) */
    public $work_replacementphrase;
    /** @var bool whether the work_* values above have already been calculated */
    public $work_calculated;

    /**
     * Constructor.
     *
     * Declared as __construct() because a method that merely shares the
     * class name is not run by "new" on PHP 8, which would leave every
     * property unset.
     *
     * @param string $phrase the phrase to look for
     * @param string $hreftagbegin markup to put before each match
     * @param string $hreftagend markup to put after each match
     * @param bool $casesensitive whether matching is case sensitive
     * @param bool $fullmatch whether only whole-word matches are accepted
     * @param string|null $replacementphrase text to show instead of the match
     */
    public function __construct($phrase, $hreftagbegin='<span class="highlight">',
                                         $hreftagend='</span>',
                                         $casesensitive=false,
                                         $fullmatch=false,
                                         $replacementphrase=NULL) {

        $this->phrase            = $phrase;
        $this->hreftagbegin      = $hreftagbegin;
        $this->hreftagend        = $hreftagend;
        $this->casesensitive     = $casesensitive;
        $this->fullmatch         = $fullmatch;
        $this->replacementphrase = $replacementphrase;
        $this->work_calculated   = false;
    }

    /**
     * Old (PHP 4) style constructor, kept so that code calling it by name
     * (for example a subclass doing parent::filterobject(...)) keeps working.
     *
     * @param string $phrase the phrase to look for
     * @param string $hreftagbegin markup to put before each match
     * @param string $hreftagend markup to put after each match
     * @param bool $casesensitive whether matching is case sensitive
     * @param bool $fullmatch whether only whole-word matches are accepted
     * @param string|null $replacementphrase text to show instead of the match
     */
    public function filterobject($phrase, $hreftagbegin='<span class="highlight">',
                                          $hreftagend='</span>',
                                          $casesensitive=false,
                                          $fullmatch=false,
                                          $replacementphrase=NULL) {

        self::__construct($phrase, $hreftagbegin, $hreftagend, $casesensitive, $fullmatch, $replacementphrase);
    }
}
/**
 * Process phrases intelligently found within a HTML text (such as adding links)
 *
 * The text is first protected: the magic characters #, * and % are doubled,
 * every section enclosed by an ignore tag pair is swapped for a <#x.y#>
 * placeholder and every remaining tag for a <%x.y%> placeholder. Each phrase
 * is then wrapped in its begin/end markup, and finally all placeholders are
 * put back and the doubled characters are collapsed again.
 *
 * When $CFG->filtermatchoneperpage is set, phrases that were already
 * highlighted by an earlier call are remembered in a static list and skipped.
 *
 * @param string $text the text that we are filtering
 * @param array $link_array a 0-indexed array of filterobjects; passed by reference
 *                          because the work_* values of each object are cached in it
 * @param array $ignoretagsopen an array of opening tags (regular expression fragments)
 *                              that we should ignore while filtering; NULL means <a ...>
 * @param array $ignoretagsclose an array of corresponding closing tags
 * @return string the filtered text
 **/
function filter_phrases ($text, &$link_array, $ignoretagsopen=NULL, $ignoretagsclose=NULL) {

    global $CFG;

/// Phrases already highlighted by previous calls (only filled when
/// $CFG->filtermatchoneperpage is enabled)
    static $usedphrases;

    $ignoretags = array();  // To store all the enclosing tags to be completely ignored
    $tags = array();        // To store all the simple tags to be ignored

/// A list of open/close tags that we should not replace within
/// No reason why you can't put full preg expressions in here too
/// eg '<script(.+?)>' to match any type of script tag
    $filterignoretagsopen  = array('<head>' , '<nolink>' , '<span class="nolink">');
    $filterignoretagsclose = array('</head>', '</nolink>', '</span>');

/// Invalid prefixes and suffixes for the fullmatch searches
/// Every "word" character, but the underscore, is an invalid suffix or prefix.
    $filterinvalidprefixes = '([^\W_])';
    $filterinvalidsuffixes = '([^\W_])';

/// Add the user defined ignore tags to the default list
/// Unless specified otherwise, we will not replace within <a ...></a> tags
/// (the pattern requires whitespace after "<a", so it only matches anchors
/// that carry at least one attribute)
    if ( $ignoretagsopen === NULL ) {
        $ignoretagsopen  = array('<a\s[^>]+?>');
        $ignoretagsclose = array('</a>');
    }

    if ( is_array($ignoretagsopen) ) {
        foreach ($ignoretagsopen as $open) {
            $filterignoretagsopen[] = $open;
        }
        foreach ($ignoretagsclose as $close) {
            $filterignoretagsclose[] = $close;
        }
    }

/// Double up some magic chars to avoid "accidental matches" with the
/// <#..#>, <%..%> and <*..*> placeholders used below
    $text = preg_replace('/([#*%])/','\1\1',$text);

/// Remove everything enclosed by the ignore tags from $text
    filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);

/// Remove tags from $text
    filter_save_tags($text,$tags);

/// With either "match once" setting only the first occurrence is replaced (-1 = no limit)
    $limit = (!empty($CFG->filtermatchonepertext) || !empty($CFG->filtermatchoneperpage)) ? 1 : -1;

/// Time to cycle through each phrase to be linked
    $size = sizeof($link_array);
    for ($n=0; $n < $size; $n++) {
        $linkobject =& $link_array[$n];

    /// Nothing to do without a phrase
    /// Properties may be missing if the filterobject class has not been used to construct the object
        if (empty($linkobject->phrase)) {
            continue;
        }

    /// Avoid integers < 1000 to be linked. See bug 1446.
        $intcurrent = intval($linkobject->phrase);
        if (!empty($intcurrent) && strval($intcurrent) == $linkobject->phrase && $intcurrent < 1000) {
            continue;
        }

    /// All this work has to be done ONLY if it hasn't been done before
    /// (empty() rather than a plain read, because the property may not exist)
        if (empty($linkobject->work_calculated)) {
            if (!isset($linkobject->hreftagbegin) or !isset($linkobject->hreftagend)) {
            /// Same default markup as the filterobject constructor
                $linkobject->work_hreftagbegin = '<span class="highlight">';
                $linkobject->work_hreftagend   = '</span>';
            } else {
                $linkobject->work_hreftagbegin = $linkobject->hreftagbegin;
                $linkobject->work_hreftagend   = $linkobject->hreftagend;
            }

        /// Double up the magic chars in the opening markup too, so that they
        /// survive the final "remove doubleups" step unchanged
            $linkobject->work_hreftagbegin = preg_replace('/([#*%])/','\1\1',$linkobject->work_hreftagbegin);

            $linkobject->work_casesensitive = !empty($linkobject->casesensitive);
            $linkobject->work_fullmatch     = !empty($linkobject->fullmatch);

        /// Strip tags out of the phrase
            $linkobject->work_phrase = strip_tags($linkobject->phrase);

        /// Double up chars that might cause a false match -- the duplicates will
        /// be cleared up before returning to the user.
            $linkobject->work_phrase = preg_replace('/([#*%])/','\1\1',$linkobject->work_phrase);

        /// Set the replacement phrase properly
            if (!empty($linkobject->replacementphrase)) {    // We have specified a replacement phrase
            /// Strip tags
                $linkobject->work_replacementphrase = strip_tags($linkobject->replacementphrase);
            } else {                                         // The replacement is the original phrase as matched below
                $linkobject->work_replacementphrase = '$1';
            }

        /// Quote any regular expression characters and the delimiter in the work phrase to be searched
            $linkobject->work_phrase = preg_quote($linkobject->work_phrase, '/');

        /// Work calculated
            $linkobject->work_calculated = true;
        }

    /// If $CFG->filtermatchoneperpage, avoid previously (request) linked phrases
        if (!empty($CFG->filtermatchoneperpage)) {
            if (!empty($usedphrases) && in_array($linkobject->work_phrase,$usedphrases)) {
                continue;
            }
        }

    /// Regular expression modifiers
        $modifiers = ($linkobject->work_casesensitive) ? 's' : 'isu'; // works in unicode mode!

    /// Do we need to do a fullmatch?
    /// If yes, hide every occurrence that is glued to a word character behind
    /// a <*n*> placeholder so the highlighting below cannot touch it
        if ($linkobject->work_fullmatch) {
            $notfullmatches = array();
            $regexp = '/'.$filterinvalidprefixes.'('.$linkobject->work_phrase.')|('.$linkobject->work_phrase.')'.$filterinvalidsuffixes.'/'.$modifiers;

            preg_match_all($regexp,$text,$list_of_notfullmatches);

            if (!empty($list_of_notfullmatches[0])) {
                foreach (array_unique($list_of_notfullmatches[0]) as $key=>$value) {
                    $notfullmatches['<*'.$key.'*>'] = $value;
                }
                $text = str_replace($notfullmatches,array_keys($notfullmatches),$text);
            }
        }

    /// Finally we do our highlighting
        $resulttext = preg_replace('/('.$linkobject->work_phrase.')/'.$modifiers,
                                   $linkobject->work_hreftagbegin.
                                   $linkobject->work_replacementphrase.
                                   $linkobject->work_hreftagend, $text, $limit);

    /// If the text has changed we have to look for links again.
    /// preg_replace() returns NULL on a PCRE error; in that case keep the
    /// text we have instead of throwing it all away.
        if ($resulttext !== null && $resulttext !== $text) {
        /// Set $text to $resulttext
            $text = $resulttext;
        /// Remove everything enclosed by the ignore tags from $text
            filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);
        /// Remove tags from $text
            filter_save_tags($text,$tags);
        /// If $CFG->filtermatchoneperpage, save linked phrases to request
            if (!empty($CFG->filtermatchoneperpage)) {
                $usedphrases[] = $linkobject->work_phrase;
            }
        }

    /// Replace the not full matches before cycling to next link object
        if (!empty($notfullmatches)) {
            $text = str_replace(array_keys($notfullmatches),$notfullmatches,$text);
            unset($notfullmatches);
        }
    }

/// Rebuild the text with all the excluded areas

    if (!empty($tags)) {
        $text = str_replace(array_keys($tags), $tags, $text);
    }

    if (!empty($ignoretags)) {
        $text = str_replace(array_keys($ignoretags),$ignoretags,$text);
    }

/// Remove the protective doubleups
    $text = preg_replace('/([#*%])(\1)/','\1',$text);

/// Add missing javascript for popups
    $text = filter_add_javascript($text);

    return $text;
}
/**
 * Remove filterobjects whose phrase has already been seen earlier in the array.
 *
 * A case sensitive object is a duplicate when exactly the same phrase was
 * seen before; a case insensitive object is a duplicate when any earlier
 * phrase has the same lower case form. The first occurrence always wins.
 *
 * Phrases are compared as exact strings (they are used as array keys), so
 * numeric looking phrases such as "1e3" and "1000" are not mistaken for
 * each other the way a loose in_array() comparison would.
 *
 * @param array $linkarray an array of filterobjects
 * @return array the filterobjects that are left, re-indexed from 0
 */
function filter_remove_duplicates($linkarray) {

    $concepts  = array(); // phrases seen so far, stored as array keys
    $lconcepts = array(); // lower case versions, for the case insensitive check

    $cleanlinks = array();

    foreach ($linkarray as $filterobject) {
        $phrase  = (string)$filterobject->phrase;
        $lphrase = (string)moodle_strtolower($filterobject->phrase);

        if ($filterobject->casesensitive) {
            $exists = isset($concepts[$phrase]);
        } else {
            $exists = isset($lconcepts[$lphrase]);
        }

        if (!$exists) {
            $cleanlinks[] = $filterobject;
            $concepts[$phrase]   = true;
            $lconcepts[$lphrase] = true;
        }
    }

    return $cleanlinks;
}
/**
 * Extract open/close tags and their contents to avoid being processed by filters.
 * Useful to extract pieces of code like <a>...</a> tags. The text is modified
 * in place: every extracted section is replaced by a <#x.y#> code, and the
 * extracted sections are added to the ignoretags array (as values), with the
 * codes as keys.
 *
 * @param string $text the text that we are filtering (in/out)
 * @param array $filterignoretagsopen an array of open tags (regexp fragments) to start searching
 * @param array $filterignoretagsclose an array of close tags to end searching, with the same keys
 * @param array $ignoretags an array of saved strings useful to rebuild the original text (in/out)
 * @return void
 */
function filter_save_ignore_tags(&$text,$filterignoretagsopen,$filterignoretagsclose,&$ignoretags) {

    foreach ($filterignoretagsopen as $pairkey => $openpattern) {
        $closepattern = $filterignoretagsclose[$pairkey];

        // Escape the regexp delimiter inside both halves of the pair
        $openpattern  = str_replace('/', '\/', $openpattern);
        $closepattern = str_replace('/', '\/', $closepattern);

        // Non-greedy, case insensitive, and "." also matches newlines
        preg_match_all('/'.$openpattern.'(.*?)'.$closepattern.'/is', $text, $sections);

        foreach (array_unique($sections[0]) as $position => $section) {
            // The first number grows with every section saved so far, which keeps codes unique
            $code = '<#'.(count($ignoretags) + 1).'.'.$position.'#>';
            $ignoretags[$code] = $section;
        }

        if (empty($ignoretags)) {
            continue;
        }

        // Swap every saved section (old and new) for its code
        $text = str_replace(array_values($ignoretags), array_keys($ignoretags), $text);
    }
}
/**
 * Extract tags (any text enclosed by < and >) to avoid being processed by filters.
 * The text is modified in place: every extracted tag is replaced by a <%x.y%>
 * code, and the extracted tags are added to the tags array (as values), with
 * the codes as keys. Anything starting with <#, <% or <* is left alone.
 *
 * @param string $text the text that we are filtering (in/out)
 * @param array $tags an array of saved strings useful to rebuild the original text (in/out)
 * @return void
 */
function filter_save_tags(&$text,&$tags) {

    preg_match_all('/<([^#%*].*?)>/is', $text, $found);

    foreach (array_unique($found[0]) as $position => $markup) {
        // The first number grows with every tag saved so far, which keeps codes unique
        $code = '<%'.(count($tags) + 1).'.'.$position.'%>';
        $tags[$code] = $markup;
    }

    if (empty($tags)) {
        return;
    }

    // Swap every saved tag (old and new) for its code
    $text = str_replace(array_values($tags), array_keys($tags), $text);
}
/**
 * Add missing openpopup javascript to HTML files.
 *
 * Only texts that contain a closing </html> tag and at least one
 * onclick="return openpopup..." handler are touched. The script is inserted
 * before </head> when there is one, otherwise a new head element holding it
 * is added right after the opening html tag.
 *
 * @param string $text the text to inspect
 * @return string the text, with the openpopup() definition added when needed
 */
function filter_add_javascript($text) {
    global $CFG;

    // Not a html file, or no popup in it - no need to add javascript
    if (stripos($text, '</html>') === false || strpos($text, 'onclick="return openpopup') === false) {
        return $text;
    }

    $script = '
    <script type="text/javascript">
    <!--
        function openpopup(url,name,options,fullscreen) {
          fullurl = "'.$CFG->httpswwwroot.'" + url;
          windowobj = window.open(fullurl,name,options);
          if (fullscreen) {
            windowobj.moveTo(0,0);
            windowobj.resizeTo(screen.availWidth,screen.availHeight);
          }
          windowobj.focus();
          return false;
        }
    // -->
    </script>';

    if (stripos($text, '</head>') === false) {
        // Last chance - no head element, so add one after the opening html tag
        return preg_replace("/<html.*?>/is", "\\0<head>".$script.'</head>', $text);
    }

    // Preferred place - the end of the existing head element
    return str_ireplace('</head>', $script.'</head>', $text);
}