lib/filterlib.php

   1 <?php // $Id$
   2       // Contains special functions that are particularly useful to filters
   3
   4
   5 /**
   6  * This is just a little object to define a phrase and some instructions
   7  * for how to process it.  Filters can create an array of these to pass
   8  * to the filter_phrases function below.
   9  **/
  10 class filterobject {
  11     var $phrase;
  12     var $hreftagbegin;
  13     var $hreftagend;
  14     var $casesensitive;
  15     var $fullmatch;
  16     var $replacementphrase;
  17     var $work_phrase;
  18     var $work_hreftagbegin;
  19     var $work_hreftagend;
  20     var $work_casesensitive;
  21     var $work_fullmatch;
  22     var $work_replacementphrase;
  23     var $work_calculated;
  24
  25     /// a constructor just because I like constructing
  26     function filterobject($phrase, $hreftagbegin='<span class="highlight">',
  27                                    $hreftagend='</span>',
  28                                    $casesensitive=false,
  29                                    $fullmatch=false,
  30                                    $replacementphrase=NULL) {
  31
  32         $this->phrase           = $phrase;
  33         $this->hreftagbegin     = $hreftagbegin;
  34         $this->hreftagend       = $hreftagend;
  35         $this->casesensitive    = $casesensitive;
  36         $this->fullmatch        = $fullmatch;
  37         $this->replacementphrase= $replacementphrase;
  38         $this->work_calculated  = false;
  39
  40     }
  41 }
  42
  43 /**
  44  * Process phrases intelligently found within a HTML text (such as adding links)
  45  *
  46  * param  text             the text that we are filtering
  47  * param  link_array       an array of filterobjects
  48  * param  ignoretagsopen   an array of opening tags that we should ignore while filtering
  49  * param  ignoretagsclose  an array of corresponding closing tags
  50  **/
  51 function filter_phrases ($text, &$link_array, $ignoretagsopen=NULL, $ignoretagsclose=NULL) {
  52
  53     global $CFG;
  54
  55     static $usedphrases;
  56
  57     $ignoretags = array();  //To store all the enclosig tags to be completely ignored
  58     $tags = array();        //To store all the simple tags to be ignored
  59
  60 /// A list of open/close tags that we should not replace within
  61 /// No reason why you can't put full preg expressions in here too
  62 /// eg '<script(.+?)>' to match any type of script tag
  63     $filterignoretagsopen  = array('<head>' , '<nolink>' , '<span class="nolink">');
  64     $filterignoretagsclose = array('</head>', '</nolink>', '</span>');
  65
  66 /// Invalid prefixes and suffixes for the fullmatch searches
  67 /// Every "word" character, but the underscore, is a invalid suffix or prefix.
  68 /// (nice to use this because it includes national characters (accents...) as word characters.
  69     $filterinvalidprefixes = '([^\W_])';
  70     $filterinvalidsuffixes = '([^\W_])';
  71
  72 /// Add the user defined ignore tags to the default list
  73 /// Unless specified otherwise, we will not replace within <a></a> tags
  74     if ( $ignoretagsopen === NULL ) {
  75         //$ignoretagsopen  = array('<a(.+?)>');
  76         $ignoretagsopen  = array('<a[^>]+?>');
  77         $ignoretagsclose = array('</a>');
  78     }
  79
  80     if ( is_array($ignoretagsopen) ) {
  81         foreach ($ignoretagsopen as $open) $filterignoretagsopen[] = $open;
  82         foreach ($ignoretagsclose as $close) $filterignoretagsclose[] = $close;
  83     }
  84
  85     //// Double up some magic chars to avoid "accidental matches"
  86     $text = preg_replace('/([#*%])/','\1\1',$text);
  87
  88
  89 ////Remove everything enclosed by the ignore tags from $text
  90     filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);
  91
  92 /// Remove tags from $text
  93     filter_save_tags($text,$tags);
  94
  95 /// Time to cycle through each phrase to be linked
  96     $size = sizeof($link_array);
  97     for ($n=0; $n < $size; $n++) {
  98         $linkobject =& $link_array[$n];
  99
 100     /// Set some defaults if certain properties are missing
 101     /// Properties may be missing if the filterobject class has not been used to construct the object
 102         if (empty($linkobject->phrase)) {
 103             continue;
 104         }
 105
 106     /// Avoid integers < 1000 to be linked. See bug 1446.
 107         $intcurrent = intval($linkobject->phrase);
 108         if (!empty($intcurrent) && strval($intcurrent) == $linkobject->phrase && $intcurrent < 1000) {
 109             continue;
 110         }
 111
 112     /// All this work has to be done ONLY it it hasn't been done before
 113     if (!$linkobject->work_calculated) {
 114             if (!isset($linkobject->hreftagbegin) or !isset($linkobject->hreftagend)) {
 115                 $linkobject->work_hreftagbegin = '<span class="highlight"';
 116                 $linkobject->work_hreftagend   = '</span>';
 117             } else {
 118                 $linkobject->work_hreftagbegin = $linkobject->hreftagbegin;
 119                 $linkobject->work_hreftagend   = $linkobject->hreftagend;
 120             }
 121
 122         /// Double up chars to protect true duplicates
 123         /// be cleared up before returning to the user.
 124             $linkobject->work_hreftagbegin = preg_replace('/([#*%])/','\1\1',$linkobject->work_hreftagbegin);
 125
 126             if (empty($linkobject->casesensitive)) {
 127                 $linkobject->work_casesensitive = false;
 128             } else {
 129                 $linkobject->work_casesensitive = true;
 130             }
 131             if (empty($linkobject->fullmatch)) {
 132                 $linkobject->work_fullmatch = false;
 133             } else {
 134                 $linkobject->work_fullmatch = true;
 135             }
 136
 137         /// Strip tags out of the phrase
 138             $linkobject->work_phrase = strip_tags($linkobject->phrase);
 139
 140         /// Double up chars that might cause a false match -- the duplicates will
 141         /// be cleared up before returning to the user.
 142             $linkobject->work_phrase = preg_replace('/([#*%])/','\1\1',$linkobject->work_phrase);
 143
 144         /// Set the replacement phrase properly
 145             if ($linkobject->replacementphrase) {    //We have specified a replacement phrase
 146             /// Strip tags
 147                 $linkobject->work_replacementphrase = strip_tags($linkobject->replacementphrase);
 148             } else {                                 //The replacement is the original phrase as matched below
 149                 $linkobject->work_replacementphrase = '$1';
 150             }
 151
 152         /// Quote any regular expression characters and the delimiter in the work phrase to be searched
 153             $linkobject->work_phrase = preg_quote($linkobject->work_phrase, '/');
 154
 155         /// Work calculated
 156             $linkobject->work_calculated = true;
 157
 158         }
 159
 160     /// If $CFG->filtermatchoneperpage, avoid previously (request) linked phrases
 161         if (!empty($CFG->filtermatchoneperpage)) {
 162             if (!empty($usedphrases) && in_array($linkobject->work_phrase,$usedphrases)) {
 163                 continue;
 164             }
 165         }
 166
 167     /// Regular expression modifiers
 168         $modifiers = ($linkobject->work_casesensitive) ? 's' : 'isu'; // works in unicode mode!
 169
 170     /// Do we need to do a fullmatch?
 171     /// If yes then go through and remove any non full matching entries
 172         if ($linkobject->work_fullmatch) {
 173             $notfullmatches = array();
 174             $regexp = '/'.$filterinvalidprefixes.'('.$linkobject->work_phrase.')|('.$linkobject->work_phrase.')'.$filterinvalidsuffixes.'/'.$modifiers;
 175
 176             preg_match_all($regexp,$text,$list_of_notfullmatches);
 177
 178             if ($list_of_notfullmatches) {
 179                 foreach (array_unique($list_of_notfullmatches[0]) as $key=>$value) {
 180                     $notfullmatches['<*'.$key.'*>'] = $value;
 181                 }
 182                 if (!empty($notfullmatches)) {
 183                     $text = str_replace($notfullmatches,array_keys($notfullmatches),$text);
 184                 }
 185             }
 186         }
 187
 188     /// Finally we do our highlighting
 189         if (!empty($CFG->filtermatchonepertext) || !empty($CFG->filtermatchoneperpage)) {
 190             $resulttext = preg_replace('/('.$linkobject->work_phrase.')/'.$modifiers,
 191                                       $linkobject->work_hreftagbegin.
 192                                       $linkobject->work_replacementphrase.
 193                                       $linkobject->work_hreftagend, $text, 1);
 194         } else {
 195             $resulttext = preg_replace('/('.$linkobject->work_phrase.')/'.$modifiers,
 196                                       $linkobject->work_hreftagbegin.
 197                                       $linkobject->work_replacementphrase.
 198                                       $linkobject->work_hreftagend, $text);
 199         }
 200
 201
 202     /// If the text has changed we have to look for links again
 203         if ($resulttext != $text) {
 204         /// Set $text to $resulttext
 205             $text = $resulttext;
 206         /// Remove everything enclosed by the ignore tags from $text
 207             filter_save_ignore_tags($text,$filterignoretagsopen,$filterignoretagsclose,$ignoretags);
 208         /// Remove tags from $text
 209             filter_save_tags($text,$tags);
 210         /// If $CFG->filtermatchoneperpage, save linked phrases to request
 211             if (!empty($CFG->filtermatchoneperpage)) {
 212                 $usedphrases[] = $linkobject->work_phrase;
 213             }
 214         }
 215
 216
 217     /// Replace the not full matches before cycling to next link object
 218         if (!empty($notfullmatches)) {
 219             $text = str_replace(array_keys($notfullmatches),$notfullmatches,$text);
 220             unset($notfullmatches);
 221         }
 222     }
 223
 224 /// Rebuild the text with all the excluded areas
 225
 226     if (!empty($tags)) {
 227         $text = str_replace(array_keys($tags), $tags, $text);
 228     }
 229
 230     if (!empty($ignoretags)) {
 231         $text = str_replace(array_keys($ignoretags),$ignoretags,$text);
 232     }
 233
 234     //// Remove the protective doubleups
 235     $text =  preg_replace('/([#*%])(\1)/','\1',$text);
 236
 237 /// Add missing javascript for popus
 238     $text = filter_add_javascript($text);
 239
 240
 241     return $text;
 242
 243 }
 244
 245
 246
 247 function filter_remove_duplicates($linkarray) {
 248
 249     $concepts  = array(); // keep a record of concepts as we cycle through
 250     $lconcepts = array(); // a lower case version for case insensitive
 251
 252     $cleanlinks = array();
 253
 254     foreach ($linkarray as $key=>$filterobject) {
 255         if ($filterobject->casesensitive) {
 256             $exists = in_array($filterobject->phrase, $concepts);
 257         } else {
 258             $exists = in_array(moodle_strtolower($filterobject->phrase), $lconcepts);
 259         }
 260
 261         if (!$exists) {
 262             $cleanlinks[] = $filterobject;
 263             $concepts[] = $filterobject->phrase;
 264             $lconcepts[] = moodle_strtolower($filterobject->phrase);
 265         }
 266     }
 267
 268     return $cleanlinks;
 269 }
 270
 271 /**
 272  * Extract open/lose tags and their contents to avoid being processed by filters.
 273  * Useful to extract pieces of code like <a>...</a> tags. It returns the text
 274  * converted with some <#x.x#> codes replacing the extracted text. Such extracted
 275  * texts are returned in the ignoretags array (as values), with codes as keys.
 276  *
 277  * param  text                  the text that we are filtering (in/out)
 278  * param  filterignoretagsopen  an array of open tags to start searching
 279  * param  filterignoretagsclose an array of close tags to end searching
 280  * param  ignoretags            an array of saved strings useful to rebuild the original text (in/out)
 281  **/
 282 function filter_save_ignore_tags(&$text,$filterignoretagsopen,$filterignoretagsclose,&$ignoretags) {
 283
 284 /// Remove everything enclosed by the ignore tags from $text
 285     foreach ($filterignoretagsopen as $ikey=>$opentag) {
 286         $closetag = $filterignoretagsclose[$ikey];
 287     /// form regular expression
 288         $opentag  = str_replace('/','\/',$opentag); // delimit forward slashes
 289         $closetag = str_replace('/','\/',$closetag); // delimit forward slashes
 290         $pregexp = '/'.$opentag.'(.+?)'.$closetag.'/is';
 291
 292         preg_match_all($pregexp, $text, $list_of_ignores);
 293         foreach (array_unique($list_of_ignores[0]) as $key=>$value) {
 294             $prefix = (string)(count($ignoretags) + 1);
 295             $ignoretags['<#'.$prefix.'.'.$key.'#>'] = $value;
 296         }
 297         if (!empty($ignoretags)) {
 298             $text = str_replace($ignoretags,array_keys($ignoretags),$text);
 299         }
 300     }
 301 }
 302
 303 /**
 304  * Extract tags (any text enclosed by < and > to avoid being processed by filters.
 305  * It returns the text converted with some <%x.x%> codes replacing the extracted text. Such extracted
 306  * texts are returned in the tags array (as values), with codes as keys.
 307  *
 308  * param  text   the text that we are filtering (in/out)
 309  * param  tags   an array of saved strings useful to rebuild the original text (in/out)
 310  **/
 311 function filter_save_tags(&$text,&$tags) {
 312
 313     preg_match_all('/<([^#%*].*?)>/is',$text,$list_of_newtags);
 314     foreach (array_unique($list_of_newtags[0]) as $ntkey=>$value) {
 315         $prefix = (string)(count($tags) + 1);
 316         $tags['<%'.$prefix.'.'.$ntkey.'%>'] = $value;
 317     }
 318     if (!empty($tags)) {
 319         $text = str_replace($tags,array_keys($tags),$text);
 320     }
 321 }
 322
 323 /**
 324  * Add missing openpopup javascript to HTML files.
 325  */
 326 function filter_add_javascript($text) {
 327     global $CFG;
 328
 329     if (stripos($text, '</html>') === FALSE) {
 330         return $text; // this is not a html file
 331     }
 332     if (strpos($text, 'onclick="return openpopup') === FALSE) {
 333         return $text; // no popup - no need to add javascript
 334     }
 335     $js ="
 336     <script type=\"text/javascript\">
 337     <!--
 338         function openpopup(url,name,options,fullscreen) {
 339           fullurl = \"".$CFG->httpswwwroot."\" + url;
 340           windowobj = window.open(fullurl,name,options);
 341           if (fullscreen) {
 342             windowobj.moveTo(0,0);
 343             windowobj.resizeTo(screen.availWidth,screen.availHeight);
 344           }
 345           windowobj.focus();
 346           return false;
 347         }
 348     // -->
 349     </script>";
 350     if (stripos($text, '</head>') !== FALSE) {
 351         //try to add it into the head element
 352         $text = str_ireplace('</head>', $js.'</head>', $text);
 353         return $text;
 354     }
 355
 356     //last chance - try adding head element
 357     return preg_replace("/<html.*?>/is", "\\0<head>".$js.'</head>', $text);
 358 }
 359 ?>