3 * Global Search Engine for Moodle
4 * add-on 1.8+ : Valery Fremaux [valery.fremaux@club-internet.fr]
7 * this is a format handler for getting text out of an HTML resource file
8 * so it can be indexed by the Lucene search engine
11 function get_text_for_indexing_htm(&$resource){
14 // SECURITY : do not allow non admin execute anything on system !!
15 if (!isadmin($USER->id
)) return;
18 $text = implode('', file("{$CFG->dataroot}/{$resource->course}/($resource->reference)"));
20 // extract keywords and other interesting meta information and put it back as real content for indexing
21 if (preg_match('/(.*)<meta ([^>]*)>(.*)/is',$text, $matches)){
22 $prefix = $matches[1];
23 $meta_attributes = $matches[2];
24 $suffix = $matches[3];
25 if (preg_match('/name="(keywords|description)"/i', $attributes)){
26 preg_match('/content="[^"]+"/i', $attributes, $matches);
27 $text = $prefix.' '.$matches[1].' '.$suffix;
30 // filter all html tags
31 // $text = clean_text($text, FORMAT_PLAIN);
32 // NOTE : this is done in ResourceSearchDocument __constructor
34 if (!empty($CFG->block_search_limit_index_body
)){
35 $text = shorten($text, $CFG->block_search_limit_index_body
);