MDL-11082 Improved groups upgrade performance 1.8x -> 1.9; thanks Eloy for telling...
[moodle-pu.git] / mod / lesson / importppt.php
bloba211f78a680277f52c9ac6cb1c4688e6e44bb5af
1 <?php // $Id$
2 /**
3 * This is a very rough importer for powerpoint slides
4 * Export a powerpoint presentation with powerpoint as html pages
5 * Do it with office 2002 (I think?) and no special settings
6 * Then zip the directory with all of the html pages
7 * and the zip file is what you want to upload
8 *
9 * The script supports book and lesson.
11 * @version $Id$
12 * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
13 * @package lesson
14 **/
require_once("../../config.php");
require_once("locallib.php");

$id     = required_param('id', PARAM_INT);         // Course Module ID
$pageid = optional_param('pageid', 0, PARAM_INT);  // Page ID to import after; 0 is compared against in lesson_save_objects()
global $matches;  // shared between extract_data() and build_list()

if (! $cm = get_coursemodule_from_id('lesson', $id)) {
    error("Course Module ID was incorrect");
}

if (! $course = get_record("course", "id", $cm->course)) {
    error("Course is misconfigured");
}

// allows for adaption for multiple modules
if (! $modname = get_field('modules', 'name', 'id', $cm->module)) {
    error("Could not find module name");
}

if (! $mod = get_record($modname, "id", $cm->instance)) {
    error("Course module is incorrect");
}

require_login($course->id, false, $cm);
$context = get_context_instance(CONTEXT_MODULE, $cm->id);
require_capability('mod/lesson:edit', $context);

$strimportppt = get_string("importppt", "lesson");
$strlessons = get_string("modulenameplural", "lesson");

$navlinks = array();
$navlinks[] = array('name' => $strlessons, 'link' => "index.php?id=$course->id", 'type' => 'activity');
$navlinks[] = array('name' => format_string($mod->name,true), 'link' => "$CFG->wwwroot/mod/$modname/view.php?id=$cm->id", 'type' => 'activityinstance');
$navlinks[] = array('name' => $strimportppt, 'link' => '', 'type' => 'title');

$navigation = build_navigation($navlinks);

print_header_simple("$strimportppt", " $strimportppt", $navigation);

// NOTE(review): the submitted form carries no session key check - confirm whether one is required here.
if ($form = data_submitted()) {   /// Filename

    if (empty($_FILES['newfile'])) {
        // nothing was uploaded at all; do not touch $_FILES['newfile'][...] below
        notify(get_string("uploadproblem") );

    } else if ((!is_uploaded_file($_FILES['newfile']['tmp_name']) or $_FILES['newfile']['size'] == 0)) {
        notify(get_string("uploadnofilefound") );

    } else {  // Valid file is found

        if ($rawpages = readdata($_FILES, $course->id, $modname)) {  // first try to read all of the data in
            $pageobjects = extract_data($rawpages, $course->id, $mod->name, $modname); // parse all the html files into objects
            clean_temp(); // all done with files so dump em

            // module specific handlers, e.g. lesson_create_objects / lesson_save_objects
            $mod_create_objects = $modname.'_create_objects';
            $mod_save_objects = $modname.'_save_objects';

            $objects = $mod_create_objects($pageobjects, $mod->id);  // preps the data to be sent to DB

            if (! $mod_save_objects($objects, $mod->id, $pageid)) {  // sends it to DB
                error("could not save");
            }
        } else {
            error('could not get data');
        }

        echo "<hr>";
        print_continue("$CFG->wwwroot/mod/$modname/view.php?id=$cm->id");
        print_footer($course);
        exit;
    }
}

/// Print upload form

print_heading_with_help($strimportppt, "importppt", "lesson");

print_simple_box_start("center");
echo "<form id=\"theform\" enctype=\"multipart/form-data\" method=\"post\">";
echo "<input type=\"hidden\" name=\"id\" value=\"$cm->id\" />\n";
echo "<input type=\"hidden\" name=\"pageid\" value=\"$pageid\" />\n";
echo "<table cellpadding=\"5\">";

echo "<tr><td align=\"right\">";
print_string("upload");
echo ":</td><td>";
echo "<input name=\"newfile\" type=\"file\" size=\"50\" />";
echo "</td></tr><tr><td>&nbsp;</td><td>";
echo "<input type=\"submit\" name=\"save\" value=\"".get_string("uploadthisfile")."\" />";
echo "</td></tr>";

echo "</table>";
echo "</form>";
print_simple_box_end();

print_footer($course);
114 // START OF FUNCTIONS
/**
 * Reads the uploaded zip of PowerPoint HTML slides into memory.
 *
 * The upload is taken from $_FILES['newfile'], moved into
 * dataroot/temp/$modname/ and unzipped there. The slides are expected in a
 * sub directory named after the zip file (without its extension). If that
 * directory has a readable outline.htm, the slide file names are taken from
 * it in outline order; otherwise every file whose name starts with "slide"
 * is read and the result is sorted by path.
 *
 * @param array $file the $_FILES array (not read; the superglobal is used directly)
 * @param int $courseid course id (not used here)
 * @param string $modname module name, used for the temp sub directory
 * @return mixed array of slide path => slide contents, or false on any failure
 **/
function readdata($file, $courseid, $modname) {
    global $CFG;

    // create an upload directory in temp
    make_upload_directory('temp/'.$modname);

    $base = $CFG->dataroot."/temp/$modname/";

    // basename() so a crafted client file name cannot point outside of $base
    $zipfile     = basename($_FILES["newfile"]["name"]);
    $tempzipfile = $_FILES["newfile"]["tmp_name"];

    // create our directory: the zip name with its trailing extension removed
    $path_parts = pathinfo($zipfile);
    if (isset($path_parts['extension']) and $path_parts['extension'] !== '') {
        $dirname = substr($zipfile, 0, -strlen('.'.$path_parts['extension']));
    } else {
        $dirname = '';  // no extension: work directly in $base
    }
    if (!file_exists($base.$dirname)) {
        mkdir($base.$dirname);
    }

    // move our uploaded file to temp/$modname
    if (!move_uploaded_file($tempzipfile, $base.$zipfile)) {
        return false;
    }

    // unzip it!
    unzip_file($base.$zipfile, $base, false);

    $base = $base.$dirname;  // update the base

    // this is the file where we get the names of the files for the slides (in the correct order too)
    $outline = $base.'/outline.htm';

    $pages = array();

    if (file_exists($outline) and is_readable($outline)) {
        $outlinecontents = file_get_contents($outline);
        $filenames = array();
        preg_match_all("/javascript:GoToSld\('(.*)'\)/", $outlinecontents, $filenames);  // this gets all of our files names

        // fill $pages with the contents of all of the slides
        foreach ($filenames[1] as $slidename) {
            $path = $base.'/'.basename($slidename);  // names come from the archive, keep them inside $base
            if (!is_readable($path)) {
                return false;
            }
            $pages[$path] = file_get_contents($path);
        }
    } else {
        // cannot find the outline, so grab all files that start with slide
        $dh = opendir($base);
        if ($dh === false) {
            return false;
        }
        while (false !== ($entry = readdir($dh))) {  // read through the directory
            if ('slide' == substr($entry, 0, 5)) {  // check for name (may want to check extension later)
                $path = $base.'/'.$entry;
                if (!is_readable($path)) {
                    closedir($dh);
                    return false;
                }
                $pages[$path] = file_get_contents($path);
            }
        }
        closedir($dh);

        ksort($pages);  // order them by file name
    }

    if (empty($pages)) {
        return false;
    }

    return $pages;
}
/**
 * Attempts to extract the content out of the slides.
 *
 * Each slide is stripped down to its div and img tags. Non nested, closed
 * tags are then matched into the global $matches array and sorted by their
 * class: T becomes the page title, B/B1-B4 are turned into an html list via
 * build_list() and everything else is stored as plain content. Images named
 * <slide>_image* are copied into a new, unique sub folder of the course
 * moddata directory.
 *
 * @param array $pages slide path => slide html, as returned by readdata()
 * @param int $courseid id of the course that receives the images
 * @param string $lessonname activity name, used to name the image folder
 * @param string $modname module name, used as the moddata sub directory
 * @return array list of objects with title, contents, images and source set
 **/
function extract_data($pages, $courseid, $lessonname, $modname) {
    global $CFG;
    global $matches;  // read by build_list()

    $extratedpages = array();

    // directory for images
    make_mod_upload_directory($courseid); // make sure moddata is made
    make_upload_directory($courseid.'/moddata/'.$modname, false);  // we store our images in a subfolder in here

    $imagedir = $CFG->dataroot.'/'.$courseid.'/moddata/'.$modname;

    if ($CFG->slasharguments) {
        $imagelink = $CFG->wwwroot.'/file.php/'.$courseid.'/moddata/'.$modname;
    } else {
        $imagelink = $CFG->wwwroot.'/file.php?file=/'.$courseid.'/moddata/'.$modname;
    }

    // try to make a unique subfolder to store the images
    $lessonname = str_replace(' ', '_', $lessonname); // get rid of spaces
    $suffix = 0;
    while (file_exists($imagedir.'/'.$lessonname.$suffix)) {
        $suffix++;
    }
    // ok doesnt exist so make the directory and update our paths
    mkdir($imagedir.'/'.$lessonname.$suffix);
    $imagedir  = $imagedir.'/'.$lessonname.$suffix;
    $imagelink = $imagelink.'/'.$lessonname.$suffix;

    foreach ($pages as $file => $content) {
        // to make life easier on our preg_match_alls, we strip out all tags except
        // for div and img (where our content is). We want div because sometimes we
        // can identify the content in the div based on the div's class

        $tags = '<div><img>'; // should also allow <b><i>
        $string = strip_tags($content,$tags);

        $matches = array();
        // this will look for a non nested tag that is closed
        // want to allow <b><i>(maybe more) tags but when we do that
        // the preg_match messes up.
        preg_match_all("/(<([\w]+)[^>]*>)([^<\\2>]*)(<\/\\2>)/", $string, $matches);

        $path_parts = pathinfo($file);
        $file = substr($path_parts['basename'], 0, strpos($path_parts['basename'], '.')); // get rid of the extension

        $imgs = array();
        // this preg matches all images; the slide name is quoted so it is matched literally
        preg_match_all("/<img[^>]*(src\=\"(".preg_quote($file, '/')."\_image[^>^\"]*)\"[^>]*)>/i", $string, $imgs);

        // start building our page
        $page = new stdClass;
        $page->title = '';
        $page->contents = array();
        $page->images = array();
        $page->source = $path_parts['basename']; // need for book only

        // this foreach keeps the style intact.  Found it doesn't help much.  But if you want back uncomment
        // this foreach and uncomment the line with the comment imgstyle in it.  Also need to comment out
        // the $page->images[]... line in the next foreach
        /*foreach ($imgs[1] as $img) {
            $page->images[] = '<img '.str_replace('src="', "src=\"$imagelink/", $img).' />';
        }*/
        foreach ($imgs[2] as $img) {
            if (basename($img) !== $img) {
                continue;  // image names come from the uploaded html; ignore anything with a path in it
            }
            copy($path_parts['dirname'].'/'.$img, $imagedir.'/'.$img);
            $page->images[] = "<img src=\"$imagelink/$img\" title=\"$img\" />";  // comment out this line if you are using the above foreach loop
        }

        for ($i = 0; $i < count($matches[1]); $i++) { // go through all of our div matches

            $class = isolate_class($matches[1][$i]); // first step in isolating the class

            // check for any static classes
            switch ($class) {
                case 'T':  // class T is used for Titles
                    $page->title = $matches[3][$i];
                    break;
                case 'B':  // I would guess that all bullet lists would start with B then go to B1, B2, etc
                case 'B1': // B1-B4 are just insurance, should just hit B and all be taken care of
                case 'B2':
                case 'B3':
                case 'B4':
                    $page->contents[] = build_list('<ul>', $i, 0);  // this is a recursive function that will grab all the bullets and rebuild the list in html
                    // build_list() leaves $i on the first match it did not consume;
                    // step back one so the loop's $i++ does not skip that match
                    $i--;
                    break;
                default:
                    if ($matches[3][$i] != '&#13;') {  // odd crap generated... sigh
                        if (substr($matches[3][$i], 0, 1) == ':') {  // check for leading :
                            $page->contents[] = substr($matches[3][$i], 1);  // get rid of :
                        } else {
                            $page->contents[] = $matches[3][$i];
                        }
                    }
                    break;
            }
        }

        // add the page to the array;
        $extratedpages[] = $page;

    } // end $pages foreach loop

    return $extratedpages;
}
/**
 * A recursive function to build a html list
 *
 * Walks the global $matches array (filled by extract_data) starting at
 * index $i and appends one <li> per bullet match to $list. Class B is
 * depth 0, B1 is depth 1, B2 is depth 2 and so on. A deeper bullet opens a
 * nested <ul> through recursion, a shallower one ends the current level.
 * The first match whose class is not B followed by optional digits ends
 * the list.
 *
 * @param string $list the html built so far
 * @param int $i index into $matches, passed by reference; on return it points at the first match that was not consumed
 * @param int $depth nesting depth handled by this call
 * @return string $list with the bullets of this level appended and the <ul> closed
 **/
function build_list($list, &$i, $depth) {
    global $matches; // filled by extract_data()

    while ($i < count($matches[1])) {

        $class = isolate_class($matches[1][$i]);

        // only B, B1, B2, ... are bullet classes; any other class merely
        // containing a "B" is not one and ends the list
        $parts = array();
        if (!preg_match('/^B(\d*)$/', $class, $parts)) {
            break;
        }

        // calling class B depth 0; B1 is depth 1 and B2 is depth 2 and so on
        $this_depth = ($parts[1] === '') ? 0 : (int)$parts[1];

        if ($this_depth < $depth) {
            // we are moving back a level in the nesting
            break;
        }
        if ($this_depth > $depth) {
            // we are moving in a lvl in nesting
            $list .= '<ul>';
            $list = build_list($list, $i, $this_depth);
            // once we return back, should go to the start of the while
            continue;
        }

        // no depth changes, so add the match to our list
        $cleanstring = ppt_clean_text($matches[3][$i]);
        if ($cleanstring !== false and $cleanstring !== '') {
            $list .= '<li>'.$cleanstring.'</li>';
        }
        $i++;
    }

    // end the list and return it
    $list .= '</ul>';
    return $list;
}
/**
 * Given an html tag, this function will return the value of its class
 * attribute.
 *
 * The exported slides write the class without quotes (<div class=B>), so
 * the value is taken to end at the first whitespace character or at the
 * closing >. Surrounding quotes, if any, are removed.
 *
 * @param string $string an html opening tag
 * @return string the class value, or '' when the tag has no class attribute
 **/
function isolate_class($string) {
    $class = strstr($string, 'class=');
    if ($class === false) {
        // no class defined in the tag
        return '';
    }

    $class = substr($class, strlen('class='));  // drop everything up to and including class=

    // the value runs up to the first whitespace or the end of the tag,
    // or to the end of the string when neither is present
    $length = strcspn($class, " \t\r\n>");

    return trim(substr($class, 0, $length), "\"'");
}
/**
 * This function strips off the random chars that ppt puts infront of bullet lists
 *
 * By default a single leading character is removed. When the text starts
 * with &lt; the whole four character entity is removed instead.
 *
 * @param string $string bullet text as found in the slide
 * @return mixed the text without its prefix, or false when only &#13; is left
 **/
function ppt_clean_text($string) {
    // an &lt; right at the front is four chars long, anything else counts as one
    // may need to add more prefixes later....
    $prefixlength = (strpos($string, '&lt;') === 0) ? 4 : 1;

    $cleaned = substr($string, $prefixlength);

    if ($cleaned == '&#13;') {
        return false;
    }
    return $cleaned;
}
/**
 * Clean up the temp directory
 *
 * Currently a placeholder that removes nothing. The clean up is disabled
 * because it is broken; when it is restored it should only remove what this
 * import created, because someone else could be importing ppt as well.
 **/
function clean_temp() {
    global $CFG;
    // disabled until fixed:
    //delDirContents($CFG->dataroot.'/temp/lesson');
}
/**
 * Creates an object with the page and answers that are to be inserted into the database
 *
 * Every imported slide becomes a branch table page with two answers,
 * "Next" and "Previous".
 *
 * @param array $pageobjects page objects as returned by extract_data()
 * @param int $lessonid id of the lesson the pages belong to
 * @return array list of objects, each with ->page (the page record) and ->answers (array of answer records)
 **/
function lesson_create_objects($pageobjects, $lessonid) {

    $branchtables = array();
    $branchtable = new stdClass;

    // all pages have this info
    $page = new stdClass;  // must exist before properties are assigned to it
    $page->lessonid = $lessonid;
    $page->prevpageid = 0;
    $page->nextpageid = 0;
    $page->qtype = LESSON_BRANCHTABLE;
    $page->qoption = 0;
    $page->layout = 1;
    $page->display = 1;
    $page->timecreated = time();
    $page->timemodified = 0;

    // all answers are the same
    $answer = new stdClass;
    $answer->lessonid = $lessonid;
    $answer->jumpto = LESSON_NEXTPAGE;
    $answer->grade = 0;
    $answer->score = 0;
    $answer->flags = 0;
    $answer->timecreated = time();
    $answer->timemodified = 0;
    $answer->answer = "Next";
    $answer->response = "";

    $answers = array();
    $answers[] = clone($answer);

    $answer->jumpto = LESSON_PREVIOUSPAGE;
    $answer->answer = "Previous";

    $answers[] = clone($answer);

    $branchtable->answers = $answers;

    $i = 1;

    foreach ($pageobjects as $pageobject) {
        $temp = prep_page($pageobject, $i);  // makes our title and contents
        $page->title = $temp->title;
        $page->contents = $temp->contents;
        $branchtable->page = clone($page);  // add the page
        $branchtables[] = clone($branchtable);  // add it all to our array
        $i++;
    }

    return $branchtables;
}
/**
 * Creates a chapter object for every page that is to be inserted into the database
 *
 * Page numbers continue after the chapters the book already has.
 *
 * @param array $pageobjects page objects as returned by extract_data()
 * @param int $bookid id of the book the chapters belong to
 * @return array list of chapter objects
 **/
function book_create_objects($pageobjects, $bookid) {

    $chapters = array();
    $chapter = new stdClass;

    // same for all chapters
    $chapter->bookid = $bookid;
    $chapter->pagenum = count_records('book_chapters', 'bookid', $bookid)+1;
    $chapter->timecreated = time();
    $chapter->timemodified = time();
    $chapter->subchapter = 0;

    $i = 1;
    foreach ($pageobjects as $pageobject) {
        $page = prep_page($pageobject, $i);  // get title and contents
        $chapter->importsrc = addslashes($pageobject->source); // add the source
        $chapter->title = $page->title;
        $chapter->content = $page->contents;
        // store a copy: pushing $chapter itself would make every entry
        // point at the same object and end up with the last chapter's data
        $chapters[] = clone($chapter);

        // increment our page number and our counter
        $chapter->pagenum = $chapter->pagenum + 1;
        $i++;
    }

    return $chapters;
}
/**
 * Builds the title and content strings from an object
 *
 * The contents string holds all images first, followed by every content
 * element wrapped in <p> tags. Line breaks and &#13; are removed and both
 * title and contents are run through addslashes().
 *
 * @param object $pageobject page object with title, images and contents set
 * @param int $count number used for the generic title when the page has none
 * @return object object with ->title and ->contents
 **/
function prep_page($pageobject, $count) {
    $page = new stdClass;  // must exist before properties are assigned to it

    if ($pageobject->title == '') {
        $page->title = "Page $count";  // no title set so make a generic one
    } else {
        $page->title = addslashes($pageobject->title);
    }

    $page->contents = '';

    // nab all the images first
    foreach ($pageobject->images as $image) {
        $image = str_replace("\n", '', $image);
        $image = str_replace("\r", '', $image);
        $image = str_replace("'", '"', $image);  // imgstyle

        $page->contents .= addslashes($image);
    }

    // go through the contents array and put <p> tags around each element and strip out \n which I have found to be unnecessary
    foreach ($pageobject->contents as $content) {
        $content = str_replace("\n", '', $content);
        $content = str_replace("\r", '', $content);
        $content = str_replace('&#13;', '', $content);  // puts in returns?
        $content = '<p>'.$content.'</p>';
        $page->contents .= addslashes($content);
    }

    return $page;
}
/**
 * Saves the branchtable objects to the DB
 *
 * The pages are inserted as one run into the doubly linked list of lesson
 * pages: at the top of the lesson when $after is 0, otherwise directly
 * after the page with id $after. Each page's answers are inserted with it.
 *
 * @param array $branchtables objects as returned by lesson_create_objects()
 * @param int $lessonid id of the lesson
 * @param int $after id of the page to insert after, 0 for the top of the lesson
 * @return bool true
 **/
function lesson_save_objects($branchtables, $lessonid, $after) {
    // nothing to insert, so there are no links to rewire either
    if (empty($branchtables)) {
        return true;
    }

    // first set up the prevpageid and nextpageid
    if ($after == 0) { // adding it to the top of the lesson
        $prevpageid = 0;
        // get the id of the first page.  If not found, then no pages in the lesson
        if (!$nextpageid = get_field('lesson_pages', 'id', 'prevpageid', 0, 'lessonid', $lessonid)) {
            $nextpageid = 0;
        }
    } else {
        // going after an actual page
        $prevpageid = $after;
        $nextpageid = get_field('lesson_pages', 'nextpageid', 'id', $after);
    }

    foreach ($branchtables as $branchtable) {

        // set the doubly linked list
        $branchtable->page->nextpageid = $nextpageid;
        $branchtable->page->prevpageid = $prevpageid;

        // insert the page
        if (!$id = insert_record('lesson_pages', $branchtable->page)) {
            error("insert page");
        }

        // update the link of the page previous to the one we just updated
        if ($prevpageid != 0) {  // if not the first page
            if (!set_field("lesson_pages", "nextpageid", $id, "id", $prevpageid)) {
                error("Insert page: unable to update next link $prevpageid");
            }
        }

        // insert the answers
        foreach ($branchtable->answers as $answer) {
            $answer->pageid = $id;
            if (!insert_record('lesson_answers', $answer)) {
                error("insert answer $id");
            }
        }

        $prevpageid = $id;
    }

    // all done with inserts.  Now check to update our last page (this is when we import between two lesson pages)
    if ($nextpageid != 0) {  // if the next page is not the end of lesson
        if (!set_field("lesson_pages", "prevpageid", $id, "id", $nextpageid)) {
            error("Insert page: unable to update previous link $nextpageid");
        }
    }

    return true;
}
/**
 * Save the chapter objects to the database
 *
 * @param array $chapters chapter objects as returned by book_create_objects()
 * @param int $bookid id of the book (not used here)
 * @param mixed $pageid not used here
 * @return bool true
 **/
function book_save_objects($chapters, $bookid, $pageid='0') {
    // nothing fancy, just save them all in order
    foreach ($chapters as $chapter) {
        $chapter->id = insert_record('book_chapters', $chapter);
        if (!$chapter->id) {
            error('Could not update your book');
        }
    }

    return true;
}