mod/lesson/importppt.php

   1 <?php // $Id$
   2 /**
   3  * This is a very rough importer for powerpoint slides
   4  * Export a powerpoint presentation with powerpoint as html pages
   5  * Do it with office 2002 (I think?) and no special settings
   6  * Then zip the directory with all of the html pages
   7  * and the zip file is what you want to upload
   8  *
   9  * The script supports book and lesson.
  10  *
  11  * @version $Id$
  12  * @license http://www.gnu.org/copyleft/gpl.html GNU Public License
  13  * @package lesson
  14  **/
  15
  16     require_once("../../config.php");
  17     require_once("locallib.php");
  18
  19     $id     = required_param('id', PARAM_INT);         // Course Module ID
  20     $pageid = optional_param('pageid', '', PARAM_INT); // Page ID
  21     global $matches;
  22
  23     if (! $cm = get_coursemodule_from_id('lesson', $id)) {
  24         error("Course Module ID was incorrect");
  25     }
  26
  27     if (! $course = get_record("course", "id", $cm->course)) {
  28         error("Course is misconfigured");
  29     }
  30
  31     // allows for adaption for multiple modules
  32     if(! $modname = get_field('modules', 'name', 'id', $cm->module)) {
  33         error("Could not find module name");
  34     }
  35
  36     if (! $mod = get_record($modname, "id", $cm->instance)) {
  37         error("Course module is incorrect");
  38     }
  39
  40     require_login($course->id, false, $cm);
  41     $context = get_context_instance(CONTEXT_MODULE, $cm->id);
  42     require_capability('mod/lesson:edit', $context);
  43
  44     $strimportppt = get_string("importppt", "lesson");
  45     $strlessons = get_string("modulenameplural", "lesson");
  46
  47     $navlinks = array();
  48     $navlinks[] = array('name' => $strlessons, 'link' => "index.php?id=$course->id", 'type' => 'activity');
  49     $navlinks[] = array('name' => format_string($mod->name,true), 'link' => "$CFG->wwwroot/mod/$modname/view.php?id=$cm->id", 'type' => 'activityinstance');
  50     $navlinks[] = array('name' => $strimportppt, 'link' => '', 'type' => 'title');
  51
  52     $navigation = build_navigation($navlinks);
  53
  54     print_header_simple("$strimportppt", " $strimportppt", $navigation);
  55
  56     if ($form = data_submitted()) {   /// Filename
  57
  58         if (empty($_FILES['newfile'])) {      // file was just uploaded
  59             notify(get_string("uploadproblem") );
  60         }
  61
  62         if ((!is_uploaded_file($_FILES['newfile']['tmp_name']) or $_FILES['newfile']['size'] == 0)) {
  63             notify(get_string("uploadnofilefound") );
  64
  65         } else {  // Valid file is found
  66
  67             if ($rawpages = readdata($_FILES, $course->id, $modname)) {  // first try to reall all of the data in
  68                 $pageobjects = extract_data($rawpages, $course->id, $mod->name, $modname); // parse all the html files into objects
  69                 clean_temp(); // all done with files so dump em
  70
  71                 $mod_create_objects = $modname.'_create_objects';
  72                 $mod_save_objects = $modname.'_save_objects';
  73
  74                 $objects = $mod_create_objects($pageobjects, $mod->id);  // function to preps the data to be sent to DB
  75
  76                 if(! $mod_save_objects($objects, $mod->id, $pageid)) {  // sends it to DB
  77                     error("could not save");
  78                 }
  79             } else {
  80                 error('could not get data');
  81             }
  82
  83             echo "<hr>";
  84             print_continue("$CFG->wwwroot/mod/$modname/view.php?id=$cm->id");
  85             print_footer($course);
  86             exit;
  87         }
  88     }
  89
  90     /// Print upload form
  91
  92     print_heading_with_help($strimportppt, "importppt", "lesson");
  93
  94     print_simple_box_start("center");
  95     echo "<form id=\"theform\" enctype=\"multipart/form-data\" method=\"post\">";
  96     echo "<input type=\"hidden\" name=\"id\" value=\"$cm->id\" />\n";
  97     echo "<input type=\"hidden\" name=\"pageid\" value=\"$pageid\" />\n";
  98     echo "<table cellpadding=\"5\">";
  99
 100     echo "<tr><td align=\"right\">";
 101     print_string("upload");
 102     echo ":</td><td>";
 103     echo "<input name=\"newfile\" type=\"file\" size=\"50\" />";
 104     echo "</td></tr><tr><td>&nbsp;</td><td>";
 105     echo "<input type=\"submit\" name=\"save\" value=\"".get_string("uploadthisfile")."\" />";
 106     echo "</td></tr>";
 107
 108     echo "</table>";
 109     echo "</form>";
 110     print_simple_box_end();
 111
 112     print_footer($course);
 113
 114 // START OF FUNCTIONS
 115
 116 function readdata($file, $courseid, $modname) {
 117 // this function expects a zip file to be uploaded.  Then it parses
 118 // outline.htm to determine the slide path.  Then parses each
 119 // slide to get data for the content
 120
 121     global $CFG;
 122
 123     // create an upload directory in temp
 124     make_upload_directory('temp/'.$modname);
 125
 126     $base = $CFG->dataroot."/temp/$modname/";
 127
 128     $zipfile = $_FILES["newfile"]["name"];
 129     $tempzipfile = $_FILES["newfile"]["tmp_name"];
 130
 131     // create our directory
 132     $path_parts = pathinfo($zipfile);
 133     $dirname = substr($zipfile, 0, strpos($zipfile, '.'.$path_parts['extension'])); // take off the extension
 134     if (!file_exists($base.$dirname)) {
 135         mkdir($base.$dirname);
 136     }
 137
 138     // move our uploaded file to temp/lesson
 139     move_uploaded_file($tempzipfile, $base.$zipfile);
 140
 141     // unzip it!
 142     unzip_file($base.$zipfile, $base, false);
 143
 144     $base = $base.$dirname;  // update the base
 145
 146     // this is the file where we get the names of the files for the slides (in the correct order too)
 147     $outline = $base.'/outline.htm';
 148
 149     $pages = array();
 150
 151     if (file_exists($outline) and is_readable($outline)) {
 152         $outlinecontents = file_get_contents($outline);
 153         $filenames = array();
 154         preg_match_all("/javascript:GoToSld\('(.*)'\)/", $outlinecontents, $filenames);  // this gets all of our files names
 155
 156         // file $pages with the contents of all of the slides
 157         foreach ($filenames[1] as $file) {
 158             $path = $base.'/'.$file;
 159             if (is_readable($path)) {
 160                 $pages[$path] = file_get_contents($path);
 161             } else {
 162                 return false;
 163             }
 164         }
 165     } else {
 166         // cannot find the outline, so grab all files that start with slide
 167         $dh  = opendir($base);
 168         while (false !== ($file = readdir($dh))) {  // read throug the directory
 169            if ('slide' == substr($file, 0, 5)) {  // check for name (may want to check extension later)
 170                 $path = $base.'/'.$file;
 171                 if (is_readable($path)) {
 172                     $pages[$path] = file_get_contents($path);
 173                 } else {
 174                     return false;
 175                 }
 176             }
 177         }
 178
 179         ksort($pages);  // order them by file name
 180     }
 181
 182     if (empty($pages)) {
 183         return false;
 184     }
 185
 186     return $pages;
 187 }
 188
 189 function extract_data($pages, $courseid, $lessonname, $modname) {
 190     // this function attempts to extract the content out of the slides
 191     // the slides are ugly broken xml.  and the xml is broken... yeah...
 192
 193     global $CFG;
 194     global $matches;
 195
 196     $extratedpages = array();
 197
 198     // directory for images
 199     make_mod_upload_directory($courseid); // make sure moddata is made
 200     make_upload_directory($courseid.'/moddata/'.$modname, false);  // we store our images in a subfolder in here
 201
 202     $imagedir = $CFG->dataroot.'/'.$courseid.'/moddata/'.$modname;
 203
 204     if ($CFG->slasharguments) {
 205         $imagelink = $CFG->wwwroot.'/file.php/'.$courseid.'/moddata/'.$modname;
 206     } else {
 207         $imagelink = $CFG->wwwroot.'/file.php?file=/'.$courseid.'/moddata/'.$modname;
 208     }
 209
 210     // try to make a unique subfolder to store the images
 211     $lessonname = str_replace(' ', '_', $lessonname); // get rid of spaces
 212     $i = 0;
 213     while(true) {
 214         if (!file_exists($imagedir.'/'.$lessonname.$i)) {
 215             // ok doesnt exist so make the directory and update our paths
 216             mkdir($imagedir.'/'.$lessonname.$i);
 217             $imagedir = $imagedir.'/'.$lessonname.$i;
 218             $imagelink = $imagelink.'/'.$lessonname.$i;
 219             break;
 220         }
 221         $i++;
 222     }
 223
 224     foreach ($pages as $file => $content) {
 225         // to make life easier on our preg_match_alls, we strip out all tags except
 226         // for div and img (where our content is).  We want div because sometimes we
 227         // can identify the content in the div based on the div's class
 228
 229         $tags = '<div><img>'; // should also allow <b><i>
 230         $string = strip_tags($content,$tags);
 231         //echo s($string);
 232
 233         $matches = array();
 234         // this will look for a non nested tag that is closed
 235         // want to allow <b><i>(maybe more) tags but when we do that
 236         // the preg_match messes up.
 237         preg_match_all("/(<([\w]+)[^>]*>)([^<\\2>]*)(<\/\\2>)/", $string, $matches);
 238         //(<([\w]+)[^>]*>)([^<\\2>]*)(<\/\\2>)  original pattern
 239         //(<(div+)[^>]*>)[^(<div*)](<\/div>) work in progress
 240
 241         $path_parts = pathinfo($file);
 242         $file = substr($path_parts['basename'], 0, strpos($path_parts['basename'], '.')); // get rid of the extension
 243
 244         $imgs = array();
 245         // this preg matches all images
 246         preg_match_all("/<img[^>]*(src\=\"(".$file."\_image[^>^\"]*)\"[^>]*)>/i", $string, $imgs);
 247
 248         // start building our page
 249         $page = new stdClass;
 250         $page->title = '';
 251         $page->contents = array();
 252         $page->images = array();
 253         $page->source = $path_parts['basename']; // need for book only
 254
 255         // this foreach keeps the style intact.  Found it doesn't help much.  But if you want back uncomment
 256         // this foreach and uncomment the line with the comment imgstyle in it.  Also need to comment out
 257         // the $page->images[]... line in the next foreach
 258         /*foreach ($imgs[1] as $img) {
 259             $page->images[] = '<img '.str_replace('src="', "src=\"$imagelink/", $img).' />';
 260         }*/
 261         foreach ($imgs[2] as $img) {
 262             copy($path_parts['dirname'].'/'.$img, $imagedir.'/'.$img);
 263             $page->images[] = "<img src=\"$imagelink/$img\" title=\"$img\" />";  // comment out this line if you are using the above foreach loop
 264         }
 265         for($i = 0; $i < count($matches[1]); $i++) { // go through all of our div matches
 266
 267             $class = isolate_class($matches[1][$i]); // first step in isolating the class
 268
 269             // check for any static classes
 270             switch ($class) {
 271                 case 'T':  // class T is used for Titles
 272                     $page->title = $matches[3][$i];
 273                     break;
 274                 case 'B':  // I would guess that all bullet lists would start with B then go to B1, B2, etc
 275                 case 'B1': // B1-B4 are just insurance, should just hit B and all be taken care of
 276                 case 'B2':
 277                 case 'B3':
 278                 case 'B4':
 279                     $page->contents[] = build_list('<ul>', $i, 0);  // this is a recursive function that will grab all the bullets and rebuild the list in html
 280                     break;
 281                 default:
 282                     if ($matches[3][$i] != '&#13;') {  // odd crap generated... sigh
 283                         if (substr($matches[3][$i], 0, 1) == ':') {  // check for leading :    ... hate MS ...
 284                             $page->contents[] = substr($matches[3][$i], 1);  // get rid of :
 285                         } else {
 286                             $page->contents[] = $matches[3][$i];
 287                         }
 288                     }
 289                     break;
 290             }
 291         }
 292         /*if (count($page->contents) == 0) {  // didnt find anything, grab everything
 293                                             // potential to pull in a lot of crap
 294             for($i = 0; $i < count($matches[1]); $i++) {
 295                 //if($class = isolate_class($matches[1][$i])) {
 296                     //if ($class == 'O') {
 297                         if ($matches[3][$i] != '&#13;') {  // odd crap generated... sigh
 298                             if (substr($matches[3][$i], 0, 1) == ':') {  // check for leading :    ... hate MS ...
 299                                 $page->contents[] = substr($matches[3][$i], 1);  // get rid of :
 300                             } else {
 301                                 $page->contents[] = $matches[3][$i];
 302                             }
 303                         }
 304                     //}
 305                 //}
 306             }
 307         }*/
 308         // add the page to the array;
 309         $extratedpages[] = $page;
 310
 311     } // end $pages foreach loop
 312
 313     return $extratedpages;
 314 }
 315
 316 /**
 317 A recursive function to build a html list
 318 */
 319 function build_list($list, &$i, $depth) {
 320     global $matches; // not sure why I global this...
 321
 322     while($i < count($matches[1])) {
 323
 324         $class = isolate_class($matches[1][$i]);
 325
 326         if (strstr($class, 'B')) {  // make sure we are still working with bullet classes
 327             if ($class == 'B') {
 328                 $this_depth = 0;  // calling class B depth 0
 329             } else {
 330                 // set the depth number.  So B1 is depth 1 and B2 is depth 2 and so on
 331                 $this_depth = substr($class, 1);
 332                 if (!is_numeric($this_depth)) {
 333                     error("Depth not parsed!");
 334                 }
 335             }
 336             if ($this_depth < $depth) {
 337                 // we are moving back a level in the nesting
 338                 break;
 339             }
 340             if ($this_depth > $depth) {
 341                 // we are moving in a lvl in nesting
 342                 $list .= '<ul>';
 343                 $list = build_list($list, $i, $this_depth);
 344                 // once we return back, should go to the start of the while
 345                 continue;
 346             }
 347             // no depth changes, so add the match to our list
 348             if ($cleanstring = ppt_clean_text($matches[3][$i])) {
 349                 $list .= '<li>'.ppt_clean_text($matches[3][$i]).'</li>';
 350             }
 351             $i++;
 352         } else {
 353             // not a B class, so get out of here...
 354             break;
 355         }
 356     }
 357     // end the list and return it
 358     $list .= '</ul>';
 359     return $list;
 360
 361 }
 362
 363 /**
 364 Given an html tag, this function will
 365 */
 366 function isolate_class($string) {
 367     if($class = strstr($string, 'class=')) { // first step in isolating the class
 368         $class = substr($class, strpos($class, '=')+1);  // this gets rid of <div blawblaw class=  there are no "" or '' around the class name   ...sigh...
 369         if (strstr($class, ' ')) {
 370             // spaces found, so cut off everything off after the first space
 371             return substr($class, 0, strpos($class, ' '));
 372         } else {
 373             // no spaces so nothing else in the div tag, cut off the >
 374             return substr($class, 0, strpos($class, '>'));
 375         }
 376     } else {
 377         // no class defined in the tag
 378         return '';
 379     }
 380 }
 381
 382 /**
 383 This function strips off the random chars that ppt puts infront of bullet lists
 384 */
 385 function ppt_clean_text($string) {
 386     $chop = 1; // default: just a single char infront of the content
 387
 388     // look for any other crazy things that may be infront of the content
 389     if (strstr($string, '&lt;') and strpos($string, '&lt;') == 0) {  // look for the &lt; in the sting and make sure it is in the front
 390         $chop = 4;  // increase the $chop
 391     }
 392     // may need to add more later....
 393
 394     $string = substr($string, $chop);
 395
 396     if ($string != '&#13;') {
 397         return $string;
 398     } else {
 399         return false;
 400     }
 401 }
 402
 403 /**
 404     Clean up the temp directory
 405 */
 406 function clean_temp() {
 407     global $CFG;
 408     // this function is broken, use it to clean up later
 409     // should only clean up what we made as well because someone else could be importing ppt as well
 410     //delDirContents($CFG->dataroot.'/temp/lesson');
 411 }
 412
 413 /**
 414     Creates objects an object with the page and answers that are to be inserted into the database
 415 */
 416 function lesson_create_objects($pageobjects, $lessonid) {
 417
 418     $branchtables = array();
 419     $branchtable = new stdClass;
 420
 421     // all pages have this info
 422     $page->lessonid = $lessonid;
 423     $page->prevpageid = 0;
 424     $page->nextpageid = 0;
 425     $page->qtype = LESSON_BRANCHTABLE;
 426     $page->qoption = 0;
 427     $page->layout = 1;
 428     $page->display = 1;
 429     $page->timecreated = time();
 430     $page->timemodified = 0;
 431
 432     // all answers are the same
 433     $answer->lessonid = $lessonid;
 434     $answer->jumpto = LESSON_NEXTPAGE;
 435     $answer->grade = 0;
 436     $answer->score = 0;
 437     $answer->flags = 0;
 438     $answer->timecreated = time();
 439     $answer->timemodified = 0;
 440     $answer->answer = "Next";
 441     $answer->response = "";
 442
 443     $answers[] = clone($answer);
 444
 445     $answer->jumpto = LESSON_PREVIOUSPAGE;
 446     $answer->answer = "Previous";
 447
 448     $answers[] = clone($answer);
 449
 450     $branchtable->answers = $answers;
 451
 452     $i = 1;
 453
 454     foreach ($pageobjects as $pageobject) {
 455         $temp = prep_page($pageobject, $i);  // makes our title and contents
 456         $page->title = $temp->title;
 457         $page->contents = $temp->contents;
 458         $branchtable->page = clone($page);  // add the page
 459         $branchtables[] = clone($branchtable);  // add it all to our array
 460         $i++;
 461     }
 462
 463     return $branchtables;
 464 }
 465
 466 /**
 467     Creates objects an chapter object that is to be inserted into the database
 468 */
 469 function book_create_objects($pageobjects, $bookid) {
 470
 471     $chapters = array();
 472     $chapter = new stdClass;
 473
 474     // same for all chapters
 475     $chapter->bookid = $bookid;
 476     $chapter->pagenum = count_records('book_chapters', 'bookid', $bookid)+1;
 477     $chapter->timecreated = time();
 478     $chapter->timemodified = time();
 479     $chapter->subchapter = 0;
 480
 481     $i = 1;
 482     foreach ($pageobjects as $pageobject) {
 483         $page = prep_page($pageobject, $i);  // get title and contents
 484         $chapter->importsrc = addslashes($pageobject->source); // add the source
 485         $chapter->title = $page->title;
 486         $chapter->content = $page->contents;
 487         $chapters[] = $chapter;
 488
 489         // increment our page number and our counter
 490         $chapter->pagenum = $chapter->pagenum + 1;
 491         $i++;
 492     }
 493
 494     return $chapters;
 495 }
 496
 497 /**
 498     Builds the title and content strings from an object
 499 */
 500 function prep_page($pageobject, $count) {
 501     if ($pageobject->title == '') {
 502         $page->title = "Page $count";  // no title set so make a generic one
 503     } else {
 504         $page->title = addslashes($pageobject->title);
 505     }
 506
 507     $page->contents = '';
 508
 509     // nab all the images first
 510     foreach ($pageobject->images as $image) {
 511         $image = str_replace("\n", '', $image);
 512         $image = str_replace("\r", '', $image);
 513         $image = str_replace("'", '"', $image);  // imgstyle
 514
 515         $page->contents .= addslashes($image);
 516     }
 517     // go through the contents array and put <p> tags around each element and strip out \n which I have found to be uneccessary
 518     foreach ($pageobject->contents as $content) {
 519         $content = str_replace("\n", '', $content);
 520         $content = str_replace("\r", '', $content);
 521         $content = str_replace('&#13;', '', $content);  // puts in returns?
 522         $content = '<p>'.$content.'</p>';
 523         $page->contents .= addslashes($content);
 524     }
 525     return $page;
 526 }
 527
 528 /**
 529     Saves the branchtable objects to the DB
 530 */
 531 function lesson_save_objects($branchtables, $lessonid, $after) {
 532     // first set up the prevpageid and nextpageid
 533     if ($after == 0) { // adding it to the top of the lesson
 534         $prevpageid = 0;
 535         // get the id of the first page.  If not found, then no pages in the lesson
 536         if (!$nextpageid = get_field('lesson_pages', 'id', 'prevpageid', 0, 'lessonid', $lessonid)) {
 537             $nextpageid = 0;
 538         }
 539     } else {
 540         // going after an actual page
 541         $prevpageid = $after;
 542         $nextpageid = get_field('lesson_pages', 'nextpageid', 'id', $after);
 543     }
 544
 545     foreach ($branchtables as $branchtable) {
 546
 547         // set the doubly linked list
 548         $branchtable->page->nextpageid = $nextpageid;
 549         $branchtable->page->prevpageid = $prevpageid;
 550
 551         // insert the page
 552         if(!$id = insert_record('lesson_pages', $branchtable->page)) {
 553             error("insert page");
 554         }
 555
 556         // update the link of the page previous to the one we just updated
 557         if ($prevpageid != 0) {  // if not the first page
 558             if (!set_field("lesson_pages", "nextpageid", $id, "id", $prevpageid)) {
 559                 error("Insert page: unable to update next link $prevpageid");
 560             }
 561         }
 562
 563         // insert the answers
 564         foreach ($branchtable->answers as $answer) {
 565             $answer->pageid = $id;
 566             if(!insert_record('lesson_answers', $answer)) {
 567                 error("insert answer $id");
 568             }
 569         }
 570
 571         $prevpageid = $id;
 572     }
 573
 574     // all done with inserts.  Now check to update our last page (this is when we import between two lesson pages)
 575     if ($nextpageid != 0) {  // if the next page is not the end of lesson
 576         if (!set_field("lesson_pages", "prevpageid", $id, "id", $nextpageid)) {
 577             error("Insert page: unable to update next link $prevpageid");
 578         }
 579     }
 580
 581     return true;
 582 }
 583
 584 /**
 585     Save the chapter objects to the database
 586 */
 587 function book_save_objects($chapters, $bookid, $pageid='0') {
 588     // nothing fancy, just save them all in order
 589     foreach ($chapters as $chapter) {
 590         if (!$chapter->id = insert_record('book_chapters', $chapter)) {
 591             error('Could not update your book');
 592         }
 593     }
 594     return true;
 595 }
 596
 597 ?>