Whoops, forgot to edit WHATSNEW
[htmlpurifier/darkodev.git] / maintenance / generate-entity-file.php
blobff1713e39b6dc758524e5af27cbceab2a5d65de0
1 #!/usr/bin/php
2 <?php
4 chdir(dirname(__FILE__));
5 require_once 'common.php';
6 assertCli();
/**
 * @file
 * Parses *.ent files into an entity lookup table, then serializes the
 * table and writes it to a file. The resulting file is cached so that
 * this script does not need to be run. This script should rarely, if
 * ever, be run, since HTML's entities are fairly immutable.
 */
// Directory holding the *.ent entity definition files. The path is relative
// to the working directory, which is assumed to be the directory containing
// this script. The trailing slash is required because file names are
// appended to this value directly.
$entity_dir = '../docs/entities/';

// Path of the file that receives the serialized entity lookup table.
$output_file = '../library/HTMLPurifier/EntityLookup/entities.ser';
23 // courtesy of a PHP manual comment
/**
 * Encodes a Unicode code point as a UTF-8 byte string.
 *
 * Adapted from a PHP manual comment. Code points from U+10000 upwards are
 * emitted as a four-byte sequence; previously they fell into the three-byte
 * branch and produced a malformed lead byte.
 *
 * No validation is performed: surrogate code points and values above
 * U+10FFFF are encoded mechanically rather than rejected.
 *
 * @param int|string $dec Code point as a decimal integer. Numeric strings
 *                        (for example regex captures) are accepted.
 * @return string The UTF-8 encoding of the code point (one to four bytes).
 */
function unichr($dec)
{
    // Callers pass regex captures, so normalise numeric strings up front.
    $dec = (int) $dec;

    if ($dec < 0x80) {
        // Single byte: plain ASCII.
        return chr($dec);
    }

    if ($dec < 0x800) {
        // Two bytes: 110xxxxx 10xxxxxx
        return chr(0xC0 | ($dec >> 6))
            . chr(0x80 | ($dec & 0x3F));
    }

    if ($dec < 0x10000) {
        // Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
        return chr(0xE0 | ($dec >> 12))
            . chr(0x80 | (($dec >> 6) & 0x3F))
            . chr(0x80 | ($dec & 0x3F));
    }

    // Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    return chr(0xF0 | ($dec >> 18))
        . chr(0x80 | (($dec >> 12) & 0x3F))
        . chr(0x80 | (($dec >> 6) & 0x3F))
        . chr(0x80 | ($dec & 0x3F));
}
// Main script: collect the *.ent files, build the entity lookup table and
// write it out in serialized form.
//
// Failures are reported on STDERR followed by exit(1). Calling exit() with a
// string prints the message but terminates with status 0, which makes a
// failed run look successful to a calling shell or build script.
// NOTE(review): STDERR is only defined under the CLI SAPI; this relies on
// the assertCli() call at the top of the file enforcing that - confirm.

if (!is_dir($entity_dir)) {
    fwrite(STDERR, "Fatal Error: Can't find entity directory.\n");
    exit(1);
}
if (file_exists($output_file)) {
    // Never overwrite an existing cache file; it must be removed by hand.
    fwrite(STDERR, "Fatal Error: output file already exists.\n");
    exit(1);
}

// scandir() returns the entries sorted by name, so the order in which files
// are parsed (and therefore which definition wins when an entity name is
// declared twice) no longer depends on the filesystem's directory order.
// The warning is suppressed because the failure is reported explicitly.
$listing = @scandir($entity_dir);
if ($listing === false) {
    fwrite(STDERR, "Fatal Error: Cannot read entity directory.\n");
    exit(1);
}

$entity_files = array();
foreach ($listing as $file) {
    // Skip ".", ".." and hidden files.
    if ($file === '' || $file[0] === '.') {
        continue;
    }
    // Only files whose final extension is exactly "ent" are parsed.
    if (pathinfo($file, PATHINFO_EXTENSION) !== 'ent') {
        continue;
    }
    $entity_files[] = $file;
}

if (!$entity_files) {
    fwrite(STDERR, "Fatal Error: No entity files to parse.\n");
    exit(1);
}

$entity_table = array();
// Captures the entity name and its decimal code point. The optional "38;#"
// allows for declarations whose leading ampersand is itself written as &#38;.
$regexp = '/<!ENTITY\s+([A-Za-z0-9]+)\s+"&#(?:38;#)?([0-9]+);">/';

foreach ($entity_files as $file) {
    $contents = file_get_contents($entity_dir . $file);
    if ($contents === false) {
        fwrite(STDERR, "Fatal Error: Cannot read entity file $file.\n");
        exit(1);
    }
    $matches = array();
    preg_match_all($regexp, $contents, $matches, PREG_SET_ORDER);
    foreach ($matches as $match) {
        // $match[1] is the entity name, $match[2] its decimal code point.
        $entity_table[$match[1]] = unichr($match[2]);
    }
}

$output = serialize($entity_table);

// file_put_contents() returns the number of bytes written or false, so a
// failed or short write is detected instead of being silently ignored.
if (file_put_contents($output_file, $output) !== strlen($output)) {
    fwrite(STDERR, "Fatal Error: Cannot write output file.\n");
    exit(1);
}

echo "Completed successfully.";
75 // vim: et sw=4 sts=4