inc/pageutils.php

   1 <?php
   2 /**
   3  * Utilities for handling pagenames
   4  *
   5  * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
   6  * @author     Andreas Gohr <andi@splitbrain.org>
   7  * @todo       Combine similar functions like {wiki,media,meta}FN()
   8  */
   9
  10 /**
  11  * Fetch the an ID from request
  12  *
  13  * Uses either standard $_REQUEST variable or extracts it from
  14  * the full request URI when userewrite is set to 2
  15  *
  16  * For $param='id' $conf['start'] is returned if no id was found.
  17  * If the second parameter is true (default) the ID is cleaned.
  18  *
  19  * @author Andreas Gohr <andi@splitbrain.org>
  20  */
  21 function getID($param='id',$clean=true){
  22     global $conf;
  23
  24     $id = isset($_REQUEST[$param]) ? $_REQUEST[$param] : null;
  25
  26     $request = $_SERVER['REQUEST_URI'];
  27
  28     //construct page id from request URI
  29     if(empty($id) && $conf['userewrite'] == 2){
  30         //get the script URL
  31         if($conf['basedir']){
  32             $relpath = '';
  33             if($param != 'id') {
  34                 $relpath = 'lib/exe/';
  35             }
  36             $script = $conf['basedir'].$relpath.basename($_SERVER['SCRIPT_FILENAME']);
  37
  38         }elseif($_SERVER['DOCUMENT_ROOT'] && $_SERVER['PATH_TRANSLATED']){
  39             $request = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
  40                     $_SERVER['PATH_TRANSLATED']);
  41         }elseif($_SERVER['DOCUMENT_ROOT'] && $_SERVER['SCRIPT_FILENAME']){
  42             $script = preg_replace ('/^'.preg_quote($_SERVER['DOCUMENT_ROOT'],'/').'/','',
  43                     $_SERVER['SCRIPT_FILENAME']);
  44             $script = '/'.$script;
  45         }else{
  46             $script = $_SERVER['SCRIPT_NAME'];
  47         }
  48
  49         //clean script and request (fixes a windows problem)
  50         $script  = preg_replace('/\/\/+/','/',$script);
  51         $request = preg_replace('/\/\/+/','/',$request);
  52
  53         //remove script URL and Querystring to gain the id
  54         if(preg_match('/^'.preg_quote($script,'/').'(.*)/',$request, $match)){
  55             $id = preg_replace ('/\?.*/','',$match[1]);
  56         }
  57         $id = urldecode($id);
  58         //strip leading slashes
  59         $id = preg_replace('!^/+!','',$id);
  60     }
  61
  62     // Namespace autolinking from URL
  63     if(substr($id,-1) == ':' || ($conf['useslash'] && substr($id,-1) == '/')){
  64         if(page_exists($id.$conf['start'])){
  65             // start page inside namespace
  66             $id = $id.$conf['start'];
  67         }elseif(page_exists($id.noNS(cleanID($id)))){
  68             // page named like the NS inside the NS
  69             $id = $id.noNS(cleanID($id));
  70         }elseif(page_exists($id)){
  71             // page like namespace exists
  72             $id = substr($id,0,-1);
  73         }else{
  74             // fall back to default
  75             $id = $id.$conf['start'];
  76         }
  77         send_redirect(wl($id,'',true));
  78     }
  79
  80     if($clean) $id = cleanID($id);
  81     if(empty($id) && $param=='id') $id = $conf['start'];
  82
  83     return $id;
  84 }
  85
  86 /**
  87  * Remove unwanted chars from ID
  88  *
  89  * Cleans a given ID to only use allowed characters. Accented characters are
  90  * converted to unaccented ones
  91  *
  92  * @author Andreas Gohr <andi@splitbrain.org>
  93  * @param  string  $raw_id    The pageid to clean
  94  * @param  boolean $ascii     Force ASCII
  95  * @param  boolean $media     Allow leading or trailing _ for media files
  96  */
  97 function cleanID($raw_id,$ascii=false,$media=false){
  98     global $conf;
  99     global $lang;
 100     static $sepcharpat = null;
 101
 102     global $cache_cleanid;
 103     $cache = & $cache_cleanid;
 104
 105     // check if it's already in the memory cache
 106     if (isset($cache[(string)$raw_id])) {
 107         return $cache[(string)$raw_id];
 108     }
 109
 110     $sepchar = $conf['sepchar'];
 111     if($sepcharpat == null) // build string only once to save clock cycles
 112         $sepcharpat = '#\\'.$sepchar.'+#';
 113
 114     $id = trim((string)$raw_id);
 115     $id = utf8_strtolower($id);
 116
 117     //alternative namespace seperator
 118     $id = strtr($id,';',':');
 119     if($conf['useslash']){
 120         $id = strtr($id,'/',':');
 121     }else{
 122         $id = strtr($id,'/',$sepchar);
 123     }
 124
 125     if($conf['deaccent'] == 2 || $ascii) $id = utf8_romanize($id);
 126     if($conf['deaccent'] || $ascii) $id = utf8_deaccent($id,-1);
 127
 128     //remove specials
 129     $id = utf8_stripspecials($id,$sepchar,'\*');
 130
 131     if($ascii) $id = utf8_strip($id);
 132
 133     //clean up
 134     $id = preg_replace($sepcharpat,$sepchar,$id);
 135     $id = preg_replace('#:+#',':',$id);
 136     $id = ($media ? trim($id,':.-') : trim($id,':._-'));
 137     $id = preg_replace('#:[:\._\-]+#',':',$id);
 138
 139     $cache[(string)$raw_id] = $id;
 140     return($id);
 141 }
 142
 143 /**
 144  * Return namespacepart of a wiki ID
 145  *
 146  * @author Andreas Gohr <andi@splitbrain.org>
 147  */
 148 function getNS($id){
 149     $pos = strrpos((string)$id,':');
 150     if($pos!==false){
 151         return substr((string)$id,0,$pos);
 152     }
 153     return false;
 154 }
 155
 156 /**
 157  * Returns the ID without the namespace
 158  *
 159  * @author Andreas Gohr <andi@splitbrain.org>
 160  */
 161 function noNS($id) {
 162     $pos = strrpos($id, ':');
 163     if ($pos!==false) {
 164         return substr($id, $pos+1);
 165     } else {
 166         return $id;
 167     }
 168 }
 169
 170 /**
 171  * Returns the current namespace
 172  *
 173  * @author Nathan Fritz <fritzn@crown.edu>
 174  */
 175 function curNS($id) {
 176     return noNS(getNS($id));
 177 }
 178
 179 /**
 180  * Returns the ID without the namespace or current namespace for 'start' pages
 181  *
 182  * @author Nathan Fritz <fritzn@crown.edu>
 183  */
 184 function noNSorNS($id) {
 185     global $conf;
 186
 187     $p = noNS($id);
 188     if ($p == $conf['start']) {
 189         $p = curNS($id);
 190         if ($p == false) {
 191             return noNS($id);
 192         }
 193     }
 194     return $p;
 195 }
 196
 197 /**
 198  * Creates a XHTML valid linkid from a given headline title
 199  *
 200  * @param string  $title   The headline title
 201  * @param array   $check   List of existing IDs
 202  * @author Andreas Gohr <andi@splitbrain.org>
 203  */
 204 function sectionID($title,&$check) {
 205     $title = str_replace(array(':','.'),'',cleanID($title));
 206     $new = ltrim($title,'0123456789_-');
 207     if(empty($new)){
 208         $title = 'section'.preg_replace('/[^0-9]+/','',$title); //keep numbers from headline
 209     }else{
 210         $title = $new;
 211     }
 212
 213     if(is_array($check)){
 214         // make sure tiles are unique
 215         $num = '';
 216         while(in_array($title.$num,$check)){
 217             ($num) ? $num++ : $num = 1;
 218         }
 219         $title = $title.$num;
 220         $check[] = $title;
 221     }
 222
 223     return $title;
 224 }
 225
 226
 227 /**
 228  * Wiki page existence check
 229  *
 230  * parameters as for wikiFN
 231  *
 232  * @author Chris Smith <chris@jalakai.co.uk>
 233  */
 234 function page_exists($id,$rev='',$clean=true) {
 235     return @file_exists(wikiFN($id,$rev,$clean));
 236 }
 237
 238 /**
 239  * returns the full path to the datafile specified by ID and optional revision
 240  *
 241  * The filename is URL encoded to protect Unicode chars
 242  *
 243  * @param  $raw_id  string   id of wikipage
 244  * @param  $rev     string   page revision, empty string for current
 245  * @param  $clean   bool     flag indicating that $raw_id should be cleaned.  Only set to false
 246  *                           when $id is guaranteed to have been cleaned already.
 247  *
 248  * @author Andreas Gohr <andi@splitbrain.org>
 249  */
 250 function wikiFN($raw_id,$rev='',$clean=true){
 251     global $conf;
 252
 253     global $cache_wikifn;
 254     $cache = & $cache_wikifn;
 255
 256     if (isset($cache[$raw_id]) && isset($cache[$raw_id][$rev])) {
 257         return $cache[$raw_id][$rev];
 258     }
 259
 260     $id = $raw_id;
 261
 262     if ($clean) $id = cleanID($id);
 263     $id = str_replace(':','/',$id);
 264     if(empty($rev)){
 265         $fn = $conf['datadir'].'/'.utf8_encodeFN($id).'.txt';
 266     }else{
 267         $fn = $conf['olddir'].'/'.utf8_encodeFN($id).'.'.$rev.'.txt';
 268         if($conf['compression']){
 269             //test for extensions here, we want to read both compressions
 270             if (@file_exists($fn . '.gz')){
 271                 $fn .= '.gz';
 272             }else if(@file_exists($fn . '.bz2')){
 273                 $fn .= '.bz2';
 274             }else{
 275                 //file doesnt exist yet, so we take the configured extension
 276                 $fn .= '.' . $conf['compression'];
 277             }
 278         }
 279     }
 280
 281     if (!isset($cache[$raw_id])) { $cache[$raw_id] = array(); }
 282     $cache[$raw_id][$rev] = $fn;
 283     return $fn;
 284 }
 285
 286 /**
 287  * Returns the full path to the file for locking the page while editing.
 288  *
 289  * @author Ben Coburn <btcoburn@silicodon.net>
 290  */
 291 function wikiLockFN($id) {
 292     global $conf;
 293     return $conf['lockdir'].'/'.md5(cleanID($id)).'.lock';
 294 }
 295
 296
 297 /**
 298  * returns the full path to the meta file specified by ID and extension
 299  *
 300  * The filename is URL encoded to protect Unicode chars
 301  *
 302  * @author Steven Danz <steven-danz@kc.rr.com>
 303  */
 304 function metaFN($id,$ext){
 305     global $conf;
 306     $id = cleanID($id);
 307     $id = str_replace(':','/',$id);
 308     $fn = $conf['metadir'].'/'.utf8_encodeFN($id).$ext;
 309     return $fn;
 310 }
 311
 312 /**
 313  * returns an array of full paths to all metafiles of a given ID
 314  *
 315  * @author Esther Brunner <esther@kaffeehaus.ch>
 316  */
 317 function metaFiles($id){
 318     $name   = noNS($id);
 319     $ns     = getNS($id);
 320     $dir    = ($ns) ? metaFN($ns,'').'/' : metaFN($ns,'');
 321     $files  = array();
 322
 323     $dh = @opendir($dir);
 324     if(!$dh) return $files;
 325     while(($file = readdir($dh)) !== false){
 326         if(strpos($file,$name.'.') === 0 && !is_dir($dir.$file))
 327             $files[] = $dir.$file;
 328     }
 329     closedir($dh);
 330
 331     return $files;
 332 }
 333
 334 /**
 335  * returns the full path to the mediafile specified by ID
 336  *
 337  * The filename is URL encoded to protect Unicode chars
 338  *
 339  * @author Andreas Gohr <andi@splitbrain.org>
 340  */
 341 function mediaFN($id){
 342     global $conf;
 343     $id = cleanID($id);
 344     $id = str_replace(':','/',$id);
 345     $fn = $conf['mediadir'].'/'.utf8_encodeFN($id);
 346     return $fn;
 347 }
 348
 349 /**
 350  * Returns the full filepath to a localized textfile if local
 351  * version isn't found the english one is returned
 352  *
 353  * @author Andreas Gohr <andi@splitbrain.org>
 354  */
 355 function localeFN($id){
 356     global $conf;
 357     $file = DOKU_INC.'inc/lang/'.$conf['lang'].'/'.$id.'.txt';
 358     if(!@file_exists($file)){
 359         //fall back to english
 360         $file = DOKU_INC.'inc/lang/en/'.$id.'.txt';
 361     }
 362     return $file;
 363 }
 364
 365 /**
 366  * Resolve relative paths in IDs
 367  *
 368  * Do not call directly use resolve_mediaid or resolve_pageid
 369  * instead
 370  *
 371  * Partyly based on a cleanPath function found at
 372  * http://www.php.net/manual/en/function.realpath.php#57016
 373  *
 374  * @author <bart at mediawave dot nl>
 375  */
 376 function resolve_id($ns,$id,$clean=true){
 377     global $conf;
 378
 379     // some pre cleaning for useslash:
 380     if($conf['useslash']) $id = str_replace('/',':',$id);
 381
 382     // if the id starts with a dot we need to handle the
 383     // relative stuff
 384     if($id{0} == '.'){
 385         // normalize initial dots without a colon
 386         $id = preg_replace('/^(\.+)(?=[^:\.])/','\1:',$id);
 387         // prepend the current namespace
 388         $id = $ns.':'.$id;
 389
 390         // cleanup relatives
 391         $result = array();
 392         $pathA  = explode(':', $id);
 393         if (!$pathA[0]) $result[] = '';
 394         foreach ($pathA AS $key => $dir) {
 395             if ($dir == '..') {
 396                 if (end($result) == '..') {
 397                     $result[] = '..';
 398                 } elseif (!array_pop($result)) {
 399                     $result[] = '..';
 400                 }
 401             } elseif ($dir && $dir != '.') {
 402                 $result[] = $dir;
 403             }
 404         }
 405         if (!end($pathA)) $result[] = '';
 406         $id = implode(':', $result);
 407     }elseif($ns !== false && strpos($id,':') === false){
 408         //if link contains no namespace. add current namespace (if any)
 409         $id = $ns.':'.$id;
 410     }
 411
 412     if($clean) $id = cleanID($id);
 413     return $id;
 414 }
 415
 416 /**
 417  * Returns a full media id
 418  *
 419  * @author Andreas Gohr <andi@splitbrain.org>
 420  */
 421 function resolve_mediaid($ns,&$page,&$exists){
 422     $page   = resolve_id($ns,$page);
 423     $file   = mediaFN($page);
 424     $exists = @file_exists($file);
 425 }
 426
 427 /**
 428  * Returns a full page id
 429  *
 430  * @author Andreas Gohr <andi@splitbrain.org>
 431  */
 432 function resolve_pageid($ns,&$page,&$exists){
 433     global $conf;
 434     $exists = false;
 435
 436     //keep hashlink if exists then clean both parts
 437     if (strpos($page,'#')) {
 438         list($page,$hash) = explode('#',$page,2);
 439     } else {
 440         $hash = '';
 441     }
 442     $hash = cleanID($hash);
 443     $page = resolve_id($ns,$page,false); // resolve but don't clean, yet
 444
 445     // get filename (calls clean itself)
 446     $file = wikiFN($page);
 447
 448     // if ends with colon or slash we have a namespace link
 449     if(substr($page,-1) == ':' || ($conf['useslash'] && substr($page,-1) == '/')){
 450         if(page_exists($page.$conf['start'])){
 451             // start page inside namespace
 452             $page = $page.$conf['start'];
 453             $exists = true;
 454         }elseif(page_exists($page.noNS(cleanID($page)))){
 455             // page named like the NS inside the NS
 456             $page = $page.noNS(cleanID($page));
 457             $exists = true;
 458         }elseif(page_exists($page)){
 459             // page like namespace exists
 460             $page = $page;
 461             $exists = true;
 462         }else{
 463             // fall back to default
 464             $page = $page.$conf['start'];
 465         }
 466     }else{
 467         //check alternative plural/nonplural form
 468         if(!@file_exists($file)){
 469             if( $conf['autoplural'] ){
 470                 if(substr($page,-1) == 's'){
 471                     $try = substr($page,0,-1);
 472                 }else{
 473                     $try = $page.'s';
 474                 }
 475                 if(page_exists($try)){
 476                     $page   = $try;
 477                     $exists = true;
 478                 }
 479             }
 480         }else{
 481             $exists = true;
 482         }
 483     }
 484
 485     // now make sure we have a clean page
 486     $page = cleanID($page);
 487
 488     //add hash if any
 489     if(!empty($hash)) $page .= '#'.$hash;
 490 }
 491
 492 /**
 493  * Returns the name of a cachefile from given data
 494  *
 495  * The needed directory is created by this function!
 496  *
 497  * @author Andreas Gohr <andi@splitbrain.org>
 498  *
 499  * @param string $data  This data is used to create a unique md5 name
 500  * @param string $ext   This is appended to the filename if given
 501  * @return string       The filename of the cachefile
 502  */
 503 function getCacheName($data,$ext=''){
 504     global $conf;
 505     $md5  = md5($data);
 506     $file = $conf['cachedir'].'/'.$md5{0}.'/'.$md5.$ext;
 507     io_makeFileDir($file);
 508     return $file;
 509 }
 510
 511 /**
 512  * Checks a pageid against $conf['hidepages']
 513  *
 514  * @author Andreas Gohr <gohr@cosmocode.de>
 515  */
 516 function isHiddenPage($id){
 517     global $conf;
 518     global $ACT;
 519     if(empty($conf['hidepages'])) return false;
 520     if($ACT == 'admin') return false;
 521
 522     if(preg_match('/'.$conf['hidepages'].'/ui',':'.$id)){
 523         return true;
 524     }
 525     return false;
 526 }
 527
 528 /**
 529  * Reverse of isHiddenPage
 530  *
 531  * @author Andreas Gohr <gohr@cosmocode.de>
 532  */
 533 function isVisiblePage($id){
 534     return !isHiddenPage($id);
 535 }
 536
 537 /**
 538  * Format an id for output to a user
 539  *
 540  * Namespaces are denoted by a trailing “:*”. The root namespace is
 541  * “*”. Output is escaped.
 542  *
 543  * @author Adrian Lang <lang@cosmocode.de>
 544  */
 545
 546 function prettyprint_id($id) {
 547     if (!$id || $id === ':') {
 548         return '*';
 549     }
 550     if ((substr($id, -1, 1) === ':')) {
 551         $id .= '*';
 552     }
 553     return hsc($id);
 554 }