languages/Language.php

   1 <?php
   2 /**
   3  * Internationalisation code.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  * @ingroup Language
  22  */
  23
  24 /**
  25  * @defgroup Language Language
  26  */
  27
  28 if ( !defined( 'MEDIAWIKI' ) ) {
  29         echo "This file is part of MediaWiki, it is not a valid entry point.\n";
  30         exit( 1 );
  31 }
  32
  33 # Read language names
  34 global $wgLanguageNames;
  35 require_once( __DIR__ . '/Names.php' );
  36
  37 if ( function_exists( 'mb_strtoupper' ) ) {
  38         mb_internal_encoding( 'UTF-8' );
  39 }
  40
  41 /**
  42  * a fake language converter
  43  *
  44  * @ingroup Language
  45  */
  46 class FakeConverter {
  47         /**
  48          * @var Language
  49          */
  50         public $mLang;
  51         function __construct( $langobj ) { $this->mLang = $langobj; }
  52         function autoConvertToAllVariants( $text ) { return array( $this->mLang->getCode() => $text ); }
  53         function convert( $t ) { return $t; }
  54         function convertTo( $text, $variant ) { return $text; }
  55         function convertTitle( $t ) { return $t->getPrefixedText(); }
  56         function convertNamespace( $ns ) { return $this->mLang->getFormattedNsText( $ns ); }
  57         function getVariants() { return array( $this->mLang->getCode() ); }
  58         function getPreferredVariant() { return $this->mLang->getCode(); }
  59         function getDefaultVariant() { return $this->mLang->getCode(); }
  60         function getURLVariant() { return ''; }
  61         function getConvRuleTitle() { return false; }
  62         function findVariantLink( &$l, &$n, $ignoreOtherCond = false ) { }
  63         function getExtraHashOptions() { return ''; }
  64         function getParsedTitle() { return ''; }
  65         function markNoConversion( $text, $noParse = false ) { return $text; }
  66         function convertCategoryKey( $key ) { return $key; }
  67         function convertLinkToAllVariants( $text ) { return $this->autoConvertToAllVariants( $text ); }
  68         function armourMath( $text ) { return $text; }
  69 }
  70
  71 /**
  72  * Internationalisation code
  73  * @ingroup Language
  74  */
  75 class Language {
  76
  77         /**
  78          * @var LanguageConverter
  79          */
  80         public $mConverter;
  81
  82         public $mVariants, $mCode, $mLoaded = false;
  83         public $mMagicExtensions = array(), $mMagicHookDone = false;
  84         private $mHtmlCode = null;
  85
  86         public $dateFormatStrings = array();
  87         public $mExtendedSpecialPageAliases;
  88
  89         protected $namespaceNames, $mNamespaceIds, $namespaceAliases;
  90
  91         /**
  92          * ReplacementArray object caches
  93          */
  94         public $transformData = array();
  95
  96         /**
  97          * @var LocalisationCache
  98          */
  99         static public $dataCache;
 100
 101         static public $mLangObjCache = array();
 102
 103         static public $mWeekdayMsgs = array(
 104                 'sunday', 'monday', 'tuesday', 'wednesday', 'thursday',
 105                 'friday', 'saturday'
 106         );
 107
 108         static public $mWeekdayAbbrevMsgs = array(
 109                 'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
 110         );
 111
 112         static public $mMonthMsgs = array(
 113                 'january', 'february', 'march', 'april', 'may_long', 'june',
 114                 'july', 'august', 'september', 'october', 'november',
 115                 'december'
 116         );
 117         static public $mMonthGenMsgs = array(
 118                 'january-gen', 'february-gen', 'march-gen', 'april-gen', 'may-gen', 'june-gen',
 119                 'july-gen', 'august-gen', 'september-gen', 'october-gen', 'november-gen',
 120                 'december-gen'
 121         );
 122         static public $mMonthAbbrevMsgs = array(
 123                 'jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug',
 124                 'sep', 'oct', 'nov', 'dec'
 125         );
 126
 127         static public $mIranianCalendarMonthMsgs = array(
 128                 'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
 129                 'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
 130                 'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
 131                 'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
 132         );
 133
 134         static public $mHebrewCalendarMonthMsgs = array(
 135                 'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
 136                 'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
 137                 'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
 138                 'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
 139                 'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
 140         );
 141
 142         static public $mHebrewCalendarMonthGenMsgs = array(
 143                 'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
 144                 'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
 145                 'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
 146                 'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
 147                 'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
 148         );
 149
 150         static public $mHijriCalendarMonthMsgs = array(
 151                 'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
 152                 'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
 153                 'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
 154                 'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
 155         );
 156
 157         /**
 158          * @since 1.20
 159          * @var array
 160          */
 161         static public $durationIntervals = array(
 162                 'millennia' => 31556952000,
 163                 'centuries' => 3155695200,
 164                 'decades' => 315569520,
 165                 'years' => 31556952, // 86400 * ( 365 + ( 24 * 3 + 25 ) / 400 )
 166                 'weeks' => 604800,
 167                 'days' => 86400,
 168                 'hours' => 3600,
 169                 'minutes' => 60,
 170                 'seconds' => 1,
 171         );
 172
 173         /**
 174          * Get a cached or new language object for a given language code
 175          * @param $code String
 176          * @return Language
 177          */
 178         static function factory( $code ) {
 179                 global $wgDummyLanguageCodes, $wgLangObjCacheSize;
 180
 181                 if ( isset( $wgDummyLanguageCodes[$code] ) ) {
 182                         $code = $wgDummyLanguageCodes[$code];
 183                 }
 184
 185                 // get the language object to process
 186                 $langObj = isset( self::$mLangObjCache[$code] )
 187                         ? self::$mLangObjCache[$code]
 188                         : self::newFromCode( $code );
 189
 190                 // merge the language object in to get it up front in the cache
 191                 self::$mLangObjCache = array_merge( array( $code => $langObj ), self::$mLangObjCache );
 192                 // get rid of the oldest ones in case we have an overflow
 193                 self::$mLangObjCache = array_slice( self::$mLangObjCache, 0, $wgLangObjCacheSize, true );
 194
 195                 return $langObj;
 196         }
 197
 198         /**
 199          * Create a language object for a given language code
 200          * @param $code String
 201          * @throws MWException
 202          * @return Language
 203          */
 204         protected static function newFromCode( $code ) {
 205                 // Protect against path traversal below
 206                 if ( !Language::isValidCode( $code )
 207                         || strcspn( $code, ":/\\\000" ) !== strlen( $code ) )
 208                 {
 209                         throw new MWException( "Invalid language code \"$code\"" );
 210                 }
 211
 212                 if ( !Language::isValidBuiltInCode( $code ) ) {
 213                         // It's not possible to customise this code with class files, so
 214                         // just return a Language object. This is to support uselang= hacks.
 215                         $lang = new Language;
 216                         $lang->setCode( $code );
 217                         return $lang;
 218                 }
 219
 220                 // Check if there is a language class for the code
 221                 $class = self::classFromCode( $code );
 222                 self::preloadLanguageClass( $class );
 223                 if ( MWInit::classExists( $class ) ) {
 224                         $lang = new $class;
 225                         return $lang;
 226                 }
 227
 228                 // Keep trying the fallback list until we find an existing class
 229                 $fallbacks = Language::getFallbacksFor( $code );
 230                 foreach ( $fallbacks as $fallbackCode ) {
 231                         if ( !Language::isValidBuiltInCode( $fallbackCode ) ) {
 232                                 throw new MWException( "Invalid fallback '$fallbackCode' in fallback sequence for '$code'" );
 233                         }
 234
 235                         $class = self::classFromCode( $fallbackCode );
 236                         self::preloadLanguageClass( $class );
 237                         if ( MWInit::classExists( $class ) ) {
 238                                 $lang = Language::newFromCode( $fallbackCode );
 239                                 $lang->setCode( $code );
 240                                 return $lang;
 241                         }
 242                 }
 243
 244                 throw new MWException( "Invalid fallback sequence for language '$code'" );
 245         }
 246
 247         /**
 248          * Checks whether any localisation is available for that language tag
 249          * in MediaWiki (MessagesXx.php exists).
 250          *
 251          * @param string $code Language tag (in lower case)
 252          * @return bool Whether language is supported
 253          * @since 1.21
 254          */
 255         public static function isSupportedLanguage( $code ) {
 256                 return $code === strtolower( $code ) && is_readable( self::getMessagesFileName( $code ) );
 257         }
 258
 259         /**
 260          * Returns true if a language code string is a well-formed language tag
 261          * according to RFC 5646.
 262          * This function only checks well-formedness; it doesn't check that
 263          * language, script or variant codes actually exist in the repositories.
 264          *
 265          * Based on regexes by Mark Davis of the Unicode Consortium:
 266          * http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
 267          *
 268          * @param $code string
 269          * @param $lenient boolean Whether to allow '_' as separator. The default is only '-'.
 270          *
 271          * @return bool
 272          * @since 1.21
 273          */
 274         public static function isWellFormedLanguageTag( $code, $lenient = false ) {
 275                 $alpha = '[a-z]';
 276                 $digit = '[0-9]';
 277                 $alphanum = '[a-z0-9]';
 278                 $x = 'x' ; # private use singleton
 279                 $singleton = '[a-wy-z]'; # other singleton
 280                 $s = $lenient ? '[-_]' : '-';
 281
 282                 $language = "$alpha{2,8}|$alpha{2,3}$s$alpha{3}";
 283                 $script = "$alpha{4}"; # ISO 15924
 284                 $region = "(?:$alpha{2}|$digit{3})"; # ISO 3166-1 alpha-2 or UN M.49
 285                 $variant = "(?:$alphanum{5,8}|$digit$alphanum{3})";
 286                 $extension = "$singleton(?:$s$alphanum{2,8})+";
 287                 $privateUse = "$x(?:$s$alphanum{1,8})+";
 288
 289                 # Define certain grandfathered codes, since otherwise the regex is pretty useless.
 290                 # Since these are limited, this is safe even later changes to the registry --
 291                 # the only oddity is that it might change the type of the tag, and thus
 292                 # the results from the capturing groups.
 293                 # http://www.iana.org/assignments/language-subtag-registry
 294
 295                 $grandfathered = "en{$s}GB{$s}oed"
 296                         . "|i{$s}(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)"
 297                         . "|no{$s}(?:bok|nyn)"
 298                         . "|sgn{$s}(?:BE{$s}(?:fr|nl)|CH{$s}de)"
 299                         . "|zh{$s}min{$s}nan";
 300
 301                 $variantList = "$variant(?:$s$variant)*";
 302                 $extensionList = "$extension(?:$s$extension)*";
 303
 304                 $langtag = "(?:($language)"
 305                         . "(?:$s$script)?"
 306                         . "(?:$s$region)?"
 307                         . "(?:$s$variantList)?"
 308                         . "(?:$s$extensionList)?"
 309                         . "(?:$s$privateUse)?)";
 310
 311                 # The final breakdown, with capturing groups for each of these components
 312                 # The variants, extensions, grandfathered, and private-use may have interior '-'
 313
 314                 $root = "^(?:$langtag|$privateUse|$grandfathered)$";
 315
 316                 return (bool)preg_match( "/$root/", strtolower( $code ) );
 317         }
 318
 319         /**
 320          * Returns true if a language code string is of a valid form, whether or
 321          * not it exists. This includes codes which are used solely for
 322          * customisation via the MediaWiki namespace.
 323          *
 324          * @param $code string
 325          *
 326          * @return bool
 327          */
 328         public static function isValidCode( $code ) {
 329                 return
 330                         // People think language codes are html safe, so enforce it.
 331                         // Ideally we should only allow a-zA-Z0-9-
 332                         // but, .+ and other chars are often used for {{int:}} hacks
 333                         // see bugs 37564, 37587, 36938
 334                         strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code )
 335                         && !preg_match( Title::getTitleInvalidRegex(), $code );
 336         }
 337
 338         /**
 339          * Returns true if a language code is of a valid form for the purposes of
 340          * internal customisation of MediaWiki, via Messages*.php.
 341          *
 342          * @param $code string
 343          *
 344          * @throws MWException
 345          * @since 1.18
 346          * @return bool
 347          */
 348         public static function isValidBuiltInCode( $code ) {
 349
 350                 if ( !is_string( $code ) ) {
 351                         if ( is_object( $code ) ) {
 352                                 $addmsg = " of class " . get_class( $code );
 353                         } else {
 354                                 $addmsg = '';
 355                         }
 356                         $type = gettype( $code );
 357                         throw new MWException( __METHOD__ . " must be passed a string, $type given$addmsg" );
 358                 }
 359
 360                 return (bool)preg_match( '/^[a-z0-9-]+$/i', $code );
 361         }
 362
 363         /**
 364          * Returns true if a language code is an IETF tag known to MediaWiki.
 365          *
 366          * @param $code string
 367          *
 368          * @since 1.21
 369          * @return bool
 370          */
 371         public static function isKnownLanguageTag( $tag ) {
 372                 static $coreLanguageNames;
 373
 374                 if ( $coreLanguageNames === null ) {
 375                         include( MWInit::compiledPath( 'languages/Names.php' ) );
 376                 }
 377
 378                 if ( isset( $coreLanguageNames[$tag] )
 379                         || self::fetchLanguageName( $tag, $tag ) !== ''
 380                 ) {
 381                         return true;
 382                 }
 383
 384                 return false;
 385         }
 386
 387         /**
 388          * @param $code
 389          * @return String Name of the language class
 390          */
 391         public static function classFromCode( $code ) {
 392                 if ( $code == 'en' ) {
 393                         return 'Language';
 394                 } else {
 395                         return 'Language' . str_replace( '-', '_', ucfirst( $code ) );
 396                 }
 397         }
 398
 399         /**
 400          * Includes language class files
 401          *
 402          * @param $class string Name of the language class
 403          */
 404         public static function preloadLanguageClass( $class ) {
 405                 global $IP;
 406
 407                 if ( $class === 'Language' ) {
 408                         return;
 409                 }
 410
 411                 if ( !defined( 'MW_COMPILED' ) ) {
 412                         if ( file_exists( "$IP/languages/classes/$class.php" ) ) {
 413                                 include_once( "$IP/languages/classes/$class.php" );
 414                         }
 415                 }
 416         }
 417
 418         /**
 419          * Get the LocalisationCache instance
 420          *
 421          * @return LocalisationCache
 422          */
 423         public static function getLocalisationCache() {
 424                 if ( is_null( self::$dataCache ) ) {
 425                         global $wgLocalisationCacheConf;
 426                         $class = $wgLocalisationCacheConf['class'];
 427                         self::$dataCache = new $class( $wgLocalisationCacheConf );
 428                 }
 429                 return self::$dataCache;
 430         }
 431
 432         function __construct() {
 433                 $this->mConverter = new FakeConverter( $this );
 434                 // Set the code to the name of the descendant
 435                 if ( get_class( $this ) == 'Language' ) {
 436                         $this->mCode = 'en';
 437                 } else {
 438                         $this->mCode = str_replace( '_', '-', strtolower( substr( get_class( $this ), 8 ) ) );
 439                 }
 440                 self::getLocalisationCache();
 441         }
 442
 443         /**
 444          * Reduce memory usage
 445          */
 446         function __destruct() {
 447                 foreach ( $this as $name => $value ) {
 448                         unset( $this->$name );
 449                 }
 450         }
 451
 452         /**
 453          * Hook which will be called if this is the content language.
 454          * Descendants can use this to register hook functions or modify globals
 455          */
 456         function initContLang() { }
 457
 458         /**
 459          * Same as getFallbacksFor for current language.
 460          * @return array|bool
 461          * @deprecated in 1.19
 462          */
 463         function getFallbackLanguageCode() {
 464                 wfDeprecated( __METHOD__, '1.19' );
 465                 return self::getFallbackFor( $this->mCode );
 466         }
 467
 468         /**
 469          * @return array
 470          * @since 1.19
 471          */
 472         function getFallbackLanguages() {
 473                 return self::getFallbacksFor( $this->mCode );
 474         }
 475
 476         /**
 477          * Exports $wgBookstoreListEn
 478          * @return array
 479          */
 480         function getBookstoreList() {
 481                 return self::$dataCache->getItem( $this->mCode, 'bookstoreList' );
 482         }
 483
 484         /**
 485          * @return array
 486          */
 487         public function getNamespaces() {
 488                 if ( is_null( $this->namespaceNames ) ) {
 489                         global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;
 490
 491                         $this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
 492                         $validNamespaces = MWNamespace::getCanonicalNamespaces();
 493
 494                         $this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $validNamespaces;
 495
 496                         $this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
 497                         if ( $wgMetaNamespaceTalk ) {
 498                                 $this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk;
 499                         } else {
 500                                 $talk = $this->namespaceNames[NS_PROJECT_TALK];
 501                                 $this->namespaceNames[NS_PROJECT_TALK] =
 502                                         $this->fixVariableInNamespace( $talk );
 503                         }
 504
 505                         # Sometimes a language will be localised but not actually exist on this wiki.
 506                         foreach ( $this->namespaceNames as $key => $text ) {
 507                                 if ( !isset( $validNamespaces[$key] ) ) {
 508                                         unset( $this->namespaceNames[$key] );
 509                                 }
 510                         }
 511
 512                         # The above mixing may leave namespaces out of canonical order.
 513                         # Re-order by namespace ID number...
 514                         ksort( $this->namespaceNames );
 515
 516                         wfRunHooks( 'LanguageGetNamespaces', array( &$this->namespaceNames ) );
 517                 }
 518                 return $this->namespaceNames;
 519         }
 520
 521         /**
 522          * Arbitrarily set all of the namespace names at once. Mainly used for testing
 523          * @param $namespaces Array of namespaces (id => name)
 524          */
 525         public function setNamespaces( array $namespaces ) {
 526                 $this->namespaceNames = $namespaces;
 527                 $this->mNamespaceIds = null;
 528         }
 529
 530         /**
 531          * Resets all of the namespace caches. Mainly used for testing
 532          */
 533         public function resetNamespaces() {
 534                 $this->namespaceNames = null;
 535                 $this->mNamespaceIds = null;
 536                 $this->namespaceAliases = null;
 537         }
 538
 539         /**
 540          * A convenience function that returns the same thing as
 541          * getNamespaces() except with the array values changed to ' '
 542          * where it found '_', useful for producing output to be displayed
 543          * e.g. in <select> forms.
 544          *
 545          * @return array
 546          */
 547         function getFormattedNamespaces() {
 548                 $ns = $this->getNamespaces();
 549                 foreach ( $ns as $k => $v ) {
 550                         $ns[$k] = strtr( $v, '_', ' ' );
 551                 }
 552                 return $ns;
 553         }
 554
 555         /**
 556          * Get a namespace value by key
 557          * <code>
 558          * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 559          * echo $mw_ns; // prints 'MediaWiki'
 560          * </code>
 561          *
 562          * @param $index Int: the array key of the namespace to return
 563          * @return mixed, string if the namespace value exists, otherwise false
 564          */
 565         function getNsText( $index ) {
 566                 $ns = $this->getNamespaces();
 567                 return isset( $ns[$index] ) ? $ns[$index] : false;
 568         }
 569
 570         /**
 571          * A convenience function that returns the same thing as
 572          * getNsText() except with '_' changed to ' ', useful for
 573          * producing output.
 574          *
 575          * <code>
 576          * $mw_ns = $wgContLang->getFormattedNsText( NS_MEDIAWIKI_TALK );
 577          * echo $mw_ns; // prints 'MediaWiki talk'
 578          * </code>
 579          *
 580          * @param int $index The array key of the namespace to return
 581          * @return string Namespace name without underscores (empty string if namespace does not exist)
 582          */
 583         function getFormattedNsText( $index ) {
 584                 $ns = $this->getNsText( $index );
 585                 return strtr( $ns, '_', ' ' );
 586         }
 587
 588         /**
 589          * Returns gender-dependent namespace alias if available.
 590          * @param $index Int: namespace index
 591          * @param $gender String: gender key (male, female... )
 592          * @return String
 593          * @since 1.18
 594          */
 595         function getGenderNsText( $index, $gender ) {
 596                 global $wgExtraGenderNamespaces;
 597
 598                 $ns = $wgExtraGenderNamespaces + self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 599                 return isset( $ns[$index][$gender] ) ? $ns[$index][$gender] : $this->getNsText( $index );
 600         }
 601
 602         /**
         * Whether this language distinguishes genders, for example in
         * namespaces.
 605          * @return bool
 606          * @since 1.18
 607          */
 608         function needsGenderDistinction() {
 609                 global $wgExtraGenderNamespaces, $wgExtraNamespaces;
 610                 if ( count( $wgExtraGenderNamespaces ) > 0 ) {
 611                         // $wgExtraGenderNamespaces overrides everything
 612                         return true;
 613                 } elseif ( isset( $wgExtraNamespaces[NS_USER] ) && isset( $wgExtraNamespaces[NS_USER_TALK] ) ) {
 614                         /// @todo There may be other gender namespace than NS_USER & NS_USER_TALK in the future
 615                         // $wgExtraNamespaces overrides any gender aliases specified in i18n files
 616                         return false;
 617                 } else {
 618                         // Check what is in i18n files
 619                         $aliases = self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 620                         return count( $aliases ) > 0;
 621                 }
 622         }
 623
 624         /**
 625          * Get a namespace key by value, case insensitive.
 626          * Only matches namespace names for the current language, not the
 627          * canonical ones defined in Namespace.php.
 628          *
 629          * @param $text String
 630          * @return mixed An integer if $text is a valid value otherwise false
 631          */
 632         function getLocalNsIndex( $text ) {
 633                 $lctext = $this->lc( $text );
 634                 $ids = $this->getNamespaceIds();
 635                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 636         }
 637
 638         /**
 639          * @return array
 640          */
 641         function getNamespaceAliases() {
 642                 if ( is_null( $this->namespaceAliases ) ) {
 643                         $aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
 644                         if ( !$aliases ) {
 645                                 $aliases = array();
 646                         } else {
 647                                 foreach ( $aliases as $name => $index ) {
 648                                         if ( $index === NS_PROJECT_TALK ) {
 649                                                 unset( $aliases[$name] );
 650                                                 $name = $this->fixVariableInNamespace( $name );
 651                                                 $aliases[$name] = $index;
 652                                         }
 653                                 }
 654                         }
 655
 656                         global $wgExtraGenderNamespaces;
 657                         $genders = $wgExtraGenderNamespaces + (array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
 658                         foreach ( $genders as $index => $forms ) {
 659                                 foreach ( $forms as $alias ) {
 660                                         $aliases[$alias] = $index;
 661                                 }
 662                         }
 663
 664                         $this->namespaceAliases = $aliases;
 665                 }
 666                 return $this->namespaceAliases;
 667         }
 668
 669         /**
 670          * @return array
 671          */
 672         function getNamespaceIds() {
 673                 if ( is_null( $this->mNamespaceIds ) ) {
 674                         global $wgNamespaceAliases;
 675                         # Put namespace names and aliases into a hashtable.
 676                         # If this is too slow, then we should arrange it so that it is done
 677                         # before caching. The catch is that at pre-cache time, the above
 678                         # class-specific fixup hasn't been done.
 679                         $this->mNamespaceIds = array();
 680                         foreach ( $this->getNamespaces() as $index => $name ) {
 681                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 682                         }
 683                         foreach ( $this->getNamespaceAliases() as $name => $index ) {
 684                                 $this->mNamespaceIds[$this->lc( $name )] = $index;
 685                         }
 686                         if ( $wgNamespaceAliases ) {
 687                                 foreach ( $wgNamespaceAliases as $name => $index ) {
 688                                         $this->mNamespaceIds[$this->lc( $name )] = $index;
 689                                 }
 690                         }
 691                 }
 692                 return $this->mNamespaceIds;
 693         }
 694
 695         /**
 696          * Get a namespace key by value, case insensitive.  Canonical namespace
 697          * names override custom ones defined for the current language.
 698          *
 699          * @param $text String
 700          * @return mixed An integer if $text is a valid value otherwise false
 701          */
 702         function getNsIndex( $text ) {
 703                 $lctext = $this->lc( $text );
 704                 $ns = MWNamespace::getCanonicalIndex( $lctext );
 705                 if ( $ns !== null ) {
 706                         return $ns;
 707                 }
 708                 $ids = $this->getNamespaceIds();
 709                 return isset( $ids[$lctext] ) ? $ids[$lctext] : false;
 710         }
 711
 712         /**
 713          * short names for language variants used for language conversion links.
 714          *
 715          * @param $code String
 716          * @param $usemsg bool Use the "variantname-xyz" message if it exists
 717          * @return string
 718          */
 719         function getVariantname( $code, $usemsg = true ) {
 720                 $msg = "variantname-$code";
 721                 if ( $usemsg && wfMessage( $msg )->exists() ) {
 722                         return $this->getMessageFromDB( $msg );
 723                 }
 724                 $name = self::fetchLanguageName( $code );
 725                 if ( $name ) {
 726                         return $name; # if it's defined as a language name, show that
 727                 } else {
 728                         # otherwise, output the language code
 729                         return $code;
 730                 }
 731         }
 732
 733         /**
 734          * @param $name string
 735          * @return string
 736          */
 737         function specialPage( $name ) {
 738                 $aliases = $this->getSpecialPageAliases();
 739                 if ( isset( $aliases[$name][0] ) ) {
 740                         $name = $aliases[$name][0];
 741                 }
 742                 return $this->getNsText( NS_SPECIAL ) . ':' . $name;
 743         }
 744
 745         /**
 746          * @return array
 747          */
 748         function getQuickbarSettings() {
 749                 return array(
 750                         $this->getMessage( 'qbsettings-none' ),
 751                         $this->getMessage( 'qbsettings-fixedleft' ),
 752                         $this->getMessage( 'qbsettings-fixedright' ),
 753                         $this->getMessage( 'qbsettings-floatingleft' ),
 754                         $this->getMessage( 'qbsettings-floatingright' ),
 755                         $this->getMessage( 'qbsettings-directionality' )
 756                 );
 757         }
 758
 759         /**
 760          * @return array
 761          */
 762         function getDatePreferences() {
 763                 return self::$dataCache->getItem( $this->mCode, 'datePreferences' );
 764         }
 765
 766         /**
 767          * @return array
 768          */
 769         function getDateFormats() {
 770                 return self::$dataCache->getItem( $this->mCode, 'dateFormats' );
 771         }
 772
 773         /**
 774          * @return array|string
 775          */
 776         function getDefaultDateFormat() {
 777                 $df = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
 778                 if ( $df === 'dmy or mdy' ) {
 779                         global $wgAmericanDates;
 780                         return $wgAmericanDates ? 'mdy' : 'dmy';
 781                 } else {
 782                         return $df;
 783                 }
 784         }
 785
 786         /**
 787          * @return array
 788          */
 789         function getDatePreferenceMigrationMap() {
 790                 return self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
 791         }
 792
 793         /**
 794          * @param  $image
 795          * @return array|null
 796          */
 797         function getImageFile( $image ) {
 798                 return self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
 799         }
 800
 801         /**
 802          * @return array
 803          */
 804         function getExtraUserToggles() {
 805                 return (array)self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
 806         }
 807
 808         /**
 809          * @param  $tog
 810          * @return string
 811          */
 812         function getUserToggle( $tog ) {
 813                 return $this->getMessageFromDB( "tog-$tog" );
 814         }
 815
 816         /**
 817          * Get native language names, indexed by code.
 818          * Only those defined in MediaWiki, no other data like CLDR.
 819          * If $customisedOnly is true, only returns codes with a messages file
 820          *
 821          * @param $customisedOnly bool
 822          *
 823          * @return array
 824          * @deprecated in 1.20, use fetchLanguageNames()
 825          */
 826         public static function getLanguageNames( $customisedOnly = false ) {
 827                 return self::fetchLanguageNames( null, $customisedOnly ? 'mwfile' : 'mw' );
 828         }
 829
 830         /**
 831          * Get translated language names. This is done on best effort and
 832          * by default this is exactly the same as Language::getLanguageNames.
 833          * The CLDR extension provides translated names.
 834          * @param $code String Language code.
 835          * @return Array language code => language name
 836          * @since 1.18.0
 837          * @deprecated in 1.20, use fetchLanguageNames()
 838          */
 839         public static function getTranslatedLanguageNames( $code ) {
 840                 return self::fetchLanguageNames( $code, 'all' );
 841         }
 842
 843         /**
 844          * Get an array of language names, indexed by code.
 845          * @param $inLanguage null|string: Code of language in which to return the names
 846          *              Use null for autonyms (native names)
 847          * @param $include string:
 848          *              'all' all available languages
 849          *              'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
 850          *              'mwfile' only if the language is in 'mw' *and* has a message file
 851          * @return array: language code => language name
 852          * @since 1.20
 853          */
 854         public static function fetchLanguageNames( $inLanguage = null, $include = 'mw' ) {
 855                 global $wgExtraLanguageNames;
 856                 static $coreLanguageNames;
 857
 858                 if ( $coreLanguageNames === null ) {
 859                         include( MWInit::compiledPath( 'languages/Names.php' ) );
 860                 }
 861
 862                 $names = array();
 863
 864                 if ( $inLanguage ) {
 865                         # TODO: also include when $inLanguage is null, when this code is more efficient
 866                         wfRunHooks( 'LanguageGetTranslatedLanguageNames', array( &$names, $inLanguage ) );
 867                 }
 868
 869                 $mwNames = $wgExtraLanguageNames + $coreLanguageNames;
 870                 foreach ( $mwNames as $mwCode => $mwName ) {
 871                         # - Prefer own MediaWiki native name when not using the hook
 872                         # - For other names just add if not added through the hook
 873                         if ( $mwCode === $inLanguage || !isset( $names[$mwCode] ) ) {
 874                                 $names[$mwCode] = $mwName;
 875                         }
 876                 }
 877
 878                 if ( $include === 'all' ) {
 879                         return $names;
 880                 }
 881
 882                 $returnMw = array();
 883                 $coreCodes = array_keys( $mwNames );
 884                 foreach ( $coreCodes as $coreCode ) {
 885                         $returnMw[$coreCode] = $names[$coreCode];
 886                 }
 887
 888                 if ( $include === 'mwfile' ) {
 889                         $namesMwFile = array();
 890                         # We do this using a foreach over the codes instead of a directory
 891                         # loop so that messages files in extensions will work correctly.
 892                         foreach ( $returnMw as $code => $value ) {
 893                                 if ( is_readable( self::getMessagesFileName( $code ) ) ) {
 894                                         $namesMwFile[$code] = $names[$code];
 895                                 }
 896                         }
 897                         return $namesMwFile;
 898                 }
 899                 # 'mw' option; default if it's not one of the other two options (all/mwfile)
 900                 return $returnMw;
 901         }
 902
 903         /**
 904          * @param $code string: The code of the language for which to get the name
 905          * @param $inLanguage null|string: Code of language in which to return the name (null for autonyms)
 906          * @param $include string: 'all', 'mw' or 'mwfile'; see fetchLanguageNames()
 907          * @return string: Language name or empty
 908          * @since 1.20
 909          */
 910         public static function fetchLanguageName( $code, $inLanguage = null, $include = 'all' ) {
 911                 $array = self::fetchLanguageNames( $inLanguage, $include );
 912                 return !array_key_exists( $code, $array ) ? '' : $array[$code];
 913         }
 914
 915         /**
 916          * Get a message from the MediaWiki namespace.
 917          *
 918          * @param $msg String: message name
 919          * @return string
 920          */
 921         function getMessageFromDB( $msg ) {
 922                 return wfMessage( $msg )->inLanguage( $this )->text();
 923         }
 924
 925         /**
 926          * Get the native language name of $code.
 927          * Only if defined in MediaWiki, no other data like CLDR.
 928          * @param $code string
 929          * @return string
 930          * @deprecated in 1.20, use fetchLanguageName()
 931          */
 932         function getLanguageName( $code ) {
 933                 return self::fetchLanguageName( $code );
 934         }
 935
 936         /**
 937          * @param $key string
 938          * @return string
 939          */
 940         function getMonthName( $key ) {
 941                 return $this->getMessageFromDB( self::$mMonthMsgs[$key - 1] );
 942         }
 943
 944         /**
 945          * @return array
 946          */
 947         function getMonthNamesArray() {
 948                 $monthNames = array( '' );
 949                 for ( $i = 1; $i < 13; $i++ ) {
 950                         $monthNames[] = $this->getMonthName( $i );
 951                 }
 952                 return $monthNames;
 953         }
 954
 955         /**
 956          * @param $key string
 957          * @return string
 958          */
 959         function getMonthNameGen( $key ) {
 960                 return $this->getMessageFromDB( self::$mMonthGenMsgs[$key - 1] );
 961         }
 962
 963         /**
 964          * @param $key string
 965          * @return string
 966          */
 967         function getMonthAbbreviation( $key ) {
 968                 return $this->getMessageFromDB( self::$mMonthAbbrevMsgs[$key - 1] );
 969         }
 970
 971         /**
 972          * @return array
 973          */
 974         function getMonthAbbreviationsArray() {
 975                 $monthNames = array( '' );
 976                 for ( $i = 1; $i < 13; $i++ ) {
 977                         $monthNames[] = $this->getMonthAbbreviation( $i );
 978                 }
 979                 return $monthNames;
 980         }
 981
 982         /**
 983          * @param $key string
 984          * @return string
 985          */
 986         function getWeekdayName( $key ) {
 987                 return $this->getMessageFromDB( self::$mWeekdayMsgs[$key - 1] );
 988         }
 989
 990         /**
 991          * @param $key string
 992          * @return string
 993          */
 994         function getWeekdayAbbreviation( $key ) {
 995                 return $this->getMessageFromDB( self::$mWeekdayAbbrevMsgs[$key - 1] );
 996         }
 997
 998         /**
 999          * @param $key string
1000          * @return string
1001          */
1002         function getIranianCalendarMonthName( $key ) {
1003                 return $this->getMessageFromDB( self::$mIranianCalendarMonthMsgs[$key - 1] );
1004         }
1005
1006         /**
1007          * @param $key string
1008          * @return string
1009          */
1010         function getHebrewCalendarMonthName( $key ) {
1011                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthMsgs[$key - 1] );
1012         }
1013
1014         /**
1015          * @param $key string
1016          * @return string
1017          */
1018         function getHebrewCalendarMonthNameGen( $key ) {
1019                 return $this->getMessageFromDB( self::$mHebrewCalendarMonthGenMsgs[$key - 1] );
1020         }
1021
1022         /**
1023          * @param $key string
1024          * @return string
1025          */
1026         function getHijriCalendarMonthName( $key ) {
1027                 return $this->getMessageFromDB( self::$mHijriCalendarMonthMsgs[$key - 1] );
1028         }
1029
1030         /**
1031          * This is a workalike of PHP's date() function, but with better
1032          * internationalisation, a reduced set of format characters, and a better
1033          * escaping format.
1034          *
1035          * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrU. See the
1036          * PHP manual for definitions. There are a number of extensions, which
1037          * start with "x":
1038          *
1039          *    xn   Do not translate digits of the next numeric format character
1040          *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
1041          *    xr   Use roman numerals for the next numeric format character
1042          *    xh   Use hebrew numerals for the next numeric format character
1043          *    xx   Literal x
1044          *    xg   Genitive month name
1045          *
1046          *    xij  j (day number) in Iranian calendar
1047          *    xiF  F (month name) in Iranian calendar
1048          *    xin  n (month number) in Iranian calendar
1049          *    xiy  y (two digit year) in Iranian calendar
1050          *    xiY  Y (full year) in Iranian calendar
1051          *
1052          *    xjj  j (day number) in Hebrew calendar
1053          *    xjF  F (month name) in Hebrew calendar
1054          *    xjt  t (days in month) in Hebrew calendar
1055          *    xjx  xg (genitive month name) in Hebrew calendar
1056          *    xjn  n (month number) in Hebrew calendar
1057          *    xjY  Y (full year) in Hebrew calendar
1058          *
1059          *    xmj  j (day number) in Hijri calendar
1060          *    xmF  F (month name) in Hijri calendar
1061          *    xmn  n (month number) in Hijri calendar
1062          *    xmY  Y (full year) in Hijri calendar
1063          *
1064          *    xkY  Y (full year) in Thai solar calendar. Months and days are
1065          *                       identical to the Gregorian calendar
1066          *    xoY  Y (full year) in Minguo calendar or Juche year.
1067          *                       Months and days are identical to the
1068          *                       Gregorian calendar
1069          *    xtY  Y (full year) in Japanese nengo. Months and days are
1070          *                       identical to the Gregorian calendar
1071          *
1072          * Characters enclosed in double quotes will be considered literal (with
1073          * the quotes themselves removed). Unmatched quotes will be considered
1074          * literal quotes. Example:
1075          *
1076          * "The month is" F       => The month is January
1077          * i's"                   => 20'11"
1078          *
1079          * Backslash escaping is also supported.
1080          *
1081          * Input timestamp is assumed to be pre-normalized to the desired local
1082          * time zone, if any.
1083          *
1084          * @param $format String
1085          * @param $ts String: 14-character timestamp
1086          *      YYYYMMDDHHMMSS
1087          *      01234567890123
1088          * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
1089          *
1090          * @return string
1091          */
1092         function sprintfDate( $format, $ts ) {
1093                 $s = '';
1094                 $raw = false;
1095                 $roman = false;
1096                 $hebrewNum = false;
1097                 $unix = false;
1098                 $rawToggle = false;
1099                 $iranian = false;
1100                 $hebrew = false;
1101                 $hijri = false;
1102                 $thai = false;
1103                 $minguo = false;
1104                 $tenno = false;
1105                 for ( $p = 0; $p < strlen( $format ); $p++ ) {
1106                         $num = false;
1107                         $code = $format[$p];
1108                         if ( $code == 'x' && $p < strlen( $format ) - 1 ) {
1109                                 $code .= $format[++$p];
1110                         }
1111
1112                         if ( ( $code === 'xi' || $code == 'xj' || $code == 'xk' || $code == 'xm' || $code == 'xo' || $code == 'xt' ) && $p < strlen( $format ) - 1 ) {
1113                                 $code .= $format[++$p];
1114                         }
1115
1116                         switch ( $code ) {
1117                                 case 'xx':
1118                                         $s .= 'x';
1119                                         break;
1120                                 case 'xn':
1121                                         $raw = true;
1122                                         break;
1123                                 case 'xN':
1124                                         $rawToggle = !$rawToggle;
1125                                         break;
1126                                 case 'xr':
1127                                         $roman = true;
1128                                         break;
1129                                 case 'xh':
1130                                         $hebrewNum = true;
1131                                         break;
1132                                 case 'xg':
1133                                         $s .= $this->getMonthNameGen( substr( $ts, 4, 2 ) );
1134                                         break;
1135                                 case 'xjx':
1136                                         if ( !$hebrew ) $hebrew = self::tsToHebrew( $ts );
1137                                         $s .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
1138                                         break;
1139                                 case 'd':
1140                                         $num = substr( $ts, 6, 2 );
1141                                         break;
1142                                 case 'D':
1143                                         if ( !$unix ) $unix = wfTimestamp( TS_UNIX, $ts );
1144                                         $s .= $this->getWeekdayAbbreviation( gmdate( 'w', $unix ) + 1 );
1145                                         break;
1146                                 case 'j':
1147                                         $num = intval( substr( $ts, 6, 2 ) );
1148                                         break;
1149                                 case 'xij':
1150                                         if ( !$iranian ) {
1151                                                 $iranian = self::tsToIranian( $ts );
1152                                         }
1153                                         $num = $iranian[2];
1154                                         break;
1155                                 case 'xmj':
1156                                         if ( !$hijri ) {
1157                                                 $hijri = self::tsToHijri( $ts );
1158                                         }
1159                                         $num = $hijri[2];
1160                                         break;
1161                                 case 'xjj':
1162                                         if ( !$hebrew ) {
1163                                                 $hebrew = self::tsToHebrew( $ts );
1164                                         }
1165                                         $num = $hebrew[2];
1166                                         break;
1167                                 case 'l':
1168                                         if ( !$unix ) {
1169                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1170                                         }
1171                                         $s .= $this->getWeekdayName( gmdate( 'w', $unix ) + 1 );
1172                                         break;
1173                                 case 'N':
1174                                         if ( !$unix ) {
1175                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1176                                         }
1177                                         $w = gmdate( 'w', $unix );
1178                                         $num = $w ? $w : 7;
1179                                         break;
1180                                 case 'w':
1181                                         if ( !$unix ) {
1182                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1183                                         }
1184                                         $num = gmdate( 'w', $unix );
1185                                         break;
1186                                 case 'z':
1187                                         if ( !$unix ) {
1188                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1189                                         }
1190                                         $num = gmdate( 'z', $unix );
1191                                         break;
1192                                 case 'W':
1193                                         if ( !$unix ) {
1194                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1195                                         }
1196                                         $num = gmdate( 'W', $unix );
1197                                         break;
1198                                 case 'F':
1199                                         $s .= $this->getMonthName( substr( $ts, 4, 2 ) );
1200                                         break;
1201                                 case 'xiF':
1202                                         if ( !$iranian ) {
1203                                                 $iranian = self::tsToIranian( $ts );
1204                                         }
1205                                         $s .= $this->getIranianCalendarMonthName( $iranian[1] );
1206                                         break;
1207                                 case 'xmF':
1208                                         if ( !$hijri ) {
1209                                                 $hijri = self::tsToHijri( $ts );
1210                                         }
1211                                         $s .= $this->getHijriCalendarMonthName( $hijri[1] );
1212                                         break;
1213                                 case 'xjF':
1214                                         if ( !$hebrew ) {
1215                                                 $hebrew = self::tsToHebrew( $ts );
1216                                         }
1217                                         $s .= $this->getHebrewCalendarMonthName( $hebrew[1] );
1218                                         break;
1219                                 case 'm':
1220                                         $num = substr( $ts, 4, 2 );
1221                                         break;
1222                                 case 'M':
1223                                         $s .= $this->getMonthAbbreviation( substr( $ts, 4, 2 ) );
1224                                         break;
1225                                 case 'n':
1226                                         $num = intval( substr( $ts, 4, 2 ) );
1227                                         break;
1228                                 case 'xin':
1229                                         if ( !$iranian ) {
1230                                                 $iranian = self::tsToIranian( $ts );
1231                                         }
1232                                         $num = $iranian[1];
1233                                         break;
1234                                 case 'xmn':
1235                                         if ( !$hijri ) {
1236                                                 $hijri = self::tsToHijri ( $ts );
1237                                         }
1238                                         $num = $hijri[1];
1239                                         break;
1240                                 case 'xjn':
1241                                         if ( !$hebrew ) {
1242                                                 $hebrew = self::tsToHebrew( $ts );
1243                                         }
1244                                         $num = $hebrew[1];
1245                                         break;
1246                                 case 't':
1247                                         if ( !$unix ) {
1248                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1249                                         }
1250                                         $num = gmdate( 't', $unix );
1251                                         break;
1252                                 case 'xjt':
1253                                         if ( !$hebrew ) {
1254                                                 $hebrew = self::tsToHebrew( $ts );
1255                                         }
1256                                         $num = $hebrew[3];
1257                                         break;
1258                                 case 'L':
1259                                         if ( !$unix ) {
1260                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1261                                         }
1262                                         $num = gmdate( 'L', $unix );
1263                                         break;
1264                                 case 'o':
1265                                         if ( !$unix ) {
1266                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1267                                         }
1268                                         $num = gmdate( 'o', $unix );
1269                                         break;
1270                                 case 'Y':
1271                                         $num = substr( $ts, 0, 4 );
1272                                         break;
1273                                 case 'xiY':
1274                                         if ( !$iranian ) {
1275                                                 $iranian = self::tsToIranian( $ts );
1276                                         }
1277                                         $num = $iranian[0];
1278                                         break;
1279                                 case 'xmY':
1280                                         if ( !$hijri ) {
1281                                                 $hijri = self::tsToHijri( $ts );
1282                                         }
1283                                         $num = $hijri[0];
1284                                         break;
1285                                 case 'xjY':
1286                                         if ( !$hebrew ) {
1287                                                 $hebrew = self::tsToHebrew( $ts );
1288                                         }
1289                                         $num = $hebrew[0];
1290                                         break;
1291                                 case 'xkY':
1292                                         if ( !$thai ) {
1293                                                 $thai = self::tsToYear( $ts, 'thai' );
1294                                         }
1295                                         $num = $thai[0];
1296                                         break;
1297                                 case 'xoY':
1298                                         if ( !$minguo ) {
1299                                                 $minguo = self::tsToYear( $ts, 'minguo' );
1300                                         }
1301                                         $num = $minguo[0];
1302                                         break;
1303                                 case 'xtY':
1304                                         if ( !$tenno ) {
1305                                                 $tenno = self::tsToYear( $ts, 'tenno' );
1306                                         }
1307                                         $num = $tenno[0];
1308                                         break;
1309                                 case 'y':
1310                                         $num = substr( $ts, 2, 2 );
1311                                         break;
1312                                 case 'xiy':
1313                                         if ( !$iranian ) {
1314                                                 $iranian = self::tsToIranian( $ts );
1315                                         }
1316                                         $num = substr( $iranian[0], -2 );
1317                                         break;
1318                                 case 'a':
1319                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'am' : 'pm';
1320                                         break;
1321                                 case 'A':
1322                                         $s .= intval( substr( $ts, 8, 2 ) ) < 12 ? 'AM' : 'PM';
1323                                         break;
1324                                 case 'g':
1325                                         $h = substr( $ts, 8, 2 );
1326                                         $num = $h % 12 ? $h % 12 : 12;
1327                                         break;
1328                                 case 'G':
1329                                         $num = intval( substr( $ts, 8, 2 ) );
1330                                         break;
1331                                 case 'h':
1332                                         $h = substr( $ts, 8, 2 );
1333                                         $num = sprintf( '%02d', $h % 12 ? $h % 12 : 12 );
1334                                         break;
1335                                 case 'H':
1336                                         $num = substr( $ts, 8, 2 );
1337                                         break;
1338                                 case 'i':
1339                                         $num = substr( $ts, 10, 2 );
1340                                         break;
1341                                 case 's':
1342                                         $num = substr( $ts, 12, 2 );
1343                                         break;
1344                                 case 'c':
1345                                         if ( !$unix ) {
1346                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1347                                         }
1348                                         $s .= gmdate( 'c', $unix );
1349                                         break;
1350                                 case 'r':
1351                                         if ( !$unix ) {
1352                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1353                                         }
1354                                         $s .= gmdate( 'r', $unix );
1355                                         break;
1356                                 case 'U':
1357                                         if ( !$unix ) {
1358                                                 $unix = wfTimestamp( TS_UNIX, $ts );
1359                                         }
1360                                         $num = $unix;
1361                                         break;
1362                                 case '\\':
1363                                         # Backslash escaping
1364                                         if ( $p < strlen( $format ) - 1 ) {
1365                                                 $s .= $format[++$p];
1366                                         } else {
1367                                                 $s .= '\\';
1368                                         }
1369                                         break;
1370                                 case '"':
1371                                         # Quoted literal
1372                                         if ( $p < strlen( $format ) - 1 ) {
1373                                                 $endQuote = strpos( $format, '"', $p + 1 );
1374                                                 if ( $endQuote === false ) {
1375                                                         # No terminating quote, assume literal "
1376                                                         $s .= '"';
1377                                                 } else {
1378                                                         $s .= substr( $format, $p + 1, $endQuote - $p - 1 );
1379                                                         $p = $endQuote;
1380                                                 }
1381                                         } else {
1382                                                 # Quote at end of string, assume literal "
1383                                                 $s .= '"';
1384                                         }
1385                                         break;
1386                                 default:
1387                                         $s .= $format[$p];
1388                         }
1389                         if ( $num !== false ) {
1390                                 if ( $rawToggle || $raw ) {
1391                                         $s .= $num;
1392                                         $raw = false;
1393                                 } elseif ( $roman ) {
1394                                         $s .= Language::romanNumeral( $num );
1395                                         $roman = false;
1396                                 } elseif ( $hebrewNum ) {
1397                                         $s .= self::hebrewNumeral( $num );
1398                                         $hebrewNum = false;
1399                                 } else {
1400                                         $s .= $this->formatNum( $num, true );
1401                                 }
1402                         }
1403                 }
1404                 return $s;
1405         }
1406
        # Days in each Gregorian month, January first; February is listed with
        # 28 days (tsToIranian() adds the leap day itself).
        private static $GREG_DAYS = array( 31, 28, 31, 30, 31, 30, 31, 31, 30, 31, 30, 31 );
        # Days in each month of the Iranian calendar, first month first, as
        # consumed by tsToIranian(); the last month is listed with 29 days.
        private static $IRANIAN_DAYS = array( 31, 31, 31, 31, 31, 31, 30, 30, 30, 30, 30, 29 );
1409
1410         /**
1411          * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
1412          * Gregorian dates to Iranian dates. Originally written in C, it
1413          * is released under the terms of GNU Lesser General Public
1414          * License. Conversion to PHP was performed by Niklas Laxström.
1415          *
1416          * Link: http://www.farsiweb.info/jalali/jalali.c
1417          *
1418          * @param $ts string
1419          *
1420          * @return string
1421          */
1422         private static function tsToIranian( $ts ) {
1423                 $gy = substr( $ts, 0, 4 ) -1600;
1424                 $gm = substr( $ts, 4, 2 ) -1;
1425                 $gd = substr( $ts, 6, 2 ) -1;
1426
1427                 # Days passed from the beginning (including leap years)
1428                 $gDayNo = 365 * $gy
1429                         + floor( ( $gy + 3 ) / 4 )
1430                         - floor( ( $gy + 99 ) / 100 )
1431                         + floor( ( $gy + 399 ) / 400 );
1432
1433                 // Add days of the past months of this year
1434                 for ( $i = 0; $i < $gm; $i++ ) {
1435                         $gDayNo += self::$GREG_DAYS[$i];
1436                 }
1437
1438                 // Leap years
1439                 if ( $gm > 1 && ( ( $gy % 4 === 0 && $gy % 100 !== 0 || ( $gy % 400 == 0 ) ) ) ) {
1440                         $gDayNo++;
1441                 }
1442
1443                 // Days passed in current month
1444                 $gDayNo += (int)$gd;
1445
1446                 $jDayNo = $gDayNo - 79;
1447
1448                 $jNp = floor( $jDayNo / 12053 );
1449                 $jDayNo %= 12053;
1450
1451                 $jy = 979 + 33 * $jNp + 4 * floor( $jDayNo / 1461 );
1452                 $jDayNo %= 1461;
1453
1454                 if ( $jDayNo >= 366 ) {
1455                         $jy += floor( ( $jDayNo - 1 ) / 365 );
1456                         $jDayNo = floor( ( $jDayNo - 1 ) % 365 );
1457                 }
1458
1459                 for ( $i = 0; $i < 11 && $jDayNo >= self::$IRANIAN_DAYS[$i]; $i++ ) {
1460                         $jDayNo -= self::$IRANIAN_DAYS[$i];
1461                 }
1462
1463                 $jm = $i + 1;
1464                 $jd = $jDayNo + 1;
1465
1466                 return array( $jy, $jm, $jd );
1467         }
1468
1469         /**
1470          * Converting Gregorian dates to Hijri dates.
1471          *
1472          * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
1473          *
1474          * @see http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
1475          *
1476          * @param $ts string
1477          *
1478          * @return string
1479          */
1480         private static function tsToHijri( $ts ) {
1481                 $year = substr( $ts, 0, 4 );
1482                 $month = substr( $ts, 4, 2 );
1483                 $day = substr( $ts, 6, 2 );
1484
1485                 $zyr = $year;
1486                 $zd = $day;
1487                 $zm = $month;
1488                 $zy = $zyr;
1489
1490                 if (
1491                         ( $zy > 1582 ) || ( ( $zy == 1582 ) && ( $zm > 10 ) ) ||
1492                         ( ( $zy == 1582 ) && ( $zm == 10 ) && ( $zd > 14 ) )
1493                 )
1494                 {
1495                         $zjd = (int)( ( 1461 * ( $zy + 4800 + (int)( ( $zm - 14 ) / 12 ) ) ) / 4 ) +
1496                                         (int)( ( 367 * ( $zm - 2 - 12 * ( (int)( ( $zm - 14 ) / 12 ) ) ) ) / 12 ) -
1497                                         (int)( ( 3 * (int)( ( ( $zy + 4900 + (int)( ( $zm - 14 ) / 12 ) ) / 100 ) ) ) / 4 ) +
1498                                         $zd - 32075;
1499                 } else {
1500                         $zjd = 367 * $zy - (int)( ( 7 * ( $zy + 5001 + (int)( ( $zm - 9 ) / 7 ) ) ) / 4 ) +
1501                                                                 (int)( ( 275 * $zm ) / 9 ) + $zd + 1729777;
1502                 }
1503
1504                 $zl = $zjd -1948440 + 10632;
1505                 $zn = (int)( ( $zl - 1 ) / 10631 );
1506                 $zl = $zl - 10631 * $zn + 354;
1507                 $zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) + ( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
1508                 $zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) - ( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) + 29;
1509                 $zm = (int)( ( 24 * $zl ) / 709 );
1510                 $zd = $zl - (int)( ( 709 * $zm ) / 24 );
1511                 $zy = 30 * $zn + $zj - 30;
1512
1513                 return array( $zy, $zm, $zd );
1514         }
1515
1516         /**
1517          * Converting Gregorian dates to Hebrew dates.
1518          *
1519          * Based on a JavaScript code by Abu Mami and Yisrael Hersch
1520          * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
1521          * to translate the relevant functions into PHP and release them under
1522          * GNU GPL.
1523          *
1524          * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
1525          * and Adar II is 14. In a non-leap year, Adar is 6.
1526          *
1527          * @param $ts string
1528          *
1529          * @return string
1530          */
1531         private static function tsToHebrew( $ts ) {
1532                 # Parse date
1533                 $year = substr( $ts, 0, 4 );
1534                 $month = substr( $ts, 4, 2 );
1535                 $day = substr( $ts, 6, 2 );
1536
1537                 # Calculate Hebrew year
1538                 $hebrewYear = $year + 3760;
1539
1540                 # Month number when September = 1, August = 12
1541                 $month += 4;
1542                 if ( $month > 12 ) {
1543                         # Next year
1544                         $month -= 12;
1545                         $year++;
1546                         $hebrewYear++;
1547                 }
1548
1549                 # Calculate day of year from 1 September
1550                 $dayOfYear = $day;
1551                 for ( $i = 1; $i < $month; $i++ ) {
1552                         if ( $i == 6 ) {
1553                                 # February
1554                                 $dayOfYear += 28;
1555                                 # Check if the year is leap
1556                                 if ( $year % 400 == 0 || ( $year % 4 == 0 && $year % 100 > 0 ) ) {
1557                                         $dayOfYear++;
1558                                 }
1559                         } elseif ( $i == 8 || $i == 10 || $i == 1 || $i == 3 ) {
1560                                 $dayOfYear += 30;
1561                         } else {
1562                                 $dayOfYear += 31;
1563                         }
1564                 }
1565
1566                 # Calculate the start of the Hebrew year
1567                 $start = self::hebrewYearStart( $hebrewYear );
1568
1569                 # Calculate next year's start
1570                 if ( $dayOfYear <= $start ) {
1571                         # Day is before the start of the year - it is the previous year
1572                         # Next year's start
1573                         $nextStart = $start;
1574                         # Previous year
1575                         $year--;
1576                         $hebrewYear--;
1577                         # Add days since previous year's 1 September
1578                         $dayOfYear += 365;
1579                         if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1580                                 # Leap year
1581                                 $dayOfYear++;
1582                         }
1583                         # Start of the new (previous) year
1584                         $start = self::hebrewYearStart( $hebrewYear );
1585                 } else {
1586                         # Next year's start
1587                         $nextStart = self::hebrewYearStart( $hebrewYear + 1 );
1588                 }
1589
1590                 # Calculate Hebrew day of year
1591                 $hebrewDayOfYear = $dayOfYear - $start;
1592
1593                 # Difference between year's days
1594                 $diff = $nextStart - $start;
1595                 # Add 12 (or 13 for leap years) days to ignore the difference between
1596                 # Hebrew and Gregorian year (353 at least vs. 365/6) - now the
1597                 # difference is only about the year type
1598                 if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
1599                         $diff += 13;
1600                 } else {
1601                         $diff += 12;
1602                 }
1603
1604                 # Check the year pattern, and is leap year
1605                 # 0 means an incomplete year, 1 means a regular year, 2 means a complete year
1606                 # This is mod 30, to work on both leap years (which add 30 days of Adar I)
1607                 # and non-leap years
1608                 $yearPattern = $diff % 30;
1609                 # Check if leap year
1610                 $isLeap = $diff >= 30;
1611
1612                 # Calculate day in the month from number of day in the Hebrew year
1613                 # Don't check Adar - if the day is not in Adar, we will stop before;
1614                 # if it is in Adar, we will use it to check if it is Adar I or Adar II
1615                 $hebrewDay = $hebrewDayOfYear;
1616                 $hebrewMonth = 1;
1617                 $days = 0;
1618                 while ( $hebrewMonth <= 12 ) {
1619                         # Calculate days in this month
1620                         if ( $isLeap && $hebrewMonth == 6 ) {
1621                                 # Adar in a leap year
1622                                 if ( $isLeap ) {
1623                                         # Leap year - has Adar I, with 30 days, and Adar II, with 29 days
1624                                         $days = 30;
1625                                         if ( $hebrewDay <= $days ) {
1626                                                 # Day in Adar I
1627                                                 $hebrewMonth = 13;
1628                                         } else {
1629                                                 # Subtract the days of Adar I
1630                                                 $hebrewDay -= $days;
1631                                                 # Try Adar II
1632                                                 $days = 29;
1633                                                 if ( $hebrewDay <= $days ) {
1634                                                         # Day in Adar II
1635                                                         $hebrewMonth = 14;
1636                                                 }
1637                                         }
1638                                 }
1639                         } elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
1640                                 # Cheshvan in a complete year (otherwise as the rule below)
1641                                 $days = 30;
1642                         } elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
1643                                 # Kislev in an incomplete year (otherwise as the rule below)
1644                                 $days = 29;
1645                         } else {
1646                                 # Odd months have 30 days, even have 29
1647                                 $days = 30 - ( $hebrewMonth - 1 ) % 2;
1648                         }
1649                         if ( $hebrewDay <= $days ) {
1650                                 # In the current month
1651                                 break;
1652                         } else {
1653                                 # Subtract the days of the current month
1654                                 $hebrewDay -= $days;
1655                                 # Try in the next month
1656                                 $hebrewMonth++;
1657                         }
1658                 }
1659
1660                 return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
1661         }
1662
1663         /**
1664          * This calculates the Hebrew year start, as days since 1 September.
1665          * Based on Carl Friedrich Gauss algorithm for finding Easter date.
1666          * Used for Hebrew date.
1667          *
1668          * @param $year int
1669          *
1670          * @return string
1671          */
1672         private static function hebrewYearStart( $year ) {
1673                 $a = intval( ( 12 * ( $year - 1 ) + 17 ) % 19 );
1674                 $b = intval( ( $year - 1 ) % 4 );
1675                 $m = 32.044093161144 + 1.5542417966212 * $a +  $b / 4.0 - 0.0031777940220923 * ( $year - 1 );
1676                 if ( $m < 0 ) {
1677                         $m--;
1678                 }
1679                 $Mar = intval( $m );
1680                 if ( $m < 0 ) {
1681                         $m++;
1682                 }
1683                 $m -= $Mar;
1684
1685                 $c = intval( ( $Mar + 3 * ( $year - 1 ) + 5 * $b + 5 ) % 7 );
1686                 if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
1687                         $Mar++;
1688                 } elseif ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
1689                         $Mar += 2;
1690                 } elseif ( $c == 2 || $c == 4 || $c == 6 ) {
1691                         $Mar++;
1692                 }
1693
1694                 $Mar += intval( ( $year - 3761 ) / 100 ) - intval( ( $year - 3761 ) / 400 ) - 24;
1695                 return $Mar;
1696         }
1697
1698         /**
1699          * Algorithm to convert Gregorian dates to Thai solar dates,
1700          * Minguo dates or Minguo dates.
1701          *
1702          * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
1703          *       http://en.wikipedia.org/wiki/Minguo_calendar
1704          *       http://en.wikipedia.org/wiki/Japanese_era_name
1705          *
1706          * @param $ts String: 14-character timestamp
1707          * @param $cName String: calender name
1708          * @return Array: converted year, month, day
1709          */
1710         private static function tsToYear( $ts, $cName ) {
1711                 $gy = substr( $ts, 0, 4 );
1712                 $gm = substr( $ts, 4, 2 );
1713                 $gd = substr( $ts, 6, 2 );
1714
1715                 if ( !strcmp( $cName, 'thai' ) ) {
1716                         # Thai solar dates
1717                         # Add 543 years to the Gregorian calendar
1718                         # Months and days are identical
1719                         $gy_offset = $gy + 543;
1720                 } elseif ( ( !strcmp( $cName, 'minguo' ) ) || !strcmp( $cName, 'juche' ) ) {
1721                         # Minguo dates
1722                         # Deduct 1911 years from the Gregorian calendar
1723                         # Months and days are identical
1724                         $gy_offset = $gy - 1911;
1725                 } elseif ( !strcmp( $cName, 'tenno' ) ) {
1726                         # Nengō dates up to Meiji period
1727                         # Deduct years from the Gregorian calendar
1728                         # depending on the nengo periods
1729                         # Months and days are identical
1730                         if ( ( $gy < 1912 ) || ( ( $gy == 1912 ) && ( $gm < 7 ) ) || ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd < 31 ) ) ) {
1731                                 # Meiji period
1732                                 $gy_gannen = $gy - 1868 + 1;
1733                                 $gy_offset = $gy_gannen;
1734                                 if ( $gy_gannen == 1 ) {
1735                                         $gy_offset = '元';
1736                                 }
1737                                 $gy_offset = '明治' . $gy_offset;
1738                         } elseif (
1739                                 ( ( $gy == 1912 ) && ( $gm == 7 ) && ( $gd == 31 ) ) ||
1740                                 ( ( $gy == 1912 ) && ( $gm >= 8 ) ) ||
1741                                 ( ( $gy > 1912 ) && ( $gy < 1926 ) ) ||
1742                                 ( ( $gy == 1926 ) && ( $gm < 12 ) ) ||
1743                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd < 26 ) )
1744                         )
1745                         {
1746                                 # Taishō period
1747                                 $gy_gannen = $gy - 1912 + 1;
1748                                 $gy_offset = $gy_gannen;
1749                                 if ( $gy_gannen == 1 ) {
1750                                         $gy_offset = '元';
1751                                 }
1752                                 $gy_offset = '大正' . $gy_offset;
1753                         } elseif (
1754                                 ( ( $gy == 1926 ) && ( $gm == 12 ) && ( $gd >= 26 ) ) ||
1755                                 ( ( $gy > 1926 ) && ( $gy < 1989 ) ) ||
1756                                 ( ( $gy == 1989 ) && ( $gm == 1 ) && ( $gd < 8 ) )
1757                         )
1758                         {
1759                                 # Shōwa period
1760                                 $gy_gannen = $gy - 1926 + 1;
1761                                 $gy_offset = $gy_gannen;
1762                                 if ( $gy_gannen == 1 ) {
1763                                         $gy_offset = '元';
1764                                 }
1765                                 $gy_offset = '昭和' . $gy_offset;
1766                         } else {
1767                                 # Heisei period
1768                                 $gy_gannen = $gy - 1989 + 1;
1769                                 $gy_offset = $gy_gannen;
1770                                 if ( $gy_gannen == 1 ) {
1771                                         $gy_offset = '元';
1772                                 }
1773                                 $gy_offset = '平成' . $gy_offset;
1774                         }
1775                 } else {
1776                         $gy_offset = $gy;
1777                 }
1778
1779                 return array( $gy_offset, $gm, $gd );
1780         }
1781
1782         /**
1783          * Roman number formatting up to 10000
1784          *
1785          * @param $num int
1786          *
1787          * @return string
1788          */
1789         static function romanNumeral( $num ) {
1790                 static $table = array(
1791                         array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
1792                         array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
1793                         array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
1794                         array( '', 'M', 'MM', 'MMM', 'MMMM', 'MMMMM', 'MMMMMM', 'MMMMMMM', 'MMMMMMMM', 'MMMMMMMMM', 'MMMMMMMMMM' )
1795                 );
1796
1797                 $num = intval( $num );
1798                 if ( $num > 10000 || $num <= 0 ) {
1799                         return $num;
1800                 }
1801
1802                 $s = '';
1803                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1804                         if ( $num >= $pow10 ) {
1805                                 $s .= $table[$i][(int)floor( $num / $pow10 )];
1806                         }
1807                         $num = $num % $pow10;
1808                 }
1809                 return $s;
1810         }
1811
1812         /**
1813          * Hebrew Gematria number formatting up to 9999
1814          *
1815          * @param $num int
1816          *
1817          * @return string
1818          */
1819         static function hebrewNumeral( $num ) {
1820                 static $table = array(
1821                         array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
1822                         array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
1823                         array( '', 'ק', 'ר', 'ש', 'ת', 'תק', 'תר', 'תש', 'תת', 'תתק', 'תתר' ),
1824                         array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
1825                 );
1826
1827                 $num = intval( $num );
1828                 if ( $num > 9999 || $num <= 0 ) {
1829                         return $num;
1830                 }
1831
1832                 $s = '';
1833                 for ( $pow10 = 1000, $i = 3; $i >= 0; $pow10 /= 10, $i-- ) {
1834                         if ( $num >= $pow10 ) {
1835                                 if ( $num == 15 || $num == 16 ) {
1836                                         $s .= $table[0][9] . $table[0][$num - 9];
1837                                         $num = 0;
1838                                 } else {
1839                                         $s .= $table[$i][intval( ( $num / $pow10 ) )];
1840                                         if ( $pow10 == 1000 ) {
1841                                                 $s .= "'";
1842                                         }
1843                                 }
1844                         }
1845                         $num = $num % $pow10;
1846                 }
1847                 if ( strlen( $s ) == 2 ) {
1848                         $str = $s . "'";
1849                 } else  {
1850                         $str = substr( $s, 0, strlen( $s ) - 2 ) . '"';
1851                         $str .= substr( $s, strlen( $s ) - 2, 2 );
1852                 }
1853                 $start = substr( $str, 0, strlen( $str ) - 2 );
1854                 $end = substr( $str, strlen( $str ) - 2 );
1855                 switch( $end ) {
1856                         case 'כ':
1857                                 $str = $start . 'ך';
1858                                 break;
1859                         case 'מ':
1860                                 $str = $start . 'ם';
1861                                 break;
1862                         case 'נ':
1863                                 $str = $start . 'ן';
1864                                 break;
1865                         case 'פ':
1866                                 $str = $start . 'ף';
1867                                 break;
1868                         case 'צ':
1869                                 $str = $start . 'ץ';
1870                                 break;
1871                 }
1872                 return $str;
1873         }
1874
1875         /**
1876          * Used by date() and time() to adjust the time output.
1877          *
1878          * @param $ts Int the time in date('YmdHis') format
1879          * @param $tz Mixed: adjust the time by this amount (default false, mean we
1880          *            get user timecorrection setting)
1881          * @return int
1882          */
1883         function userAdjust( $ts, $tz = false ) {
1884                 global $wgUser, $wgLocalTZoffset;
1885
1886                 if ( $tz === false ) {
1887                         $tz = $wgUser->getOption( 'timecorrection' );
1888                 }
1889
1890                 $data = explode( '|', $tz, 3 );
1891
1892                 if ( $data[0] == 'ZoneInfo' ) {
1893                         wfSuppressWarnings();
1894                         $userTZ = timezone_open( $data[2] );
1895                         wfRestoreWarnings();
1896                         if ( $userTZ !== false ) {
1897                                 $date = date_create( $ts, timezone_open( 'UTC' ) );
1898                                 date_timezone_set( $date, $userTZ );
1899                                 $date = date_format( $date, 'YmdHis' );
1900                                 return $date;
1901                         }
1902                         # Unrecognized timezone, default to 'Offset' with the stored offset.
1903                         $data[0] = 'Offset';
1904                 }
1905
1906                 $minDiff = 0;
1907                 if ( $data[0] == 'System' || $tz == '' ) {
1908                         #  Global offset in minutes.
1909                         if ( isset( $wgLocalTZoffset ) ) {
1910                                 $minDiff = $wgLocalTZoffset;
1911                         }
1912                 } elseif ( $data[0] == 'Offset' ) {
1913                         $minDiff = intval( $data[1] );
1914                 } else {
1915                         $data = explode( ':', $tz );
1916                         if ( count( $data ) == 2 ) {
1917                                 $data[0] = intval( $data[0] );
1918                                 $data[1] = intval( $data[1] );
1919                                 $minDiff = abs( $data[0] ) * 60 + $data[1];
1920                                 if ( $data[0] < 0 ) {
1921                                         $minDiff = -$minDiff;
1922                                 }
1923                         } else {
1924                                 $minDiff = intval( $data[0] ) * 60;
1925                         }
1926                 }
1927
1928                 # No difference ? Return time unchanged
1929                 if ( 0 == $minDiff ) {
1930                         return $ts;
1931                 }
1932
1933                 wfSuppressWarnings(); // E_STRICT system time bitching
1934                 # Generate an adjusted date; take advantage of the fact that mktime
1935                 # will normalize out-of-range values so we don't have to split $minDiff
1936                 # into hours and minutes.
1937                 $t = mktime( (
1938                   (int)substr( $ts, 8, 2 ) ), # Hours
1939                   (int)substr( $ts, 10, 2 ) + $minDiff, # Minutes
1940                   (int)substr( $ts, 12, 2 ), # Seconds
1941                   (int)substr( $ts, 4, 2 ), # Month
1942                   (int)substr( $ts, 6, 2 ), # Day
1943                   (int)substr( $ts, 0, 4 ) ); # Year
1944
1945                 $date = date( 'YmdHis', $t );
1946                 wfRestoreWarnings();
1947
1948                 return $date;
1949         }
1950
1951         /**
1952          * This is meant to be used by time(), date(), and timeanddate() to get
1953          * the date preference they're supposed to use, it should be used in
1954          * all children.
1955          *
1956          *<code>
1957          * function timeanddate([...], $format = true) {
1958          *      $datePreference = $this->dateFormat($format);
1959          * [...]
1960          * }
1961          *</code>
1962          *
1963          * @param $usePrefs Mixed: if true, the user's preference is used
1964          *                         if false, the site/language default is used
1965          *                         if int/string, assumed to be a format.
1966          * @return string
1967          */
1968         function dateFormat( $usePrefs = true ) {
1969                 global $wgUser;
1970
1971                 if ( is_bool( $usePrefs ) ) {
1972                         if ( $usePrefs ) {
1973                                 $datePreference = $wgUser->getDatePreference();
1974                         } else {
1975                                 $datePreference = (string)User::getDefaultOption( 'date' );
1976                         }
1977                 } else {
1978                         $datePreference = (string)$usePrefs;
1979                 }
1980
1981                 // return int
1982                 if ( $datePreference == '' ) {
1983                         return 'default';
1984                 }
1985
1986                 return $datePreference;
1987         }
1988
1989         /**
1990          * Get a format string for a given type and preference
1991          * @param $type string May be date, time or both
1992          * @param $pref string The format name as it appears in Messages*.php
1993          *
1994          * @return string
1995          */
1996         function getDateFormatString( $type, $pref ) {
1997                 if ( !isset( $this->dateFormatStrings[$type][$pref] ) ) {
1998                         if ( $pref == 'default' ) {
1999                                 $pref = $this->getDefaultDateFormat();
2000                                 $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
2001                         } else {
2002                                 $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
2003                                 if ( is_null( $df ) ) {
2004                                         $pref = $this->getDefaultDateFormat();
2005                                         $df = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
2006                                 }
2007                         }
2008                         $this->dateFormatStrings[$type][$pref] = $df;
2009                 }
2010                 return $this->dateFormatStrings[$type][$pref];
2011         }
2012
2013         /**
2014          * @param $ts Mixed: the time format which needs to be turned into a
2015          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2016          * @param $adj Bool: whether to adjust the time output according to the
2017          *             user configured offset ($timecorrection)
2018          * @param $format Mixed: true to use user's date format preference
2019          * @param $timecorrection String|bool the time offset as returned by
2020          *                        validateTimeZone() in Special:Preferences
2021          * @return string
2022          */
2023         function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
2024                 $ts = wfTimestamp( TS_MW, $ts );
2025                 if ( $adj ) {
2026                         $ts = $this->userAdjust( $ts, $timecorrection );
2027                 }
2028                 $df = $this->getDateFormatString( 'date', $this->dateFormat( $format ) );
2029                 return $this->sprintfDate( $df, $ts );
2030         }
2031
2032         /**
2033          * @param $ts Mixed: the time format which needs to be turned into a
2034          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2035          * @param $adj Bool: whether to adjust the time output according to the
2036          *             user configured offset ($timecorrection)
2037          * @param $format Mixed: true to use user's date format preference
2038          * @param $timecorrection String|bool the time offset as returned by
2039          *                        validateTimeZone() in Special:Preferences
2040          * @return string
2041          */
2042         function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
2043                 $ts = wfTimestamp( TS_MW, $ts );
2044                 if ( $adj ) {
2045                         $ts = $this->userAdjust( $ts, $timecorrection );
2046                 }
2047                 $df = $this->getDateFormatString( 'time', $this->dateFormat( $format ) );
2048                 return $this->sprintfDate( $df, $ts );
2049         }
2050
2051         /**
2052          * @param $ts Mixed: the time format which needs to be turned into a
2053          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2054          * @param $adj Bool: whether to adjust the time output according to the
2055          *             user configured offset ($timecorrection)
2056          * @param $format Mixed: what format to return, if it's false output the
2057          *                default one (default true)
2058          * @param $timecorrection String|bool the time offset as returned by
2059          *                        validateTimeZone() in Special:Preferences
2060          * @return string
2061          */
2062         function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
2063                 $ts = wfTimestamp( TS_MW, $ts );
2064                 if ( $adj ) {
2065                         $ts = $this->userAdjust( $ts, $timecorrection );
2066                 }
2067                 $df = $this->getDateFormatString( 'both', $this->dateFormat( $format ) );
2068                 return $this->sprintfDate( $df, $ts );
2069         }
2070
2071         /**
2072          * Takes a number of seconds and turns it into a text using values such as hours and minutes.
2073          *
2074          * @since 1.20
2075          *
2076          * @param integer $seconds The amount of seconds.
2077          * @param array $chosenIntervals The intervals to enable.
2078          *
2079          * @return string
2080          */
2081         public function formatDuration( $seconds, array $chosenIntervals = array() ) {
2082                 $intervals = $this->getDurationIntervals( $seconds, $chosenIntervals );
2083
2084                 $segments = array();
2085
2086                 foreach ( $intervals as $intervalName => $intervalValue ) {
2087                         $message = wfMessage( 'duration-' . $intervalName )->numParams( $intervalValue );
2088                         $segments[] = $message->inLanguage( $this )->escaped();
2089                 }
2090
2091                 return $this->listToText( $segments );
2092         }
2093
2094         /**
2095          * Takes a number of seconds and returns an array with a set of corresponding intervals.
2096          * For example 65 will be turned into array( minutes => 1, seconds => 5 ).
2097          *
2098          * @since 1.20
2099          *
2100          * @param integer $seconds The amount of seconds.
2101          * @param array $chosenIntervals The intervals to enable.
2102          *
2103          * @return array
2104          */
2105         public function getDurationIntervals( $seconds, array $chosenIntervals = array() ) {
2106                 if ( empty( $chosenIntervals ) ) {
2107                         $chosenIntervals = array( 'millennia', 'centuries', 'decades', 'years', 'days', 'hours', 'minutes', 'seconds' );
2108                 }
2109
2110                 $intervals = array_intersect_key( self::$durationIntervals, array_flip( $chosenIntervals ) );
2111                 $sortedNames = array_keys( $intervals );
2112                 $smallestInterval = array_pop( $sortedNames );
2113
2114                 $segments = array();
2115
2116                 foreach ( $intervals as $name => $length ) {
2117                         $value = floor( $seconds / $length );
2118
2119                         if ( $value > 0 || ( $name == $smallestInterval && empty( $segments ) ) ) {
2120                                 $seconds -= $value * $length;
2121                                 $segments[$name] = $value;
2122                         }
2123                 }
2124
2125                 return $segments;
2126         }
2127
2128         /**
2129          * Internal helper function for userDate(), userTime() and userTimeAndDate()
2130          *
2131          * @param $type String: can be 'date', 'time' or 'both'
2132          * @param $ts Mixed: the time format which needs to be turned into a
2133          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2134          * @param $user User object used to get preferences for timezone and format
2135          * @param $options Array, can contain the following keys:
2136          *        - 'timecorrection': time correction, can have the following values:
2137          *             - true: use user's preference
2138          *             - false: don't use time correction
2139          *             - integer: value of time correction in minutes
2140          *        - 'format': format to use, can have the following values:
2141          *             - true: use user's preference
2142          *             - false: use default preference
2143          *             - string: format to use
2144          * @since 1.19
2145          * @return String
2146          */
2147         private function internalUserTimeAndDate( $type, $ts, User $user, array $options ) {
2148                 $ts = wfTimestamp( TS_MW, $ts );
2149                 $options += array( 'timecorrection' => true, 'format' => true );
2150                 if ( $options['timecorrection'] !== false ) {
2151                         if ( $options['timecorrection'] === true ) {
2152                                 $offset = $user->getOption( 'timecorrection' );
2153                         } else {
2154                                 $offset = $options['timecorrection'];
2155                         }
2156                         $ts = $this->userAdjust( $ts, $offset );
2157                 }
2158                 if ( $options['format'] === true ) {
2159                         $format = $user->getDatePreference();
2160                 } else {
2161                         $format = $options['format'];
2162                 }
2163                 $df = $this->getDateFormatString( $type, $this->dateFormat( $format ) );
2164                 return $this->sprintfDate( $df, $ts );
2165         }
2166
2167         /**
2168          * Get the formatted date for the given timestamp and formatted for
2169          * the given user.
2170          *
2171          * @param $ts Mixed: the time format which needs to be turned into a
2172          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2173          * @param $user User object used to get preferences for timezone and format
2174          * @param $options Array, can contain the following keys:
2175          *        - 'timecorrection': time correction, can have the following values:
2176          *             - true: use user's preference
2177          *             - false: don't use time correction
2178          *             - integer: value of time correction in minutes
2179          *        - 'format': format to use, can have the following values:
2180          *             - true: use user's preference
2181          *             - false: use default preference
2182          *             - string: format to use
2183          * @since 1.19
2184          * @return String
2185          */
2186         public function userDate( $ts, User $user, array $options = array() ) {
2187                 return $this->internalUserTimeAndDate( 'date', $ts, $user, $options );
2188         }
2189
2190         /**
2191          * Get the formatted time for the given timestamp and formatted for
2192          * the given user.
2193          *
2194          * @param $ts Mixed: the time format which needs to be turned into a
2195          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2196          * @param $user User object used to get preferences for timezone and format
2197          * @param $options Array, can contain the following keys:
2198          *        - 'timecorrection': time correction, can have the following values:
2199          *             - true: use user's preference
2200          *             - false: don't use time correction
2201          *             - integer: value of time correction in minutes
2202          *        - 'format': format to use, can have the following values:
2203          *             - true: use user's preference
2204          *             - false: use default preference
2205          *             - string: format to use
2206          * @since 1.19
2207          * @return String
2208          */
2209         public function userTime( $ts, User $user, array $options = array() ) {
2210                 return $this->internalUserTimeAndDate( 'time', $ts, $user, $options );
2211         }
2212
2213         /**
2214          * Get the formatted date and time for the given timestamp and formatted for
2215          * the given user.
2216          *
2217          * @param $ts Mixed: the time format which needs to be turned into a
2218          *            date('YmdHis') format with wfTimestamp(TS_MW,$ts)
2219          * @param $user User object used to get preferences for timezone and format
2220          * @param $options Array, can contain the following keys:
2221          *        - 'timecorrection': time correction, can have the following values:
2222          *             - true: use user's preference
2223          *             - false: don't use time correction
2224          *             - integer: value of time correction in minutes
2225          *        - 'format': format to use, can have the following values:
2226          *             - true: use user's preference
2227          *             - false: use default preference
2228          *             - string: format to use
2229          * @since 1.19
2230          * @return String
2231          */
2232         public function userTimeAndDate( $ts, User $user, array $options = array() ) {
2233                 return $this->internalUserTimeAndDate( 'both', $ts, $user, $options );
2234         }
2235
2236         /**
2237          * @param $key string
2238          * @return array|null
2239          */
2240         function getMessage( $key ) {
2241                 return self::$dataCache->getSubitem( $this->mCode, 'messages', $key );
2242         }
2243
2244         /**
2245          * @return array
2246          */
2247         function getAllMessages() {
2248                 return self::$dataCache->getItem( $this->mCode, 'messages' );
2249         }
2250
2251         /**
2252          * @param $in
2253          * @param $out
2254          * @param $string
2255          * @return string
2256          */
2257         function iconv( $in, $out, $string ) {
2258                 # This is a wrapper for iconv in all languages except esperanto,
2259                 # which does some nasty x-conversions beforehand
2260
2261                 # Even with //IGNORE iconv can whine about illegal characters in
2262                 # *input* string. We just ignore those too.
2263                 # REF: http://bugs.php.net/bug.php?id=37166
2264                 # REF: https://bugzilla.wikimedia.org/show_bug.cgi?id=16885
2265                 wfSuppressWarnings();
2266                 $text = iconv( $in, $out . '//IGNORE', $string );
2267                 wfRestoreWarnings();
2268                 return $text;
2269         }
2270
2271         // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
2272
2273         /**
2274          * @param $matches array
2275          * @return mixed|string
2276          */
2277         function ucwordbreaksCallbackAscii( $matches ) {
2278                 return $this->ucfirst( $matches[1] );
2279         }
2280
2281         /**
2282          * @param $matches array
2283          * @return string
2284          */
2285         function ucwordbreaksCallbackMB( $matches ) {
2286                 return mb_strtoupper( $matches[0] );
2287         }
2288
2289         /**
2290          * @param $matches array
2291          * @return string
2292          */
2293         function ucCallback( $matches ) {
2294                 list( $wikiUpperChars ) = self::getCaseMaps();
2295                 return strtr( $matches[1], $wikiUpperChars );
2296         }
2297
2298         /**
2299          * @param $matches array
2300          * @return string
2301          */
2302         function lcCallback( $matches ) {
2303                 list( , $wikiLowerChars ) = self::getCaseMaps();
2304                 return strtr( $matches[1], $wikiLowerChars );
2305         }
2306
2307         /**
2308          * @param $matches array
2309          * @return string
2310          */
2311         function ucwordsCallbackMB( $matches ) {
2312                 return mb_strtoupper( $matches[0] );
2313         }
2314
2315         /**
2316          * @param $matches array
2317          * @return string
2318          */
2319         function ucwordsCallbackWiki( $matches ) {
2320                 list( $wikiUpperChars ) = self::getCaseMaps();
2321                 return strtr( $matches[0], $wikiUpperChars );
2322         }
2323
2324         /**
2325          * Make a string's first character uppercase
2326          *
2327          * @param $str string
2328          *
2329          * @return string
2330          */
2331         function ucfirst( $str ) {
2332                 $o = ord( $str );
2333                 if ( $o < 96 ) { // if already uppercase...
2334                         return $str;
2335                 } elseif ( $o < 128 ) {
2336                         return ucfirst( $str ); // use PHP's ucfirst()
2337                 } else {
2338                         // fall back to more complex logic in case of multibyte strings
2339                         return $this->uc( $str, true );
2340                 }
2341         }
2342
2343         /**
2344          * Convert a string to uppercase
2345          *
2346          * @param $str string
2347          * @param $first bool
2348          *
2349          * @return string
2350          */
2351         function uc( $str, $first = false ) {
2352                 if ( function_exists( 'mb_strtoupper' ) ) {
2353                         if ( $first ) {
2354                                 if ( $this->isMultibyte( $str ) ) {
2355                                         return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
2356                                 } else {
2357                                         return ucfirst( $str );
2358                                 }
2359                         } else {
2360                                 return $this->isMultibyte( $str ) ? mb_strtoupper( $str ) : strtoupper( $str );
2361                         }
2362                 } else {
2363                         if ( $this->isMultibyte( $str ) ) {
2364                                 $x = $first ? '^' : '';
2365                                 return preg_replace_callback(
2366                                         "/$x([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
2367                                         array( $this, 'ucCallback' ),
2368                                         $str
2369                                 );
2370                         } else {
2371                                 return $first ? ucfirst( $str ) : strtoupper( $str );
2372                         }
2373                 }
2374         }
2375
2376         /**
2377          * @param $str string
2378          * @return mixed|string
2379          */
2380         function lcfirst( $str ) {
2381                 $o = ord( $str );
2382                 if ( !$o ) {
2383                         return strval( $str );
2384                 } elseif ( $o >= 128 ) {
2385                         return $this->lc( $str, true );
2386                 } elseif ( $o > 96 ) {
2387                         return $str;
2388                 } else {
2389                         $str[0] = strtolower( $str[0] );
2390                         return $str;
2391                 }
2392         }
2393
2394         /**
2395          * @param $str string
2396          * @param $first bool
2397          * @return mixed|string
2398          */
2399         function lc( $str, $first = false ) {
2400                 if ( function_exists( 'mb_strtolower' ) ) {
2401                         if ( $first ) {
2402                                 if ( $this->isMultibyte( $str ) ) {
2403                                         return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
2404                                 } else {
2405                                         return strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 );
2406                                 }
2407                         } else {
2408                                 return $this->isMultibyte( $str ) ? mb_strtolower( $str ) : strtolower( $str );
2409                         }
2410                 } else {
2411                         if ( $this->isMultibyte( $str ) ) {
2412                                 $x = $first ? '^' : '';
2413                                 return preg_replace_callback(
2414                                         "/$x([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
2415                                         array( $this, 'lcCallback' ),
2416                                         $str
2417                                 );
2418                         } else {
2419                                 return $first ? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 ) : strtolower( $str );
2420                         }
2421                 }
2422         }
2423
2424         /**
2425          * @param $str string
2426          * @return bool
2427          */
2428         function isMultibyte( $str ) {
2429                 return (bool)preg_match( '/[\x80-\xff]/', $str );
2430         }
2431
2432         /**
2433          * @param $str string
2434          * @return mixed|string
2435          */
2436         function ucwords( $str ) {
2437                 if ( $this->isMultibyte( $str ) ) {
2438                         $str = $this->lc( $str );
2439
2440                         // regexp to find first letter in each word (i.e. after each space)
2441                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
2442
2443                         // function to use to capitalize a single char
2444                         if ( function_exists( 'mb_strtoupper' ) ) {
2445                                 return preg_replace_callback(
2446                                         $replaceRegexp,
2447                                         array( $this, 'ucwordsCallbackMB' ),
2448                                         $str
2449                                 );
2450                         } else {
2451                                 return preg_replace_callback(
2452                                         $replaceRegexp,
2453                                         array( $this, 'ucwordsCallbackWiki' ),
2454                                         $str
2455                                 );
2456                         }
2457                 } else {
2458                         return ucwords( strtolower( $str ) );
2459                 }
2460         }
2461
2462         /**
2463          * capitalize words at word breaks
2464          *
2465          * @param $str string
2466          * @return mixed
2467          */
2468         function ucwordbreaks( $str ) {
2469                 if ( $this->isMultibyte( $str ) ) {
2470                         $str = $this->lc( $str );
2471
2472                         // since \b doesn't work for UTF-8, we explicitely define word break chars
2473                         $breaks = "[ \-\(\)\}\{\.,\?!]";
2474
2475                         // find first letter after word break
2476                         $replaceRegexp = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|$breaks([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";
2477
2478                         if ( function_exists( 'mb_strtoupper' ) ) {
2479                                 return preg_replace_callback(
2480                                         $replaceRegexp,
2481                                         array( $this, 'ucwordbreaksCallbackMB' ),
2482                                         $str
2483                                 );
2484                         } else {
2485                                 return preg_replace_callback(
2486                                         $replaceRegexp,
2487                                         array( $this, 'ucwordsCallbackWiki' ),
2488                                         $str
2489                                 );
2490                         }
2491                 } else {
2492                         return preg_replace_callback(
2493                                 '/\b([\w\x80-\xff]+)\b/',
2494                                 array( $this, 'ucwordbreaksCallbackAscii' ),
2495                                 $str
2496                         );
2497                 }
2498         }
2499
2500         /**
2501          * Return a case-folded representation of $s
2502          *
2503          * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
2504          * and $s2 are the same except for the case of their characters. It is not
2505          * necessary for the value returned to make sense when displayed.
2506          *
2507          * Do *not* perform any other normalisation in this function. If a caller
2508          * uses this function when it should be using a more general normalisation
2509          * function, then fix the caller.
2510          *
2511          * @param $s string
2512          *
2513          * @return string
2514          */
2515         function caseFold( $s ) {
2516                 return $this->uc( $s );
2517         }
2518
2519         /**
2520          * @param $s string
2521          * @return string
2522          */
2523         function checkTitleEncoding( $s ) {
2524                 if ( is_array( $s ) ) {
2525                         wfDebugDieBacktrace( 'Given array to checkTitleEncoding.' );
2526                 }
2527                 if ( StringUtils::isUtf8( $s ) ) {
2528                         return $s;
2529                 }
2530
2531                 return $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
2532         }
2533
2534         /**
2535          * @return array
2536          */
2537         function fallback8bitEncoding() {
2538                 return self::$dataCache->getItem( $this->mCode, 'fallback8bitEncoding' );
2539         }
2540
2541         /**
2542          * Most writing systems use whitespace to break up words.
2543          * Some languages such as Chinese don't conventionally do this,
2544          * which requires special handling when breaking up words for
2545          * searching etc.
2546          *
2547          * @return bool
2548          */
2549         function hasWordBreaks() {
2550                 return true;
2551         }
2552
2553         /**
2554          * Some languages such as Chinese require word segmentation,
2555          * Specify such segmentation when overridden in derived class.
2556          *
2557          * @param $string String
2558          * @return String
2559          */
2560         function segmentByWord( $string ) {
2561                 return $string;
2562         }
2563
2564         /**
         * Some languages have special punctuation that needs to be normalized.
2566          * Make such changes here.
2567          *
2568          * @param $string String
2569          * @return String
2570          */
2571         function normalizeForSearch( $string ) {
2572                 return self::convertDoubleWidth( $string );
2573         }
2574
2575         /**
2576          * convert double-width roman characters to single-width.
2577          * range: ff00-ff5f ~= 0020-007f
2578          *
2579          * @param $string string
2580          *
2581          * @return string
2582          */
2583         protected static function convertDoubleWidth( $string ) {
2584                 static $full = null;
2585                 static $half = null;
2586
2587                 if ( $full === null ) {
2588                         $fullWidth = "０１２３４５６７８９ＡＢＣＤＥＦＧＨＩＪＫＬＭＮＯＰＱＲＳＴＵＶＷＸＹＺａｂｃｄｅｆｇｈｉｊｋｌｍｎｏｐｑｒｓｔｕｖｗｘｙｚ";
2589                         $halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
2590                         $full = str_split( $fullWidth, 3 );
2591                         $half = str_split( $halfWidth );
2592                 }
2593
2594                 $string = str_replace( $full, $half, $string );
2595                 return $string;
2596         }
2597
2598         /**
2599          * @param $string string
2600          * @param $pattern string
2601          * @return string
2602          */
2603         protected static function insertSpace( $string, $pattern ) {
2604                 $string = preg_replace( $pattern, " $1 ", $string );
2605                 $string = preg_replace( '/ +/', ' ', $string );
2606                 return $string;
2607         }
2608
2609         /**
2610          * @param $termsArray array
2611          * @return array
2612          */
2613         function convertForSearchResult( $termsArray ) {
2614                 # some languages, e.g. Chinese, need to do a conversion
2615                 # in order for search results to be displayed correctly
2616                 return $termsArray;
2617         }
2618
2619         /**
2620          * Get the first character of a string.
2621          *
2622          * @param $s string
2623          * @return string
2624          */
2625         function firstChar( $s ) {
2626                 $matches = array();
2627                 preg_match(
2628                         '/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
2629                                 '[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
2630                         $s,
2631                         $matches
2632                 );
2633
2634                 if ( isset( $matches[1] ) ) {
2635                         if ( strlen( $matches[1] ) != 3 ) {
2636                                 return $matches[1];
2637                         }
2638
2639                         // Break down Hangul syllables to grab the first jamo
2640                         $code = utf8ToCodepoint( $matches[1] );
2641                         if ( $code < 0xac00 || 0xd7a4 <= $code ) {
2642                                 return $matches[1];
2643                         } elseif ( $code < 0xb098 ) {
2644                                 return "\xe3\x84\xb1";
2645                         } elseif ( $code < 0xb2e4 ) {
2646                                 return "\xe3\x84\xb4";
2647                         } elseif ( $code < 0xb77c ) {
2648                                 return "\xe3\x84\xb7";
2649                         } elseif ( $code < 0xb9c8 ) {
2650                                 return "\xe3\x84\xb9";
2651                         } elseif ( $code < 0xbc14 ) {
2652                                 return "\xe3\x85\x81";
2653                         } elseif ( $code < 0xc0ac ) {
2654                                 return "\xe3\x85\x82";
2655                         } elseif ( $code < 0xc544 ) {
2656                                 return "\xe3\x85\x85";
2657                         } elseif ( $code < 0xc790 ) {
2658                                 return "\xe3\x85\x87";
2659                         } elseif ( $code < 0xcc28 ) {
2660                                 return "\xe3\x85\x88";
2661                         } elseif ( $code < 0xce74 ) {
2662                                 return "\xe3\x85\x8a";
2663                         } elseif ( $code < 0xd0c0 ) {
2664                                 return "\xe3\x85\x8b";
2665                         } elseif ( $code < 0xd30c ) {
2666                                 return "\xe3\x85\x8c";
2667                         } elseif ( $code < 0xd558 ) {
2668                                 return "\xe3\x85\x8d";
2669                         } else {
2670                                 return "\xe3\x85\x8e";
2671                         }
2672                 } else {
2673                         return '';
2674                 }
2675         }
2676
        function initEncoding() {
                # Intentionally empty in this class: a hook for startup-time encoding setup.
                #
                # Some languages may have an alternate char encoding option
                # (Esperanto X-coding, Japanese furigana conversion, etc)
                # If this language is used as the primary content language,
                # an override to the defaults can be set here on startup.
        }
2683
2684         /**
2685          * @param $s string
2686          * @return string
2687          */
2688         function recodeForEdit( $s ) {
2689                 # For some languages we'll want to explicitly specify
2690                 # which characters make it into the edit box raw
2691                 # or are converted in some way or another.
2692                 global $wgEditEncoding;
2693                 if ( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' ) {
2694                         return $s;
2695                 } else {
2696                         return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
2697                 }
2698         }
2699
2700         /**
2701          * @param $s string
2702          * @return string
2703          */
2704         function recodeInput( $s ) {
2705                 # Take the previous into account.
2706                 global $wgEditEncoding;
2707                 if ( $wgEditEncoding != '' ) {
2708                         $enc = $wgEditEncoding;
2709                 } else {
2710                         $enc = 'UTF-8';
2711                 }
2712                 if ( $enc == 'UTF-8' ) {
2713                         return $s;
2714                 } else {
2715                         return $this->iconv( $enc, 'UTF-8', $s );
2716                 }
2717         }
2718
2719         /**
2720          * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
2721          * also cleans up certain backwards-compatible sequences, converting them
2722          * to the modern Unicode equivalent.
2723          *
2724          * This is language-specific for performance reasons only.
2725          *
2726          * @param $s string
2727          *
2728          * @return string
2729          */
2730         function normalize( $s ) {
2731                 global $wgAllUnicodeFixes;
2732                 $s = UtfNormal::cleanUp( $s );
2733                 if ( $wgAllUnicodeFixes ) {
2734                         $s = $this->transformUsingPairFile( 'normalize-ar.ser', $s );
2735                         $s = $this->transformUsingPairFile( 'normalize-ml.ser', $s );
2736                 }
2737
2738                 return $s;
2739         }
2740
2741         /**
2742          * Transform a string using serialized data stored in the given file (which
2743          * must be in the serialized subdirectory of $IP). The file contains pairs
2744          * mapping source characters to destination characters.
2745          *
2746          * The data is cached in process memory. This will go faster if you have the
2747          * FastStringSearch extension.
2748          *
2749          * @param $file string
2750          * @param $string string
2751          *
2752          * @throws MWException
2753          * @return string
2754          */
2755         function transformUsingPairFile( $file, $string ) {
2756                 if ( !isset( $this->transformData[$file] ) ) {
2757                         $data = wfGetPrecompiledData( $file );
2758                         if ( $data === false ) {
2759                                 throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
2760                         }
2761                         $this->transformData[$file] = new ReplacementArray( $data );
2762                 }
2763                 return $this->transformData[$file]->replace( $string );
2764         }
2765
2766         /**
2767          * For right-to-left language support
2768          *
2769          * @return bool
2770          */
2771         function isRTL() {
2772                 return self::$dataCache->getItem( $this->mCode, 'rtl' );
2773         }
2774
2775         /**
2776          * Return the correct HTML 'dir' attribute value for this language.
2777          * @return String
2778          */
2779         function getDir() {
2780                 return $this->isRTL() ? 'rtl' : 'ltr';
2781         }
2782
2783         /**
2784          * Return 'left' or 'right' as appropriate alignment for line-start
2785          * for this language's text direction.
2786          *
2787          * Should be equivalent to CSS3 'start' text-align value....
2788          *
2789          * @return String
2790          */
2791         function alignStart() {
2792                 return $this->isRTL() ? 'right' : 'left';
2793         }
2794
2795         /**
2796          * Return 'right' or 'left' as appropriate alignment for line-end
2797          * for this language's text direction.
2798          *
2799          * Should be equivalent to CSS3 'end' text-align value....
2800          *
2801          * @return String
2802          */
2803         function alignEnd() {
2804                 return $this->isRTL() ? 'left' : 'right';
2805         }
2806
2807         /**
2808          * A hidden direction mark (LRM or RLM), depending on the language direction.
2809          * Unlike getDirMark(), this function returns the character as an HTML entity.
2810          * This function should be used when the output is guaranteed to be HTML,
2811          * because it makes the output HTML source code more readable. When
2812          * the output is plain text or can be escaped, getDirMark() should be used.
2813          *
2814          * @param $opposite Boolean Get the direction mark opposite to your language
2815          * @return string
2816          * @since 1.20
2817          */
2818         function getDirMarkEntity( $opposite = false ) {
2819                 if ( $opposite ) { return $this->isRTL() ? '&lrm;' : '&rlm;'; }
2820                 return $this->isRTL() ? '&rlm;' : '&lrm;';
2821         }
2822
2823         /**
2824          * A hidden direction mark (LRM or RLM), depending on the language direction.
2825          * This function produces them as invisible Unicode characters and
2826          * the output may be hard to read and debug, so it should only be used
2827          * when the output is plain text or can be escaped. When the output is
2828          * HTML, use getDirMarkEntity() instead.
2829          *
2830          * @param $opposite Boolean Get the direction mark opposite to your language
2831          * @return string
2832          */
2833         function getDirMark( $opposite = false ) {
2834                 $lrm = "\xE2\x80\x8E"; # LEFT-TO-RIGHT MARK, commonly abbreviated LRM
2835                 $rlm = "\xE2\x80\x8F"; # RIGHT-TO-LEFT MARK, commonly abbreviated RLM
2836                 if ( $opposite ) { return $this->isRTL() ? $lrm : $rlm; }
2837                 return $this->isRTL() ? $rlm : $lrm;
2838         }
2839
2840         /**
2841          * @return array
2842          */
2843         function capitalizeAllNouns() {
2844                 return self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );
2845         }
2846
2847         /**
2848          * An arrow, depending on the language direction.
2849          *
2850          * @param $direction String: the direction of the arrow: forwards (default), backwards, left, right, up, down.
2851          * @return string
2852          */
2853         function getArrow( $direction = 'forwards' ) {
2854                 switch ( $direction ) {
2855                 case 'forwards':
2856                         return $this->isRTL() ? '←' : '→';
2857                 case 'backwards':
2858                         return $this->isRTL() ? '→' : '←';
2859                 case 'left':
2860                         return '←';
2861                 case 'right':
2862                         return '→';
2863                 case 'up':
2864                         return '↑';
2865                 case 'down':
2866                         return '↓';
2867                 }
2868         }
2869
2870         /**
2871          * To allow "foo[[bar]]" to extend the link over the whole word "foobar"
2872          *
2873          * @return bool
2874          */
2875         function linkPrefixExtension() {
2876                 return self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );
2877         }
2878
2879         /**
2880          * @return array
2881          */
2882         function getMagicWords() {
2883                 return self::$dataCache->getItem( $this->mCode, 'magicWords' );
2884         }
2885
2886         protected function doMagicHook() {
2887                 if ( $this->mMagicHookDone ) {
2888                         return;
2889                 }
2890                 $this->mMagicHookDone = true;
2891                 wfProfileIn( 'LanguageGetMagic' );
2892                 wfRunHooks( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
2893                 wfProfileOut( 'LanguageGetMagic' );
2894         }
2895
2896         /**
2897          * Fill a MagicWord object with data from here
2898          *
2899          * @param $mw
2900          */
2901         function getMagic( $mw ) {
2902                 $this->doMagicHook();
2903
2904                 if ( isset( $this->mMagicExtensions[$mw->mId] ) ) {
2905                         $rawEntry = $this->mMagicExtensions[$mw->mId];
2906                 } else {
2907                         $magicWords = $this->getMagicWords();
2908                         if ( isset( $magicWords[$mw->mId] ) ) {
2909                                 $rawEntry = $magicWords[$mw->mId];
2910                         } else {
2911                                 $rawEntry = false;
2912                         }
2913                 }
2914
2915                 if ( !is_array( $rawEntry ) ) {
2916                         error_log( "\"$rawEntry\" is not a valid magic word for \"$mw->mId\"" );
2917                 } else {
2918                         $mw->mCaseSensitive = $rawEntry[0];
2919                         $mw->mSynonyms = array_slice( $rawEntry, 1 );
2920                 }
2921         }
2922
2923         /**
2924          * Add magic words to the extension array
2925          *
2926          * @param $newWords array
2927          */
2928         function addMagicWordsByLang( $newWords ) {
2929                 $fallbackChain = $this->getFallbackLanguages();
2930                 $fallbackChain = array_reverse( $fallbackChain );
2931                 foreach ( $fallbackChain as $code ) {
2932                         if ( isset( $newWords[$code] ) ) {
2933                                 $this->mMagicExtensions = $newWords[$code] + $this->mMagicExtensions;
2934                         }
2935                 }
2936         }
2937
2938         /**
2939          * Get special page names, as an associative array
2940          *   case folded alias => real name
2941          */
2942         function getSpecialPageAliases() {
2943                 // Cache aliases because it may be slow to load them
2944                 if ( is_null( $this->mExtendedSpecialPageAliases ) ) {
2945                         // Initialise array
2946                         $this->mExtendedSpecialPageAliases =
2947                                 self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
2948                         wfRunHooks( 'LanguageGetSpecialPageAliases',
2949                                 array( &$this->mExtendedSpecialPageAliases, $this->getCode() ) );
2950                 }
2951
2952                 return $this->mExtendedSpecialPageAliases;
2953         }
2954
2955         /**
2956          * Italic is unsuitable for some languages
2957          *
2958          * @param $text String: the text to be emphasized.
2959          * @return string
2960          */
2961         function emphasize( $text ) {
2962                 return "<em>$text</em>";
2963         }
2964
2965         /**
2966          * Normally we output all numbers in plain en_US style, that is
2967          * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
2968          * point twohundredthirtyfive. However this is not suitable for all
2969          * languages, some such as Pakaran want ੨੯੩,੨੯੫.੨੩੫ and others such as
2970          * Icelandic just want to use commas instead of dots, and dots instead
2971          * of commas like "293.291,235".
2972          *
2973          * An example of this function being called:
2974          * <code>
2975          * wfMessage( 'message' )->numParams( $num )->text()
2976          * </code>
2977          *
2978          * See LanguageGu.php for the Gujarati implementation and
2979          * $separatorTransformTable on MessageIs.php for
2980          * the , => . and . => , implementation.
2981          *
2982          * @todo check if it's viable to use localeconv() for the decimal
2983          *       separator thing.
2984          * @param $number Mixed: the string to be formatted, should be an integer
2985          *        or a floating point number.
2986          * @param $nocommafy Bool: set to true for special numbers like dates
2987          * @return string
2988          */
2989         public function formatNum( $number, $nocommafy = false ) {
2990                 global $wgTranslateNumerals;
2991                 if ( !$nocommafy ) {
2992                         $number = $this->commafy( $number );
2993                         $s = $this->separatorTransformTable();
2994                         if ( $s ) {
2995                                 $number = strtr( $number, $s );
2996                         }
2997                 }
2998
2999                 if ( $wgTranslateNumerals ) {
3000                         $s = $this->digitTransformTable();
3001                         if ( $s ) {
3002                                 $number = strtr( $number, $s );
3003                         }
3004                 }
3005
3006                 return $number;
3007         }
3008
3009         /**
3010          * Front-end for non-commafied formatNum
3011          *
3012          * @param mixed $number the string to be formatted, should be an integer
3013          *        or a floating point number.
3014          * @since 1.21
3015          * @return string
3016          */
3017         public function formatNumNoSeparators( $number ) {
3018                 return $this->formatNum( $number, true );
3019         }
3020
3021         /**
3022          * @param $number string
3023          * @return string
3024          */
3025         function parseFormattedNumber( $number ) {
3026                 $s = $this->digitTransformTable();
3027                 if ( $s ) {
3028                         $number = strtr( $number, array_flip( $s ) );
3029                 }
3030
3031                 $s = $this->separatorTransformTable();
3032                 if ( $s ) {
3033                         $number = strtr( $number, array_flip( $s ) );
3034                 }
3035
3036                 $number = strtr( $number, array( ',' => '' ) );
3037                 return $number;
3038         }
3039
3040         /**
3041          * Adds commas to a given number
3042          * @since 1.19
3043          * @param $number mixed
3044          * @return string
3045          */
3046         function commafy( $number ) {
3047                 $digitGroupingPattern = $this->digitGroupingPattern();
3048                 if ( $number === null ) {
3049                         return '';
3050                 }
3051
3052                 if ( !$digitGroupingPattern || $digitGroupingPattern === "###,###,###" ) {
3053                         // default grouping is at thousands,  use the same for ###,###,### pattern too.
3054                         return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) );
3055                 } else {
3056                         // Ref: http://cldr.unicode.org/translation/number-patterns
3057                         $sign = "";
3058                         if ( intval( $number ) < 0 ) {
3059                                 // For negative numbers apply the algorithm like positive number and add sign.
3060                                 $sign =  "-";
3061                                 $number = substr( $number, 1 );
3062                         }
3063                         $integerPart = array();
3064                         $decimalPart = array();
3065                         $numMatches = preg_match_all( "/(#+)/", $digitGroupingPattern, $matches );
3066                         preg_match( "/\d+/", $number, $integerPart );
3067                         preg_match( "/\.\d*/", $number, $decimalPart );
3068                         $groupedNumber = ( count( $decimalPart ) > 0 ) ? $decimalPart[0]:"";
3069                         if ( $groupedNumber  === $number ) {
3070                                 // the string does not have any number part. Eg: .12345
3071                                 return $sign . $groupedNumber;
3072                         }
3073                         $start = $end = strlen( $integerPart[0] );
3074                         while ( $start > 0 ) {
3075                                 $match = $matches[0][$numMatches -1] ;
3076                                 $matchLen = strlen( $match );
3077                                 $start = $end - $matchLen;
3078                                 if ( $start < 0 ) {
3079                                         $start = 0;
3080                                 }
3081                                 $groupedNumber = substr( $number, $start, $end -$start ) . $groupedNumber ;
3082                                 $end = $start;
3083                                 if ( $numMatches > 1 ) {
3084                                         // use the last pattern for the rest of the number
3085                                         $numMatches--;
3086                                 }
3087                                 if ( $start > 0 ) {
3088                                         $groupedNumber = "," . $groupedNumber;
3089                                 }
3090                         }
3091                         return $sign . $groupedNumber;
3092                 }
3093         }
3094
3095         /**
3096          * @return String
3097          */
3098         function digitGroupingPattern() {
3099                 return self::$dataCache->getItem( $this->mCode, 'digitGroupingPattern' );
3100         }
3101
3102         /**
3103          * @return array
3104          */
3105         function digitTransformTable() {
3106                 return self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );
3107         }
3108
3109         /**
3110          * @return array
3111          */
3112         function separatorTransformTable() {
3113                 return self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );
3114         }
3115
3116         /**
3117          * Take a list of strings and build a locale-friendly comma-separated
3118          * list, using the local comma-separator message.
3119          * The last two strings are chained with an "and".
3120          * NOTE: This function will only work with standard numeric array keys (0, 1, 2…)
3121          *
3122          * @param $l Array
3123          * @return string
3124          */
3125         function listToText( array $l ) {
3126                 $m = count( $l ) - 1;
3127                 if ( $m < 0 ) {
3128                         return '';
3129                 }
3130                 if ( $m > 0 ) {
3131                         $and = $this->getMessageFromDB( 'and' );
3132                         $space = $this->getMessageFromDB( 'word-separator' );
3133                         if ( $m > 1 ) {
3134                                 $comma = $this->getMessageFromDB( 'comma-separator' );
3135                         }
3136                 }
3137                 $s = $l[$m];
3138                 for ( $i = $m - 1; $i >= 0; $i-- ) {
3139                         if ( $i == $m - 1 ) {
3140                                 $s = $l[$i] . $and . $space . $s;
3141                         } else {
3142                                 $s = $l[$i] . $comma . $s;
3143                         }
3144                 }
3145                 return $s;
3146         }
3147
3148         /**
3149          * Take a list of strings and build a locale-friendly comma-separated
3150          * list, using the local comma-separator message.
3151          * @param $list array of strings to put in a comma list
3152          * @return string
3153          */
3154         function commaList( array $list ) {
3155                 return implode(
3156                         wfMessage( 'comma-separator' )->inLanguage( $this )->escaped(),
3157                         $list
3158                 );
3159         }
3160
3161         /**
3162          * Take a list of strings and build a locale-friendly semicolon-separated
3163          * list, using the local semicolon-separator message.
3164          * @param $list array of strings to put in a semicolon list
3165          * @return string
3166          */
3167         function semicolonList( array $list ) {
3168                 return implode(
3169                         wfMessage( 'semicolon-separator' )->inLanguage( $this )->escaped(),
3170                         $list
3171                 );
3172         }
3173
3174         /**
3175          * Same as commaList, but separate it with the pipe instead.
3176          * @param $list array of strings to put in a pipe list
3177          * @return string
3178          */
3179         function pipeList( array $list ) {
3180                 return implode(
3181                         wfMessage( 'pipe-separator' )->inLanguage( $this )->escaped(),
3182                         $list
3183                 );
3184         }
3185
3186         /**
3187          * Truncate a string to a specified length in bytes, appending an optional
3188          * string (e.g. for ellipses)
3189          *
3190          * The database offers limited byte lengths for some columns in the database;
3191          * multi-byte character sets mean we need to ensure that only whole characters
3192          * are included, otherwise broken characters can be passed to the user
3193          *
3194          * If $length is negative, the string will be truncated from the beginning
3195          *
3196          * @param $string String to truncate
3197          * @param $length Int: maximum length (including ellipses)
3198          * @param $ellipsis String to append to the truncated text
3199          * @param $adjustLength Boolean: Subtract length of ellipsis from $length.
3200          *      $adjustLength was introduced in 1.18, before that behaved as if false.
3201          * @return string
3202          */
3203         function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
3204                 # Use the localized ellipsis character
3205                 if ( $ellipsis == '...' ) {
3206                         $ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
3207                 }
3208                 # Check if there is no need to truncate
3209                 if ( $length == 0 ) {
3210                         return $ellipsis; // convention
3211                 } elseif ( strlen( $string ) <= abs( $length ) ) {
3212                         return $string; // no need to truncate
3213                 }
3214                 $stringOriginal = $string;
3215                 # If ellipsis length is >= $length then we can't apply $adjustLength
3216                 if ( $adjustLength && strlen( $ellipsis ) >= abs( $length ) ) {
3217                         $string = $ellipsis; // this can be slightly unexpected
3218                 # Otherwise, truncate and add ellipsis...
3219                 } else {
3220                         $eLength = $adjustLength ? strlen( $ellipsis ) : 0;
3221                         if ( $length > 0 ) {
3222                                 $length -= $eLength;
3223                                 $string = substr( $string, 0, $length ); // xyz...
3224                                 $string = $this->removeBadCharLast( $string );
3225                                 $string = $string . $ellipsis;
3226                         } else {
3227                                 $length += $eLength;
3228                                 $string = substr( $string, $length ); // ...xyz
3229                                 $string = $this->removeBadCharFirst( $string );
3230                                 $string = $ellipsis . $string;
3231                         }
3232                 }
3233                 # Do not truncate if the ellipsis makes the string longer/equal (bug 22181).
3234                 # This check is *not* redundant if $adjustLength, due to the single case where
3235                 # LEN($ellipsis) > ABS($limit arg); $stringOriginal could be shorter than $string.
3236                 if ( strlen( $string ) < strlen( $stringOriginal ) ) {
3237                         return $string;
3238                 } else {
3239                         return $stringOriginal;
3240                 }
3241         }
3242
3243         /**
3244          * Remove bytes that represent an incomplete Unicode character
3245          * at the end of string (e.g. bytes of the char are missing)
3246          *
3247          * @param $string String
3248          * @return string
3249          */
3250         protected function removeBadCharLast( $string ) {
3251                 if ( $string != '' ) {
3252                         $char = ord( $string[strlen( $string ) - 1] );
3253                         $m = array();
3254                         if ( $char >= 0xc0 ) {
3255                                 # We got the first byte only of a multibyte char; remove it.
3256                                 $string = substr( $string, 0, -1 );
3257                         } elseif ( $char >= 0x80 &&
3258                                   preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
3259                                                           '[\xf0-\xf7][\x80-\xbf]{1,2})$/', $string, $m ) )
3260                         {
3261                                 # We chopped in the middle of a character; remove it
3262                                 $string = $m[1];
3263                         }
3264                 }
3265                 return $string;
3266         }
3267
3268         /**
3269          * Remove bytes that represent an incomplete Unicode character
3270          * at the start of string (e.g. bytes of the char are missing)
3271          *
3272          * @param $string String
3273          * @return string
3274          */
3275         protected function removeBadCharFirst( $string ) {
3276                 if ( $string != '' ) {
3277                         $char = ord( $string[0] );
3278                         if ( $char >= 0x80 && $char < 0xc0 ) {
3279                                 # We chopped in the middle of a character; remove the whole thing
3280                                 $string = preg_replace( '/^[\x80-\xbf]+/', '', $string );
3281                         }
3282                 }
3283                 return $string;
3284         }
3285
3286         /**
3287          * Truncate a string of valid HTML to a specified length in bytes,
3288          * appending an optional string (e.g. for ellipses), and return valid HTML
3289          *
3290          * This is only intended for styled/linked text, such as HTML with
         * tags like <span> and <a>, where the tags are self-contained (valid HTML).
3292          * Also, this will not detect things like "display:none" CSS.
3293          *
3294          * Note: since 1.18 you do not need to leave extra room in $length for ellipses.
3295          *
3296          * @param string $text HTML string to truncate
3297          * @param int $length (zero/positive) Maximum length (including ellipses)
3298          * @param string $ellipsis String to append to the truncated text
3299          * @return string
3300          */
3301         function truncateHtml( $text, $length, $ellipsis = '...' ) {
3302                 # Use the localized ellipsis character
3303                 if ( $ellipsis == '...' ) {
3304                         $ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
3305                 }
3306                 # Check if there is clearly no need to truncate
3307                 if ( $length <= 0 ) {
3308                         return $ellipsis; // no text shown, nothing to format (convention)
3309                 } elseif ( strlen( $text ) <= $length ) {
3310                         return $text; // string short enough even *with* HTML (short-circuit)
3311                 }
3312
3313                 $dispLen = 0; // innerHTML legth so far
3314                 $testingEllipsis = false; // checking if ellipses will make string longer/equal?
3315                 $tagType = 0; // 0-open, 1-close
3316                 $bracketState = 0; // 1-tag start, 2-tag name, 0-neither
3317                 $entityState = 0; // 0-not entity, 1-entity
3318                 $tag = $ret = ''; // accumulated tag name, accumulated result string
3319                 $openTags = array(); // open tag stack
3320                 $maybeState = null; // possible truncation state
3321
3322                 $textLen = strlen( $text );
3323                 $neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
3324                 for ( $pos = 0; true; ++$pos ) {
3325                         # Consider truncation once the display length has reached the maximim.
3326                         # We check if $dispLen > 0 to grab tags for the $neLength = 0 case.
3327                         # Check that we're not in the middle of a bracket/entity...
3328                         if ( $dispLen && $dispLen >= $neLength && $bracketState == 0 && !$entityState ) {
3329                                 if ( !$testingEllipsis ) {
3330                                         $testingEllipsis = true;
3331                                         # Save where we are; we will truncate here unless there turn out to
3332                                         # be so few remaining characters that truncation is not necessary.
3333                                         if ( !$maybeState ) { // already saved? ($neLength = 0 case)
3334                                                 $maybeState = array( $ret, $openTags ); // save state
3335                                         }
3336                                 } elseif ( $dispLen > $length && $dispLen > strlen( $ellipsis ) ) {
3337                                         # String in fact does need truncation, the truncation point was OK.
3338                                         list( $ret, $openTags ) = $maybeState; // reload state
3339                                         $ret = $this->removeBadCharLast( $ret ); // multi-byte char fix
3340                                         $ret .= $ellipsis; // add ellipsis
3341                                         break;
3342                                 }
3343                         }
3344                         if ( $pos >= $textLen ) break; // extra iteration just for above checks
3345
3346                         # Read the next char...
3347                         $ch = $text[$pos];
3348                         $lastCh = $pos ? $text[$pos - 1] : '';
3349                         $ret .= $ch; // add to result string
3350                         if ( $ch == '<' ) {
3351                                 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
3352                                 $entityState = 0; // for bad HTML
3353                                 $bracketState = 1; // tag started (checking for backslash)
3354                         } elseif ( $ch == '>' ) {
3355                                 $this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
3356                                 $entityState = 0; // for bad HTML
3357                                 $bracketState = 0; // out of brackets
3358                         } elseif ( $bracketState == 1 ) {
3359                                 if ( $ch == '/' ) {
3360                                         $tagType = 1; // close tag (e.g. "</span>")
3361                                 } else {
3362                                         $tagType = 0; // open tag (e.g. "<span>")
3363                                         $tag .= $ch;
3364                                 }
3365                                 $bracketState = 2; // building tag name
3366                         } elseif ( $bracketState == 2 ) {
3367                                 if ( $ch != ' ' ) {
3368                                         $tag .= $ch;
3369                                 } else {
3370                                         // Name found (e.g. "<a href=..."), add on tag attributes...
3371                                         $pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
3372                                 }
3373                         } elseif ( $bracketState == 0 ) {
3374                                 if ( $entityState ) {
3375                                         if ( $ch == ';' ) {
3376                                                 $entityState = 0;
3377                                                 $dispLen++; // entity is one displayed char
3378                                         }
3379                                 } else {
3380                                         if ( $neLength == 0 && !$maybeState ) {
3381                                                 // Save state without $ch. We want to *hit* the first
3382                                                 // display char (to get tags) but not *use* it if truncating.
3383                                                 $maybeState = array( substr( $ret, 0, -1 ), $openTags );
3384                                         }
3385                                         if ( $ch == '&' ) {
3386                                                 $entityState = 1; // entity found, (e.g. "&#160;")
3387                                         } else {
3388                                                 $dispLen++; // this char is displayed
3389                                                 // Add the next $max display text chars after this in one swoop...
3390                                                 $max = ( $testingEllipsis ? $length : $neLength ) - $dispLen;
3391                                                 $skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
3392                                                 $dispLen += $skipped;
3393                                                 $pos += $skipped;
3394                                         }
3395                                 }
3396                         }
3397                 }
3398                 // Close the last tag if left unclosed by bad HTML
3399                 $this->truncate_endBracket( $tag, $text[$textLen - 1], $tagType, $openTags );
3400                 while ( count( $openTags ) > 0 ) {
3401                         $ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
3402                 }
3403                 return $ret;
3404         }
3405
3406         /**
3407          * truncateHtml() helper function
3408          * like strcspn() but adds the skipped chars to $ret
3409          *
3410          * @param $ret
3411          * @param $text
3412          * @param $search
3413          * @param $start
3414          * @param $len
3415          * @return int
3416          */
3417         private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
3418                 if ( $len === null ) {
3419                         $len = -1; // -1 means "no limit" for strcspn
3420                 } elseif ( $len < 0 ) {
3421                         $len = 0; // sanity
3422                 }
3423                 $skipCount = 0;
3424                 if ( $start < strlen( $text ) ) {
3425                         $skipCount = strcspn( $text, $search, $start, $len );
3426                         $ret .= substr( $text, $start, $skipCount );
3427                 }
3428                 return $skipCount;
3429         }
3430
3431         /**
3432          * truncateHtml() helper function
3433          * (a) push or pop $tag from $openTags as needed
3434          * (b) clear $tag value
3435          * @param &$tag string Current HTML tag name we are looking at
3436          * @param $tagType int (0-open tag, 1-close tag)
3437          * @param $lastCh string Character before the '>' that ended this tag
3438          * @param &$openTags array Open tag stack (not accounting for $tag)
3439          */
3440         private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
3441                 $tag = ltrim( $tag );
3442                 if ( $tag != '' ) {
3443                         if ( $tagType == 0 && $lastCh != '/' ) {
3444                                 $openTags[] = $tag; // tag opened (didn't close itself)
3445                         } elseif ( $tagType == 1 ) {
3446                                 if ( $openTags && $tag == $openTags[count( $openTags ) - 1] ) {
3447                                         array_pop( $openTags ); // tag closed
3448                                 }
3449                         }
3450                         $tag = '';
3451                 }
3452         }
3453
3454         /**
3455          * Grammatical transformations, needed for inflected languages
3456          * Invoked by putting {{grammar:case|word}} in a message
3457          *
3458          * @param $word string
3459          * @param $case string
3460          * @return string
3461          */
3462         function convertGrammar( $word, $case ) {
3463                 global $wgGrammarForms;
3464                 if ( isset( $wgGrammarForms[$this->getCode()][$case][$word] ) ) {
3465                         return $wgGrammarForms[$this->getCode()][$case][$word];
3466                 }
3467                 return $word;
3468         }
3469         /**
3470          * Get the grammar forms for the content language
3471          * @return array of grammar forms
3472          * @since 1.20
3473          */
3474         function getGrammarForms() {
3475                 global $wgGrammarForms;
3476                 if ( isset( $wgGrammarForms[$this->getCode()] ) && is_array( $wgGrammarForms[$this->getCode()] ) ) {
3477                          return $wgGrammarForms[$this->getCode()];
3478                 }
3479                 return array();
3480         }
3481         /**
3482          * Provides an alternative text depending on specified gender.
3483          * Usage {{gender:username|masculine|feminine|neutral}}.
3484          * username is optional, in which case the gender of current user is used,
3485          * but only in (some) interface messages; otherwise default gender is used.
3486          *
3487          * If no forms are given, an empty string is returned. If only one form is
3488          * given, it will be returned unconditionally. These details are implied by
3489          * the caller and cannot be overridden in subclasses.
3490          *
3491          * If more than one form is given, the default is to use the neutral one
3492          * if it is specified, and to use the masculine one otherwise. These
3493          * details can be overridden in subclasses.
3494          *
3495          * @param $gender string
3496          * @param $forms array
3497          *
3498          * @return string
3499          */
3500         function gender( $gender, $forms ) {
3501                 if ( !count( $forms ) ) {
3502                         return '';
3503                 }
3504                 $forms = $this->preConvertPlural( $forms, 2 );
3505                 if ( $gender === 'male' ) {
3506                         return $forms[0];
3507                 }
3508                 if ( $gender === 'female' ) {
3509                         return $forms[1];
3510                 }
3511                 return isset( $forms[2] ) ? $forms[2] : $forms[0];
3512         }
3513
3514         /**
3515          * Plural form transformations, needed for some languages.
3516          * For example, there are 3 form of plural in Russian and Polish,
3517          * depending on "count mod 10". See [[w:Plural]]
3518          * For English it is pretty simple.
3519          *
3520          * Invoked by putting {{plural:count|wordform1|wordform2}}
3521          * or {{plural:count|wordform1|wordform2|wordform3}}
3522          *
3523          * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
3524          *
3525          * @param $count Integer: non-localized number
3526          * @param $forms Array: different plural forms
3527          * @return string Correct form of plural for $count in this language
3528          */
3529         function convertPlural( $count, $forms ) {
3530                 if ( !count( $forms ) ) {
3531                         return '';
3532                 }
3533
3534                 // Handle explicit n=pluralform cases
3535                 foreach ( $forms as $index => $form ) {
3536                         if ( preg_match( '/\d+=/i', $form ) ) {
3537                                 $pos = strpos( $form, '=' );
3538                                 if ( substr( $form, 0, $pos ) === (string) $count ) {
3539                                         return substr( $form, $pos + 1 );
3540                                 }
3541                                 unset( $forms[$index] );
3542                         }
3543                 }
3544                 $forms = array_values( $forms );
3545
3546                 $pluralForm = $this->getPluralRuleIndexNumber( $count );
3547                 $pluralForm = min( $pluralForm, count( $forms ) - 1 );
3548                 return $forms[$pluralForm];
3549         }
3550
3551         /**
3552          * Checks that convertPlural was given an array and pads it to requested
3553          * amount of forms by copying the last one.
3554          *
3555          * @param $count Integer: How many forms should there be at least
3556          * @param $forms Array of forms given to convertPlural
3557          * @return array Padded array of forms or an exception if not an array
3558          */
3559         protected function preConvertPlural( /* Array */ $forms, $count ) {
3560                 while ( count( $forms ) < $count ) {
3561                         $forms[] = $forms[count( $forms ) - 1];
3562                 }
3563                 return $forms;
3564         }
3565
3566         /**
3567          * @todo Maybe translate block durations.  Note that this function is somewhat misnamed: it
3568          * deals with translating the *duration* ("1 week", "4 days", etc), not the expiry time
3569          * (which is an absolute timestamp). Please note: do NOT add this blindly, as it is used
3570          * on old expiry lengths recorded in log entries. You'd need to provide the start date to
3571          * match up with it.
3572          *
3573          * @param $str String: the validated block duration in English
3574          * @return string Somehow translated block duration
3575          * @see LanguageFi.php for example implementation
3576          */
3577         function translateBlockExpiry( $str ) {
3578                 $duration = SpecialBlock::getSuggestedDurations( $this );
3579                 foreach ( $duration as $show => $value ) {
3580                         if ( strcmp( $str, $value ) == 0 ) {
3581                                 return htmlspecialchars( trim( $show ) );
3582                         }
3583                 }
3584
3585                 // Since usually only infinite or indefinite is only on list, so try
3586                 // equivalents if still here.
3587                 $indefs = array( 'infinite', 'infinity', 'indefinite' );
3588                 if ( in_array( $str, $indefs ) ) {
3589                         foreach ( $indefs as $val ) {
3590                                 $show = array_search( $val, $duration, true );
3591                                 if ( $show !== false ) {
3592                                         return htmlspecialchars( trim( $show ) );
3593                                 }
3594                         }
3595                 }
3596
3597                 // If all else fails, return a standard duration or timestamp description.
3598                 $time = strtotime( $str, 0 );
3599                 if ( $time === false ) { // Unknown format. Return it as-is in case.
3600                         return $str;
3601                 } elseif ( $time !== strtotime( $str, 1 ) ) { // It's a relative timestamp.
3602                         // $time is relative to 0 so it's a duration length.
3603                         return $this->formatDuration( $time );
3604                 } else { // It's an absolute timestamp.
3605                         if ( $time === 0 ) {
3606                                 // wfTimestamp() handles 0 as current time instead of epoch.
3607                                 return $this->timeanddate( '19700101000000' );
3608                         } else {
3609                                 return $this->timeanddate( $time );
3610                         }
3611                 }
3612         }
3613
3614         /**
3615          * languages like Chinese need to be segmented in order for the diff
3616          * to be of any use
3617          *
3618          * @param $text String
3619          * @return String
3620          */
3621         public function segmentForDiff( $text ) {
3622                 return $text;
3623         }
3624
3625         /**
3626          * and unsegment to show the result
3627          *
3628          * @param $text String
3629          * @return String
3630          */
3631         public function unsegmentForDiff( $text ) {
3632                 return $text;
3633         }
3634
3635         /**
3636          * Return the LanguageConverter used in the Language
3637          *
3638          * @since 1.19
3639          * @return LanguageConverter
3640          */
3641         public function getConverter() {
3642                 return $this->mConverter;
3643         }
3644
3645         /**
3646          * convert text to all supported variants
3647          *
3648          * @param $text string
3649          * @return array
3650          */
3651         public function autoConvertToAllVariants( $text ) {
3652                 return $this->mConverter->autoConvertToAllVariants( $text );
3653         }
3654
3655         /**
3656          * convert text to different variants of a language.
3657          *
3658          * @param $text string
3659          * @return string
3660          */
3661         public function convert( $text ) {
3662                 return $this->mConverter->convert( $text );
3663         }
3664
3665         /**
3666          * Convert a Title object to a string in the preferred variant
3667          *
3668          * @param $title Title
3669          * @return string
3670          */
3671         public function convertTitle( $title ) {
3672                 return $this->mConverter->convertTitle( $title );
3673         }
3674
3675         /**
3676          * Convert a namespace index to a string in the preferred variant
3677          *
3678          * @param $ns int
3679          * @return string
3680          */
3681         public function convertNamespace( $ns ) {
3682                 return $this->mConverter->convertNamespace( $ns );
3683         }
3684
3685         /**
3686          * Check if this is a language with variants
3687          *
3688          * @return bool
3689          */
3690         public function hasVariants() {
3691                 return count( $this->getVariants() ) > 1;
3692         }
3693
3694         /**
3695          * Check if the language has the specific variant
3696          *
3697          * @since 1.19
3698          * @param $variant string
3699          * @return bool
3700          */
3701         public function hasVariant( $variant ) {
3702                 return (bool)$this->mConverter->validateVariant( $variant );
3703         }
3704
3705         /**
3706          * Put custom tags (e.g. -{ }-) around math to prevent conversion
3707          *
3708          * @param $text string
3709          * @return string
3710          */
3711         public function armourMath( $text ) {
3712                 return $this->mConverter->armourMath( $text );
3713         }
3714
3715         /**
3716          * Perform output conversion on a string, and encode for safe HTML output.
3717          * @param $text String text to be converted
3718          * @param $isTitle Bool whether this conversion is for the article title
3719          * @return string
3720          * @todo this should get integrated somewhere sane
3721          */
3722         public function convertHtml( $text, $isTitle = false ) {
3723                 return htmlspecialchars( $this->convert( $text, $isTitle ) );
3724         }
3725
3726         /**
3727          * @param $key string
3728          * @return string
3729          */
3730         public function convertCategoryKey( $key ) {
3731                 return $this->mConverter->convertCategoryKey( $key );
3732         }
3733
3734         /**
3735          * Get the list of variants supported by this language
3736          * see sample implementation in LanguageZh.php
3737          *
3738          * @return array an array of language codes
3739          */
3740         public function getVariants() {
3741                 return $this->mConverter->getVariants();
3742         }
3743
3744         /**
3745          * @return string
3746          */
3747         public function getPreferredVariant() {
3748                 return $this->mConverter->getPreferredVariant();
3749         }
3750
3751         /**
3752          * @return string
3753          */
3754         public function getDefaultVariant() {
3755                 return $this->mConverter->getDefaultVariant();
3756         }
3757
3758         /**
3759          * @return string
3760          */
3761         public function getURLVariant() {
3762                 return $this->mConverter->getURLVariant();
3763         }
3764
3765         /**
3766          * If a language supports multiple variants, it is
3767          * possible that non-existing link in one variant
3768          * actually exists in another variant. this function
3769          * tries to find it. See e.g. LanguageZh.php
3770          *
3771          * @param $link String: the name of the link
3772          * @param $nt Mixed: the title object of the link
3773          * @param $ignoreOtherCond Boolean: to disable other conditions when
3774          *      we need to transclude a template or update a category's link
3775          * @return null the input parameters may be modified upon return
3776          */
3777         public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
3778                 $this->mConverter->findVariantLink( $link, $nt, $ignoreOtherCond );
3779         }
3780
3781         /**
3782          * If a language supports multiple variants, converts text
3783          * into an array of all possible variants of the text:
3784          *  'variant' => text in that variant
3785          *
3786          * @deprecated since 1.17 Use autoConvertToAllVariants()
3787          *
3788          * @param $text string
3789          *
3790          * @return string
3791          */
3792         public function convertLinkToAllVariants( $text ) {
3793                 return $this->mConverter->convertLinkToAllVariants( $text );
3794         }
3795
3796         /**
3797          * returns language specific options used by User::getPageRenderHash()
3798          * for example, the preferred language variant
3799          *
3800          * @return string
3801          */
3802         function getExtraHashOptions() {
3803                 return $this->mConverter->getExtraHashOptions();
3804         }
3805
3806         /**
3807          * For languages that support multiple variants, the title of an
3808          * article may be displayed differently in different variants. this
3809          * function returns the apporiate title defined in the body of the article.
3810          *
3811          * @return string
3812          */
3813         public function getParsedTitle() {
3814                 return $this->mConverter->getParsedTitle();
3815         }
3816
3817         /**
3818          * Prepare external link text for conversion. When the text is
3819          * a URL, it shouldn't be converted, and it'll be wrapped in
3820          * the "raw" tag (-{R| }-) to prevent conversion.
3821          *
3822          * This function is called "markNoConversion" for historical
3823          * reasons.
3824          *
3825          * @param $text String: text to be used for external link
3826          * @param $noParse bool: wrap it without confirming it's a real URL first
3827          * @return string the tagged text
3828          */
3829         public function markNoConversion( $text, $noParse = false ) {
3830                 // Excluding protocal-relative URLs may avoid many false positives.
3831                 if ( $noParse || preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text ) ) {
3832                         return $this->mConverter->markNoConversion( $text );
3833                 } else {
3834                         return $text;
3835                 }
3836         }
3837
3838         /**
3839          * A regular expression to match legal word-trailing characters
3840          * which should be merged onto a link of the form [[foo]]bar.
3841          *
3842          * @return string
3843          */
3844         public function linkTrail() {
3845                 return self::$dataCache->getItem( $this->mCode, 'linkTrail' );
3846         }
3847
3848         /**
3849          * @return Language
3850          */
3851         function getLangObj() {
3852                 return $this;
3853         }
3854
3855         /**
3856          * Get the RFC 3066 code for this language object
3857          *
3858          * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
3859          * htmlspecialchars() or similar
3860          *
3861          * @return string
3862          */
3863         public function getCode() {
3864                 return $this->mCode;
3865         }
3866
3867         /**
3868          * Get the code in Bcp47 format which we can use
3869          * inside of html lang="" tags.
3870          *
3871          * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
3872          * htmlspecialchars() or similar.
3873          *
3874          * @since 1.19
3875          * @return string
3876          */
3877         public function getHtmlCode() {
3878                 if ( is_null( $this->mHtmlCode ) ) {
3879                         $this->mHtmlCode = wfBCP47( $this->getCode() );
3880                 }
3881                 return $this->mHtmlCode;
3882         }
3883
3884         /**
3885          * @param $code string
3886          */
3887         public function setCode( $code ) {
3888                 $this->mCode = $code;
3889                 // Ensure we don't leave an incorrect html code lying around
3890                 $this->mHtmlCode = null;
3891         }
3892
3893         /**
3894          * Get the name of a file for a certain language code
3895          * @param $prefix string Prepend this to the filename
3896          * @param $code string Language code
3897          * @param $suffix string Append this to the filename
3898          * @throws MWException
3899          * @return string $prefix . $mangledCode . $suffix
3900          */
3901         public static function getFileName( $prefix = 'Language', $code, $suffix = '.php' ) {
3902                 // Protect against path traversal
3903                 if ( !Language::isValidCode( $code )
3904                         || strcspn( $code, ":/\\\000" ) !== strlen( $code ) )
3905                 {
3906                         throw new MWException( "Invalid language code \"$code\"" );
3907                 }
3908
3909                 return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
3910         }
3911
3912         /**
3913          * Get the language code from a file name. Inverse of getFileName()
3914          * @param $filename string $prefix . $languageCode . $suffix
3915          * @param $prefix string Prefix before the language code
3916          * @param $suffix string Suffix after the language code
3917          * @return string Language code, or false if $prefix or $suffix isn't found
3918          */
3919         public static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
3920                 $m = null;
3921                 preg_match( '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
3922                         preg_quote( $suffix, '/' ) . '/', $filename, $m );
3923                 if ( !count( $m ) ) {
3924                         return false;
3925                 }
3926                 return str_replace( '_', '-', strtolower( $m[1] ) );
3927         }
3928
3929         /**
3930          * @param $code string
3931          * @return string
3932          */
3933         public static function getMessagesFileName( $code ) {
3934                 global $IP;
3935                 $file = self::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
3936                 wfRunHooks( 'Language::getMessagesFileName', array( $code, &$file ) );
3937                 return $file;
3938         }
3939
3940         /**
3941          * @param $code string
3942          * @return string
3943          */
3944         public static function getClassFileName( $code ) {
3945                 global $IP;
3946                 return self::getFileName( "$IP/languages/classes/Language", $code, '.php' );
3947         }
3948
3949         /**
3950          * Get the first fallback for a given language.
3951          *
3952          * @param $code string
3953          *
3954          * @return bool|string
3955          */
3956         public static function getFallbackFor( $code ) {
3957                 if ( $code === 'en' || !Language::isValidBuiltInCode( $code ) ) {
3958                         return false;
3959                 } else {
3960                         $fallbacks = self::getFallbacksFor( $code );
3961                         $first = array_shift( $fallbacks );
3962                         return $first;
3963                 }
3964         }
3965
3966         /**
3967          * Get the ordered list of fallback languages.
3968          *
3969          * @since 1.19
3970          * @param $code string Language code
3971          * @return array
3972          */
3973         public static function getFallbacksFor( $code ) {
3974                 if ( $code === 'en' || !Language::isValidBuiltInCode( $code ) ) {
3975                         return array();
3976                 } else {
3977                         $v = self::getLocalisationCache()->getItem( $code, 'fallback' );
3978                         $v = array_map( 'trim', explode( ',', $v ) );
3979                         if ( $v[count( $v ) - 1] !== 'en' ) {
3980                                 $v[] = 'en';
3981                         }
3982                         return $v;
3983                 }
3984         }
3985
3986         /**
3987          * Get all messages for a given language
3988          * WARNING: this may take a long time. If you just need all message *keys*
3989          * but need the *contents* of only a few messages, consider using getMessageKeysFor().
3990          *
3991          * @param $code string
3992          *
3993          * @return array
3994          */
3995         public static function getMessagesFor( $code ) {
3996                 return self::getLocalisationCache()->getItem( $code, 'messages' );
3997         }
3998
3999         /**
4000          * Get a message for a given language
4001          *
4002          * @param $key string
4003          * @param $code string
4004          *
4005          * @return string
4006          */
4007         public static function getMessageFor( $key, $code ) {
4008                 return self::getLocalisationCache()->getSubitem( $code, 'messages', $key );
4009         }
4010
4011         /**
4012          * Get all message keys for a given language. This is a faster alternative to
4013          * array_keys( Language::getMessagesFor( $code ) )
4014          *
4015          * @since 1.19
4016          * @param $code string Language code
4017          * @return array of message keys (strings)
4018          */
4019         public static function getMessageKeysFor( $code ) {
4020                 return self::getLocalisationCache()->getSubItemList( $code, 'messages' );
4021         }
4022
4023         /**
4024          * @param $talk
4025          * @return mixed
4026          */
4027         function fixVariableInNamespace( $talk ) {
4028                 if ( strpos( $talk, '$1' ) === false ) {
4029                         return $talk;
4030                 }
4031
4032                 global $wgMetaNamespace;
4033                 $talk = str_replace( '$1', $wgMetaNamespace, $talk );
4034
4035                 # Allow grammar transformations
4036                 # Allowing full message-style parsing would make simple requests
4037                 # such as action=raw much more expensive than they need to be.
4038                 # This will hopefully cover most cases.
4039                 $talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
4040                         array( &$this, 'replaceGrammarInNamespace' ), $talk );
4041                 return str_replace( ' ', '_', $talk );
4042         }
4043
4044         /**
4045          * @param $m string
4046          * @return string
4047          */
4048         function replaceGrammarInNamespace( $m ) {
4049                 return $this->convertGrammar( trim( $m[2] ), trim( $m[1] ) );
4050         }
4051
4052         /**
4053          * @throws MWException
4054          * @return array
4055          */
4056         static function getCaseMaps() {
4057                 static $wikiUpperChars, $wikiLowerChars;
4058                 if ( isset( $wikiUpperChars ) ) {
4059                         return array( $wikiUpperChars, $wikiLowerChars );
4060                 }
4061
4062                 wfProfileIn( __METHOD__ );
4063                 $arr = wfGetPrecompiledData( 'Utf8Case.ser' );
4064                 if ( $arr === false ) {
4065                         throw new MWException(
4066                                 "Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
4067                 }
4068                 $wikiUpperChars = $arr['wikiUpperChars'];
4069                 $wikiLowerChars = $arr['wikiLowerChars'];
4070                 wfProfileOut( __METHOD__ );
4071                 return array( $wikiUpperChars, $wikiLowerChars );
4072         }
4073
/**
 * Decode an expiry (block, protection, etc) which has come from the DB
 *
 * An empty expiry, or one equal to the database's "infinity" value, is
 * treated as infinite.
 *
 * @todo FIXME: why are we returning DBMS-dependent strings???
 *
 * @param $expiry String: Database expiry String
 * @param $format Bool|Int true to process using language functions, or TS_ constant
 *     to return the expiry in a given timestamp
 * @return String For an infinite expiry: the 'infiniteblock' message text in
 *     this language when $format is true, otherwise the database's infinity
 *     value. For any other expiry: the result of timeanddate() when $format
 *     is true, otherwise the result of wfTimestamp( $format, $expiry ).
 * @since 1.18
 */
public function formatExpiry( $expiry, $format = true ) {
	// Only the database's infinity value is cached here. The message text
	// depends on the language object, so it must not live in a static that
	// is shared by every Language instance.
	static $infinity;
	if ( $infinity === null ) {
		$infinity = wfGetDB( DB_SLAVE )->getInfinity();
	}

	if ( $expiry == '' || $expiry == $infinity ) {
		// Return the message as text in this language, as the documented
		// return type promises, rather than a Message object.
		return $format === true
			? wfMessage( 'infiniteblock' )->inLanguage( $this )->text()
			: $infinity;
	}

	return $format === true
		? $this->timeanddate( $expiry, /* User preference timezone */ true )
		: wfTimestamp( $format, $expiry );
}
4102
/**
 * Format a duration given in seconds as text in this language, built from
 * the 'days', 'hours', 'minutes' and 'seconds' messages (or their '-abbrev'
 * variants).
 *
 * The components shown depend on the magnitude of $seconds:
 *  - rounds to less than 10.0 s: seconds with one decimal place
 *  - rounds to less than 60 s: whole seconds
 *  - rounds to less than 1 hour: minutes and seconds
 *  - rounds to at most 48 hours: hours and minutes, followed by seconds
 *    unless $format['avoid'] is 'avoidseconds' or 'avoidminutes'
 *  - anything longer: days, followed by hours ('avoidminutes'), by hours
 *    and minutes ('avoidseconds'), or by the remainder formatted recursively
 *
 * @param $seconds int|float
 * @param $format Array Optional
 *		If $format['avoid'] == 'avoidseconds' - don't mention seconds if $seconds >= 1 hour
 *		If $format['avoid'] == 'avoidminutes' - don't mention seconds/minutes if $seconds > 48 hours
 *		If $format['noabbrevs'] is true - use 'seconds' and friends instead of 'seconds-abbrev' and friends
 *		For backwards compatibility, $format may also be one of the strings 'avoidseconds' or 'avoidminutes'
 * @return string
 */
function formatTimePeriod( $seconds, $format = array() ) {
	if ( !is_array( $format ) ) {
		$format = array( 'avoid' => $format ); // For backwards compatibility
	}
	if ( !isset( $format['avoid'] ) ) {
		$format['avoid'] = false;
	}
	if ( !isset( $format['noabbrevs'] ) ) {
		$format['noabbrevs'] = false;
	}
	$secondsMsg = wfMessage(
		$format['noabbrevs'] ? 'seconds' : 'seconds-abbrev' )->inLanguage( $this );
	$minutesMsg = wfMessage(
		$format['noabbrevs'] ? 'minutes' : 'minutes-abbrev' )->inLanguage( $this );
	$hoursMsg = wfMessage(
		$format['noabbrevs'] ? 'hours' : 'hours-abbrev' )->inLanguage( $this );
	$daysMsg = wfMessage(
		$format['noabbrevs'] ? 'days' : 'days-abbrev' )->inLanguage( $this );

	if ( round( $seconds * 10 ) < 100 ) {
		// Short durations keep one decimal place
		$s = $this->formatNum( sprintf( "%.1f", round( $seconds * 10 ) / 10 ) );
		$s = $secondsMsg->params( $s )->text();
	} elseif ( round( $seconds ) < 60 ) {
		$s = $this->formatNum( round( $seconds ) );
		$s = $secondsMsg->params( $s )->text();
	} elseif ( round( $seconds ) < 3600 ) {
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
		// Rounding may push the seconds part up to a full minute; carry it over
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		$s = $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		$s .= ' ';
		$s .= $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
	} elseif ( round( $seconds ) <= 2 * 86400 ) {
		$hours = floor( $seconds / 3600 );
		$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
		$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
		// Carry rounded-up seconds into minutes, and minutes into hours
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		if ( $minutes == 60 ) {
			$minutes = 0;
			$hours++;
		}
		$s = $hoursMsg->params( $this->formatNum( $hours ) )->text();
		$s .= ' ';
		$s .= $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		// Strict comparison, matching the === checks in the days branch below:
		// a loose in_array() would treat values such as 0 or true as a match
		// and silently drop the seconds.
		if ( !in_array( $format['avoid'], array( 'avoidseconds', 'avoidminutes' ), true ) ) {
			$s .= ' ' . $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
		}
	} else {
		$days = floor( $seconds / 86400 );
		if ( $format['avoid'] === 'avoidminutes' ) {
			// Days and rounded hours only
			$hours = round( ( $seconds - $days * 86400 ) / 3600 );
			if ( $hours == 24 ) {
				$hours = 0;
				$days++;
			}
			$s = $daysMsg->params( $this->formatNum( $days ) )->text();
			$s .= ' ';
			$s .= $hoursMsg->params( $this->formatNum( $hours ) )->text();
		} elseif ( $format['avoid'] === 'avoidseconds' ) {
			// Days, hours and rounded minutes
			$hours = floor( ( $seconds - $days * 86400 ) / 3600 );
			$minutes = round( ( $seconds - $days * 86400 - $hours * 3600 ) / 60 );
			if ( $minutes == 60 ) {
				$minutes = 0;
				$hours++;
			}
			if ( $hours == 24 ) {
				$hours = 0;
				$days++;
			}
			$s = $daysMsg->params( $this->formatNum( $days ) )->text();
			$s .= ' ';
			$s .= $hoursMsg->params( $this->formatNum( $hours ) )->text();
			$s .= ' ';
			$s .= $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		} else {
			// Days, then the sub-day remainder formatted by a recursive call
			$s = $daysMsg->params( $this->formatNum( $days ) )->text();
			$s .= ' ';
			$s .= $this->formatTimePeriod( $seconds - $days * 86400, $format );
		}
	}
	return $s;
}
4201
/**
 * Format a bitrate for output, picking a unit (bps, kbps, Mbps, Gbps,
 * Tbps, Pbps, Ebps, Zbps or Ybps) that suits its magnitude.
 *
 * This uses base 1000. For base 1024 use formatSize(); for any other
 * base see formatComputingNumbers().
 *
 * @param $bps int
 * @return string
 */
function formatBitrate( $bps ) {
	$base = 1000;
	// '$1' is the placeholder that formatComputingNumbers() substitutes
	$messagePattern = 'bitrate-$1bits';
	return $this->formatComputingNumbers( $bps, $base, $messagePattern );
}
4215
/**
 * Scale a quantity into the largest fitting unit and format it with the
 * matching message.
 *
 * The '$1' in $messageKey is replaced by the unit prefix ('', 'kilo',
 * 'mega', ...) to build the message name; the '$1' in the resulting
 * message text is then replaced by the formatted number. Quantities of
 * zero or less are shown unscaled with the prefix-less message.
 *
 * @param $size int Size of the unit
 * @param $boundary int Size boundary (1000, or 1024 in most cases)
 * @param $messageKey string Message key to be used
 * @return string
 */
function formatComputingNumbers( $size, $boundary, $messageKey ) {
	if ( $size <= 0 ) {
		$formatted = $this->formatNum( $size );
		$baseText = $this->getMessageFromDB( str_replace( '$1', '', $messageKey ) );
		return str_replace( '$1', $formatted, $baseText );
	}

	$prefixes = array( '', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zeta', 'yotta' );
	$lastPrefix = count( $prefixes ) - 1;

	// Divide down until the value fits under the boundary or the
	// largest known prefix has been reached
	for ( $step = 0; $step < $lastPrefix && $size >= $boundary; $step++ ) {
		$size /= $boundary;
	}

	// No decimal places for the two smallest units, two from 'mega' upwards
	$precision = ( $step > 1 ) ? 2 : 0;

	$unitMessage = str_replace( '$1', $prefixes[$step], $messageKey );
	$size = round( $size, $precision );
	$template = $this->getMessageFromDB( $unitMessage );

	return str_replace( '$1', $this->formatNum( $size ), $template );
}
4249
/**
 * Format a size in bytes for output, picking a unit (B, KB, MB, GB, TB,
 * PB, EB, ZB or YB) that suits its magnitude.
 *
 * This method uses base 1024. For base 1000 use formatBitrate(); for any
 * other base see formatComputingNumbers().
 *
 * @param $size int Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	$base = 1024;
	// '$1' is the placeholder that formatComputingNumbers() substitutes
	$messagePattern = 'size-$1bytes';
	return $this->formatComputingNumbers( $size, $base, $messagePattern );
}
4263
/**
 * Build a list item as used by various special pages: the page link,
 * optionally followed by direction mark(s), a word separator and the
 * details wrapped by the 'parentheses' message.
 *
 * @param $page String Page link
 * @param $details String Text between brackets; when empty (falsy) only
 *                 $page is returned
 * @param $oppositedm Boolean Add the direction mark opposite to your
 *                    language, to display text properly
 * @return String
 */
function specialList( $page, $details, $oppositedm = true ) {
	// Optional opposite-direction mark first, then this language's own mark
	$marks = '';
	if ( $oppositedm ) {
		$marks = $this->getDirMark( true );
	}
	$marks .= $this->getDirMark();

	$suffix = '';
	if ( $details ) {
		$separator = $this->getMessageFromDB( 'word-separator' );
		$bracketed = wfMessage( 'parentheses' )
			->rawParams( $details )
			->inLanguage( $this )
			->escaped();
		$suffix = $marks . $separator . $bracketed;
	}

	return $page . $suffix;
}
4280
/**
 * Generate (prev x| next x) (20|50|100...) type links for paging
 *
 * The "previous" entry is plain escaped text when $offset is not
 * positive, and the "next" entry is plain escaped text when $atend is
 * true; otherwise each is a link built by numLink().
 *
 * @param $title Title object to link
 * @param $offset Integer offset parameter
 * @param $limit Integer limit parameter
 * @param $query Array optional extra URL query parameters
 * @param $atend Bool optional param for specified if this is the last page
 * @return String
 */
public function viewPrevNext( Title $title, $offset, $limit, array $query = array(), $atend = false ) {
	// @todo FIXME: Why on earth this needs one message for the text and another one for tooltip?

	# Make 'previous' link
	$prev = wfMessage( 'prevn' )->inLanguage( $this )->title( $title )->numParams( $limit )->text();
	if ( $offset > 0 ) {
		$plink = $this->numLink( $title, max( $offset - $limit, 0 ), $limit,
			$query, $prev, 'prevn-title', 'mw-prevlink' );
	} else {
		$plink = htmlspecialchars( $prev );
	}

	# Make 'next' link
	$next = wfMessage( 'nextn' )->inLanguage( $this )->title( $title )->numParams( $limit )->text();
	if ( $atend ) {
		$nlink = htmlspecialchars( $next );
	} else {
		// The tooltip must come from the "next" message, not the "previous" one
		$nlink = $this->numLink( $title, $offset + $limit, $limit,
			$query, $next, 'nextn-title', 'mw-nextlink' );
	}

	# Make links to set number of items per page
	$numLinks = array();
	foreach ( array( 20, 50, 100, 250, 500 ) as $num ) {
		$numLinks[] = $this->numLink( $title, $offset, $num,
			$query, $this->formatNum( $num ), 'shown-title', 'mw-numlink' );
	}

	// The links are already HTML, so they go in as raw parameters
	return wfMessage( 'viewprevnext' )->inLanguage( $this )->title( $title
		)->rawParams( $plink, $nlink, $this->pipeList( $numLinks ) )->escaped();
}
4322
/**
 * Helper for viewPrevNext(): build one paging link.
 *
 * The 'limit' and 'offset' entries given here take precedence over any
 * entries of the same name in $query.
 *
 * @param $title Title object to link
 * @param $offset Integer offset parameter
 * @param $limit Integer limit parameter
 * @param $query Array extra query parameters
 * @param $link String text to use for the link; will be escaped
 * @param $tooltipMsg String name of the message to use as tooltip
 * @param $class String value of the "class" attribute of the link
 * @return String HTML fragment
 */
private function numLink( Title $title, $offset, $limit, array $query, $link, $tooltipMsg, $class ) {
	$urlParams = array( 'limit' => $limit, 'offset' => $offset ) + $query;

	$tooltipText = wfMessage( $tooltipMsg )
		->inLanguage( $this )
		->title( $title )
		->numParams( $limit )
		->text();

	$attribs = array(
		'href' => $title->getLocalURL( $urlParams ),
		'title' => $tooltipText,
		'class' => $class,
	);

	return Html::element( 'a', $attribs, $link );
}
4341
/**
 * Get the conversion rule title, if any, as reported by this
 * language's converter.
 *
 * @return string
 */
public function getConvRuleTitle() {
	$converter = $this->mConverter;
	return $converter->getConvRuleTitle();
}
4350
/**
 * Get the compiled plural rules for the language
 *
 * The language's own data is used when it is non-empty; otherwise the
 * fallback languages are tried in order and the first non-empty result
 * wins.
 *
 * @since 1.20
 * @return array Associative array with plural form, and plural rule as key-value pairs
 */
public function getCompiledPluralRules() {
	$pluralRules = self::$dataCache->getItem( strtolower( $this->mCode ), 'compiledPluralRules' );
	if ( $pluralRules ) {
		return $pluralRules;
	}

	// Resolve the fallback chain only when the language's own data is empty
	foreach ( Language::getFallbacksFor( $this->mCode ) as $fallbackCode ) {
		$pluralRules = self::$dataCache->getItem( strtolower( $fallbackCode ), 'compiledPluralRules' );
		if ( $pluralRules ) {
			break;
		}
	}
	return $pluralRules;
}
4369
/**
 * Get the plural rules for the language
 *
 * The language's own data is used when it is non-empty; otherwise the
 * fallback languages are tried in order and the first non-empty result
 * wins.
 *
 * @since 1.20
 * @return array Associative array with plural form number and plural rule as key-value pairs
 */
public function getPluralRules() {
	$pluralRules = self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRules' );
	if ( $pluralRules ) {
		return $pluralRules;
	}

	// Resolve the fallback chain only when the language's own data is empty
	foreach ( Language::getFallbacksFor( $this->mCode ) as $fallbackCode ) {
		$pluralRules = self::$dataCache->getItem( strtolower( $fallbackCode ), 'pluralRules' );
		if ( $pluralRules ) {
			break;
		}
	}
	return $pluralRules;
}
4388
/**
 * Get the plural rule types for the language
 *
 * The language's own data is used when it is non-empty; otherwise the
 * fallback languages are tried in order and the first non-empty result
 * wins.
 *
 * @since 1.21
 * @return array Associative array with plural form number and plural rule type as key-value pairs
 */
public function getPluralRuleTypes() {
	$pluralRuleTypes = self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRuleTypes' );
	if ( $pluralRuleTypes ) {
		return $pluralRuleTypes;
	}

	// Resolve the fallback chain only when the language's own data is empty
	foreach ( Language::getFallbacksFor( $this->mCode ) as $fallbackCode ) {
		$pluralRuleTypes = self::$dataCache->getItem( strtolower( $fallbackCode ), 'pluralRuleTypes' );
		if ( $pluralRuleTypes ) {
			break;
		}
	}
	return $pluralRuleTypes;
}
4407
/**
 * Find the index number of the plural rule appropriate for the given number
 *
 * @param $number mixed The number to evaluate; handed unchanged to
 *     CLDRPluralRuleEvaluator::evaluateCompiled() together with this
 *     language's compiled plural rules
 * @return int The index number of the plural rule
 */
public function getPluralRuleIndexNumber( $number ) {
	$compiledRules = $this->getCompiledPluralRules();
	return CLDRPluralRuleEvaluator::evaluateCompiled( $number, $compiledRules );
}
4417
/**
 * Find the plural rule type appropriate for the given number
 * For example, if the language is set to Arabic, getPluralRuleType(5)
 * should return 'few'.
 *
 * Falls back to 'other' when no type is registered for the rule index
 * that the number evaluates to.
 *
 * @since 1.21
 * @param $number mixed The number to look up; handed unchanged to
 *     getPluralRuleIndexNumber()
 * @return string The name of the plural rule type, e.g. one, two, few, many
 */
public function getPluralRuleType( $number ) {
	$ruleIndex = $this->getPluralRuleIndexNumber( $number );
	$typesByIndex = $this->getPluralRuleTypes();
	return isset( $typesByIndex[$ruleIndex] )
		? $typesByIndex[$ruleIndex]
		: 'other';
}
4434 }