Merge "Remove not used private member variable mParserWarnings from OutputPage"
[mediawiki.git] / languages / Language.php
blob69f518b33e1366049d8a892292afd7d437ff16aa
1 <?php
2 /**
3 * Internationalisation code.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
21 * @ingroup Language
24 /**
25 * @defgroup Language Language
// Refuse to run when this file is loaded without the MEDIAWIKI constant defined.
if ( !defined( 'MEDIAWIKI' ) ) {
	echo "This file is part of MediaWiki, it is not a valid entry point.\n";
	exit( 1 );
}

// When the mbstring functions are available, make them operate on UTF-8.
if ( function_exists( 'mb_strtoupper' ) ) {
	mb_internal_encoding( 'UTF-8' );
}
37 use CLDRPluralRuleParser\Evaluator;
39 /**
40 * Internationalisation code
41 * @ingroup Language
43 class Language {
/**
 * @var LanguageConverter
 */
public $mConverter;

public $mVariants;
/** Language code; assigned by the constructor */
public $mCode;
public $mLoaded = false;

public $mMagicExtensions = array();
public $mMagicHookDone = false;

private $mHtmlCode = null;
private $mParentLanguage = false;

public $dateFormatStrings = array();
public $mExtendedSpecialPageAliases;

/** Lazily built by getNamespaces(); cleared by resetNamespaces() */
protected $namespaceNames;
/** Lazily built by getNamespaceIds(); cleared by setNamespaces() and resetNamespaces() */
protected $mNamespaceIds;
/** Lazily built by getNamespaceAliases(); cleared by resetNamespaces() */
protected $namespaceAliases;

/**
 * ReplacementArray object caches
 */
public $transformData = array();

/**
 * Shared localisation cache, created on demand by getLocalisationCache().
 * @var LocalisationCache
 */
public static $dataCache;

/**
 * Language objects keyed by language code, maintained by factory().
 */
public static $mLangObjCache = array();
/** Message keys for full weekday names, starting with Sunday */
public static $mWeekdayMsgs = array(
	'sunday', 'monday', 'tuesday', 'wednesday',
	'thursday', 'friday', 'saturday'
);

/** Message keys for abbreviated weekday names, starting with Sunday */
public static $mWeekdayAbbrevMsgs = array(
	'sun', 'mon', 'tue', 'wed', 'thu', 'fri', 'sat'
);

/** Message keys for full month names */
public static $mMonthMsgs = array(
	'january', 'february', 'march', 'april',
	'may_long', 'june', 'july', 'august',
	'september', 'october', 'november', 'december'
);

/** Message keys for the "-gen" forms of the month names */
public static $mMonthGenMsgs = array(
	'january-gen', 'february-gen', 'march-gen', 'april-gen',
	'may-gen', 'june-gen', 'july-gen', 'august-gen',
	'september-gen', 'october-gen', 'november-gen', 'december-gen'
);

/** Message keys for abbreviated month names */
public static $mMonthAbbrevMsgs = array(
	'jan', 'feb', 'mar', 'apr', 'may', 'jun',
	'jul', 'aug', 'sep', 'oct', 'nov', 'dec'
);

/** Message keys for the Iranian calendar months */
public static $mIranianCalendarMonthMsgs = array(
	'iranian-calendar-m1', 'iranian-calendar-m2', 'iranian-calendar-m3',
	'iranian-calendar-m4', 'iranian-calendar-m5', 'iranian-calendar-m6',
	'iranian-calendar-m7', 'iranian-calendar-m8', 'iranian-calendar-m9',
	'iranian-calendar-m10', 'iranian-calendar-m11', 'iranian-calendar-m12'
);

/** Message keys for the Hebrew calendar months, including the m6a/m6b entries */
public static $mHebrewCalendarMonthMsgs = array(
	'hebrew-calendar-m1', 'hebrew-calendar-m2', 'hebrew-calendar-m3',
	'hebrew-calendar-m4', 'hebrew-calendar-m5', 'hebrew-calendar-m6',
	'hebrew-calendar-m7', 'hebrew-calendar-m8', 'hebrew-calendar-m9',
	'hebrew-calendar-m10', 'hebrew-calendar-m11', 'hebrew-calendar-m12',
	'hebrew-calendar-m6a', 'hebrew-calendar-m6b'
);

/** Message keys for the "-gen" forms of the Hebrew calendar months */
public static $mHebrewCalendarMonthGenMsgs = array(
	'hebrew-calendar-m1-gen', 'hebrew-calendar-m2-gen', 'hebrew-calendar-m3-gen',
	'hebrew-calendar-m4-gen', 'hebrew-calendar-m5-gen', 'hebrew-calendar-m6-gen',
	'hebrew-calendar-m7-gen', 'hebrew-calendar-m8-gen', 'hebrew-calendar-m9-gen',
	'hebrew-calendar-m10-gen', 'hebrew-calendar-m11-gen', 'hebrew-calendar-m12-gen',
	'hebrew-calendar-m6a-gen', 'hebrew-calendar-m6b-gen'
);

/** Message keys for the Hijri calendar months */
public static $mHijriCalendarMonthMsgs = array(
	'hijri-calendar-m1', 'hijri-calendar-m2', 'hijri-calendar-m3',
	'hijri-calendar-m4', 'hijri-calendar-m5', 'hijri-calendar-m6',
	'hijri-calendar-m7', 'hijri-calendar-m8', 'hijri-calendar-m9',
	'hijri-calendar-m10', 'hijri-calendar-m11', 'hijri-calendar-m12'
);
/**
 * Named duration units mapped to their length, expressed in multiples of
 * the 'seconds' entry (which is 1), ordered from longest to shortest.
 *
 * @since 1.20
 * @var array
 */
public static $durationIntervals = array(
	'millennia' => 31556952000,
	'centuries' => 3155695200,
	'decades' => 315569520,
	'years' => 31556952, // 86400 * ( 365 + ( 24 * 3 + 25 ) / 400 )
	'weeks' => 604800,
	'days' => 86400,
	'hours' => 3600,
	'minutes' => 60,
	'seconds' => 1,
);
/**
 * Cache for language fallbacks.
 * @see Language::getFallbacksIncludingSiteLanguage
 * @since 1.21
 * @var array
 */
private static $fallbackLanguageCache = array();

/**
 * Cache for language names
 * @var HashBagOStuff|null
 */
private static $languageNameCache;

/**
 * Unicode directional formatting characters, for embedBidi().
 * Each value is the UTF-8 byte sequence of the code point named beside it.
 */
private static $lre = "\xE2\x80\xAA"; // U+202A LEFT-TO-RIGHT EMBEDDING
private static $rle = "\xE2\x80\xAB"; // U+202B RIGHT-TO-LEFT EMBEDDING
private static $pdf = "\xE2\x80\xAC"; // U+202C POP DIRECTIONAL FORMATTING
162 * Directionality test regex for embedBidi(). Matches the first strong directionality codepoint:
163 * - in group 1 if it is LTR
164 * - in group 2 if it is RTL
165 * Does not match if there is no strong directionality codepoint.
167 * The form is '/(?:([strong ltr codepoint])|([strong rtl codepoint]))/u' .
169 * Generated by UnicodeJS (see tools/strongDir) from the UCD; see
170 * https://git.wikimedia.org/summary/unicodejs.git .
172 // @codingStandardsIgnoreStart
173 // @codeCoverageIgnoreStart
174 static private $strongDirRegex = '/(?:([\x{41}-\x{5a}\x{61}-\x{7a}\x{aa}\x{b5}\x{ba}\x{c0}-\x{d6}\x{d8}-\x{f6}\x{f8}-\x{2b8}\x{2bb}-\x{2c1}\x{2d0}\x{2d1}\x{2e0}-\x{2e4}\x{2ee}\x{370}-\x{373}\x{376}\x{377}\x{37a}-\x{37d}\x{37f}\x{386}\x{388}-\x{38a}\x{38c}\x{38e}-\x{3a1}\x{3a3}-\x{3f5}\x{3f7}-\x{482}\x{48a}-\x{52f}\x{531}-\x{556}\x{559}-\x{55f}\x{561}-\x{587}\x{589}\x{903}-\x{939}\x{93b}\x{93d}-\x{940}\x{949}-\x{94c}\x{94e}-\x{950}\x{958}-\x{961}\x{964}-\x{980}\x{982}\x{983}\x{985}-\x{98c}\x{98f}\x{990}\x{993}-\x{9a8}\x{9aa}-\x{9b0}\x{9b2}\x{9b6}-\x{9b9}\x{9bd}-\x{9c0}\x{9c7}\x{9c8}\x{9cb}\x{9cc}\x{9ce}\x{9d7}\x{9dc}\x{9dd}\x{9df}-\x{9e1}\x{9e6}-\x{9f1}\x{9f4}-\x{9fa}\x{a03}\x{a05}-\x{a0a}\x{a0f}\x{a10}\x{a13}-\x{a28}\x{a2a}-\x{a30}\x{a32}\x{a33}\x{a35}\x{a36}\x{a38}\x{a39}\x{a3e}-\x{a40}\x{a59}-\x{a5c}\x{a5e}\x{a66}-\x{a6f}\x{a72}-\x{a74}\x{a83}\x{a85}-\x{a8d}\x{a8f}-\x{a91}\x{a93}-\x{aa8}\x{aaa}-\x{ab0}\x{ab2}\x{ab3}\x{ab5}-\x{ab9}\x{abd}-\x{ac0}\x{ac9}\x{acb}\x{acc}\x{ad0}\x{ae0}\x{ae1}\x{ae6}-\x{af0}\x{af9}\x{b02}\x{b03}\x{b05}-\x{b0c}\x{b0f}\x{b10}\x{b13}-\x{b28}\x{b2a}-\x{b30}\x{b32}\x{b33}\x{b35}-\x{b39}\x{b3d}\x{b3e}\x{b40}\x{b47}\x{b48}\x{b4b}\x{b4c}\x{b57}\x{b5c}\x{b5d}\x{b5f}-\x{b61}\x{b66}-\x{b77}\x{b83}\x{b85}-\x{b8a}\x{b8e}-\x{b90}\x{b92}-\x{b95}\x{b99}\x{b9a}\x{b9c}\x{b9e}\x{b9f}\x{ba3}\x{ba4}\x{ba8}-\x{baa}\x{bae}-\x{bb9}\x{bbe}\x{bbf}\x{bc1}\x{bc2}\x{bc6}-\x{bc8}\x{bca}-\x{bcc}\x{bd0}\x{bd7}\x{be6}-\x{bf2}\x{c01}-\x{c03}\x{c05}-\x{c0c}\x{c0e}-\x{c10}\x{c12}-\x{c28}\x{c2a}-\x{c39}\x{c3d}\x{c41}-\x{c44}\x{c58}-\x{c5a}\x{c60}\x{c61}\x{c66}-\x{c6f}\x{c7f}\x{c82}\x{c83}\x{c85}-\x{c8c}\x{c8e}-\x{c90}\x{c92}-\x{ca8}\x{caa}-\x{cb3}\x{cb5}-\x{cb9}\x{cbd}-\x{cc4}\x{cc6}-\x{cc8}\x{cca}\x{ccb}\x{cd5}\x{cd6}\x{cde}\x{ce0}\x{ce1}\x{ce6}-\x{cef}\x{cf1}\x{cf2}\x{d02}\x{d03}\x{d05}-\x{d0c}\x{d0e}-\x{d10}\x{d12}-\x{d3a}\x{d3d}-\x{d40}\x{d46}-\x{d48}\x{d4a}-\x{d4c}\x{d4e}\x{d57}\x{d5f}-\x{d61}\x{d66}-\x{d75}\x{d79}-\x{d7f}\x{d82}\x{d83}\x{d85}-\x{d96}\x{d9a}-\x{db1
}\x{db3}-\x{dbb}\x{dbd}\x{dc0}-\x{dc6}\x{dcf}-\x{dd1}\x{dd8}-\x{ddf}\x{de6}-\x{def}\x{df2}-\x{df4}\x{e01}-\x{e30}\x{e32}\x{e33}\x{e40}-\x{e46}\x{e4f}-\x{e5b}\x{e81}\x{e82}\x{e84}\x{e87}\x{e88}\x{e8a}\x{e8d}\x{e94}-\x{e97}\x{e99}-\x{e9f}\x{ea1}-\x{ea3}\x{ea5}\x{ea7}\x{eaa}\x{eab}\x{ead}-\x{eb0}\x{eb2}\x{eb3}\x{ebd}\x{ec0}-\x{ec4}\x{ec6}\x{ed0}-\x{ed9}\x{edc}-\x{edf}\x{f00}-\x{f17}\x{f1a}-\x{f34}\x{f36}\x{f38}\x{f3e}-\x{f47}\x{f49}-\x{f6c}\x{f7f}\x{f85}\x{f88}-\x{f8c}\x{fbe}-\x{fc5}\x{fc7}-\x{fcc}\x{fce}-\x{fda}\x{1000}-\x{102c}\x{1031}\x{1038}\x{103b}\x{103c}\x{103f}-\x{1057}\x{105a}-\x{105d}\x{1061}-\x{1070}\x{1075}-\x{1081}\x{1083}\x{1084}\x{1087}-\x{108c}\x{108e}-\x{109c}\x{109e}-\x{10c5}\x{10c7}\x{10cd}\x{10d0}-\x{1248}\x{124a}-\x{124d}\x{1250}-\x{1256}\x{1258}\x{125a}-\x{125d}\x{1260}-\x{1288}\x{128a}-\x{128d}\x{1290}-\x{12b0}\x{12b2}-\x{12b5}\x{12b8}-\x{12be}\x{12c0}\x{12c2}-\x{12c5}\x{12c8}-\x{12d6}\x{12d8}-\x{1310}\x{1312}-\x{1315}\x{1318}-\x{135a}\x{1360}-\x{137c}\x{1380}-\x{138f}\x{13a0}-\x{13f5}\x{13f8}-\x{13fd}\x{1401}-\x{167f}\x{1681}-\x{169a}\x{16a0}-\x{16f8}\x{1700}-\x{170c}\x{170e}-\x{1711}\x{1720}-\x{1731}\x{1735}\x{1736}\x{1740}-\x{1751}\x{1760}-\x{176c}\x{176e}-\x{1770}\x{1780}-\x{17b3}\x{17b6}\x{17be}-\x{17c5}\x{17c7}\x{17c8}\x{17d4}-\x{17da}\x{17dc}\x{17e0}-\x{17e9}\x{1810}-\x{1819}\x{1820}-\x{1877}\x{1880}-\x{18a8}\x{18aa}\x{18b0}-\x{18f5}\x{1900}-\x{191e}\x{1923}-\x{1926}\x{1929}-\x{192b}\x{1930}\x{1931}\x{1933}-\x{1938}\x{1946}-\x{196d}\x{1970}-\x{1974}\x{1980}-\x{19ab}\x{19b0}-\x{19c9}\x{19d0}-\x{19da}\x{1a00}-\x{1a16}\x{1a19}\x{1a1a}\x{1a1e}-\x{1a55}\x{1a57}\x{1a61}\x{1a63}\x{1a64}\x{1a6d}-\x{1a72}\x{1a80}-\x{1a89}\x{1a90}-\x{1a99}\x{1aa0}-\x{1aad}\x{1b04}-\x{1b33}\x{1b35}\x{1b3b}\x{1b3d}-\x{1b41}\x{1b43}-\x{1b4b}\x{1b50}-\x{1b6a}\x{1b74}-\x{1b7c}\x{1b82}-\x{1ba1}\x{1ba6}\x{1ba7}\x{1baa}\x{1bae}-\x{1be5}\x{1be7}\x{1bea}-\x{1bec}\x{1bee}\x{1bf2}\x{1bf3}\x{1bfc}-\x{1c2b}\x{1c34}\x{1c35}\x{1c3b}-\x{1c49}\x{1c4d}-\x{1c7f}\x{1cc0}-\x{1cc7}\x{1cd3
}\x{1ce1}\x{1ce9}-\x{1cec}\x{1cee}-\x{1cf3}\x{1cf5}\x{1cf6}\x{1d00}-\x{1dbf}\x{1e00}-\x{1f15}\x{1f18}-\x{1f1d}\x{1f20}-\x{1f45}\x{1f48}-\x{1f4d}\x{1f50}-\x{1f57}\x{1f59}\x{1f5b}\x{1f5d}\x{1f5f}-\x{1f7d}\x{1f80}-\x{1fb4}\x{1fb6}-\x{1fbc}\x{1fbe}\x{1fc2}-\x{1fc4}\x{1fc6}-\x{1fcc}\x{1fd0}-\x{1fd3}\x{1fd6}-\x{1fdb}\x{1fe0}-\x{1fec}\x{1ff2}-\x{1ff4}\x{1ff6}-\x{1ffc}\x{200e}\x{2071}\x{207f}\x{2090}-\x{209c}\x{2102}\x{2107}\x{210a}-\x{2113}\x{2115}\x{2119}-\x{211d}\x{2124}\x{2126}\x{2128}\x{212a}-\x{212d}\x{212f}-\x{2139}\x{213c}-\x{213f}\x{2145}-\x{2149}\x{214e}\x{214f}\x{2160}-\x{2188}\x{2336}-\x{237a}\x{2395}\x{249c}-\x{24e9}\x{26ac}\x{2800}-\x{28ff}\x{2c00}-\x{2c2e}\x{2c30}-\x{2c5e}\x{2c60}-\x{2ce4}\x{2ceb}-\x{2cee}\x{2cf2}\x{2cf3}\x{2d00}-\x{2d25}\x{2d27}\x{2d2d}\x{2d30}-\x{2d67}\x{2d6f}\x{2d70}\x{2d80}-\x{2d96}\x{2da0}-\x{2da6}\x{2da8}-\x{2dae}\x{2db0}-\x{2db6}\x{2db8}-\x{2dbe}\x{2dc0}-\x{2dc6}\x{2dc8}-\x{2dce}\x{2dd0}-\x{2dd6}\x{2dd8}-\x{2dde}\x{3005}-\x{3007}\x{3021}-\x{3029}\x{302e}\x{302f}\x{3031}-\x{3035}\x{3038}-\x{303c}\x{3041}-\x{3096}\x{309d}-\x{309f}\x{30a1}-\x{30fa}\x{30fc}-\x{30ff}\x{3105}-\x{312d}\x{3131}-\x{318e}\x{3190}-\x{31ba}\x{31f0}-\x{321c}\x{3220}-\x{324f}\x{3260}-\x{327b}\x{327f}-\x{32b0}\x{32c0}-\x{32cb}\x{32d0}-\x{32fe}\x{3300}-\x{3376}\x{337b}-\x{33dd}\x{33e0}-\x{33fe}\x{3400}-\x{4db5}\x{4e00}-\x{9fd5}\x{a000}-\x{a48c}\x{a4d0}-\x{a60c}\x{a610}-\x{a62b}\x{a640}-\x{a66e}\x{a680}-\x{a69d}\x{a6a0}-\x{a6ef}\x{a6f2}-\x{a6f7}\x{a722}-\x{a787}\x{a789}-\x{a7ad}\x{a7b0}-\x{a7b7}\x{a7f7}-\x{a801}\x{a803}-\x{a805}\x{a807}-\x{a80a}\x{a80c}-\x{a824}\x{a827}\x{a830}-\x{a837}\x{a840}-\x{a873}\x{a880}-\x{a8c3}\x{a8ce}-\x{a8d9}\x{a8f2}-\x{a8fd}\x{a900}-\x{a925}\x{a92e}-\x{a946}\x{a952}\x{a953}\x{a95f}-\x{a97c}\x{a983}-\x{a9b2}\x{a9b4}\x{a9b5}\x{a9ba}\x{a9bb}\x{a9bd}-\x{a9cd}\x{a9cf}-\x{a9d9}\x{a9de}-\x{a9e4}\x{a9e6}-\x{a9fe}\x{aa00}-\x{aa28}\x{aa2f}\x{aa30}\x{aa33}\x{aa34}\x{aa40}-\x{aa42}\x{aa44}-\x{aa4b}\x{aa4d}\x{aa50}-\x{aa59}\x{aa5c}-\x{aa7b}\x{aa7d}-\x{a
aaf}\x{aab1}\x{aab5}\x{aab6}\x{aab9}-\x{aabd}\x{aac0}\x{aac2}\x{aadb}-\x{aaeb}\x{aaee}-\x{aaf5}\x{ab01}-\x{ab06}\x{ab09}-\x{ab0e}\x{ab11}-\x{ab16}\x{ab20}-\x{ab26}\x{ab28}-\x{ab2e}\x{ab30}-\x{ab65}\x{ab70}-\x{abe4}\x{abe6}\x{abe7}\x{abe9}-\x{abec}\x{abf0}-\x{abf9}\x{ac00}-\x{d7a3}\x{d7b0}-\x{d7c6}\x{d7cb}-\x{d7fb}\x{e000}-\x{fa6d}\x{fa70}-\x{fad9}\x{fb00}-\x{fb06}\x{fb13}-\x{fb17}\x{ff21}-\x{ff3a}\x{ff41}-\x{ff5a}\x{ff66}-\x{ffbe}\x{ffc2}-\x{ffc7}\x{ffca}-\x{ffcf}\x{ffd2}-\x{ffd7}\x{ffda}-\x{ffdc}\x{10000}-\x{1000b}\x{1000d}-\x{10026}\x{10028}-\x{1003a}\x{1003c}\x{1003d}\x{1003f}-\x{1004d}\x{10050}-\x{1005d}\x{10080}-\x{100fa}\x{10100}\x{10102}\x{10107}-\x{10133}\x{10137}-\x{1013f}\x{101d0}-\x{101fc}\x{10280}-\x{1029c}\x{102a0}-\x{102d0}\x{10300}-\x{10323}\x{10330}-\x{1034a}\x{10350}-\x{10375}\x{10380}-\x{1039d}\x{1039f}-\x{103c3}\x{103c8}-\x{103d5}\x{10400}-\x{1049d}\x{104a0}-\x{104a9}\x{10500}-\x{10527}\x{10530}-\x{10563}\x{1056f}\x{10600}-\x{10736}\x{10740}-\x{10755}\x{10760}-\x{10767}\x{11000}\x{11002}-\x{11037}\x{11047}-\x{1104d}\x{11066}-\x{1106f}\x{11082}-\x{110b2}\x{110b7}\x{110b8}\x{110bb}-\x{110c1}\x{110d0}-\x{110e8}\x{110f0}-\x{110f9}\x{11103}-\x{11126}\x{1112c}\x{11136}-\x{11143}\x{11150}-\x{11172}\x{11174}-\x{11176}\x{11182}-\x{111b5}\x{111bf}-\x{111c9}\x{111cd}\x{111d0}-\x{111df}\x{111e1}-\x{111f4}\x{11200}-\x{11211}\x{11213}-\x{1122e}\x{11232}\x{11233}\x{11235}\x{11238}-\x{1123d}\x{11280}-\x{11286}\x{11288}\x{1128a}-\x{1128d}\x{1128f}-\x{1129d}\x{1129f}-\x{112a9}\x{112b0}-\x{112de}\x{112e0}-\x{112e2}\x{112f0}-\x{112f9}\x{11302}\x{11303}\x{11305}-\x{1130c}\x{1130f}\x{11310}\x{11313}-\x{11328}\x{1132a}-\x{11330}\x{11332}\x{11333}\x{11335}-\x{11339}\x{1133d}-\x{1133f}\x{11341}-\x{11344}\x{11347}\x{11348}\x{1134b}-\x{1134d}\x{11350}\x{11357}\x{1135d}-\x{11363}\x{11480}-\x{114b2}\x{114b9}\x{114bb}-\x{114be}\x{114c1}\x{114c4}-\x{114c7}\x{114d0}-\x{114d9}\x{11580}-\x{115b1}\x{115b8}-\x{115bb}\x{115be}\x{115c1}-\x{115db}\x{11600}-\x{11632}\x{1163b}\x{1163c}\x
{1163e}\x{11641}-\x{11644}\x{11650}-\x{11659}\x{11680}-\x{116aa}\x{116ac}\x{116ae}\x{116af}\x{116b6}\x{116c0}-\x{116c9}\x{11700}-\x{11719}\x{11720}\x{11721}\x{11726}\x{11730}-\x{1173f}\x{118a0}-\x{118f2}\x{118ff}\x{11ac0}-\x{11af8}\x{12000}-\x{12399}\x{12400}-\x{1246e}\x{12470}-\x{12474}\x{12480}-\x{12543}\x{13000}-\x{1342e}\x{14400}-\x{14646}\x{16800}-\x{16a38}\x{16a40}-\x{16a5e}\x{16a60}-\x{16a69}\x{16a6e}\x{16a6f}\x{16ad0}-\x{16aed}\x{16af5}\x{16b00}-\x{16b2f}\x{16b37}-\x{16b45}\x{16b50}-\x{16b59}\x{16b5b}-\x{16b61}\x{16b63}-\x{16b77}\x{16b7d}-\x{16b8f}\x{16f00}-\x{16f44}\x{16f50}-\x{16f7e}\x{16f93}-\x{16f9f}\x{1b000}\x{1b001}\x{1bc00}-\x{1bc6a}\x{1bc70}-\x{1bc7c}\x{1bc80}-\x{1bc88}\x{1bc90}-\x{1bc99}\x{1bc9c}\x{1bc9f}\x{1d000}-\x{1d0f5}\x{1d100}-\x{1d126}\x{1d129}-\x{1d166}\x{1d16a}-\x{1d172}\x{1d183}\x{1d184}\x{1d18c}-\x{1d1a9}\x{1d1ae}-\x{1d1e8}\x{1d360}-\x{1d371}\x{1d400}-\x{1d454}\x{1d456}-\x{1d49c}\x{1d49e}\x{1d49f}\x{1d4a2}\x{1d4a5}\x{1d4a6}\x{1d4a9}-\x{1d4ac}\x{1d4ae}-\x{1d4b9}\x{1d4bb}\x{1d4bd}-\x{1d4c3}\x{1d4c5}-\x{1d505}\x{1d507}-\x{1d50a}\x{1d50d}-\x{1d514}\x{1d516}-\x{1d51c}\x{1d51e}-\x{1d539}\x{1d53b}-\x{1d53e}\x{1d540}-\x{1d544}\x{1d546}\x{1d54a}-\x{1d550}\x{1d552}-\x{1d6a5}\x{1d6a8}-\x{1d6da}\x{1d6dc}-\x{1d714}\x{1d716}-\x{1d74e}\x{1d750}-\x{1d788}\x{1d78a}-\x{1d7c2}\x{1d7c4}-\x{1d7cb}\x{1d800}-\x{1d9ff}\x{1da37}-\x{1da3a}\x{1da6d}-\x{1da74}\x{1da76}-\x{1da83}\x{1da85}-\x{1da8b}\x{1f110}-\x{1f12e}\x{1f130}-\x{1f169}\x{1f170}-\x{1f19a}\x{1f1e6}-\x{1f202}\x{1f210}-\x{1f23a}\x{1f240}-\x{1f248}\x{1f250}\x{1f251}\x{20000}-\x{2a6d6}\x{2a700}-\x{2b734}\x{2b740}-\x{2b81d}\x{2b820}-\x{2cea1}\x{2f800}-\x{2fa1d}\x{f0000}-\x{ffffd}\x{100000}-\x{10fffd}])|([\x{590}\x{5be}\x{5c0}\x{5c3}\x{5c6}\x{5c8}-\x{5ff}\x{7c0}-\x{7ea}\x{7f4}\x{7f5}\x{7fa}-\x{815}\x{81a}\x{824}\x{828}\x{82e}-\x{858}\x{85c}-\x{89f}\x{200f}\x{fb1d}\x{fb1f}-\x{fb28}\x{fb2a}-\x{fb4f}\x{10800}-\x{1091e}\x{10920}-\x{10a00}\x{10a04}\x{10a07}-\x{10a0b}\x{10a10}-\x{10a37}\x{10a3b}-\x{10a3e}\x{10a40}
-\x{10ae4}\x{10ae7}-\x{10b38}\x{10b40}-\x{10e5f}\x{10e7f}-\x{10fff}\x{1e800}-\x{1e8cf}\x{1e8d7}-\x{1edff}\x{1ef00}-\x{1efff}\x{608}\x{60b}\x{60d}\x{61b}-\x{64a}\x{66d}-\x{66f}\x{671}-\x{6d5}\x{6e5}\x{6e6}\x{6ee}\x{6ef}\x{6fa}-\x{710}\x{712}-\x{72f}\x{74b}-\x{7a5}\x{7b1}-\x{7bf}\x{8a0}-\x{8e2}\x{fb50}-\x{fd3d}\x{fd40}-\x{fdcf}\x{fdf0}-\x{fdfc}\x{fdfe}\x{fdff}\x{fe70}-\x{fefe}\x{1ee00}-\x{1eeef}\x{1eef2}-\x{1eeff}]))/u';
175 // @codeCoverageIgnoreEnd
176 // @codingStandardsIgnoreEnd
/**
 * Get a cached or new language object for a given language code.
 *
 * A code that appears as a key in $wgDummyLanguageCodes is first replaced by
 * the value it maps to. The returned object is then moved to the front of
 * self::$mLangObjCache, and the cache is cut down to its first
 * $wgLangObjCacheSize entries.
 *
 * @param string $code
 * @return Language
 */
public static function factory( $code ) {
	global $wgDummyLanguageCodes, $wgLangObjCacheSize;

	if ( isset( $wgDummyLanguageCodes[$code] ) ) {
		$code = $wgDummyLanguageCodes[$code];
	}

	// Reuse the cached object when there is one, otherwise build it.
	$langObj = isset( self::$mLangObjCache[$code] )
		? self::$mLangObjCache[$code]
		: self::newFromCode( $code );

	// Move the object to the front of the cache. Array union is used instead of
	// array_merge() because array_merge() renumbers integer keys: PHP stores an
	// all-digit code such as "12" under an integer key, so merging would re-key
	// the entry and every later lookup for that code would miss the cache.
	self::$mLangObjCache = array( $code => $langObj ) + self::$mLangObjCache;
	// Drop the entries at the tail (the least recently requested) on overflow.
	self::$mLangObjCache = array_slice( self::$mLangObjCache, 0, $wgLangObjCacheSize, true );

	return $langObj;
}
/**
 * Create a language object for a given language code.
 *
 * Resolution order: a code that is valid but not a valid built-in code gets a
 * plain Language object; otherwise the class named by classFromCode() is used
 * if it exists; otherwise the first existing class along the fallback chain
 * is instantiated and given the requested code.
 *
 * @param string $code
 * @throws MWException If the code is invalid, a fallback code is not a valid
 *   built-in code, or no class exists anywhere in the fallback chain
 * @return Language
 */
protected static function newFromCode( $code ) {
	if ( !self::isValidCode( $code ) ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	if ( !self::isValidBuiltInCode( $code ) ) {
		// Such a code cannot be customised with class files, so hand back a
		// plain Language object. This is to support uselang= hacks.
		$generic = new Language;
		$generic->setCode( $code );
		return $generic;
	}

	// Prefer the language class belonging to the code itself.
	$ownClass = self::classFromCode( $code );
	if ( class_exists( $ownClass ) ) {
		return new $ownClass;
	}

	// Otherwise walk the fallback list until an existing class turns up.
	foreach ( self::getFallbacksFor( $code ) as $fallbackCode ) {
		if ( !self::isValidBuiltInCode( $fallbackCode ) ) {
			throw new MWException( "Invalid fallback '$fallbackCode' in fallback sequence for '$code'" );
		}

		$fallbackClass = self::classFromCode( $fallbackCode );
		if ( !class_exists( $fallbackClass ) ) {
			continue;
		}

		$lang = new $fallbackClass;
		$lang->setCode( $code );
		return $lang;
	}

	throw new MWException( "Invalid fallback sequence for language '$code'" );
}
/**
 * Checks whether any localisation is available for that language tag
 * in MediaWiki, i.e. whether the file named by getMessagesFileName() or
 * the one named by getJsonMessagesFileName() is readable.
 *
 * The code 'qqq' is always reported as unsupported.
 *
 * @param string $code Language tag (in lower case)
 * @return bool Whether language is supported
 * @since 1.21
 */
public static function isSupportedLanguage( $code ) {
	if ( !self::isValidBuiltInCode( $code ) || $code === 'qqq' ) {
		return false;
	}

	if ( is_readable( self::getMessagesFileName( $code ) ) ) {
		return true;
	}

	return is_readable( self::getJsonMessagesFileName( $code ) );
}
/**
 * Returns true if a language code string is a well-formed language tag
 * according to RFC 5646.
 * This function only checks well-formedness; it doesn't check that
 * language, script or variant codes actually exist in the repositories.
 *
 * The tag is lower-cased before matching, so the check is case-insensitive.
 *
 * Based on regexes by Mark Davis of the Unicode Consortium:
 * http://unicode.org/repos/cldr/trunk/tools/java/org/unicode/cldr/util/data/langtagRegex.txt
 *
 * @param string $code
 * @param bool $lenient Whether to allow '_' as separator. The default is only '-'.
 *
 * @return bool
 * @since 1.21
 */
public static function isWellFormedLanguageTag( $code, $lenient = false ) {
	$alpha = '[a-z]';
	$digit = '[0-9]';
	$alphanum = '[a-z0-9]';
	$x = 'x'; # private use singleton
	$singleton = '[a-wy-z]'; # other singleton
	$s = $lenient ? '[-_]' : '-';

	$language = "$alpha{2,8}|$alpha{2,3}$s$alpha{3}";
	$script = "$alpha{4}"; # ISO 15924
	$region = "(?:$alpha{2}|$digit{3})"; # ISO 3166-1 alpha-2 or UN M.49
	$variant = "(?:$alphanum{5,8}|$digit$alphanum{3})";
	$extension = "$singleton(?:$s$alphanum{2,8})+";
	$privateUse = "$x(?:$s$alphanum{1,8})+";

	# Define certain grandfathered codes, since otherwise the regex is pretty useless.
	# Since these are limited, this is safe even later changes to the registry --
	# the only oddity is that it might change the type of the tag, and thus
	# the results from the capturing groups.
	# http://www.iana.org/assignments/language-subtag-registry
	#
	# These literals must be all lower case: the subject is passed through
	# strtolower() below and the pattern has no 'i' modifier, so an upper-case
	# letter here could never match (en-GB-oed, sgn-BE-fr, sgn-BE-nl and
	# sgn-CH-de match no other alternative either).

	$grandfathered = "en{$s}gb{$s}oed"
		. "|i{$s}(?:ami|bnn|default|enochian|hak|klingon|lux|mingo|navajo|pwn|tao|tay|tsu)"
		. "|no{$s}(?:bok|nyn)"
		. "|sgn{$s}(?:be{$s}(?:fr|nl)|ch{$s}de)"
		. "|zh{$s}min{$s}nan";

	$variantList = "$variant(?:$s$variant)*";
	$extensionList = "$extension(?:$s$extension)*";

	$langtag = "(?:($language)"
		. "(?:$s$script)?"
		. "(?:$s$region)?"
		. "(?:$s$variantList)?"
		. "(?:$s$extensionList)?"
		. "(?:$s$privateUse)?)";

	# The final breakdown, with capturing groups for each of these components
	# The variants, extensions, grandfathered, and private-use may have interior '-'

	$root = "^(?:$langtag|$privateUse|$grandfathered)$";

	return (bool)preg_match( "/$root/", strtolower( $code ) );
}
/**
 * Returns true if a language code string is of a valid form, whether or
 * not it exists. This includes codes which are used solely for
 * customisation via the MediaWiki namespace.
 *
 * Results are memoised per code in a function-static array.
 *
 * @param string $code
 *
 * @return bool
 */
public static function isValidCode( $code ) {
	static $known = array();

	if ( isset( $known[$code] ) ) {
		return $known[$code];
	}

	// People think language codes are html safe, so enforce it.
	// Ideally we should only allow a-zA-Z0-9-
	// but, .+ and other chars are often used for {{int:}} hacks
	// see bugs 37564, 37587, 36938

	// Protect against path traversal: none of the listed characters may occur.
	$hasOnlySafeChars = strcspn( $code, ":/\\\000&<>'\"" ) === strlen( $code );

	$known[$code] = $hasOnlySafeChars
		&& !preg_match( MediaWikiTitleCodec::getTitleInvalidRegex(), $code );

	return $known[$code];
}
/**
 * Returns true if a language code is of a valid form for the purposes of
 * internal customisation of MediaWiki, via Messages*.php or *.json.
 *
 * A valid code consists of at least two characters drawn from lower-case
 * ASCII letters, digits and '-'.
 *
 * @param string $code
 *
 * @throws MWException If $code is not a string
 * @since 1.18
 * @return bool
 */
public static function isValidBuiltInCode( $code ) {
	if ( is_string( $code ) ) {
		return (bool)preg_match( '/^[a-z0-9-]{2,}$/', $code );
	}

	// Not a string: report what was passed instead, naming the class for objects.
	$addmsg = is_object( $code ) ? " of class " . get_class( $code ) : '';
	$type = gettype( $code );
	throw new MWException( __METHOD__ . " must be passed a string, $type given$addmsg" );
}
/**
 * Returns true if a language code is an IETF tag known to MediaWiki.
 *
 * A tag is known when it is a key of $coreLanguageNames or when
 * fetchLanguageName() returns a non-empty name for it.
 *
 * @param string $tag
 *
 * @since 1.21
 * @return bool
 */
public static function isKnownLanguageTag( $tag ) {
	static $coreLanguageNames;

	// Quick escape for invalid input to avoid exceptions down the line
	// when code tries to process tags which are not valid at all.
	if ( !self::isValidBuiltInCode( $tag ) ) {
		return false;
	}

	if ( $coreLanguageNames === null ) {
		global $IP;
		// NOTE(review): presumably Names.php assigns $coreLanguageNames in this
		// scope, which would fill the static variable - confirm against that file.
		include "$IP/languages/Names.php";
	}

	return isset( $coreLanguageNames[$tag] )
		|| self::fetchLanguageName( $tag, $tag ) !== '';
}
/**
 * Get the LocalisationCache instance, creating it on first use.
 *
 * The instance is built from the class named in $wgLocalisationCacheConf['class'],
 * which receives the whole configuration array as its constructor argument.
 *
 * @return LocalisationCache
 */
public static function getLocalisationCache() {
	if ( self::$dataCache === null ) {
		global $wgLocalisationCacheConf;
		$cacheClass = $wgLocalisationCacheConf['class'];
		self::$dataCache = new $cacheClass( $wgLocalisationCacheConf );
	}

	return self::$dataCache;
}
/**
 * Set up the converter and the language code, and make sure the shared
 * localisation cache exists.
 *
 * The base class gets the code 'en'. For any other class the code is derived
 * from the class name: the first eight characters are dropped, the rest is
 * lower-cased and '_' is replaced by '-'.
 */
public function __construct() {
	$this->mConverter = new FakeConverter( $this );

	// Set the code to the name of the descendant
	$className = get_class( $this );
	$this->mCode = ( $className == 'Language' )
		? 'en'
		: str_replace( '_', '-', strtolower( substr( $className, 8 ) ) );

	self::getLocalisationCache();
}
/**
 * Reduce memory usage by unsetting every property of this object that is
 * visible from this class.
 */
public function __destruct() {
	foreach ( array_keys( get_object_vars( $this ) ) as $property ) {
		unset( $this->$property );
	}
}
/**
 * Hook which will be called if this is the content language.
 * Descendants can use this to register hook functions or modify globals.
 * The base implementation does nothing.
 */
public function initContLang() {
}
/**
 * Get the fallback languages for this language's code, as returned by
 * getFallbacksFor().
 *
 * @return array
 * @since 1.19
 */
public function getFallbackLanguages() {
	$fallbacks = self::getFallbacksFor( $this->mCode );

	return $fallbacks;
}
/**
 * Get the 'bookstoreList' item for this language from the localisation cache.
 *
 * @return array
 */
public function getBookstoreList() {
	$bookstores = self::$dataCache->getItem( $this->mCode, 'bookstoreList' );

	return $bookstores;
}
/**
 * Returns an array of localised namespaces indexed by their numbers. If the namespace is not
 * available in localised form, it will be included in English.
 *
 * The list is computed once and kept in $this->namespaceNames. Entries from
 * $wgExtraNamespaces take precedence over the localised names, which take
 * precedence over the canonical names.
 *
 * @return array
 */
public function getNamespaces() {
	if ( !is_null( $this->namespaceNames ) ) {
		return $this->namespaceNames;
	}

	global $wgMetaNamespace, $wgMetaNamespaceTalk, $wgExtraNamespaces;

	$this->namespaceNames = self::$dataCache->getItem( $this->mCode, 'namespaceNames' );
	$validNamespaces = MWNamespace::getCanonicalNamespaces();

	// With '+', the leftmost array wins for any index present in several of them.
	$this->namespaceNames = $wgExtraNamespaces + $this->namespaceNames + $validNamespaces;

	$this->namespaceNames[NS_PROJECT] = $wgMetaNamespace;
	// Without a configured project talk name, derive it from the merged entry.
	$this->namespaceNames[NS_PROJECT_TALK] = $wgMetaNamespaceTalk
		? $wgMetaNamespaceTalk
		: $this->fixVariableInNamespace( $this->namespaceNames[NS_PROJECT_TALK] );

	# Sometimes a language will be localised but not actually exist on this wiki.
	foreach ( array_keys( $this->namespaceNames ) as $nsId ) {
		if ( !isset( $validNamespaces[$nsId] ) ) {
			unset( $this->namespaceNames[$nsId] );
		}
	}

	# The above mixing may leave namespaces out of canonical order.
	# Re-order by namespace ID number...
	ksort( $this->namespaceNames );

	Hooks::run( 'LanguageGetNamespaces', array( &$this->namespaceNames ) );

	return $this->namespaceNames;
}
/**
 * Arbitrarily set all of the namespace names at once. Mainly used for testing.
 * The cached namespace ID table is discarded as well.
 *
 * @param array $namespaces Array of namespaces (id => name)
 */
public function setNamespaces( array $namespaces ) {
	$this->mNamespaceIds = null;
	$this->namespaceNames = $namespaces;
}
/**
 * Resets all of the namespace caches (names, IDs and aliases).
 * Mainly used for testing.
 */
public function resetNamespaces() {
	$this->namespaceNames = $this->mNamespaceIds = $this->namespaceAliases = null;
}
/**
 * A convenience function that returns getNamespaces() with spaces instead of underscores
 * in values. Useful for producing output to be displayed e.g. in `<select>` forms.
 *
 * @return array
 */
public function getFormattedNamespaces() {
	$formatted = array();
	foreach ( $this->getNamespaces() as $index => $name ) {
		$formatted[$index] = strtr( $name, '_', ' ' );
	}

	return $formatted;
}
/**
 * Get a namespace value by key
 *
 * <code>
 * $mw_ns = $wgContLang->getNsText( NS_MEDIAWIKI );
 * echo $mw_ns; // prints 'MediaWiki'
 * </code>
 *
 * @param int $index The array key of the namespace to return
 * @return string|bool String if the namespace value exists, otherwise false
 */
public function getNsText( $index ) {
	$names = $this->getNamespaces();
	if ( isset( $names[$index] ) ) {
		return $names[$index];
	}

	return false;
}
/**
 * A convenience function that returns the same thing as
 * getNsText() except with '_' changed to ' ', useful for
 * producing output.
 *
 * <code>
 * $mw_ns = $wgContLang->getFormattedNsText( NS_MEDIAWIKI_TALK );
 * echo $mw_ns; // prints 'MediaWiki talk'
 * </code>
 *
 * @param int $index The array key of the namespace to return
 * @return string Namespace name without underscores (empty string if namespace does not exist)
 */
public function getFormattedNsText( $index ) {
	$text = $this->getNsText( $index );
	if ( $text === false ) {
		// Unknown namespace: strtr() on false would yield the empty string anyway.
		return '';
	}

	return strtr( $text, '_', ' ' );
}
/**
 * Returns gender-dependent namespace alias if available, otherwise the
 * plain namespace text from getNsText().
 * See https://www.mediawiki.org/wiki/Manual:$wgExtraGenderNamespaces
 *
 * Entries in $wgExtraGenderNamespaces take precedence over the
 * 'namespaceGenderAliases' item of the localisation cache.
 *
 * @param int $index Namespace index
 * @param string $gender Gender key (male, female... )
 * @return string
 * @since 1.18
 */
public function getGenderNsText( $index, $gender ) {
	global $wgExtraGenderNamespaces;

	$localised = (array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	$genderAliases = $wgExtraGenderNamespaces + $localised;

	if ( isset( $genderAliases[$index][$gender] ) ) {
		return $genderAliases[$index][$gender];
	}

	return $this->getNsText( $index );
}
/**
 * Whether this language uses gender-dependent namespace aliases.
 * See https://www.mediawiki.org/wiki/Manual:$wgExtraGenderNamespaces
 *
 * Decided in this order: a non-empty $wgExtraGenderNamespaces means yes;
 * $wgExtraNamespaces defining both NS_USER and NS_USER_TALK means no;
 * otherwise it depends on whether the localisation cache holds any
 * 'namespaceGenderAliases' for this language.
 *
 * @return bool
 * @since 1.18
 */
public function needsGenderDistinction() {
	global $wgExtraGenderNamespaces, $wgExtraNamespaces;

	if ( count( $wgExtraGenderNamespaces ) > 0 ) {
		// $wgExtraGenderNamespaces overrides everything
		return true;
	}

	if ( isset( $wgExtraNamespaces[NS_USER] ) && isset( $wgExtraNamespaces[NS_USER_TALK] ) ) {
		/// @todo There may be other gender namespace than NS_USER & NS_USER_TALK in the future
		// $wgExtraNamespaces overrides any gender aliases specified in i18n files
		return false;
	}

	// Check what is in i18n files. The item is cast to an array, as the other
	// readers of 'namespaceGenderAliases' in this class do, so that a missing
	// (null) item counts as "no aliases" instead of being handed to count().
	$aliases = (array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );

	return count( $aliases ) > 0;
}
/**
 * Get a namespace key by value, case insensitive.
 * Only matches namespace names for the current language, not the
 * canonical ones defined in Namespace.php.
 *
 * @param string $text
 * @return int|bool An integer if $text is a valid value otherwise false
 */
public function getLocalNsIndex( $text ) {
	$needle = $this->lc( $text );
	$idsByName = $this->getNamespaceIds();

	if ( isset( $idsByName[$needle] ) ) {
		return $idsByName[$needle];
	}

	return false;
}
/**
 * Get the namespace aliases for this language as a map of alias => namespace index.
 *
 * The map combines the 'namespaceAliases' item of the localisation cache,
 * all gender-specific namespace forms, and the namespace names converted to
 * every variant other than this language's own code. The result is computed
 * once and kept in $this->namespaceAliases.
 *
 * @return array
 */
public function getNamespaceAliases() {
	if ( !is_null( $this->namespaceAliases ) ) {
		return $this->namespaceAliases;
	}

	$aliases = self::$dataCache->getItem( $this->mCode, 'namespaceAliases' );
	if ( !$aliases ) {
		$aliases = array();
	}

	// Aliases of the project talk namespace go through fixVariableInNamespace()
	// and are re-registered under the resulting name.
	foreach ( $aliases as $name => $index ) {
		if ( $index !== NS_PROJECT_TALK ) {
			continue;
		}
		unset( $aliases[$name] );
		$aliases[$this->fixVariableInNamespace( $name )] = $index;
	}

	global $wgExtraGenderNamespaces;
	$genders = $wgExtraGenderNamespaces +
		(array)self::$dataCache->getItem( $this->mCode, 'namespaceGenderAliases' );
	foreach ( $genders as $index => $forms ) {
		foreach ( $forms as $alias ) {
			$aliases[$alias] = $index;
		}
	}

	# Also add converted namespace names as aliases, to avoid confusion.
	$convertedNames = array();
	foreach ( $this->getVariants() as $variant ) {
		if ( $variant === $this->mCode ) {
			continue;
		}
		foreach ( $this->getNamespaces() as $ns => $unused ) {
			$converted = $this->getConverter()->convertNamespace( $ns, $variant );
			$convertedNames[$converted] = $ns;
		}
	}

	// Explicit aliases win over converted names that collide with them.
	$this->namespaceAliases = $aliases + $convertedNames;

	return $this->namespaceAliases;
}
/**
 * Get a map of lower-cased namespace name => namespace index.
 *
 * The map is filled from getNamespaces(), then getNamespaceAliases(), then
 * $wgNamespaceAliases; a later source overwrites an earlier one for the same
 * lower-cased name. The result is computed once and kept in $this->mNamespaceIds.
 *
 * @return array
 */
public function getNamespaceIds() {
	if ( !is_null( $this->mNamespaceIds ) ) {
		return $this->mNamespaceIds;
	}

	global $wgNamespaceAliases;

	# Put namespace names and aliases into a hashtable.
	# If this is too slow, then we should arrange it so that it is done
	# before caching. The catch is that at pre-cache time, the above
	# class-specific fixup hasn't been done.
	$this->mNamespaceIds = array();

	foreach ( $this->getNamespaces() as $index => $name ) {
		$this->mNamespaceIds[$this->lc( $name )] = $index;
	}

	foreach ( $this->getNamespaceAliases() as $alias => $index ) {
		$this->mNamespaceIds[$this->lc( $alias )] = $index;
	}

	if ( $wgNamespaceAliases ) {
		foreach ( $wgNamespaceAliases as $alias => $index ) {
			$this->mNamespaceIds[$this->lc( $alias )] = $index;
		}
	}

	return $this->mNamespaceIds;
}
/**
 * Get a namespace key by value, case insensitive. Canonical namespace
 * names override custom ones defined for the current language.
 *
 * @param string $text
 * @return int|bool An integer if $text is a valid value otherwise false
 */
public function getNsIndex( $text ) {
	$needle = $this->lc( $text );

	// The canonical names are consulted first.
	$canonical = MWNamespace::getCanonicalIndex( $needle );
	if ( $canonical !== null ) {
		return $canonical;
	}

	$idsByName = $this->getNamespaceIds();
	if ( isset( $idsByName[$needle] ) ) {
		return $idsByName[$needle];
	}

	return false;
}
/**
 * Short names for language variants used for language conversion links.
 *
 * Tried in order: the "variantname-$code" message (when $usemsg is set and
 * the message exists), the language name from fetchLanguageName(), and
 * finally the code itself.
 *
 * @param string $code
 * @param bool $usemsg Use the "variantname-xyz" message if it exists
 * @return string
 */
public function getVariantname( $code, $usemsg = true ) {
	$msg = "variantname-$code";
	if ( $usemsg && wfMessage( $msg )->exists() ) {
		return $this->getMessageFromDB( $msg );
	}

	# if it's defined as a language name, show that;
	# otherwise, output the language code
	$name = self::fetchLanguageName( $code );

	return $name ? $name : $code;
}
/**
 * Build the prefixed title text of a special page, using the first local
 * alias of $name when one is defined.
 *
 * @deprecated since 1.24, doesn't handle conflicting aliases. Use
 *   SpecialPageFactory::getLocalNameFor instead.
 * @param string $name
 * @return string Special namespace text, a colon, then the (possibly aliased) name
 */
function specialPage( $name ) {
	$allAliases = $this->getSpecialPageAliases();
	$localName = isset( $allAliases[$name][0] ) ? $allAliases[$name][0] : $name;

	return $this->getNsText( NS_SPECIAL ) . ':' . $localName;
}
/**
 * Fetch the 'datePreferences' item for this language from the
 * localisation cache.
 *
 * @return array
 */
function getDatePreferences() {
	$preferences = self::$dataCache->getItem( $this->mCode, 'datePreferences' );
	return $preferences;
}
/**
 * Fetch the 'dateFormats' item for this language from the
 * localisation cache.
 *
 * @return array
 */
function getDateFormats() {
	$formats = self::$dataCache->getItem( $this->mCode, 'dateFormats' );
	return $formats;
}
/**
 * Fetch the 'defaultDateFormat' item for this language. The special value
 * 'dmy or mdy' is resolved through $wgAmericanDates: 'mdy' when that global
 * is truthy, 'dmy' otherwise.
 *
 * @return array|string
 */
function getDefaultDateFormat() {
	$format = self::$dataCache->getItem( $this->mCode, 'defaultDateFormat' );
	if ( $format !== 'dmy or mdy' ) {
		return $format;
	}

	global $wgAmericanDates;
	if ( $wgAmericanDates ) {
		return 'mdy';
	}
	return 'dmy';
}
/**
 * Fetch the 'datePreferenceMigrationMap' item for this language from the
 * localisation cache.
 *
 * @return array
 */
function getDatePreferenceMigrationMap() {
	$map = self::$dataCache->getItem( $this->mCode, 'datePreferenceMigrationMap' );
	return $map;
}
/**
 * Fetch the 'imageFiles' sub-item stored under $image for this language
 * from the localisation cache.
 *
 * @param string $image Key inside the 'imageFiles' item
 * @return array|null
 */
function getImageFile( $image ) {
	$entry = self::$dataCache->getSubitem( $this->mCode, 'imageFiles', $image );
	return $entry;
}
/**
 * Fetch the whole 'imageFiles' item for this language from the
 * localisation cache.
 *
 * @return array
 * @since 1.24
 */
function getImageFiles() {
	$files = self::$dataCache->getItem( $this->mCode, 'imageFiles' );
	return $files;
}
/**
 * Fetch the 'extraUserToggles' item for this language from the
 * localisation cache, cast to an array.
 *
 * @return array
 */
function getExtraUserToggles() {
	$toggles = self::$dataCache->getItem( $this->mCode, 'extraUserToggles' );
	return (array)$toggles;
}
/**
 * Get the text of the "tog-$tog" message in this language.
 *
 * @param string $tog Toggle name, without the "tog-" prefix
 * @return string
 */
function getUserToggle( $tog ) {
	$msgKey = "tog-$tog";
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get native language names, indexed by code.
 * Only those defined in MediaWiki, no other data like CLDR.
 * If $customisedOnly is true, only returns codes with a messages file.
 *
 * Delegates to fetchLanguageNames() with a null language and the
 * 'mwfile' or 'mw' include mode.
 *
 * @param bool $customisedOnly
 *
 * @return array
 * @deprecated since 1.20, use fetchLanguageNames()
 */
public static function getLanguageNames( $customisedOnly = false ) {
	$include = $customisedOnly ? 'mwfile' : 'mw';
	return self::fetchLanguageNames( null, $include );
}
/**
 * Get translated language names. This is done on best effort and
 * by default this is exactly the same as Language::getLanguageNames.
 * The CLDR extension provides translated names.
 *
 * Delegates to fetchLanguageNames() with the 'all' include mode.
 *
 * @param string $code Language code.
 * @return array Language code => language name
 * @since 1.18.0
 * @deprecated since 1.20, use fetchLanguageNames()
 */
public static function getTranslatedLanguageNames( $code ) {
	$names = self::fetchLanguageNames( $code, 'all' );
	return $names;
}
/**
 * Get an array of language names, indexed by code.
 *
 * Results are memoised per ( $inLanguage, $include ) pair in a
 * HashBagOStuff created with 'maxKeys' => 20. A falsy cached value is
 * treated as a miss and recomputed.
 *
 * @param null|string $inLanguage Code of language in which to return the names
 *   Use null for autonyms (native names)
 * @param string $include One of:
 *   'all' all available languages
 *   'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
 *   'mwfile' only if the language is in 'mw' *and* has a message file
 * @return array Language code => language name
 * @since 1.20
 */
public static function fetchLanguageNames( $inLanguage = null, $include = 'mw' ) {
	$cacheKey = ( $inLanguage === null ? 'null' : $inLanguage ) . ":$include";

	if ( self::$languageNameCache === null ) {
		// Created lazily on first use.
		self::$languageNameCache = new HashBagOStuff( array( 'maxKeys' => 20 ) );
	}

	$cached = self::$languageNameCache->get( $cacheKey );
	if ( $cached ) {
		return $cached;
	}

	$fresh = self::fetchLanguageNamesUncached( $inLanguage, $include );
	self::$languageNameCache->set( $cacheKey, $fresh );
	return $fresh;
}
/**
 * Uncached helper for fetchLanguageNames.
 *
 * @param null|string $inLanguage Code of language in which to return the names
 *   Use null for autonyms (native names)
 * @param string $include One of:
 *   'all' all available languages
 *   'mw' only if the language is defined in MediaWiki or wgExtraLanguageNames (default)
 *   'mwfile' only if the language is in 'mw' *and* has a message file
 * @return array Language code => language name, sorted by code
 */
private static function fetchLanguageNamesUncached( $inLanguage = null, $include = 'mw' ) {
	global $wgExtraLanguageNames;
	// NOTE: the variable name matters; presumably the included Names.php
	// assigns $coreLanguageNames in this scope - verify before renaming.
	static $coreLanguageNames;

	if ( $coreLanguageNames === null ) {
		global $IP;
		include "$IP/languages/Names.php";
	}

	// If passed an invalid language code to use, fallback to en
	if ( $inLanguage !== null && !Language::isValidCode( $inLanguage ) ) {
		$inLanguage = 'en';
	}

	$names = array();
	if ( $inLanguage ) {
		# TODO: also include when $inLanguage is null, when this code is more efficient
		Hooks::run( 'LanguageGetTranslatedLanguageNames', array( &$names, $inLanguage ) );
	}

	$mwNames = $wgExtraLanguageNames + $coreLanguageNames;
	foreach ( $mwNames as $mwCode => $mwName ) {
		# - Prefer own MediaWiki native name when not using the hook
		# - For other names just add if not added through the hook
		$isOwnName = ( $mwCode === $inLanguage );
		if ( $isOwnName || !isset( $names[$mwCode] ) ) {
			$names[$mwCode] = $mwName;
		}
	}

	if ( $include === 'all' ) {
		ksort( $names );
		return $names;
	}

	// Restrict to the codes MediaWiki itself knows about.
	$mwOnly = array();
	foreach ( array_keys( $mwNames ) as $knownCode ) {
		$mwOnly[$knownCode] = $names[$knownCode];
	}

	if ( $include !== 'mwfile' ) {
		# 'mw' option; default if it's not one of the other two options (all/mwfile)
		ksort( $mwOnly );
		return $mwOnly;
	}

	# We do this using a foreach over the codes instead of a directory
	# loop so that messages files in extensions will work correctly.
	$withMessageFile = array();
	foreach ( $mwOnly as $code => $name ) {
		$hasFile = is_readable( self::getMessagesFileName( $code ) )
			|| is_readable( self::getJsonMessagesFileName( $code ) );
		if ( $hasFile ) {
			$withMessageFile[$code] = $name;
		}
	}

	ksort( $withMessageFile );
	return $withMessageFile;
}
/**
 * Look up the name of a single language. The code is lower-cased before
 * the lookup.
 *
 * @param string $code The code of the language for which to get the name
 * @param null|string $inLanguage Code of language in which to return the name (null for autonyms)
 * @param string $include 'all', 'mw' or 'mwfile'; see fetchLanguageNames()
 * @return string Language name or empty
 * @since 1.20
 */
public static function fetchLanguageName( $code, $inLanguage = null, $include = 'all' ) {
	$lookupCode = strtolower( $code );
	$allNames = self::fetchLanguageNames( $inLanguage, $include );

	if ( array_key_exists( $lookupCode, $allNames ) ) {
		return $allNames[$lookupCode];
	}
	return '';
}
/**
 * Get a message from the MediaWiki namespace, as text in this language.
 *
 * @param string $msg Message name
 * @return string
 */
function getMessageFromDB( $msg ) {
	$message = $this->msg( $msg );
	return $message->text();
}
/**
 * Get message object in this language. Only for use inside this class.
 *
 * @param string $msg Message name
 * @return Message
 */
protected function msg( $msg ) {
	$message = wfMessage( $msg );
	return $message->inLanguage( $this );
}
/**
 * Get the native language name of $code.
 * Only if defined in MediaWiki, no other data like CLDR.
 *
 * @param string $code
 * @return string
 * @deprecated since 1.20, use fetchLanguageName()
 */
function getLanguageName( $code ) {
	$name = self::fetchLanguageName( $code );
	return $name;
}
/**
 * Get the month-name message for a month.
 *
 * @param int|string $key 1-based index into self::$mMonthMsgs
 * @return string
 */
function getMonthName( $key ) {
	$msgKey = self::$mMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get all twelve month names, keyed by month number.
 *
 * @return array Index 0 is an empty string; indexes 1-12 are the month names
 */
function getMonthNamesArray() {
	// The leading placeholder keeps month numbers and array keys aligned.
	$names = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$names[] = $this->getMonthName( $month );
	}
	return $names;
}
/**
 * Get the genitive month-name message for a month.
 *
 * @param int|string $key 1-based index into self::$mMonthGenMsgs
 * @return string
 */
function getMonthNameGen( $key ) {
	$msgKey = self::$mMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the abbreviated month-name message for a month.
 *
 * @param int|string $key 1-based index into self::$mMonthAbbrevMsgs
 * @return string
 */
function getMonthAbbreviation( $key ) {
	$msgKey = self::$mMonthAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get all twelve month abbreviations, keyed by month number.
 *
 * @return array Index 0 is an empty string; indexes 1-12 are the abbreviations
 */
function getMonthAbbreviationsArray() {
	// The leading placeholder keeps month numbers and array keys aligned.
	$abbreviations = array( '' );
	foreach ( range( 1, 12 ) as $month ) {
		$abbreviations[] = $this->getMonthAbbreviation( $month );
	}
	return $abbreviations;
}
/**
 * Get the weekday-name message for a weekday.
 *
 * @param int|string $key 1-based index into self::$mWeekdayMsgs
 * @return string
 */
function getWeekdayName( $key ) {
	$msgKey = self::$mWeekdayMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the abbreviated weekday-name message for a weekday.
 *
 * @param int|string $key 1-based index into self::$mWeekdayAbbrevMsgs
 * @return string
 */
function getWeekdayAbbreviation( $key ) {
	$msgKey = self::$mWeekdayAbbrevMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the month-name message for a month of the Iranian calendar.
 *
 * @param int|string $key 1-based index into self::$mIranianCalendarMonthMsgs
 * @return string
 */
function getIranianCalendarMonthName( $key ) {
	$msgKey = self::$mIranianCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the month-name message for a month of the Hebrew calendar.
 *
 * @param int|string $key 1-based index into self::$mHebrewCalendarMonthMsgs
 * @return string
 */
function getHebrewCalendarMonthName( $key ) {
	$msgKey = self::$mHebrewCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the genitive month-name message for a month of the Hebrew calendar.
 *
 * @param int|string $key 1-based index into self::$mHebrewCalendarMonthGenMsgs
 * @return string
 */
function getHebrewCalendarMonthNameGen( $key ) {
	$msgKey = self::$mHebrewCalendarMonthGenMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Get the month-name message for a month of the Hijri calendar.
 *
 * @param int|string $key 1-based index into self::$mHijriCalendarMonthMsgs
 * @return string
 */
function getHijriCalendarMonthName( $key ) {
	$msgKey = self::$mHijriCalendarMonthMsgs[$key - 1];
	return $this->getMessageFromDB( $msgKey );
}
/**
 * Pass through result from $dateTimeObj->format().
 *
 * When $dateTimeObj is falsy it is first created from $ts (parsed as
 * 'YmdHis') and stored back through the reference, so later calls can
 * reuse the same object. UTC is used when $zone is falsy.
 *
 * @param DateTime|bool|null &$dateTimeObj
 * @param string $ts
 * @param DateTimeZone|bool|null $zone
 * @param string $code Format string handed to DateTime::format()
 * @return string
 */
private static function dateTimeObjFormat( &$dateTimeObj, $ts, $zone, $code ) {
	if ( !$dateTimeObj ) {
		$effectiveZone = $zone ?: new DateTimeZone( 'UTC' );
		$dateTimeObj = DateTime::createFromFormat( 'YmdHis', $ts, $effectiveZone );
	}

	return $dateTimeObj->format( $code );
}
/**
 * This is a workalike of PHP's date() function, but with better
 * internationalisation, a reduced set of format characters, and a better
 * escaping format.
 *
 * Supported format characters are dDjlNwzWFmMntLoYyaAgGhHiscrUeIOPTZ. See
 * the PHP manual for definitions. There are a number of extensions, which
 * start with "x":
 *
 *    xn   Do not translate digits of the next numeric format character
 *    xN   Toggle raw digit (xn) flag, stays set until explicitly unset
 *    xr   Use roman numerals for the next numeric format character
 *    xh   Use hebrew numerals for the next numeric format character
 *    xx   Literal x
 *    xg   Genitive month name
 *
 *    xij  j (day number) in Iranian calendar
 *    xiF  F (month name) in Iranian calendar
 *    xin  n (month number) in Iranian calendar
 *    xiy  y (two digit year) in Iranian calendar
 *    xiY  Y (full year) in Iranian calendar
 *
 *    xjj  j (day number) in Hebrew calendar
 *    xjF  F (month name) in Hebrew calendar
 *    xjt  t (days in month) in Hebrew calendar
 *    xjx  xg (genitive month name) in Hebrew calendar
 *    xjn  n (month number) in Hebrew calendar
 *    xjY  Y (full year) in Hebrew calendar
 *
 *    xmj  j (day number) in Hijri calendar
 *    xmF  F (month name) in Hijri calendar
 *    xmn  n (month number) in Hijri calendar
 *    xmY  Y (full year) in Hijri calendar
 *
 *    xkY  Y (full year) in Thai solar calendar. Months and days are
 *                       identical to the Gregorian calendar
 *    xoY  Y (full year) in Minguo calendar or Juche year.
 *                       Months and days are identical to the
 *                       Gregorian calendar
 *    xtY  Y (full year) in Japanese nengo. Months and days are
 *                       identical to the Gregorian calendar
 *
 * Characters enclosed in double quotes will be considered literal (with
 * the quotes themselves removed). Unmatched quotes will be considered
 * literal quotes. Example:
 *
 * "The month is" F       => The month is January
 * i's"                   => 20'11"
 *
 * Backslash escaping is also supported.
 *
 * Input timestamp is assumed to be pre-normalized to the desired local
 * time zone, if any. Note that the format characters crUeIOPTZ will assume
 * $ts is UTC if $zone is not given.
 *
 * @param string $format
 * @param string $ts 14-character timestamp
 *      YYYYMMDDHHMMSS
 *      01234567890123
 * @param DateTimeZone|null $zone Timezone of $ts
 * @param int &$ttl Output: the amount of time (in seconds) the output may be
 *   cached for. Only makes sense if $ts is the current time. Left untouched
 *   when no time-dependent format character was used.
 * @todo handling of "o" format character for Iranian, Hebrew, Hijri & Thai?
 *
 * @throws MWException If $ts is not exactly 14 digits
 * @return string
 */
function sprintfDate( $format, $ts, DateTimeZone $zone = null, &$ttl = null ) {
	$out = '';

	// Numeral modifiers. $raw, $roman and $hebrewNum apply to the next
	// numeric value only; $rawToggle stays in force until toggled again.
	$raw = false;
	$rawToggle = false;
	$roman = false;
	$hebrewNum = false;

	// Lazily computed views of $ts, shared between format characters.
	$dateTimeObj = false;
	$iranian = false;
	$hebrew = false;
	$hijri = false;
	$thai = false;
	$minguo = false;
	$tenno = false;

	// Which units ended up in the output; drives the $ttl calculation.
	$usedSecond = false;
	$usedMinute = false;
	$usedHour = false;
	$usedAMPM = false;
	$usedDay = false;
	$usedWeek = false;
	$usedMonth = false;
	$usedYear = false;
	$usedISOYear = false;
	$usedIsLeapYear = false;

	$usedHebrewMonth = false;
	$usedIranianMonth = false;
	$usedHijriMonth = false;
	$usedHebrewYear = false;
	$usedIranianYear = false;
	$usedHijriYear = false;
	$usedTennoYear = false;

	if ( strlen( $ts ) !== 14 ) {
		throw new MWException( __METHOD__ . ": The timestamp $ts should have 14 characters" );
	}

	if ( !ctype_digit( $ts ) ) {
		throw new MWException( __METHOD__ . ": The timestamp $ts should be a number" );
	}

	// Fixed-width pieces of the timestamp, kept as digit strings so that
	// zero-padded values are output unchanged.
	$tsYear = substr( $ts, 0, 4 );
	$tsMonth = substr( $ts, 4, 2 );
	$tsDay = substr( $ts, 6, 2 );
	$tsHour = substr( $ts, 8, 2 );
	$tsMinute = substr( $ts, 10, 2 );
	$tsSecond = substr( $ts, 12, 2 );

	// Two-character prefixes that consume a third character.
	$calendarPrefixes = array( 'xi', 'xj', 'xk', 'xm', 'xo', 'xt' );

	$len = strlen( $format );
	for ( $pos = 0; $pos < $len; $pos++ ) {
		// Numeric value for this format code, or false for non-numeric output.
		$num = false;

		$code = $format[$pos];
		if ( $code === 'x' && $pos < $len - 1 ) {
			$code .= $format[++$pos];
		}

		if ( in_array( $code, $calendarPrefixes, true ) && $pos < $len - 1 ) {
			$code .= $format[++$pos];
		}

		switch ( $code ) {
			# Modifiers and the literal x
			case 'xx':
				$out .= 'x';
				break;
			case 'xn':
				$raw = true;
				break;
			case 'xN':
				$rawToggle = !$rawToggle;
				break;
			case 'xr':
				$roman = true;
				break;
			case 'xh':
				$hebrewNum = true;
				break;

			# Day of month
			case 'd':
				$usedDay = true;
				$num = $tsDay;
				break;
			case 'j':
				$usedDay = true;
				$num = intval( $tsDay );
				break;
			case 'xij':
				$usedDay = true;
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[2];
				break;
			case 'xmj':
				$usedDay = true;
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[2];
				break;
			case 'xjj':
				$usedDay = true;
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[2];
				break;

			# Weekday names; 'w' is 0-based, the message lookups are 1-based
			case 'D':
				$usedDay = true;
				$out .= $this->getWeekdayAbbreviation(
					self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'w' ) + 1
				);
				break;
			case 'l':
				$usedDay = true;
				$out .= $this->getWeekdayName(
					self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'w' ) + 1
				);
				break;

			# Month names
			case 'F':
				$usedMonth = true;
				$out .= $this->getMonthName( $tsMonth );
				break;
			case 'xg':
				$usedMonth = true;
				$out .= $this->getMonthNameGen( $tsMonth );
				break;
			case 'M':
				$usedMonth = true;
				$out .= $this->getMonthAbbreviation( $tsMonth );
				break;
			case 'xiF':
				$usedIranianMonth = true;
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$out .= $this->getIranianCalendarMonthName( $iranian[1] );
				break;
			case 'xmF':
				$usedHijriMonth = true;
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$out .= $this->getHijriCalendarMonthName( $hijri[1] );
				break;
			case 'xjF':
				$usedHebrewMonth = true;
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$out .= $this->getHebrewCalendarMonthName( $hebrew[1] );
				break;
			case 'xjx':
				$usedHebrewMonth = true;
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$out .= $this->getHebrewCalendarMonthNameGen( $hebrew[1] );
				break;

			# Month numbers and month lengths
			case 'm':
				$usedMonth = true;
				$num = $tsMonth;
				break;
			case 'n':
				$usedMonth = true;
				$num = intval( $tsMonth );
				break;
			case 'xin':
				$usedIranianMonth = true;
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[1];
				break;
			case 'xmn':
				$usedHijriMonth = true;
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[1];
				break;
			case 'xjn':
				$usedHebrewMonth = true;
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[1];
				break;
			case 'xjt':
				$usedHebrewMonth = true;
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[3];
				break;

			# Years
			case 'Y':
				$usedYear = true;
				$num = $tsYear;
				break;
			case 'y':
				$usedYear = true;
				$num = substr( $ts, 2, 2 );
				break;
			case 'xiY':
				$usedIranianYear = true;
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = $iranian[0];
				break;
			case 'xiy':
				$usedIranianYear = true;
				if ( !$iranian ) {
					$iranian = self::tsToIranian( $ts );
				}
				$num = substr( $iranian[0], -2 );
				break;
			case 'xmY':
				$usedHijriYear = true;
				if ( !$hijri ) {
					$hijri = self::tsToHijri( $ts );
				}
				$num = $hijri[0];
				break;
			case 'xjY':
				$usedHebrewYear = true;
				if ( !$hebrew ) {
					$hebrew = self::tsToHebrew( $ts );
				}
				$num = $hebrew[0];
				break;
			case 'xkY':
				$usedYear = true;
				if ( !$thai ) {
					$thai = self::tsToYear( $ts, 'thai' );
				}
				$num = $thai[0];
				break;
			case 'xoY':
				$usedYear = true;
				if ( !$minguo ) {
					$minguo = self::tsToYear( $ts, 'minguo' );
				}
				$num = $minguo[0];
				break;
			case 'xtY':
				$usedTennoYear = true;
				if ( !$tenno ) {
					$tenno = self::tsToYear( $ts, 'tenno' );
				}
				$num = $tenno[0];
				break;

			# Time of day
			case 'a':
				$usedAMPM = true;
				$out .= intval( $tsHour ) < 12 ? 'am' : 'pm';
				break;
			case 'A':
				$usedAMPM = true;
				$out .= intval( $tsHour ) < 12 ? 'AM' : 'PM';
				break;
			case 'g':
				$usedHour = true;
				$hour12 = $tsHour % 12;
				$num = $hour12 ? $hour12 : 12;
				break;
			case 'G':
				$usedHour = true;
				$num = intval( $tsHour );
				break;
			case 'h':
				$usedHour = true;
				$hour12 = $tsHour % 12;
				$num = sprintf( '%02d', $hour12 ? $hour12 : 12 );
				break;
			case 'H':
				$usedHour = true;
				$num = $tsHour;
				break;
			case 'i':
				$usedMinute = true;
				$num = $tsMinute;
				break;
			case 's':
				$usedSecond = true;
				$num = $tsSecond;
				break;

			# Values delegated to DateTime::format(), appended verbatim
			case 'c':
			case 'r':
				$usedSecond = true;
				// fall through
			case 'e':
			case 'O':
			case 'P':
			case 'T':
				$out .= self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code );
				break;

			# Values delegated to DateTime::format(), treated as numbers
			case 'w':
			case 'N':
			case 'z':
				$usedDay = true;
				$num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code );
				break;
			case 'W':
				$usedWeek = true;
				$num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code );
				break;
			case 't':
				$usedMonth = true;
				$num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code );
				break;
			case 'L':
				$usedIsLeapYear = true;
				$num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code );
				break;
			case 'o':
				$usedISOYear = true;
				$num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code );
				break;
			case 'U':
				$usedSecond = true;
				// fall through
			case 'I':
			case 'Z':
				$num = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, $code );
				break;

			# Escapes and literals
			case '\\':
				# Backslash escaping
				if ( $pos < $len - 1 ) {
					$out .= $format[++$pos];
				} else {
					$out .= '\\';
				}
				break;
			case '"':
				# Quoted literal
				if ( $pos < $len - 1 ) {
					$endQuote = strpos( $format, '"', $pos + 1 );
					if ( $endQuote === false ) {
						# No terminating quote, assume literal "
						$out .= '"';
					} else {
						$out .= substr( $format, $pos + 1, $endQuote - $pos - 1 );
						$pos = $endQuote;
					}
				} else {
					# Quote at end of string, assume literal "
					$out .= '"';
				}
				break;
			default:
				$out .= $format[$pos];
		}

		if ( $num !== false ) {
			// Append the numeric value, honouring at most one modifier.
			if ( $rawToggle || $raw ) {
				$out .= $num;
				$raw = false;
			} elseif ( $roman ) {
				$out .= self::romanNumeral( $num );
				$roman = false;
			} elseif ( $hebrewNum ) {
				$out .= self::hebrewNumeral( $num );
				$hebrewNum = false;
			} else {
				$out .= $this->formatNum( $num, true );
			}
		}
	}

	// Work out how long the output stays valid: the time left until the
	// smallest unit that was output changes.
	if ( $usedSecond ) {
		$ttl = 1;
	} elseif ( $usedMinute ) {
		$ttl = 60 - $tsSecond;
	} elseif ( $usedHour ) {
		$ttl = 3600 - $tsMinute * 60 - $tsSecond;
	} elseif ( $usedAMPM ) {
		$ttl = 43200 - ( $tsHour % 12 ) * 3600 - $tsMinute * 60 - $tsSecond;
	} else {
		$timeRemainingInDay = 86400 - $tsHour * 3600 - $tsMinute * 60 - $tsSecond;

		// @todo Someone who understands the non-Gregorian calendars
		// should write proper logic for them so that they don't need purged every day.
		$expiresDaily = $usedDay
			|| $usedHebrewMonth
			|| $usedIranianMonth
			|| $usedHijriMonth
			|| $usedHebrewYear
			|| $usedIranianYear
			|| $usedHijriYear
			|| $usedTennoYear;

		if ( $expiresDaily ) {
			$ttl = $timeRemainingInDay;
		} else {
			$possibleTtls = array();

			if ( $usedWeek || $usedISOYear ) {
				$timeRemainingInWeek =
					( 7 - self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'N' ) ) * 86400
					+ $timeRemainingInDay;

				if ( $usedWeek ) {
					$possibleTtls[] = $timeRemainingInWeek;
				} else {
					// December 28th falls on the last ISO week of the year, every year.
					// The last ISO week of a year can be 52 or 53.
					$lastWeekOfISOYear = DateTime::createFromFormat(
						'Ymd',
						$tsYear . '1228',
						$zone ?: new DateTimeZone( 'UTC' )
					)->format( 'W' );
					$currentISOWeek = self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'W' );
					$weeksRemaining = $lastWeekOfISOYear - $currentISOWeek;
					$possibleTtls[] = $weeksRemaining * 604800 + $timeRemainingInWeek;
				}
			}

			if ( $usedMonth ) {
				$possibleTtls[] =
					( self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 't' ) - $tsDay ) * 86400
					+ $timeRemainingInDay;
			} elseif ( $usedYear || $usedIsLeapYear ) {
				// 'L' is 1 in leap years, 'z' is the 0-based day of the year.
				$timeRemainingInYear =
					( self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'L' ) + 364 -
						self::dateTimeObjFormat( $dateTimeObj, $ts, $zone, 'z' ) ) * 86400
					+ $timeRemainingInDay;

				if ( $usedYear ) {
					$possibleTtls[] = $timeRemainingInYear;
				} else {
					$year = $tsYear;
					$mod = $year % 4;
					if ( $mod || ( !( $year % 100 ) && $year % 400 ) ) {
						// this isn't a leap year. see when the next one starts
						$nextCandidate = $year - $mod + 4;
						if ( $nextCandidate % 100 || !( $nextCandidate % 400 ) ) {
							$fullYearsBetween = $nextCandidate - $year - 1;
						} else {
							// The candidate is a non-leap century year; skip four more years.
							$fullYearsBetween = $nextCandidate - $year + 3;
						}
						$possibleTtls[] = $fullYearsBetween * 365 * 86400 + $timeRemainingInYear;
					} else {
						// this is a leap year, so the next year isn't
						$possibleTtls[] = $timeRemainingInYear;
					}
				}
			}

			if ( $possibleTtls ) {
				$ttl = min( $possibleTtls );
			}
		}
	}

	return $out;
}
/**
 * Month lengths used by tsToIranian() for the Gregorian calendar,
 * January first; February is listed with 28 days.
 */
private static $GREG_DAYS = array(
	31, 28, 31, 30, 31, 30,
	31, 31, 30, 31, 30, 31
);

/**
 * Month lengths used by tsToIranian() for the Iranian calendar,
 * first month first; the last month is listed with 29 days.
 */
private static $IRANIAN_DAYS = array(
	31, 31, 31, 31, 31, 31,
	30, 30, 30, 30, 30, 29
);
/**
 * Algorithm by Roozbeh Pournader and Mohammad Toossi to convert
 * Gregorian dates to Iranian dates. Originally written in C, it
 * is released under the terms of GNU Lesser General Public
 * License. Conversion to PHP was performed by Niklas Laxström.
 *
 * Link: http://www.farsiweb.info/jalali/jalali.c
 *
 * @param string $ts 14-character timestamp; only the date part is read
 *
 * @return array Iranian ( year, month, day ); values may be floats
 */
private static function tsToIranian( $ts ) {
	// Year relative to 1600, and zero-based month and day.
	$gy = substr( $ts, 0, 4 ) - 1600;
	$gm = substr( $ts, 4, 2 ) - 1;
	$gd = substr( $ts, 6, 2 ) - 1;

	# Days passed from the beginning (including leap years)
	$leapDays = floor( ( $gy + 3 ) / 4 )
		- floor( ( $gy + 99 ) / 100 )
		+ floor( ( $gy + 399 ) / 400 );
	$gDayNo = 365 * $gy + $leapDays;

	// Add days of the past months of this year
	for ( $month = 0; $month < $gm; $month++ ) {
		$gDayNo += self::$GREG_DAYS[$month];
	}

	// Leap years: the extra day only counts once February has passed
	$isLeapYear = ( $gy % 4 === 0 && $gy % 100 !== 0 ) || ( $gy % 400 == 0 );
	if ( $gm > 1 && $isLeapYear ) {
		$gDayNo++;
	}

	// Days passed in current month
	$gDayNo += (int)$gd;

	$jDayNo = $gDayNo - 79;

	// Whole 12053-day cycles, each worth 33 Iranian years
	$jNp = floor( $jDayNo / 12053 );
	$jDayNo %= 12053;

	// Whole 1461-day groups, each worth 4 Iranian years
	$jy = 979 + 33 * $jNp + 4 * floor( $jDayNo / 1461 );
	$jDayNo %= 1461;

	if ( $jDayNo >= 366 ) {
		$jy += floor( ( $jDayNo - 1 ) / 365 );
		$jDayNo = floor( ( $jDayNo - 1 ) % 365 );
	}

	// Consume whole months; whatever is left is the zero-based day
	$monthIndex = 0;
	while ( $monthIndex < 11 && $jDayNo >= self::$IRANIAN_DAYS[$monthIndex] ) {
		$jDayNo -= self::$IRANIAN_DAYS[$monthIndex];
		$monthIndex++;
	}

	$jm = $monthIndex + 1;
	$jd = $jDayNo + 1;

	return array( $jy, $jm, $jd );
}
/**
 * Converting Gregorian dates to Hijri dates.
 *
 * Based on a PHP-Nuke block by Sharjeel which is released under GNU/GPL license
 *
 * @see http://phpnuke.org/modules.php?name=News&file=article&sid=8234&mode=thread&order=0&thold=0
 *
 * @param string $ts 14-character timestamp; only the date part is read
 *
 * @return array Hijri ( year, month, day )
 */
private static function tsToHijri( $ts ) {
	$year = substr( $ts, 0, 4 );
	$month = substr( $ts, 4, 2 );
	$day = substr( $ts, 6, 2 );

	// Dates after 14 October 1582 use the first formula, earlier ones the second.
	$afterCutover = ( $year > 1582 )
		|| ( ( $year == 1582 ) && ( $month > 10 ) )
		|| ( ( $year == 1582 ) && ( $month == 10 ) && ( $day > 14 ) );

	if ( $afterCutover ) {
		// Truncated ( month - 14 ) / 12 term shared by the three parts below.
		$monthShift = (int)( ( $month - 14 ) / 12 );
		$zjd = (int)( ( 1461 * ( $year + 4800 + $monthShift ) ) / 4 ) +
			(int)( ( 367 * ( $month - 2 - 12 * $monthShift ) ) / 12 ) -
			(int)( ( 3 * (int)( ( $year + 4900 + $monthShift ) / 100 ) ) / 4 ) +
			$day - 32075;
	} else {
		$zjd = 367 * $year - (int)( ( 7 * ( $year + 5001 + (int)( ( $month - 9 ) / 7 ) ) ) / 4 ) +
			(int)( ( 275 * $month ) / 9 ) + $day + 1729777;
	}

	$zl = $zjd - 1948440 + 10632;
	$zn = (int)( ( $zl - 1 ) / 10631 );
	$zl = $zl - 10631 * $zn + 354;

	$zj = ( (int)( ( 10985 - $zl ) / 5316 ) ) * ( (int)( ( 50 * $zl ) / 17719 ) ) +
		( (int)( $zl / 5670 ) ) * ( (int)( ( 43 * $zl ) / 15238 ) );
	$zl = $zl - ( (int)( ( 30 - $zj ) / 15 ) ) * ( (int)( ( 17719 * $zj ) / 50 ) ) -
		( (int)( $zj / 16 ) ) * ( (int)( ( 15238 * $zj ) / 43 ) ) + 29;

	$hijriMonth = (int)( ( 24 * $zl ) / 709 );
	$hijriDay = $zl - (int)( ( 709 * $hijriMonth ) / 24 );
	$hijriYear = 30 * $zn + $zj - 30;

	return array( $hijriYear, $hijriMonth, $hijriDay );
}
/**
 * Converting Gregorian dates to Hebrew dates.
 *
 * Based on a JavaScript code by Abu Mami and Yisrael Hersch
 * (abu-mami@kaluach.net, http://www.kaluach.net), who permitted
 * to translate the relevant functions into PHP and release them under
 * GNU GPL.
 *
 * The months are counted from Tishrei = 1. In a leap year, Adar I is 13
 * and Adar II is 14. In a non-leap year, Adar is 6.
 *
 * @param string $ts 14-character timestamp; only the date part is read
 *
 * @return array Hebrew ( year, month, day, days in that month )
 */
private static function tsToHebrew( $ts ) {
	# Parse date
	$year = substr( $ts, 0, 4 );
	$month = substr( $ts, 4, 2 );
	$day = substr( $ts, 6, 2 );

	# Calculate Hebrew year
	$hebrewYear = $year + 3760;

	# Month number when September = 1, August = 12
	$month += 4;
	if ( $month > 12 ) {
		# Next year
		$month -= 12;
		$year++;
		$hebrewYear++;
	}

	# Calculate day of year from 1 September
	$dayOfYear = $day;
	for ( $m = 1; $m < $month; $m++ ) {
		if ( $m == 6 ) {
			# February, 29 days if the year is leap
			$febIsLong = $year % 400 == 0 || ( $year % 4 == 0 && $year % 100 > 0 );
			$dayOfYear += $febIsLong ? 29 : 28;
		} elseif ( in_array( $m, array( 1, 3, 8, 10 ), true ) ) {
			# September, November, April and June
			$dayOfYear += 30;
		} else {
			$dayOfYear += 31;
		}
	}

	# Calculate the start of the Hebrew year
	$start = self::hebrewYearStart( $hebrewYear );

	# Calculate next year's start
	if ( $dayOfYear > $start ) {
		$nextStart = self::hebrewYearStart( $hebrewYear + 1 );
	} else {
		# Day is before the start of the year - it is the previous year,
		# so what was computed above is really next year's start
		$nextStart = $start;
		# Previous year
		$year--;
		$hebrewYear--;
		# Add days since previous year's 1 September
		$dayOfYear += 365;
		if ( ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 ) ) {
			# Leap year
			$dayOfYear++;
		}
		# Start of the new (previous) year
		$start = self::hebrewYearStart( $hebrewYear );
	}

	# Calculate Hebrew day of year
	$hebrewDayOfYear = $dayOfYear - $start;

	# Difference between year's days
	$diff = $nextStart - $start;
	# Add 12 (or 13 for leap years) days to ignore the difference between
	# Hebrew and Gregorian year (353 at least vs. 365/6) - now the
	# difference is only about the year type
	$gregorianLeap = ( $year % 400 == 0 ) || ( $year % 100 != 0 && $year % 4 == 0 );
	$diff += $gregorianLeap ? 13 : 12;

	# Check the year pattern, and is leap year
	# 0 means an incomplete year, 1 means a regular year, 2 means a complete year
	# This is mod 30, to work on both leap years (which add 30 days of Adar I)
	# and non-leap years
	$yearPattern = $diff % 30;
	# Check if leap year
	$isLeap = $diff >= 30;

	# Calculate day in the month from number of day in the Hebrew year
	# Don't check Adar - if the day is not in Adar, we will stop before;
	# if it is in Adar, we will use it to check if it is Adar I or Adar II
	$hebrewDay = $hebrewDayOfYear;
	$hebrewMonth = 1;
	$days = 0;
	while ( $hebrewMonth <= 12 ) {
		# Calculate days in this month
		if ( $isLeap && $hebrewMonth == 6 ) {
			# Adar in a leap year: Adar I, with 30 days, then Adar II, with 29 days
			$days = 30;
			if ( $hebrewDay <= $days ) {
				# Day in Adar I
				$hebrewMonth = 13;
			} else {
				# Subtract the days of Adar I
				$hebrewDay -= $days;
				# Try Adar II
				$days = 29;
				if ( $hebrewDay <= $days ) {
					# Day in Adar II
					$hebrewMonth = 14;
				}
			}
		} elseif ( $hebrewMonth == 2 && $yearPattern == 2 ) {
			# Cheshvan in a complete year (otherwise as the rule below)
			$days = 30;
		} elseif ( $hebrewMonth == 3 && $yearPattern == 0 ) {
			# Kislev in an incomplete year (otherwise as the rule below)
			$days = 29;
		} else {
			# Odd months have 30 days, even have 29
			$days = 30 - ( $hebrewMonth - 1 ) % 2;
		}

		if ( $hebrewDay <= $days ) {
			# In the current month
			break;
		}

		# Subtract the days of the current month
		$hebrewDay -= $days;
		# Try in the next month
		$hebrewMonth++;
	}

	return array( $hebrewYear, $hebrewMonth, $hebrewDay, $days );
}
/**
 * This calculates the Hebrew year start, as days since 1 September.
 * Based on Carl Friedrich Gauss algorithm for finding Easter date.
 * Used for Hebrew date.
 *
 * @param int $year Hebrew year
 *
 * @return int
 */
private static function hebrewYearStart( $year ) {
	$prevYear = $year - 1;

	$a = intval( ( 12 * $prevYear + 17 ) % 19 );
	$b = intval( $prevYear % 4 );
	$m = 32.044093161144 + 1.5542417966212 * $a + $b / 4.0 - 0.0031777940220923 * $prevYear;

	// Split $m into a whole part ($march) and a fraction (left in $m);
	// negative values are shifted by one before truncation.
	if ( $m < 0 ) {
		$m--;
	}
	$march = intval( $m );
	if ( $m < 0 ) {
		$m++;
	}
	$m -= $march;

	// Postpone the start by one or two days depending on $c, $a and the fraction.
	$c = intval( ( $march + 3 * $prevYear + 5 * $b + 5 ) % 7 );
	if ( $c == 0 && $a > 11 && $m >= 0.89772376543210 ) {
		$march += 1;
	} elseif ( $c == 1 && $a > 6 && $m >= 0.63287037037037 ) {
		$march += 2;
	} elseif ( $c == 2 || $c == 4 || $c == 6 ) {
		$march += 1;
	}

	$offset = $year - 3761;
	$march += intval( $offset / 100 ) - intval( $offset / 400 ) - 24;

	return $march;
}
/**
 * Algorithm to convert Gregorian dates to Thai solar dates,
 * Minguo (or Juche) dates, or Japanese nengō dates.
 *
 * Link: http://en.wikipedia.org/wiki/Thai_solar_calendar
 *       http://en.wikipedia.org/wiki/Minguo_calendar
 *       http://en.wikipedia.org/wiki/Japanese_era_name
 *
 * Months and days are passed through unchanged for every calendar; only the
 * year is converted. For an unrecognised calendar name the Gregorian year is
 * returned as-is.
 *
 * @param string $ts 14-character timestamp
 * @param string $cName Calendar name: 'thai', 'minguo', 'juche' or 'tenno'
 * @return array Converted year, month, day
 */
private static function tsToYear( $ts, $cName ) {
	$gy = substr( $ts, 0, 4 );
	$gm = substr( $ts, 4, 2 );
	$gd = substr( $ts, 6, 2 );

	if ( $cName === 'thai' ) {
		# Thai solar dates: add 543 years to the Gregorian calendar
		$gy_offset = $gy + 543;
	} elseif ( $cName === 'minguo' || $cName === 'juche' ) {
		# Minguo / Juche dates: deduct 1911 years from the Gregorian calendar
		$gy_offset = $gy - 1911;
	} elseif ( $cName === 'tenno' ) {
		# Nengō dates: the year is counted from the first year of the era.
		# Eras are listed newest first as ( start year, month, day, name );
		# anything before the oldest listed start date is treated as Meiji,
		# counted from 1868.
		static $eras = array(
			array( 2019, 5, 1, '令和' ),
			array( 1989, 1, 8, '平成' ),
			array( 1926, 12, 26, '昭和' ),
			array( 1912, 7, 31, '大正' ),
		);

		$eraName = '明治';
		$eraFirstYear = 1868;
		foreach ( $eras as $era ) {
			list( $startYear, $startMonth, $startDay, $name ) = $era;
			$onOrAfterStart = $gy > $startYear
				|| ( $gy == $startYear
					&& ( $gm > $startMonth || ( $gm == $startMonth && $gd >= $startDay ) ) );
			if ( $onOrAfterStart ) {
				$eraName = $name;
				$eraFirstYear = $startYear;
				break;
			}
		}

		$gy_gannen = $gy - $eraFirstYear + 1;
		# The first year of an era is written 元 ("gannen") instead of 1
		$gy_offset = $eraName . ( $gy_gannen == 1 ? '元' : $gy_gannen );
	} else {
		$gy_offset = $gy;
	}

	return array( $gy_offset, $gm, $gd );
}
/**
 * Gets directionality of the first strongly directional codepoint, for embedBidi()
 *
 * This is the rule the BIDI algorithm uses to determine the directionality of
 * paragraphs ( http://unicode.org/reports/tr9/#The_Paragraph_Level ) and
 * FSI isolates ( http://unicode.org/reports/tr9/#Explicit_Directional_Isolates ).
 *
 * The text is matched against self::$strongDirRegex; an empty first capture
 * group is reported as 'rtl', a non-empty one as 'ltr'.
 *
 * TODO: Does not handle BIDI control characters inside the text.
 * TODO: Does not handle unallocated characters.
 *
 * @param string $text Text to test
 * @return null|string Directionality ('ltr' or 'rtl') or null
 */
private static function strongDirFromContent( $text = '' ) {
	$found = array();
	if ( preg_match( self::$strongDirRegex, $text, $found ) ) {
		return $found[1] === '' ? 'rtl' : 'ltr';
	}

	// No match at all
	return null;
}
/**
 * Roman number formatting up to 10000
 *
 * Values outside 1..10000 are returned unchanged (as an integer).
 *
 * @param int $num
 *
 * @return string
 */
static function romanNumeral( $num ) {
	// One row per decimal place (units, tens, hundreds, thousands),
	// indexed by the digit at that place.
	static $table = array(
		array( '', 'I', 'II', 'III', 'IV', 'V', 'VI', 'VII', 'VIII', 'IX', 'X' ),
		array( '', 'X', 'XX', 'XXX', 'XL', 'L', 'LX', 'LXX', 'LXXX', 'XC', 'C' ),
		array( '', 'C', 'CC', 'CCC', 'CD', 'D', 'DC', 'DCC', 'DCCC', 'CM', 'M' ),
		array( '', 'M', 'MM', 'MMM', 'MMMM', 'MMMMM', 'MMMMMM', 'MMMMMMM',
			'MMMMMMMM', 'MMMMMMMMM', 'MMMMMMMMMM' )
	);

	$num = intval( $num );
	if ( $num <= 0 || $num > 10000 ) {
		return $num;
	}

	$roman = '';
	$divisor = 1000;
	$place = 3;
	while ( $place >= 0 ) {
		$digit = (int)floor( $num / $divisor );
		if ( $digit >= 1 ) {
			$roman .= $table[$place][$digit];
		}
		$num = $num % $divisor;
		$divisor /= 10;
		$place--;
	}

	return $roman;
}
/**
 * Hebrew Gematria number formatting up to 9999
 *
 * Values outside 1..9999 are returned unchanged (as an integer).
 *
 * @param int $num
 *
 * @return string
 */
static function hebrewNumeral( $num ) {
	// Rows: units, tens, hundreds (possibly several letters), thousands
	static $table = array(
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' ),
		array( '', 'י', 'כ', 'ל', 'מ', 'נ', 'ס', 'ע', 'פ', 'צ', 'ק' ),
		array( '',
			array( 'ק' ),
			array( 'ר' ),
			array( 'ש' ),
			array( 'ת' ),
			array( 'ת', 'ק' ),
			array( 'ת', 'ר' ),
			array( 'ת', 'ש' ),
			array( 'ת', 'ת' ),
			array( 'ת', 'ת', 'ק' ),
			array( 'ת', 'ת', 'ר' ),
		),
		array( '', 'א', 'ב', 'ג', 'ד', 'ה', 'ו', 'ז', 'ח', 'ט', 'י' )
	);

	$num = intval( $num );
	if ( $num <= 0 || $num > 9999 ) {
		return $num;
	}

	// Round thousands have special notations
	if ( $num === 1000 ) {
		return "א' אלף";
	}
	if ( $num % 1000 === 0 ) {
		return $table[0][$num / 1000] . "' אלפים";
	}

	$glyphs = array();
	$unit = 1000;
	$place = 3;
	while ( $place >= 0 ) {
		if ( $num >= $unit ) {
			if ( $num === 15 || $num === 16 ) {
				// 15 and 16 are written as 9+6 and 9+7
				$glyphs[] = $table[0][9];
				$glyphs[] = $table[0][$num - 9];
				$num = 0;
			} else {
				foreach ( (array)$table[$place][intval( $num / $unit )] as $glyph ) {
					$glyphs[] = $glyph;
				}

				// A geresh separates the thousands from the rest
				if ( $unit === 1000 ) {
					$glyphs[] = "'";
				}
			}
		}

		$num = $num % $unit;
		$unit /= 10;
		$place--;
	}

	$glyphCount = count( $glyphs );
	if ( $glyphCount === 1 ) {
		// Add geresh (single quote) to one-letter numbers
		$glyphs[] = "'";
		return implode( $glyphs );
	}

	// Use the final form of the last letter where one exists
	$tail = $glyphCount - 1;
	$glyphs[$tail] = str_replace(
		array( 'כ', 'מ', 'נ', 'פ', 'צ' ),
		array( 'ך', 'ם', 'ן', 'ף', 'ץ' ),
		$glyphs[$tail]
	);

	// Add gershayim (double quote) to multiple-letter numbers,
	// but exclude numbers with only one letter after the thousands
	// (1001-1009, 1020, 1030, 2001-2009, etc.)
	if ( $glyphCount === 3 && $glyphs[1] === "'" ) {
		$glyphs[] = "'";
	} else {
		array_splice( $glyphs, -1, 0, '"' );
	}

	return implode( $glyphs );
}
/**
 * Used by date() and time() to adjust the time output.
 *
 * The correction string is split on '|'. A 'ZoneInfo' entry uses its third
 * field as a timezone name; if that name is not recognised it is handled
 * like 'Offset', whose second field is an offset in minutes. 'System' (or an
 * empty correction) uses $wgLocalTZoffset. Anything else is parsed as
 * "hours:minutes" or as a plain number of hours.
 *
 * @param string $ts The time in date('YmdHis') format
 * @param mixed $tz Adjust the time by this amount (default false, mean we
 *   get user timecorrection setting)
 * @return string The adjusted time in date('YmdHis') format
 */
function userAdjust( $ts, $tz = false ) {
	global $wgUser, $wgLocalTZoffset;

	if ( $tz === false ) {
		$tz = $wgUser->getOption( 'timecorrection' );
	}

	$parts = explode( '|', $tz, 3 );
	$kind = $parts[0];

	if ( $kind == 'ZoneInfo' ) {
		MediaWiki\suppressWarnings();
		$zone = timezone_open( $parts[2] );
		MediaWiki\restoreWarnings();
		if ( $zone !== false ) {
			$utcDate = date_create( $ts, timezone_open( 'UTC' ) );
			date_timezone_set( $utcDate, $zone );
			return date_format( $utcDate, 'YmdHis' );
		}
		# Unrecognized timezone, default to 'Offset' with the stored offset.
		$kind = 'Offset';
	}

	if ( $kind == 'System' || $tz == '' ) {
		# Global offset in minutes.
		$minDiff = $wgLocalTZoffset;
	} elseif ( $kind == 'Offset' ) {
		$minDiff = intval( $parts[1] );
	} else {
		$hoursMinutes = explode( ':', $tz );
		if ( count( $hoursMinutes ) == 2 ) {
			$hours = intval( $hoursMinutes[0] );
			$minutes = intval( $hoursMinutes[1] );
			$minDiff = abs( $hours ) * 60 + $minutes;
			if ( $hours < 0 ) {
				$minDiff = -$minDiff;
			}
		} else {
			$minDiff = intval( $hoursMinutes[0] ) * 60;
		}
	}

	# No difference ? Return time unchanged
	if ( $minDiff == 0 ) {
		return $ts;
	}

	MediaWiki\suppressWarnings(); // E_STRICT system time warnings
	# Generate an adjusted date; take advantage of the fact that mktime
	# will normalize out-of-range values so we don't have to split $minDiff
	# into hours and minutes.
	$hour = (int)substr( $ts, 8, 2 );
	$minute = (int)substr( $ts, 10, 2 ) + $minDiff;
	$second = (int)substr( $ts, 12, 2 );
	$month = (int)substr( $ts, 4, 2 );
	$day = (int)substr( $ts, 6, 2 );
	$year = (int)substr( $ts, 0, 4 );
	$adjusted = date( 'YmdHis', mktime( $hour, $minute, $second, $month, $day, $year ) );
	MediaWiki\restoreWarnings();

	return $adjusted;
}
/**
 * This is meant to be used by time(), date(), and timeanddate() to get
 * the date preference they're supposed to use, it should be used in
 * all children.
 *
 *<code>
 * function timeanddate([...], $format = true) {
 * 	$datePreference = $this->dateFormat($format);
 * [...]
 * }
 *</code>
 *
 * @param int|string|bool $usePrefs If true, the user's preference is used
 *   if false, the site/language default is used
 *   if int/string, assumed to be a format.
 * @return string The preference name, or 'default' when it is empty
 */
function dateFormat( $usePrefs = true ) {
	global $wgUser;

	if ( !is_bool( $usePrefs ) ) {
		// An explicit format was given
		$datePreference = (string)$usePrefs;
	} elseif ( $usePrefs ) {
		$datePreference = $wgUser->getDatePreference();
	} else {
		$datePreference = (string)User::getDefaultOption( 'date' );
	}

	return $datePreference == '' ? 'default' : $datePreference;
}
/**
 * Get a format string for a given type and preference
 *
 * Results are memoised in $this->dateFormatStrings. When no 'pretty' format
 * exists, the 'date' format for the same preference is used. When a
 * non-default preference has no format, the default date format is looked
 * up instead (and the result is stored under that default preference).
 *
 * @param string $type May be 'date', 'time', 'both', or 'pretty'.
 * @param string $pref The format name as it appears in Messages*.php under
 *  $datePreferences.
 *
 * @since 1.22 New type 'pretty' that provides a more readable timestamp format
 *
 * @return string
 */
function getDateFormatString( $type, $pref ) {
	$wasDefault = ( $pref == 'default' );
	if ( $wasDefault ) {
		$pref = $this->getDefaultDateFormat();
	}

	if ( isset( $this->dateFormatStrings[$type][$pref] ) ) {
		return $this->dateFormatStrings[$type][$pref];
	}

	$formatString = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );

	if ( $formatString === null && $type === 'pretty' ) {
		$formatString = $this->getDateFormatString( 'date', $pref );
	}

	if ( $formatString === null && !$wasDefault ) {
		$pref = $this->getDefaultDateFormat();
		$formatString = self::$dataCache->getSubitem( $this->mCode, 'dateFormats', "$pref $type" );
	}

	$this->dateFormatStrings[$type][$pref] = $formatString;

	return $this->dateFormatStrings[$type][$pref];
}
/**
 * Format the date part of a timestamp.
 *
 * @param string $ts The time format which needs to be turned into a
 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param bool $adj Whether to adjust the time output according to the
 *   user configured offset ($timecorrection)
 * @param mixed $format True to use user's date format preference
 * @param string|bool $timecorrection The time offset as returned by
 *   validateTimeZone() in Special:Preferences
 * @return string
 */
function date( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	$mwTimestamp = $adj ? $this->userAdjust( $mwTimestamp, $timecorrection ) : $mwTimestamp;

	$pattern = $this->getDateFormatString( 'date', $this->dateFormat( $format ) );

	return $this->sprintfDate( $pattern, $mwTimestamp );
}
/**
 * Format the time-of-day part of a timestamp.
 *
 * @param string $ts The time format which needs to be turned into a
 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param bool $adj Whether to adjust the time output according to the
 *   user configured offset ($timecorrection)
 * @param mixed $format True to use user's date format preference
 * @param string|bool $timecorrection The time offset as returned by
 *   validateTimeZone() in Special:Preferences
 * @return string
 */
function time( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	$mwTimestamp = $adj ? $this->userAdjust( $mwTimestamp, $timecorrection ) : $mwTimestamp;

	$pattern = $this->getDateFormatString( 'time', $this->dateFormat( $format ) );

	return $this->sprintfDate( $pattern, $mwTimestamp );
}
/**
 * Format both the date and the time of a timestamp.
 *
 * @param string $ts The time format which needs to be turned into a
 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param bool $adj Whether to adjust the time output according to the
 *   user configured offset ($timecorrection)
 * @param mixed $format What format to return, if it's false output the
 *   default one (default true)
 * @param string|bool $timecorrection The time offset as returned by
 *   validateTimeZone() in Special:Preferences
 * @return string
 */
function timeanddate( $ts, $adj = false, $format = true, $timecorrection = false ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );
	$mwTimestamp = $adj ? $this->userAdjust( $mwTimestamp, $timecorrection ) : $mwTimestamp;

	$pattern = $this->getDateFormatString( 'both', $this->dateFormat( $format ) );

	return $this->sprintfDate( $pattern, $mwTimestamp );
}
/**
 * Takes a number of seconds and turns it into a text using values such as hours and minutes.
 *
 * Each interval from getDurationIntervals() is rendered through its
 * 'duration-*' message in this language, and the pieces are joined with
 * listToText().
 *
 * @since 1.20
 *
 * @param int $seconds The amount of seconds.
 * @param array $chosenIntervals The intervals to enable.
 *
 * @return string
 */
public function formatDuration( $seconds, array $chosenIntervals = array() ) {
	$parts = array();

	foreach ( $this->getDurationIntervals( $seconds, $chosenIntervals ) as $unit => $amount ) {
		// Messages: duration-seconds, duration-minutes, duration-hours, duration-days, duration-weeks,
		// duration-years, duration-decades, duration-centuries, duration-millennia
		$parts[] = wfMessage( 'duration-' . $unit )
			->numParams( $amount )
			->inLanguage( $this )
			->escaped();
	}

	return $this->listToText( $parts );
}
/**
 * Takes a number of seconds and returns an array with a set of corresponding intervals.
 * For example 65 will be turned into array( minutes => 1, seconds => 5 ).
 *
 * Intervals are taken from self::$durationIntervals in that array's order.
 * Intervals with a zero count are omitted, except that the last enabled
 * interval is reported when nothing else has been.
 *
 * @since 1.20
 *
 * @param int $seconds The amount of seconds.
 * @param array $chosenIntervals The intervals to enable.
 *
 * @return array
 */
public function getDurationIntervals( $seconds, array $chosenIntervals = array() ) {
	if ( !$chosenIntervals ) {
		$chosenIntervals = array(
			'millennia',
			'centuries',
			'decades',
			'years',
			'days',
			'hours',
			'minutes',
			'seconds'
		);
	}

	$enabled = array_intersect_key( self::$durationIntervals, array_flip( $chosenIntervals ) );
	$names = array_keys( $enabled );
	$lastName = array_pop( $names );

	$result = array();
	foreach ( $enabled as $unit => $unitLength ) {
		$count = floor( $seconds / $unitLength );
		$isFallback = ( $unit == $lastName && !$result );

		if ( $count > 0 || $isFallback ) {
			$seconds -= $count * $unitLength;
			$result[$unit] = $count;
		}
	}

	return $result;
}
/**
 * Internal helper function for userDate(), userTime() and userTimeAndDate()
 *
 * @param string $type Can be 'date', 'time' or 'both'
 * @param string $ts The time format which needs to be turned into a
 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param User $user User object used to get preferences for timezone and format
 * @param array $options Array, can contain the following keys:
 *   - 'timecorrection': time correction, can have the following values:
 *     - true: use user's preference
 *     - false: don't use time correction
 *     - int: value of time correction in minutes
 *   - 'format': format to use, can have the following values:
 *     - true: use user's preference
 *     - false: use default preference
 *     - string: format to use
 * @since 1.19
 * @return string
 */
private function internalUserTimeAndDate( $type, $ts, User $user, array $options ) {
	$mwTimestamp = wfTimestamp( TS_MW, $ts );

	// Missing keys default to "use the user's preference"
	$options += array( 'timecorrection' => true, 'format' => true );

	$correction = $options['timecorrection'];
	if ( $correction !== false ) {
		$offset = ( $correction === true )
			? $user->getOption( 'timecorrection' )
			: $correction;
		$mwTimestamp = $this->userAdjust( $mwTimestamp, $offset );
	}

	$format = ( $options['format'] === true )
		? $user->getDatePreference()
		: $options['format'];

	$pattern = $this->getDateFormatString( $type, $this->dateFormat( $format ) );

	return $this->sprintfDate( $pattern, $mwTimestamp );
}
/**
 * Get the formatted date for the given timestamp and formatted for
 * the given user.
 *
 * @param mixed $ts Mixed: the time format which needs to be turned into a
 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param User $user User object used to get preferences for timezone and format
 * @param array $options Array, can contain the following keys:
 *   - 'timecorrection': time correction, can have the following values:
 *     - true: use user's preference
 *     - false: don't use time correction
 *     - int: value of time correction in minutes
 *   - 'format': format to use, can have the following values:
 *     - true: use user's preference
 *     - false: use default preference
 *     - string: format to use
 * @since 1.19
 * @return string
 */
public function userDate( $ts, User $user, array $options = array() ) {
	$formatted = $this->internalUserTimeAndDate( 'date', $ts, $user, $options );

	return $formatted;
}
/**
 * Get the formatted time for the given timestamp and formatted for
 * the given user.
 *
 * @param mixed $ts The time format which needs to be turned into a
 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param User $user User object used to get preferences for timezone and format
 * @param array $options Array, can contain the following keys:
 *   - 'timecorrection': time correction, can have the following values:
 *     - true: use user's preference
 *     - false: don't use time correction
 *     - int: value of time correction in minutes
 *   - 'format': format to use, can have the following values:
 *     - true: use user's preference
 *     - false: use default preference
 *     - string: format to use
 * @since 1.19
 * @return string
 */
public function userTime( $ts, User $user, array $options = array() ) {
	$formatted = $this->internalUserTimeAndDate( 'time', $ts, $user, $options );

	return $formatted;
}
/**
 * Get the formatted date and time for the given timestamp and formatted for
 * the given user.
 *
 * @param mixed $ts The time format which needs to be turned into a
 *   date('YmdHis') format with wfTimestamp(TS_MW,$ts)
 * @param User $user User object used to get preferences for timezone and format
 * @param array $options Array, can contain the following keys:
 *   - 'timecorrection': time correction, can have the following values:
 *     - true: use user's preference
 *     - false: don't use time correction
 *     - int: value of time correction in minutes
 *   - 'format': format to use, can have the following values:
 *     - true: use user's preference
 *     - false: use default preference
 *     - string: format to use
 * @since 1.19
 * @return string
 */
public function userTimeAndDate( $ts, User $user, array $options = array() ) {
	$formatted = $this->internalUserTimeAndDate( 'both', $ts, $user, $options );

	return $formatted;
}
/**
 * Get the timestamp in a human-friendly relative format, e.g., "3 days ago".
 *
 * Determine the difference between the timestamp and the current time, and
 * generate a readable timestamp by returning "<N> <units> ago", where the
 * largest possible unit is used.
 *
 * The 'GetHumanTimestamp' hook runs first; the built-in formatting is only
 * used when the hook run returns a true value. Both timestamp objects are
 * shifted by the user's offset for the duration of the call and shifted
 * back before returning.
 *
 * @since 1.26 (Prior to 1.26 method existed but was not meant to be used directly)
 *
 * @param MWTimestamp $time
 * @param MWTimestamp|null $relativeTo The base timestamp to compare to (defaults to now)
 * @param User|null $user User the timestamp is being generated for
 *  (or null to use main context's user)
 * @return string Formatted timestamp
 */
public function getHumanTimestamp(
	MWTimestamp $time, MWTimestamp $relativeTo = null, User $user = null
) {
	if ( !$relativeTo ) {
		$relativeTo = new MWTimestamp();
	}
	if ( !$user ) {
		$user = RequestContext::getMain()->getUser();
	}

	// Adjust for the user's timezone.
	$timeOffset = $time->offsetForUser( $user );
	$baseOffset = $relativeTo->offsetForUser( $user );

	$ts = '';
	$hookArgs = array( &$ts, $time, $relativeTo, $user, $this );
	if ( Hooks::run( 'GetHumanTimestamp', $hookArgs ) ) {
		$ts = $this->getHumanTimestampInternal( $time, $relativeTo, $user );
	}

	// Reset the timezone on the objects.
	$time->timestamp->sub( $timeOffset );
	$relativeTo->timestamp->sub( $baseOffset );

	return $ts;
}
/**
 * Convert an MWTimestamp into a pretty human-readable timestamp using
 * the given user preferences and relative base time.
 *
 * All messages, including 'just-now', are rendered in this language.
 *
 * @see Language::getHumanTimestamp
 * @param MWTimestamp $ts Timestamp to prettify
 * @param MWTimestamp $relativeTo Base timestamp
 * @param User $user User preferences to use
 * @return string Human timestamp
 * @since 1.26
 */
private function getHumanTimestampInternal(
	MWTimestamp $ts, MWTimestamp $relativeTo, User $user
) {
	$diff = $ts->diff( $relativeTo );

	// When less than a full day apart, still count it as one day if the
	// two timestamps fall on different weekdays.
	$diffDay = (bool)( (int)$ts->timestamp->format( 'w' ) -
		(int)$relativeTo->timestamp->format( 'w' ) );
	$days = $diff->days ?: (int)$diffDay;

	if ( $diff->invert
		|| ( $days > 5
			&& $ts->timestamp->format( 'Y' ) !== $relativeTo->timestamp->format( 'Y' ) )
	) {
		// Timestamps are in different years: use full timestamp
		// Also do full timestamp for future dates
		/**
		 * @todo FIXME: Add better handling of future timestamps.
		 */
		$format = $this->getDateFormatString( 'both', $user->getDatePreference() ?: 'default' );
		$text = $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) );
	} elseif ( $days > 5 ) {
		// Timestamps are in same year, but more than 5 days ago: show day and month only.
		$format = $this->getDateFormatString( 'pretty', $user->getDatePreference() ?: 'default' );
		$text = $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) );
	} elseif ( $days > 1 ) {
		// Timestamp within the past week: show the day of the week and time
		$format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' );
		$weekday = self::$mWeekdayMsgs[$ts->timestamp->format( 'w' )];
		// Messages:
		// sunday-at, monday-at, tuesday-at, wednesday-at, thursday-at, friday-at, saturday-at
		$text = wfMessage( "$weekday-at" )
			->inLanguage( $this )
			->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) )
			->text();
	} elseif ( $days == 1 ) {
		// Timestamp was yesterday: say 'yesterday' and the time.
		$format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' );
		$text = wfMessage( 'yesterday-at' )
			->inLanguage( $this )
			->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) )
			->text();
	} elseif ( $diff->h > 1 || ( $diff->h == 1 && $diff->i > 30 ) ) {
		// Timestamp was today, but more than 90 minutes ago: say 'today' and the time.
		$format = $this->getDateFormatString( 'time', $user->getDatePreference() ?: 'default' );
		$text = wfMessage( 'today-at' )
			->inLanguage( $this )
			->params( $this->sprintfDate( $format, $ts->getTimestamp( TS_MW ) ) )
			->text();

	// From here on in, the timestamp was soon enough ago so that we can simply say
	// XX units ago, e.g., "2 hours ago" or "5 minutes ago"
	} elseif ( $diff->h == 1 ) {
		// Less than 90 minutes, but more than an hour ago.
		$text = wfMessage( 'hours-ago' )->inLanguage( $this )->numParams( 1 )->text();
	} elseif ( $diff->i >= 1 ) {
		// A few minutes ago.
		$text = wfMessage( 'minutes-ago' )->inLanguage( $this )->numParams( $diff->i )->text();
	} elseif ( $diff->s >= 30 ) {
		// Less than a minute, but more than 30 sec ago.
		$text = wfMessage( 'seconds-ago' )->inLanguage( $this )->numParams( $diff->s )->text();
	} else {
		// Less than 30 seconds ago. Rendered in this language like every
		// other branch, rather than in the interface language.
		$text = wfMessage( 'just-now' )->inLanguage( $this )->text();
	}

	return $text;
}
/**
 * Fetch a single entry of this language's 'messages' item from the
 * localisation cache.
 *
 * @param string $key
 * @return array|null
 */
function getMessage( $key ) {
	$code = $this->mCode;

	return self::$dataCache->getSubitem( $code, 'messages', $key );
}
/**
 * Fetch this language's whole 'messages' item from the localisation cache.
 *
 * @return array
 */
function getAllMessages() {
	$code = $this->mCode;

	return self::$dataCache->getItem( $code, 'messages' );
}
/**
 * Convert a string between character encodings with PHP's iconv(),
 * appending '//IGNORE' to the target encoding and suppressing warnings.
 *
 * @param string $in Source encoding
 * @param string $out Target encoding
 * @param string $string Text to convert
 * @return string
 */
function iconv( $in, $out, $string ) {
	# This is a wrapper for iconv in all languages except esperanto,
	# which does some nasty x-conversions beforehand

	# Even with //IGNORE iconv can whine about illegal characters in
	# *input* string. We just ignore those too.
	# REF: http://bugs.php.net/bug.php?id=37166
	# REF: https://phabricator.wikimedia.org/T18885
	$target = $out . '//IGNORE';

	MediaWiki\suppressWarnings();
	$converted = iconv( $in, $target, $string );
	MediaWiki\restoreWarnings();

	return $converted;
}
2660 // callback functions for uc(), lc(), ucwords(), ucwordbreaks()
/**
 * preg_replace_callback() handler used by ucwordbreaks(): upper-cases the
 * first character of the first capture group.
 *
 * @param array $matches
 * @return mixed|string
 */
function ucwordbreaksCallbackAscii( $matches ) {
	$word = $matches[1];

	return $this->ucfirst( $word );
}
/**
 * preg_replace_callback() handler used by ucwordbreaks(): upper-cases the
 * whole match with mb_strtoupper().
 *
 * @param array $matches
 * @return string
 */
function ucwordbreaksCallbackMB( $matches ) {
	$matched = $matches[0];

	return mb_strtoupper( $matched );
}
/**
 * preg_replace_callback() handler used by uc(): translates the first
 * capture group through the first map returned by getCaseMaps().
 *
 * @param array $matches
 * @return string
 */
function ucCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	$toUpper = $caseMaps[0];

	return strtr( $matches[1], $toUpper );
}
/**
 * preg_replace_callback() handler used by lc(): translates the first
 * capture group through the second map returned by getCaseMaps().
 *
 * @param array $matches
 * @return string
 */
function lcCallback( $matches ) {
	$caseMaps = self::getCaseMaps();
	$toLower = $caseMaps[1];

	return strtr( $matches[1], $toLower );
}
/**
 * preg_replace_callback() handler used by ucwords(): upper-cases the
 * whole match with mb_strtoupper().
 *
 * @param array $matches
 * @return string
 */
function ucwordsCallbackMB( $matches ) {
	$matched = $matches[0];

	return mb_strtoupper( $matched );
}
/**
 * preg_replace_callback() handler used by ucwords() and ucwordbreaks():
 * translates the whole match through the first map returned by
 * getCaseMaps().
 *
 * @param array $matches
 * @return string
 */
function ucwordsCallbackWiki( $matches ) {
	$caseMaps = self::getCaseMaps();
	$toUpper = $caseMaps[0];

	return strtr( $matches[0], $toUpper );
}
/**
 * Make a string's first character uppercase
 *
 * Dispatches on the value of the first byte: below 96 the string is
 * returned untouched, below 128 PHP's ucfirst() is used, and anything else
 * is handed to uc() for multibyte handling.
 *
 * @param string $str
 *
 * @return string
 */
function ucfirst( $str ) {
	$leadByte = ord( $str );

	if ( $leadByte >= 128 ) {
		// fall back to more complex logic in case of multibyte strings
		return $this->uc( $str, true );
	}

	// Below 96 there is nothing to upper-case; otherwise use PHP's ucfirst()
	return $leadByte < 96 ? $str : ucfirst( $str );
}
/**
 * Convert a string to uppercase
 *
 * Single-byte strings go through PHP's ucfirst()/strtoupper(). Multibyte
 * strings use the mb_* functions when mb_strtoupper() exists, and a
 * regex with ucCallback() otherwise.
 *
 * @param string $str
 * @param bool $first Only upper-case the first character
 *
 * @return string
 */
function uc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		return $first ? ucfirst( $str ) : strtoupper( $str );
	}

	if ( function_exists( 'mb_strtoupper' ) ) {
		if ( !$first ) {
			return mb_strtoupper( $str );
		}
		return mb_strtoupper( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	// No mbstring: match ASCII letters or whole UTF-8 sequences,
	// anchored at the start when only the first character is wanted.
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/{$anchor}([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'ucCallback' ),
		$str
	);
}
/**
 * Make a string's first character lowercase
 *
 * Dispatches on the value of the first byte: zero returns the string
 * value as-is, 128 and above is handed to lc(), above 96 is returned
 * untouched, and anything else has its first byte lower-cased.
 *
 * @param string $str
 * @return mixed|string
 */
function lcfirst( $str ) {
	$leadByte = ord( $str );

	if ( !$leadByte ) {
		return strval( $str );
	}
	if ( $leadByte >= 128 ) {
		return $this->lc( $str, true );
	}
	if ( $leadByte <= 96 ) {
		$str[0] = strtolower( $str[0] );
	}

	return $str;
}
/**
 * Convert a string to lowercase
 *
 * Single-byte strings go through PHP's strtolower(). Multibyte strings use
 * the mb_* functions when mb_strtolower() exists, and a regex with
 * lcCallback() otherwise.
 *
 * @param string $str
 * @param bool $first Only lower-case the first character
 * @return mixed|string
 */
function lc( $str, $first = false ) {
	if ( !$this->isMultibyte( $str ) ) {
		return $first
			? strtolower( substr( $str, 0, 1 ) ) . substr( $str, 1 )
			: strtolower( $str );
	}

	if ( function_exists( 'mb_strtolower' ) ) {
		if ( !$first ) {
			return mb_strtolower( $str );
		}
		return mb_strtolower( mb_substr( $str, 0, 1 ) ) . mb_substr( $str, 1 );
	}

	// No mbstring: match ASCII letters or whole UTF-8 sequences,
	// anchored at the start when only the first character is wanted.
	$anchor = $first ? '^' : '';
	return preg_replace_callback(
		"/{$anchor}([A-Z]|[\\xc0-\\xff][\\x80-\\xbf]*)/",
		array( $this, 'lcCallback' ),
		$str
	);
}
/**
 * Tell whether a string's byte length differs from its character length
 * as counted by mb_strlen().
 *
 * @param string $str
 * @return bool
 */
function isMultibyte( $str ) {
	$byteCount = strlen( $str );
	$charCount = mb_strlen( $str );

	return $byteCount !== $charCount;
}
/**
 * Lower-case a string, then upper-case the first letter of each word.
 *
 * Single-byte strings use PHP's ucwords(). Multibyte strings are
 * lower-cased with lc() and then capitalised at the start of the string
 * and after each space.
 *
 * @param string $str
 * @return mixed|string
 */
function ucwords( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return ucwords( strtolower( $str ) );
	}

	$lowered = $this->lc( $str );

	// regexp to find first letter in each word (i.e. after each space)
	$firstLetters = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)| ([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	// function to use to capitalize a single char
	$handler = function_exists( 'mb_strtoupper' )
		? 'ucwordsCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $firstLetters, array( $this, $handler ), $lowered );
}
/**
 * capitalize words at word breaks
 *
 * Multibyte strings are lower-cased with lc() first and then capitalised
 * after an explicit list of break characters. Single-byte strings are not
 * lower-cased; each \b-delimited word is passed to ucfirst().
 *
 * @param string $str
 * @return mixed
 */
function ucwordbreaks( $str ) {
	if ( !$this->isMultibyte( $str ) ) {
		return preg_replace_callback(
			'/\b([\w\x80-\xff]+)\b/',
			array( $this, 'ucwordbreaksCallbackAscii' ),
			$str
		);
	}

	$lowered = $this->lc( $str );

	// since \b doesn't work for UTF-8, we explicitly define word break chars
	$breaks = "[ \-\(\)\}\{\.,\?!]";

	// find first letter after word break
	$firstLetters = "/^([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)|" .
		"{$breaks}([a-z]|[\\xc0-\\xff][\\x80-\\xbf]*)/";

	$handler = function_exists( 'mb_strtoupper' )
		? 'ucwordbreaksCallbackMB'
		: 'ucwordsCallbackWiki';

	return preg_replace_callback( $firstLetters, array( $this, $handler ), $lowered );
}
/**
 * Return a case-folded representation of $s
 *
 * This is a representation such that caseFold($s1)==caseFold($s2) if $s1
 * and $s2 are the same except for the case of their characters. It is not
 * necessary for the value returned to make sense when displayed.
 *
 * Do *not* perform any other normalisation in this function. If a caller
 * uses this function when it should be using a more general normalisation
 * function, then fix the caller.
 *
 * @param string $s
 *
 * @return string
 */
function caseFold( $s ) {
	// Folding is implemented as upper-casing via uc()
	$folded = $this->uc( $s );

	return $folded;
}
/**
 * Return $s unchanged when StringUtils::isUtf8() accepts it; otherwise
 * convert it from the fallback 8-bit encoding to UTF-8.
 *
 * @param string $s
 * @throws MWException If $s is an array
 * @return string
 */
function checkTitleEncoding( $s ) {
	if ( is_array( $s ) ) {
		throw new MWException( 'Given array to checkTitleEncoding.' );
	}

	return StringUtils::isUtf8( $s )
		? $s
		: $this->iconv( $this->fallback8bitEncoding(), 'utf-8', $s );
}
/**
 * Fetch this language's 'fallback8bitEncoding' item from the
 * localisation cache.
 *
 * @return string Presumably an encoding name: checkTitleEncoding() passes
 *   the value to iconv() as the source encoding
 */
function fallback8bitEncoding() {
	$code = $this->mCode;

	return self::$dataCache->getItem( $code, 'fallback8bitEncoding' );
}
/**
 * Most writing systems use whitespace to break up words.
 * Some languages such as Chinese don't conventionally do this,
 * which requires special handling when breaking up words for
 * searching etc.
 *
 * This base implementation always answers true.
 *
 * @return bool
 */
function hasWordBreaks() {
	$usesWhitespace = true;

	return $usesWhitespace;
}
/**
 * Hook for word segmentation.
 *
 * Some languages, such as Chinese, require word segmentation; derived
 * classes override this to provide it. The base implementation returns
 * the input untouched.
 *
 * @param string $string
 * @return string
 */
function segmentByWord( $string ) {
	// No segmentation in the base class.
	$segmented = $string;

	return $segmented;
}
/**
 * Normalise a string before it is used for searching.
 *
 * Some languages have special punctuation that needs to be normalised;
 * such changes belong here. The base implementation only folds
 * double-width roman characters to single width.
 *
 * @param string $string
 * @return string
 */
function normalizeForSearch( $string ) {
	$normalized = self::convertDoubleWidth( $string );

	return $normalized;
}
/**
 * Convert double-width (fullwidth) roman letters and digits to their
 * single-width ASCII equivalents.
 * Range: U+FF00-FF5F ~= U+0020-007F; only 0-9, A-Z and a-z are mapped.
 *
 * The lookup tables are built once per process and kept in static locals.
 *
 * @param string $string
 *
 * @return string
 */
protected static function convertDoubleWidth( $string ) {
	static $full = null;
	static $half = null;

	if ( $full === null ) {
		$halfWidth = "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz";
		$half = str_split( $halfWidth );
		$full = array();
		foreach ( $half as $ascii ) {
			// A fullwidth form sits exactly 0xFEE0 above its ASCII code point
			// (e.g. '0' U+0030 -> U+FF10). Encode it by hand as a 3-byte UTF-8
			// sequence so the table does not depend on the source file's own
			// encoding surviving intact.
			$codepoint = ord( $ascii ) + 0xFEE0;
			$full[] = chr( 0xE0 | ( $codepoint >> 12 ) )
				. chr( 0x80 | ( ( $codepoint >> 6 ) & 0x3F ) )
				. chr( 0x80 | ( $codepoint & 0x3F ) );
		}
	}

	return str_replace( $full, $half, $string );
}
/**
 * Surround every match of $pattern's first capture group with single
 * spaces, then collapse each run of spaces into one space.
 *
 * @param string $string
 * @param string $pattern PCRE pattern containing at least one capture group
 * @return string
 */
protected static function insertSpace( $string, $pattern ) {
	$padded = preg_replace( $pattern, " $1 ", $string );

	return preg_replace( '/ +/', ' ', $padded );
}
/**
 * Hook for converting search terms before results are displayed.
 *
 * Some languages, e.g. Chinese, need a conversion for search results to
 * be displayed correctly. The base implementation returns the terms as
 * given.
 *
 * @param array $termsArray
 * @return array
 */
function convertForSearchResult( $termsArray ) {
	// Nothing to convert in the base class.
	$converted = $termsArray;

	return $converted;
}
/**
 * Get the first character of a string.
 *
 * The first UTF-8 sequence (1 to 4 bytes) is extracted. When it is a
 * precomposed Hangul syllable (U+AC00..U+D7A3), the syllable is broken
 * down and its first jamo is returned instead. An empty string is
 * returned when no leading character can be matched.
 *
 * @param string $s
 * @return string
 */
function firstChar( $s ) {
	$found = array();
	preg_match(
		'/^([\x00-\x7f]|[\xc0-\xdf][\x80-\xbf]|' .
			'[\xe0-\xef][\x80-\xbf]{2}|[\xf0-\xf7][\x80-\xbf]{3})/',
		$s,
		$found
	);

	if ( !isset( $found[1] ) ) {
		return '';
	}

	$char = $found[1];
	// Hangul syllables are all 3-byte sequences; anything else is final.
	if ( strlen( $char ) != 3 ) {
		return $char;
	}

	$code = UtfNormal\Utils::utf8ToCodepoint( $char );
	if ( $code < 0xac00 || 0xd7a4 <= $code ) {
		return $char;
	}

	// Exclusive upper bound of each syllable range => jamo returned for
	// it, in ascending order; the first bound above $code wins.
	$jamoByUpperBound = array(
		0xb098 => "\xe3\x84\xb1",
		0xb2e4 => "\xe3\x84\xb4",
		0xb77c => "\xe3\x84\xb7",
		0xb9c8 => "\xe3\x84\xb9",
		0xbc14 => "\xe3\x85\x81",
		0xc0ac => "\xe3\x85\x82",
		0xc544 => "\xe3\x85\x85",
		0xc790 => "\xe3\x85\x87",
		0xcc28 => "\xe3\x85\x88",
		0xce74 => "\xe3\x85\x8a",
		0xd0c0 => "\xe3\x85\x8b",
		0xd30c => "\xe3\x85\x8c",
		0xd558 => "\xe3\x85\x8d",
	);
	foreach ( $jamoByUpperBound as $upperBound => $jamo ) {
		if ( $code < $upperBound ) {
			return $jamo;
		}
	}

	// Everything from U+D558 up to the end of the syllable block.
	return "\xe3\x85\x8e";
}
/**
 * Startup hook for alternate character encoding options.
 *
 * Some languages may offer an alternate char encoding (Esperanto
 * X-coding, Japanese furigana conversion, etc). When such a language is
 * the primary content language, an override of the defaults can be set
 * up here. The base implementation does nothing.
 */
function initEncoding() {
	// Intentionally empty.
}
/**
 * Recode text on its way into the edit box.
 *
 * For some languages we want to specify explicitly which characters reach
 * the edit box raw and which are converted. The text is converted from
 * UTF-8 to $wgEditEncoding unless that setting is empty or 'UTF-8'.
 *
 * @param string $s
 * @return string
 */
function recodeForEdit( $s ) {
	global $wgEditEncoding;

	$needsRecoding = !( $wgEditEncoding == '' || $wgEditEncoding == 'UTF-8' );
	if ( $needsRecoding ) {
		return $this->iconv( 'UTF-8', $wgEditEncoding, $s );
	}

	return $s;
}
/**
 * Recode submitted text back to UTF-8; the counterpart of recodeForEdit().
 *
 * An empty $wgEditEncoding is treated as 'UTF-8', in which case the text
 * is returned as is.
 *
 * @param string $s
 * @return string
 */
function recodeInput( $s ) {
	global $wgEditEncoding;

	$enc = ( $wgEditEncoding != '' ) ? $wgEditEncoding : 'UTF-8';

	return ( $enc == 'UTF-8' )
		? $s
		: $this->iconv( $enc, 'UTF-8', $s );
}
/**
 * Convert a UTF-8 string to normal form C. In Malayalam and Arabic, this
 * also cleans up certain backwards-compatible sequences, converting them
 * to the modern Unicode equivalent.
 *
 * This is language-specific for performance reasons only. In the base
 * class the Arabic and Malayalam pair files are applied only when
 * $wgAllUnicodeFixes is set.
 *
 * @param string $s
 *
 * @return string
 */
function normalize( $s ) {
	global $wgAllUnicodeFixes;

	$s = UtfNormal\Validator::cleanUp( $s );
	if ( !$wgAllUnicodeFixes ) {
		return $s;
	}

	// Order matters: Arabic fixes first, then Malayalam.
	foreach ( array( 'normalize-ar.ser', 'normalize-ml.ser' ) as $pairFile ) {
		$s = $this->transformUsingPairFile( $pairFile, $s );
	}

	return $s;
}
/**
 * Transform a string using serialized data stored in the given file (which
 * must be in the serialized subdirectory of $IP). The file contains pairs
 * mapping source characters to destination characters.
 *
 * The ReplacementArray built from each file is cached in
 * $this->transformData. This will go faster if you have the
 * FastStringSearch extension.
 *
 * @param string $file
 * @param string $string
 *
 * @throws MWException If the precompiled data for $file cannot be loaded
 * @return string
 */
function transformUsingPairFile( $file, $string ) {
	if ( isset( $this->transformData[$file] ) ) {
		return $this->transformData[$file]->replace( $string );
	}

	$pairs = wfGetPrecompiledData( $file );
	if ( $pairs === false ) {
		throw new MWException( __METHOD__ . ": The transformation file $file is missing" );
	}
	$this->transformData[$file] = new ReplacementArray( $pairs );

	return $this->transformData[$file]->replace( $string );
}
/**
 * For right-to-left language support: fetch the 'rtl' localisation item
 * for this language.
 *
 * @return bool
 */
function isRTL() {
	$rtl = self::$dataCache->getItem( $this->mCode, 'rtl' );

	return $rtl;
}
/**
 * Return the correct HTML 'dir' attribute value for this language.
 *
 * @return string 'rtl' or 'ltr'
 */
function getDir() {
	if ( $this->isRTL() ) {
		return 'rtl';
	}

	return 'ltr';
}
/**
 * Return 'left' or 'right' as appropriate alignment for line-start
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'start' text-align value....
 *
 * @return string
 */
function alignStart() {
	if ( $this->isRTL() ) {
		return 'right';
	}

	return 'left';
}
/**
 * Return 'right' or 'left' as appropriate alignment for line-end
 * for this language's text direction.
 *
 * Should be equivalent to CSS3 'end' text-align value....
 *
 * @return string
 */
function alignEnd() {
	if ( $this->isRTL() ) {
		return 'left';
	}

	return 'right';
}
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction.
 * Unlike getDirMark(), this function returns the character as an HTML entity.
 * Use it when the output is guaranteed to be HTML, because it keeps the
 * HTML source more readable. When the output is plain text or can be
 * escaped, getDirMark() should be used.
 *
 * @param bool $opposite Get the direction mark opposite to your language
 * @return string '&rlm;' or '&lrm;'
 * @since 1.20
 */
function getDirMarkEntity( $opposite = false ) {
	$rtl = (bool)$this->isRTL();

	// RLM for an RTL language, LRM otherwise; $opposite flips the choice.
	return ( $rtl !== (bool)$opposite ) ? '&rlm;' : '&lrm;';
}
/**
 * A hidden direction mark (LRM or RLM), depending on the language direction.
 * The mark is produced as an invisible Unicode character, which makes the
 * output hard to read and debug, so use this only when the output is
 * plain text or can be escaped. For HTML, use getDirMarkEntity() instead.
 *
 * @param bool $opposite Get the direction mark opposite to your language
 * @return string
 */
function getDirMark( $opposite = false ) {
	$lrm = "\xE2\x80\x8E"; # LEFT-TO-RIGHT MARK, commonly abbreviated LRM
	$rlm = "\xE2\x80\x8F"; # RIGHT-TO-LEFT MARK, commonly abbreviated RLM

	$rtl = (bool)$this->isRTL();

	// RLM for an RTL language, LRM otherwise; $opposite flips the choice.
	return ( $rtl !== (bool)$opposite ) ? $rlm : $lrm;
}
/**
 * Fetch the 'capitalizeAllNouns' localisation item for this language.
 *
 * NOTE(review): the previous doc said "@return array"; the item name
 * suggests a boolean flag - confirm against the localisation data.
 *
 * @return mixed Cached value of the 'capitalizeAllNouns' item
 */
function capitalizeAllNouns() {
	$setting = self::$dataCache->getItem( $this->mCode, 'capitalizeAllNouns' );

	return $setting;
}
/**
 * An arrow, depending on the language direction.
 *
 * 'forwards' and 'backwards' follow the reading direction of the
 * language; the other four values are absolute.
 *
 * @param string $direction The direction of the arrow: forwards (default),
 *   backwards, left, right, up, down.
 * @return string|null The arrow character; nothing is returned for an
 *   unrecognised $direction
 */
function getArrow( $direction = 'forwards' ) {
	$leftArrow = '←';
	$rightArrow = '→';

	// Loose comparisons, tested in this order, mirror switch semantics.
	if ( $direction == 'forwards' ) {
		return $this->isRTL() ? $leftArrow : $rightArrow;
	} elseif ( $direction == 'backwards' ) {
		return $this->isRTL() ? $rightArrow : $leftArrow;
	} elseif ( $direction == 'left' ) {
		return $leftArrow;
	} elseif ( $direction == 'right' ) {
		return $rightArrow;
	} elseif ( $direction == 'up' ) {
		return '↑';
	} elseif ( $direction == 'down' ) {
		return '↓';
	}
	// Unknown direction: fall off the end without a value.
}
/**
 * To allow "foo[[bar]]" to extend the link over the whole word "foobar".
 * Returns the 'linkPrefixExtension' localisation item for this language.
 *
 * @return bool
 */
function linkPrefixExtension() {
	$enabled = self::$dataCache->getItem( $this->mCode, 'linkPrefixExtension' );

	return $enabled;
}
/**
 * Get all magic words from the localisation cache.
 *
 * @return array
 */
function getMagicWords() {
	$magicWords = self::$dataCache->getItem( $this->mCode, 'magicWords' );

	return $magicWords;
}
/**
 * Run the LanguageGetMagic hook once.
 *
 * The done-flag is raised before the hook runs, so later calls return
 * without running it again.
 */
protected function doMagicHook() {
	if ( !$this->mMagicHookDone ) {
		$this->mMagicHookDone = true;
		Hooks::run( 'LanguageGetMagic', array( &$this->mMagicExtensions, $this->getCode() ) );
	}
}
/**
 * Fill a MagicWord object with data from here.
 *
 * Entries added through the magic-word extension array take precedence
 * over the localisation cache. A valid entry is an array whose first
 * element is the case-sensitivity flag and whose remaining elements are
 * the synonyms; anything else triggers a warning and leaves $mw untouched.
 *
 * @param MagicWord $mw
 */
function getMagic( $mw ) {
	// Checking the flag here saves a function call
	if ( !$this->mMagicHookDone ) {
		$this->doMagicHook();
	}

	$rawEntry = isset( $this->mMagicExtensions[$mw->mId] )
		? $this->mMagicExtensions[$mw->mId]
		: self::$dataCache->getSubitem( $this->mCode, 'magicWords', $mw->mId );

	if ( !is_array( $rawEntry ) ) {
		wfWarn( "\"$rawEntry\" is not a valid magic word for \"$mw->mId\"" );
		return;
	}

	$mw->mCaseSensitive = $rawEntry[0];
	$mw->mSynonyms = array_slice( $rawEntry, 1 );
}
/**
 * Add magic words to the extension array.
 *
 * The fallback chain is walked in reverse order; words supplied for a
 * code processed later replace those merged earlier for the same key.
 *
 * @param array $newWords Magic word definitions keyed by language code
 */
function addMagicWordsByLang( $newWords ) {
	foreach ( array_reverse( $this->getFallbackLanguages() ) as $code ) {
		if ( !isset( $newWords[$code] ) ) {
			continue;
		}
		// Array union: keys already in $newWords[$code] win.
		$this->mMagicExtensions = $newWords[$code] + $this->mMagicExtensions;
	}
}
/**
 * Get special page names, as an associative array
 *   canonical name => array of valid names, including aliases
 *
 * The result is kept in $this->mExtendedSpecialPageAliases because it may
 * be slow to load; the LanguageGetSpecialPageAliases hook runs only when
 * that cache is first filled.
 *
 * @return array
 */
function getSpecialPageAliases() {
	if ( $this->mExtendedSpecialPageAliases !== null ) {
		return $this->mExtendedSpecialPageAliases;
	}

	// First call: initialise from the localisation cache, then let
	// extensions adjust the list.
	$this->mExtendedSpecialPageAliases =
		self::$dataCache->getItem( $this->mCode, 'specialPageAliases' );
	Hooks::run( 'LanguageGetSpecialPageAliases',
		array( &$this->mExtendedSpecialPageAliases, $this->getCode() ) );

	return $this->mExtendedSpecialPageAliases;
}
/**
 * Wrap text in an <em> element.
 *
 * Italic is unsuitable for some languages, which is why emphasis is
 * routed through this method.
 *
 * @param string $text The text to be emphasized.
 * @return string
 */
function emphasize( $text ) {
	return '<em>' . $text . '</em>';
}
/**
 * Normally we output all numbers in plain en_US style, that is
 * 293,291.235 for twohundredninetythreethousand-twohundredninetyone
 * point twohundredthirtyfive. However this is not suitable for all
 * languages, some such as Punjabi want ੨੯੩,੨੯੫.੨੩੫ and others such as
 * Icelandic just want to use commas instead of dots, and dots instead
 * of commas like "293.291,235".
 *
 * An example of this function being called:
 * <code>
 * wfMessage( 'message' )->numParams( $num )->text()
 * </code>
 *
 * See $separatorTransformTable on MessageIs.php for
 * the , => . and . => , implementation.
 *
 * Steps: unless $nocommafy is set, digits are grouped via commafy() and
 * the separator transform table is applied; then, when
 * $wgTranslateNumerals is set, the digit transform table is applied.
 *
 * @todo check if it's viable to use localeconv() for the decimal separator thing.
 * @param int|float $number The string to be formatted, should be an integer
 *   or a floating point number.
 * @param bool $nocommafy Set to true for special numbers like dates
 * @return string
 */
public function formatNum( $number, $nocommafy = false ) {
	global $wgTranslateNumerals;

	if ( !$nocommafy ) {
		$number = $this->commafy( $number );
		$separators = $this->separatorTransformTable();
		if ( $separators ) {
			$number = strtr( $number, $separators );
		}
	}

	if ( $wgTranslateNumerals ) {
		$digits = $this->digitTransformTable();
		if ( $digits ) {
			$number = strtr( $number, $digits );
		}
	}

	return $number;
}
/**
 * Front-end for non-commafied formatNum: formats the number with the
 * $nocommafy flag set.
 *
 * @param int|float $number The string to be formatted, should be an integer
 *   or a floating point number.
 * @since 1.21
 * @return string
 */
public function formatNumNoSeparators( $number ) {
	$skipGrouping = true;

	return $this->formatNum( $number, $skipGrouping );
}
/**
 * Undo the localisation applied by formatNum().
 *
 * The digit transform table and then the separator transform table are
 * applied in reverse (localised => plain); finally every ',' is removed.
 *
 * @param string $number
 * @return string
 */
public function parseFormattedNumber( $number ) {
	// Digits first, separators second - same order as before.
	foreach ( array( 'digitTransformTable', 'separatorTransformTable' ) as $tableGetter ) {
		$table = $this->$tableGetter();
		if ( $table ) {
			// eliminate empty array values such as ''. (bug 64347)
			$table = array_filter( $table );
			$number = strtr( $number, array_flip( $table ) );
		}
	}

	return strtr( $number, array( ',' => '' ) );
}
/**
 * Adds commas to a given number.
 *
 * Without a digit grouping pattern, or with the pattern "###,###,###",
 * digits are grouped in thousands. Otherwise the '#' runs of the pattern
 * give the group sizes, read from the right; the leftmost run is reused
 * for all remaining digits.
 *
 * @since 1.19
 * @param mixed $number
 * @return string Empty string when $number is null
 */
function commafy( $number ) {
	if ( $number === null ) {
		return '';
	}

	$digitGroupingPattern = $this->digitGroupingPattern();
	if ( !$digitGroupingPattern || $digitGroupingPattern === "###,###,###" ) {
		// default grouping is at thousands, use the same for ###,###,### pattern too.
		return strrev( (string)preg_replace( '/(\d{3})(?=\d)(?!\d*\.)/', '$1,', strrev( $number ) ) );
	}

	// Ref: http://cldr.unicode.org/translation/number-patterns
	$number = (string)$number;
	$sign = "";
	// Detect the sign from the text itself. intval() truncates toward zero,
	// so "-0.5" and "-0" would not look negative and the '-' would then be
	// sliced into the digit groups below.
	if ( $number !== '' && $number[0] === '-' ) {
		// For negative numbers apply the algorithm like positive number and add sign.
		$sign = "-";
		$number = (string)substr( $number, 1 );
	}

	$integerPart = array();
	$decimalPart = array();
	$matches = array();
	$numMatches = preg_match_all( "/(#+)/", $digitGroupingPattern, $matches );
	if ( !$numMatches ) {
		// A pattern without any '#' gives no group size; the loop below
		// could never advance, so return the number ungrouped.
		return $sign . $number;
	}
	preg_match( "/\d+/", $number, $integerPart );
	preg_match( "/\.\d*/", $number, $decimalPart );
	$groupedNumber = ( count( $decimalPart ) > 0 ) ? $decimalPart[0] : "";
	if ( $groupedNumber === $number ) {
		// the string does not have any number part. Eg: .12345
		return $sign . $groupedNumber;
	}

	// Walk the integer part right to left, one group per iteration.
	$start = $end = ( $integerPart ) ? strlen( $integerPart[0] ) : 0;
	while ( $start > 0 ) {
		$match = $matches[0][$numMatches - 1];
		$matchLen = strlen( $match );
		$start = $end - $matchLen;
		if ( $start < 0 ) {
			$start = 0;
		}
		$groupedNumber = substr( $number, $start, $end - $start ) . $groupedNumber;
		$end = $start;
		if ( $numMatches > 1 ) {
			// use the last pattern for the rest of the number
			$numMatches--;
		}
		if ( $start > 0 ) {
			$groupedNumber = "," . $groupedNumber;
		}
	}

	return $sign . $groupedNumber;
}
/**
 * Fetch the 'digitGroupingPattern' localisation item for this language.
 *
 * @return string
 */
function digitGroupingPattern() {
	$pattern = self::$dataCache->getItem( $this->mCode, 'digitGroupingPattern' );

	return $pattern;
}
/**
 * Fetch the 'digitTransformTable' localisation item for this language.
 *
 * @return array
 */
function digitTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'digitTransformTable' );

	return $table;
}
/**
 * Fetch the 'separatorTransformTable' localisation item for this language.
 *
 * @return array
 */
function separatorTransformTable() {
	$table = self::$dataCache->getItem( $this->mCode, 'separatorTransformTable' );

	return $table;
}
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 * The last two strings are chained with an "and".
 *
 * Array keys are ignored, so the input does not need sequential numeric
 * keys; items are joined in iteration order.
 *
 * @param string[] $l
 * @return string Empty string for an empty list
 */
function listToText( array $l ) {
	$itemCount = count( $l );
	if ( $itemCount < 1 ) {
		return '';
	}

	$last = array_pop( $l );
	if ( $itemCount === 1 ) {
		return $last;
	}

	$and = $this->msg( 'and' )->escaped();
	$space = $this->msg( 'word-separator' )->escaped();
	// The comma separator is only needed from three items upwards.
	$comma = '';
	if ( $itemCount > 2 ) {
		$comma = $this->msg( 'comma-separator' )->escaped();
	}

	return implode( $comma, $l ) . $and . $space . $last;
}
/**
 * Take a list of strings and build a locale-friendly comma-separated
 * list, using the local comma-separator message.
 *
 * @param string[] $list Array of strings to put in a comma list
 * @return string
 */
function commaList( array $list ) {
	$separator = wfMessage( 'comma-separator' )->inLanguage( $this )->escaped();

	return implode( $separator, $list );
}
/**
 * Take a list of strings and build a locale-friendly semicolon-separated
 * list, using the local semicolon-separator message.
 *
 * @param string[] $list Array of strings to put in a semicolon list
 * @return string
 */
function semicolonList( array $list ) {
	$separator = wfMessage( 'semicolon-separator' )->inLanguage( $this )->escaped();

	return implode( $separator, $list );
}
/**
 * Same as commaList, but the items are separated with the local
 * pipe-separator message instead.
 *
 * @param string[] $list Array of strings to put in a pipe list
 * @return string
 */
function pipeList( array $list ) {
	$separator = wfMessage( 'pipe-separator' )->inLanguage( $this )->escaped();

	return implode( $separator, $list );
}
/**
 * Truncate a string to a specified length in bytes, appending an optional
 * string (e.g. for ellipses)
 *
 * The database offers limited byte lengths for some columns; multi-byte
 * character sets mean we need to ensure that only whole characters are
 * included, otherwise broken characters can be passed to the user.
 *
 * If $length is negative, the string will be truncated from the beginning
 *
 * @param string $string String to truncate
 * @param int $length Maximum length (including ellipses)
 * @param string $ellipsis String to append to the truncated text; the
 *   default '...' is replaced by the localised 'ellipsis' message
 * @param bool $adjustLength Subtract length of ellipsis from $length.
 *   $adjustLength was introduced in 1.18, before that behaved as if false.
 * @return string
 */
function truncate( $string, $length, $ellipsis = '...', $adjustLength = true ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
	}

	# Check if there is no need to truncate
	if ( $length == 0 ) {
		return $ellipsis; // convention
	}
	$limit = abs( $length );
	if ( strlen( $string ) <= $limit ) {
		return $string; // no need to truncate
	}

	$ellipsisBytes = strlen( $ellipsis );
	if ( $adjustLength && $ellipsisBytes >= $limit ) {
		# The ellipsis alone fills the limit, so $adjustLength cannot be applied
		$truncated = $ellipsis; // this can be slightly unexpected
	} else {
		# Otherwise, truncate and add ellipsis...
		$reserved = $adjustLength ? $ellipsisBytes : 0;
		if ( $length > 0 ) {
			$head = substr( $string, 0, $length - $reserved ); // xyz...
			$truncated = rtrim( $this->removeBadCharLast( $head ) ) . $ellipsis;
		} else {
			$tail = substr( $string, $length + $reserved ); // ...xyz
			$truncated = $ellipsis . ltrim( $this->removeBadCharFirst( $tail ) );
		}
	}

	# Do not truncate if the ellipsis makes the string longer/equal (bug 22181).
	# This check is *not* redundant if $adjustLength, due to the single case where
	# LEN($ellipsis) > ABS($limit arg); the original could be shorter than the result.
	return ( strlen( $truncated ) < strlen( $string ) ) ? $truncated : $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the end of string (e.g. bytes of the char are missing)
 *
 * @param string $string
 * @return string
 */
protected function removeBadCharLast( $string ) {
	if ( $string == '' ) {
		return $string;
	}

	$lastByte = ord( $string[strlen( $string ) - 1] );
	if ( $lastByte >= 0xc0 ) {
		# We got the first byte only of a multibyte char; remove it.
		return substr( $string, 0, -1 );
	}
	if ( $lastByte < 0x80 ) {
		# Plain ASCII at the end; nothing can be cut off.
		return $string;
	}

	# The string ends in a continuation byte: look for a 3- or 4-byte lead
	# followed by too few continuation bytes.
	$parts = array();
	// Use the /s modifier (PCRE_DOTALL) so (.*) also matches newlines
	$isCutOff = preg_match( '/^(.*)(?:[\xe0-\xef][\x80-\xbf]|' .
		'[\xf0-\xf7][\x80-\xbf]{1,2})$/s', $string, $parts );
	if ( $isCutOff ) {
		# We chopped in the middle of a character; remove it
		return $parts[1];
	}

	return $string;
}
/**
 * Remove bytes that represent an incomplete Unicode character
 * at the start of string (e.g. bytes of the char are missing)
 *
 * @param string $string
 * @return string
 */
protected function removeBadCharFirst( $string ) {
	if ( $string == '' ) {
		return $string;
	}

	$firstByte = ord( $string[0] );
	$startsMidCharacter = ( $firstByte >= 0x80 && $firstByte < 0xc0 );
	if ( !$startsMidCharacter ) {
		return $string;
	}

	# We chopped in the middle of a character; remove the whole thing
	return preg_replace( '/^[\x80-\xbf]+/', '', $string );
}
/**
 * Truncate a string of valid HTML to a specified length in bytes,
 * appending an optional string (e.g. for ellipses), and return valid HTML
 *
 * This is only intended for styled/linked text, such as HTML with
 * tags like <span> and <a>, where the tags are self-contained (valid HTML).
 * Also, this will not detect things like "display:none" CSS.
 *
 * Note: since 1.18 you do not need to leave extra room in $length for ellipses.
 *
 * @param string $text HTML string to truncate
 * @param int $length (zero/positive) Maximum length (including ellipses)
 * @param string $ellipsis String to append to the truncated text; the
 *   default '...' is replaced by the localised 'ellipsis' message
 * @return string
 */
function truncateHtml( $text, $length, $ellipsis = '...' ) {
	# Use the localized ellipsis character
	if ( $ellipsis == '...' ) {
		$ellipsis = wfMessage( 'ellipsis' )->inLanguage( $this )->escaped();
	}
	# Check if there is clearly no need to truncate
	if ( $length <= 0 ) {
		return $ellipsis; // no text shown, nothing to format (convention)
	} elseif ( strlen( $text ) <= $length ) {
		return $text; // string short enough even *with* HTML (short-circuit)
	}

	$dispLen = 0; // innerHTML length so far
	$testingEllipsis = false; // checking if ellipses will make string longer/equal?
	$tagType = 0; // 0-open, 1-close
	$bracketState = 0; // 1-tag start, 2-tag name, 0-neither
	$entityState = 0; // 0-not entity, 1-entity
	$tag = $ret = ''; // accumulated tag name, accumulated result string
	$openTags = array(); // open tag stack
	$maybeState = null; // possible truncation state

	$textLen = strlen( $text );
	$neLength = max( 0, $length - strlen( $ellipsis ) ); // non-ellipsis len if truncated
	for ( $pos = 0; true; ++$pos ) {
		# Consider truncation once the display length has reached the maximum.
		# We check if $dispLen > 0 to grab tags for the $neLength = 0 case.
		# Check that we're not in the middle of a bracket/entity...
		if ( $dispLen && $dispLen >= $neLength && $bracketState == 0 && !$entityState ) {
			if ( !$testingEllipsis ) {
				$testingEllipsis = true;
				# Save where we are; we will truncate here unless there turn out to
				# be so few remaining characters that truncation is not necessary.
				if ( !$maybeState ) { // already saved? ($neLength = 0 case)
					$maybeState = array( $ret, $openTags ); // save state
				}
			} elseif ( $dispLen > $length && $dispLen > strlen( $ellipsis ) ) {
				# String in fact does need truncation, the truncation point was OK.
				list( $ret, $openTags ) = $maybeState; // reload state
				$ret = $this->removeBadCharLast( $ret ); // multi-byte char fix
				$ret .= $ellipsis; // add ellipsis
				break;
			}
		}
		if ( $pos >= $textLen ) {
			break; // extra iteration just for above checks
		}

		# Read the next char...
		$ch = $text[$pos];
		$lastCh = $pos ? $text[$pos - 1] : '';
		$ret .= $ch; // add to result string
		if ( $ch == '<' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags ); // for bad HTML
			$entityState = 0; // for bad HTML
			$bracketState = 1; // tag started (next char tells open from close)
		} elseif ( $ch == '>' ) {
			$this->truncate_endBracket( $tag, $tagType, $lastCh, $openTags );
			$entityState = 0; // for bad HTML
			$bracketState = 0; // out of brackets
		} elseif ( $bracketState == 1 ) {
			if ( $ch == '/' ) {
				$tagType = 1; // close tag (e.g. "</span>")
			} else {
				$tagType = 0; // open tag (e.g. "<span>")
				$tag .= $ch;
			}
			$bracketState = 2; // building tag name
		} elseif ( $bracketState == 2 ) {
			if ( $ch != ' ' ) {
				$tag .= $ch;
			} else {
				// Name found (e.g. "<a href=..."), add on tag attributes...
				$pos += $this->truncate_skip( $ret, $text, "<>", $pos + 1 );
			}
		} elseif ( $bracketState == 0 ) {
			if ( $entityState ) {
				if ( $ch == ';' ) {
					$entityState = 0;
					$dispLen++; // entity is one displayed char
				}
			} else {
				if ( $neLength == 0 && !$maybeState ) {
					// Save state without $ch. We want to *hit* the first
					// display char (to get tags) but not *use* it if truncating.
					$maybeState = array( substr( $ret, 0, -1 ), $openTags );
				}
				if ( $ch == '&' ) {
					$entityState = 1; // entity found, (e.g. "&#160;")
				} else {
					$dispLen++; // this char is displayed
					// Add the next $max display text chars after this in one swoop...
					$max = ( $testingEllipsis ? $length : $neLength ) - $dispLen;
					$skipped = $this->truncate_skip( $ret, $text, "<>&", $pos + 1, $max );
					$dispLen += $skipped;
					$pos += $skipped;
				}
			}
		}
	}
	// Close the last tag if left unclosed by bad HTML.
	// Argument order follows the helper's signature: tag type first, then
	// the last character of the text.
	$this->truncate_endBracket( $tag, $tagType, $text[$textLen - 1], $openTags );
	while ( count( $openTags ) > 0 ) {
		$ret .= '</' . array_pop( $openTags ) . '>'; // close open tags
	}
	return $ret;
}
/**
 * truncateHtml() helper function
 * like strcspn() but adds the skipped chars to $ret
 *
 * @param string &$ret Result string; the skipped characters are appended
 * @param string $text Text being scanned
 * @param string $search Characters that stop the scan
 * @param int $start Offset in $text at which to start scanning
 * @param null|int $len Maximum number of characters to skip; null means
 *   no limit, negative values are treated as 0
 * @return int Number of characters skipped
 */
private function truncate_skip( &$ret, $text, $search, $start, $len = null ) {
	$textLen = strlen( $text );
	if ( $start >= $textLen ) {
		return 0;
	}

	if ( $len === null ) {
		// "No limit" means everything from $start to the end of $text.
		// A negative length must not be used for this: strcspn() treats it
		// as "stop that many characters before the end", which would leave
		// the final character unscanned.
		$len = $textLen - $start;
	} elseif ( $len < 0 ) {
		$len = 0; // sanity
	}

	$skipCount = strcspn( $text, $search, $start, $len );
	$ret .= substr( $text, $start, $skipCount );

	return $skipCount;
}
/**
 * truncateHtml() helper function
 * (a) push or pop $tag from $openTags as needed
 * (b) clear $tag value
 *
 * An open tag is pushed unless it closed itself ("/" right before the
 * bracket); a close tag pops the stack only when it matches the top entry.
 *
 * @param string &$tag Current HTML tag name we are looking at
 * @param int $tagType (0-open tag, 1-close tag)
 * @param string $lastCh Character before the '>' that ended this tag
 * @param array &$openTags Open tag stack (not accounting for $tag)
 */
private function truncate_endBracket( &$tag, $tagType, $lastCh, &$openTags ) {
	$tag = ltrim( $tag );
	if ( $tag == '' ) {
		return;
	}

	$isSelfContainedOpen = ( $tagType == 0 && $lastCh != '/' );
	if ( $isSelfContainedOpen ) {
		$openTags[] = $tag; // tag opened (didn't close itself)
	} elseif ( $tagType == 1 && $openTags && $tag == $openTags[count( $openTags ) - 1] ) {
		array_pop( $openTags ); // tag closed
	}

	$tag = '';
}
/**
 * Grammatical transformations, needed for inflected languages
 * Invoked by putting {{grammar:case|word}} in a message
 *
 * The base implementation only consults $wgGrammarForms for this
 * language code; without an entry for the case and word, the word is
 * returned unchanged.
 *
 * @param string $word
 * @param string $case
 * @return string
 */
function convertGrammar( $word, $case ) {
	global $wgGrammarForms;

	$code = $this->getCode();

	return isset( $wgGrammarForms[$code][$case][$word] )
		? $wgGrammarForms[$code][$case][$word]
		: $word;
}
/**
 * Get the grammar forms configured in $wgGrammarForms for this
 * language's code.
 *
 * @return array Array of grammar forms; empty when none are configured
 * @since 1.20
 */
function getGrammarForms() {
	global $wgGrammarForms;

	$code = $this->getCode();
	$hasForms = isset( $wgGrammarForms[$code] ) && is_array( $wgGrammarForms[$code] );

	return $hasForms ? $wgGrammarForms[$code] : array();
}
/**
 * Provides an alternative text depending on specified gender.
 * Usage {{gender:username|masculine|feminine|unknown}}.
 * username is optional, in which case the gender of current user is used,
 * but only in (some) interface messages; otherwise default gender is used.
 *
 * If no forms are given, an empty string is returned. If only one form is
 * given, it will be returned unconditionally. These details are implied by
 * the caller and cannot be overridden in subclasses.
 *
 * If three forms are given, the default is to use the third (unknown) form.
 * If fewer than three forms are given, the default is to use the first (masculine) form.
 * These details can be overridden in subclasses.
 *
 * @param string $gender
 * @param array $forms
 *
 * @return string
 */
function gender( $gender, $forms ) {
	if ( !count( $forms ) ) {
		return '';
	}

	// Make sure a masculine and a feminine form both exist.
	$forms = $this->preConvertPlural( $forms, 2 );

	if ( $gender === 'male' ) {
		$index = 0;
	} elseif ( $gender === 'female' ) {
		$index = 1;
	} else {
		$index = isset( $forms[2] ) ? 2 : 0;
	}

	return $forms[$index];
}
/**
 * Plural form transformations, needed for some languages.
 * For example, there are 3 forms of plural in Russian and Polish,
 * depending on "count mod 10". See [[w:Plural]]
 * For English it is pretty simple.
 *
 * Invoked by putting {{plural:count|wordform1|wordform2}}
 * or {{plural:count|wordform1|wordform2|wordform3}}
 *
 * Example: {{plural:{{NUMBEROFARTICLES}}|article|articles}}
 *
 * Explicit "n=form" entries are handled first; otherwise the plural rule
 * index for $count picks the form, capped at the last form given.
 *
 * @param int $count Non-localized number
 * @param array $forms Different plural forms
 * @return string Correct form of plural for $count in this language
 */
function convertPlural( $count, $forms ) {
	// Handle explicit n=pluralform cases
	$remaining = $this->handleExplicitPluralForms( $count, $forms );
	if ( is_string( $remaining ) ) {
		return $remaining;
	}

	$total = count( $remaining );
	if ( !$total ) {
		return '';
	}

	$index = min( $this->getPluralRuleIndexNumber( $count ), $total - 1 );

	return $remaining[$index];
}
/**
 * Handles explicit plural forms for Language::convertPlural()
 *
 * In {{PLURAL:$1|0=nothing|one|many}}, 0=nothing will be returned if $1 equals zero.
 * If an explicitly defined plural form matches the $count, then its
 * string value is returned, otherwise an array is returned for further
 * consideration by CLDR rules or overridden convertPlural().
 *
 * A form counts as explicit only when it *starts* with "<digits>=";
 * forms that merely contain such a sequence further in are kept as
 * ordinary forms.
 *
 * @since 1.23
 *
 * @param int $count Non-localized number
 * @param array $forms Different plural forms
 *
 * @return array|string Matching explicit form text, or the re-indexed
 *   list of non-explicit forms
 */
protected function handleExplicitPluralForms( $count, array $forms ) {
	foreach ( $forms as $index => $form ) {
		// Anchored on purpose: an unanchored match would treat ordinary
		// text containing e.g. "2=" as an explicit form and drop it.
		if ( preg_match( '/^\d+=/', $form ) ) {
			$pos = strpos( $form, '=' );
			if ( substr( $form, 0, $pos ) === (string)$count ) {
				return substr( $form, $pos + 1 );
			}
			// Explicit form for another number: not a candidate.
			unset( $forms[$index] );
		}
	}

	return array_values( $forms );
}
/**
 * Pads the array of forms given to convertPlural to the requested amount
 * of forms by copying the last one. No type check is performed here.
 *
 * @param array $forms Array of forms given to convertPlural
 * @param int $count How many forms should there be at least
 * @return array Padded array of forms
 */
protected function preConvertPlural( /* Array */ $forms, $count ) {
	for ( $have = count( $forms ); $have < $count; $have++ ) {
		// Repeat the entry at the current last position.
		$forms[] = $forms[$have - 1];
	}

	return $forms;
}
/**
 * Wraps argument with unicode control characters for directionality safety
 *
 * This solves the problem where directionality-neutral characters at the edge of
 * the argument string get interpreted with the wrong directionality from the
 * enclosing context, giving renderings that look corrupted like "(Ben_(WMF".
 *
 * The wrapping is LRE...PDF or RLE...PDF, depending on the detected
 * directionality of the argument string, using the BIDI algorithm's own "First
 * strong directional codepoint" rule. Essentially, this works round the fact that
 * there is no embedding equivalent of U+2068 FSI (isolation with heuristic
 * direction inference). The latter is cleaner but still not widely supported.
 *
 * @param string $text Text to wrap
 * @return string Text, wrapped in LRE...PDF or RLE...PDF or nothing
 */
public function embedBidi( $text = '' ) {
	$dir = Language::strongDirFromContent( $text );

	if ( $dir !== 'ltr' && $dir !== 'rtl' ) {
		// No strong directionality: do not wrap
		return $text;
	}

	// LEFT-TO-RIGHT EMBEDDING or RIGHT-TO-LEFT EMBEDDING, closed below by
	// POP DIRECTIONAL FORMATTING
	$embedding = ( $dir === 'ltr' ) ? self::$lre : self::$rle;

	return $embedding . $text . self::$pdf;
}
/**
 * @todo Maybe translate block durations. Note that this function is somewhat misnamed: it
 *   deals with translating the *duration* ("1 week", "4 days", etc), not the expiry time
 *   (which is an absolute timestamp). Please note: do NOT add this blindly, as it is used
 *   on old expiry lengths recorded in log entries. You'd need to provide the start date to
 *   match up with it.
 *
 * Lookup order: an exact match in the suggested durations, then the
 * suggested "infinite" entry if $str is infinite, then a strtotime()-based
 * description (a duration for relative input, a date for absolute input).
 *
 * @param string $str The validated block duration in English
 * @return string Somehow translated block duration
 * @see LanguageFi.php for example implementation
 */
function translateBlockExpiry( $str ) {
	$suggested = SpecialBlock::getSuggestedDurations( $this );

	foreach ( $suggested as $label => $value ) {
		if ( strcmp( $str, $value ) == 0 ) {
			return htmlspecialchars( trim( $label ) );
		}
	}

	if ( wfIsInfinity( $str ) ) {
		foreach ( $suggested as $label => $value ) {
			if ( wfIsInfinity( $value ) ) {
				return htmlspecialchars( trim( $label ) );
			}
		}
	}

	// If all else fails, return a standard duration or timestamp description.
	$fromEpoch = strtotime( $str, 0 );
	if ( $fromEpoch === false ) {
		// Unknown format. Return it as-is in case.
		return $str;
	}

	if ( $fromEpoch !== strtotime( $str, 1 ) ) {
		// The result moves with the base time, so $str is relative; having
		// been measured from 0, $fromEpoch is the duration length.
		return $this->formatDuration( $fromEpoch );
	}

	// It's an absolute timestamp.
	if ( $fromEpoch === 0 ) {
		// wfTimestamp() handles 0 as current time instead of epoch.
		return $this->timeanddate( '19700101000000' );
	}
	return $this->timeanddate( $fromEpoch );
}
/**
 * Languages like Chinese need to be segmented in order for the diff
 * to be of any use. This base implementation returns the text unchanged.
 *
 * @param string $text
 * @return string
 */
public function segmentForDiff( $text ) {
	return $text;
}
/**
 * Counterpart of segmentForDiff(): unsegment the text to show the result.
 * This base implementation returns the text unchanged.
 *
 * @param string $text
 * @return string
 */
public function unsegmentForDiff( $text ) {
	return $text;
}
/**
 * Return the LanguageConverter held by this Language object.
 *
 * @since 1.19
 * @return LanguageConverter
 */
public function getConverter() {
	return $this->mConverter;
}
/**
 * Convert text to all supported variants.
 * Delegates to the language converter.
 *
 * @param string $text
 * @return array
 */
public function autoConvertToAllVariants( $text ) {
	$converter = $this->mConverter;
	return $converter->autoConvertToAllVariants( $text );
}
/**
 * Convert text to different variants of a language.
 * Delegates to the language converter.
 *
 * @param string $text
 * @return string
 */
public function convert( $text ) {
	$converter = $this->mConverter;
	return $converter->convert( $text );
}
/**
 * Convert a Title object to a string in the preferred variant.
 * Delegates to the language converter.
 *
 * @param Title $title
 * @return string
 */
public function convertTitle( $title ) {
	$converter = $this->mConverter;
	return $converter->convertTitle( $title );
}
/**
 * Convert a namespace index to a string in the preferred variant.
 * Delegates to the language converter.
 *
 * @param int $ns
 * @return string
 */
public function convertNamespace( $ns ) {
	$converter = $this->mConverter;
	return $converter->convertNamespace( $ns );
}
/**
 * Check if this is a language with variants, i.e. whether getVariants()
 * lists more than one entry.
 *
 * @return bool
 */
public function hasVariants() {
	$variants = $this->getVariants();
	return count( $variants ) > 1;
}
/**
 * Check if the language has the specific variant.
 * The converter's validateVariant() result is reduced to a boolean.
 *
 * @since 1.19
 * @param string $variant
 * @return bool
 */
public function hasVariant( $variant ) {
	$validated = $this->mConverter->validateVariant( $variant );
	return (bool)$validated;
}
/**
 * Put custom tags (e.g. -{ }-) around math to prevent conversion.
 * Delegates to the language converter.
 *
 * @param string $text
 * @return string
 * @deprecated since 1.22 is no longer used
 */
public function armourMath( $text ) {
	$converter = $this->mConverter;
	return $converter->armourMath( $text );
}
/**
 * Perform output conversion on a string, and encode for safe HTML output.
 *
 * @param string $text Text to be converted
 * @param bool $isTitle Whether this conversion is for the article title.
 *   It is forwarded to convert(); note that convert() as declared in this
 *   class only accepts $text.
 * @return string
 * @todo this should get integrated somewhere sane
 */
public function convertHtml( $text, $isTitle = false ) {
	$converted = $this->convert( $text, $isTitle );
	return htmlspecialchars( $converted );
}
/**
 * Convert a category key by delegating to the language converter.
 *
 * @param string $key
 * @return string
 */
public function convertCategoryKey( $key ) {
	$converter = $this->mConverter;
	return $converter->convertCategoryKey( $key );
}
/**
 * Get the list of variants supported by this language, as reported by the
 * language converter. See sample implementation in LanguageZh.php.
 *
 * @return array An array of language codes
 */
public function getVariants() {
	$converter = $this->mConverter;
	return $converter->getVariants();
}
/**
 * Get the preferred variant, as reported by the language converter.
 *
 * @return string
 */
public function getPreferredVariant() {
	$converter = $this->mConverter;
	return $converter->getPreferredVariant();
}
/**
 * Get the default variant, as reported by the language converter.
 *
 * @return string
 */
public function getDefaultVariant() {
	$converter = $this->mConverter;
	return $converter->getDefaultVariant();
}
/**
 * Get the URL variant, as reported by the language converter.
 *
 * @return string
 */
public function getURLVariant() {
	$converter = $this->mConverter;
	return $converter->getURLVariant();
}
/**
 * If a language supports multiple variants, it is possible that a
 * non-existing link in one variant actually exists in another variant.
 * This function tries to find it by delegating to the language converter.
 * See e.g. LanguageZh.php.
 *
 * Both $link and $nt are passed by reference and may be modified upon return.
 *
 * @param string &$link The name of the link
 * @param Title &$nt The title object of the link
 * @param bool $ignoreOtherCond To disable other conditions when
 *   we need to transclude a template or update a category's link
 */
public function findVariantLink( &$link, &$nt, $ignoreOtherCond = false ) {
	$converter = $this->mConverter;
	$converter->findVariantLink( $link, $nt, $ignoreOtherCond );
}
/**
 * Returns language specific options used by User::getPageRenderHash(),
 * for example the preferred language variant. The value comes from the
 * language converter.
 *
 * @return string
 */
function getExtraHashOptions() {
	$converter = $this->mConverter;
	return $converter->getExtraHashOptions();
}
/**
 * For languages that support multiple variants, the title of an article
 * may be displayed differently in different variants. This function
 * returns the appropriate title defined in the body of the article, as
 * reported by the language converter.
 *
 * @return string
 */
public function getParsedTitle() {
	$converter = $this->mConverter;
	return $converter->getParsedTitle();
}
/**
 * Prepare external link text for conversion. When the text is
 * a URL, it shouldn't be converted, and it'll be wrapped in
 * the "raw" tag (-{R| }-) to prevent conversion.
 *
 * This function is called "markNoConversion" for historical
 * reasons.
 *
 * @param string $text Text to be used for external link
 * @param bool $noParse Wrap it without confirming it's a real URL first
 * @return string The tagged text, or $text unchanged when it is not
 *   wrapped
 */
public function markNoConversion( $text, $noParse = false ) {
	// Excluding protocol-relative URLs may avoid many false positives.
	if ( !$noParse
		&& !preg_match( '/^(?:' . wfUrlProtocolsWithoutProtRel() . ')/', $text )
	) {
		return $text;
	}

	return $this->mConverter->markNoConversion( $text );
}
/**
 * A regular expression to match legal word-trailing characters
 * which should be merged onto a link of the form [[foo]]bar.
 * Read from the 'linkTrail' item of the localisation data cache.
 *
 * @return string
 */
public function linkTrail() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'linkTrail' );
}
/**
 * A regular expression character set to match legal word-prefixing
 * characters which should be merged onto a link of the form foo[[bar]].
 * Read from the 'linkPrefixCharset' item of the localisation data cache.
 *
 * @return string
 */
public function linkPrefixCharset() {
	$cache = self::$dataCache;
	return $cache->getItem( $this->mCode, 'linkPrefixCharset' );
}
/**
 * Returns this object itself, after emitting a deprecation notice.
 *
 * @deprecated since 1.24, will be removed in 1.25
 * @return Language
 */
function getLangObj() {
	wfDeprecated( __METHOD__, '1.24' );

	return $this;
}
/**
 * Get the "parent" language which has a converter to convert a "compatible" language
 * (in another variant) to this language (eg. zh for zh-cn, but not en for en-gb).
 *
 * The result is cached in mParentLanguage, where false means "not computed
 * yet" and null means "no parent language".
 *
 * @return Language|null
 * @since 1.22
 */
public function getParentLanguage() {
	if ( $this->mParentLanguage !== false ) {
		return $this->mParentLanguage;
	}

	$ownCode = $this->getCode();
	$pieces = explode( '-', $ownCode );
	$baseCode = $pieces[0];

	$parent = null;
	if ( in_array( $baseCode, LanguageConverter::$languagesWithVariants ) ) {
		$candidate = Language::factory( $baseCode );
		// The base language only counts as parent if it knows this code
		// as one of its variants.
		if ( $candidate->hasVariant( $ownCode ) ) {
			$parent = $candidate;
		}
	}

	$this->mParentLanguage = $parent;
	return $parent;
}
/**
 * Get the RFC 3066 code for this language object.
 *
 * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
 * htmlspecialchars() or similar
 *
 * @return string
 */
public function getCode() {
	return $this->mCode;
}
/**
 * Get the code in Bcp47 format which we can use
 * inside of html lang="" tags.
 *
 * The value is computed with wfBCP47() on first use and cached in mHtmlCode.
 *
 * NOTE: The return value of this function is NOT HTML-safe and must be escaped with
 * htmlspecialchars() or similar.
 *
 * @since 1.19
 * @return string
 */
public function getHtmlCode() {
	if ( $this->mHtmlCode === null ) {
		$this->mHtmlCode = wfBCP47( $this->getCode() );
	}

	return $this->mHtmlCode;
}
/**
 * Set the language code and reset the values derived from it.
 *
 * @param string $code
 */
public function setCode( $code ) {
	// Ensure we don't leave incorrect cached data lying around: both
	// values below are lazily recomputed from the code.
	$this->mParentLanguage = false;
	$this->mHtmlCode = null;
	$this->mCode = $code;
}
/**
 * Get the language code from a file name. Inverse of getFileName()
 *
 * The code part must be one capital letter followed by lower-case letters
 * or underscores; it is lower-cased and its underscores become hyphens.
 *
 * @param string $filename $prefix . $languageCode . $suffix
 * @param string $prefix Prefix before the language code
 * @param string $suffix Suffix after the language code
 * @return string|bool Language code, or false if $prefix or $suffix isn't found
 */
public static function getCodeFromFileName( $filename, $prefix = 'Language', $suffix = '.php' ) {
	$pattern = '/' . preg_quote( $prefix, '/' ) . '([A-Z][a-z_]+)' .
		preg_quote( $suffix, '/' ) . '/';

	$matches = null;
	preg_match( $pattern, $filename, $matches );
	if ( !count( $matches ) ) {
		return false;
	}

	$mangled = strtolower( $matches[1] );
	return str_replace( '_', '-', $mangled );
}
/**
 * Get the class name for a language code: 'Language' itself for 'en',
 * otherwise 'Language' followed by the code with its first letter
 * upper-cased and hyphens replaced by underscores.
 *
 * @param string $code
 * @return string Name of the language class
 */
public static function classFromCode( $code ) {
	return ( $code == 'en' )
		? 'Language'
		: 'Language' . str_replace( '-', '_', ucfirst( $code ) );
}
/**
 * Get the name of a file for a certain language code
 *
 * $prefix has no default value: a default could never be used because the
 * following parameter, $code, is required, and declaring an optional
 * parameter before a required one is deprecated in PHP 8.
 *
 * @param string $prefix Prepend this to the filename
 * @param string $code Language code
 * @param string $suffix Append this to the filename
 * @throws MWException When $code is rejected by isValidBuiltInCode()
 * @return string $prefix . $mangledCode . $suffix
 */
public static function getFileName( $prefix, $code, $suffix = '.php' ) {
	if ( !self::isValidBuiltInCode( $code ) ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	// Mangle the code: upper-case first letter, hyphens become underscores.
	return $prefix . str_replace( '-', '_', ucfirst( $code ) ) . $suffix;
}
/**
 * Get the path of the PHP messages file for a language code. The
 * 'Language::getMessagesFileName' hook is run with the code and a
 * reference to the path before it is returned.
 *
 * @param string $code
 * @return string
 */
public static function getMessagesFileName( $code ) {
	global $IP;

	$path = self::getFileName( "$IP/languages/messages/Messages", $code, '.php' );
	Hooks::run( 'Language::getMessagesFileName', array( $code, &$path ) );

	return $path;
}
/**
 * Get the path of the JSON messages file for a language code.
 *
 * @param string $code
 * @throws MWException When $code is rejected by isValidBuiltInCode()
 * @return string
 * @since 1.23
 */
public static function getJsonMessagesFileName( $code ) {
	global $IP;

	$isValid = self::isValidBuiltInCode( $code );
	if ( !$isValid ) {
		throw new MWException( "Invalid language code \"$code\"" );
	}

	return "$IP/languages/i18n/$code.json";
}
/**
 * Get the first fallback for a given language.
 *
 * @param string $code
 *
 * @return bool|string First entry of getFallbacksFor( $code ), or false
 *   when that list is empty
 */
public static function getFallbackFor( $code ) {
	$chain = self::getFallbacksFor( $code );

	return $chain ? $chain[0] : false;
}
/**
 * Get the ordered list of fallback languages.
 *
 * @since 1.19
 * @param string $code Language code
 * @return array Empty for 'en' and for codes rejected by
 *   isValidBuiltInCode(); otherwise the language's 'fallbackSequence',
 *   or array( 'en' ) when that sequence is empty
 */
public static function getFallbacksFor( $code ) {
	if ( $code === 'en' || !Language::isValidBuiltInCode( $code ) ) {
		return array();
	}

	$sequence = self::getLocalisationCache()->getItem( $code, 'fallbackSequence' );
	if ( $sequence ) {
		return $sequence;
	}

	// For unknown languages, fallbackSequence returns an empty array,
	// hardcode fallback to 'en' in that case.
	return array( 'en' );
}
/**
 * Get the ordered list of fallback languages, ending with the fallback
 * language chain for the site language.
 *
 * Results are memoised in self::$fallbackLanguageCache, keyed by the
 * requested code together with the site language code.
 *
 * @since 1.22
 * @param string $code Language code
 * @return array Array( fallbacks, site fallbacks )
 */
public static function getFallbacksIncludingSiteLanguage( $code ) {
	global $wgLanguageCode;

	// Usually, we will only store a tiny number of fallback chains, so we
	// keep them in static memory.
	$cacheKey = "{$code}-{$wgLanguageCode}";
	if ( array_key_exists( $cacheKey, self::$fallbackLanguageCache ) ) {
		return self::$fallbackLanguageCache[$cacheKey];
	}

	$ownChain = self::getFallbacksFor( $code );

	// The site's fallback chain, including the site language itself
	$siteChain = self::getFallbacksFor( $wgLanguageCode );
	array_unshift( $siteChain, $wgLanguageCode );

	// Eliminate any languages already included in the language's own chain
	$siteChain = array_diff( $siteChain, $ownChain );

	$result = array( $ownChain, $siteChain );
	self::$fallbackLanguageCache[$cacheKey] = $result;

	return $result;
}
/**
 * Get all messages for a given language
 * WARNING: this may take a long time. If you just need all message *keys*
 * but need the *contents* of only a few messages, consider using getMessageKeysFor().
 *
 * @param string $code
 *
 * @return array
 */
public static function getMessagesFor( $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getItem( $code, 'messages' );
}
/**
 * Get a message for a given language
 *
 * @param string $key
 * @param string $code
 *
 * @return string
 */
public static function getMessageFor( $key, $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getSubitem( $code, 'messages', $key );
}
/**
 * Get all message keys for a given language. This is a faster alternative to
 * array_keys( Language::getMessagesFor( $code ) )
 *
 * @since 1.19
 * @param string $code Language code
 * @return array Array of message keys (strings)
 */
public static function getMessageKeysFor( $code ) {
	$cache = self::getLocalisationCache();
	return $cache->getSubItemList( $code, 'messages' );
}
/**
 * Expand the $1 placeholder in a namespace name with $wgMetaNamespace,
 * apply {{grammar:...}} transformations and replace spaces by underscores.
 * A name without $1 is returned untouched.
 *
 * @param string $talk
 * @return mixed
 */
function fixVariableInNamespace( $talk ) {
	if ( strpos( $talk, '$1' ) === false ) {
		return $talk;
	}

	global $wgMetaNamespace;
	$talk = str_replace( '$1', $wgMetaNamespace, $talk );

	# Allow grammar transformations
	# Allowing full message-style parsing would make simple requests
	# such as action=raw much more expensive than they need to be.
	# This will hopefully cover most cases.
	# The callback holds $this by value: objects are passed by handle, so a
	# reference to $this is unnecessary.
	$talk = preg_replace_callback( '/{{grammar:(.*?)\|(.*?)}}/i',
		array( $this, 'replaceGrammarInNamespace' ), $talk );
	return str_replace( ' ', '_', $talk );
}
/**
 * preg_replace_callback() handler used by fixVariableInNamespace().
 *
 * @param array $m Regex matches: $m[1] is the grammar case, $m[2] the word
 * @return string
 */
function replaceGrammarInNamespace( $m ) {
	$case = trim( $m[1] );
	$word = trim( $m[2] );

	return $this->convertGrammar( $word, $case );
}
/**
 * Get the upper-case and lower-case character maps from the precompiled
 * Utf8Case.ser data. The maps are loaded once and then kept in static
 * variables.
 *
 * @throws MWException When the precompiled data cannot be loaded
 * @return array Array( $wikiUpperChars, $wikiLowerChars )
 */
static function getCaseMaps() {
	static $wikiUpperChars, $wikiLowerChars;

	if ( !isset( $wikiUpperChars ) ) {
		$data = wfGetPrecompiledData( 'Utf8Case.ser' );
		if ( $data === false ) {
			throw new MWException(
				"Utf8Case.ser is missing, please run \"make\" in the serialized directory\n" );
		}
		$wikiUpperChars = $data['wikiUpperChars'];
		$wikiLowerChars = $data['wikiLowerChars'];
	}

	return array( $wikiUpperChars, $wikiLowerChars );
}
/**
 * Decode an expiry (block, protection, etc) which has come from the DB
 *
 * @param string $expiry Database expiry String
 * @param bool|int $format True to process using language functions, or TS_ constant
 *   to return the expiry in a given timestamp
 * @param string $infinity If $format is not true, use this string for infinite expiry
 * @return string
 * @since 1.18
 */
public function formatExpiry( $expiry, $format = true, $infinity = 'infinity' ) {
	// The database's own representation of "infinity", looked up once.
	static $dbInfinity;
	if ( $dbInfinity === null ) {
		$dbInfinity = wfGetDB( DB_SLAVE )->getInfinity();
	}

	$useLanguage = ( $format === true );
	$isInfinite = ( $expiry == '' || $expiry === 'infinity' || $expiry == $dbInfinity );

	if ( $isInfinite ) {
		return $useLanguage
			? $this->getMessageFromDB( 'infiniteblock' )
			: $infinity;
	}

	return $useLanguage
		? $this->timeanddate( $expiry, /* User preference timezone */ true )
		: wfTimestamp( $format, $expiry );
}
/**
 * Format a time period given in seconds as text, using the seconds,
 * minutes, hours and days messages of this language.
 *
 * The units shown depend on the magnitude: seconds with one decimal below
 * ten seconds, whole seconds below a minute, minutes and seconds below an
 * hour, hours/minutes(/seconds) up to two days, and days plus a remainder
 * beyond that.
 *
 * @param int|float $seconds
 * @param array $format Optional
 *   If $format['avoid'] === 'avoidseconds': don't mention seconds if $seconds >= 1 hour.
 *   If $format['avoid'] === 'avoidminutes': don't mention seconds/minutes if $seconds > 48 hours.
 *   If $format['noabbrevs'] is true: use 'seconds' and friends instead of 'seconds-abbrev'
 *     and friends.
 *   For backwards compatibility, $format may also be one of the strings 'avoidseconds'
 *     or 'avoidminutes'.
 * @return string
 */
function formatTimePeriod( $seconds, $format = array() ) {
	if ( !is_array( $format ) ) {
		// For backwards compatibility
		$format = array( 'avoid' => $format );
	}
	foreach ( array( 'avoid', 'noabbrevs' ) as $option ) {
		if ( !isset( $format[$option] ) ) {
			$format[$option] = false;
		}
	}

	$secondsMsg = wfMessage(
		$format['noabbrevs'] ? 'seconds' : 'seconds-abbrev' )->inLanguage( $this );
	$minutesMsg = wfMessage(
		$format['noabbrevs'] ? 'minutes' : 'minutes-abbrev' )->inLanguage( $this );
	$hoursMsg = wfMessage(
		$format['noabbrevs'] ? 'hours' : 'hours-abbrev' )->inLanguage( $this );
	$daysMsg = wfMessage(
		$format['noabbrevs'] ? 'days' : 'days-abbrev' )->inLanguage( $this );

	if ( round( $seconds * 10 ) < 100 ) {
		// Less than ten seconds: keep one decimal place
		$out = $this->formatNum( sprintf( "%.1f", round( $seconds * 10 ) / 10 ) );
		$out = $secondsMsg->params( $out )->text();
	} elseif ( round( $seconds ) < 60 ) {
		// Less than a minute: whole seconds
		$out = $this->formatNum( round( $seconds ) );
		$out = $secondsMsg->params( $out )->text();
	} elseif ( round( $seconds ) < 3600 ) {
		// Less than an hour: minutes and seconds
		$minutes = floor( $seconds / 60 );
		$secondsPart = round( fmod( $seconds, 60 ) );
		if ( $secondsPart == 60 ) {
			// Rounding filled up the minute: carry over
			$secondsPart = 0;
			$minutes++;
		}
		$out = $minutesMsg->params( $this->formatNum( $minutes ) )->text()
			. ' '
			. $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
	} elseif ( round( $seconds ) <= 2 * 86400 ) {
		// Up to two days: hours, minutes and (unless avoided) seconds
		$hours = floor( $seconds / 3600 );
		$minutes = floor( ( $seconds - $hours * 3600 ) / 60 );
		$secondsPart = round( $seconds - $hours * 3600 - $minutes * 60 );
		if ( $secondsPart == 60 ) {
			$secondsPart = 0;
			$minutes++;
		}
		if ( $minutes == 60 ) {
			$minutes = 0;
			$hours++;
		}
		$out = $hoursMsg->params( $this->formatNum( $hours ) )->text()
			. ' '
			. $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		if ( !in_array( $format['avoid'], array( 'avoidseconds', 'avoidminutes' ) ) ) {
			$out .= ' ' . $secondsMsg->params( $this->formatNum( $secondsPart ) )->text();
		}
	} else {
		// More than two days
		$days = floor( $seconds / 86400 );
		if ( $format['avoid'] === 'avoidminutes' ) {
			// Days and rounded hours only
			$hours = round( ( $seconds - $days * 86400 ) / 3600 );
			if ( $hours == 24 ) {
				$hours = 0;
				$days++;
			}
			$out = $daysMsg->params( $this->formatNum( $days ) )->text()
				. ' '
				. $hoursMsg->params( $this->formatNum( $hours ) )->text();
		} elseif ( $format['avoid'] === 'avoidseconds' ) {
			// Days, hours and rounded minutes
			$hours = floor( ( $seconds - $days * 86400 ) / 3600 );
			$minutes = round( ( $seconds - $days * 86400 - $hours * 3600 ) / 60 );
			if ( $minutes == 60 ) {
				$minutes = 0;
				$hours++;
			}
			if ( $hours == 24 ) {
				$hours = 0;
				$days++;
			}
			$out = $daysMsg->params( $this->formatNum( $days ) )->text()
				. ' '
				. $hoursMsg->params( $this->formatNum( $hours ) )->text()
				. ' '
				. $minutesMsg->params( $this->formatNum( $minutes ) )->text();
		} else {
			// Days, then the remainder formatted by a recursive call
			$out = $daysMsg->params( $this->formatNum( $days ) )->text()
				. ' '
				. $this->formatTimePeriod( $seconds - $days * 86400, $format );
		}
	}

	return $out;
}
/**
 * Format a bitrate for output, using an appropriate
 * unit (bps, kbps, Mbps, Gbps, Tbps, Pbps, Ebps, Zbps or Ybps) according to
 * the magnitude in question.
 *
 * This uses base 1000. For base 1024 use formatSize(), for another base
 * see formatComputingNumbers().
 *
 * @param int $bps
 * @return string
 */
function formatBitrate( $bps ) {
	$boundary = 1000;
	return $this->formatComputingNumbers( $bps, $boundary, "bitrate-$1bits" );
}
/**
 * Format a number with the largest fitting unit prefix (none, kilo, mega,
 * ...), dividing by $boundary for each step up. The "$1" in $messageKey is
 * replaced by the prefix to obtain the message; the "$1" in the message
 * text is replaced by the formatted number.
 *
 * @param int $size Size of the unit
 * @param int $boundary Size boundary (1000, or 1024 in most cases)
 * @param string $messageKey Message key to be used
 * @return string
 */
function formatComputingNumbers( $size, $boundary, $messageKey ) {
	if ( $size <= 0 ) {
		// Non-positive values always use the unprefixed unit.
		$number = $this->formatNum( $size );
		$template = $this->getMessageFromDB( str_replace( '$1', '', $messageKey ) );
		return str_replace( '$1', $number, $template );
	}

	// These prefixes are substituted into the message key, so their
	// spelling must stay exactly as it is.
	$prefixes = array( '', 'kilo', 'mega', 'giga', 'tera', 'peta', 'exa', 'zeta', 'yotta' );
	$last = count( $prefixes ) - 1;

	for ( $step = 0; $size >= $boundary && $step < $last; $step++ ) {
		$size /= $boundary;
	}

	// For small sizes no decimal places necessary;
	// for MB and bigger two decimal places are smarter
	$precision = ( $step > 1 ) ? 2 : 0;

	$key = str_replace( '$1', $prefixes[$step], $messageKey );
	$size = round( $size, $precision );
	$template = $this->getMessageFromDB( $key );

	return str_replace( '$1', $this->formatNum( $size ), $template );
}
/**
 * Format a size in bytes for output, using an appropriate
 * unit (B, KB, MB, GB, TB, PB, EB, ZB or YB) according to the magnitude in question
 *
 * This method uses base 1024. For base 1000 use formatBitrate(), for
 * another base see formatComputingNumbers()
 *
 * @param int $size Size to format
 * @return string Plain text (not HTML)
 */
function formatSize( $size ) {
	$boundary = 1024;
	return $this->formatComputingNumbers( $size, $boundary, "size-$1bytes" );
}
/**
 * Make a list item, used by various special pages
 *
 * When $details is empty, $page is returned on its own. Otherwise the
 * result is $page, direction mark(s), the word separator and $details
 * wrapped by the 'parentheses' message.
 *
 * @param string $page Page link
 * @param string $details HTML safe text between brackets
 * @param bool $oppositedm Add the direction mark opposite to your
 *   language, to display text properly
 * @return string HTML escaped string
 */
function specialList( $page, $details, $oppositedm = true ) {
	if ( !$details ) {
		return $page;
	}

	$opposite = $oppositedm ? $this->getDirMark( true ) : '';
	$dirmark = $opposite . $this->getDirMark();

	$separator = $this->msg( 'word-separator' )->escaped();
	$bracketed = $this->msg( 'parentheses' )->rawParams( $details )->escaped();

	return $page . $dirmark . $separator . $bracketed;
}
/**
 * Generate (prev x| next x) (20|50|100...) type links for paging
 *
 * The 'previous' entry is plain text when $offset is 0, and the 'next'
 * entry is plain text when $atend is true; otherwise they are links.
 *
 * @param Title $title Title object to link
 * @param int $offset
 * @param int $limit
 * @param array $query Optional URL query parameters
 * @param bool $atend Optional param for specified if this is the last page
 * @return string
 */
public function viewPrevNext( Title $title, $offset, $limit,
	array $query = array(), $atend = false
) {
	// @todo FIXME: Why on earth this needs one message for the text and another one for tooltip?

	# Make 'previous' link
	$prevText = wfMessage( 'prevn' )->inLanguage( $this )->title( $title )
		->numParams( $limit )->text();
	$prevHtml = ( $offset > 0 )
		? $this->numLink( $title, max( $offset - $limit, 0 ), $limit,
			$query, $prevText, 'prevn-title', 'mw-prevlink' )
		: htmlspecialchars( $prevText );

	# Make 'next' link
	$nextText = wfMessage( 'nextn' )->inLanguage( $this )->title( $title )
		->numParams( $limit )->text();
	$nextHtml = $atend
		? htmlspecialchars( $nextText )
		: $this->numLink( $title, $offset + $limit, $limit,
			$query, $nextText, 'nextn-title', 'mw-nextlink' );

	# Make links to set number of items per page
	$limitLinks = array();
	foreach ( array( 20, 50, 100, 250, 500 ) as $perPage ) {
		$limitLinks[] = $this->numLink( $title, $offset, $perPage,
			$query, $this->formatNum( $perPage ), 'shown-title', 'mw-numlink' );
	}

	return wfMessage( 'viewprevnext' )->inLanguage( $this )->title( $title )
		->rawParams( $prevHtml, $nextHtml, $this->pipeList( $limitLinks ) )->escaped();
}
/**
 * Helper function for viewPrevNext() that generates links
 *
 * @param Title $title Title object to link
 * @param int $offset
 * @param int $limit
 * @param array $query Extra query parameters; 'limit' and 'offset' keys
 *   in it are overridden by $limit and $offset
 * @param string $link Text to use for the link; will be escaped
 * @param string $tooltipMsg Name of the message to use as tooltip
 * @param string $class Value of the "class" attribute of the link
 * @return string HTML fragment
 */
private function numLink( Title $title, $offset, $limit, array $query, $link,
	$tooltipMsg, $class
) {
	// With the + operator the left-hand keys win over those in $query.
	$query = array( 'limit' => $limit, 'offset' => $offset ) + $query;

	$tooltip = wfMessage( $tooltipMsg )->inLanguage( $this )->title( $title )
		->numParams( $limit )->text();

	$attribs = array(
		'href' => $title->getLocalURL( $query ),
		'title' => $tooltip,
		'class' => $class,
	);

	return Html::element( 'a', $attribs, $link );
}
/**
 * Get the conversion rule title, if any, from the language converter.
 *
 * @return string
 */
public function getConvRuleTitle() {
	$converter = $this->mConverter;
	return $converter->getConvRuleTitle();
}
/**
 * Get the compiled plural rules for the language
 *
 * When the language has no rules of its own, the first non-empty rule set
 * found along its fallback chain is used. The fallback chain is only
 * looked up in that case.
 *
 * @since 1.20
 * @return array Associative array with plural form, and plural rule as key-value pairs
 */
public function getCompiledPluralRules() {
	$pluralRules = self::$dataCache->getItem( strtolower( $this->mCode ), 'compiledPluralRules' );
	if ( $pluralRules ) {
		return $pluralRules;
	}

	foreach ( Language::getFallbacksFor( $this->mCode ) as $fallbackCode ) {
		$pluralRules = self::$dataCache->getItem( strtolower( $fallbackCode ), 'compiledPluralRules' );
		if ( $pluralRules ) {
			break;
		}
	}

	return $pluralRules;
}
/**
 * Get the plural rules for the language
 *
 * When the language has no rules of its own, the first non-empty rule set
 * found along its fallback chain is used. The fallback chain is only
 * looked up in that case.
 *
 * @since 1.20
 * @return array Associative array with plural form number and plural rule as key-value pairs
 */
public function getPluralRules() {
	$pluralRules = self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRules' );
	if ( $pluralRules ) {
		return $pluralRules;
	}

	foreach ( Language::getFallbacksFor( $this->mCode ) as $fallbackCode ) {
		$pluralRules = self::$dataCache->getItem( strtolower( $fallbackCode ), 'pluralRules' );
		if ( $pluralRules ) {
			break;
		}
	}

	return $pluralRules;
}
/**
 * Get the plural rule types for the language
 *
 * When the language has no rule types of its own, the first non-empty set
 * found along its fallback chain is used. The fallback chain is only
 * looked up in that case.
 *
 * @since 1.22
 * @return array Associative array with plural form number and plural rule type as key-value pairs
 */
public function getPluralRuleTypes() {
	$pluralRuleTypes = self::$dataCache->getItem( strtolower( $this->mCode ), 'pluralRuleTypes' );
	if ( $pluralRuleTypes ) {
		return $pluralRuleTypes;
	}

	foreach ( Language::getFallbacksFor( $this->mCode ) as $fallbackCode ) {
		$pluralRuleTypes = self::$dataCache->getItem( strtolower( $fallbackCode ), 'pluralRuleTypes' );
		if ( $pluralRuleTypes ) {
			break;
		}
	}

	return $pluralRuleTypes;
}
/**
 * Find the index number of the plural rule appropriate for the given number,
 * by evaluating the language's compiled plural rules against it.
 *
 * @param int $number
 * @return int The index number of the plural rule
 */
public function getPluralRuleIndexNumber( $number ) {
	return Evaluator::evaluateCompiled( $number, $this->getCompiledPluralRules() );
}
/**
 * Find the plural rule type appropriate for the given number
 * For example, if the language is set to Arabic, getPluralType(5) should
 * return 'few'.
 *
 * Falls back to 'other' when no type is recorded for the rule index.
 *
 * @since 1.22
 * @param int $number
 * @return string The name of the plural rule type, e.g. one, two, few, many
 */
public function getPluralRuleType( $number ) {
	$index = $this->getPluralRuleIndexNumber( $number );
	$types = $this->getPluralRuleTypes();

	return isset( $types[$index] ) ? $types[$index] : 'other';
}