2 * @author Neil Kandalgaonkar, 2010
3 * @author Timo Tijhof, 2011-2013
11 * Parse titles into an object structure. Note that when using the constructor
12 * directly, passing invalid titles will result in an exception. Use #newFromText to use the
13 * logic directly and get null for invalid titles which is easier to work with.
16 * @param {string} title Title of the page. If no second argument given,
17 * this will be searched for a namespace
18 * @param {number} [namespace=NS_MAIN] If given, will be used as default namespace for the given title
19 * @throws {Error} When the title is invalid
21 function Title( title, namespace ) {
22 var parsed = parse( title, namespace );
24 throw new Error( 'Unable to parse title' );
27 this.namespace = parsed.namespace;
28 this.title = parsed.title;
29 this.ext = parsed.ext;
30 this.fragment = parsed.fragment;
56 * @property NS_SPECIAL
77 * @property FILENAME_MAX_BYTES
79 FILENAME_MAX_BYTES = 240,
84 * @property TITLE_MAX_BYTES
86 TITLE_MAX_BYTES = 255,
89 * Get the namespace id from a namespace name (either from the localized, canonical or alias
92 * Example: On a German wiki this would return 6 for any of 'File', 'Datei', 'Image' or
97 * @method getNsIdByName
98 * @param {string} ns Namespace name (case insensitive, leading/trailing space ignored)
99 * @return {number|boolean} Namespace id or boolean false
101 getNsIdByName = function ( ns ) {
104 // Don't cast non-strings to strings, because null or undefined should not result in
105 // returning the id of a potential namespace called "Null:" (e.g. on null.example.org/wiki)
106 // Also, toLowerCase throws exception on null/undefined, because it is a String method.
107 if ( typeof ns !== 'string' ) {
110 ns = ns.toLowerCase();
111 id = mw.config.get( 'wgNamespaceIds' )[ns];
112 if ( id === undefined ) {
118 rUnderscoreTrim = /^_+|_+$/g,
120 rSplit = /^(.+?)_*:_*(.*)$/,
122 // See MediaWikiTitleCodec.php#getTitleInvalidRegex
123 rInvalid = new RegExp(
124 '[^' + mw.config.get( 'wgLegalTitleChars' ) + ']' +
125 // URL percent encoding sequences interfere with the ability
126 // to round-trip titles -- you can't link to them consistently.
128 // XML/HTML character references produce similar issues.
129 '|&[A-Za-z0-9\u0080-\uFFFF]+;' +
134 // From MediaWikiTitleCodec.php#L225 @26fcab1f18c568a41
135 // "Clean up whitespace" in function MediaWikiTitleCodec::splitTitleString()
136 rWhitespace = /[ _\u0009\u00A0\u1680\u180E\u2000-\u200A\u2028\u2029\u202F\u205F\u3000\s]+/g,
139 * Slightly modified from Flinfo. Credit goes to Lupo and Flominator.
142 * @property sanitationRules
151 // Space, underscore, tab, NBSP and other unusual spaces
153 pattern: rWhitespace,
157 // unicode bidi override characters: Implicit, Embeds, Overrides
159 pattern: /[\u200E\u200F\u202A-\u202E]/g,
163 // control characters
165 pattern: /[\x00-\x1f\x7f]/g,
169 // URL encoding (possibly)
171 pattern: /%([0-9A-Fa-f]{2})/g,
175 // HTML-character-entities
177 pattern: /&(([A-Za-z0-9\x80-\xff]+|#[0-9]+|#x[0-9A-Fa-f]+);)/g,
181 // slash, colon (not supported by file systems like NTFS/Windows, Mac OS 9 [:], ext4 [/])
187 // brackets, greater than
193 // brackets, less than
199 // everything that wasn't covered yet
201 pattern: new RegExp( rInvalid.source, 'g' ),
205 // directory structures
207 pattern: /^(\.|\.\.|\.\/.*|\.\.\/.*|.*\/\.\/.*|.*\/\.\.\/.*|.*\/\.|.*\/\.\.)$/g,
214 * Internal helper for #constructor and #newFromText.
216 * Based on Title.php#secureAndSplit
221 * @param {string} title
222 * @param {number} [defaultNamespace=NS_MAIN]
223 * @return {Object|boolean}
225 parse = function ( title, defaultNamespace ) {
226 var namespace, m, id, i, fragment, ext;
228 namespace = defaultNamespace === undefined ? NS_MAIN : defaultNamespace;
231 // Normalise whitespace to underscores and remove duplicates
232 .replace( /[ _\s]+/g, '_' )
234 .replace( rUnderscoreTrim, '' );
236 // Process initial colon
237 if ( title !== '' && title.charAt( 0 ) === ':' ) {
238 // Initial colon means main namespace instead of specified default
244 .replace( rUnderscoreTrim, '' );
247 if ( title === '' ) {
251 // Process namespace prefix (if any)
252 m = title.match( rSplit );
254 id = getNsIdByName( m[1] );
255 if ( id !== false ) {
256 // Ordinary namespace
260 // For Talk:X pages, make sure X has no "namespace" prefix
261 if ( namespace === NS_TALK && ( m = title.match( rSplit ) ) ) {
262 // Disallow titles like Talk:File:x (subject should roundtrip: talk:file:x -> file:x -> file_talk:x)
263 if ( getNsIdByName( m[1] ) !== false ) {
271 i = title.indexOf( '#' );
276 // Get segment starting after the hash
279 // NB: Must not be trimmed ("Example#_foo" is not the same as "Example#foo")
280 .replace( /_/g, ' ' );
285 // Trim underscores, again (strips "_" from "bar" in "Foo_bar_#quux")
286 .replace( rUnderscoreTrim, '' );
289 // Reject illegal characters
290 if ( title.match( rInvalid ) ) {
294 // Disallow titles that browsers or servers might resolve as directory navigation
296 title.indexOf( '.' ) !== -1 && (
297 title === '.' || title === '..' ||
298 title.indexOf( './' ) === 0 ||
299 title.indexOf( '../' ) === 0 ||
300 title.indexOf( '/./' ) !== -1 ||
301 title.indexOf( '/../' ) !== -1 ||
302 title.slice( -2 ) === '/.' ||
303 title.slice( -3 ) === '/..'
309 // Disallow magic tilde sequence
310 if ( title.indexOf( '~~~' ) !== -1 ) {
314 // Disallow titles exceeding the TITLE_MAX_BYTES byte size limit (size of underlying database field)
315 // Except for special pages, e.g. [[Special:Block/Long name]]
316 // Note: The PHP implementation also asserts that even in NS_SPECIAL, the title should
317 // be less than 512 bytes.
318 if ( namespace !== NS_SPECIAL && $.byteLength( title ) > TITLE_MAX_BYTES ) {
322 // Can't make a link to a namespace alone.
323 if ( title === '' && namespace !== NS_MAIN ) {
327 // Any remaining initial :s are illegal.
328 if ( title.charAt( 0 ) === ':' ) {
332 // For backwards-compatibility with old mw.Title, we separate the extension from the
333 // rest of the title.
334 i = title.lastIndexOf( '.' );
335 if ( i === -1 || title.length <= i + 1 ) {
336 // Extensions are the non-empty segment after the last dot
339 ext = title.slice( i + 1 );
340 title = title.slice( 0, i );
344 namespace: namespace,
352 * Convert db-key to readable text.
360 text = function ( s ) {
361 if ( s !== null && s !== undefined ) {
362 return s.replace( /_/g, ' ' );
369 * Sanitizes a string based on a rule set and a filter
375 * @param {Array} filter
378 sanitize = function ( s, filter ) {
379 var i, ruleLength, rule, m, filterLength,
380 rules = sanitationRules;
382 for ( i = 0, ruleLength = rules.length; i < ruleLength; ++i ) {
384 for ( m = 0, filterLength = filter.length; m < filterLength; ++m ) {
385 if ( rule[filter[m]] ) {
386 s = s.replace( rule.pattern, rule.replace );
394 * Cuts a string to a specific byte length, assuming UTF-8,
395 * or to fewer bytes if the last character is a multi-byte one
399 * @method trimToByteLength
401 * @param {number} length
404 trimToByteLength = function ( s, length ) {
405 var byteLength, chopOffChars, chopOffBytes;
407 // The byte length is always greater than or equal to the length in characters
408 s = s.substr( 0, length );
409 while ( ( byteLength = $.byteLength( s ) ) > length ) {
410 // Calculate how many characters can be safely removed
411 // First, we need to know how many bytes the string exceeds the threshold
412 chopOffBytes = byteLength - length;
413 // A character in UTF-8 is at most 4 bytes
414 // One character must be removed in any case because the
415 // string is too long
416 chopOffChars = Math.max( 1, Math.floor( chopOffBytes / 4 ) );
417 s = s.substr( 0, s.length - chopOffChars );
423 * Cuts a file name to a specific byte length
427 * @method trimFileNameToByteLength
428 * @param {string} name without extension
429 * @param {string} extension file extension
430 * @return {string} The full name, including extension
432 trimFileNameToByteLength = function ( name, extension ) {
433 // There is a special byte limit for file names and ... remember the dot
434 return trimToByteLength( name, FILENAME_MAX_BYTES - extension.length - 1 ) + '.' + extension;
437 // Polyfill for ES5 Object.create
438 createObject = Object.create || ( function () {
439 return function ( o ) {
441 if ( o !== Object( o ) ) {
442 throw new Error( 'Cannot inherit from a non-object' );
452 * Constructor for Title objects with a null return instead of an exception for invalid titles.
455 * @param {string} title
456 * @param {number} [namespace=NS_MAIN] Default namespace
457 * @return {mw.Title|null} A valid Title object or null if the title is invalid
459 Title.newFromText = function ( title, namespace ) {
460 var t, parsed = parse( title, namespace );
465 t = createObject( Title.prototype );
466 t.namespace = parsed.namespace;
467 t.title = parsed.title;
469 t.fragment = parsed.fragment;
475 * Constructor for Title objects from user input altering that input to
476 * produce a title that MediaWiki will accept as legal
479 * @param {string} title
480 * @param {number} [defaultNamespace=NS_MAIN]
481 * If given, will be used as default namespace for the given title.
482 * @param {Object} [options] additional options
483 * @param {string} [options.fileExtension='']
484 * If the title is about to be created for the Media or File namespace,
485 * ensures the resulting Title has the correct extension. Useful, for example
486 * on systems that predict the type by content-sniffing, not by file extension.
487 * If different from empty string, `forUploading` is assumed.
488 * @param {boolean} [options.forUploading=true]
489 * Makes sure that a file is uploadable under the title returned.
490 * There are pages in the file namespace under which file upload is impossible.
491 * Automatically assumed if the title is created in the Media namespace.
492 * @return {mw.Title|null} A valid Title object or null if the input cannot be turned into a valid title
494 Title.newFromUserInput = function ( title, defaultNamespace, options ) {
495 var namespace, m, id, ext, parts, normalizeExtension;
497 // defaultNamespace is optional; check whether options moves up
498 if ( arguments.length < 3 && $.type( defaultNamespace ) === 'object' ) {
499 options = defaultNamespace;
500 defaultNamespace = undefined;
503 // merge options into defaults
504 options = $.extend( {
509 normalizeExtension = function ( extension ) {
510 // Remove only trailing space (that is removed by MW anyway)
511 extension = extension.toLowerCase().replace( /\s*$/, '' );
515 namespace = defaultNamespace === undefined ? NS_MAIN : defaultNamespace;
517 // Normalise whitespace and remove duplicates
518 title = $.trim( title.replace( rWhitespace, ' ' ) );
520 // Process initial colon
521 if ( title !== '' && title.charAt( 0 ) === ':' ) {
522 // Initial colon means main namespace instead of specified default
528 .replace( rUnderscoreTrim, '' );
531 // Process namespace prefix (if any)
532 m = title.match( rSplit );
534 id = getNsIdByName( m[1] );
535 if ( id !== false ) {
536 // Ordinary namespace
542 if ( namespace === NS_MEDIA
543 || ( ( options.forUploading || options.fileExtension ) && ( namespace === NS_FILE ) )
546 title = sanitize( title, [ 'generalRule', 'fileRule' ] );
548 // Operate on the file extension
549 // Although it is possible to have spaces between the name and the ".ext", this isn't nice for
550 // operating systems hiding file extensions -> strip them later on
551 parts = title.split( '.' );
553 if ( parts.length > 1 ) {
555 // Get the last part, which is supposed to be the file extension
558 // Does the supplied file name carry the desired file extension?
559 if ( options.fileExtension
560 && normalizeExtension( ext ) !== normalizeExtension( options.fileExtension )
563 // No, push back, whatever there was after the dot
566 // And add the desired file extension later
567 ext = options.fileExtension;
570 // Trim whitespace from the name part (the part without the extension)
571 title = $.trim( parts.join( '.' ) );
573 // Cut, if too long and append file extension
574 title = trimFileNameToByteLength( title, ext );
578 // Missing file extension
579 title = $.trim( parts.join( '.' ) );
581 if ( options.fileExtension ) {
583 // Cut, if too long and append the desired file extension
584 title = trimFileNameToByteLength( title, options.fileExtension );
588 // Name has no file extension and a fallback wasn't provided either
594 title = sanitize( title, [ 'generalRule' ] );
596 // Cut titles exceeding the TITLE_MAX_BYTES byte size limit
597 // (size of underlying database field)
598 if ( namespace !== NS_SPECIAL ) {
599 title = trimToByteLength( title, TITLE_MAX_BYTES );
603 // Any remaining initial :s are illegal.
604 title = title.replace( /^\:+/, '' );
606 return Title.newFromText( title, namespace );
610 * Sanitizes a file name as supplied by the user, originating in the user's file system
611 * so it is most likely a valid MediaWiki title and file name after processing.
612 * Returns null on fatal errors.
615 * @param {string} uncleanName The unclean file name including file extension but
617 * @param {string} [fileExtension] the desired file extension
618 * @return {mw.Title|null} A valid Title object or null if the title is invalid
620 Title.newFromFileName = function ( uncleanName, fileExtension ) {
622 return Title.newFromUserInput( 'File:' + uncleanName, {
623 fileExtension: fileExtension,
629 * Get the file title from an image element
631 * var title = mw.Title.newFromImg( $( 'img:first' ) );
634 * @param {HTMLElement|jQuery} img The image to use as a base
635 * @return {mw.Title|null} The file title or null if unsuccessful
637 Title.newFromImg = function ( img ) {
638 var matches, i, regex, src, decodedSrc,
640 // thumb.php-generated thumbnails
641 thumbPhpRegex = /thumb\.php/,
644 /\/[a-f0-9]\/[a-f0-9]{2}\/([^\s\/]+)\/[^\s\/]+-(?:\1|thumbnail)[^\s\/]*$/,
646 // Thumbnails in non-hashed upload directories
647 /\/([^\s\/]+)\/[^\s\/]+-(?:\1|thumbnail)[^\s\/]*$/,
650 /\/[a-f0-9]\/[a-f0-9]{2}\/([^\s\/]+)$/,
652 // Full-size images in non-hashed upload directories
656 recount = regexes.length;
658 src = img.jquery ? img[0].src : img.src;
660 matches = src.match( thumbPhpRegex );
663 return mw.Title.newFromText( 'File:' + mw.util.getParamValue( 'f', src ) );
666 decodedSrc = decodeURIComponent( src );
668 for ( i = 0; i < recount; i++ ) {
670 matches = decodedSrc.match( regex );
672 if ( matches && matches[1] ) {
673 return mw.Title.newFromText( 'File:' + matches[1] );
681 * Whether this title exists on the wiki.
684 * @param {string|mw.Title} title prefixed db-key name (string) or instance of Title
685 * @return {boolean|null} Boolean if the information is available, otherwise null
687 Title.exists = function ( title ) {
689 type = $.type( title ),
690 obj = Title.exist.pages;
692 if ( type === 'string' ) {
694 } else if ( type === 'object' && title instanceof Title ) {
695 match = obj[title.toString()];
697 throw new Error( 'mw.Title.exists: title must be a string or an instance of Title' );
700 if ( typeof match === 'boolean' ) {
708 * Store page existence
711 * @property {Object} exist
712 * @property {Object} exist.pages Keyed by title. Boolean true value indicates page does exist.
714 * @property {Function} exist.set The setter function.
716 * Example to declare existing titles:
718 * Title.exist.set( ['User:John_Doe', ...] );
720 * Example to declare titles nonexistent:
722 * Title.exist.set( ['File:Foo_bar.jpg', ...], false );
724 * @property {string|Array} exist.set.titles Title(s) in strict prefixedDb title form
725 * @property {boolean} [exist.set.state=true] State of the given titles
731 set: function ( titles, state ) {
732 titles = $.isArray( titles ) ? titles : [titles];
733 state = state === undefined ? true : !!state;
738 for ( i = 0; i < len; i++ ) {
739 pages[ titles[i] ] = state;
751 * Get the namespace number
753 * Example: 6 for "File:Example_image.svg".
757 getNamespaceId: function () {
758 return this.namespace;
762 * Get the namespace prefix (in the content language)
764 * Example: "File:" for "File:Example_image.svg".
765 * In #NS_MAIN this is '', otherwise namespace name plus ':'
769 getNamespacePrefix: function () {
770 return this.namespace === NS_MAIN ?
772 ( mw.config.get( 'wgFormattedNamespaces' )[ this.namespace ].replace( / /g, '_' ) + ':' );
776 * Get the page name without extension or namespace prefix
778 * Example: "Example_image" for "File:Example_image.svg".
780 * For the page title (full page name without namespace prefix), see #getMain.
784 getName: function () {
785 if ( $.inArray( this.namespace, mw.config.get( 'wgCaseSensitiveNamespaces' ) ) !== -1 ) {
788 return $.ucFirst( this.title );
793 * Get the page name (transformed by #text)
795 * Example: "Example image" for "File:Example_image.svg".
797 * For the page title (full page name without namespace prefix), see #getMainText.
801 getNameText: function () {
802 return text( this.getName() );
806 * Get the extension of the page name (if any)
808 * @return {string|null} Name extension or null if there is none
810 getExtension: function () {
815 * Shortcut for appendable string to form the main page name.
817 * Returns a string like ".json", or "" if no extension.
821 getDotExtension: function () {
822 return this.ext === null ? '' : '.' + this.ext;
826 * Get the main page name
828 * Example: "Example_image.svg" for "File:Example_image.svg".
832 getMain: function () {
833 return this.getName() + this.getDotExtension();
837 * Get the main page name (transformed by #text)
839 * Example: "Example image.svg" for "File:Example_image.svg".
843 getMainText: function () {
844 return text( this.getMain() );
848 * Get the full page name
850 * Example: "File:Example_image.svg".
851 * Most useful for API calls, anything that must identify the "title".
855 getPrefixedDb: function () {
856 return this.getNamespacePrefix() + this.getMain();
860 * Get the full page name (transformed by #text)
862 * Example: "File:Example image.svg" for "File:Example_image.svg".
866 getPrefixedText: function () {
867 return text( this.getPrefixedDb() );
871 * Get the page name relative to a namespace
875 * - "Foo:Bar" relative to the Foo namespace becomes "Bar".
876 * - "Bar" relative to any non-main namespace becomes ":Bar".
877 * - "Foo:Bar" relative to any namespace other than Foo stays "Foo:Bar".
879 * @param {number} namespace The namespace to be relative to
882 getRelativeText: function ( namespace ) {
883 if ( this.getNamespaceId() === namespace ) {
884 return this.getMainText();
885 } else if ( this.getNamespaceId() === NS_MAIN ) {
886 return ':' + this.getPrefixedText();
888 return this.getPrefixedText();
893 * Get the fragment (if any).
895 * Note that this method (by design) does not include the hash character and
896 * the value is not url encoded.
898 * @return {string|null}
900 getFragment: function () {
901 return this.fragment;
905 * Get the URL to this title
907 * @see mw.util#getUrl
908 * @param {Object} [params] A mapping of query parameter names to values,
909 * e.g. `{ action: 'edit' }`.
912 getUrl: function ( params ) {
913 return mw.util.getUrl( this.toString(), params );
917 * Whether this title exists on the wiki.
919 * @see #static-method-exists
920 * @return {boolean|null} Boolean if the information is available, otherwise null
922 exists: function () {
923 return Title.exists( this );
928 * @alias #getPrefixedDb
931 Title.prototype.toString = Title.prototype.getPrefixedDb;
934 * @alias #getPrefixedText
937 Title.prototype.toText = Title.prototype.getPrefixedText;
942 }( mediaWiki, jQuery ) );