Non-word characters don't terminate tag names.
[mediawiki.git] / resources / mediawiki / mediawiki.Uri.js
bloba2d4d6cb8d917b0c14673a0539952f6fe05a3735
1 /**
2  * Library for simple URI parsing and manipulation.  Requires jQuery.
3  *
4  * Do not expect full RFC 3986 compliance. Intended to be minimal, but featureful.
5  * The use cases we have in mind are constructing 'next page' or 'previous page' URLs,
6  * detecting whether we need to use cross-domain proxies for an API, constructing
7  * simple URL-based API calls, etc.
8  *
9  * Intended to compress very well if you use a JS-parsing minifier.
10  *
11  * Dependencies: mw, jQuery
12  *
13  * Example:
14  *
15  *     var uri = new mw.Uri( 'http://foo.com/mysite/mypage.php?quux=2' );
16  *
17  *     if ( uri.host == 'foo.com' ) {
18  *         uri.host = 'www.foo.com';
19  *         uri.extend( { bar: 1 } );
20  *
21  *         $( 'a#id1' ).attr( 'href', uri );
22  *         // anchor with id 'id1' now links to http://www.foo.com/mysite/mypage.php?bar=1&quux=2
23  *
24  *         $( 'a#id2' ).attr( 'href', uri.clone().extend( { bar: 3, pif: 'paf' } ) );
25  *         // anchor with id 'id2' now links to http://www.foo.com/mysite/mypage.php?bar=3&quux=2&pif=paf
26  *     }
27  *
28  * Parsing here is regex based, so may not work on all URIs, but is good enough for most.
29  *
30  * Given a URI like
31  * 'http://usr:pwd@www.test.com:81/dir/dir.2/index.htm?q1=0&&test1&test2=&test3=value+%28escaped%29&r=1&r=2#top':
32  * The returned object will have the following properties:
33  *
34  *    protocol  'http'
35  *    user      'usr'
36  *    password  'pwd'
37  *    host      'www.test.com'
38  *    port      '81'
39  *    path      '/dir/dir.2/index.htm'
40  *    query     {
41  *                  q1: '0',
42  *                  test1: null,
43  *                  test2: '',
44  *                  test3: 'value (escaped)',
45  *                  r: ['1', '2']
46  *              }
47  *    fragment  'top'
48  *
49  * n.b. 'password' is not technically allowed for HTTP URIs, but it is possible with other
50  * sorts of URIs.
51  * You can modify the properties directly. Then use the toString() method to extract the
52  * full URI string again.
53  *
54  * Parsing based on parseUri 1.2.2 (c) Steven Levithan <stevenlevithan.com> MIT License
55  * http://stevenlevithan.com/demo/parseuri/js/
56  *
57  */
59 ( function ( mw, $ ) {
/**
 * Builds one optional piece of a URI string: the value wrapped in a prefix and a suffix,
 * or nothing at all when there is no value to add.
 *
 * @param {string} pre Text placed before the value.
 * @param {string} val Value to include; undefined, null and '' yield an empty result.
 * @param {string} post Text placed after the value.
 * @param {boolean} raw If true, val is used as-is; otherwise it is passed through mw.Uri.encode.
 * @return {string} pre + value + post, or '' when val is missing.
 */
function cat( pre, val, post, raw ) {
    var isMissing = ( val === undefined || val === null || val === '' ),
        middle;

    if ( isMissing ) {
        return '';
    }

    if ( raw ) {
        middle = val;
    } else {
        middle = mw.Uri.encode( val );
    }
    return pre + middle + post;
}
// Regular expressions that split many common URIs into their components.
// Both patterns expose the same eight capture groups, in the same order.
var parser = {};
parser.strict = /^(?:([^:\/?#]+):)?(?:\/\/(?:(?:([^:@\/?#]*)(?::([^:@\/?#]*))?)?@)?([^:\/?#]*)(?::(\d*))?)?((?:[^?#\/]*\/)*[^?#]*)(?:\?([^#]*))?(?:#(.*))?/;
parser.loose = /^(?:(?![^:@]+:[^:@\/]*@)([^:\/?#.]+):)?(?:\/\/)?(?:(?:([^:@\/?#]*)(?::([^:@\/?#]*))?)?@)?([^:\/?#]*)(?::(\d*))?((?:\/(?:[^?#](?![^?#\/]*\.[^?#\/.]+(?:[?#]|$)))*\/?)?[^?#\/]*)(?:\?([^#]*))?(?:#(.*))?/;

// Names for capture groups 1..8 of the parser regexes; the order must match the groups.
// Example values are shown for each name. The raw query string, e.g. q1=0&&test1&test2=value,
// is later parsed into an object such as { q1: '0', test1: null, test2: 'value' }.
var properties = [
    'protocol',  // http
    'user',      // usr
    'password',  // pwd
    'host',      // www.test.com
    'port',      // 81
    'path',      // /dir/dir.2/index.htm
    'query',     // q1=0&&test1&test2=value
    'fragment'   // top
];
96         /**
97          * We use a factory to inject a document location, for relative URLs, including protocol-relative URLs.
98          * so the library is still testable & purely functional.
99          */
100         mw.UriRelative = function ( documentLocation ) {
101                 var defaultUri;
/**
 * Constructs URI object. Throws error if arguments are illegal/impossible, or otherwise don't parse.
 * @constructor
 * @param {Object|string} uri URI string, or an Object with appropriate properties (especially another URI object to clone).
 *  Object must have non-blank 'protocol', 'host', and 'path' properties.
 *  This parameter is optional. If omitted (or set to undefined, null or empty string), then a clone of
 *  the default URI of this constructor is returned (e.g. document.location for mw.Uri in MediaWiki core).
 * @param {Object|boolean} options Object with options, or (backwards compatibility) a boolean for strictMode
 *  - {boolean} strictMode Trigger strict mode parsing of the url. Default: false
 *  - {boolean} overrideKeys Whether to let duplicate query parameters override each other (true) or
 *     automatically convert to an array (false, default).
 * @throws {Error} 'Bad constructor arguments' when no usable protocol, host and absolute path result,
 *  or when no URI is given and there is no default URI to fall back on.
 */
function Uri( uri, options ) {
    var prop, value;

    options = typeof options === 'object' ? options : { strictMode: !!options };
    options = $.extend( {
        strictMode: false,
        overrideKeys: false
    }, options );

    if ( uri === undefined || uri === null || uri === '' ) {
        // If we didn't get a URI in the constructor, use the default one.
        // Without a default there is nothing to fall back on, so report it as a bad call
        // instead of failing on a property access of undefined.
        if ( !defaultUri ) {
            throw new Error( 'Bad constructor arguments' );
        }
        return defaultUri.clone();
    }

    if ( typeof uri === 'string' ) {
        this.parse( uri, options );
    } else if ( typeof uri === 'object' ) {
        // Copy data over from existing URI object
        for ( prop in uri ) {
            // Only copy direct properties, not inherited ones
            if ( uri.hasOwnProperty( prop ) ) {
                value = uri[ prop ];
                // Deep copy object properties. Arrays must be copied into an array target,
                // otherwise the copy would become a plain object with numeric keys.
                if ( $.isArray( value ) ) {
                    this[ prop ] = $.extend( true, [], value );
                } else if ( $.isPlainObject( value ) ) {
                    this[ prop ] = $.extend( true, {}, value );
                } else {
                    this[ prop ] = value;
                }
            }
        }
        if ( !this.query ) {
            this.query = {};
        }
    }

    // Fill in missing parts from the default URI, when there is one. While the default URI
    // itself is being constructed there is none yet, and the final check below rejects
    // anything that is still incomplete.
    if ( defaultUri ) {
        // protocol-relative URLs
        if ( !this.protocol ) {
            this.protocol = defaultUri.protocol;
        }
        // No host given:
        if ( !this.host ) {
            this.host = defaultUri.host;
            // Only inherit the port together with the host.
            if ( !this.port ) {
                this.port = defaultUri.port;
            }
        }
    }

    if ( this.path && this.path.charAt( 0 ) !== '/' ) {
        // A real relative URL, relative to defaultUri.path. We can't really handle that since we cannot
        // figure out whether the last path component of defaultUri.path is a directory or a file.
        throw new Error( 'Bad constructor arguments' );
    }
    if ( !( this.protocol && this.host && this.path ) ) {
        throw new Error( 'Bad constructor arguments' );
    }
}
/**
 * encodeURIComponent plus extra escaping for the characters ! ' ( ) * so that output is
 * the same across browsers and closer to RFC 3986. Spaces are emitted as '+' rather than '%20'.
 * @param {string} s String to encode.
 * @return {string} Encoded string for URI.
 */
Uri.encode = function ( s ) {
    var extraEscapes = {
            '!': '%21',
            '\'': '%27',
            '(': '%28',
            ')': '%29',
            '*': '%2A'
        },
        encoded = encodeURIComponent( s );

    encoded = encoded.replace( /[!'()*]/g, function ( ch ) {
        return extraEscapes[ ch ];
    } );
    return encoded.replace( /%20/g, '+' );
};
/**
 * decodeURIComponent that also treats '+' as an encoded space.
 * @param {string} s String encoded for URI.
 * @return {string} Decoded string.
 */
Uri.decode = function ( s ) {
    // Turn every '+' into '%20' first so the standard decoder yields a space for it.
    var plusAsSpace = s.split( '+' ).join( '%20' );
    return decodeURIComponent( plusAsSpace );
};
191                 Uri.prototype = {
193                         /**
194                          * Parse a string and set our properties accordingly.
195                          * @param {string} str URI
196                          * @param {Object} options
197                          * @return {boolean} Success.
198                          */
199                         parse: function ( str, options ) {
200                                 var q,
201                                         uri = this,
202                                         matches = parser[ options.strictMode ? 'strict' : 'loose' ].exec( str );
203                                 $.each( properties, function ( i, property ) {
204                                         uri[ property ] = matches[ i + 1 ];
205                                 } );
207                                 // uri.query starts out as the query string; we will parse it into key-val pairs then make
208                                 // that object the "query" property.
209                                 // we overwrite query in uri way to make cloning easier, it can use the same list of properties.
210                                 q = {};
211                                 // using replace to iterate over a string
212                                 if ( uri.query ) {
213                                         uri.query.replace( /(?:^|&)([^&=]*)(?:(=)([^&]*))?/g, function ( $0, $1, $2, $3 ) {
214                                                 var k, v;
215                                                 if ( $1 ) {
216                                                         k = Uri.decode( $1 );
217                                                         v = ( $2 === '' || $2 === undefined ) ? null : Uri.decode( $3 );
219                                                         // If overrideKeys, always (re)set top level value.
220                                                         // If not overrideKeys but this key wasn't set before, then we set it as well.
221                                                         if ( options.overrideKeys || q[ k ] === undefined ) {
222                                                                 q[ k ] = v;
224                                                         // Use arrays if overrideKeys is false and key was already seen before
225                                                         } else {
226                                                                 // Once before, still a string, turn into an array
227                                                                 if ( typeof q[ k ] === 'string' ) {
228                                                                         q[ k ] = [ q[ k ] ];
229                                                                 }
230                                                                 // Add to the array
231                                                                 if ( $.isArray( q[ k ] ) ) {
232                                                                         q[ k ].push( v );
233                                                                 }
234                                                         }
235                                                 }
236                                         } );
237                                 }
238                                 this.query = q;
239                         },
241                         /**
242                          * Returns user and password portion of a URI.
243                          * @return {string}
244                          */
245                         getUserInfo: function () {
246                                 return cat( '', this.user, cat( ':', this.password, '' ) );
247                         },
249                         /**
250                          * Gets host and port portion of a URI.
251                          * @return {string}
252                          */
253                         getHostPort: function () {
254                                 return this.host + cat( ':', this.port, '' );
255                         },
257                         /**
258                          * Returns the userInfo and host and port portion of the URI.
259                          * In most real-world URLs, this is simply the hostname, but it is more general.
260                          * @return {string}
261                          */
262                         getAuthority: function () {
263                                 return cat( '', this.getUserInfo(), '@' ) + this.getHostPort();
264                         },
266                         /**
267                          * Returns the query arguments of the URL, encoded into a string
268                          * Does not preserve the order of arguments passed into the URI. Does handle escaping.
269                          * @return {string}
270                          */
271                         getQueryString: function () {
272                                 var args = [];
273                                 $.each( this.query, function ( key, val ) {
274                                         var k = Uri.encode( key ),
275                                                 vals = $.isArray( val ) ? val : [ val ];
276                                         $.each( vals, function ( i, v ) {
277                                                 if ( v === null ) {
278                                                         args.push( k );
279                                                 } else if ( k === 'title' ) {
280                                                         args.push( k + '=' + mw.util.wikiUrlencode( v ) );
281                                                 } else {
282                                                         args.push( k + '=' + Uri.encode( v ) );
283                                                 }
284                                         } );
285                                 } );
286                                 return args.join( '&' );
287                         },
289                         /**
290                          * Returns everything after the authority section of the URI
291                          * @return {string}
292                          */
293                         getRelativePath: function () {
294                                 return this.path + cat( '?', this.getQueryString(), '', true ) + cat( '#', this.fragment, '' );
295                         },
297                         /**
298                          * Gets the entire URI string. May not be precisely the same as input due to order of query arguments.
299                          * @return {string} The URI string.
300                          */
301                         toString: function () {
302                                 return this.protocol + '://' + this.getAuthority() + this.getRelativePath();
303                         },
305                         /**
306                          * Clone this URI
307                          * @return {Object} new URI object with same properties
308                          */
309                         clone: function () {
310                                 return new Uri( this );
311                         },
313                         /**
314                          * Extend the query -- supply query parameters to override or add to ours
315                          * @param {Object} query parameters in key-val form to override or add
316                          * @return {Object} this URI object
317                          */
318                         extend: function ( parameters ) {
319                                 $.extend( this.query, parameters );
320                                 return this;
321                         }
322                 };
324                 defaultUri = new Uri( documentLocation );
326                 return Uri;
327         };
// If we are running in a browser, inject the current document location, for relative URLs.
// `typeof` is required here: merely referencing an undeclared `document` outside a browser
// would throw a ReferenceError instead of skipping this step.
if ( typeof document !== 'undefined' && document && document.location && document.location.href ) {
    mw.Uri = mw.UriRelative( document.location.href );
}
334 }( mediaWiki, jQuery ) );