Non-word characters don't terminate tag names.
[mediawiki.git] / includes / libs / IEUrlExtension.php
blob79387e632d589517d8a2ea6ad40d67ff559bb54e
<?php
/**
 * Checks for validity of requested URL's extension.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 */
/**
 * Internet Explorer derives a cache filename from a URL, and then in certain
 * circumstances, uses the extension of the resulting file to determine the
 * content type of the data, ignoring the Content-Type header.
 *
 * This can be a problem, especially when non-HTML content is sent by MediaWiki,
 * and Internet Explorer interprets it as HTML, exposing an XSS vulnerability.
 *
 * Usually the script filename (e.g. api.php) is present in the URL, and this
 * makes Internet Explorer think the extension is a harmless script extension.
 * But Internet Explorer 6 and earlier allows the script extension to be
 * obscured by encoding the dot as "%2E".
 *
 * This class contains functions which help in detecting and dealing with this
 * situation.
 *
 * Checking the URL for a bad extension is somewhat complicated due to the fact
 * that CGI doesn't provide a standard method to determine the URL. Instead it
 * is necessary to pass a subset of $_SERVER variables, which we then attempt
 * to use to guess parts of the URL.
 */
class IEUrlExtension {
	/**
	 * Check a subset of $_SERVER (or the whole of $_SERVER if you like)
	 * to see if it indicates that the request was sent with a bad file
	 * extension. Returns true if the request should be denied or modified,
	 * false otherwise. The relevant $_SERVER elements are:
	 *
	 *   - SERVER_SOFTWARE
	 *   - REQUEST_URI
	 *   - QUERY_STRING
	 *   - PATH_INFO
	 *
	 * If a variable is unset in $_SERVER, it should be unset in $vars.
	 *
	 * @param array $vars A subset of $_SERVER.
	 * @param array $extWhitelist Extensions which are allowed, assumed harmless.
	 * @return bool
	 */
	public static function areServerVarsBad( $vars, $extWhitelist = array() ) {
		// Prefer REQUEST_URI, but only on servers known to leave "%2E"
		// undecoded in it; otherwise fall back to QUERY_STRING.
		if ( isset( $vars['SERVER_SOFTWARE'] )
			&& isset( $vars['REQUEST_URI'] )
			&& self::haveUndecodedRequestUri( $vars['SERVER_SOFTWARE'] )
		) {
			$urlPart = $vars['REQUEST_URI'];
		} elseif ( isset( $vars['QUERY_STRING'] ) ) {
			$urlPart = $vars['QUERY_STRING'];
		} else {
			$urlPart = '';
		}

		if ( self::isUrlExtensionBad( $urlPart, $extWhitelist ) ) {
			return true;
		}

		// Some servers have PATH_INFO but not REQUEST_URI, so we check both
		// to be on the safe side.
		if ( isset( $vars['PATH_INFO'] )
			&& self::isUrlExtensionBad( $vars['PATH_INFO'], $extWhitelist )
		) {
			return true;
		}

		// All checks passed
		return false;
	}

	/**
	 * Given a right-hand portion of a URL, determine whether IE would detect
	 * a potentially harmful file extension.
	 *
	 * @param string $urlPart The right-hand portion of a URL
	 * @param array $extWhitelist An array of file extensions (strings) which
	 *   may occur in this URL, and which should be allowed.
	 * @return bool
	 */
	public static function isUrlExtensionBad( $urlPart, $extWhitelist = array() ) {
		if ( strval( $urlPart ) === '' ) {
			return false;
		}

		$extension = self::findIE6Extension( $urlPart );
		if ( strval( $extension ) === '' ) {
			// No extension or empty extension
			return false;
		}

		// Strict comparison throughout: loose comparison treats numeric-looking
		// strings such as "1e1" and "10" as equal.
		if ( in_array( $extension, array( 'php', 'php5' ), true ) ) {
			// Script extension, OK
			return false;
		}
		if ( in_array( $extension, $extWhitelist, true ) ) {
			// Whitelisted extension
			return false;
		}

		if ( !preg_match( '/^[a-zA-Z0-9_-]+$/', $extension ) ) {
			// Non-alphanumeric extension, unlikely to be registered.
			//
			// The regex above is known to match all registered file extensions
			// in a default Windows XP installation. It's important to allow
			// extensions with ampersands and percent signs, since that reduces
			// the number of false positives substantially.
			return false;
		}

		// Possibly bad extension
		return true;
	}

	/**
	 * Returns a variant of $url which will pass isUrlExtensionBad() but has the
	 * same GET parameters, or false if it can't figure one out.
	 *
	 * @param string $url
	 * @param array $extWhitelist
	 * @return bool|string
	 */
	public static function fixUrlForIE6( $url, $extWhitelist = array() ) {
		$questionPos = strpos( $url, '?' );
		if ( $questionPos === false ) {
			$beforeQuery = $url . '?';
			$query = '';
		} elseif ( $questionPos === strlen( $url ) - 1 ) {
			$beforeQuery = $url;
			$query = '';
		} else {
			$beforeQuery = substr( $url, 0, $questionPos + 1 );
			$query = substr( $url, $questionPos + 1 );
		}

		// Multiple question marks cause problems. Encode the second and
		// subsequent question mark. "%3F" is the percent-encoding of "?";
		// ("%3E" would decode to ">" and corrupt the parameter value).
		$query = str_replace( '?', '%3F', $query );
		// Append an invalid path character so that IE6 won't see the end of the
		// query string as an extension
		$query .= '&*';
		// Put the URL back together
		$url = $beforeQuery . $query;
		if ( self::isUrlExtensionBad( $url, $extWhitelist ) ) {
			// Avoid a redirect loop
			return false;
		}

		return $url;
	}

	/**
	 * Determine what extension IE6 will infer from a certain query string.
	 * If the URL has an extension before the question mark, IE6 will use
	 * that and ignore the query string, but per the comment on this class
	 * we don't have a reliable way to determine the URL, so
	 * areServerVarsBad() may just pass in the query string for $url.
	 * All entry points have safe extensions (php, php5) anyway, so
	 * checking the query string is possibly overly paranoid but never
	 * insecure.
	 *
	 * The criteria for finding an extension are as follows:
	 *   - everything from the first # onwards is ignored
	 *   - a possible extension is a dot followed by one or more characters not
	 *     in <>\"/:|?*.
	 *   - if we find a possible extension followed by the end of the string or
	 *     a #, that's our extension
	 *   - if we find a possible extension followed by a ?, that's our extension
	 *       - UNLESS it's exe, dll or cgi, in which case we ignore it and continue
	 *         searching for another possible extension
	 *   - if we find a possible extension followed by a dot or another illegal
	 *     character, we ignore it and continue searching
	 *
	 * @param string $url URL
	 * @return mixed Detected extension (string), or false if none found
	 */
	public static function findIE6Extension( $url ) {
		$pos = 0;
		$hashPos = strpos( $url, '#' );
		if ( $hashPos !== false ) {
			$urlLength = $hashPos;
		} else {
			$urlLength = strlen( $url );
		}
		$remainingLength = $urlLength;
		while ( $remainingLength > 0 ) {
			// Skip ahead to the next dot
			$pos += strcspn( $url, '.', $pos, $remainingLength );
			if ( $pos >= $urlLength ) {
				// End of string, we're done
				return false;
			}

			// We found a dot. Skip past it
			$pos++;
			$remainingLength = $urlLength - $pos;

			// Check for illegal characters in our prospective extension,
			// or for another dot
			$nextPos = $pos + strcspn( $url, "<>\\\"/:|?*.", $pos, $remainingLength );
			if ( $nextPos >= $urlLength ) {
				// No illegal character or next dot
				// We have our extension
				return substr( $url, $pos, $urlLength - $pos );
			}
			if ( $url[$nextPos] === '?' ) {
				// We've found a legal extension followed by a question mark
				// If the extension is NOT exe, dll or cgi, return it
				$extension = substr( $url, $pos, $nextPos - $pos );
				if ( strcasecmp( $extension, 'exe' ) && strcasecmp( $extension, 'dll' )
					&& strcasecmp( $extension, 'cgi' )
				) {
					return $extension;
				}
				// Else continue looking
			}
			// We found an illegal character or another dot.
			// Resume the scan AT that character, not after it: if it is a
			// dot, it starts the next candidate extension and must be seen by
			// the dot search above (otherwise "x.y.z" would yield no
			// extension instead of "z"). The loop still terminates because
			// $pos is incremented past a dot on every iteration.
			$pos = $nextPos;
			$remainingLength = $urlLength - $pos;
		}

		return false;
	}

	/**
	 * When passed the value of $_SERVER['SERVER_SOFTWARE'], this function
	 * returns true if that server is known to have a REQUEST_URI variable
	 * with %2E not decoded to ".". On such a server, it is possible to detect
	 * whether the script filename has been obscured.
	 *
	 * The function returns false if the server is not known to have this
	 * behavior. Microsoft IIS in particular is known to decode escaped script
	 * filenames.
	 *
	 * SERVER_SOFTWARE typically contains either a plain string such as "Zeus",
	 * or a specification in the style of a User-Agent header, such as
	 * "Apache/1.3.34 (Unix) mod_ssl/2.8.25 OpenSSL/0.9.8a PHP/4.4.2"
	 *
	 * @param string $serverSoftware
	 * @return bool
	 */
	public static function haveUndecodedRequestUri( $serverSoftware ) {
		static $whitelist = array(
			'Apache',
			'Zeus',
			'LiteSpeed' );
		// Capture the product name: everything up to the first "/" or space,
		// or the whole string if neither occurs.
		if ( preg_match( '/^(.*?)($|\/| )/', $serverSoftware, $m ) ) {
			return in_array( $m[1], $whitelist, true );
		} else {
			return false;
		}
	}
}