Sync up with Parsoid parserTests.txt
[mediawiki.git] / includes / OutputHandler.php
blob28108f658330c99d8dd77ab22c5c3e689aa7dd5d
1 <?php
2 /**
3 * Functions to be used with PHP's output buffer.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
23 namespace MediaWiki;
25 use MediaWiki\Logger\LoggerFactory;
/**
 * Functions to be used with PHP's output buffer.
 *
 * @since 1.31
 */
class OutputHandler {
	/**
	 * Standard output handler for use with ob_start.
	 *
	 * Output buffers using this method should only be started from MW_SETUP_CALLBACK,
	 * and only if there are no parent output buffers.
	 *
	 * Depending on the PHP and site configuration this gzip-compresses the body
	 * (see handleGzip()) and emits a Content-Length header (see emitContentLength()).
	 *
	 * @param string $s Web response output
	 * @param int $phase Flags indicating the reason for the call
	 * @return string The (possibly compressed) response body
	 */
	public static function handle( $s, $phase ) {
		$config = MediaWikiServices::getInstance()->getMainConfig();
		$disableOutputCompression = $config->get( MainConfigNames::DisableOutputCompression );
		// Don't send headers if output is being discarded (T278579)
		if ( ( $phase & PHP_OUTPUT_HANDLER_CLEAN ) === PHP_OUTPUT_HANDLER_CLEAN ) {
			$logger = LoggerFactory::getInstance( 'output' );
			$logger->debug( __METHOD__ . " entrypoint={entry}; size={size}; phase=$phase", [
				'entry' => MW_ENTRY_POINT,
				'size' => strlen( $s ),
			] );

			return $s;
		}

		// Check if a compression output buffer is already enabled via php.ini. Such
		// buffers exist at the start of the request and are reflected by ob_get_level().
		$phpHandlesCompression = (
			ini_get( 'output_handler' ) === 'ob_gzhandler' ||
			ini_get( 'zlib.output_handler' ) === 'ob_gzhandler' ||
			!in_array(
				// ini_get() returns false for settings that are not registered
				// (e.g. zlib not loaded); the cast makes that the "off" value ''.
				strtolower( (string)ini_get( 'zlib.output_compression' ) ),
				[ '', 'off', '0' ]
			)
		);

		if (
			// Compression is not already handled by an internal PHP buffer
			!$phpHandlesCompression &&
			// Compression is not disabled by the application entry point
			!defined( 'MW_NO_OUTPUT_COMPRESSION' ) &&
			// Compression is not disabled by site configuration
			!$disableOutputCompression
		) {
			$s = self::handleGzip( $s );
		}

		if (
			// Response body length does not depend on internal PHP compression buffer
			!$phpHandlesCompression &&
			// Response body length does not depend on mangling by a custom buffer
			!ini_get( 'output_handler' ) &&
			!ini_get( 'zlib.output_handler' )
		) {
			self::emitContentLength( strlen( $s ) );
		}

		return $s;
	}

	/**
	 * Get the "file extension" that some client apps will estimate from
	 * the currently-requested URL.
	 *
	 * This isn't a WebRequest method, because we need it before the class loads.
	 * @todo As of 2018, this actually runs after autoloader in Setup.php, so
	 *   WebRequest seems like a good place for this.
	 *
	 * @return string Lower-cased text from the last "." of the request path
	 *   (including the dot), or an empty string if there is no path or no dot
	 */
	private static function findUriExtension() {
		// @todo FIXME: this sort of dupes some code in WebRequest::getRequestUrl()
		if ( isset( $_SERVER['REQUEST_URI'] ) ) {
			// Strip the query string...
			$path = explode( '?', $_SERVER['REQUEST_URI'], 2 )[0];
		} elseif ( isset( $_SERVER['SCRIPT_NAME'] ) ) {
			// Probably IIS. QUERY_STRING appears separately.
			$path = $_SERVER['SCRIPT_NAME'];
		} else {
			// Can't get the path from the server? :(
			return '';
		}

		$period = strrpos( $path, '.' );
		if ( $period !== false ) {
			return strtolower( substr( $path, $period ) );
		}

		return '';
	}

	/**
	 * Handler that compresses data with gzip if allowed by the Accept header.
	 *
	 * Unlike ob_gzhandler, it works for HEAD requests too. This assumes that the application
	 * processes them as normal GET request and that the webserver is tasked with stripping out
	 * the response body before sending the response to the client.
	 *
	 * @param string $s Web response output
	 * @return string The gzip-encoded body, or $s unchanged when compression is skipped
	 */
	private static function handleGzip( $s ) {
		if ( !function_exists( 'gzencode' ) ) {
			wfDebug( __METHOD__ . "() skipping compression (gzencode unavailable)" );
			return $s;
		}
		if ( headers_sent() ) {
			wfDebug( __METHOD__ . "() skipping compression (headers already sent)" );
			return $s;
		}

		$ext = self::findUriExtension();
		if ( $ext == '.gz' || $ext == '.tgz' ) {
			// Don't do gzip compression if the URL path ends in .gz or .tgz
			// This confuses Safari and triggers a download of the page,
			// even though it's pretty clearly labeled as viewable HTML.
			// Bad Safari! Bad!
			return $s;
		}

		if ( $s === '' ) {
			// Do not gzip empty HTTP responses since that would not only bloat the body
			// length, but it would result in invalid HTTP responses when the HTTP status code
			// is one that must not be accompanied by a body (e.g. "204 No Content").
			return $s;
		}

		if ( wfClientAcceptsGzip() ) {
			// Compress before announcing it: gzencode() returns false on failure, and
			// a "Content-Encoding: gzip" header on an uncompressed (or false) body
			// would produce a response the client cannot decode.
			$compressed = gzencode( $s, 6 );
			if ( $compressed === false ) {
				wfDebug( __METHOD__ . "() skipping compression (gzencode failed)" );
			} else {
				wfDebug( __METHOD__ . "() is compressing output" );
				header( 'Content-Encoding: gzip' );
				$s = $compressed;
			}
		}

		// Set vary header if it hasn't been set already
		$headers = headers_list();
		$foundVary = false;
		foreach ( $headers as $header ) {
			$headerName = strtolower( substr( $header, 0, 5 ) );
			if ( $headerName == 'vary:' ) {
				$foundVary = true;
				break;
			}
		}
		if ( !$foundVary ) {
			header( 'Vary: Accept-Encoding' );
		}

		return $s;
	}

	/**
	 * Set the Content-Length header if possible
	 *
	 * This sets Content-Length for the following cases:
	 *  - When the response body is meaningful (HTTP 200/404).
	 *  - On any HTTP 1.0 request response. This improves cooperation with certain CDNs.
	 *
	 * This assumes that HEAD requests are processed as GET requests by MediaWiki and that
	 * the webserver is tasked with stripping out the body.
	 *
	 * Setting Content-Length can prevent clients from getting stuck waiting on PHP to finish
	 * while deferred updates are running.
	 *
	 * @param int $length Response body length in bytes
	 */
	private static function emitContentLength( $length ) {
		if ( headers_sent() ) {
			wfDebug( __METHOD__ . "() headers already sent" );
			return;
		}

		if (
			in_array( http_response_code(), [ 200, 404 ], true ) ||
			( $_SERVER['SERVER_PROTOCOL'] ?? null ) === 'HTTP/1.0'
		) {
			header( "Content-Length: $length" );
		}
	}
}