Localisation updates from https://translatewiki.net.
[mediawiki.git] / includes / deferred / CdnCacheUpdate.php
blob8c788ea248b4e7904b59fbd58e2c8ae6bc6511dc
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
18 * @file
21 namespace MediaWiki\Deferred;
23 use CdnPurgeJob;
24 use Exception;
25 use InvalidArgumentException;
26 use MediaWiki\MainConfigNames;
27 use MediaWiki\MediaWikiServices;
28 use MediaWiki\Page\PageReference;
29 use RuntimeException;
30 use Wikimedia\Assert\Assert;
31 use Wikimedia\IPUtils;
33 /**
34 * Handles purging the appropriate CDN objects given a list of URLs or PageReference instances
35 * @ingroup Cache
37 class CdnCacheUpdate implements DeferrableUpdate, MergeableUpdate {
38 /** @var array[] List of (URL, rebound purge delay) tuples */
39 private $urlTuples = [];
40 /** @var array[] List of (PageReference, rebound purge delay) tuples */
41 private $pageTuples = [];
43 /** @var int Maximum seconds of rebound purge delay */
44 private const MAX_REBOUND_DELAY = 300;
/**
 * Register the targets to purge, tagging each with one shared rebound delay.
 *
 * @param string[]|PageReference[] $targets Collection of URLs/titles to be purged from CDN
 * @param array $options Options map. Supports:
 *   - reboundDelay: how many seconds after the first purge to send a rebound purge.
 *      No rebound purge will be sent if this is not positive. [Default: 0]
 *      Values above self::MAX_REBOUND_DELAY are capped to that limit.
 */
public function __construct( array $targets, array $options = [] ) {
	// Negative values collapse to zero, oversized ones to the class-wide maximum
	$requestedDelay = (int)max( $options['reboundDelay'] ?? 0, 0 );
	$delay = min( $requestedDelay, self::MAX_REBOUND_DELAY );

	foreach ( $targets as $target ) {
		$tuple = [ $target, $delay ];
		// Page references are kept apart from URLs; everything else is treated as a URL
		if ( $target instanceof PageReference ) {
			$this->pageTuples[] = $tuple;
		} else {
			$this->urlTuples[] = $tuple;
		}
	}
}
/**
 * Absorb the URL and page targets of another update of this class.
 *
 * @param MergeableUpdate $update Must be an instance of this class
 */
public function merge( MergeableUpdate $update ) {
	/** @var self $update */
	Assert::parameterType( __CLASS__, $update, '$update' );
	'@phan-var self $update';

	// Append the other update's tuples after our own, keeping their order
	foreach ( $update->urlTuples as $urlTuple ) {
		$this->urlTuples[] = $urlTuple;
	}
	foreach ( $update->pageTuples as $pageTuple ) {
		$this->pageTuples[] = $pageTuple;
	}
}
/**
 * Purge every resolved URL right away, then queue delayed "rebound" purge
 * jobs for the URLs that were registered with a positive rebound delay.
 */
public function doUpdate() {
	// Resolve the final list of URLs just before purging them (T240083)
	$delayByUrl = $this->resolveReboundDelayByUrl();

	// First pass: immediate purge of everything
	self::purge( array_keys( $delayByUrl ) );
	$purgedAt = time();

	// Bucket the URLs that want a rebound purge by their delay in seconds
	$urlsByDelay = [];
	foreach ( $delayByUrl as $url => $seconds ) {
		if ( $seconds <= 0 ) {
			continue;
		}
		$urlsByDelay[$seconds][] = $url;
	}

	// One delayed job per distinct delay (usually only one job)
	$reboundJobs = [];
	foreach ( $urlsByDelay as $seconds => $bucket ) {
		$releaseAt = $purgedAt + $seconds;
		$reboundJobs[] = new CdnPurgeJob( [
			'urls' => $bucket,
			'jobReleaseTimestamp' => $releaseAt
		] );
	}

	MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $reboundJobs );
}
/**
 * Purge a list of URLs from the CDN.
 *
 * The purge is always relayed on the 'cdn-url-purges' event channel. It is
 * additionally sent over HTCP when $wgHTCPRouting is set, and as direct
 * HTTP PURGE requests when $wgCdnServers is set.
 *
 * @param string[] $urls List of full URLs to purge
 *   (example: [ 'http://my.host/something' ])
 */
public static function purge( array $urls ) {
	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();
	$cdnServers = $config->get( MainConfigNames::CdnServers );
	$htcpRouting = $config->get( MainConfigNames::HTCPRouting );
	if ( $urls === [] ) {
		return;
	}

	// Each URL only needs to be purged once
	$urls = array_unique( $urls );

	wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );

	// Reliably broadcast the purge to all edge nodes; all events share one timestamp
	$ts = microtime( true );
	$events = [];
	foreach ( $urls as $key => $url ) {
		$events[$key] = [
			'url' => $url,
			'timestamp' => $ts,
		];
	}
	$relayer = $services->getEventRelayerGroup()->getRelayer( 'cdn-url-purges' );
	$relayer->notifyMulti( 'cdn-url-purges', $events );

	// Send lossy UDP broadcasting if enabled
	if ( $htcpRouting ) {
		self::HTCPPurge( $urls );
	}

	// Do direct server purges if enabled (this does not scale very well)
	if ( $cdnServers ) {
		self::naivePurge( $urls );
	}
}
/**
 * Get the URLs this update would purge, with page targets resolved to URLs.
 *
 * @return string[] List of URLs
 */
public function getUrls() {
	$delayByUrl = $this->resolveReboundDelayByUrl();

	return array_keys( $delayByUrl );
}
/**
 * Turn the registered page and URL targets into one map of URLs, where a
 * URL that was registered several times keeps its largest rebound delay.
 *
 * @return int[] Map of (URL => rebound purge delay)
 */
private function resolveReboundDelayByUrl() {
	$services = MediaWikiServices::getInstance();
	/** @var PageReference $page */

	// Avoid multiple queries for HTMLCacheUpdater::getUrls() call
	$linkBatch = $services->getLinkBatchFactory()->newLinkBatch();
	foreach ( $this->pageTuples as $pageTuple ) {
		$linkBatch->addObj( $pageTuple[0] );
	}
	$linkBatch->execute();

	// Collect (URL, delay) candidates: URLs derived from pages first, then plain URLs
	$candidates = [];
	$htmlCacheUpdater = $services->getHtmlCacheUpdater();
	foreach ( $this->pageTuples as [ $page, $pageDelay ] ) {
		foreach ( $htmlCacheUpdater->getUrls( $page ) as $pageUrl ) {
			$candidates[] = [ $pageUrl, $pageDelay ];
		}
	}
	foreach ( $this->urlTuples as $urlTuple ) {
		$candidates[] = $urlTuple;
	}

	// Use the highest rebound for duplicate URLs in order to handle the most lag
	$reboundDelayByUrl = [];
	foreach ( $candidates as [ $url, $delay ] ) {
		$known = $reboundDelayByUrl[$url] ?? 0;
		$reboundDelayByUrl[$url] = max( $known, $delay );
	}

	return $reboundDelayByUrl;
}
/**
 * Send Hyper Text Caching Protocol (HTCP) CLR requests
 *
 * Each URL is expanded, matched against $wgHTCPRouting, and sent as one UDP
 * packet to every host/port of the matching rule. URLs without a matching
 * rule are logged and skipped.
 *
 * @param string[] $urls Collection of URLs to purge
 * @throws InvalidArgumentException If an entry of $urls is not a string
 * @throws RuntimeException If a matching routing rule lacks a host or port
 */
private static function HTCPPurge( array $urls ) {
	$services = MediaWikiServices::getInstance();
	$config = $services->getMainConfig();
	$htcpRouting = $config->get( MainConfigNames::HTCPRouting );
	$htcpMulticastTTL = $config->get( MainConfigNames::HTCPMulticastTTL );
	// HTCP CLR operation
	$htcpOpCLR = 4;

	// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
	// Each constant is guarded on its own, so a partially defined set neither
	// triggers a redefinition warning nor leaves one of them undefined.
	if ( !defined( "IPPROTO_IP" ) ) {
		define( "IPPROTO_IP", 0 );
	}
	if ( !defined( "IP_MULTICAST_LOOP" ) ) {
		define( "IP_MULTICAST_LOOP", 34 );
	}
	if ( !defined( "IP_MULTICAST_TTL" ) ) {
		define( "IP_MULTICAST_TTL", 33 );
	}

	// pfsockopen doesn't work because we need set_sock_opt
	$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
	if ( !$conn ) {
		$errstr = socket_strerror( socket_last_error() );
		wfDebugLog( 'squid', __METHOD__ .
			": Error opening UDP socket: $errstr" );

		return;
	}

	// Set socket options
	socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
	if ( $htcpMulticastTTL != 1 ) {
		// Set multicast time to live (hop count) option on socket
		socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
			$htcpMulticastTTL );
	}

	// Get sequential trx IDs for packet loss counting
	// NOTE(review): the bit-width argument (32) is assumed here to match the
	// 32-bit ('N') transaction ID field packed below — confirm against upstream.
	$ids = $services->getGlobalIdGenerator()->newSequentialPerNodeIDs(
		'squidhtcppurge',
		32,
		count( $urls )
	);

	foreach ( $urls as $url ) {
		if ( !is_string( $url ) ) {
			throw new InvalidArgumentException( 'Bad purge URL' );
		}
		$url = self::expand( $url );
		$conf = self::getRuleForURL( $url, $htcpRouting );
		if ( !$conf ) {
			wfDebugLog( 'squid', __METHOD__ .
				": No HTCP rule configured for URL {$url}, skipping" );
			continue;
		}

		if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
			// Normalize single entries
			$conf = [ $conf ];
		}
		// Validate every destination before anything is sent for this URL
		foreach ( $conf as $subconf ) {
			if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
				throw new RuntimeException( "Invalid HTCP rule for URL $url\n" );
			}
		}

		// Construct a minimal HTCP request datagram
		// as per RFC 2756
		// Opcode 'CLR', no response desired, no auth
		$htcpTransID = current( $ids );
		next( $ids );

		$htcpSpecifier = pack( 'na4na*na8n',
			4, 'HEAD', strlen( $url ), $url,
			8, 'HTTP/1.0', 0 );

		$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
		$htcpLen = 4 + $htcpDataLen + 2;

		// Note! Squid gets the bit order of the first
		// word wrong, wrt the RFC. Apparently no other
		// implementation exists, so adapt to Squid
		$htcpPacket = pack( 'nxxnCxNxxa*n',
			$htcpLen, $htcpDataLen, $htcpOpCLR,
			$htcpTransID, $htcpSpecifier, 2 );

		wfDebugLog( 'squid', __METHOD__ .
			": Purging URL $url via HTCP" );
		foreach ( $conf as $subconf ) {
			socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
				$subconf['host'], $subconf['port'] );
		}
	}
}
/**
 * Send HTTP PURGE requests for each of the URLs to all of the cache servers
 *
 * Every URL is expanded first; one request per (URL, $wgCdnServers entry)
 * pair is built, using the server as proxy. URLs that cannot be parsed
 * into a host are logged and skipped.
 *
 * @param string[] $urls
 * @throws Exception
 */
private static function naivePurge( array $urls ) {
	$services = MediaWikiServices::getInstance();
	$cdnServers = $services->getMainConfig()->get( MainConfigNames::CdnServers );
	$urlUtils = $services->getUrlUtils();

	$reqs = [];
	foreach ( $urls as $url ) {
		$url = self::expand( $url );
		$urlInfo = $urlUtils->parse( $url );
		if ( !isset( $urlInfo['host'] ) ) {
			// parse() may yield null; without a host there is no usable Host header
			wfDebugLog( 'squid', __METHOD__ . ": Unable to parse URL {$url}, skipping" );
			continue;
		}
		// Only mention the port in the Host header when the URL carries one
		$urlHost = strlen( $urlInfo['port'] ?? '' )
			? IPUtils::combineHostAndPort( $urlInfo['host'], (int)$urlInfo['port'] )
			: $urlInfo['host'];
		$baseReq = [
			'method' => 'PURGE',
			'url' => $url,
			'headers' => [
				'Host' => $urlHost,
				'Connection' => 'Keep-Alive',
				'Proxy-Connection' => 'Keep-Alive',
				'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
			]
		];
		foreach ( $cdnServers as $server ) {
			$reqs[] = ( $baseReq + [ 'proxy' => $server ] );
		}
	}

	if ( !$reqs ) {
		// Nothing left to send, so no HTTP client is needed
		return;
	}

	$http = $services->getHttpRequestFactory()
		->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
	$http->runMulti( $reqs );
}
/**
 * Turn a local URL into a fully-qualified one, using the internal protocol
 * and host defined in $wgInternalServer. A URL that is already fully
 * qualified is passed through unchanged.
 *
 * Purge URLs may point at the main wiki or at a non-native host, such as
 * images hosted on a second internal server; both cases go through here.
 *
 * Client functions should not need to call this.
 *
 * @param string $url
 * @return string
 */
private static function expand( $url ) {
	$urlUtils = MediaWikiServices::getInstance()->getUrlUtils();
	$expanded = $urlUtils->expand( $url, PROTO_INTERNAL );

	// The result is cast so that callers always receive a string
	return (string)$expanded;
}
/**
 * Pick the first HTCP routing rule that applies to a URL.
 *
 * Rules are tried in array order. An empty-string key matches every URL;
 * any other key is used as a regular expression against the URL.
 *
 * @param string $url URL to match
 * @param array $rules Array of rules, see $wgHTCPRouting for format and behavior
 * @return mixed Element of $rules that matched, or false if nothing matched
 */
private static function getRuleForURL( $url, $rules ) {
	foreach ( $rules as $pattern => $route ) {
		// Skip rules whose non-empty pattern does not match this URL
		if ( $pattern !== '' && !preg_match( $pattern, $url ) ) {
			continue;
		}

		return $route;
	}

	return false;
}
354 /** @deprecated class alias since 1.42 */
355 class_alias( CdnCacheUpdate::class, 'CdnCacheUpdate' );