3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
21 namespace MediaWiki\Deferred
;
25 use InvalidArgumentException
;
26 use MediaWiki\MainConfigNames
;
27 use MediaWiki\MediaWikiServices
;
28 use MediaWiki\Page\PageReference
;
30 use Wikimedia\Assert\Assert
;
31 use Wikimedia\IPUtils
;
/**
 * Handles purging the appropriate CDN objects given a list of URLs or page references
 *
 * Targets are collected at construction time (and via merge()), resolved into a
 * de-duplicated map of URLs when the update runs, purged immediately, and, when a
 * positive rebound delay was requested, purged a second time by a delayed job.
 */
class CdnCacheUpdate implements DeferrableUpdate, MergeableUpdate {
	/** @var array[] List of (URL, rebound purge delay) tuples */
	private $urlTuples = [];
	/** @var array[] List of (PageReference, rebound purge delay) tuples */
	private $pageTuples = [];

	/** @var int Maximum seconds of rebound purge delay */
	private const MAX_REBOUND_DELAY = 300;

	/**
	 * @param string[]|PageReference[] $targets Collection of URLs/titles to be purged from CDN
	 * @param array $options Options map. Supports:
	 *   - reboundDelay: how many seconds after the first purge to send a rebound purge.
	 *      No rebound purge will be sent if this is not positive. [Default: 0]
	 *      Values above MAX_REBOUND_DELAY are clamped to that maximum.
	 */
	public function __construct( array $targets, array $options = [] ) {
		// Clamp the requested delay into [0, MAX_REBOUND_DELAY]
		$delay = min(
			(int)max( $options['reboundDelay'] ?? 0, 0 ),
			self::MAX_REBOUND_DELAY
		);

		foreach ( $targets as $target ) {
			if ( $target instanceof PageReference ) {
				$this->pageTuples[] = [ $target, $delay ];
			} else {
				$this->urlTuples[] = [ $target, $delay ];
			}
		}
	}

	/**
	 * Absorb the purge targets of another update of this class into this one.
	 *
	 * @param MergeableUpdate $update Must be an instance of this class
	 */
	public function merge( MergeableUpdate $update ) {
		/** @var self $update */
		Assert::parameterType( __CLASS__, $update, '$update' );
		'@phan-var self $update';

		$this->urlTuples = array_merge( $this->urlTuples, $update->urlTuples );
		$this->pageTuples = array_merge( $this->pageTuples, $update->pageTuples );
	}

	/**
	 * Purge all collected targets now and enqueue delayed rebound purges where requested.
	 */
	public function doUpdate() {
		// Resolve the final list of URLs just before purging them (T240083)
		$reboundDelayByUrl = $this->resolveReboundDelayByUrl();

		// Send the immediate purges to CDN
		self::purge( array_keys( $reboundDelayByUrl ) );
		$immediatePurgeTimestamp = time();

		// Get the URLs that need rebound purges, grouped by seconds of purge delay
		$urlsWithReboundByDelay = [];
		foreach ( $reboundDelayByUrl as $url => $delay ) {
			if ( $delay > 0 ) {
				$urlsWithReboundByDelay[$delay][] = $url;
			}
		}

		// Enqueue delayed purge jobs for these URLs (usually only one job)
		$jobs = [];
		foreach ( $urlsWithReboundByDelay as $delay => $urls ) {
			$jobs[] = new CdnPurgeJob( [
				'urls' => $urls,
				'jobReleaseTimestamp' => $immediatePurgeTimestamp + $delay
			] );
		}
		MediaWikiServices::getInstance()->getJobQueueGroup()->lazyPush( $jobs );
	}

	/**
	 * Purges a list of CDN nodes defined in $wgCdnServers.
	 * $urls should contain the full URLs to purge as values
	 * (example: $urls[] = 'http://my.host/something')
	 *
	 * @param string[] $urls List of full URLs to purge
	 */
	public static function purge( array $urls ) {
		// Nothing to do; bail out before touching any service
		if ( !$urls ) {
			return;
		}

		$config = MediaWikiServices::getInstance()->getMainConfig();
		$cdnServers = $config->get( MainConfigNames::CdnServers );
		$htcpRouting = $config->get( MainConfigNames::HTCPRouting );

		// Remove duplicate URLs from list
		$urls = array_unique( $urls );

		wfDebugLog( 'squid', __METHOD__ . ': ' . implode( ' ', $urls ) );

		// Reliably broadcast the purge to all edge nodes
		$ts = microtime( true );
		$relayerGroup = MediaWikiServices::getInstance()->getEventRelayerGroup();
		// NOTE(review): confirm the channel name and payload keys against the
		// consumers of the 'cdn-url-purges' relayer
		$relayerGroup->getRelayer( 'cdn-url-purges' )->notifyMulti(
			'cdn-url-purges',
			array_map(
				static function ( $url ) use ( $ts ) {
					return [
						'url' => $url,
						'timestamp' => $ts,
					];
				},
				$urls
			)
		);

		// Send lossy UDP broadcasting if enabled
		if ( $htcpRouting ) {
			self::HTCPPurge( $urls );
		}

		// Do direct server purges if enabled (this does not scale very well)
		if ( $cdnServers ) {
			self::naivePurge( $urls );
		}
	}

	/**
	 * @return string[] List of URLs
	 */
	public function getUrls() {
		return array_keys( $this->resolveReboundDelayByUrl() );
	}

	/**
	 * Resolve all collected page and URL targets into a single map, keeping the
	 * largest rebound delay requested for any URL that appears more than once.
	 *
	 * @return int[] Map of (URL => rebound purge delay)
	 */
	private function resolveReboundDelayByUrl() {
		$services = MediaWikiServices::getInstance();
		/** @var PageReference $page */

		// Avoid multiple queries for HTMLCacheUpdater::getUrls() call
		$lb = $services->getLinkBatchFactory()->newLinkBatch();
		foreach ( $this->pageTuples as [ $page, ] ) {
			$lb->addObj( $page );
		}
		$lb->execute();

		$reboundDelayByUrl = [];

		// Resolve the titles into CDN URLs
		$htmlCacheUpdater = $services->getHtmlCacheUpdater();
		foreach ( $this->pageTuples as [ $page, $delay ] ) {
			foreach ( $htmlCacheUpdater->getUrls( $page ) as $url ) {
				// Use the highest rebound for duplicate URLs in order to handle the most lag
				$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
			}
		}

		foreach ( $this->urlTuples as [ $url, $delay ] ) {
			// Use the highest rebound for duplicate URLs in order to handle the most lag
			$reboundDelayByUrl[$url] = max( $reboundDelayByUrl[$url] ?? 0, $delay );
		}

		return $reboundDelayByUrl;
	}

	/**
	 * Send Hyper Text Caching Protocol (HTCP) CLR requests
	 *
	 * One datagram per URL is sent to every host/port of the routing rule that
	 * matches the URL. URLs without a matching rule are skipped. The UDP socket
	 * is always closed before returning, including when an exception is thrown.
	 *
	 * @param string[] $urls Collection of URLs to purge
	 * @throws InvalidArgumentException If a URL is not a string
	 * @throws RuntimeException If a matching routing rule lacks a host or port
	 */
	private static function HTCPPurge( array $urls ) {
		$config = MediaWikiServices::getInstance()->getMainConfig();
		$htcpRouting = $config->get( MainConfigNames::HTCPRouting );
		$htcpMulticastTTL = $config->get( MainConfigNames::HTCPMulticastTTL );
		// HTCP CLR operation
		$htcpOpCLR = 4;

		// @todo FIXME: PHP doesn't support these socket constants (include/linux/in.h)
		if ( !defined( "IPPROTO_IP" ) ) {
			define( "IPPROTO_IP", 0 );
			define( "IP_MULTICAST_LOOP", 34 );
			define( "IP_MULTICAST_TTL", 33 );
		}

		// pfsockopen doesn't work because we need set_sock_opt
		$conn = socket_create( AF_INET, SOCK_DGRAM, SOL_UDP );
		if ( !$conn ) {
			$errstr = socket_strerror( socket_last_error() );
			wfDebugLog( 'squid', __METHOD__ .
				": Error opening UDP socket: $errstr" );

			return;
		}

		try {
			// Set socket options
			socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_LOOP, 0 );
			if ( $htcpMulticastTTL != 1 ) {
				// Set multicast time to live (hop count) option on socket
				socket_set_option( $conn, IPPROTO_IP, IP_MULTICAST_TTL,
					$htcpMulticastTTL );
			}

			// Get sequential trx IDs for packet loss counting.
			// 32 bits because the transaction ID is packed below with the 'N' format.
			// NOTE(review): confirm the ID bucket name against existing deployments
			$idGenerator = MediaWikiServices::getInstance()->getGlobalIdGenerator();
			$ids = $idGenerator->newSequentialPerNodeIDs(
				'squidhtcppurge',
				32,
				count( $urls )
			);

			foreach ( $urls as $url ) {
				if ( !is_string( $url ) ) {
					throw new InvalidArgumentException( 'Bad purge URL' );
				}
				$url = self::expand( $url );
				$conf = self::getRuleForURL( $url, $htcpRouting );
				if ( !$conf ) {
					wfDebugLog( 'squid', __METHOD__ .
						": No HTCP rule configured for URL {$url} , skipping" );
					continue;
				}

				if ( isset( $conf['host'] ) && isset( $conf['port'] ) ) {
					// Normalize single entries
					$conf = [ $conf ];
				}
				foreach ( $conf as $subconf ) {
					if ( !isset( $subconf['host'] ) || !isset( $subconf['port'] ) ) {
						throw new RuntimeException( "Invalid HTCP rule for URL $url\n" );
					}
				}

				// Construct a minimal HTCP request diagram
				// as per RFC 2756
				// Opcode 'CLR', no response desired, no auth
				$htcpTransID = current( $ids );
				next( $ids );

				// Specifier: length-prefixed method, URL and protocol version,
				// followed by an empty (zero-length) request header block
				$htcpSpecifier = pack( 'na4na*na8n',
					4, 'HEAD', strlen( $url ), $url,
					8, 'HTTP/1.0', 0 );

				$htcpDataLen = 8 + 2 + strlen( $htcpSpecifier );
				$htcpLen = 4 + $htcpDataLen + 2;

				// Note! Squid gets the bit order of the first
				// word wrong, wrt the RFC. Apparently no other
				// implementation exists, so adapt to Squid
				$htcpPacket = pack( 'nxxnCxNxxa*n',
					$htcpLen, $htcpDataLen, $htcpOpCLR,
					$htcpTransID, $htcpSpecifier, 2 );

				wfDebugLog( 'squid', __METHOD__ .
					": Purging URL $url via HTCP" );
				foreach ( $conf as $subconf ) {
					socket_sendto( $conn, $htcpPacket, $htcpLen, 0,
						$subconf['host'], $subconf['port'] );
				}
			}
		} finally {
			// Release the socket even if a bad URL or routing rule threw above
			socket_close( $conn );
		}
	}

	/**
	 * Send HTTP PURGE requests for each of the URLs to all of the cache servers
	 *
	 * URLs that cannot be parsed into at least a host are logged and skipped.
	 *
	 * @param string[] $urls
	 */
	private static function naivePurge( array $urls ) {
		$cdnServers = MediaWikiServices::getInstance()->getMainConfig()->get( MainConfigNames::CdnServers );

		$reqs = [];
		foreach ( $urls as $url ) {
			$url = self::expand( $url );
			$urlInfo = wfGetUrlUtils()->parse( $url );
			if ( !isset( $urlInfo['host'] ) ) {
				// Without a host there is no usable Host header; indexing a failed
				// parse result would only raise PHP warnings and send a bogus request
				wfDebugLog( 'squid', __METHOD__ . ": Unable to parse URL '$url', skipping" );
				continue;
			}
			$urlHost = strlen( $urlInfo['port'] ?? '' )
				? IPUtils::combineHostAndPort( $urlInfo['host'], (int)$urlInfo['port'] )
				: $urlInfo['host'];
			$baseReq = [
				'method' => 'PURGE',
				'url' => $url,
				'headers' => [
					'Host' => $urlHost,
					'Connection' => 'Keep-Alive',
					'Proxy-Connection' => 'Keep-Alive',
					'User-Agent' => 'MediaWiki/' . MW_VERSION . ' ' . __CLASS__
				]
			];
			// Same request once per cache server, routed through that server as proxy
			foreach ( $cdnServers as $server ) {
				$reqs[] = ( $baseReq + [ 'proxy' => $server ] );
			}
		}

		if ( !$reqs ) {
			return;
		}

		$http = MediaWikiServices::getInstance()->getHttpRequestFactory()
			->createMultiClient( [ 'maxConnsPerHost' => 8, 'usePipelining' => true ] );
		$http->runMulti( $reqs );
	}

	/**
	 * Expand local URLs to fully-qualified URLs using the internal protocol
	 * and host defined in $wgInternalServer. Input that's already fully-
	 * qualified will be passed through unchanged.
	 *
	 * This is used to generate purge URLs that may be either local to the
	 * main wiki or include a non-native host, such as images hosted on a
	 * second internal server.
	 *
	 * Client functions should not need to call this.
	 *
	 * @param string $url
	 * @return string The expansion result cast to string
	 */
	private static function expand( $url ) {
		return (string)MediaWikiServices::getInstance()->getUrlUtils()->expand( $url, PROTO_INTERNAL );
	}

	/**
	 * Find the HTCP routing rule to use for a given URL.
	 *
	 * Rules are tried in array order; an empty-string key matches every URL.
	 *
	 * @param string $url URL to match
	 * @param array $rules Array of rules, see $wgHTCPRouting for format and behavior
	 * @return mixed Element of $rules that matched, or false if nothing matched
	 */
	private static function getRuleForURL( $url, $rules ) {
		foreach ( $rules as $regex => $routing ) {
			if ( $regex === '' || preg_match( $regex, $url ) ) {
				return $routing;
			}
		}

		return false;
	}
}
// Keep the pre-namespace global class name resolvable for existing callers.
/** @deprecated class alias since 1.42 */
class_alias( CdnCacheUpdate::class, 'CdnCacheUpdate' );