includes/jobqueue/jobs/HTMLCacheUpdateJob.php

   1 <?php
   2 /**
   3  * HTML cache invalidation of all pages linking to a given title.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  * @ingroup JobQueue
  22  * @ingroup Cache
  23  */
  24
  25 use MediaWiki\MediaWikiServices;
  26
  27 /**
  28  * Job to purge the cache for all pages that link to or use another page or file
  29  *
  30  * This job comes in a few variants:
  31  *   - a) Recursive jobs to purge caches for backlink pages for a given title.
  32  *        These jobs have (recursive:true,table:<table>) set.
  33  *   - b) Jobs to purge caches for a set of titles (the job title is ignored).
  34  *        These jobs have (pages:(<page ID>:(<namespace>,<title>),...) set.
  35  *
  36  * @ingroup JobQueue
  37  */
  38 class HTMLCacheUpdateJob extends Job {
  39         function __construct( Title $title, array $params ) {
  40                 parent::__construct( 'htmlCacheUpdate', $title, $params );
  41                 // Base backlink purge jobs can be de-duplicated
  42                 $this->removeDuplicates = ( !isset( $params['range'] ) && !isset( $params['pages'] ) );
  43         }
  44
  45         /**
  46          * @param Title $title Title to purge backlink pages from
  47          * @param string $table Backlink table name
  48          * @return HTMLCacheUpdateJob
  49          */
  50         public static function newForBacklinks( Title $title, $table ) {
  51                 return new self(
  52                         $title,
  53                         [
  54                                 'table' => $table,
  55                                 'recursive' => true
  56                         ] + Job::newRootJobParams( // "overall" refresh links job info
  57                                 "htmlCacheUpdate:{$table}:{$title->getPrefixedText()}"
  58                         )
  59                 );
  60         }
  61
  62         function run() {
  63                 global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;
  64
  65                 if ( isset( $this->params['table'] ) && !isset( $this->params['pages'] ) ) {
  66                         $this->params['recursive'] = true; // b/c; base job
  67                 }
  68
  69                 // Job to purge all (or a range of) backlink pages for a page
  70                 if ( !empty( $this->params['recursive'] ) ) {
  71                         // Convert this into no more than $wgUpdateRowsPerJob HTMLCacheUpdateJob per-title
  72                         // jobs and possibly a recursive HTMLCacheUpdateJob job for the rest of the backlinks
  73                         $jobs = BacklinkJobUtils::partitionBacklinkJob(
  74                                 $this,
  75                                 $wgUpdateRowsPerJob,
  76                                 $wgUpdateRowsPerQuery, // jobs-per-title
  77                                 // Carry over information for de-duplication
  78                                 [ 'params' => $this->getRootJobParams() ]
  79                         );
  80                         JobQueueGroup::singleton()->push( $jobs );
  81                 // Job to purge pages for a set of titles
  82                 } elseif ( isset( $this->params['pages'] ) ) {
  83                         $this->invalidateTitles( $this->params['pages'] );
  84                 // Job to update a single title
  85                 } else {
  86                         $t = $this->title;
  87                         $this->invalidateTitles( [
  88                                 $t->getArticleID() => [ $t->getNamespace(), $t->getDBkey() ]
  89                         ] );
  90                 }
  91
  92                 return true;
  93         }
  94
  95         /**
  96          * @param array $pages Map of (page ID => (namespace, DB key)) entries
  97          */
  98         protected function invalidateTitles( array $pages ) {
  99                 global $wgUpdateRowsPerQuery, $wgUseFileCache;
 100
 101                 // Get all page IDs in this query into an array
 102                 $pageIds = array_keys( $pages );
 103                 if ( !$pageIds ) {
 104                         return;
 105                 }
 106
 107                 // Bump page_touched to the current timestamp. This used to use the root job timestamp
 108                 // (e.g. template/file edit time), which was a bit more efficient when template edits are
 109                 // rare and don't effect the same pages much. However, this way allows for better
 110                 // de-duplication, which is much more useful for wikis with high edit rates. Note that
 111                 // RefreshLinksJob, which is enqueued alongside HTMLCacheUpdateJob, saves the parser output
 112                 // since it has to parse anyway. We assume that vast majority of the cache jobs finish
 113                 // before the link jobs, so using the current timestamp instead of the root timestamp is
 114                 // not expected to invalidate these cache entries too often.
 115                 $touchTimestamp = wfTimestampNow();
 116
 117                 $dbw = wfGetDB( DB_MASTER );
 118                 $factory = MediaWikiServices::getInstance()->getDBLoadBalancerFactory();
 119                 $ticket = $factory->getEmptyTransactionTicket( __METHOD__ );
 120                 // Update page_touched (skipping pages already touched since the root job).
 121                 // Check $wgUpdateRowsPerQuery for sanity; batch jobs are sized by that already.
 122                 foreach ( array_chunk( $pageIds, $wgUpdateRowsPerQuery ) as $batch ) {
 123                         $factory->commitAndWaitForReplication( __METHOD__, $ticket );
 124
 125                         $dbw->update( 'page',
 126                                 [ 'page_touched' => $dbw->timestamp( $touchTimestamp ) ],
 127                                 [ 'page_id' => $batch,
 128                                         // don't invalidated pages that were already invalidated
 129                                         "page_touched < " . $dbw->addQuotes( $dbw->timestamp( $touchTimestamp ) )
 130                                 ],
 131                                 __METHOD__
 132                         );
 133                 }
 134                 // Get the list of affected pages (races only mean something else did the purge)
 135                 $titleArray = TitleArray::newFromResult( $dbw->select(
 136                         'page',
 137                         [ 'page_namespace', 'page_title' ],
 138                         [ 'page_id' => $pageIds, 'page_touched' => $dbw->timestamp( $touchTimestamp ) ],
 139                         __METHOD__
 140                 ) );
 141
 142                 // Update CDN
 143                 $u = CdnCacheUpdate::newFromTitles( $titleArray );
 144                 $u->doUpdate();
 145
 146                 // Update file cache
 147                 if ( $wgUseFileCache ) {
 148                         foreach ( $titleArray as $title ) {
 149                                 HTMLFileCache::clearFileCache( $title );
 150                         }
 151                 }
 152         }
 153
 154         public function workItemCount() {
 155                 return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
 156         }
 157 }