includes/jobqueue/jobs/HTMLCacheUpdateJob.php

   1 <?php
   2 /**
   3  * HTML cache invalidation of all pages linking to a given title.
   4  *
   5  * This program is free software; you can redistribute it and/or modify
   6  * it under the terms of the GNU General Public License as published by
   7  * the Free Software Foundation; either version 2 of the License, or
   8  * (at your option) any later version.
   9  *
  10  * This program is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  13  * GNU General Public License for more details.
  14  *
  15  * You should have received a copy of the GNU General Public License along
  16  * with this program; if not, write to the Free Software Foundation, Inc.,
  17  * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
  18  * http://www.gnu.org/copyleft/gpl.html
  19  *
  20  * @file
  21  * @ingroup JobQueue
  22  * @ingroup Cache
  23  */
  24
  25 /**
  26  * Job to purge the cache for all pages that link to or use another page or file
  27  *
  28  * This job comes in a few variants:
  29  *   - a) Recursive jobs to purge caches for backlink pages for a given title.
  30  *        These jobs have (recursive:true,table:<table>) set.
  31  *   - b) Jobs to purge caches for a set of titles (the job title is ignored).
  32  *        These jobs have (pages:(<page ID>:(<namespace>,<title>),...) set.
  33  *
  34  * @ingroup JobQueue
  35  */
  36 class HTMLCacheUpdateJob extends Job {
  37         function __construct( Title $title, array $params ) {
  38                 parent::__construct( 'htmlCacheUpdate', $title, $params );
  39                 // Base backlink purge jobs can be de-duplicated
  40                 $this->removeDuplicates = ( !isset( $params['range'] ) && !isset( $params['pages'] ) );
  41         }
  42
  43         /**
  44          * @param Title $title Title to purge backlink pages from
  45          * @param string $table Backlink table name
  46          * @return HTMLCacheUpdateJob
  47          */
  48         public static function newForBacklinks( Title $title, $table ) {
  49                 return new self(
  50                         $title,
  51                         [
  52                                 'table' => $table,
  53                                 'recursive' => true
  54                         ] + Job::newRootJobParams( // "overall" refresh links job info
  55                                 "htmlCacheUpdate:{$table}:{$title->getPrefixedText()}"
  56                         )
  57                 );
  58         }
  59
  60         function run() {
  61                 global $wgUpdateRowsPerJob, $wgUpdateRowsPerQuery;
  62
  63                 if ( isset( $this->params['table'] ) && !isset( $this->params['pages'] ) ) {
  64                         $this->params['recursive'] = true; // b/c; base job
  65                 }
  66
  67                 // Job to purge all (or a range of) backlink pages for a page
  68                 if ( !empty( $this->params['recursive'] ) ) {
  69                         // Convert this into no more than $wgUpdateRowsPerJob HTMLCacheUpdateJob per-title
  70                         // jobs and possibly a recursive HTMLCacheUpdateJob job for the rest of the backlinks
  71                         $jobs = BacklinkJobUtils::partitionBacklinkJob(
  72                                 $this,
  73                                 $wgUpdateRowsPerJob,
  74                                 $wgUpdateRowsPerQuery, // jobs-per-title
  75                                 // Carry over information for de-duplication
  76                                 [ 'params' => $this->getRootJobParams() ]
  77                         );
  78                         JobQueueGroup::singleton()->push( $jobs );
  79                 // Job to purge pages for a set of titles
  80                 } elseif ( isset( $this->params['pages'] ) ) {
  81                         $this->invalidateTitles( $this->params['pages'] );
  82                 // Job to update a single title
  83                 } else {
  84                         $t = $this->title;
  85                         $this->invalidateTitles( [
  86                                 $t->getArticleID() => [ $t->getNamespace(), $t->getDBkey() ]
  87                         ] );
  88                 }
  89
  90                 return true;
  91         }
  92
  93         /**
  94          * @param array $pages Map of (page ID => (namespace, DB key)) entries
  95          */
  96         protected function invalidateTitles( array $pages ) {
  97                 global $wgUpdateRowsPerQuery, $wgUseFileCache;
  98
  99                 // Get all page IDs in this query into an array
 100                 $pageIds = array_keys( $pages );
 101                 if ( !$pageIds ) {
 102                         return;
 103                 }
 104
 105                 // Bump page_touched to the current timestamp. This used to use the root job timestamp
 106                 // (e.g. template/file edit time), which was a bit more efficient when template edits are
 107                 // rare and don't effect the same pages much. However, this way allows for better
 108                 // de-duplication, which is much more useful for wikis with high edit rates. Note that
 109                 // RefreshLinksJob, which is enqueued alongside HTMLCacheUpdateJob, saves the parser output
 110                 // since it has to parse anyway. We assume that vast majority of the cache jobs finish
 111                 // before the link jobs, so using the current timestamp instead of the root timestamp is
 112                 // not expected to invalidate these cache entries too often.
 113                 $touchTimestamp = wfTimestampNow();
 114
 115                 $dbw = wfGetDB( DB_MASTER );
 116                 $factory = wfGetLBFactory();
 117                 $ticket = $factory->getEmptyTransactionTicket( __METHOD__ );
 118                 // Update page_touched (skipping pages already touched since the root job).
 119                 // Check $wgUpdateRowsPerQuery for sanity; batch jobs are sized by that already.
 120                 foreach ( array_chunk( $pageIds, $wgUpdateRowsPerQuery ) as $batch ) {
 121                         $factory->commitAndWaitForReplication( __METHOD__, $ticket );
 122
 123                         $dbw->update( 'page',
 124                                 [ 'page_touched' => $dbw->timestamp( $touchTimestamp ) ],
 125                                 [ 'page_id' => $batch,
 126                                         // don't invalidated pages that were already invalidated
 127                                         "page_touched < " . $dbw->addQuotes( $dbw->timestamp( $touchTimestamp ) )
 128                                 ],
 129                                 __METHOD__
 130                         );
 131                 }
 132                 // Get the list of affected pages (races only mean something else did the purge)
 133                 $titleArray = TitleArray::newFromResult( $dbw->select(
 134                         'page',
 135                         [ 'page_namespace', 'page_title' ],
 136                         [ 'page_id' => $pageIds, 'page_touched' => $dbw->timestamp( $touchTimestamp ) ],
 137                         __METHOD__
 138                 ) );
 139
 140                 // Update CDN
 141                 $u = CdnCacheUpdate::newFromTitles( $titleArray );
 142                 $u->doUpdate();
 143
 144                 // Update file cache
 145                 if ( $wgUseFileCache ) {
 146                         foreach ( $titleArray as $title ) {
 147                                 HTMLFileCache::clearFileCache( $title );
 148                         }
 149                 }
 150         }
 151
 152         public function workItemCount() {
 153                 return isset( $this->params['pages'] ) ? count( $this->params['pages'] ) : 1;
 154         }
 155 }