Expose $wgMaxArticleSize in siteinfo query api
[mediawiki.git] / includes / objectcache / SqlBagOStuff.php
blob98b6eb94da010c2587157b5a35558a4b6a00ccf2
1 <?php
2 /**
3 * Object caching using a SQL database.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
21 * @ingroup Cache
24 /**
25 * Class to store objects in the database
27 * @ingroup Cache
29 class SqlBagOStuff extends BagOStuff {
30 /** @var array[] (server index => server config) */
31 protected $serverInfos;
32 /** @var string[] (server index => tag/host name) */
33 protected $serverTags;
34 /** @var int */
35 protected $numServers;
36 /** @var int */
37 protected $lastExpireAll = 0;
38 /** @var int */
39 protected $purgePeriod = 100;
40 /** @var int */
41 protected $shards = 1;
42 /** @var string */
43 protected $tableName = 'objectcache';
44 /** @var bool */
45 protected $slaveOnly = false;
46 /** @var int */
47 protected $syncTimeout = 3;
49 /** @var array */
50 protected $conns;
51 /** @var array UNIX timestamps */
52 protected $connFailureTimes = [];
53 /** @var array Exceptions */
54 protected $connFailureErrors = [];
/**
 * Build the cache from a configuration array. Recognised keys:
 *
 *   - server:      A single server info structure, in the same format as an
 *                  element of $wgDBServers.
 *   - servers:     A list of server info structures describing the database servers
 *                  that keys are distributed over. When present, "server" is ignored.
 *                  String array keys are used as the consistent-hashing tag *instead*
 *                  of the host name from the server config, which helps when a cluster
 *                  is replicated to another site under different host names but every
 *                  server has a counterpart there.
 *   - purgePeriod: Average number of cache requests between garbage collection runs
 *                  that remove expired rows, i.e. the reciprocal of the probability
 *                  of purging on a given request. Zero disables purging.
 *   - tableName:   Table name to use; defaults to "objectcache".
 *   - shards:      Number of tables used for storage on each server. With more than
 *                  one, tables are named objectcacheNNN where NNN is the zero-padded
 *                  shard index (0 .. shards-1), padded to the width of the largest
 *                  index. Keys are spread over the tables by hash. This is for
 *                  MySQL bugs 61735 and 61736.
 *   - slaveOnly:   Whether to only use slave DBs and skip the garbage collection of
 *                  expired items. This only makes sense if the primary DB is used and
 *                  only get() calls are made. Used by ReplicatedBagOStuff.
 *   - syncTimeout: Max seconds to wait for slaves to catch up for WRITE_SYNC.
 *
 * @param array $params
 */
public function __construct( $params ) {
	parent::__construct( $params );

	if ( isset( $params['servers'] ) ) {
		// Explicit server list: re-index it numerically and remember a hashing tag per entry
		$this->serverInfos = [];
		$this->serverTags = [];
		$this->numServers = count( $params['servers'] );
		$position = 0;
		foreach ( $params['servers'] as $label => $config ) {
			$this->serverInfos[$position] = $config;
			if ( is_string( $label ) ) {
				$tag = $label;
			} elseif ( isset( $config['host'] ) ) {
				$tag = $config['host'];
			} else {
				$tag = "#$position";
			}
			$this->serverTags[$position] = $tag;
			$position++;
		}
	} elseif ( isset( $params['server'] ) ) {
		// A single explicit server
		$this->serverInfos = [ $params['server'] ];
		$this->numServers = count( $this->serverInfos );
	} else {
		// No explicit servers: getDB() falls back to the wiki's own connections
		$this->serverInfos = false;
		$this->numServers = 1;
	}

	if ( isset( $params['purgePeriod'] ) ) {
		$this->purgePeriod = intval( $params['purgePeriod'] );
	}
	if ( isset( $params['tableName'] ) ) {
		$this->tableName = $params['tableName'];
	}
	if ( isset( $params['shards'] ) ) {
		$this->shards = intval( $params['shards'] );
	}
	if ( isset( $params['syncTimeout'] ) ) {
		$this->syncTimeout = $params['syncTimeout'];
	}
	$this->slaveOnly = !empty( $params['slaveOnly'] );
}
/**
 * Return the (lazily opened, then cached) connection for a server index.
 *
 * @param int $serverIndex
 * @return IDatabase
 * @throws MWException When the index is not below the configured server count
 */
protected function getDB( $serverIndex ) {
	// Fast path: the connection was already opened by an earlier call
	if ( isset( $this->conns[$serverIndex] ) ) {
		return $this->conns[$serverIndex];
	}

	if ( $serverIndex >= $this->numServers ) {
		throw new MWException( __METHOD__ . ": Invalid server index \"$serverIndex\"" );
	}

	// Do not keep timing out on every call while the DB is down: within 60 seconds
	// of a recorded failure, rethrow the remembered error instead of reconnecting.
	if ( isset( $this->connFailureErrors[$serverIndex] ) ) {
		$sinceFailure = time() - $this->connFailureTimes[$serverIndex];
		if ( $sinceFailure < 60 ) {
			throw $this->connFailureErrors[$serverIndex];
		}
	}

	if ( $this->serverInfos ) {
		// Explicit connection info was configured, so use it
		$info = $this->serverInfos[$serverIndex];
		$type = isset( $info['type'] ) ? $info['type'] : 'mysql';
		$host = isset( $info['host'] ) ? $info['host'] : '[unknown]';
		$this->logger->debug( __CLASS__ . ": connecting to $host" );
		// Use a blank trx profiler to ignore expectations as this is a cache
		$info['trxProfiler'] = new TransactionProfiler();
		$db = DatabaseBase::factory( $type, $info );
		$db->clearFlag( DBO_TRX );
	} else {
		// We must keep a separate connection to MySQL in order to avoid deadlocks.
		// However, SQLite has the opposite behavior, and PostgreSQL needs to know
		// if we are in a transaction or not (@TODO: find some work-around).
		$index = $this->slaveOnly ? DB_SLAVE : DB_MASTER;
		if ( wfGetDB( $index )->getType() == 'mysql' ) {
			$db = wfGetLBFactory()->newMainLB()->getConnection( $index );
			$db->clearFlag( DBO_TRX ); // auto-commit mode
		} else {
			$db = wfGetDB( $index );
		}
	}

	$this->logger->debug( sprintf( "Connection %s will be used for SqlBagOStuff", $db ) );
	$this->conns[$serverIndex] = $db;

	return $db;
}
/**
 * Work out which server and which table hold a given key.
 *
 * @param string $key
 * @return array Two elements: server index, then table name
 */
protected function getTableByKey( $key ) {
	// Table shard: derived from the first 8 hex digits of the key's MD5,
	// masked to a non-negative 31-bit value.
	$tableIndex = 0;
	if ( $this->shards > 1 ) {
		$hash = hexdec( substr( md5( $key ), 0, 8 ) ) & 0x7fffffff;
		$tableIndex = $hash % $this->shards;
	}

	// Server: the first entry after consistent-hash sorting the tags for this key
	$serverIndex = 0;
	if ( $this->numServers > 1 ) {
		$candidates = $this->serverTags;
		ArrayUtils::consistentHashSort( $candidates, $key );
		reset( $candidates );
		$serverIndex = key( $candidates );
	}

	return [ $serverIndex, $this->getTableNameByShard( $tableIndex ) ];
}
/**
 * Map a shard index to its table name.
 *
 * With a single shard the base table name is returned unchanged. Otherwise the
 * index is appended, zero-padded to the number of digits in (shards - 1).
 *
 * @param int $index
 * @return string
 */
protected function getTableNameByShard( $index ) {
	if ( $this->shards <= 1 ) {
		return $this->tableName;
	}

	$width = strlen( $this->shards - 1 );
	$suffix = sprintf( "%0{$width}d", $index );

	return $this->tableName . $suffix;
}
/**
 * Fetch one key, discarding the CAS token that getWithToken() produces.
 *
 * @param string $key
 * @param int $flags
 * @return mixed The stored value, or false when getWithToken() reports a miss
 */
protected function doGet( $key, $flags = 0 ) {
	$discardedToken = null;

	return $this->getWithToken( $key, $discardedToken, $flags );
}
/**
 * Fetch one key and hand back a CAS token for it.
 *
 * The token is simply the value that was read. On a miss the token is left
 * untouched and false is returned.
 *
 * @param string $key
 * @param mixed &$casToken Receives the value that was read
 * @param int $flags Not forwarded to getMulti()
 * @return mixed The stored value, or false on a miss
 */
protected function getWithToken( $key, &$casToken, $flags = 0 ) {
	$found = $this->getMulti( [ $key ] );
	if ( !array_key_exists( $key, $found ) ) {
		return false;
	}

	$casToken = $found[$key];

	return $casToken;
}
/**
 * Fetch several keys at once.
 *
 * Keys are grouped by (server, table) so each table is queried once. Rows that
 * are found but already expired are treated as misses. Database errors are
 * reported through handleReadError() and the affected keys are simply absent
 * from the result.
 *
 * @param array $keys List of cache keys
 * @param int $flags Not read by this implementation
 * @return array Map of (key => value) for the keys that were found and not expired
 */
public function getMulti( array $keys, $flags = 0 ) {
	$values = []; // array of (key => value)

	// Group keys as server index => table name => list of keys
	$keysByTable = [];
	foreach ( $keys as $key ) {
		list( $serverIndex, $tableName ) = $this->getTableByKey( $key );
		$keysByTable[$serverIndex][$tableName][] = $key;
	}

	$this->garbageCollect(); // expire old entries if any

	$dataRows = [];
	foreach ( $keysByTable as $serverIndex => $serverKeys ) {
		try {
			$db = $this->getDB( $serverIndex );
			foreach ( $serverKeys as $tableName => $tableKeys ) {
				$res = $db->select( $tableName,
					[ 'keyname', 'value', 'exptime' ],
					[ 'keyname' => $tableKeys ],
					__METHOD__,
					// Approximate write-on-the-fly BagOStuff API via blocking.
					// This approximation fails if a ROLLBACK happens (which is rare).
					// We do not want to flush the TRX as that can break callers.
					$db->trxLevel() ? [ 'LOCK IN SHARE MODE' ] : []
				);
				if ( $res === false ) {
					continue;
				}
				foreach ( $res as $row ) {
					// Remember where the row came from for the expiry check below
					$row->serverIndex = $serverIndex;
					$row->tableName = $tableName;
					$dataRows[$row->keyname] = $row;
				}
			}
		} catch ( DBError $e ) {
			$this->handleReadError( $e, $serverIndex );
		}
	}

	foreach ( $keys as $key ) {
		if ( isset( $dataRows[$key] ) ) { // HIT?
			$row = $dataRows[$key];
			$this->debug( "get: retrieved data; expiry time is " . $row->exptime );
			try {
				$db = $this->getDB( $row->serverIndex );
				if ( $this->isExpired( $db, $row->exptime ) ) { // MISS
					$this->debug( "get: key has expired" );
				} else { // HIT
					$values[$key] = $this->unserialize( $db->decodeBlob( $row->value ) );
				}
			} catch ( DBError $e ) {
				// Catch every DBError, not just DBQueryError: if a later table on the
				// same server failed to connect above, the server was marked down and
				// getDB() rethrows the remembered connection error here. This is a
				// read path, so report it the same way as the first loop does.
				$this->handleReadError( $e, $row->serverIndex );
			}
		} else { // MISS
			$this->debug( 'get: no matching rows' );
		}
	}

	return $values;
}
/**
 * Store several key/value pairs that share one expiry.
 *
 * Keys are grouped by (server, table) and written with one REPLACE per table.
 * A failure on one server or table is reported through handleWriteError() and
 * does not stop the remaining writes.
 *
 * @param array $data Map of (key => value)
 * @param int $expiry Expiry as accepted by convertExpiry(); 0 (or negative) means
 *   the row is stamped with getMaxDateTime()
 * @return bool True only if every write succeeded
 */
public function setMulti( array $data, $expiry = 0 ) {
	// Bucket the keys as server index => table name => list of keys
	$buckets = [];
	foreach ( array_keys( $data ) as $cacheKey ) {
		list( $server, $table ) = $this->getTableByKey( $cacheKey );
		$buckets[$server][$table][] = $cacheKey;
	}

	$this->garbageCollect(); // expire old entries if any

	$allStored = true;
	$ttl = (int)$expiry;
	foreach ( $buckets as $server => $tables ) {
		try {
			$db = $this->getDB( $server );
		} catch ( DBError $e ) {
			$this->handleWriteError( $e, $server );
			$allStored = false;
			continue;
		}

		if ( $ttl < 0 ) {
			$ttl = 0;
		}

		if ( $ttl == 0 ) {
			$encExpiry = $this->getMaxDateTime( $db );
		} else {
			// NOTE(review): the converted value is kept in $ttl and therefore fed to
			// convertExpiry() again for the next server; this relies on
			// convertExpiry() leaving an already converted value alone - confirm.
			$ttl = $this->convertExpiry( $ttl );
			$encExpiry = $db->timestamp( $ttl );
		}

		foreach ( $tables as $table => $tableKeys ) {
			$rows = [];
			foreach ( $tableKeys as $cacheKey ) {
				$blob = $db->encodeBlob( $this->serialize( $data[$cacheKey] ) );
				$rows[] = [
					'keyname' => $cacheKey,
					'value' => $blob,
					'exptime' => $encExpiry,
				];
			}

			try {
				$db->replace( $table, [ 'keyname' ], $rows, __METHOD__ );
			} catch ( DBError $e ) {
				$this->handleWriteError( $e, $server );
				$allStored = false;
			}
		}
	}

	return $allStored;
}
/**
 * Store a single value, optionally waiting for replication afterwards.
 *
 * @param string $key
 * @param mixed $value
 * @param int $exptime Passed to setMulti() as the expiry
 * @param int $flags Bitfield; WRITE_SYNC makes the call wait via waitForSlaves()
 * @return bool Success of the write and, when requested, of the replication wait
 */
public function set( $key, $value, $exptime = 0, $flags = 0 ) {
	$stored = $this->setMulti( [ $key => $value ], $exptime );

	$wantsSync = ( $flags & self::WRITE_SYNC ) == self::WRITE_SYNC;
	if ( $wantsSync ) {
		// Only wait when the write itself went through
		$stored = $stored && $this->waitForSlaves();
	}

	return $stored;
}
/**
 * Compare-and-swap: overwrite a key only if its stored blob still equals the
 * serialized form of the CAS token.
 *
 * @param mixed $casToken Value previously read for this key (see getWithToken())
 * @param string $key
 * @param mixed $value New value
 * @param int $exptime Expiry as accepted by convertExpiry(); 0 (or negative) means
 *   the row is stamped with getMaxDateTime()
 * @return bool True if a row was updated; false on a token mismatch or DB error
 */
protected function cas( $casToken, $key, $value, $exptime = 0 ) {
	list( $serverIndex, $tableName ) = $this->getTableByKey( $key );
	try {
		$db = $this->getDB( $serverIndex );
		$exptime = intval( $exptime );

		if ( $exptime < 0 ) {
			$exptime = 0;
		}

		if ( $exptime == 0 ) {
			$encExpiry = $this->getMaxDateTime( $db );
		} else {
			$exptime = $this->convertExpiry( $exptime );
			$encExpiry = $db->timestamp( $exptime );
		}
		// Conditional UPDATE: the WHERE clause matches only while the stored blob
		// is still the one the caller read, so a concurrent change updates 0 rows.
		$db->update(
			$tableName,
			[
				'keyname' => $key,
				'value' => $db->encodeBlob( $this->serialize( $value ) ),
				'exptime' => $encExpiry
			],
			[
				'keyname' => $key,
				'value' => $db->encodeBlob( $this->serialize( $casToken ) )
			],
			__METHOD__
		);
	} catch ( DBError $e ) {
		// Catch all DBError subclasses (as delete()/incr()/setMulti() do) so that a
		// connection failure raised by getDB() is handled instead of escaping.
		$this->handleWriteError( $e, $serverIndex );

		return false;
	}

	return (bool)$db->affectedRows();
}
/**
 * Remove a key from its table.
 *
 * @param string $key
 * @return bool False if a DBError occurred, true otherwise (even if no row existed)
 */
public function delete( $key ) {
	list( $server, $table ) = $this->getTableByKey( $key );

	try {
		$conn = $this->getDB( $server );
		$conn->delete( $table, [ 'keyname' => $key ], __METHOD__ );
	} catch ( DBError $e ) {
		$this->handleWriteError( $e, $server );

		return false;
	}

	return true;
}
/**
 * Add $step to the integer stored under a key.
 *
 * The row is read with FOR UPDATE, deleted, and re-inserted with the new value
 * and the original expiry. A missing or expired row is not created.
 *
 * @param string $key
 * @param int $step
 * @return int|null New value, or null if the key is missing/expired, the
 *   re-insert affected no rows, or a DBError occurred
 */
public function incr( $key, $step = 1 ) {
	list( $server, $table ) = $this->getTableByKey( $key );
	$keyCond = [ 'keyname' => $key ];

	try {
		$conn = $this->getDB( $server );
		$step = intval( $step );
		$current = $conn->selectRow(
			$table,
			[ 'value', 'exptime' ],
			$keyCond,
			__METHOD__,
			[ 'FOR UPDATE' ]
		);
		if ( $current === false ) {
			// Missing
			return null;
		}

		// The row is removed before the expiry check, so an expired row is
		// cleaned up here as a side effect.
		$conn->delete( $table, $keyCond, __METHOD__ );
		if ( $this->isExpired( $conn, $current->exptime ) ) {
			// Expired, do not reinsert
			return null;
		}

		$previous = intval( $this->unserialize( $conn->decodeBlob( $current->value ) ) );
		$newValue = $previous + $step;
		$replacement = [
			'keyname' => $key,
			'value' => $conn->encodeBlob( $this->serialize( $newValue ) ),
			'exptime' => $current->exptime
		];
		$conn->insert( $table, $replacement, __METHOD__, 'IGNORE' );

		if ( $conn->affectedRows() == 0 ) {
			// Race condition. See bug 28611
			$newValue = null;
		}
	} catch ( DBError $e ) {
		$this->handleWriteError( $e, $server );

		return null;
	}

	return $newValue;
}
/**
 * Merge a value through mergeViaCas(), optionally waiting for replication.
 *
 * @param string $key
 * @param callable $callback Passed through to mergeViaCas()
 * @param int $exptime
 * @param int $attempts
 * @param int $flags Bitfield; WRITE_SYNC makes the call wait via waitForSlaves()
 * @return bool Success of the merge and, when requested, of the replication wait
 */
public function merge( $key, callable $callback, $exptime = 0, $attempts = 10, $flags = 0 ) {
	$merged = $this->mergeViaCas( $key, $callback, $exptime, $attempts );

	$wantsSync = ( $flags & self::WRITE_SYNC ) == self::WRITE_SYNC;
	if ( $wantsSync ) {
		// Only wait when the merge itself went through
		$merged = $merged && $this->waitForSlaves();
	}

	return $merged;
}
/**
 * Tell whether a stored expiry timestamp lies in the past.
 *
 * The sentinel returned by getMaxDateTime() never counts as expired.
 *
 * @param IDatabase $db
 * @param string $exptime
 * @return bool
 */
protected function isExpired( $db, $exptime ) {
	if ( $exptime == $this->getMaxDateTime( $db ) ) {
		return false;
	}

	return wfTimestamp( TS_UNIX, $exptime ) < time();
}
/**
 * Return the DB-formatted timestamp stored for entries that have no expiry.
 *
 * Uses 0x7fffffff while the current UNIX time still fits below it, and
 * 1 << 62 once it no longer does.
 *
 * @param IDatabase $db
 * @return string
 */
protected function getMaxDateTime( $db ) {
	$ceiling = ( time() > 0x7fffffff ) ? ( 1 << 62 ) : 0x7fffffff;

	return $db->timestamp( $ceiling );
}
/**
 * Occasionally purge expired rows.
 *
 * Does nothing when purging is disabled (purgePeriod of zero) or in slave-only
 * mode. Otherwise it runs on roughly one call in every $purgePeriod, and never
 * again within about a second of the previous run on this object.
 */
protected function garbageCollect() {
	if ( $this->slaveOnly || !$this->purgePeriod ) {
		// Disabled
		return;
	}

	// Only purge on one in every $this->purgePeriod requests.
	if ( $this->purgePeriod !== 1 ) {
		if ( mt_rand( 0, $this->purgePeriod - 1 ) ) {
			return;
		}
	}

	// Avoid repeating the delete within a few seconds
	$now = time();
	if ( $now <= ( $this->lastExpireAll + 1 ) ) {
		return;
	}

	$this->lastExpireAll = $now;
	$this->expireAll();
}
/**
 * Delete every object whose expiry is earlier than the current time.
 */
public function expireAll() {
	$cutoff = wfTimestampNow();
	$this->deleteObjectsExpiringBefore( $cutoff );
}
/**
 * Delete objects from the database which expire before a certain date.
 *
 * Every shard table on every server is walked in batches of up to 100 rows,
 * ordered by expiry. After each batch the optional callback receives an
 * estimated overall completion percentage.
 *
 * @param string $timestamp
 * @param bool|callable $progressCallback
 * @return bool False as soon as a DBError occurs, true otherwise
 */
public function deleteObjectsExpiringBefore( $timestamp, $progressCallback = false ) {
	for ( $serverIndex = 0; $serverIndex < $this->numServers; $serverIndex++ ) {
		try {
			$db = $this->getDB( $serverIndex );
			$dbTimestamp = $db->timestamp( $timestamp );
			$beforeCutoff = 'exptime < ' . $db->addQuotes( $dbTimestamp );
			// Time span used for the progress estimate; fixed by the first batch
			// seen on this server and only ever widened afterwards.
			$totalSeconds = false;

			for ( $shard = 0; $shard < $this->shards; $shard++ ) {
				$shardTable = $this->getTableNameByShard( $shard );
				// Highest expiry seen so far in this shard; later batches start after it
				$lastSeenExpiry = false;

				for ( ; ; ) {
					$conds = [ $beforeCutoff ];
					if ( $lastSeenExpiry !== false ) {
						$conds[] = 'exptime > ' . $db->addQuotes( $lastSeenExpiry );
					}
					$batch = $db->select(
						$shardTable,
						[ 'keyname', 'exptime' ],
						$conds,
						__METHOD__,
						[ 'LIMIT' => 100, 'ORDER BY' => 'exptime' ]
					);
					if ( $batch === false || !$batch->numRows() ) {
						break;
					}

					// Rows are ordered by expiry, so the first one carries the minimum
					$firstExpiry = $batch->current()->exptime;
					if ( $totalSeconds === false ) {
						$totalSeconds = wfTimestamp( TS_UNIX, $timestamp )
							- wfTimestamp( TS_UNIX, $firstExpiry );
					}

					$batchKeys = [];
					foreach ( $batch as $row ) {
						$batchKeys[] = $row->keyname;
						$lastSeenExpiry = $row->exptime;
					}

					// The expiry bounds are repeated so a row whose expiry changed
					// since the SELECT is not deleted.
					$db->delete(
						$shardTable,
						[
							'exptime >= ' . $db->addQuotes( $firstExpiry ),
							$beforeCutoff,
							'keyname' => $batchKeys
						],
						__METHOD__
					);

					if ( !$progressCallback ) {
						continue;
					}

					if ( intval( $totalSeconds ) === 0 ) {
						$shardPercent = 0;
					} else {
						$remainingSeconds = wfTimestamp( TS_UNIX, $timestamp )
							- wfTimestamp( TS_UNIX, $lastSeenExpiry );
						if ( $remainingSeconds > $totalSeconds ) {
							$totalSeconds = $remainingSeconds;
						}
						$processedSeconds = $totalSeconds - $remainingSeconds;
						$shardPercent = ( $shard + $processedSeconds / $totalSeconds )
							/ $this->shards * 100;
					}
					// Scale this server's share into the overall range
					$overallPercent = ( $shardPercent / $this->numServers )
						+ ( $serverIndex / $this->numServers * 100 );
					call_user_func( $progressCallback, $overallPercent );
				}
			}
		} catch ( DBError $e ) {
			$this->handleWriteError( $e, $serverIndex );

			return false;
		}
	}

	return true;
}
/**
 * Empty every shard table on every server.
 *
 * @return bool True if all deletes succeeded, false as soon as a DBError occurs
 */
public function deleteAll() {
	for ( $server = 0; $server < $this->numServers; $server++ ) {
		try {
			$conn = $this->getDB( $server );
			for ( $shard = 0; $shard < $this->shards; $shard++ ) {
				$shardTable = $this->getTableNameByShard( $shard );
				$conn->delete( $shardTable, '*', __METHOD__ );
			}
		} catch ( DBError $e ) {
			$this->handleWriteError( $e, $server );

			return false;
		}
	}

	return true;
}
/**
 * Serialize a value and compress the result when gzdeflate() is available.
 * On typical message and page data, this can provide a 3X decrease
 * in storage requirements.
 *
 * @param mixed &$data
 * @return string
 */
protected function serialize( &$data ) {
	$plain = serialize( $data );

	return function_exists( 'gzdeflate' ) ? gzdeflate( $plain ) : $plain;
}
/**
 * Reverse serialize(): inflate the blob when possible, then unserialize it.
 *
 * If gzinflate() is unavailable or fails, the input is unserialized as given,
 * which covers blobs that were stored uncompressed.
 *
 * @param string $serial
 * @return mixed
 */
protected function unserialize( $serial ) {
	if ( function_exists( 'gzinflate' ) ) {
		// gzinflate() warns on data that is not deflated; that case is expected here
		MediaWiki\suppressWarnings();
		$inflated = gzinflate( $serial );
		MediaWiki\restoreWarnings();

		if ( $inflated !== false ) {
			$serial = $inflated;
		}
	}

	return unserialize( $serial );
}
/**
 * Report a DBError raised while reading; the exception is not rethrown.
 *
 * Connection errors additionally mark the server as down.
 *
 * @param DBError $exception
 * @param int $serverIndex
 */
protected function handleReadError( DBError $exception, $serverIndex ) {
	$isConnectionError = $exception instanceof DBConnectionError;

	if ( $isConnectionError ) {
		$this->markServerDown( $exception, $serverIndex );
	}

	$this->logger->error( "DBError: {$exception->getMessage()}" );

	if ( !$isConnectionError ) {
		$this->setLastError( BagOStuff::ERR_UNEXPECTED );
		$this->logger->debug( __METHOD__ . ": ignoring query error" );

		return;
	}

	$this->setLastError( BagOStuff::ERR_UNREACHABLE );
	$this->logger->debug( __METHOD__ . ": ignoring connection error" );
}
/**
 * Handle a DBError which occurred during a write operation; the exception is
 * not rethrown.
 *
 * Connection errors mark the server as down. If the failure was a read-only
 * error inside an open transaction, a rollback is attempted on a best-effort
 * basis.
 *
 * @param DBError $exception
 * @param int $serverIndex
 */
protected function handleWriteError( DBError $exception, $serverIndex ) {
	if ( $exception instanceof DBConnectionError ) {
		$this->markServerDown( $exception, $serverIndex );
	}
	if ( $exception->db && $exception->db->wasReadOnlyError() ) {
		if ( $exception->db->trxLevel() ) {
			try {
				$exception->db->rollback( __METHOD__ );
			} catch ( DBError $rollbackError ) {
				// Best effort only: the original failure is reported below, but leave
				// a trace of the failed rollback instead of dropping it silently.
				$this->logger->debug(
					__METHOD__ . ": rollback failed: {$rollbackError->getMessage()}"
				);
			}
		}
	}

	$this->logger->error( "DBError: {$exception->getMessage()}" );
	if ( $exception instanceof DBConnectionError ) {
		$this->setLastError( BagOStuff::ERR_UNREACHABLE );
		$this->logger->debug( __METHOD__ . ": ignoring connection error" );
	} else {
		$this->setLastError( BagOStuff::ERR_UNEXPECTED );
		$this->logger->debug( __METHOD__ . ": ignoring query error" );
	}
}
/**
 * Record that a server is unreachable after a DBConnectionError.
 *
 * The cached connection is dropped, and the failure time and exception are
 * remembered so getDB() can rethrow instead of reconnecting for 60 seconds.
 *
 * @param DBError $exception
 * @param int $serverIndex
 */
protected function markServerDown( $exception, $serverIndex ) {
	unset( $this->conns[$serverIndex] ); // bug T103435

	if ( isset( $this->connFailureTimes[$serverIndex] ) ) {
		$downFor = time() - $this->connFailureTimes[$serverIndex];
		if ( $downFor < 60 ) {
			// Still inside the current down window; keep the original record
			$this->logger->debug( __METHOD__ . ": Server #$serverIndex already down" );

			return;
		}
		// The previous window has lapsed; forget it before recording a new one
		unset( $this->connFailureTimes[$serverIndex] );
		unset( $this->connFailureErrors[$serverIndex] );
	}

	$now = time();
	$this->logger->info( __METHOD__ . ": Server #$serverIndex down until " . ( $now + 60 ) );
	$this->connFailureTimes[$serverIndex] = $now;
	$this->connFailureErrors[$serverIndex] = $exception;
}
/**
 * Create the shard tables on every server. For use from eval.php.
 *
 * Each shard table is created with the structure of the "objectcache" table.
 *
 * @throws MWException If a server is not of type "mysql"
 */
public function createTables() {
	for ( $server = 0; $server < $this->numServers; $server++ ) {
		$conn = $this->getDB( $server );
		if ( $conn->getType() !== 'mysql' ) {
			throw new MWException( __METHOD__ . ' is not supported on this DB server' );
		}

		for ( $shard = 0; $shard < $this->shards; $shard++ ) {
			$newTable = $conn->tableName( $this->getTableNameByShard( $shard ) );
			$template = $conn->tableName( 'objectcache' );
			$conn->query( 'CREATE TABLE ' . $newTable . ' LIKE ' . $template, __METHOD__ );
		}
	}
}
/**
 * Wait for replication to catch up after a WRITE_SYNC write.
 *
 * @return bool False only if waiting on the main load balancer failed
 */
protected function waitForSlaves() {
	if ( $this->serverInfos ) {
		// Custom DB server list; probably doesn't use replication
		return true;
	}

	// Main LB is used; wait for any slaves to catch up
	try {
		wfGetLBFactory()->waitForReplication( [ 'wiki' => wfWikiID() ] );
	} catch ( DBReplicationWaitError $e ) {
		return false;
	}

	return true;
}