[JobQueue] Optimized redis queue to use Lua scripts.
[mediawiki.git] / includes / job / JobQueueRedis.php
blob 41855a72ff5da8b562b1560695cbcf1ed8bdfde7
<?php
/**
 * Redis-backed job queue code.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @author Aaron Schulz
 */

/**
 * Class to handle job queues stored in Redis
 *
 * This is a faster and less resource-intensive job queue than JobQueueDB.
 * All data for a queue using this class is placed into one redis server.
 *
 * There are seven main redis keys used to track jobs:
 *   - l-unclaimed : A list of job IDs used for push/pop
 *   - z-claimed   : A sorted set of (job ID, UNIX timestamp as score) used for job retries
 *   - z-abandoned : A sorted set of (job ID, UNIX timestamp as score) used for broken jobs
 *   - h-idBySha1  : A hash of (SHA1 => job ID) for unclaimed jobs used for de-duplication
 *   - h-sha1ById  : A hash of (job ID => SHA1) for unclaimed jobs used for de-duplication
 *   - h-attempts  : A hash of (job ID => attempt count) used for job claiming/retries
 *   - h-data      : A hash of (job ID => serialized blobs) for job storage
 * Any given job ID can be in only one of l-unclaimed, z-claimed, and z-abandoned.
 * If an ID appears in any of those lists, it should have an h-data entry for its ID.
 * If a job has a non-empty SHA1 de-duplication value and its ID is in l-unclaimed,
 * then no other unclaimed job should have that same SHA1. Every h-idBySha1 entry has
 * a corresponding h-sha1ById entry, and every h-sha1ById entry must refer to an ID that
 * is in l-unclaimed. If a job has its ID in z-claimed or z-abandoned, then it must also
 * have an h-attempts entry for its ID.
 *
 * Additionally, "rootjob:*" keys are used to track "root jobs" for additional de-duplication.
 * Aside from root job keys, all keys have no expiry and are only removed when jobs are run.
 * All the keys are prefixed with the relevant wiki ID information.
 *
 * This class requires Redis 2.6 as it makes use of Lua scripts for fast atomic operations.
 * Note that redis has different persistence modes, such as RDB snapshots, AOF journaling,
 * and no persistence at all. The servers should be configured appropriately, based on
 * which queues use them and how much data loss those queues can tolerate.
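 *
 * As an illustrative sketch only (the host and option values below are hypothetical),
 * a wiki could route its job types to a queue of this class via $wgJobTypeConf:
 * @code
 *   $wgJobTypeConf['default'] = array(
 *       'class'       => 'JobQueueRedis',
 *       'redisServer' => 'rdb1.example.org:6379', // hypothetical server
 *       'redisConfig' => array( 'connectTimeout' => 1 ), // passed to RedisConnectionPool
 *       'claimTTL'    => 3600 // enable acknowledgements and retries
 *   );
 * @endcode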
 *
 * @ingroup JobQueue
 * @since 1.21
 */
class JobQueueRedis extends JobQueue {
	/** @var RedisConnectionPool */
	protected $redisPool;

	protected $server; // string; server address

	const ROOTJOB_TTL = 1209600; // integer; seconds to remember root jobs (14 days)
	const MAX_AGE_PRUNE = 604800; // integer; seconds a job can live once claimed (7 days)

	protected $key; // string; key to prefix the queue keys with (used for testing)

	/**
	 * @param array $params Possible keys:
	 *   - redisConfig : An array of parameters to RedisConnectionPool::__construct().
	 *                   Note that the serializer option is ignored; "none" is always used.
	 *   - redisServer : A hostname/port combination or the absolute path of a UNIX socket.
	 *                   If a hostname is specified but no port, the standard port number
	 *                   6379 will be used. Required.
	 */
	public function __construct( array $params ) {
		parent::__construct( $params );
		$params['redisConfig']['serializer'] = 'none'; // make it easy to use Lua
		$this->server = $params['redisServer'];
		$this->redisPool = RedisConnectionPool::singleton( $params['redisConfig'] );
	}

	protected function supportedOrders() {
		return array( 'timestamp', 'fifo' );
	}

	protected function optimalOrder() {
		return 'fifo';
	}

	/**
	 * @see JobQueue::doIsEmpty()
	 * @return bool
	 * @throws MWException
	 */
	protected function doIsEmpty() {
		return $this->doGetSize() == 0;
	}

	/**
	 * @see JobQueue::doGetSize()
	 * @return integer
	 * @throws MWException
	 */
	protected function doGetSize() {
		$conn = $this->getConnection();
		try {
			return $conn->lSize( $this->getQueueKey( 'l-unclaimed' ) );
		} catch ( RedisException $e ) {
			$this->throwRedisException( $this->server, $conn, $e );
		}
	}

	/**
	 * @see JobQueue::doGetAcquiredCount()
	 * @return integer
	 * @throws MWException
	 */
	protected function doGetAcquiredCount() {
		if ( $this->claimTTL <= 0 ) {
			return 0; // no acknowledgements
		}
		$conn = $this->getConnection();
		try {
			$conn->multi( Redis::PIPELINE );
			$conn->zSize( $this->getQueueKey( 'z-claimed' ) );
			$conn->zSize( $this->getQueueKey( 'z-abandoned' ) );
			return array_sum( $conn->exec() );
		} catch ( RedisException $e ) {
			$this->throwRedisException( $this->server, $conn, $e );
		}
	}

	/**
	 * @see JobQueue::doBatchPush()
	 * @param array $jobs
	 * @param $flags
	 * @return bool
	 * @throws MWException
	 */
	protected function doBatchPush( array $jobs, $flags ) {
		// Convert the jobs into field maps (de-duplicated against each other)
		$items = array(); // (job ID => job fields map)
		foreach ( $jobs as $job ) {
			$item = $this->getNewJobFields( $job );
			if ( strlen( $item['sha1'] ) ) { // hash identifier => de-duplicate
				$items[$item['sha1']] = $item;
			} else {
				$items[$item['uuid']] = $item;
			}
		}

		// Convert the field maps into serialized blobs
		$tuples = array();
		foreach ( $items as $item ) {
			$tuples[] = array( $item['uuid'], $item['sha1'], serialize( $item ) );
		}

		if ( !count( $tuples ) ) {
			return true; // nothing to do
		}

		$conn = $this->getConnection();
		try {
			// Actually push the non-duplicate jobs into the queue...
			if ( $flags & self::QoS_Atomic ) {
				$batches = array( $tuples ); // all or nothing
			} else {
				$batches = array_chunk( $tuples, 500 ); // avoid tying up the server
			}
			$failed = 0;
			$pushed = 0;
			foreach ( $batches as $tupleBatch ) {
				$added = $this->pushBlobs( $conn, $tupleBatch );
				if ( is_int( $added ) ) {
					$pushed += $added;
				} else {
					$failed += count( $tupleBatch );
				}
			}
			if ( $failed > 0 ) {
				wfDebugLog( 'JobQueueRedis', "Could not insert {$failed} {$this->type} job(s)." );
				return false;
			}
			wfIncrStats( 'job-insert', count( $tuples ) );
			wfIncrStats( 'job-insert-duplicate', count( $tuples ) - $failed - $pushed );
		} catch ( RedisException $e ) {
			$this->throwRedisException( $this->server, $conn, $e );
		}

		return true;
	}
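
	// De-duplication sketch (the $jobA/$jobB variables are hypothetical): two jobs whose
	// ignoreDuplicates() is true and whose getDeduplicationInfo() arrays are identical get
	// the same 'sha1' field from getNewJobFields(), so doBatchPush() keys both under that
	// SHA1 and only one blob reaches pushBlobs():
	//
	//   $queue->push( array( $jobA, $jobB ) ); // only one copy is enqueued
	//
	// Jobs without a SHA1 are keyed by their random UUID and are never collapsed.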

	/**
	 * @param RedisConnRef $conn
	 * @param array $tuples List of tuples of (job ID, job SHA1 or '', serialized blob)
	 * @return integer Number of jobs inserted (duplicates are ignored)
	 * @throws RedisException
	 */
	protected function pushBlobs( RedisConnRef $conn, array $tuples ) {
		$args = array(); // ([id, sha1, blob [, id, sha1, blob ... ] ] )
		foreach ( $tuples as $tuple ) {
			$args[] = $tuple[0]; // id
			$args[] = $tuple[1]; // sha1
			$args[] = $tuple[2]; // blob
		}
		static $script =
<<<LUA
if #ARGV % 3 ~= 0 then return redis.error_reply('Unmatched arguments') end
local pushed = 0
for i = 1,#ARGV,3 do
	local id,sha1,blob = ARGV[i],ARGV[i+1],ARGV[i+2]
	if sha1 == '' or redis.call('hExists',KEYS[3],sha1) == 0 then
		redis.call('lPush',KEYS[1],id)
		if sha1 ~= '' then
			redis.call('hSet',KEYS[2],id,sha1)
			redis.call('hSet',KEYS[3],sha1,id)
		end
		redis.call('hSet',KEYS[4],id,blob)
		pushed = pushed + 1
	end
end
return pushed
LUA;
		return $this->redisEval( $conn, $script,
			array_merge(
				array(
					$this->getQueueKey( 'l-unclaimed' ), # KEYS[1]
					$this->getQueueKey( 'h-sha1ById' ), # KEYS[2]
					$this->getQueueKey( 'h-idBySha1' ), # KEYS[3]
					$this->getQueueKey( 'h-data' ), # KEYS[4]
				),
				$args
			),
			4 # number of first argument(s) that are keys
		);
	}
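
	// For reference, the argument layout the push script above receives, shown with
	// hypothetical values for two tuples: KEYS[1..4] are the queue keys and ARGV is the
	// flattened list of (id, sha1, blob) triples.
	//
	//   KEYS = { <l-unclaimed>, <h-sha1ById>, <h-idBySha1>, <h-data> }
	//   ARGV = { 'uuid1', 'sha1-or-empty1', 'blob1', 'uuid2', 'sha1-or-empty2', 'blob2' }
	//
	// This is why the script insists that #ARGV be a multiple of 3.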

	/**
	 * @see JobQueue::doPop()
	 * @return Job|bool
	 * @throws MWException
	 */
	protected function doPop() {
		$job = false;

		$conn = $this->getConnection();
		try {
			do {
				if ( $this->claimTTL > 0 ) {
					// Keep the claimed job list down for high-traffic queues
					if ( mt_rand( 0, 99 ) == 0 ) {
						$this->recycleAndDeleteStaleJobs();
					}
					$blob = $this->popAndAcquireBlob( $conn );
				} else {
					$blob = $this->popAndDeleteBlob( $conn );
				}
				if ( $blob === false ) {
					break; // no jobs; nothing to do
				}

				wfIncrStats( 'job-pop' );
				$item = unserialize( $blob );
				if ( $item === false ) {
					wfDebugLog( 'JobQueueRedis', "Could not unserialize {$this->type} job." );
					continue;
				}

				// If $item is invalid, recycleAndDeleteStaleJobs() will cleanup as needed
				$job = $this->getJobFromFields( $item ); // may be false
			} while ( !$job ); // job may be false if invalid
		} catch ( RedisException $e ) {
			$this->throwRedisException( $this->server, $conn, $e );
		}

		// Flag this job as an old duplicate based on its "root" job...
		try {
			if ( $job && $this->isRootJobOldDuplicate( $job ) ) {
				wfIncrStats( 'job-pop-duplicate' );
				return DuplicateJob::newFromJob( $job ); // convert to a no-op
			}
		} catch ( MWException $e ) {} // don't lose jobs over this

		return $job;
	}
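
	// A minimal consumer sketch (assuming $queue is an instance of this class, e.g. as
	// returned by JobQueueGroup::singleton()->get( 'someType' ) on a suitably configured
	// wiki): pop a job, run it, then acknowledge it so doAck() can clear the z-claimed,
	// h-attempts, and h-data entries for that job.
	//
	//   $job = $queue->pop();
	//   if ( $job && $job->run() ) {
	//       $queue->ack( $job );
	//   }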

	/**
	 * @param RedisConnRef $conn
	 * @return string|bool Serialized job blob or false on failure
	 * @throws RedisException
	 */
	protected function popAndDeleteBlob( RedisConnRef $conn ) {
		static $script =
<<<LUA
-- Pop an item off the queue
local id = redis.call('rpop',KEYS[1])
if not id then return false end
-- Get the job data and remove it
local item = redis.call('hGet',KEYS[4],id)
redis.call('hDel',KEYS[4],id)
-- Allow new duplicates of this job
local sha1 = redis.call('hGet',KEYS[2],id)
if sha1 then redis.call('hDel',KEYS[3],sha1) end
redis.call('hDel',KEYS[2],id)
-- Return the job data
return item
LUA;
		return $this->redisEval( $conn, $script,
			array(
				$this->getQueueKey( 'l-unclaimed' ), # KEYS[1]
				$this->getQueueKey( 'h-sha1ById' ), # KEYS[2]
				$this->getQueueKey( 'h-idBySha1' ), # KEYS[3]
				$this->getQueueKey( 'h-data' ), # KEYS[4]
			),
			4 # number of first argument(s) that are keys
		);
	}

	/**
	 * @param RedisConnRef $conn
	 * @return string|bool Serialized job blob or false on failure
	 * @throws RedisException
	 */
	protected function popAndAcquireBlob( RedisConnRef $conn ) {
		static $script =
<<<LUA
-- Pop an item off the queue
local id = redis.call('rPop',KEYS[1])
if not id then return false end
-- Allow new duplicates of this job
local sha1 = redis.call('hGet',KEYS[2],id)
if sha1 then redis.call('hDel',KEYS[3],sha1) end
redis.call('hDel',KEYS[2],id)
-- Mark the job as claimed and return its data
redis.call('zAdd',KEYS[4],ARGV[1],id)
redis.call('hIncrBy',KEYS[5],id,1)
return redis.call('hGet',KEYS[6],id)
LUA;
		return $this->redisEval( $conn, $script,
			array(
				$this->getQueueKey( 'l-unclaimed' ), # KEYS[1]
				$this->getQueueKey( 'h-sha1ById' ), # KEYS[2]
				$this->getQueueKey( 'h-idBySha1' ), # KEYS[3]
				$this->getQueueKey( 'z-claimed' ), # KEYS[4]
				$this->getQueueKey( 'h-attempts' ), # KEYS[5]
				$this->getQueueKey( 'h-data' ), # KEYS[6]
				time(), # ARGV[1] (injected to be replication-safe)
			),
			6 # number of first argument(s) that are keys
		);
	}

	/**
	 * @see JobQueue::doAck()
	 * @param Job $job
	 * @return bool
	 * @throws MWException
	 */
	protected function doAck( Job $job ) {
		if ( $this->claimTTL > 0 ) {
			$conn = $this->getConnection();
			try {
				// Get the exact field map this Job came from, regardless of whether
				// the job was transformed into a DuplicateJob or anything of the sort.
				$item = $job->metadata['sourceFields'];

				static $script =
<<<LUA
-- Unmark the job as claimed
redis.call('zRem',KEYS[1],ARGV[1])
redis.call('hDel',KEYS[2],ARGV[1])
-- Delete the job data itself
return redis.call('hDel',KEYS[3],ARGV[1])
LUA;
				$res = $this->redisEval( $conn, $script,
					array(
						$this->getQueueKey( 'z-claimed' ), # KEYS[1]
						$this->getQueueKey( 'h-attempts' ), # KEYS[2]
						$this->getQueueKey( 'h-data' ), # KEYS[3]
						$item['uuid'] # ARGV[1]
					),
					3 # number of first argument(s) that are keys
				);

				if ( !$res ) {
					wfDebugLog( 'JobQueueRedis', "Could not acknowledge {$this->type} job." );
					return false;
				}
			} catch ( RedisException $e ) {
				$this->throwRedisException( $this->server, $conn, $e );
			}
		}
		return true;
	}

	/**
	 * @see JobQueue::doDeduplicateRootJob()
	 * @param Job $job
	 * @return bool
	 * @throws MWException
	 */
	protected function doDeduplicateRootJob( Job $job ) {
		$params = $job->getParams();
		if ( !isset( $params['rootJobSignature'] ) ) {
			throw new MWException( "Cannot register root job; missing 'rootJobSignature'." );
		} elseif ( !isset( $params['rootJobTimestamp'] ) ) {
			throw new MWException( "Cannot register root job; missing 'rootJobTimestamp'." );
		}
		$key = $this->getRootJobKey( $params['rootJobSignature'] );

		$conn = $this->getConnection();
		try {
			$timestamp = $conn->get( $key ); // current last timestamp of this job
			if ( $timestamp && $timestamp >= $params['rootJobTimestamp'] ) {
				return true; // a newer version of this root job was enqueued
			}
			// Update the timestamp of the last root job started at the location...
			return $conn->set( $key, $params['rootJobTimestamp'], self::ROOTJOB_TTL ); // 2 weeks
		} catch ( RedisException $e ) {
			$this->throwRedisException( $this->server, $conn, $e );
		}
	}

	/**
	 * Check if the "root" job of a given job has been superseded by a newer one
	 *
	 * @param $job Job
	 * @return bool
	 * @throws MWException
	 */
	protected function isRootJobOldDuplicate( Job $job ) {
		$params = $job->getParams();
		if ( !isset( $params['rootJobSignature'] ) ) {
			return false; // job has no de-duplication info
		} elseif ( !isset( $params['rootJobTimestamp'] ) ) {
			wfDebugLog( 'JobQueueRedis', "Cannot check root job; missing 'rootJobTimestamp'." );
			return false;
		}

		$conn = $this->getConnection();
		try {
			// Get the last time this root job was enqueued
			$timestamp = $conn->get( $this->getRootJobKey( $params['rootJobSignature'] ) );
		} catch ( RedisException $e ) {
			$this->throwRedisException( $this->server, $conn, $e );
		}

		// Check if a new root job was started at the location after this one's...
		return ( $timestamp && $timestamp > $params['rootJobTimestamp'] );
	}
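
	// Root job de-duplication sketch (ExampleJob is a hypothetical Job subclass): the
	// submitter stamps a batch of jobs with a signature/timestamp pair, deduplicateRootJob()
	// records the newest timestamp under a "rootjob:<signature>" key, and any job popped
	// later that carries an older timestamp for the same signature is converted into a
	// no-op DuplicateJob by doPop().
	//
	//   $params = Job::newRootJobParams( "refreshlinks:{$title->getPrefixedDBkey()}" );
	//   JobQueueGroup::singleton()->push( new ExampleJob( $title, $params ) );
	//   JobQueueGroup::singleton()->deduplicateRootJob( new ExampleJob( $title, $params ) );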

	/**
	 * @see JobQueue::getAllQueuedJobs()
	 * @return Iterator
	 */
	public function getAllQueuedJobs() {
		$conn = $this->getConnection();
		if ( !$conn ) {
			throw new MWException( "Unable to connect to redis server." );
		}
		try {
			$that = $this;
			return new MappedIterator(
				$conn->lRange( $this->getQueueKey( 'l-unclaimed' ), 0, -1 ),
				function( $uid ) use ( $that, $conn ) {
					return $that->getJobFromUidInternal( $uid, $conn );
				}
			);
		} catch ( RedisException $e ) {
			$this->throwRedisException( $this->server, $conn, $e );
		}
	}

	/**
	 * This function should not be called outside JobQueueRedis
	 *
	 * @param $uid string
	 * @param $conn RedisConnRef
	 * @return Job
	 * @throws MWException
	 */
	public function getJobFromUidInternal( $uid, RedisConnRef $conn ) {
		try {
			$item = unserialize( $conn->hGet( $this->getQueueKey( 'h-data' ), $uid ) );
			if ( !is_array( $item ) ) { // this shouldn't happen
				throw new MWException( "Could not find job with ID '$uid'." );
			}
			$title = Title::makeTitle( $item['namespace'], $item['title'] );
			$job = Job::factory( $item['type'], $title, $item['params'] );
			$job->metadata['sourceFields'] = $item;
			return $job;
		} catch ( RedisException $e ) {
			$this->throwRedisException( $this->server, $conn, $e );
		}
	}

	/**
	 * Recycle or destroy any jobs that have been claimed for too long
	 *
	 * @return integer Number of jobs recycled/deleted
	 * @throws MWException
	 */
	public function recycleAndDeleteStaleJobs() {
		if ( $this->claimTTL <= 0 ) { // sanity
			throw new MWException( "Cannot recycle jobs since acknowledgements are disabled." );
		}
		$count = 0;
		// For each job item that can be retried, we need to add it back to the
		// main queue and remove it from the list of currently claimed job items.
		// For those that cannot, they are marked as dead and kept around for
		// investigation and manual job restoration but are eventually deleted.
		$conn = $this->getConnection();
		try {
			$now = time();
			static $script =
<<<LUA
local released,abandoned,pruned = 0,0,0
-- Get all non-dead jobs that have an expired claim on them.
-- The result is a flat (id, score, id, score, ...) list; the score for
-- each item is the last claim timestamp (UNIX).
local staleClaims = redis.call('zRangeByScore',KEYS[1],0,ARGV[1],'WITHSCORES')
for i = 1,#staleClaims,2 do
	local id,timestamp = staleClaims[i],staleClaims[i+1]
	local attempts = redis.call('hGet',KEYS[2],id)
	if tonumber(attempts) < tonumber(ARGV[3]) then
		-- Claim expired and retries left: re-enqueue the job
		redis.call('lPush',KEYS[3],id)
		redis.call('hIncrBy',KEYS[2],id,1)
		released = released + 1
	else
		-- Claim expired and no retries left: mark the job as dead
		redis.call('zAdd',KEYS[5],timestamp,id)
		abandoned = abandoned + 1
	end
	redis.call('zRem',KEYS[1],id)
end
-- Get all of the dead jobs that have been marked as dead for too long.
-- The result is a flat (id, score, id, score, ...) list; the score for
-- each item is the last claim timestamp (UNIX).
local deadClaims = redis.call('zRangeByScore',KEYS[5],0,ARGV[2],'WITHSCORES')
for i = 1,#deadClaims,2 do
	local id = deadClaims[i]
	-- Stale and out of retries: remove any traces of the job
	redis.call('zRem',KEYS[5],id)
	redis.call('hDel',KEYS[2],id)
	redis.call('hDel',KEYS[4],id)
	pruned = pruned + 1
end
return {released,abandoned,pruned}
LUA;
			$res = $this->redisEval( $conn, $script,
				array(
					$this->getQueueKey( 'z-claimed' ), # KEYS[1]
					$this->getQueueKey( 'h-attempts' ), # KEYS[2]
					$this->getQueueKey( 'l-unclaimed' ), # KEYS[3]
					$this->getQueueKey( 'h-data' ), # KEYS[4]
					$this->getQueueKey( 'z-abandoned' ), # KEYS[5]
					$now - $this->claimTTL, # ARGV[1]
					$now - self::MAX_AGE_PRUNE, # ARGV[2]
					$this->maxTries # ARGV[3]
				),
				5 # number of first argument(s) that are keys
			);
			if ( $res ) {
				list( $released, $abandoned, $pruned ) = $res;
				$count += $released + $pruned;
				wfIncrStats( 'job-recycle', $released );
			}
		} catch ( RedisException $e ) {
			$this->throwRedisException( $this->server, $conn, $e );
		}

		return $count;
	}

	/**
	 * @return Array
	 */
	protected function doGetPeriodicTasks() {
		if ( $this->claimTTL > 0 ) {
			return array(
				'recycleAndDeleteStaleJobs' => array(
					'callback' => array( $this, 'recycleAndDeleteStaleJobs' ),
					'period' => ceil( $this->claimTTL / 2 )
				)
			);
		} else {
			return array();
		}
	}

	/**
	 * @param RedisConnRef $conn
	 * @param string $script
	 * @param array $params
	 * @param integer $numKeys
	 * @return mixed
	 */
	protected function redisEval( RedisConnRef $conn, $script, array $params, $numKeys ) {
		$res = $conn->evalSha( sha1( $script ), $params, $numKeys );
		if ( $res === false && $conn->getLastError() != '' ) { // not in script cache?
			wfDebugLog( 'JobQueueRedis', "Lua script error: " . $conn->getLastError() );
			$res = $conn->eval( $script, $params, $numKeys );
		}
		return $res;
	}

	/**
	 * @param $job Job
	 * @return array
	 */
	protected function getNewJobFields( Job $job ) {
		return array(
			// Fields that describe the nature of the job
			'type' => $job->getType(),
			'namespace' => $job->getTitle()->getNamespace(),
			'title' => $job->getTitle()->getDBkey(),
			'params' => $job->getParams(),
			// Additional job metadata
			'uuid' => UIDGenerator::newRawUUIDv4( UIDGenerator::QUICK_RAND ),
			'sha1' => $job->ignoreDuplicates()
				? wfBaseConvert( sha1( serialize( $job->getDeduplicationInfo() ) ), 16, 36, 31 )
				: '',
			'timestamp' => time() // UNIX timestamp
		);
	}

	/**
	 * @param $fields array
	 * @return Job|bool
	 */
	protected function getJobFromFields( array $fields ) {
		$title = Title::makeTitleSafe( $fields['namespace'], $fields['title'] );
		if ( $title ) {
			$job = Job::factory( $fields['type'], $title, $fields['params'] );
			$job->metadata['sourceFields'] = $fields;
			return $job;
		}
		return false;
	}

	/**
	 * Get a connection to the server that handles all sub-queues for this queue
	 *
	 * @return RedisConnRef
	 * @throws MWException
	 */
	protected function getConnection() {
		$conn = $this->redisPool->getConnection( $this->server );
		if ( !$conn ) {
			throw new MWException( "Unable to connect to redis server." );
		}
		return $conn;
	}

	/**
	 * @param $server string
	 * @param $conn RedisConnRef
	 * @param $e RedisException
	 * @throws MWException
	 */
	protected function throwRedisException( $server, RedisConnRef $conn, $e ) {
		$this->redisPool->handleException( $server, $conn, $e );
		throw new MWException( "Redis server error: {$e->getMessage()}\n" );
	}

	/**
	 * @param $prop string
	 * @return string
	 */
	private function getQueueKey( $prop ) {
		list( $db, $prefix ) = wfSplitWikiID( $this->wiki );
		if ( strlen( $this->key ) ) { // namespaced queue (for testing)
			return wfForeignMemcKey( $db, $prefix, 'jobqueue', $this->type, $this->key, $prop );
		} else {
			return wfForeignMemcKey( $db, $prefix, 'jobqueue', $this->type, $prop );
		}
	}
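
	// For illustration only (assuming wfForeignMemcKey() joins its arguments with ':' and
	// the wiki has no table prefix), a 'refreshLinks' queue on wiki ID 'enwiki' would use
	// keys such as:
	//
	//   enwiki:jobqueue:refreshLinks:l-unclaimed
	//   enwiki:jobqueue:refreshLinks:h-data
	//   enwiki:jobqueue:refreshLinks:rootjob:<signature>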

	/**
	 * @param string $signature Hash identifier of the root job
	 * @return string
	 */
	private function getRootJobKey( $signature ) {
		list( $db, $prefix ) = wfSplitWikiID( $this->wiki );
		return wfForeignMemcKey( $db, $prefix, 'jobqueue', $this->type, 'rootjob', $signature );
	}

	/**
	 * @param $key string
	 * @return void
	 */
	public function setTestingPrefix( $key ) {
		$this->key = $key;
	}
}