Merge "rest: Return a 400 for invalid render IDs"
[mediawiki.git] / includes / libs / HashRing.php
blob64d755c272603041a83fb3530976746d0cc3c9c0
1 <?php
2 /**
3 * Convenience class for weighted consistent hash rings.
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
20 * @file
23 /**
24 * Convenience class for weighted consistent hash rings
26 * This deterministically maps "keys" to a set of "locations" while avoiding clumping
28 * Each location is represented by a number of nodes on a ring proportionate to the ratio
29 * of its weight compared to the total location weight. Note positions are deterministically
30 * derived from the hash of the location name. Nodes are responsible for the portion of the
31 * ring, counter-clockwise, up until the next node. Locations are responsible for all portions
32 * of the ring that the location's nodes are responsible for.
34 * A location that is temporarily "ejected" is said to be absent from the "live" ring.
35 * If no location ejections are active, then the base ring and live ring are identical.
37 * This class is designed in a way that using the "md5" algorithm will make it compatible
38 * with libketama, e.g. OPT_LIBKETAMA_COMPATIBLE from the PECL memcached extension or "ketama"
39 * from twemproxy. This can simplify the process of switching client libraries. However, note
40 * that different clients might use incompatible 32-bit memcached value flag conventions.
42 * @since 1.22
44 class HashRing {
45 /** @var string Hashing algorithm for hash() */
46 protected $algo;
47 /** @var int[] Non-empty (location => integer weight) */
48 protected $weightByLocation;
49 /** @var int[] Map of (location => UNIX timestamp) */
50 protected $ejectExpiryByLocation;
52 /** @var array[] Non-empty position-ordered list of (position, location name) */
53 protected $baseRing;
54 /** @var array[]|null Non-empty position-ordered list of (position, location name) */
55 protected $liveRing;
57 /** @var integer Overall number of node groups per server */
58 private const HASHES_PER_LOCATION = 40;
59 /** @var integer Number of nodes in a node group */
60 private const SECTORS_PER_HASH = 4;
62 public const KEY_POS = 0;
63 public const KEY_LOCATION = 1;
65 /** @var int Consider all locations */
66 public const RING_ALL = 0;
67 /** @var int Only consider "live" locations */
68 public const RING_LIVE = 1;
70 /**
71 * Make a consistent hash ring given a set of locations and their weight values
73 * @param int[] $map Map of (location => weight)
74 * @param string $algo Hashing algorithm listed in hash_algos() [optional]
75 * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expiries
76 * @since 1.31
78 public function __construct( array $map, $algo = 'sha1', array $ejections = [] ) {
79 $this->init( $map, $algo, $ejections );
82 /**
83 * @param int[] $map Map of (location => integer)
84 * @param string $algo Hashing algorithm
85 * @param int[] $ejections Map of (location => UNIX timestamp) for ejection expires
87 protected function init( array $map, $algo, array $ejections ) {
88 if ( !in_array( $algo, hash_algos(), true ) ) {
89 throw new RuntimeException( __METHOD__ . ": unsupported '$algo' hash algorithm." );
92 $weightByLocation = array_filter( $map );
93 if ( $weightByLocation === [] ) {
94 throw new UnexpectedValueException( "No locations with non-zero weight." );
95 } elseif ( min( $map ) < 0 ) {
96 throw new InvalidArgumentException( "Location weight cannot be negative." );
99 $this->algo = $algo;
100 $this->weightByLocation = $weightByLocation;
101 $this->ejectExpiryByLocation = $ejections;
102 $this->baseRing = $this->buildLocationRing( $this->weightByLocation );
106 * Get the location of an item on the ring
108 * @param string $item
109 * @return string Location
110 * @throws UnexpectedValueException
112 final public function getLocation( $item ) {
113 return $this->getLocations( $item, 1 )[0];
117 * Get the location of an item on the ring followed by the next ring locations
119 * @param string $item
120 * @param int $limit Maximum number of locations to return
121 * @param int $from One of the RING_* class constants
122 * @return string[] List of locations
123 * @throws UnexpectedValueException
125 public function getLocations( $item, $limit, $from = self::RING_ALL ) {
126 if ( $from === self::RING_ALL ) {
127 $ring = $this->baseRing;
128 } elseif ( $from === self::RING_LIVE ) {
129 $ring = $this->getLiveRing();
130 } else {
131 throw new InvalidArgumentException( "Invalid ring source specified." );
134 // Short-circuit for the common single-location case. Note that if there was only one
135 // location and it was ejected from the live ring, getLiveRing() would have error out.
136 if ( count( $this->weightByLocation ) == 1 ) {
137 return ( $limit > 0 ) ? [ $ring[0][self::KEY_LOCATION] ] : [];
140 // Locate the node index for this item's position on the hash ring
141 $itemIndex = $this->findNodeIndexForPosition( $this->getItemPosition( $item ), $ring );
143 $locations = [];
144 $currentIndex = null;
145 while ( count( $locations ) < $limit ) {
146 if ( $currentIndex === null ) {
147 $currentIndex = $itemIndex;
148 } else {
149 $currentIndex = $this->getNextClockwiseNodeIndex( $currentIndex, $ring );
150 if ( $currentIndex === $itemIndex ) {
151 break; // all nodes visited
154 // @phan-suppress-next-line PhanTypeMismatchDimFetchNullable False positive
155 $nodeLocation = $ring[$currentIndex][self::KEY_LOCATION];
156 if ( !in_array( $nodeLocation, $locations, true ) ) {
157 // Ignore other nodes for the same locations already added
158 $locations[] = $nodeLocation;
162 return $locations;
166 * @param float $position
167 * @param array[] $ring Either the base or live ring
168 * @return int|null
170 private function findNodeIndexForPosition( $position, $ring ) {
171 $count = count( $ring );
172 if ( $count === 0 ) {
173 return null;
176 $index = null;
177 $lowPos = 0;
178 $highPos = $count;
179 while ( true ) {
180 $midPos = (int)( ( $lowPos + $highPos ) / 2 );
181 if ( $midPos === $count ) {
182 $index = 0;
183 break;
186 $midVal = $ring[$midPos][self::KEY_POS];
187 $midMinusOneVal = ( $midPos === 0 ) ? 0 : $ring[$midPos - 1][self::KEY_POS];
188 if ( $position <= $midVal && $position > $midMinusOneVal ) {
189 $index = $midPos;
190 break;
193 if ( $midVal < $position ) {
194 $lowPos = $midPos + 1;
195 } else {
196 $highPos = $midPos - 1;
199 if ( $lowPos > $highPos ) {
200 $index = 0;
201 break;
205 return $index;
209 * Get the map of locations to weight (does not include zero weight items)
211 * @return int[]
213 public function getLocationWeights() {
214 return $this->weightByLocation;
218 * Remove a location from the "live" hash ring
220 * @param string $location
221 * @param int $ttl Seconds
222 * @return bool Whether some non-ejected locations are left
223 * @throws UnexpectedValueException
225 public function ejectFromLiveRing( $location, $ttl ) {
226 if ( !isset( $this->weightByLocation[$location] ) ) {
227 throw new UnexpectedValueException( "No location '$location' in the ring." );
230 $expiry = $this->getCurrentTime() + $ttl;
231 $this->ejectExpiryByLocation[$location] = $expiry;
233 $this->liveRing = null; // invalidate ring cache
235 return ( count( $this->ejectExpiryByLocation ) < count( $this->weightByLocation ) );
239 * Get the location of an item on the "live" ring
241 * @param string $item
242 * @return string Location
243 * @throws UnexpectedValueException
245 final public function getLiveLocation( $item ) {
246 return $this->getLocations( $item, 1, self::RING_LIVE )[0];
250 * Get the location of an item on the "live" ring, as well as the next locations
252 * @param string $item
253 * @param int $limit Maximum number of locations to return
254 * @return string[] List of locations
255 * @throws UnexpectedValueException
257 final public function getLiveLocations( $item, $limit ) {
258 return $this->getLocations( $item, $limit, self::RING_LIVE );
262 * Get the map of "live" locations to weight (does not include zero weight items)
264 * @return int[]
265 * @throws UnexpectedValueException
267 public function getLiveLocationWeights() {
268 $now = $this->getCurrentTime();
270 return array_diff_key(
271 $this->weightByLocation,
272 array_filter(
273 $this->ejectExpiryByLocation,
274 static function ( $expiry ) use ( $now ) {
275 return ( $expiry > $now );
282 * @param int[] $weightByLocation
283 * @return array[]
285 private function buildLocationRing( array $weightByLocation ) {
286 $locationCount = count( $weightByLocation );
287 $totalWeight = array_sum( $weightByLocation );
289 $ring = [];
290 // Assign nodes to all locations based on location weight
291 $claimed = []; // (position as string => (node, index))
292 foreach ( $weightByLocation as $location => $weight ) {
293 $ratio = $weight / $totalWeight;
294 // There $locationCount * (HASHES_PER_LOCATION * 4) nodes available;
295 // assign a few groups of nodes to this location based on its weight.
296 $nodesQuartets = intval( $ratio * self::HASHES_PER_LOCATION * $locationCount );
297 for ( $qi = 0; $qi < $nodesQuartets; ++$qi ) {
298 // For efficiency, get 4 points per hash call and 4X node count.
299 // If $algo is MD5, then this matches that of with libketama.
300 // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
301 $positions = $this->getNodePositionQuartet( "{$location}-{$qi}" );
302 foreach ( $positions as $gi => $position ) {
303 $node = ( $qi * self::SECTORS_PER_HASH + $gi ) . "@$location";
304 $posKey = (string)$position; // large integer
305 if ( isset( $claimed[$posKey] ) ) {
306 // Disallow duplicates (name decides precedence)
307 if ( $claimed[$posKey]['node'] > $node ) {
308 continue;
309 } else {
310 unset( $ring[$claimed[$posKey]['index']] );
313 $ring[] = [
314 self::KEY_POS => $position,
315 self::KEY_LOCATION => $location
317 $claimed[$posKey] = [ 'node' => $node, 'index' => count( $ring ) - 1 ];
321 // Sort the locations into clockwise order based on the hash ring position
322 usort( $ring, static function ( $a, $b ) {
323 if ( $a[self::KEY_POS] === $b[self::KEY_POS] ) {
324 throw new UnexpectedValueException( 'Duplicate node positions.' );
327 return ( $a[self::KEY_POS] < $b[self::KEY_POS] ? -1 : 1 );
328 } );
330 return $ring;
334 * @param string $item Key
335 * @return float Ring position; integral number in [0, 4294967296] (2^32)
337 private function getItemPosition( $item ) {
338 // If $algo is MD5, then this matches that of with libketama.
339 // See https://github.com/RJ/ketama/blob/master/libketama/ketama.c
340 $octets = substr( hash( $this->algo, (string)$item, true ), 0, 4 );
341 if ( strlen( $octets ) != 4 ) {
342 throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 32 bits." );
345 $pos = unpack( 'V', $octets )[1];
346 if ( $pos < 0 ) {
347 // Most-significant-bit is set, causing unpack() to return a negative integer due
348 // to the fact that it returns a signed int. Cast it to an unsigned integer string.
349 $pos = sprintf( '%u', $pos );
352 return (float)$pos;
356 * @param string $nodeGroupName
357 * @return float[] Four ring positions on [0, 4294967296] (2^32)
359 private function getNodePositionQuartet( $nodeGroupName ) {
360 $octets = substr( hash( $this->algo, (string)$nodeGroupName, true ), 0, 16 );
361 if ( strlen( $octets ) != 16 ) {
362 throw new UnexpectedValueException( __METHOD__ . ": {$this->algo} is < 128 bits." );
365 $positions = [];
366 foreach ( unpack( 'V4', $octets ) as $signed ) {
367 $positions[] = (float)sprintf( '%u', $signed );
370 return $positions;
374 * @param int $i Valid index for a node in the ring
375 * @param array[] $ring Either the base or live ring
376 * @return int Valid index for a node in the ring
378 private function getNextClockwiseNodeIndex( $i, $ring ) {
379 if ( !isset( $ring[$i] ) ) {
380 throw new UnexpectedValueException( __METHOD__ . ": reference index is invalid." );
383 $next = $i + 1;
385 return ( $next < count( $ring ) ) ? $next : 0;
389 * Get the "live" hash ring (which does not include ejected locations)
391 * @return array[]
392 * @throws UnexpectedValueException
394 protected function getLiveRing() {
395 if ( !$this->ejectExpiryByLocation ) {
396 return $this->baseRing; // nothing ejected
399 $now = $this->getCurrentTime();
401 if ( $this->liveRing === null || min( $this->ejectExpiryByLocation ) <= $now ) {
402 // Live ring needs to be regenerated...
403 $this->ejectExpiryByLocation = array_filter(
404 $this->ejectExpiryByLocation,
405 static function ( $expiry ) use ( $now ) {
406 return ( $expiry > $now );
410 if ( count( $this->ejectExpiryByLocation ) ) {
411 // Some locations are still ejected from the ring
412 $liveRing = [];
413 foreach ( $this->baseRing as $nodeInfo ) {
414 $location = $nodeInfo[self::KEY_LOCATION];
415 if ( !isset( $this->ejectExpiryByLocation[$location] ) ) {
416 $liveRing[] = $nodeInfo;
419 } else {
420 $liveRing = $this->baseRing;
423 $this->liveRing = $liveRing;
426 if ( !$this->liveRing ) {
427 throw new UnexpectedValueException( "The live ring is currently empty." );
430 return $this->liveRing;
434 * @return int UNIX timestamp
436 protected function getCurrentTime() {
437 return time();
440 public function __serialize() {
441 return [
442 'algorithm' => $this->algo,
443 'locations' => $this->weightByLocation,
444 'ejections' => $this->ejectExpiryByLocation
448 public function __unserialize( $data ) {
449 if ( is_array( $data ) ) {
450 $this->init( $data['locations'] ?? [], $data['algorithm'] ?? 'sha1', $data['ejections'] ?? [] );
451 } else {
452 throw new UnexpectedValueException( __METHOD__ . ": unable to decode JSON." );