Support offsets in prefix searching
[mediawiki.git] / includes / cache / bloom / BloomCache.php
blob627f4f0db97d5b67ef92afe693e8788760dd3eb0
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
18 * @file
19 * @author Aaron Schulz
22 /**
23 * Persistent bloom filter used to avoid expensive lookups
25 * @since 1.24
27 abstract class BloomCache {
28 /** @var string Unique ID for key namespacing */
29 protected $cacheID;
31 /** @var array Map of (id => BloomCache) */
32 protected static $instances = array();
34 /**
35 * @param string $id
36 * @return BloomCache
38 final public static function get( $id ) {
39 global $wgBloomFilterStores;
41 if ( !isset( self::$instances[$id] ) ) {
42 if ( isset( $wgBloomFilterStores[$id] ) ) {
43 $class = $wgBloomFilterStores[$id]['class'];
44 self::$instances[$id] = new $class( $wgBloomFilterStores[$id] );
45 } else {
46 wfDebug( "No bloom filter store '$id'; using EmptyBloomCache." );
47 return new EmptyBloomCache( array() );
51 return self::$instances[$id];
54 /**
55 * Create a new bloom cache instance from configuration.
56 * This should only be called from within BloomCache.
58 * @param array $config Parameters include:
59 * - cacheID : Prefix to all bloom filter names that is unique to this cache.
60 * It should only consist of alphanumberic, '-', and '_' characters.
61 * This ID is what avoids collisions if multiple logical caches
62 * use the same storage system, so this should be set carefully.
64 public function __construct( array $config ) {
65 $this->cacheID = $config['cacheId'];
66 if ( !preg_match( '!^[a-zA-Z0-9-_]{1,32}$!', $this->cacheID ) ) {
67 throw new MWException( "Cache ID '{$this->cacheID}' is invalid." );
71 /**
72 * Check if a member is set in the bloom filter
74 * A member being set means that it *might* have been added.
75 * A member not being set means it *could not* have been added.
77 * This abstracts over isHit() to deal with filter updates and readiness.
78 * A class must exist with the name BloomFilter<type> and a static public
79 * mergeAndCheck() method. The later takes the following arguments:
80 * (BloomCache $bcache, $domain, $virtualKey, array $status)
81 * The method should return a bool indicating whether to use the filter.
83 * The 'shared' bloom key must be used for any updates and will be used
84 * for the membership check if the method returns true. Since the key is shared,
85 * the method should never use delete(). The filter cannot be used in cases where
86 * membership in the filter needs to be taken away. In such cases, code *cannot*
87 * use this method - instead, it can directly use the other BloomCache methods
88 * to manage custom filters with their own keys (e.g. not 'shared').
90 * @param string $domain
91 * @param string $type
92 * @param string $member
93 * @return bool True if set, false if not (also returns true on error)
95 final public function check( $domain, $type, $member ) {
96 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
98 if ( method_exists( "BloomFilter{$type}", 'mergeAndCheck' ) ) {
99 try {
100 $virtualKey = "$domain:$type";
102 $status = $this->getStatus( $virtualKey );
103 if ( $status == false ) {
104 wfDebug( "Could not query virtual bloom filter '$virtualKey'." );
105 return true;
108 $useFilter = call_user_func_array(
109 array( "BloomFilter{$type}", 'mergeAndCheck' ),
110 array( $this, $domain, $virtualKey, $status )
113 if ( $useFilter ) {
114 return ( $this->isHit( 'shared', "$virtualKey:$member" ) !== false );
116 } catch ( MWException $e ) {
117 MWExceptionHandler::logException( $e );
118 return true;
122 return true;
126 * Inform the bloom filter of a new member in order to keep it up to date
128 * @param string $domain
129 * @param string $type
130 * @param string|array $members
131 * @return bool Success
133 final public function insert( $domain, $type, $members ) {
134 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
136 if ( method_exists( "BloomFilter{$type}", 'mergeAndCheck' ) ) {
137 try {
138 $virtualKey = "$domain:$type";
139 $prefixedMembers = array();
140 foreach ( (array)$members as $member ) {
141 $prefixedMembers[] = "$virtualKey:$member";
144 return $this->add( 'shared', $prefixedMembers );
145 } catch ( MWException $e ) {
146 MWExceptionHandler::logException( $e );
147 return false;
151 return true;
155 * Create a new bloom filter at $key (if one does not exist yet)
157 * @param string $key
158 * @param integer $size Bit length [default: 1000000]
159 * @param float $precision [default: .001]
160 * @return bool Success
162 final public function init( $key, $size = 1000000, $precision = .001 ) {
163 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
165 return $this->doInit( "{$this->cacheID}:$key", $size, min( .1, $precision ) );
169 * Add a member to the bloom filter at $key
171 * @param string $key
172 * @param string|array $members
173 * @return bool Success
175 final public function add( $key, $members ) {
176 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
178 return $this->doAdd( "{$this->cacheID}:$key", (array)$members );
182 * Check if a member is set in the bloom filter.
184 * A member being set means that it *might* have been added.
185 * A member not being set means it *could not* have been added.
187 * If this returns true, then the caller usually should do the
188 * expensive check (whatever that may be). It can be avoided otherwise.
190 * @param string $key
191 * @param string $member
192 * @return bool|null True if set, false if not, null on error
194 final public function isHit( $key, $member ) {
195 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
197 return $this->doIsHit( "{$this->cacheID}:$key", $member );
201 * Destroy a bloom filter at $key
203 * @param string $key
204 * @return bool Success
206 final public function delete( $key ) {
207 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
209 return $this->doDelete( "{$this->cacheID}:$key" );
213 * Set the status map of the virtual bloom filter at $key
215 * @param string $virtualKey
216 * @param array $values Map including some of (lastID, asOfTime, epoch)
217 * @return bool Success
219 final public function setStatus( $virtualKey, array $values ) {
220 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
222 return $this->doSetStatus( "{$this->cacheID}:$virtualKey", $values );
226 * Get the status map of the virtual bloom filter at $key
228 * The map includes:
229 * - lastID : the highest ID of the items merged in
230 * - asOfTime : UNIX timestamp that the filter is up-to-date as of
231 * - epoch : UNIX timestamp that filter started being populated
232 * Unset fields will have a null value.
234 * @param string $virtualKey
235 * @return array|bool False on failure
237 final public function getStatus( $virtualKey ) {
238 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
240 return $this->doGetStatus( "{$this->cacheID}:$virtualKey" );
244 * Get an exclusive lock on a filter for updates
246 * @param string $virtualKey
247 * @return ScopedCallback|ScopedLock|null Returns null if acquisition failed
249 public function getScopedLock( $virtualKey ) {
250 return null;
254 * @param string $key
255 * @param integer $size Bit length
256 * @param float $precision
257 * @return bool Success
259 abstract protected function doInit( $key, $size, $precision );
262 * @param string $key
263 * @param array $members
264 * @return bool Success
266 abstract protected function doAdd( $key, array $members );
269 * @param string $key
270 * @param string $member
271 * @return bool|null
273 abstract protected function doIsHit( $key, $member );
276 * @param string $key
277 * @return bool Success
279 abstract protected function doDelete( $key );
282 * @param string $virtualKey
283 * @param array $values
284 * @return bool Success
286 abstract protected function doSetStatus( $virtualKey, array $values );
289 * @param string $key
290 * @return array|bool
292 abstract protected function doGetStatus( $key );
295 class EmptyBloomCache extends BloomCache {
296 public function __construct( array $config ) {
297 parent::__construct( array( 'cacheId' => 'none' ) );
300 protected function doInit( $key, $size, $precision ) {
301 return true;
304 protected function doAdd( $key, array $members ) {
305 return true;
308 protected function doIsHit( $key, $member ) {
309 return true;
312 protected function doDelete( $key ) {
313 return true;
316 protected function doSetStatus( $virtualKey, array $values ) {
317 return true;
320 protected function doGetStatus( $virtualKey ) {
321 return array( 'lastID' => null, 'asOfTime' => null, 'epoch' => null );