236db9544c1d9747eae843b8b660d486e8acc72e
[lhc/web/wiklou.git] / includes / cache / bloom / BloomCache.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @author Aaron Schulz
20 */
21
22 /**
23 * Persistent bloom filter used to avoid expensive lookups
24 *
25 * @since 1.24
26 */
27 abstract class BloomCache {
28 /** @var string Unique ID for key namespacing */
29 protected $cacheID;
30
31 /** @var array Map of (id => BloomCache) */
32 protected static $instances = array();
33
34 /**
35 * @param string $id
36 * @return BloomCache
37 */
38 final public static function get( $id ) {
39 global $wgBloomFilterStores;
40
41 if ( !isset( self::$instances[$id] ) ) {
42 if ( isset( $wgBloomFilterStores[$id] ) ) {
43 $class = $wgBloomFilterStores[$id]['class'];
44 self::$instances[$id] = new $class( $wgBloomFilterStores[$id] );
45 } else {
46 wfDebug( "No bloom filter store '$id'; using EmptyBloomCache." );
47 return new EmptyBloomCache( array() );
48 }
49 }
50
51 return self::$instances[$id];
52 }
53
54 /**
55 * Create a new bloom cache instance from configuration.
56 * This should only be called from within BloomCache.
57 *
58 * @param array $config Parameters include:
59 * - cacheID : Prefix to all bloom filter names that is unique to this cache.
60 * It should only consist of alphanumberic, '-', and '_' characters.
61 * This ID is what avoids collisions if multiple logical caches
62 * use the same storage system, so this should be set carefully.
63 */
64 public function __construct( array $config ) {
65 $this->cacheID = $config['cacheId'];
66 if ( !preg_match( '!^[a-zA-Z0-9-_]{1,32}$!', $this->cacheID ) ) {
67 throw new MWException( "Cache ID '{$this->cacheID}' is invalid." );
68 }
69 }
70
71 /**
72 * Check if a member is set in the bloom filter
73 *
74 * A member being set means that it *might* have been added.
75 * A member not being set means it *could not* have been added.
76 *
77 * This abstracts over isHit() to deal with filter updates and readiness.
78 * A class must exist with the name BloomFilter<type> and a static public
79 * mergeAndCheck() method. The later takes the following arguments:
80 * (BloomCache $bcache, $domain, $virtualKey, array $status)
81 * The method should return a bool indicating whether to use the filter.
82 *
83 * The 'shared' bloom key must be used for any updates and will be used
84 * for the membership check if the method returns true. Since the key is shared,
85 * the method should never use delete(). The filter cannot be used in cases where
86 * membership in the filter needs to be taken away. In such cases, code *cannot*
87 * use this method - instead, it can directly use the other BloomCache methods
88 * to manage custom filters with their own keys (e.g. not 'shared').
89 *
90 * @param string $domain
91 * @param string $type
92 * @param string $member
93 * @return bool True if set, false if not (also returns true on error)
94 */
95 final public function check( $domain, $type, $member ) {
96 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
97
98 if ( method_exists( "BloomFilter{$type}", 'mergeAndCheck' ) ) {
99 try {
100 $virtualKey = "$domain:$type";
101
102 $status = $this->getStatus( $virtualKey );
103 if ( $status == false ) {
104 wfDebug( "Could not query virtual bloom filter '$virtualKey'." );
105 return null;
106 }
107
108 $useFilter = call_user_func_array(
109 array( "BloomFilter{$type}", 'mergeAndCheck' ),
110 array( $this, $domain, $virtualKey, $status )
111 );
112
113 if ( $useFilter ) {
114 return ( $this->isHit( 'shared', "$virtualKey:$member" ) !== false );
115 }
116 } catch ( MWException $e ) {
117 MWExceptionHandler::logException( $e );
118 return true;
119 }
120 }
121
122 return true;
123 }
124
125 /**
126 * Inform the bloom filter of a new member in order to keep it up to date
127 *
128 * @param string $domain
129 * @param string $type
130 * @param string|array $members
131 * @return bool Success
132 */
133 final public function insert( $domain, $type, $members ) {
134 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
135
136 if ( method_exists( "BloomFilter{$type}", 'mergeAndCheck' ) ) {
137 try {
138 $virtualKey = "$domain:$type";
139 $prefixedMembers = array();
140 foreach ( (array)$members as $member ) {
141 $prefixedMembers[] = "$virtualKey:$member";
142 }
143
144 return $this->add( 'shared', $prefixedMembers );
145 } catch ( MWException $e ) {
146 MWExceptionHandler::logException( $e );
147 return false;
148 }
149 }
150
151 return true;
152 }
153
154 /**
155 * Create a new bloom filter at $key (if one does not exist yet)
156 *
157 * @param string $key
158 * @param integer $size Bit length [default: 1000000]
159 * @param float $precision [default: .001]
160 * @return bool Success
161 */
162 final public function init( $key, $size = 1000000, $precision = .001 ) {
163 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
164
165 return $this->doInit( "{$this->cacheID}:$key", $size, min( .1, $precision ) );
166 }
167
168 /**
169 * Add a member to the bloom filter at $key
170 *
171 * @param string $key
172 * @param string|array $members
173 * @return bool Success
174 */
175 final public function add( $key, $members ) {
176 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
177
178 return $this->doAdd( "{$this->cacheID}:$key", (array)$members );
179 }
180
181 /**
182 * Check if a member is set in the bloom filter.
183 *
184 * A member being set means that it *might* have been added.
185 * A member not being set means it *could not* have been added.
186 *
187 * If this returns true, then the caller usually should do the
188 * expensive check (whatever that may be). It can be avoided otherwise.
189 *
190 * @param string $key
191 * @param string $member
192 * @return bool|null True if set, false if not, null on error
193 */
194 final public function isHit( $key, $member ) {
195 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
196
197 return $this->doIsHit( "{$this->cacheID}:$key", $member );
198 }
199
200 /**
201 * Destroy a bloom filter at $key
202 *
203 * @param string $key
204 * @return bool Success
205 */
206 final public function delete( $key ) {
207 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
208
209 return $this->doDelete( "{$this->cacheID}:$key" );
210 }
211
212 /**
213 * Set the status map of the virtual bloom filter at $key
214 *
215 * @param string $virtualKey
216 * @param array $values Map including some of (lastID, asOfTime, epoch)
217 * @return bool Success
218 */
219 final public function setStatus( $virtualKey, array $values ) {
220 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
221
222 return $this->doSetStatus( "{$this->cacheID}:$virtualKey", $values );
223 }
224
225 /**
226 * Get the status map of the virtual bloom filter at $key
227 *
228 * The map includes:
229 * - lastID : the highest ID of the items merged in
230 * - asOfTime : UNIX timestamp that the filter is up-to-date as of
231 * - epoch : UNIX timestamp that filter started being populated
232 * Unset fields will have a null value.
233 *
234 * @param string $virtualKey
235 * @return array|bool False on failure
236 */
237 final public function getStatus( $virtualKey ) {
238 $section = new ProfileSection( get_class( $this ) . '::' . __FUNCTION__ );
239
240 return $this->doGetStatus( "{$this->cacheID}:$virtualKey" );
241 }
242
243 /**
244 * Get an exclusive lock on a filter for updates
245 *
246 * @param string $virtualKey
247 * @return ScopedCallback|ScopedLock|null Returns null if acquisition failed
248 */
249 public function getScopedLock( $virtualKey ) {
250 return null;
251 }
252
253 /**
254 * @param string $key
255 * @param integer $size Bit length
256 * @param float $precision
257 * @return bool Success
258 */
259 abstract protected function doInit( $key, $size, $precision );
260
261 /**
262 * @param string $key
263 * @param array $members
264 * @return bool Success
265 */
266 abstract protected function doAdd( $key, array $members );
267
268 /**
269 * @param string $key
270 * @param string $member
271 * @return bool|null
272 */
273 abstract protected function doIsHit( $key, $member );
274
275 /**
276 * @param string $key
277 * @return bool Success
278 */
279 abstract protected function doDelete( $key );
280
281 /**
282 * @param string $virtualKey
283 * @param array $values
284 * @return bool Success
285 */
286 abstract protected function doSetStatus( $virtualKey, array $values );
287
288 /**
289 * @param string $key
290 * @return array|bool
291 */
292 abstract protected function doGetStatus( $key );
293 }
294
295 class EmptyBloomCache extends BloomCache {
296 public function __construct( array $config ) {
297 parent::__construct( array( 'cacheId' => 'none' ) );
298 }
299
300 protected function doInit( $key, $size, $precision ) {
301 return true;
302 }
303
304 protected function doAdd( $key, array $members ) {
305 return true;
306 }
307
308 protected function doIsHit( $key, $member ) {
309 return true;
310 }
311
312 protected function doDelete( $key ) {
313 return true;
314 }
315
316 protected function doSetStatus( $virtualKey, array $values ) {
317 return true;
318 }
319
320 protected function doGetStatus( $virtualKey ) {
321 return array( 'lastID' => null, 'asOfTime' => null, 'epoch' => null ) ;
322 }
323 }