Merge "Add missing uploadstash.us_props for PostgreSQL"
[lhc/web/wiklou.git] / includes / utils / UIDGenerator.php
1 <?php
2 /**
3 * This file deals with UID generation.
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License along
16 * with this program; if not, write to the Free Software Foundation, Inc.,
17 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
18 * http://www.gnu.org/copyleft/gpl.html
19 *
20 * @file
21 * @author Aaron Schulz
22 */
23
24 /**
25 * Class for getting statistically unique IDs
26 *
27 * @since 1.21
28 */
29 class UIDGenerator {
30 /** @var UIDGenerator */
31 protected static $instance = null;
32
33 protected $nodeId32; // string; node ID in binary (32 bits)
34 protected $nodeId48; // string; node ID in binary (48 bits)
35
36 protected $lockFile88; // string; local file path
37 protected $lockFile128; // string; local file path
38
39 /** @var Array */
40 protected $fileHandles = array(); // cache file handles
41
42 const QUICK_RAND = 1; // get randomness from fast and insecure sources
43 const QUICK_VOLATILE = 2; // use an APC like in-memory counter if available
44
/**
 * Work out the node ID of this machine and the paths of the UID lock files.
 *
 * The node ID is read from a cache file in the temp directory when that file
 * holds 12 hex digits. Otherwise it is taken from the first MAC address reported
 * by "getmac" (Windows) or "/sbin/ifconfig" (elsewhere), falling back to 12 random
 * hex digits with the multicast bit set, and is then written to the cache file.
 */
protected function __construct() {
	$tmpDir = wfTempDir();
	$idFile = $tmpDir . '/mw-' . __CLASS__ . '-UID-nodeid';
	// The "$" anchor used for validation below also matches right before a trailing
	// newline, so strip surrounding whitespace from the cached value (e.g. left by a
	// hand-edited file); otherwise a 13 byte string would get hashed and converted.
	$nodeId = is_file( $idFile ) ? trim( file_get_contents( $idFile ) ) : '';
	// Try to get some ID that uniquely identifies this machine (RFC 4122)...
	if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
		wfSuppressWarnings();
		if ( wfIsWindows() ) {
			// http://technet.microsoft.com/en-us/library/bb490913.aspx
			$csv = trim( wfShellExec( 'getmac /NH /FO CSV' ) );
			// Only the first line (first adapter) of the CSV output is used
			$line = substr( $csv, 0, strcspn( $csv, "\n" ) );
			$info = str_getcsv( $line );
			$nodeId = isset( $info[0] ) ? str_replace( '-', '', $info[0] ) : '';
		} elseif ( is_executable( '/sbin/ifconfig' ) ) { // Linux/BSD/Solaris/OS X
			// See http://linux.die.net/man/8/ifconfig
			$m = array();
			preg_match( '/\s([0-9a-f]{2}(:[0-9a-f]{2}){5})\s/',
				wfShellExec( '/sbin/ifconfig -a' ), $m );
			$nodeId = isset( $m[1] ) ? str_replace( ':', '', $m[1] ) : '';
		}
		wfRestoreWarnings();
		if ( !preg_match( '/^[0-9a-f]{12}$/i', $nodeId ) ) {
			// No usable MAC address was found; use a random node ID instead
			$nodeId = MWCryptRand::generateHex( 12, true );
			$nodeId[1] = dechex( hexdec( $nodeId[1] ) | 0x1 ); // set multicast bit
		}
		file_put_contents( $idFile, $nodeId ); // cache
	}
	// The 32 bit form is derived from the first 8 hex digits of the SHA-1 of the node ID
	$this->nodeId32 = wfBaseConvert( substr( sha1( $nodeId ), 0, 8 ), 16, 2, 32 );
	$this->nodeId48 = wfBaseConvert( $nodeId, 16, 2, 48 );
	// If different processes run as different users, they may have different temp dirs.
	// This is dealt with by initializing the clock sequence number and counters randomly.
	$this->lockFile88 = $tmpDir . '/mw-' . __CLASS__ . '-UID-88';
	$this->lockFile128 = $tmpDir . '/mw-' . __CLASS__ . '-UID-128';
}
78
/**
 * Get the shared generator instance, creating it on first use.
 *
 * @return UIDGenerator
 */
protected static function singleton() {
	if ( self::$instance !== null ) {
		return self::$instance;
	}

	self::$instance = new self();

	return self::$instance;
}
89
/**
 * Get a statistically unique 88-bit unsigned integer ID string.
 * The bits of the UID are prefixed with the time (down to the millisecond).
 *
 * These IDs are suitable as values for the shard key of distributed data.
 * If a column uses these as values, it should be declared UNIQUE to handle collisions.
 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
 * They can also be stored "DECIMAL(27) UNSIGNED" or BINARY(11) in MySQL.
 *
 * UID generation is serialized on each server (as the node ID is for the whole machine).
 *
 * @param $base integer Specifies a base other than 10 (2 to 36)
 * @return string Number
 * @throws MWException If $base is not an integer between 2 and 36
 */
public static function newTimestampedUID88( $base = 10 ) {
	if ( !is_integer( $base ) || $base > 36 || $base < 2 ) {
		// Same wording as newTimestampedUID128()
		throw new MWException( "Base must be an integer between 2 and 36" );
	}
	$gen = self::singleton();
	// The 88-bit layout has no clock sequence field (size 1) and a 10 bit counter
	$time = $gen->getTimestampAndDelay( 'lockFile88', 1, 1024 );

	return wfBaseConvert( $gen->getTimestampedID88( $time ), 2, $base );
}
114
/**
 * Assemble the binary digits of an 88-bit ID from a timestamp/counter pair.
 *
 * Layout: 46 timestamp bits, then a 10 bit counter, then the 32 bit node ID.
 *
 * @param array $info (UIDGenerator::millitime(), counter); extra elements are ignored
 * @return string 88 bits
 * @throws MWException If the assembled string is not exactly 88 characters long
 */
protected function getTimestampedID88( array $info ) {
	$time = $info[0];
	$counter = $info[1];

	$bits = $this->millisecondsSinceEpochBinary( $time ) // 46 bits
		. str_pad( decbin( $counter ), 10, '0', STR_PAD_LEFT ) // 56 bits so far
		. $this->nodeId32; // 88 bits in total

	if ( strlen( $bits ) !== 88 ) {
		throw new MWException( "Detected overflow for millisecond timestamp." );
	}

	return $bits;
}
134
/**
 * Get a statistically unique 128-bit unsigned integer ID string.
 * The bits of the UID are prefixed with the time (down to the millisecond).
 *
 * These IDs are suitable as globally unique IDs, without any enforced uniqueness.
 * New rows almost always have higher UIDs, which makes B-TREE updates on INSERT fast.
 * They can also be stored as "DECIMAL(39) UNSIGNED" or BINARY(16) in MySQL.
 *
 * UID generation is serialized on each server (as the node ID is for the whole machine).
 *
 * @param $base integer Specifies a base other than 10 (2 to 36)
 * @return string Number
 * @throws MWException If $base is not an integer between 2 and 36
 */
public static function newTimestampedUID128( $base = 10 ) {
	$baseIsValid = is_integer( $base ) && $base >= 2 && $base <= 36;
	if ( !$baseIsValid ) {
		throw new MWException( "Base must be an integer between 2 and 36" );
	}

	$generator = self::singleton();
	// 14 bit clock sequence (16384 values) and 20 bit counter (1048576 values)
	$info = $generator->getTimestampAndDelay( 'lockFile128', 16384, 1048576 );
	$binary = $generator->getTimestampedID128( $info );

	return wfBaseConvert( $binary, 2, $base );
}
158
/**
 * Assemble the binary digits of a 128-bit ID from a timestamp/counter/clock sequence tuple.
 *
 * Layout: 46 timestamp bits, a 20 bit counter, a 14 bit clock sequence number
 * and finally the 48 bit node ID.
 *
 * @param array $info (UIDGenerator::millitime(), counter, clock sequence)
 * @return string 128 bits
 * @throws MWException If the assembled string is not exactly 128 characters long
 */
protected function getTimestampedID128( array $info ) {
	$time = $info[0];
	$counter = $info[1];
	$clkSeq = $info[2];

	$bits = implode( '', array(
		$this->millisecondsSinceEpochBinary( $time ), // 46 bits
		str_pad( decbin( $counter ), 20, '0', STR_PAD_LEFT ), // 66 bits so far
		str_pad( decbin( $clkSeq ), 14, '0', STR_PAD_LEFT ), // 80 bits so far
		$this->nodeId48, // 128 bits in total
	) );

	if ( strlen( $bits ) !== 128 ) {
		throw new MWException( "Detected overflow for millisecond timestamp." );
	}

	return $bits;
}
180
/**
 * Return an RFC4122 compliant v4 UUID
 *
 * 31 random hex digits are combined with the fixed version digit "4"; the first
 * digit of the clock sequence group is forced into the range 8-b.
 *
 * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND)
 * @return string Hyphenated UUID (8-4-4-4-12 hex digits)
 * @throws MWException
 */
public static function newUUIDv4( $flags = 0 ) {
	if ( $flags & self::QUICK_RAND ) {
		$hex = wfRandomString( 31 );
	} else {
		$hex = MWCryptRand::generateHex( 31 );
	}

	// "time_low" (32 bits)
	$timeLow = substr( $hex, 0, 8 );
	// "time_mid" (16 bits)
	$timeMid = substr( $hex, 8, 4 );
	// "time_hi_and_version" (16 bits)
	$timeHiAndVersion = '4' . substr( $hex, 12, 3 );
	// "clk_seq_hi_res" (8 bits, variant is binary 10x) and "clk_seq_low" (8 bits)
	$variantDigit = dechex( 0x8 | ( hexdec( $hex[15] ) & 0x3 ) );
	$clockSeq = $variantDigit . $hex[16] . substr( $hex, 17, 2 );
	// "node" (48 bits)
	$node = substr( $hex, 19, 12 );

	return implode( '-', array( $timeLow, $timeMid, $timeHiAndVersion, $clockSeq, $node ) );
}
206
/**
 * Return an RFC4122 compliant v4 UUID with the hyphens removed
 *
 * @param $flags integer Bitfield (supports UIDGenerator::QUICK_RAND)
 * @return string 32 hex characters with no hyphens
 * @throws MWException
 */
public static function newRawUUIDv4( $flags = 0 ) {
	$uuid = self::newUUIDv4( $flags );

	return str_replace( '-', '', $uuid );
}
217
/**
 * Return an ID that is sequential *only* for this node and bucket
 *
 * These IDs are suitable for per-host sequence numbers, e.g. for some packet protocols.
 * If UIDGenerator::QUICK_VOLATILE is used the counter might reset on server restart.
 *
 * This requests a batch of exactly one ID from newSequentialPerNodeIDs().
 *
 * @param string $bucket Arbitrary bucket name (should be ASCII)
 * @param integer $bits Bit size (<=48) of resulting numbers before wrap-around
 * @param integer $flags (supports UIDGenerator::QUICK_VOLATILE)
 * @return float Integer value as float
 * @since 1.23
 */
public static function newSequentialPerNodeID( $bucket, $bits = 48, $flags = 0 ) {
	$ids = self::newSequentialPerNodeIDs( $bucket, $bits, 1, $flags );

	return current( $ids );
}
233
/**
 * Return IDs that are sequential *only* for this node and bucket
 *
 * Static entry point that forwards to the shared generator instance.
 *
 * @see UIDGenerator::newSequentialPerNodeID()
 * @param string $bucket Arbitrary bucket name (should be ASCII)
 * @param integer $bits Bit size (16 to 48) of resulting numbers before wrap-around
 * @param integer $count Number of IDs to return (1 to 10000)
 * @param integer $flags (supports UIDGenerator::QUICK_VOLATILE)
 * @return array Ordered list of float integer values
 * @since 1.23
 */
public static function newSequentialPerNodeIDs( $bucket, $bits, $count, $flags = 0 ) {
	return self::singleton()->getSequentialPerNodeIDs( $bucket, $bits, $count, $flags );
}
249
/**
 * Return IDs that are sequential *only* for this node and bucket
 *
 * The persistent counter is advanced by $count, either in an accelerator cache
 * (when UIDGenerator::QUICK_VOLATILE is set and PHP is not running in CLI mode)
 * or in a flock()-protected file in the temp directory. The returned IDs are the
 * $count values ending at the new counter value, each reduced modulo 2^$bits.
 *
 * @see UIDGenerator::newSequentialPerNodeID()
 * @param string $bucket Arbitrary bucket name (should be ASCII)
 * @param integer $bits Bit size (16 to 48) of resulting numbers before wrap-around
 * @param integer $count Number of IDs to return (1 to 10000)
 * @param integer $flags (supports UIDGenerator::QUICK_VOLATILE)
 * @return array Ordered list of float integer values (empty if $count <= 0)
 * @throws MWException If $count or $bits is out of range, or if the counter
 *   could not be stored or its lock file could not be opened or locked
 */
protected function getSequentialPerNodeIDs( $bucket, $bits, $count, $flags ) {
	if ( $count <= 0 ) {
		return array(); // nothing to do
	} elseif ( $count > 10000 ) {
		throw new MWException( "Number of requested IDs ($count) is too high." );
	} elseif ( $bits < 16 || $bits > 48 ) {
		throw new MWException( "Requested bit size ($bits) is out of range." );
	}

	$counter = null; // post-increment persistent counter value

	// Use APC/eAccelerator/xcache if requested, available, and not in CLI mode;
	// Counter values would not survive across script instances in CLI mode.
	$cache = null;
	if ( ( $flags & self::QUICK_VOLATILE ) && PHP_SAPI !== 'cli' ) {
		try {
			$cache = ObjectCache::newAccelerator( array() );
		} catch ( MWException $e ) {
			// Not supported; the lock file based counter below is used instead
		}
	}
	if ( $cache ) {
		$counter = $cache->incr( $bucket, $count );
		if ( $counter === false ) {
			// incr() failed, so try to create the key with the first batch
			if ( !$cache->add( $bucket, $count ) ) {
				throw new MWException( 'Unable to set value to ' . get_class( $cache ) );
			}
			$counter = $count;
		}
	}

	// Note: use of fmod() avoids "division by zero" on 32 bit machines
	if ( $counter === null ) {
		$path = wfTempDir() . '/mw-' . __CLASS__ . '-' . rawurlencode( $bucket ) . '-48';
		// Get the UID lock file handle
		if ( isset( $this->fileHandles[$path] ) ) {
			$handle = $this->fileHandles[$path];
		} else {
			$handle = fopen( $path, 'cb+' );
			if ( $handle === false ) {
				throw new MWException( "Could not open '{$path}'." );
			}
			// Only successfully opened handles are cached
			$this->fileHandles[$path] = $handle;
		}
		// Acquire the UID lock file
		if ( !flock( $handle, LOCK_EX ) ) {
			fclose( $handle );
			// Forget the closed handle so that it is never reused or closed twice
			unset( $this->fileHandles[$path] );
			throw new MWException( "Could not acquire '{$path}'." );
		}
		// Fetch the counter value and increment it...
		rewind( $handle );
		// An empty (new) file yields false/'' here, which is treated as zero
		$stored = (float)trim( (string)fgets( $handle ) );
		$counter = floor( $stored ) + $count; // fetch as float
		// Write back the new counter value
		ftruncate( $handle, 0 );
		rewind( $handle );
		// Format explicitly: implicit float-to-string conversion is limited by the
		// "precision" ini setting and would drop digits of large counter values.
		fwrite( $handle, sprintf( '%.0F', fmod( $counter, pow( 2, 48 ) ) ) ); // wrap-around as needed
		fflush( $handle );
		// Release the UID lock file
		flock( $handle, LOCK_UN );
	}

	$ids = array();
	$divisor = pow( 2, $bits );
	$currentId = floor( $counter - $count ); // pre-increment counter value
	for ( $i = 0; $i < $count; ++$i ) {
		$ids[] = fmod( ++$currentId, $divisor );
	}

	return $ids;
}
327
/**
 * Get a (time,counter,clock sequence) where (time,counter) is higher
 * than any previous (time,counter) value for the given clock sequence.
 * This is useful for making UIDs sequential on a per-node basis.
 *
 * The state of the last issued ID is kept in the given lock file as
 * "<clk seq> <sec> <msec> <counter> <offset>" and is read and rewritten
 * while holding an exclusive flock() on that file.
 *
 * @param string $lockFile Name of the property holding the local lock file path
 *   ('lockFile88' or 'lockFile128')
 * @param $clockSeqSize integer The number of possible clock sequence values
 * @param $counterSize integer The number of possible counter values
 * @return Array (result of UIDGenerator::millitime(), counter, clock sequence)
 * @throws MWException If the lock file cannot be opened or locked, the counter
 *   overflows for the current timestamp, or the process clock has drifted
 */
protected function getTimestampAndDelay( $lockFile, $clockSeqSize, $counterSize ) {
	// Get the UID lock file handle
	if ( isset( $this->fileHandles[$lockFile] ) ) {
		$handle = $this->fileHandles[$lockFile];
	} else {
		$handle = fopen( $this->$lockFile, 'cb+' );
		if ( $handle === false ) {
			throw new MWException( "Could not open '{$this->$lockFile}'." );
		}
		// Only successfully opened handles are cached
		$this->fileHandles[$lockFile] = $handle;
	}
	// Acquire the UID lock file
	if ( !flock( $handle, LOCK_EX ) ) {
		fclose( $handle );
		// Forget the closed handle so that it is never reused or closed twice
		unset( $this->fileHandles[$lockFile] );
		throw new MWException( "Could not acquire '{$this->$lockFile}'." );
	}
	// Get the current timestamp, clock sequence number, last time, and counter
	rewind( $handle );
	$data = explode( ' ', fgets( $handle ) ); // "<clk seq> <sec> <msec> <counter> <offset>"
	$clockChanged = false; // clock set back significantly?
	if ( count( $data ) == 5 ) { // last UID info already initialized
		$clkSeq = (int)$data[0] % $clockSeqSize;
		$prevTime = array( (int)$data[1], (int)$data[2] );
		$offset = (int)$data[4] % $counterSize; // random counter offset
		$counter = 0; // counter for UIDs with the same timestamp
		// Delay until the clock reaches the time of the last ID.
		// This detects any microtime() drift among processes.
		$time = $this->timeWaitUntil( $prevTime );
		if ( !$time ) { // too long to delay?
			$clockChanged = true; // bump clock sequence number
			$time = self::millitime();
		} elseif ( $time == $prevTime ) {
			// Bump the counter if there are timestamp collisions
			$counter = (int)$data[3] % $counterSize;
			if ( ++$counter >= $counterSize ) { // sanity (starts at 0)
				flock( $handle, LOCK_UN ); // abort
				throw new MWException( "Counter overflow for timestamp value." );
			}
		}
	} else { // last UID info not initialized
		$clkSeq = mt_rand( 0, $clockSeqSize - 1 );
		$counter = 0;
		$offset = mt_rand( 0, $counterSize - 1 );
		$time = self::millitime();
	}
	// microtime() and gettimeofday() can drift from time() at least on Windows.
	// The drift is immediate for processes running while the system clock changes.
	// time() does not have this problem. See https://bugs.php.net/bug.php?id=42659.
	if ( abs( time() - $time[0] ) >= 2 ) {
		// We don't want processes using too high or low timestamps to avoid duplicate
		// UIDs and clock sequence number churn. This process should just be restarted.
		flock( $handle, LOCK_UN ); // abort
		throw new MWException( "Process clock is outdated or drifted." );
	}
	// If microtime() is synced and a clock change was detected, then the clock went back
	if ( $clockChanged ) {
		// Bump the clock sequence number and also randomize the counter offset,
		// which is useful for UIDs that do not include the clock sequence number.
		$clkSeq = ( $clkSeq + 1 ) % $clockSeqSize;
		$offset = mt_rand( 0, $counterSize - 1 );
		trigger_error( "Clock was set back; sequence number incremented." );
	}
	// Update the (clock sequence number, timestamp, counter)
	ftruncate( $handle, 0 );
	rewind( $handle );
	fwrite( $handle, "{$clkSeq} {$time[0]} {$time[1]} {$counter} {$offset}" );
	fflush( $handle );
	// Release the UID lock file
	flock( $handle, LOCK_UN );

	// The stored counter is the raw one; callers get it shifted by the random offset
	return array( $time, ( $counter + $offset ) % $counterSize, $clkSeq );
}
410
/**
 * Busy-wait until the current timestamp reaches $time and return the current
 * timestamp. This gives up and returns false as soon as the remaining wait
 * is found to be more than 10ms.
 *
 * @param array $time Result of UIDGenerator::millitime()
 * @return Array|bool UIDGenerator::millitime() result or false
 */
protected function timeWaitUntil( array $time ) {
	while ( true ) {
		$now = self::millitime();
		// Arrays of equal size are compared element by element, see
		// http://php.net/manual/en/language.operators.comparison.php
		if ( $now >= $time ) {
			return $now; // current timestamp has reached $time
		}
		$remainingMs = ( $time[0] - $now[0] ) * 1000 + ( $time[1] - $now[1] );
		if ( $remainingMs > 10 ) {
			return false; // too long to wait
		}
	}
}
428
/**
 * Convert a (seconds, milliseconds) pair to "milliseconds since epoch" in binary.
 *
 * The value is padded to at least 46 binary digits and only the last 46 digits
 * are kept, so the result is always 46 characters long (rolls over in 4201).
 *
 * @param array $time Result of UIDGenerator::millitime()
 * @return string 46 bits of "milliseconds since epoch" in binary
 * @throws MWException If the millisecond count exceeds 2^52
 */
protected function millisecondsSinceEpochBinary( array $time ) {
	$millis = $time[0] * 1000 + $time[1];
	if ( $millis > pow( 2, 52 ) ) {
		throw new MWException( __METHOD__ .
			': sorry, this function doesn\'t work after the year 144680' );
	}

	$binary = wfBaseConvert( $millis, 10, 2, 46 );

	return substr( $binary, -46 );
}
443
/**
 * Get the current time split into whole seconds and milliseconds.
 *
 * @return Array (current time in seconds, milliseconds since then)
 */
protected static function millitime() {
	// microtime() returns the string "<fraction of a second> <seconds>"
	$parts = explode( ' ', microtime() );
	$seconds = (int)$parts[1];
	$milliseconds = (int)( $parts[0] * 1000 );

	return array( $seconds, $milliseconds );
}
452
/**
 * Close all cached lock file handles.
 *
 * Entries that are not open resources (such as null placeholders or handles
 * that were already closed) are skipped, since passing those to fclose()
 * would raise warnings or errors.
 */
public function __destruct() {
	foreach ( $this->fileHandles as $handle ) {
		if ( is_resource( $handle ) ) {
			fclose( $handle );
		}
	}
}
456 }