Use __DIR__ instead of dirname( __FILE__ )
[lhc/web/wiklou.git] / maintenance / storage / testCompression.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @ingroup Maintenance
20 * @see wfWaitForSlaves()
21 */
22
# Options that take a value. This is set before commandLine.inc is included;
# presumably that file reads the list while parsing the command line and
# populates $args, $options and $wgLang used below (confirm in commandLine.inc).
$optionsWithArgs = array( 'start', 'limit', 'type' );
require( __DIR__ . '/../commandLine.inc' );

# The page title is the only mandatory argument.
if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] [--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

# Title::newFromText() yields null for text that is not a valid title. Stop
# here with a message rather than failing later on a method call against null.
$title = Title::newFromText( $args[0] );
if ( !$title ) {
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

# $start is the exclusive lower bound on revision timestamps (TS_MW format).
if ( isset( $options['start'] ) ) {
	# strtotime() returns false for input it cannot parse; reject that instead
	# of handing false to wfTimestamp().
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		echo "Invalid start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $wgLang->timeanddate( $start ) . "\n";
} else {
	$start = '19700101000000';
}

# With an explicit --limit, that many revisions are requested and all of them
# are added to the blob. Without one, up to 1000 are requested and $untilHappy
# is set, so the compression loop stops once the blob's isHappy() turns false.
if ( isset( $options['limit'] ) ) {
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	$limit = 1000;
	$untilHappy = true;
}

# Name of the history blob class to exercise.
$type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob';
46
47
# Fetch all columns of the joined page/revision/text rows for the requested
# title, keeping only revisions with a timestamp after $start and returning
# at most $limit rows.
$dbr = wfGetDB( DB_SLAVE );

$joinedTables = array( 'page', 'revision', 'text' );

$startCutoff = $dbr->addQuotes( $dbr->timestamp( $start ) );
$conditions = array(
	# Identify the page by namespace and DB key
	'page_namespace' => $title->getNamespace(),
	'page_title' => $title->getDBkey(),
	# Join revision rows to the page
	'page_id=rev_page',
	# Only revisions strictly newer than the start timestamp
	'rev_timestamp > ' . $startCutoff,
	# Join text rows to their revision
	'rev_text_id=old_id'
);

$queryOptions = array( 'LIMIT' => $limit );

$res = $dbr->select( $joinedTables, '*', $conditions, __FILE__, $queryOptions );
60
# $type may come straight from the --type option. Make sure it names a
# loadable class before instantiating it, instead of dying with a fatal error.
if ( !class_exists( $type ) ) {
	echo "Unknown blob class: $type\n";
	exit( 1 );
}

$blob = new $type;
# rev_id => MD5 of the revision text, used later to verify the round trip
$hashes = array();
# rev_id => key returned by the blob's addItem(), used later to fetch the text
$keys = array();
# Total byte length of all texts added to the blob
$uncompressedSize = 0;

# Timing idiom: start at minus "now" and add "now" afterwards, leaving the
# elapsed seconds in $t. The measured span covers loading each revision text,
# adding it to the blob, and serializing the blob.
$t = -microtime( true );
foreach ( $res as $row ) {
	$revision = new Revision( $row );
	$text = $revision->getText();
	$uncompressedSize += strlen( $text );
	$hashes[$row->rev_id] = md5( $text );
	$keys[$row->rev_id] = $blob->addItem( $text );
	# Without an explicit limit, stop as soon as the blob reports it is no
	# longer happy; the item just added stays in the blob.
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

$serialized = serialize( $blob );
$t += microtime( true );
# print_r( $blob->mDiffMap );

# Ratio is raw text bytes divided by the byte length of the serialized blob.
printf( "%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / strlen( $serialized ),
	$wgLang->formatSize( $uncompressedSize ),
	strlen( $serialized )
);
printf( "Compression time: %5.2f ms\n", $t * 1000 );
89
# Round-trip check: unserialize the blob, fetch every item back by its key and
# compare its MD5 with the hash recorded when the text was added. The elapsed
# time for the whole pass is reported in milliseconds.
$t = -microtime( true );
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	# Strict comparison is required: with != two different hex digests that
	# both look numeric (for example "0e" followed only by digits) are
	# compared as numbers and treated as equal, hiding a real mismatch.
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
		# var_dump( $text );
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );
101