Merge "Get timestamp from WikiPage, instead of Article"
[lhc/web/wiklou.git] / maintenance / storage / testCompression.php
1 <?php
/**
 * Test revision text compression and decompression.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License along
 * with this program; if not, write to the Free Software Foundation, Inc.,
 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 * http://www.gnu.org/copyleft/gpl.html
 *
 * @file
 * @ingroup Maintenance ExternalStorage
 */
23
// Names of the command-line options that take a value; declared before
// commandLine.inc is included.
$optionsWithArgs = [ 'start', 'limit', 'type' ];
require __DIR__ . '/../commandLine.inc';

if ( !isset( $args[0] ) ) {
	echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] " .
		"[--limit=<num-revs>] <page-title>\n";
	exit( 1 );
}

// Title::newFromText() yields no Title object for an invalid title string;
// stop here instead of failing later on a method call against a non-object.
$title = Title::newFromText( $args[0] );
if ( !$title ) {
	echo "Invalid page title: {$args[0]}\n";
	exit( 1 );
}

if ( isset( $options['start'] ) ) {
	// strtotime() returns false when it cannot parse the date string.
	$startUnix = strtotime( $options['start'] );
	if ( $startUnix === false ) {
		echo "Unable to parse start date: {$options['start']}\n";
		exit( 1 );
	}
	$start = wfTimestamp( TS_MW, $startUnix );
	echo "Starting from " . $wgLang->timeanddate( $start ) . "\n";
} else {
	// No start date given: begin at the Unix epoch.
	$start = '19700101000000';
}

if ( isset( $options['limit'] ) ) {
	// An explicit limit disables the "stop once the blob is unhappy" cut-off.
	$limit = $options['limit'];
	$untilHappy = false;
} else {
	$limit = 1000;
	$untilHappy = true;
}

// Name of the blob class to instantiate for the test.
$type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob';
if ( !class_exists( $type ) ) {
	echo "Unknown blob class: $type\n";
	exit( 1 );
}

// This is a procedural script with no enclosing object, so $this is not
// available here; obtain the connection through the global accessor instead.
$dbr = wfGetDB( DB_SLAVE );

// Select the page's revisions newer than $start, joined to their text rows,
// capped at $limit rows.
$res = $dbr->select(
	[ 'page', 'revision', 'text' ],
	'*',
	[
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
		'page_id=rev_page',
		'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
		'rev_text_id=old_id'
	],
	__FILE__,
	[ 'LIMIT' => $limit ]
);

// Compression pass: add every selected revision's serialized data to a blob of
// the requested class, remembering an MD5 of each text and the key the blob
// returned for it, then serialize the blob. The timer covers both steps.
$blob = new $type;
$hashes = [];
$keys = [];
$uncompressedSize = 0;
$compressStart = microtime( true );
foreach ( $res as $revRow ) {
	$revId = $revRow->rev_id;
	$data = ( new Revision( $revRow ) )->getSerializedData();
	$uncompressedSize += strlen( $data );
	$hashes[$revId] = md5( $data );
	$keys[$revId] = $blob->addItem( $data );
	// Without an explicit limit, stop as soon as the blob reports it is not happy.
	if ( $untilHappy && !$blob->isHappy() ) {
		break;
	}
}

$serialized = serialize( $blob );
$compressSeconds = microtime( true ) - $compressStart;
$serializedSize = strlen( $serialized );

printf(
	"%s\nCompression ratio for %d revisions: %5.2f, %s -> %d\n",
	$type,
	count( $hashes ),
	$uncompressedSize / $serializedSize,
	$wgLang->formatSize( $uncompressedSize ),
	$serializedSize
);
printf( "Compression time: %5.2f ms\n", $compressSeconds * 1000 );

// Decompression pass: restore the blob from its serialized form, fetch every
// stored item by key and check it against the MD5 recorded during compression.
$t = -microtime( true );
$blob = unserialize( $serialized );
foreach ( $keys as $id => $key ) {
	$text = $blob->getItem( $key );
	// Strict comparison: with loose "!=", two different hex digests that both
	// look numeric (e.g. "0e" followed only by digits) would compare as equal.
	if ( md5( $text ) !== $hashes[$id] ) {
		echo "Content hash mismatch for rev_id $id\n";
	}
}
$t += microtime( true );
printf( "Decompression time: %5.2f ms\n", $t * 1000 );