2c0c29c44715f7f1319eecb2faf22a4efb896ec6
[lhc/web/wiklou.git] / maintenance / dumpHTML.php
1 <?php
2 /**
3 * @todo document
4 * @package MediaWiki
5 * @subpackage Maintenance
6 */
7
8 /**
9 * Usage:
10 * php dumpHTML.php [options...]
11 *
12 * -d <dest> destination directory
13 * -s <start> start ID
14 * -e <end> end ID
15 * -k <skin> skin to use (defaults to htmldump)
16 * --no-overwrite skip existing HTML files
17 * --checkpoint <file> use a checkpoint file to allow restarting of interrupted dumps
18 * --slice <n/m> split the job into m segments and do the n'th one
19 * --images only do image description pages
20 * --shared-desc only do shared (commons) image description pages
21 * --no-shared-desc don't do shared image description pages
22 * --categories only do category pages
23 * --redirects only do redirects
24 * --special only do miscellaneous stuff
25 * --force-copy copy commons instead of symlink, needed for Wikimedia
26 * --interlang allow interlanguage links
27 * --image-snapshot copy all images used to the destination directory
28 * --compress generate compressed version of the html pages
29 * --udp-profile <N> profile 1/N rendering operations using ProfilerSimpleUDP
30 */
31
32
// Master switch for in-script profiling. When true, a setup callback is
// registered below and the profiler output is printed at the end of the run.
$profiling = false;

// Options that take a value (see the usage comment at the top of this file).
// Presumably read by commandLine.inc when it parses the argument list.
$optionsWithArgs = array( 's', 'd', 'e', 'k', 'checkpoint', 'slice', 'udp-profile' );

if ( $profiling ) {
	/**
	 * Callback named by MW_CMDLINE_CALLBACK: switches profiling on, keeps the
	 * results out of the database and sets the sample rate to 1.
	 */
	function wfSetupDump() {
		global $wgProfiling, $wgProfileToDatabase, $wgProfileSampleRate;
		$wgProfileSampleRate = 1;
		$wgProfileToDatabase = false;
		$wgProfiling = true;
	}

	define( 'MW_CMDLINE_CALLBACK', 'wfSetupDump' );
}

// $options does not exist yet at this point (commandLine.inc is loaded
// further down), so the raw argument list is inspected directly.
if ( array_search( '--udp-profile', $argv ) !== false ) {
	define( 'MW_FORCE_PROFILE', 1 );
}
50
51 require_once( "commandLine.inc" );
52 require_once( "dumpHTML.inc" );
53
// Report everything except notices.
error_reporting( E_ALL & ~E_NOTICE );

// First page ID to dump: -s, falling back to 1.
$start = empty( $options['s'] ) ? 1 : $options['s'];

// Last page ID to dump: -e, falling back to the highest page_id in the
// page table.
if ( empty( $options['e'] ) ) {
	$dbr =& wfGetDB( DB_SLAVE );
	$end = $dbr->selectField( 'page', 'max(page_id)', false );
} else {
	$end = $options['e'];
}

// Destination directory: -d, falling back to "static" under $IP.
$dest = empty( $options['d'] ) ? "$IP/static" : $options['d'];

// Skin: -k, falling back to the htmldump skin. Unlike the options above,
// this is an isset() test, so an explicitly empty -k value is passed through.
if ( isset( $options['k'] ) ) {
	$skin = $options['k'];
} else {
	$skin = 'htmldump';
}
76
// --slice <n/m>: split the job into m segments and process only the n'th.
// Without the option the whole job is a single slice (1/1).
//
// !empty() is used so that a missing --slice does not read an undefined
// array key. The specification must be two non-negative integers separated
// by a slash, with 1 <= n <= m; anything else (including a non-numeric or
// zero denominator) is rejected.
if ( !empty( $options['slice'] ) ) {
	if ( !preg_match( '/^\s*(\d+)\s*\/\s*(\d+)\s*$/D', $options['slice'], $bits )
		|| $bits[1] < 1
		|| $bits[1] > $bits[2]
	) {
		// Terminate the message with a newline and signal failure to the
		// calling shell through a non-zero exit status.
		print "Invalid slice specification\n";
		exit( 1 );
	}
	$sliceNumerator = $bits[1];
	$sliceDenominator = $bits[2];
} else {
	$sliceNumerator = $sliceDenominator = 1;
}
88
// Build the dumper from the resolved settings and the raw command-line flags.
//
// Optional flags are only present in $options when they were given on the
// command line, so each one is guarded with isset() and passed as null when
// absent. That is the same value the unguarded read produced, without
// relying on "undefined index" diagnostics being suppressed.
//
// Note that --interlang drives both 'alternateScriptPath' and 'interwiki'.
$wgHTMLDump = new DumpHTML( array(
	'dest' => $dest,
	'forceCopy' => isset( $options['force-copy'] ) ? $options['force-copy'] : null,
	'alternateScriptPath' => isset( $options['interlang'] ) ? $options['interlang'] : null,
	'interwiki' => isset( $options['interlang'] ) ? $options['interlang'] : null,
	'skin' => $skin,
	'makeSnapshot' => isset( $options['image-snapshot'] ) ? $options['image-snapshot'] : null,
	'checkpointFile' => isset( $options['checkpoint'] ) ? $options['checkpoint'] : null,
	'startID' => $start,
	'endID' => $end,
	'sliceNumerator' => $sliceNumerator,
	'sliceDenominator' => $sliceDenominator,
	'noOverwrite' => isset( $options['no-overwrite'] ) ? $options['no-overwrite'] : null,
	'compress' => isset( $options['compress'] ) ? $options['compress'] : null,
	'noSharedDesc' => isset( $options['no-shared-desc'] ) ? $options['no-shared-desc'] : null,
	'udpProfile' => isset( $options['udp-profile'] ) ? $options['udp-profile'] : null,
));
106
107
// Select what to dump. The "only do ..." flags are mutually exclusive and
// checked in a fixed order; the first one that is set wins. Each flag is
// tested with !empty() so that an absent flag does not read an undefined
// array key (the truthiness result is the same as a plain read).
if ( !empty( $options['special'] ) ) {
	$wgHTMLDump->doSpecials();
} elseif ( !empty( $options['images'] ) ) {
	$wgHTMLDump->doImageDescriptions();
} elseif ( !empty( $options['categories'] ) ) {
	$wgHTMLDump->doCategories();
} elseif ( !empty( $options['redirects'] ) ) {
	$wgHTMLDump->doRedirects();
} elseif ( !empty( $options['shared-desc'] ) ) {
	$wgHTMLDump->doSharedImageDescriptions();
} else {
	// No mode flag given: announce the target directory and database server.
	print "Creating static HTML dump in directory $dest. \n";
	$dbr =& wfGetDB( DB_SLAVE );
	$server = $dbr->getProperty( 'mServer' );
	print "Using database {$server}\n";

	// Without an explicit end ID (-e) the full dump is run; with one, only
	// the articles are dumped.
	if ( !isset( $options['e'] ) ) {
		$wgHTMLDump->doEverything();
	} else {
		$wgHTMLDump->doArticles();
	}
}
130
// --debug: print the 20 largest global variables, one per line, as
// "<serialized size in bytes> <name>", largest first.
//
// The sizes are computed by hand rather than dumping $GLOBALS with print_r();
// the original author marked this as a workaround for bug #36957.
//
// The helper variables below live at script level, so their names and the
// order in which they are assigned are kept deliberately unchanged.
if ( isset( $options['debug'] ) ) {
	// Snapshot the names first so the helpers created below are not iterated.
	$globals = array_keys( $GLOBALS );

	$sizes = array();
	foreach ( $globals as $name ) {
		$sizes[$name] = strlen( serialize( $GLOBALS[$name] ) );
	}

	// Sort descending by size (keeping the names as keys), then keep the top 20.
	arsort( $sizes );
	$sizes = array_slice( $sizes, 0, 20 );

	foreach ( $sizes as $name => $size ) {
		echo sprintf( "%9d %s\n", $size, $name );
	}
}
146
// When the in-script profiling switch is on, finish by printing whatever the
// global profiler object collected during the run.
if ( $profiling ) {
	print $wgProfiler->getOutput();
}
150
151 ?>