Chunked queries. Use modulo slicing instead of range slicing to avoid gaps when the...
[lhc/web/wiklou.git] / maintenance / dumpHTML.php
1 <?php
2 /**
 * Command-line script that creates a static HTML dump of the wiki.
 *
 * @todo document
4 * @package MediaWiki
5 * @subpackage Maintenance
6 */
7
8 /**
9 * Usage:
10 * php dumpHTML.php [options...]
11 *
12 * -d <dest> destination directory
13 * -s <start> start ID
14 * -e <end> end ID
15 * -k <skin> skin to use (defaults to htmldump)
16 * --no-overwrite skip existing HTML files
17 * --checkpoint <file> use a checkpoint file to allow restarting of interrupted dumps
18 * --slice <n/m> split the job into m segments and do the n'th one
19 * --images only do image description pages
20 * --categories only do category pages
21 * --redirects only do redirects
22 * --special only do miscellaneous stuff
23 * --force-copy copy commons instead of symlink, needed for Wikimedia
24 * --interlang allow interlanguage links
25 * --image-snapshot copy all images used to the destination directory
26 */
27
28
// Names of the command-line options that are followed by a value.
$optionsWithArgs = array(
	's',
	'd',
	'e',
	'k',
	'checkpoint',
	'slice',
);

// Hard-coded switch; when true, the profiler is configured before setup and
// its output is echoed at the end of the script.
$profiling = false;

if ( $profiling ) {
	/**
	 * Callback registered under MW_CMDLINE_CALLBACK: switches profiling on,
	 * sets the sample rate to 1 and disables profiling to the database.
	 */
	function wfSetupDump() {
		global $wgProfileSampleRate, $wgProfileToDatabase, $wgProfiling;
		$wgProfileSampleRate = 1;
		$wgProfileToDatabase = false;
		$wgProfiling = true;
	}

	define( 'MW_CMDLINE_CALLBACK', 'wfSetupDump' );
}
42
43 require_once( "commandLine.inc" );
44 require_once( "dumpHTML.inc" );
45
// Report every error level except notices.
error_reporting( E_ALL & (~E_NOTICE) );

// -s: first page ID to process; defaults to 1.
$start = !empty( $options['s'] ) ? $options['s'] : 1;

// -e: last page ID to process; when absent, use the highest page_id found in
// the page table.
if ( empty( $options['e'] ) ) {
	$dbr =& wfGetDB( DB_SLAVE );
	$end = $dbr->selectField( 'page', 'max(page_id)', false );
} else {
	$end = $options['e'];
}

// -d: destination directory; defaults to "static" under the install path.
$dest = empty( $options['d'] ) ? "$IP/static" : $options['d'];

// -k: skin name; any supplied value (even an empty one) overrides the default.
if ( isset( $options['k'] ) ) {
	$skin = $options['k'];
} else {
	$skin = 'htmldump';
}
68
/**
 * Parse a --slice argument of the form "n/m".
 *
 * Both parts must consist solely of decimal digits. Comparing the raw
 * strings would let values such as "2/3x" or "1e1/20" through, because
 * PHP falls back to string comparison (or float parsing) for them.
 *
 * @param string $spec Raw option value, e.g. "2/4"
 * @return array|false array( n, m ) as integers with 1 <= n <= m,
 *                     or false if the specification is malformed
 */
function wfDumpHTMLParseSlice( $spec ) {
	$bits = explode( '/', $spec );
	if ( count( $bits ) != 2 ) {
		return false;
	}

	$numerator = trim( $bits[0] );
	$denominator = trim( $bits[1] );
	// \z (not $) so that a trailing newline is not silently tolerated
	if ( !preg_match( '/^[0-9]+\z/', $numerator ) || !preg_match( '/^[0-9]+\z/', $denominator ) ) {
		return false;
	}

	$numerator = intval( $numerator );
	$denominator = intval( $denominator );
	if ( $numerator < 1 || $numerator > $denominator ) {
		return false;
	}

	return array( $numerator, $denominator );
}

// --slice n/m: process only the n'th of m segments; without it the whole job
// is a single segment (1/1).
if ( !empty( $options['slice'] ) ) {
	$slice = wfDumpHTMLParseSlice( $options['slice'] );
	if ( $slice === false ) {
		print "Invalid slice specification\n";
		// Non-zero status so that wrapper scripts can detect the failure
		exit( 1 );
	}
	list( $sliceNumerator, $sliceDenominator ) = $slice;
} else {
	$sliceNumerator = $sliceDenominator = 1;
}
80
// Build the dumper from the parsed settings. Optional switches are looked up
// through isset() so that an option missing from the command line is passed
// as null without reading an undefined array key.
$wgHTMLDump = new DumpHTML( array(
	'dest' => $dest,
	'forceCopy' => isset( $options['force-copy'] ) ? $options['force-copy'] : null,
	// --interlang drives both of the following two settings
	'alternateScriptPath' => isset( $options['interlang'] ) ? $options['interlang'] : null,
	'interwiki' => isset( $options['interlang'] ) ? $options['interlang'] : null,
	'skin' => $skin,
	'makeSnapshot' => isset( $options['image-snapshot'] ) ? $options['image-snapshot'] : null,
	'checkpointFile' => isset( $options['checkpoint'] ) ? $options['checkpoint'] : null,
	'startID' => $start,
	'endID' => $end,
	'sliceNumerator' => $sliceNumerator,
	'sliceDenominator' => $sliceDenominator,
	'noOverwrite' => isset( $options['no-overwrite'] ) ? $options['no-overwrite'] : null,
) );
95
96
// Run exactly one dump mode. The first matching switch wins; each one is
// tested with !empty() so that a switch absent from the command line does not
// trigger an undefined array key read.
if ( !empty( $options['special'] ) ) {
	$wgHTMLDump->doSpecials();
} elseif ( !empty( $options['images'] ) ) {
	$wgHTMLDump->doImageDescriptions();
} elseif ( !empty( $options['categories'] ) ) {
	$wgHTMLDump->doCategories();
} elseif ( !empty( $options['redirects'] ) ) {
	$wgHTMLDump->doRedirects();
} else {
	// No mode switch given: announce the target and the database in use.
	print "Creating static HTML dump in directory $dest. \n";
	$dbr =& wfGetDB( DB_SLAVE );
	$server = $dbr->getProperty( 'mServer' );
	print "Using database {$server}\n";

	// An explicit -e restricts the run to doArticles(); otherwise
	// doEverything() is called.
	if ( isset( $options['e'] ) ) {
		$wgHTMLDump->doArticles();
	} else {
		$wgHTMLDump->doEverything();
	}
}
117
// --debug: print the entire global variable table once the dump has finished.
if ( isset( $options['debug'] ) ) {
	print_r( $GLOBALS );
}

// Emit the profiler report when the hard-coded profiling switch is on.
if ( $profiling ) {
	$profileOutput = $wgProfiler->getOutput();
	echo $profileOutput;
}
125
126 ?>