* Die gracefully if php has not been compiled with --enable-memory-limit
[lhc/web/wiklou.git] / maintenance / dumpHTML.inc
1 <?php
2 /**
3 * @package MediaWiki
4 * @subpackage Maintenance
5 */
6
# Progress messages are printed once every REPORTING_INTERVAL processed items
define( 'REPORTING_INTERVAL', 10 );

# Page classes that DumpHTML::getArticleHTML() instantiates directly
require_once 'includes/ImagePage.php';
require_once 'includes/CategoryPage.php';
11
/**
 * Renders wiki pages through the 'htmldump' skin and writes the resulting
 * HTML files, plus the shared images they reference, below a destination
 * directory.
 *
 * Settings are passed to the constructor as an array keyed by member
 * variable name (see the member list below).
 */
class DumpHTML {
	# Destination directory
	var $dest;

	# Show interlanguage links?
	var $interwiki = true;

	# Depth of HTML directory tree
	var $depth = 3;

	# Directory that commons images are copied into
	var $sharedStaticPath;

	# Relative path to image directory
	var $imageRel = 'upload';

	# Copy commons images instead of symlinking
	var $forceCopy = false;

	# Make links assuming the script path is in the same directory as
	# the destination
	var $alternateScriptPath = false;

	/**
	 * Constructor: copy every entry of $settings onto the member variable
	 * of the same name, overriding its default.
	 *
	 * @param array $settings Map of member variable name => value
	 */
	function __construct( $settings ) {
		foreach ( $settings as $var => $value ) {
			$this->$var = $value;
		}
	}

	/**
	 * PHP 4 style constructor. PHP 8 no longer treats a method named after
	 * the class as a constructor, so the real work lives in __construct()
	 * and this only forwards to it.
	 *
	 * @param array $settings Map of member variable name => value
	 */
	function DumpHTML( $settings ) {
		$this->__construct( $settings );
	}

	/**
	 * Write a set of articles specified by start and end page_id.
	 * Pages in the category namespace are skipped; doCategories() writes them.
	 * Image description pages that exist as real pages ARE written here
	 * (doImageDescriptions() relies on that and skips them).
	 *
	 * @param int   $start First page_id to process
	 * @param mixed $end   Last page_id to process (inclusive), or false to use
	 *                     the highest page_id found in the page table
	 */
	function doArticles( $start, $end = false ) {
		$fname = 'DumpHTML::doArticles';

		$this->setupGlobals();

		if ( $end === false ) {
			$dbr =& wfGetDB( DB_SLAVE );
			$end = $dbr->selectField( 'page', 'max(page_id)', false, $fname );
		}

		for ( $id = $start; $id <= $end; $id++ ) {
			if ( !( $id % REPORTING_INTERVAL ) ) {
				print "Processing ID: $id\r";
			}
			# IDs with no page yield no title and are silently skipped
			$title = Title::newFromID( $id );
			if ( $title ) {
				$ns = $title->getNamespace();
				if ( $ns != NS_CATEGORY ) {
					$this->doArticle( $title );
				}
			}
		}
		print "\n";
	}

	/** Write the main page (as index.html) and Special:Categories */
	function doSpecials() {
		$this->doMainPage();

		# doMainPage() set the globals up for depth 0; restore the normal depth
		$this->setupGlobals();
		print "Special:Categories...";
		$this->doArticle( Title::makeTitle( NS_SPECIAL, 'Categories' ) );
		print "\n";
	}

	/**
	 * Write the main page as index.html in the destination directory.
	 *
	 * @return mixed false if index.html could not be opened for writing;
	 *               no value otherwise
	 */
	function doMainPage() {
		global $wgMakeDumpLinks;

		print "Making index.html ";

		// Set up globals with no ../../.. in the link URLs
		$this->setupGlobals( 0 );

		// But still use that directory style. This used to be a hard-coded 3,
		// which only matched the files written by writeArticle() while
		// $this->depth was left at its default.
		// NOTE(review): assumes $wgMakeDumpLinks drives the depth of hashed
		// file names generated by Title -- confirm against Title.php.
		$wgMakeDumpLinks = $this->depth;

		$title = Title::newMainPage();
		$text = $this->getArticleHTML( $title );
		$file = fopen( "{$this->dest}/index.html", "w" );
		if ( !$file ) {
			print "\nCan't open index.html for writing\n";
			return false;
		}
		fwrite( $file, $text );
		fclose( $file );
		print "\n";
	}

	/**
	 * Write image description pages that doArticles() does not cover:
	 * local images without a description page of their own, and images
	 * found in the shared static directory filled by copyImages().
	 */
	function doImageDescriptions() {
		$fname = 'DumpHTML::doImageDescriptions';

		# Use the configured tree depth (was a hard-coded 3, the default)
		$this->setupGlobals();

		/**
		 * Dump image description pages that don't have an associated article, but do
		 * have a local image
		 */
		$dbr =& wfGetDB( DB_SLAVE );
		$res = $dbr->select( 'image', array( 'img_name' ), false, $fname );

		$i = 0;
		print "Writing image description pages for local images\n";
		$num = $dbr->numRows( $res );
		while ( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % REPORTING_INTERVAL ) ) {
				print "Done $i of $num\r";
			}
			$title = Title::makeTitle( NS_IMAGE, $row->img_name );
			if ( $title->getArticleID() ) {
				// Already done by dumpHTML
				continue;
			}
			$this->doArticle( $title );
		}
		print "\n";

		/**
		 * Dump images which only have a real description page on commons
		 */
		print "Writing description pages for commons images\n";
		$i = 0;
		for ( $hash = 0; $hash < 256; $hash++ ) {
			# Hashed upload directories look like "a/ab"
			$dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash );
			$patterns = array(
				"{$this->sharedStaticPath}/$dir/*",
				"{$this->sharedStaticPath}/thumb/$dir/*",
			);

			# Collect the entry names of both directories. The lists must be
			# concatenated: the array "+" operator is a key-wise union and
			# would drop most of the second list. glob() may also return
			# false instead of an array. Keying by name processes an image
			# only once when both the file and its thumbnail entry exist.
			$names = array();
			foreach ( $patterns as $pattern ) {
				$matches = glob( $pattern );
				if ( !is_array( $matches ) ) {
					continue;
				}
				foreach ( $matches as $path ) {
					$names[basename( $path )] = true;
				}
			}

			foreach ( $names as $file => $dummy ) {
				if ( !( ++$i % REPORTING_INTERVAL ) ) {
					print "$i\r";
				}

				# Array keys that look like integers are converted by PHP;
				# cast back so the title is always built from a string
				$title = Title::makeTitle( NS_IMAGE, (string)$file );
				$this->doArticle( $title );
			}
		}
		print "\n";
	}

	/** Write one page for every category that has at least one member */
	function doCategories() {
		$fname = 'DumpHTML::doCategories';
		$this->setupGlobals();

		$dbr =& wfGetDB( DB_SLAVE );
		$categorylinks = $dbr->tableName( 'categorylinks' );
		print "Selecting categories...";
		# Use the name returned by tableName() rather than the bare literal,
		# otherwise the lookup done above has no effect on the query
		$sql = "SELECT DISTINCT cl_to FROM $categorylinks";
		$res = $dbr->query( $sql, $fname );

		print "\nWriting " . $dbr->numRows( $res ). " category pages\n";
		$i = 0;
		while ( $row = $dbr->fetchObject( $res ) ) {
			if ( !( ++$i % REPORTING_INTERVAL ) ) {
				print "$i\r";
			}
			$title = Title::makeTitle( NS_CATEGORY, $row->cl_to );
			$this->doArticle( $title );
		}
		print "\n";
	}


	/**
	 * Write an article specified by title: render it, copy the shared
	 * images it references and save the HTML.
	 *
	 * @param Title $title Page to write
	 */
	function doArticle( $title ) {
		$text = $this->getArticleHTML( $title );
		if ( $text === false ) {
			return;
		}

		# Parse the XHTML to find the images
		$images = $this->findImages( $text );
		$this->copyImages( $images );

		# Write to file
		$this->writeArticle( $title, $text );
	}

	/**
	 * Write the given text to the file identified by the given title object.
	 * The file name comes from $title->getHashedFilename() and is relative
	 * to the destination directory; missing parent directories are created.
	 *
	 * @param Title  $title Page the text belongs to
	 * @param string $text  HTML to write
	 */
	function writeArticle( &$title, $text ) {
		$filename = $title->getHashedFilename();
		$fullName = "{$this->dest}/$filename";
		$fullDir = dirname( $fullName );

		wfMkdirParents( $fullDir, 0755 );

		$file = fopen( $fullName, 'w' );
		if ( !$file ) {
			print("Can't open file $fullName for writing\n");
			return;
		}

		fwrite( $file, $text );
		fclose( $file );
	}

	/**
	 * Set up globals required for parsing.
	 * May be called repeatedly, e.g. to switch between depths.
	 *
	 * @param mixed $depth Value for $wgMakeDumpLinks and the number of "../"
	 *                     steps in generated paths; NULL means $this->depth
	 */
	function setupGlobals( $depth = NULL ) {
		global $wgUser, $wgTitle, $wgMakeDumpLinks, $wgStylePath, $wgArticlePath;
		global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath;
		global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath;
		global $wgSharedThumbnailScriptPath, $wgEnableParserCache;

		# Original configuration values, remembered across calls
		static $oldLogo = NULL;
		static $oldUploadPath = NULL;

		if ( is_null( $depth ) ) {
			$wgMakeDumpLinks = $this->depth;
		} else {
			$wgMakeDumpLinks = $depth;
		}

		# NOTE(review): $wgScriptPath is not declared global in this method,
		# so it is only a local used to build the paths below. Confirm that
		# leaving the real global untouched is intended.
		if ( $this->alternateScriptPath ) {
			if ( $wgMakeDumpLinks == 0 ) {
				$wgScriptPath = '.';
			} else {
				$wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks - 1 );
			}
		} else {
			$wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks );
		}

		$wgArticlePath = str_repeat( '../', $wgMakeDumpLinks ) . '$1';

		# Logo image
		# Allow for repeated setup
		if ( !is_null( $oldLogo ) ) {
			$wgLogo = $oldLogo;
		} else {
			$oldLogo = $wgLogo;
		}
		# $wgUploadPath is overwritten below, so the restored logo has to be
		# compared against the upload path as it was before the first call
		if ( is_null( $oldUploadPath ) ) {
			$oldUploadPath = $wgUploadPath;
		}

		if ( $oldUploadPath != '' && strpos( $wgLogo, $oldUploadPath ) === 0 ) {
			# If it's in the upload directory, rewrite it to the new upload directory
			$wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $oldUploadPath ) + 1 );
		} elseif ( substr( $wgLogo, 0, 1 ) == '/' ) {
			# This is basically heuristic
			# Rewrite an absolute logo path to one relative to the script path
			$wgLogo = $wgScriptPath . $wgLogo;
		}

		$wgStylePath = "$wgScriptPath/skins";
		$wgUploadPath = "$wgScriptPath/{$this->imageRel}";
		$wgSharedUploadPath = "$wgUploadPath/shared";
		$wgMaxCredits = -1;
		$wgHideInterlanguageLinks = !$this->interwiki;
		$wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false;
		$wgEnableParserCache = false;

		$wgUser = new User;
		$wgUser->setOption( 'skin', 'htmldump' );
		$wgUser->setOption( 'editsection', 0 );

		$this->sharedStaticPath = "$wgUploadDirectory/shared";

	}

	/**
	 * Reads the content of a title object, executes the skin and captures the result.
	 * Replaces the globals $wgOut, $wgTitle and (for non-special pages) $wgArticle.
	 *
	 * @param Title $title Page to render
	 * @return mixed The HTML produced by the skin, or false if $title is null
	 */
	function getArticleHTML( &$title ) {
		global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgUseCategoryMagic;

		$wgOut = new OutputPage;
		$wgOut->setParserOptions( new ParserOptions );

		$wgTitle = $title;
		if ( is_null( $wgTitle ) ) {
			return false;
		}

		$ns = $wgTitle->getNamespace();
		if ( $ns == NS_SPECIAL ) {
			SpecialPage::executePath( $wgTitle );
		} else {
			if ( $ns == NS_IMAGE ) {
				$wgArticle = new ImagePage( $wgTitle );
			} elseif ( $wgUseCategoryMagic && $ns == NS_CATEGORY ) {
				$wgArticle = new CategoryPage( $wgTitle );
			} else {
				$wgArticle = new Article( $wgTitle );
			}
			$wgArticle->view();
		}

		# The skin prints the page; buffer the output instead of emitting it
		$sk =& $wgUser->getSkin();
		ob_start();
		$sk->outputPage( $wgOut );
		$text = ob_get_contents();
		ob_end_clean();

		return $text;
	}

	/**
	 * Returns image paths used in an XHTML document.
	 * The text is run through the XML parser with wfDumpStartTagHandler() as
	 * start-tag callback, which collects its results in the global
	 * $wgDumpImages.
	 *
	 * @param string $text XHTML document
	 * @return array Map of image source path => true
	 */
	function findImages( $text ) {
		global $wgOutputEncoding, $wgDumpImages;
		$parser = xml_parser_create( $wgOutputEncoding );
		xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' );

		$wgDumpImages = array();
		xml_parse( $parser, $text );
		xml_parser_free( $parser );

		return $wgDumpImages;
	}

	/**
	 * Copy images (or create symlinks) from commons to a static directory.
	 * This is necessary even if you intend to distribute all of commons, because
	 * the directory contents is used to work out which image description pages
	 * are needed.
	 *
	 * @param array $images Map of image source path => anything, as returned
	 *                      by findImages(); only the keys are used
	 */
	function copyImages( $images ) {
		global $wgSharedUploadPath;
		# Find shared uploads and copy them into the static directory
		$sharedPathLength = strlen( $wgSharedUploadPath );
		foreach ( $images as $image => $dummy ) {
			# Is it shared?
			if ( substr( $image, 0, $sharedPathLength ) != $wgSharedUploadPath ) {
				continue;
			}

			# Reconstruct the path relative to the shared directory
			$rel = substr( $image, $sharedPathLength + 1 ); // +1 for slash
			if ( $rel === false || $rel === '' ) {
				# The source is the shared path itself; there is no file to copy
				continue;
			}
			$this->copySharedFile( $rel );

			if ( substr( $rel, 0, 6 ) == 'thumb/' ) {
				# That was a thumbnail
				# We will also copy the real image
				$parts = explode( '/', $rel );
				# Needs at least four components: thumb/<x>/<xx>/<name>
				if ( count( $parts ) >= 4 ) {
					$this->copySharedFile( "{$parts[1]}/{$parts[2]}/{$parts[3]}" );
				}
			}
		}
	}

	/**
	 * Internal helper for copyImages(): make one file from the shared upload
	 * directory available in the static directory unless it is already there.
	 * A symlink is created when symlink() exists and forceCopy is off,
	 * otherwise the file is copied.
	 *
	 * @param string $rel File path relative to the shared upload directory
	 */
	function copySharedFile( $rel ) {
		global $wgSharedUploadDirectory;

		$sourceLoc = "$wgSharedUploadDirectory/$rel";
		$staticLoc = "{$this->sharedStaticPath}/$rel";
		#print "Copying $sourceLoc to $staticLoc\n";
		if ( file_exists( $staticLoc ) ) {
			return;
		}

		wfMkdirParents( dirname( $staticLoc ), 0755 );
		if ( function_exists( 'symlink' ) && !$this->forceCopy ) {
			symlink( $sourceLoc, $staticLoc );
		} else {
			copy( $sourceLoc, $staticLoc );
		}
	}
}
374
/**
 * XML parser callback for opening tags.
 * Records the SRC attribute of every IMG element as a key of the global
 * $wgDumpImages; all other elements are ignored.
 *
 * @param resource $parser  XML parser invoking the callback (unused)
 * @param string   $name    Element name
 * @param array    $attribs Attribute name => value map of the element
 */
function wfDumpStartTagHandler( $parser, $name, $attribs ) {
	global $wgDumpImages;

	# Only image elements are of interest
	if ( $name != 'IMG' ) {
		return;
	}
	# An image without a source has nothing to record
	if ( !isset( $attribs['SRC'] ) ) {
		return;
	}

	$source = $attribs['SRC'];
	$wgDumpImages[$source] = true;
}
383
/**
 * XML parser callback for closing tags.
 * Does nothing; it is defined so that a named end-tag handler can be
 * registered next to wfDumpStartTagHandler().
 *
 * @param resource $parser XML parser invoking the callback (unused)
 * @param string   $name   Element name (unused)
 */
function wfDumpEndTagHandler( $parser, $name ) {
	# Intentionally empty
	return;
}
386
387 # vim: syn=php
388 ?>