Merge from SerbianVariants branch, trunk 16500 vs SerbianVariants 16523
[lhc/web/wiklou.git] / maintenance / dumpHTML.inc
1 <?php
2 /**
3 * @package MediaWiki
4 * @subpackage Maintenance
5 */
6
# Number of items processed between progress reports; most dump stages
# also write their checkpoint at this interval.
define( 'REPORTING_INTERVAL', 10 );
8
9 require_once( 'includes/ImagePage.php' );
10 require_once( 'includes/CategoryPage.php' );
11 require_once( 'includes/RawPage.php' );
12
13 class DumpHTML {
/** Destination directory */
var $dest;

/** Show interlanguage links? */
var $interwiki = true;

/** Depth of HTML directory tree */
var $depth = 3;

/** Directory that commons images are copied into */
var $sharedStaticDirectory;

/** Directory that the images are in, after copying */
var $destUploadDirectory;

/** Relative path to image directory */
var $imageRel = 'upload';

/** Copy commons images instead of symlinking */
var $forceCopy = false;

/** Make a copy of all images encountered */
var $makeSnapshot = false;

/**
 * Make links assuming the script path is in the same directory as
 * the destination
 */
var $alternateScriptPath = false;

/** Original value of $wgServer . $wgArticlePath, saved by setupGlobals() */
var $oldArticlePath = false;

/** Original value of $wgCopyrightIcon, saved by setupGlobals() */
var $oldCopyrightIcon = false;

/** Has setupGlobals been called? */
var $setupDone = false;

/** List of raw pages used in the current article */
var $rawPages;

/** Skin to use */
var $skin = 'htmldump';

/** Path of the checkpoint file, or false to disable checkpointing */
var $checkpointFile = false;

/** Loaded checkpoint table (name => value), or false if not loaded yet */
var $checkpoints = false;

/** First page_id to dump */
var $startID = 1;

/** Last page_id to dump; false means "use max(page_id)" */
var $endID = false;

/** Index of the slice handled by this job, counted from 1 */
var $sliceNumerator = 1;

/** Total number of slices the work is divided into */
var $sliceDenominator = 1;
60
/**
 * Constructor. Each key of $settings is copied onto the property of the
 * same name, overriding that property's declared default.
 *
 * @param array $settings Map of property name => value
 */
function __construct( $settings = array() ) {
	foreach ( $settings as $var => $value ) {
		$this->$var = $value;
	}
}

/**
 * PHP 4 style constructor, kept as an alias so that explicit calls to
 * DumpHTML() keep working. PHP 8 no longer treats a method named after
 * the class as a constructor, hence the real work lives in __construct().
 *
 * @param array $settings Map of property name => value
 */
function DumpHTML( $settings = array() ) {
	$this->__construct( $settings );
}
66
/**
 * Load the checkpoint table from the checkpoint file, unless it has
 * already been loaded. The file holds one "name=value" entry per line.
 *
 * @return bool True if a checkpoint table is available, false if no
 *   checkpoint file has been configured
 */
function loadCheckpoints() {
	if ( $this->checkpoints !== false ) {
		# Already loaded
		return true;
	}
	if ( !$this->checkpointFile ) {
		return false;
	}

	$this->checkpoints = array();
	$lines = @file( $this->checkpointFile );
	if ( $lines === false ) {
		# Unreadable or missing file: start with an empty table
		print "Starting new checkpoint file \"{$this->checkpointFile}\"\n";
		return true;
	}

	foreach ( $lines as $line ) {
		$line = trim( $line );
		# Skip blank or malformed lines instead of creating a bogus entry
		if ( strpos( $line, '=' ) === false ) {
			continue;
		}
		list( $name, $value ) = explode( '=', $line, 2 );
		$this->checkpoints[$name] = $value;
	}
	return true;
}
88
/**
 * Fetch the stored checkpoint value for a dump stage.
 *
 * @param string $type Checkpoint name, e.g. 'article'
 * @param mixed $defValue Value to return when checkpointing is disabled
 *   or no checkpoint of this name exists
 * @return mixed The stored value, or $defValue
 */
function getCheckpoint( $type, $defValue = false ) {
	if ( !$this->loadCheckpoints() ) {
		return $defValue;
	}
	if ( isset( $this->checkpoints[$type] ) ) {
		return $this->checkpoints[$type];
	}
	return $defValue;
}
99
/**
 * Record a checkpoint and rewrite the whole checkpoint file, one
 * "name=value" line per entry. Does nothing if no checkpoint file has
 * been configured.
 *
 * @param string $type Checkpoint name, e.g. 'article'
 * @param mixed $value Value to store
 */
function setCheckpoint( $type, $value ) {
	if ( !$this->checkpointFile ) {
		return;
	}

	# Make sure entries already in the file are in memory before rewriting
	# it, otherwise they would be lost
	$this->loadCheckpoints();

	$this->checkpoints[$type] = $value;
	$blob = '';
	foreach ( $this->checkpoints as $name => $stored ) {
		$blob .= "$name=$stored\n";
	}
	file_put_contents( $this->checkpointFile, $blob );
}
111
/**
 * Run every stage of the dump in order, then mark the whole job as
 * done in the checkpoint file. Returns immediately if the checkpoint
 * says the job has already been completed.
 */
function doEverything() {
	$alreadyDone = ( $this->getCheckpoint( 'everything' ) == 'done' );
	if ( $alreadyDone ) {
		print "Checkpoint says everything is already done\n";
		return;
	}

	$this->doArticles();
	# Local image descriptions first, then shared ones
	$this->doImageDescriptions();
	$this->doCategories();
	$this->doRedirects();

	# Special pages and the main page are only written by the first slice
	if ( $this->sliceNumerator == 1 ) {
		$this->doSpecials();
	}

	$this->setCheckpoint( 'everything', 'done' );
}
128
/**
 * Write the articles whose page_id lies in this job's slice of the
 * startID..endID range. Category pages and the main page are skipped
 * here; they are written by other stages.
 */
function doArticles() {
	$fname = 'DumpHTML::doArticles';

	# Default the upper bound to the highest page_id in the database
	if ( $this->endID === false ) {
		$dbr =& wfGetDB( DB_SLAVE );
		$this->endID = $dbr->selectField( 'page', 'max(page_id)', false, $fname );
	}

	# Restrict the ID range to this job's slice
	list( $first, $last ) = $this->sliceRange( $this->startID, $this->endID );

	# Honour a checkpoint left behind by an earlier run
	$checkpoint = $this->getCheckpoint( 'article' );
	if ( $checkpoint == 'done' ) {
		print "Articles already done\n";
		return;
	}
	if ( $checkpoint !== false ) {
		$first = $checkpoint;
		print "Resuming article dump from checkpoint at page_id $first of $last\n";
	} else {
		print "Starting from page_id $first of $last\n";
	}

	$this->setupGlobals();

	$mainPageKey = Title::newMainPage()->getPrefixedDBkey();

	for ( $id = $first; $id <= $last; $id++ ) {
		wfWaitForSlaves( 20 );

		# Progress report and checkpoint every REPORTING_INTERVAL IDs,
		# with a line break every ten reports
		if ( $id % REPORTING_INTERVAL == 0 ) {
			print "Processing ID: $id\r";
			$this->setCheckpoint( 'article', $id );
		}
		if ( $id % ( REPORTING_INTERVAL * 10 ) == 0 ) {
			print "\n";
		}

		$title = Title::newFromID( $id );
		if ( !$title ) {
			# No page with this ID
			continue;
		}
		if ( $title->getNamespace() == NS_CATEGORY ) {
			continue;
		}
		if ( $title->getPrefixedDBkey() == $mainPageKey ) {
			continue;
		}
		$this->doArticle( $title );
	}

	$this->setCheckpoint( 'article', 'done' );
	print "\n";
}
181
/**
 * Write the main page (as index.html) and the Special:Categories page.
 */
function doSpecials() {
	$this->doMainPage();

	# doMainPage() set the globals up for depth 0, so restore the defaults
	$this->setupGlobals();

	print "Special:Categories...";
	$categoriesPage = Title::makeTitle( NS_SPECIAL, 'Categories' );
	$this->doArticle( $categoriesPage );
	print "\n";
}
190
/**
 * Write the main page as index.html in the destination directory.
 *
 * @return mixed False if index.html could not be opened for writing,
 *   otherwise nothing
 */
function doMainPage() {
	print "Making index.html ";

	# Depth 0: links must not start with ../../.. for a page at the root
	$this->setupGlobals( 0 );

	$title = Title::newMainPage();
	$html = $this->getArticleHTML( $title );

	# Copy every image the rendered page refers to
	$this->copyImages( $this->findImages( $html ) );

	$handle = fopen( "{$this->dest}/index.html", "w" );
	if ( !$handle ) {
		print "\nCan't open index.html for writing\n";
		return false;
	}
	fwrite( $handle, $html );
	fclose( $handle );
	print "\n";
}
215
/**
 * Write all image description pages: those for local images first,
 * then those for shared images.
 */
function doImageDescriptions() {
	$this->doLocalImageDescriptions();
	$this->doSharedImageDescriptions();
}
220
/**
 * Write description pages for local images that have a row in the image
 * table but no article of their own. Images are processed in name order
 * so that the checkpoint (an image name) can be used to resume.
 */
function doLocalImageDescriptions() {
	global $wgSharedUploadDirectory;

	$dbr =& wfGetDB( DB_SLAVE );

	$checkpoint = $this->getCheckpoint( 'local image' );
	if ( $checkpoint == 'done' ) {
		print "Local image descriptions already done\n";
		return;
	}
	if ( $checkpoint === false ) {
		print "Writing image description pages for local images\n";
		$conds = false;
	} else {
		print "Writing image description pages starting from $checkpoint\n";
		$conds = array( 'img_name >= ' . $dbr->addQuotes( $checkpoint ) );
	}

	$this->setupGlobals();

	$res = $dbr->select(
		'image',
		array( 'img_name' ),
		$conds,
		__METHOD__,
		array( 'ORDER BY' => 'img_name' )
	);

	$processed = 0;
	$total = $dbr->numRows( $res );
	while ( $row = $dbr->fetchObject( $res ) ) {
		$imageName = $row->img_name;

		# Leave images belonging to other slices to the other jobs
		if ( !$this->sliceFilter( $imageName ) ) {
			continue;
		}

		wfWaitForSlaves( 10 );

		$processed++;
		if ( $processed % REPORTING_INTERVAL == 0 ) {
			print "Done $processed of $total\r";
			# An image literally named "done" must not be mistaken for
			# the completion marker
			if ( $imageName !== 'done' ) {
				$this->setCheckpoint( 'local image', $imageName );
			}
		}

		$title = Title::makeTitle( NS_IMAGE, $imageName );
		if ( !$title->getArticleID() ) {
			# Pages with an article ID were already written by doArticles
			$this->doArticle( $title );
		}
	}

	$this->setCheckpoint( 'local image', 'done' );
	print "\n";
}
272
/**
 * Write description pages for the shared (commons) images found in the
 * shared static directory. The 256 hashed subdirectories 0/00 .. f/ff
 * are divided between slices; the checkpoint is the directory number.
 */
function doSharedImageDescriptions() {
	list( $start, $end ) = $this->sliceRange( 0, 255 );

	$cp = $this->getCheckpoint( 'shared image' );
	if ( $cp == 'done' ) {
		print "Shared description pages already done\n";
		return;
	} elseif ( $cp !== false ) {
		print "Writing description pages for commons images starting from directory $cp/255\n";
		# Checkpoints read from the file are strings; normalise as
		# doRedirects() does
		$start = intval( $cp );
	} else {
		print "Writing description pages for commons images\n";
	}

	$this->setupGlobals();
	$i = 0;
	for ( $hash = $start; $hash <= $end; $hash++ ) {
		$this->setCheckpoint( 'shared image', $hash );

		$dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash );
		$patterns = array(
			"{$this->sharedStaticDirectory}/$dir/*",
			"{$this->sharedStaticDirectory}/thumb/$dir/*",
		);

		# glob() returns false on error (and on some systems when nothing
		# matches), which must not be fed to array_merge()
		$paths = array();
		foreach ( $patterns as $pattern ) {
			$matches = glob( $pattern );
			if ( is_array( $matches ) ) {
				$paths = array_merge( $paths, $matches );
			}
		}

		foreach ( $paths as $path ) {
			$file = wfBaseName( $path );
			if ( !( ++$i % REPORTING_INTERVAL ) ) {
				print "$i\r";
			}

			$title = Title::makeTitle( NS_IMAGE, $file );
			$this->doArticle( $title );
		}
	}
	$this->setCheckpoint( 'shared image', 'done' );
	print "\n";
}
313
/**
 * Write a category page for every distinct category name found in the
 * categorylinks table that belongs to this job's slice. Categories are
 * processed in name order so that the checkpoint (a category name) can
 * be used to resume.
 */
function doCategories() {
	$fname = 'DumpHTML::doCategories';
	$this->setupGlobals();
	$dbr =& wfGetDB( DB_SLAVE );
	$query = 'SELECT DISTINCT cl_to FROM ' . $dbr->tableName( 'categorylinks' );

	$checkpoint = $this->getCheckpoint( 'category' );
	if ( $checkpoint == 'done' ) {
		print "Category pages already done\n";
		return;
	}
	if ( $checkpoint !== false ) {
		print "Resuming category page dump from $checkpoint\n";
		$query .= ' WHERE cl_to >= ' . $dbr->addQuotes( $checkpoint );
	}
	$query .= ' ORDER BY cl_to';

	print "Selecting categories...";
	$res = $dbr->query( $query, $fname );
	print "\nWriting " . $dbr->numRows( $res ). " category pages\n";

	$processed = 0;
	while ( $row = $dbr->fetchObject( $res ) ) {
		$category = $row->cl_to;

		# Leave categories belonging to other slices to the other jobs
		if ( !$this->sliceFilter( $category ) ) {
			continue;
		}

		wfWaitForSlaves( 10 );

		$processed++;
		if ( $processed % REPORTING_INTERVAL == 0 ) {
			print "{$category}\n";
			# A category literally named "done" must not be mistaken for
			# the completion marker
			if ( $category != 'done' ) {
				$this->setCheckpoint( 'category', $category );
			}
		}

		$this->doArticle( Title::makeTitle( NS_CATEGORY, $category ) );
	}

	$this->setCheckpoint( 'category', 'done' );
	print "\n";
}
354
/**
 * Write a page for every redirect whose page_id lies in this job's slice
 * of the 1..max(page_id) range. The checkpoint is the page_id of the
 * last reported row.
 */
function doRedirects() {
	print "Doing redirects...\n";
	$fname = 'DumpHTML::doRedirects';

	$dbr =& wfGetDB( DB_SLAVE );
	$end = $dbr->selectField( 'page', 'max(page_id)', false, $fname );
	list( $start, $end ) = $this->sliceRange( 1, $end );

	$cp = $this->getCheckpoint( 'redirect' );
	if ( $cp == 'done' ) {
		print "Redirects already done\n";
		return;
	} elseif ( $cp !== false ) {
		print "Resuming redirect generation from page_id $cp\n";
		$start = intval( $cp );
	}

	$conds = array(
		'page_is_redirect' => 1,
		"page_id BETWEEN $start AND $end"
	);

	$this->setupGlobals();
	# Rows must arrive in page_id order: resuming restarts at the
	# checkpointed page_id, so every row with a lower ID has to have been
	# processed before that checkpoint was written.
	$res = $dbr->select( 'page', array( 'page_id', 'page_namespace', 'page_title' ),
		$conds, $fname, array( 'ORDER BY' => 'page_id' ) );
	$num = $dbr->numRows( $res );
	print "$num redirects to do...\n";
	$i = 0;
	while ( $row = $dbr->fetchObject( $res ) ) {
		$title = Title::makeTitle( $row->page_namespace, $row->page_title );
		if ( !( ++$i % ( REPORTING_INTERVAL * 10 ) ) ) {
			print "Done $i of $num (ID {$row->page_id})\n";
			$this->setCheckpoint( 'redirect', $row->page_id );
		}
		$this->doArticle( $title );
	}
	$this->setCheckpoint( 'redirect', 'done' );
}
394
/**
 * Render the page for the given title and write it to its file, copy the
 * images it refers to, and write any raw pages (action=raw URLs recorded
 * by onGetLocalURL() while rendering) that do not exist yet.
 *
 * @param Title $title Page to write
 */
function doArticle( $title ) {
	$this->rawPages = array();
	$text = $this->getArticleHTML( $title );

	if ( $text === false ) {
		return;
	}

	# Parse the XHTML to find the images
	$images = $this->findImages( $text );
	$this->copyImages( $images );

	# Write to file
	$this->writeArticle( $title, $text );

	# Do raw pages
	wfMkdirParents( "{$this->dest}/raw", 0755 );
	foreach ( $this->rawPages as $record ) {
		list( $rawName, $rawTitle, $params ) = $record;

		$path = "{$this->dest}/raw/$rawName";
		if ( file_exists( $path ) ) {
			# Already written while dumping an earlier page
			continue;
		}

		$article = new Article( $rawTitle );
		$request = new FauxRequest( $params );
		$rp = new RawPage( $article, $request );
		$rawText = $rp->getRawText();

		print "Writing $rawName\n";
		$handle = fopen( $path, 'w' );
		if ( !$handle ) {
			print( "Can't open file $path for writing\n" );
			continue;
		}
		fwrite( $handle, $rawText );
		fclose( $handle );
	}
}
437
/**
 * Write the given text to the file that corresponds to the given title,
 * creating parent directories as needed. Terminates the script if the
 * file cannot be opened for writing.
 *
 * @param Title $title Title that determines the file name
 * @param string $text Content to write
 */
function writeArticle( &$title, $text ) {
	$fullName = "{$this->dest}/" . $this->getHashedFilename( $title );

	wfMkdirParents( dirname( $fullName ), 0755 );

	# Failure is reported below, so silence fopen()'s own warning
	wfSuppressWarnings();
	$handle = fopen( $fullName, 'w' );
	wfRestoreWarnings();

	if ( !$handle ) {
		die("Can't open file '$fullName' for writing.\nCheck permissions or use another destination (-d).\n");
	}

	fwrite( $handle, $text );
	fclose( $handle );
}
458
/**
 * Set up the globals required for rendering pages into a static tree:
 * registers this object for the URL and site notice hooks (first call
 * only), rewrites the path globals so they are relative to a page that
 * sits $currentDepth directories below the dump root, disables caches,
 * and installs a fresh user with the dump skin.
 *
 * May be called repeatedly; the logo, upload path and copyright icon are
 * always rewritten from their original values.
 *
 * @param int|null $currentDepth Directory depth of the page about to be
 *   rendered; null means $this->depth
 */
function setupGlobals( $currentDepth = NULL ) {
	global $wgUser, $wgTitle, $wgStylePath, $wgArticlePath, $wgMathPath;
	global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath;
	global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath;
	global $wgSharedThumbnailScriptPath, $wgEnableParserCache, $wgHooks, $wgServer;
	global $wgRightsUrl, $wgRightsText, $wgCopyrightIcon, $wgEnableSidebarCache;
	global $wgGenerateThumbnailOnParse;

	# Original values, remembered across calls to allow repeated setup
	static $oldLogo = NULL, $oldUploadPath = NULL;

	if ( !$this->setupDone ) {
		$wgHooks['GetLocalURL'][] =& $this;
		$wgHooks['GetFullURL'][] =& $this;
		$wgHooks['SiteNoticeBefore'][] =& $this;
		$wgHooks['SiteNoticeAfter'][] =& $this;
		$this->oldArticlePath = $wgServer . $wgArticlePath;
		$this->oldCopyrightIcon = $wgCopyrightIcon;
	}

	if ( is_null( $currentDepth ) ) {
		$currentDepth = $this->depth;
	}

	# NOTE(review): $wgScriptPath is not in the global lists above, so this
	# is a function-local variable and the real global stays untouched;
	# confirm that this is intended.
	if ( $this->alternateScriptPath ) {
		if ( $currentDepth == 0 ) {
			$wgScriptPath = '.';
		} else {
			$wgScriptPath = '..' . str_repeat( '/..', $currentDepth - 1 );
		}
	} else {
		$wgScriptPath = '..' . str_repeat( '/..', $currentDepth );
	}

	$wgArticlePath = str_repeat( '../', $currentDepth ) . '$1';

	# Logo image
	# Allow for repeated setup: always start from the original logo and
	# compare it against the original upload path, because $wgUploadPath
	# itself is overwritten further down
	if ( !is_null( $oldLogo ) ) {
		$wgLogo = $oldLogo;
	} else {
		$oldLogo = $wgLogo;
	}
	if ( is_null( $oldUploadPath ) ) {
		$oldUploadPath = $wgUploadPath;
	}

	if ( $oldUploadPath != '' && strpos( $wgLogo, $oldUploadPath ) === 0 ) {
		# If it's in the upload directory, rewrite it to the new upload directory
		$wgLogo = "$wgScriptPath/{$this->imageRel}/" . substr( $wgLogo, strlen( $oldUploadPath ) + 1 );
	} elseif ( substr( $wgLogo, 0, 1 ) == '/' ) {
		# This is basically heuristic
		# Rewrite an absolute logo path to one relative to the script path
		$wgLogo = $wgScriptPath . $wgLogo;
	}

	# Another ugly hack: point the copyright icon at the relative images path
	$wgCopyrightIcon = str_replace( 'src="/images',
		'src="' . htmlspecialchars( $wgScriptPath ) . '/images', $this->oldCopyrightIcon );

	$wgStylePath = "$wgScriptPath/skins";
	$wgUploadPath = "$wgScriptPath/{$this->imageRel}";
	$wgSharedUploadPath = "$wgUploadPath/shared";
	$wgMaxCredits = -1;
	$wgHideInterlanguageLinks = !$this->interwiki;
	$wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false;
	$wgEnableParserCache = false;
	$wgMathPath = "$wgScriptPath/math";
	$wgEnableSidebarCache = false;
	$wgGenerateThumbnailOnParse = true;

	if ( !empty( $wgRightsText ) ) {
		$wgRightsUrl = "$wgScriptPath/COPYING.html";
	}

	$wgUser = new User;
	$wgUser->setOption( 'skin', $this->skin );
	$wgUser->setOption( 'editsection', 0 );

	# Without a configured upload destination the shared directory below
	# would resolve to "/shared" in the filesystem root, so fall back to
	# the image directory inside the dump
	if ( $this->makeSnapshot || is_null( $this->destUploadDirectory ) ) {
		$this->destUploadDirectory = "{$this->dest}/{$this->imageRel}";
	}

	if ( $this->makeSnapshot ) {
		# Nothing to copy if the destination is the upload directory itself
		$destReal = realpath( $this->destUploadDirectory );
		if ( $destReal !== false && $destReal == realpath( $wgUploadDirectory ) ) {
			$this->makeSnapshot = false;
		}
	}

	$this->sharedStaticDirectory = "{$this->destUploadDirectory}/shared";

	$this->setupDone = true;
}
548
/**
 * Render the page for a title through the current user's skin and
 * return the captured output.
 *
 * @param Title $title Page to render
 * @return string|bool The page HTML; a small redirect page if the
 *   content is a redirect; false if $title is null
 */
function getArticleHTML( &$title ) {
	global $wgOut, $wgTitle, $wgArticle, $wgUser;

	# Start every page with an empty link cache
	$linkCache =& LinkCache::singleton();
	$linkCache->clear();

	$wgTitle = $title;
	if ( is_null( $wgTitle ) ) {
		return false;
	}

	$ns = $wgTitle->getNamespace();
	if ( $ns == NS_SPECIAL ) {
		$wgOut = new OutputPage;
		$wgOut->setParserOptions( new ParserOptions );
		SpecialPage::executePath( $wgTitle );
	} else {
		/** @todo merge with Wiki.php code */
		switch ( $ns ) {
			case NS_IMAGE:
				$wgArticle = new ImagePage( $wgTitle );
				break;
			case NS_CATEGORY:
				$wgArticle = new CategoryPage( $wgTitle );
				break;
			default:
				$wgArticle = new Article( $wgTitle );
		}

		# Redirects get a stub page instead of a rendered article
		$redirectTarget = Title::newFromRedirect( $wgArticle->fetchContent() );
		if ( $redirectTarget != NULL ) {
			return $this->getRedirect( $redirectTarget );
		}

		$wgOut = new OutputPage;
		$wgOut->setParserOptions( new ParserOptions );

		$wgArticle->view();
	}

	# Let the skin print the page and capture what it prints
	$sk =& $wgUser->getSkin();
	ob_start();
	$sk->outputPage( $wgOut );
	return ob_get_clean();
}
593
/**
 * Build a minimal XHTML page that redirects to the given title, using a
 * meta refresh plus a plain link as a fallback.
 *
 * @param Title $rt Redirect target
 * @return string The XHTML document
 */
function getRedirect( $rt ) {
	$url = $rt->escapeLocalURL();
	# The title text ends up in element content, so it has to be escaped
	$text = htmlspecialchars( $rt->getPrefixedText() );
	return <<<ENDTEXT
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
<meta http-equiv="Refresh" content="0;url=$url" />
</head>
<body>
<p>Redirecting to <a href="$url">$text</a></p>
</body>
</html>
ENDTEXT;
}
610
/**
 * Collect the image paths used in an XHTML document by running it
 * through an XML parser whose start-tag handler records them in
 * $wgDumpImages.
 *
 * @param string $text XHTML document
 * @return array The contents of $wgDumpImages after parsing
 */
function findImages( $text ) {
	global $wgOutputEncoding, $wgDumpImages;

	# Reset the collection filled in by the tag handler
	$wgDumpImages = array();

	$xml = xml_parser_create( $wgOutputEncoding );
	xml_set_element_handler( $xml, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' );
	xml_parse( $xml, $text );
	xml_parser_free( $xml );

	return $wgDumpImages;
}
623
/**
 * Copy (or symlink) a file specified by a URL into a directory, keeping
 * its path relative to the base. Nothing is done if the destination
 * already exists.
 *
 * @param string $srcPath The source URL
 * @param string $srcPathBase The base directory of the source URL
 * @param string $srcDirBase The base filesystem directory of the source URL
 * @param string $destDirBase The base filesystem directory of the destination URL
 */
function relativeCopy( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ) {
	# Part of the URL below the base; +1 skips the slash
	$relative = substr( $srcPath, strlen( $srcPathBase ) + 1 );
	$from = "$srcDirBase/$relative";
	$to = "$destDirBase/$relative";

	if ( file_exists( $to ) ) {
		return;
	}

	wfMkdirParents( dirname( $to ), 0755 );
	if ( $this->forceCopy || !function_exists( 'symlink' ) ) {
		copy( $from, $to );
	} else {
		symlink( $from, $to );
	}
}
646
/**
 * Copy an image and, if its path below the base starts with "thumb/",
 * also copy the image named by the next three path components (the
 * image the thumbnail was made from).
 *
 * @param string $srcPath The source URL
 * @param string $srcPathBase The base directory of the source URL
 * @param string $srcDirBase The base filesystem directory of the source URL
 * @param string $destDirBase The base filesystem directory of the destination URL
 */
function copyImage( $srcPath, $srcPathBase, $srcDirBase, $destDirBase ) {
	global $wgUploadPath, $wgUploadDirectory, $wgSharedUploadPath;

	$this->relativeCopy( $srcPath, $srcPathBase, $srcDirBase, $destDirBase );

	$relative = substr( $srcPath, strlen( $srcPathBase ) + 1 );
	if ( substr( $relative, 0, 6 ) != 'thumb/' ) {
		# Not a thumbnail, nothing more to copy
		return;
	}

	# Component 0 is "thumb"; components 1-3 locate the source image
	$parts = explode( '/', $relative );
	$original = "$srcPathBase/{$parts[1]}/{$parts[2]}/{$parts[3]}";
	$this->relativeCopy( $original, $srcPathBase, $srcDirBase, $destDirBase );
}
663
/**
 * Copy images (or create symlinks) from commons to a static directory.
 * This is needed even when all of commons is going to be distributed,
 * because the directory contents are used to work out which image
 * description pages are required.
 *
 * Math images are copied as well, and so are local images if the
 * makeSnapshot option is set.
 *
 * @param array $images Map whose keys are URL-encoded image paths
 */
function copyImages( $images ) {
	global $wgUploadPath, $wgUploadDirectory, $wgSharedUploadPath, $wgSharedUploadDirectory,
		$wgMathPath, $wgMathDirectory;

	$sharedLen = strlen( $wgSharedUploadPath );
	$mathLen = strlen( $wgMathPath );
	$uploadLen = strlen( $wgUploadPath );

	foreach ( array_keys( $images ) as $escapedImage ) {
		$image = urldecode( $escapedImage );

		# Classify the image by its path prefix; the shared path is
		# tested before the upload path
		$isShared = ( substr( $image, 0, $sharedLen ) == $wgSharedUploadPath );
		if ( $isShared ) {
			$this->copyImage( $image, $wgSharedUploadPath, $wgSharedUploadDirectory, $this->sharedStaticDirectory );
			continue;
		}

		$isMath = ( substr( $image, 0, $mathLen ) == $wgMathPath );
		if ( $isMath ) {
			$this->relativeCopy( $image, $wgMathPath, $wgMathDirectory, "{$this->dest}/math" );
			continue;
		}

		if ( $this->makeSnapshot && substr( $image, 0, $uploadLen ) == $wgUploadPath ) {
			$this->copyImage( $image, $wgUploadPath, $wgUploadDirectory, $this->destUploadDirectory );
		}
	}
}
693
/**
 * GetFullURL hook handler. Interwiki titles whose prefix has a language
 * name are pointed at a sibling dump directory named after the prefix.
 *
 * @param Title $title Title being linked to
 * @param string $url URL, replaced when the hook handles the title
 * @param string $query Query string (not used)
 * @return bool False if $url was set here, true to keep the default
 */
function onGetFullURL( &$title, &$url, $query ) {
	global $wgContLang, $wgArticlePath;

	$iw = $title->getInterwiki();
	if ( !$title->isExternal() || !$wgContLang->getLanguageName( $iw ) ) {
		return true;
	}

	if ( $title->getDBkey() == '' ) {
		# A bare prefix links to the other dump's front page
		$target = "../$iw/index.html";
	} else {
		$target = "../$iw/" . wfUrlencode( $this->getHashedFilename( $title ) );
	}
	$url = str_replace( '$1', $target, $wgArticlePath );

	return false;
}
710
/**
 * GetLocalURL hook handler. Local titles are pointed at their file in
 * the dump. Requests with action=raw are pointed into the raw/
 * directory and recorded in $this->rawPages so that doArticle() can
 * write them.
 *
 * @param Title $title Title being linked to
 * @param string $url URL, replaced for local titles
 * @param string $query Query string of the link
 * @return bool True to keep the default URL (interwiki titles), false
 *   if $url was set here
 */
function onGetLocalURL( &$title, &$url, $query ) {
	global $wgArticlePath;

	if ( $title->isExternal() ) {
		# Default is fine for interwiki
		return true;
	}

	$url = false;
	if ( $query != '' ) {
		parse_str( $query, $params );
		if ( isset( $params['action'] ) && $params['action'] == 'raw' ) {
			# "gen" is optional; do not read it when it is absent
			$gen = isset( $params['gen'] ) ? $params['gen'] : '';
			if ( $gen == 'css' || $gen == 'js' ) {
				$file = 'gen.' . $gen;
			} else {
				$file = $this->getFriendlyName( $title->getPrefixedDBkey() );
				// Clean up Monobook.css etc.: move the hash suffix off the extension
				if ( preg_match( '/^(.*)\.(css|js)_[0-9a-f]{4}$/', $file, $matches ) ) {
					$file = $matches[1] . '.' . $matches[2];
				}
			}
			$this->rawPages[$file] = array( $file, $title, $params );
			$url = str_replace( '$1', "raw/" . wfUrlencode( $file ), $wgArticlePath );
		}
	}
	if ( $url === false ) {
		$url = str_replace( '$1', wfUrlencode( $this->getHashedFilename( $title ) ), $wgArticlePath );
	}

	return false;
}
742
/**
 * Get the path, relative to the dump root, of the HTML file for a title.
 * The main page maps to index.html; every other page goes into its
 * hashed directory under a filesystem-friendly name.
 *
 * @param Title $title
 * @return string Relative file path
 */
function getHashedFilename( &$title ) {
	# Use the accessor, as getHashedDirectory() does, rather than reading
	# the member variable directly
	if ( '' != $title->getInterwiki() ) {
		$dbkey = $title->getDBkey();
	} else {
		$dbkey = $title->getPrefixedDBkey();
	}

	$mainPage = Title::newMainPage();
	# Strict comparison: numeric-looking keys must not compare as numbers
	if ( $mainPage->getPrefixedDBkey() === $dbkey ) {
		return 'index.html';
	}

	return $this->getHashedDirectory( $title ) . '/' .
		$this->getFriendlyName( $dbkey ) . '.html';
}
758
/**
 * Turn a page name into a file name that is safe on Windows and on
 * case-insensitive filesystems.
 *
 * @param string $name Page name (prefixed DB key)
 * @return string File name without extension
 */
function getFriendlyName( $name ) {
	global $wgLang;
	# Replace illegal characters for Windows paths with underscores
	$friendlyName = strtr( $name, '/\\*?"<>|~', '_________' );

	# Work out lower case form. We assume we're on a system with case-insensitive
	# filenames, so unless the case is of a special form, we have to disambiguate
	if ( function_exists( 'mb_strtolower' ) ) {
		$lowerCase = $wgLang->ucfirst( mb_strtolower( $name ) );
	} else {
		$lowerCase = ucfirst( strtolower( $name ) );
	}

	# Make it mostly unique
	# The comparison has to be strict: with a loose one, numeric-looking
	# names such as "1E3" and "1e3" compare as equal numbers and the
	# disambiguating hash would be left out
	if ( $lowerCase !== $friendlyName ) {
		$friendlyName .= '_' . substr( md5( $name ), 0, 4 );
	}
	# Handle colon specially by replacing it with tilde
	# Thus we reduce the number of paths with hashes appended
	$friendlyName = str_replace( ':', '~', $friendlyName );

	return $friendlyName;
}
782
/**
 * Get the relative directory that a title's file is put into. The
 * directory has $this->depth levels, one per leading character of the
 * page name (without its prefix), padded with "_" for short names.
 *
 * @param Title $title
 * @return string Relative directory, without a trailing slash
 */
function getHashedDirectory( &$title ) {
	$pdbk = ( '' != $title->getInterwiki() )
		? $title->getDBkey()
		: $title->getPrefixedDBkey();

	# Use only what follows the first colon, minus leading underscores
	$colon = strpos( $pdbk, ':' );
	if ( $colon === false ) {
		$dbk = $pdbk;
	} else {
		$afterColon = substr( $pdbk, $colon + 1 );
		$dbk = substr( $afterColon, strspn( $afterColon, '_' ) );
	}

	# Split into (UTF-8) characters
	preg_match_all( '/./us', $dbk, $m );
	$chars = $m[0];
	$length = count( $chars );

	$levels = array();
	for ( $i = 0; $i < $this->depth; $i++ ) {
		if ( $i >= $length ) {
			# Name is shorter than the tree is deep
			$levels[] = '_';
			continue;
		}

		$c = $chars[$i];
		$usable = ord( $c ) >= 128 || preg_match( '/[a-zA-Z0-9!#$%&()+,[\]^_`{}-]/', $c );
		if ( !$usable ) {
			# Anything else is written as a two-digit hex code
			$levels[] = sprintf( "%02X", ord( $c ) );
		} elseif ( function_exists( 'mb_strtolower' ) ) {
			$levels[] = mb_strtolower( $c );
		} else {
			$levels[] = strtolower( $c );
		}
	}

	return implode( '/', $levels );
}
830
/**
 * Calculate the start and end of this job's share of a range, based on
 * the current slice. The last slice always runs up to $end.
 *
 * @param integer $start First value of the whole range
 * @param integer $end Last value of the whole range
 * @return array of integers: ( slice start, slice end )
 */
function sliceRange( $start, $end ) {
	$perSlice = ( $end - $start + 1 ) / $this->sliceDenominator;

	$sliceStart = $start + intval( $perSlice * ( $this->sliceNumerator - 1 ) );

	$isLastSlice = ( $this->sliceNumerator == $this->sliceDenominator );
	$sliceEnd = $isLastSlice
		? $end
		: $start + intval( $perSlice * $this->sliceNumerator ) - 1;

	return array( $sliceStart, $sliceEnd );
}
848
/**
 * Determine whether a string belongs to the current slice, based on its
 * CRC32 hash.
 *
 * @param string $s
 * @return bool
 */
function sliceFilter( $s ) {
	# crc32() returns negative integers on 32-bit platforms, and a negative
	# remainder would never match any slice. Work with the unsigned value;
	# it is below 2^32, so a float holds it exactly.
	$unsigned = floatval( sprintf( '%u', crc32( $s ) ) );
	return intval( fmod( $unsigned, $this->sliceDenominator ) ) == $this->sliceNumerator - 1;
}
855
/**
 * SiteNoticeBefore hook handler: the dump carries no site notice.
 *
 * @param string $text Notice text, set to the empty string
 * @return bool Always false
 */
function onSiteNoticeBefore( &$text ) {
	$text = '';

	return false;
}
/**
 * SiteNoticeAfter hook handler: the dump carries no site notice.
 *
 * @param string $text Notice text, set to the empty string
 * @return bool Always false
 */
function onSiteNoticeAfter( &$text ) {
	$text = '';

	return false;
}
867 }
868
/**
 * XML parser start-tag callback: records the SRC attribute of every IMG
 * element as a key of $wgDumpImages.
 *
 * @param resource $parser XML parser (not used)
 * @param string $name Element name
 * @param array $attribs Element attributes
 */
function wfDumpStartTagHandler( $parser, $name, $attribs ) {
	global $wgDumpImages;

	if ( $name != 'IMG' || !isset( $attribs['SRC'] ) ) {
		return;
	}
	$wgDumpImages[$attribs['SRC']] = true;
}
877
/**
 * XML parser end-tag callback. Does nothing; it is only passed because
 * xml_set_element_handler() takes a start and an end handler.
 *
 * @param resource $parser XML parser (not used)
 * @param string $name Element name (not used)
 */
function wfDumpEndTagHandler( $parser, $name ) {
	# Nothing to do on closing tags
}
880
881 # vim: syn=php
882 ?>