X-Git-Url: http://git.heureux-cyclage.org/?a=blobdiff_plain;f=maintenance%2FdumpHTML.inc;h=057b2219126203f035dc7b44dc1d79b820b3f914;hb=aa4f7f8ac1974bd46a9754ad32f4508d6063bd3e;hp=1549e470df7b5984dbcafbaf0ba06a93d2575ae1;hpb=22e60a4772bcdc4765ecb00b254644d26b8b16e9;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/dumpHTML.inc b/maintenance/dumpHTML.inc index 1549e470df..057b221912 100644 --- a/maintenance/dumpHTML.inc +++ b/maintenance/dumpHTML.inc @@ -16,47 +16,47 @@ class DumpHTML { # Show interlanguage links? var $interwiki = true; - + # Depth of HTML directory tree var $depth = 3; # Directory that commons images are copied into var $sharedStaticPath; - + # Relative path to image directory var $imageRel = 'upload'; # Copy commons images instead of symlinking var $forceCopy = false; - # Make links assuming the script path is in the same directory as + # Make links assuming the script path is in the same directory as # the destination var $alternateScriptPath = false; - # Original article path, for "current version" links - var $oldArticlePath = false; + # Original values of various globals + var $oldArticlePath = false, $oldCopyrightIcon = false; # Has setupGlobals been called? var $setupDone = false; # List of raw pages used in the current article var $rawPages; - + function DumpHTML( $settings ) { foreach ( $settings as $var => $value ) { $this->$var = $value; } } - /** - * Write a set of articles specified by start and end page_id + /** + * Write a set of articles specified by start and end page_id * Skip categories and images, they will be done separately */ function doArticles( $start, $end = false ) { $fname = 'DumpHTML::doArticles'; - + $this->setupGlobals(); - + if ( $end === false ) { $dbr =& wfGetDB( DB_SLAVE ); $end = $dbr->selectField( 'page', 'max(page_id)', false, $fname ); @@ -65,7 +65,7 @@ class DumpHTML { $mainPageObj = Title::newMainPage(); $mainPage = $mainPageObj->getPrefixedDBkey(); - + for ($id = $start; $id <= $end; $id++) { wfWaitForSlaves( 20 ); if ( !($id % REPORTING_INTERVAL) ) { @@ -77,13 +77,13 @@ class DumpHTML { $title = Title::newFromID( $id ); if ( $title ) { $ns = $title->getNamespace() ; - if ( $ns != NS_CATEGORY && $title->getPrefixedDBkey() != $mainPage ) { + if ( $ns != NS_CATEGORY && $title->getPrefixedDBkey() != $mainPage ) { $this->doArticle( $title ); } } } print "\n"; - } + } function doSpecials() { $this->doMainPage(); @@ -96,16 +96,12 @@ class DumpHTML { /** Write the main page as index.html */ function doMainPage() { - global $wgMakeDumpLinks; print "Making index.html "; // Set up globals with no ../../.. in the link URLs $this->setupGlobals( 0 ); - // But still use that directory style - $wgMakeDumpLinks = 3; - $title = Title::newMainPage(); $text = $this->getArticleHTML( $title ); $file = fopen( "{$this->dest}/index.html", "w" ); @@ -120,13 +116,13 @@ class DumpHTML { function doImageDescriptions() { global $wgSharedUploadDirectory; - + $fname = 'DumpHTML::doImageDescriptions'; - - $this->setupGlobals( 3 ); - /** - * Dump image description pages that don't have an associated article, but do + $this->setupGlobals(); + + /** + * Dump image description pages that don't have an associated article, but do * have a local image */ $dbr =& wfGetDB( DB_SLAVE ); @@ -142,7 +138,7 @@ class DumpHTML { print "Done $i of $num\r"; } $title = Title::makeTitle( NS_IMAGE, $row->img_name ); - if ( $title->getArticleID() ) { + if ( $title->getArticleID() ) { // Already done by dumpHTML continue; } @@ -155,7 +151,7 @@ class DumpHTML { */ print "Writing description pages for commons images\n"; $i = 0; - for ( $hash = 0; $hash < 256; $hash++ ) { + for ( $hash = 0; $hash < 256; $hash++ ) { $dir = sprintf( "%01x/%02x", intval( $hash / 16 ), $hash ); $paths = array_merge( glob( "{$this->sharedStaticPath}/$dir/*" ), glob( "{$this->sharedStaticPath}/thumb/$dir/*" ) ); @@ -178,9 +174,8 @@ class DumpHTML { $this->setupGlobals(); $dbr =& wfGetDB( DB_SLAVE ); - $categorylinks = $dbr->tableName( 'categorylinks' ); print "Selecting categories..."; - $sql = 'SELECT DISTINCT cl_to FROM categorylinks'; + $sql = 'SELECT DISTINCT cl_to FROM ' . $dbr->tableName( 'categorylinks' ); $res = $dbr->query( $sql, $fname ); print "\nWriting " . $dbr->numRows( $res ). " category pages\n"; @@ -197,14 +192,12 @@ class DumpHTML { } function doRedirects() { - global $wgLinkCache; - print "Doing redirects...\n"; $fname = 'DumpHTML::doRedirects'; $this->setupGlobals(); $dbr =& wfGetDB( DB_SLAVE ); - $res = $dbr->select( 'page', array( 'page_namespace', 'page_title' ), + $res = $dbr->select( 'page', array( 'page_namespace', 'page_title' ), array( 'page_is_redirect' => 1 ), $fname ); $num = $dbr->numRows( $res ); print "$num redirects to do...\n"; @@ -220,11 +213,6 @@ class DumpHTML { /** Write an article specified by title */ function doArticle( $title ) { - // Testing - if ( $title->getNamespace() == 8 ) { - return; - } - global $wgTitle, $wgSharedUploadPath, $wgSharedUploadDirectory; global $wgUploadDirectory; @@ -279,44 +267,42 @@ class DumpHTML { print("Can't open file $fullName for writing\n"); return; } - + fwrite( $file, $text ); fclose( $file ); } /** Set up globals required for parsing */ - function setupGlobals( $depth = NULL ) { - global $wgUser, $wgTitle, $wgMakeDumpLinks, $wgStylePath, $wgArticlePath; + function setupGlobals( $currentDepth = NULL ) { + global $wgUser, $wgTitle, $wgStylePath, $wgArticlePath; global $wgUploadPath, $wgLogo, $wgMaxCredits, $wgSharedUploadPath; global $wgHideInterlanguageLinks, $wgUploadDirectory, $wgThumbnailScriptPath; global $wgSharedThumbnailScriptPath, $wgEnableParserCache, $wgHooks, $wgServer; - global $wgRightsUrl, $wgRightsText; + global $wgRightsUrl, $wgRightsText, $wgCopyrightIcon; static $oldLogo = NULL; - + if ( !$this->setupDone ) { $wgHooks['GetLocalURL'][] =& $this; $wgHooks['GetFullURL'][] =& $this; $this->oldArticlePath = $wgServer . $wgArticlePath; } - if ( is_null( $depth ) ) { - $wgMakeDumpLinks = $this->depth; - } else { - $wgMakeDumpLinks = $depth; + if ( is_null( $currentDepth ) ) { + $currentDepth = $this->depth; } - + if ( $this->alternateScriptPath ) { - if ( $wgMakeDumpLinks == 0 ) { + if ( $currentDepth == 0 ) { $wgScriptPath = '.'; } else { - $wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks - 1 ); + $wgScriptPath = '..' . str_repeat( '/..', $currentDepth - 1 ); } } else { - $wgScriptPath = '..' . str_repeat( '/..', $wgMakeDumpLinks ); + $wgScriptPath = '..' . str_repeat( '/..', $currentDepth ); } - $wgArticlePath = str_repeat( '../', $wgMakeDumpLinks ) . '$1'; + $wgArticlePath = str_repeat( '../', $currentDepth ) . '$1'; # Logo image # Allow for repeated setup @@ -335,6 +321,15 @@ class DumpHTML { $wgLogo = $wgScriptPath . $wgLogo; } + # Another ugly hack + if ( !$this->setupDone ) { + $this->oldCopyrightIcon = $wgCopyrightIcon; + } + $wgCopyrightIcon = str_replace( 'src="/images', + 'src="' . htmlspecialchars( $wgScriptPath ) . '/images', $this->oldCopyrightIcon ); + + + $wgStylePath = "$wgScriptPath/skins"; $wgUploadPath = "$wgScriptPath/{$this->imageRel}"; $wgSharedUploadPath = "$wgUploadPath/shared"; @@ -343,7 +338,7 @@ class DumpHTML { $wgThumbnailScriptPath = $wgSharedThumbnailScriptPath = false; $wgEnableParserCache = false; $wgMathPath = "$wgScriptPath/math"; - + if ( !empty( $wgRightsText ) ) { $wgRightsUrl = "$wgScriptPath/COPYING.html"; } @@ -359,20 +354,25 @@ class DumpHTML { /** Reads the content of a title object, executes the skin and captures the result */ function getArticleHTML( &$title ) { - global $wgOut, $wgTitle, $wgArticle, $wgUser, $wgUseCategoryMagic, $wgLinkCache; - + global $wgOut, $wgTitle, $wgArticle, $wgUser; + + $linkCache =& LinkCache::singleton(); + $linkCache->clear(); $wgTitle = $title; if ( is_null( $wgTitle ) ) { return false; } - + $ns = $wgTitle->getNamespace(); if ( $ns == NS_SPECIAL ) { + $wgOut = new OutputPage; + $wgOut->setParserOptions( new ParserOptions ); SpecialPage::executePath( $wgTitle ); } else { + /** @todo merge with Wiki.php code */ if ( $ns == NS_IMAGE ) { $wgArticle = new ImagePage( $wgTitle ); - } elseif ( $wgUseCategoryMagic && $ns == NS_CATEGORY ) { + } elseif ( $ns == NS_CATEGORY ) { $wgArticle = new CategoryPage( $wgTitle ); } else { $wgArticle = new Article( $wgTitle ); @@ -383,8 +383,7 @@ class DumpHTML { } else { $wgOut = new OutputPage; $wgOut->setParserOptions( new ParserOptions ); - $wgLinkCache = new LinkCache; - + $wgArticle->view(); } } @@ -420,7 +419,7 @@ ENDTEXT; global $wgOutputEncoding, $wgDumpImages; $parser = xml_parser_create( $wgOutputEncoding ); xml_set_element_handler( $parser, 'wfDumpStartTagHandler', 'wfDumpEndTagHandler' ); - + $wgDumpImages = array(); xml_parse( $parser, $text ); xml_parser_free( $parser ); @@ -433,14 +432,14 @@ ENDTEXT; * This is necessary even if you intend to distribute all of commons, because * the directory contents is used to work out which image description pages * are needed. - * + * * Also copies math images * */ function copyImages( $images ) { global $wgSharedUploadPath, $wgSharedUploadDirectory, $wgMathPath, $wgMathDirectory; # Find shared uploads and copy them into the static directory - $sharedPathLength = strlen( $wgSharedUploadPath ); + $sharedPathLength = strlen( $wgSharedUploadPath ); $mathPathLength = strlen( $wgMathPath ); foreach ( $images as $escapedImage => $dummy ) { $image = urldecode( $escapedImage ); @@ -501,7 +500,7 @@ ENDTEXT; if ( $title->getDBkey() == '' ) { $url = str_replace( '$1', "../$iw/index.html", $wgArticlePath ); } else { - $url = str_replace( '$1', "../$iw/" . wfUrlencode( $this->getHashedFilename( $title ) ), + $url = str_replace( '$1', "../$iw/" . wfUrlencode( $this->getHashedFilename( $title ) ), $wgArticlePath ); } return false; @@ -509,7 +508,7 @@ ENDTEXT; return true; } } - + function onGetLocalURL( &$title, &$url, $query ) { global $wgArticlePath; @@ -538,6 +537,7 @@ ENDTEXT; if ( $url === false ) { $url = str_replace( '$1', wfUrlencode( $this->getHashedFilename( $title ) ), $wgArticlePath ); } + return false; } @@ -553,17 +553,22 @@ ENDTEXT; return 'index.html'; } - return $this->getHashedDirectory( $title ) . '/' . + return $this->getHashedDirectory( $title ) . '/' . $this->getFriendlyName( $dbkey ) . '.html'; } function getFriendlyName( $name ) { + global $wgLang; # Replace illegal characters for Windows paths with underscores $friendlyName = strtr( $name, '/\\*?"<>|~', '_________' ); # Work out lower case form. We assume we're on a system with case-insensitive # filenames, so unless the case is of a special form, we have to disambiguate - $lowerCase = ucfirst( strtolower( $name ) ); + if ( function_exists( 'mb_strtolower' ) ) { + $lowerCase = $wgLang->ucfirst( mb_strtolower( $name ) ); + } else { + $lowerCase = ucfirst( strtolower( $name ) ); + } # Make it mostly unique if ( $lowerCase != $friendlyName ) { @@ -575,7 +580,7 @@ ENDTEXT; return $friendlyName; } - + /** * Get a relative directory for putting a title into */ @@ -610,7 +615,7 @@ ENDTEXT; $dir .= '_'; } else { $c = $chars[$i]; - if ( ord( $c ) >= 128 || ctype_alnum( $c ) ) { + if ( ord( $c ) >= 128 || preg_match( '/[a-zA-Z0-9!#$%&()+,[\]^_`{}-]/', $c ) ) { if ( function_exists( 'mb_strtolower' ) ) { $dir .= mb_strtolower( $c ); } else {