X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FImport.php;h=06355ed11ca4befaabbeaec68516d2599b4cd515;hb=45788085afaa7be7b606c08ccc3cba06d224496b;hp=36028eab9256b88a8750f9701bbcd698ea21a8c6;hpb=e6696ed61129be255ba5e6032c2eadcab85ed3b8;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Import.php b/includes/Import.php index 36028eab92..06355ed11c 100644 --- a/includes/Import.php +++ b/includes/Import.php @@ -42,6 +42,8 @@ class WikiImporter { private $config; /** @var ImportTitleFactory */ private $importTitleFactory; + /** @var array */ + private $countableCache = array(); /** * Creates an ImportXMLReader drawing from the source provided @@ -67,6 +69,7 @@ class WikiImporter { } // Default callbacks + $this->setPageCallback( array( $this, 'beforeImportPage' ) ); $this->setRevisionCallback( array( $this, "importRevision" ) ); $this->setUploadCallback( array( $this, 'importUpload' ) ); $this->setLogItemCallback( array( $this, 'importLogItem' ) ); @@ -288,6 +291,19 @@ class WikiImporter { $this->mImportUploads = $import; } + /** + * Default per-page callback. Sets up some things related to site statistics + * @param array $titleAndForeignTitle Two-element array, with Title object at + * index 0 and ForeignTitle object at index 1 + * @return bool + */ + public function beforeImportPage( $titleAndForeignTitle ) { + $title = $titleAndForeignTitle[0]; + $page = WikiPage::factory( $title ); + $this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable(); + return true; + } + /** * Default per-revision callback, performs the import. * @param WikiRevision $revision @@ -349,6 +365,31 @@ class WikiImporter { */ public function finishImportPage( $title, $foreignTitle, $revCount, $sRevCount, $pageInfo ) { + + // Update article count statistics (T42009) + // The normal counting logic in WikiPage->doEditUpdates() is designed for + // one-revision-at-a-time editing, not bulk imports. In this situation it + // suffers from issues of slave lag. We let WikiPage handle the total page + // and revision count, and we implement our own custom logic for the + // article (content page) count. + $page = WikiPage::factory( $title ); + $page->loadPageData( 'fromdbmaster' ); + $content = $page->getContent(); + if ( $content === null ) { + wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title . + ' because WikiPage::getContent() returned null' ); + } else { + $editInfo = $page->prepareContentForEdit( $content ); + $countKey = 'title_' . $title->getPrefixedText(); + $countable = $page->isCountable( $editInfo ); + if ( array_key_exists( $countKey, $this->countableCache ) && + $countable != $this->countableCache[ $countKey ] ) { + DeferredUpdates::addUpdate( SiteStatsUpdate::factory( array( + 'articles' => ( (int)$countable - (int)$this->countableCache[ $countKey ] ) + ) ) ); + } + } + $args = func_get_args(); return Hooks::run( 'AfterImportPage', $args ); } @@ -441,7 +482,8 @@ class WikiImporter { /** * Retrieves the contents of the named attribute of the current element. * @param string $attr The name of the attribute - * @return string The value of the attribute or an empty string if it is not set in the current element. + * @return string The value of the attribute or an empty string if it is not set in the current + * element. */ public function nodeAttribute( $attr ) { return $this->reader->getAttribute( $attr ); @@ -497,36 +539,48 @@ class WikiImporter { $keepReading = $this->reader->read(); $skip = false; - while ( $keepReading ) { - $tag = $this->reader->name; - $type = $this->reader->nodeType; - - if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) { - // Do nothing - } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) { - break; - } elseif ( $tag == 'siteinfo' ) { - $this->handleSiteInfo(); - } elseif ( $tag == 'page' ) { - $this->handlePage(); - } elseif ( $tag == 'logitem' ) { - $this->handleLogItem(); - } elseif ( $tag != '#text' ) { - $this->warn( "Unhandled top-level XML tag $tag" ); - - $skip = true; - } + $rethrow = null; + try { + while ( $keepReading ) { + $tag = $this->reader->name; + $type = $this->reader->nodeType; + + if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) { + // Do nothing + } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) { + break; + } elseif ( $tag == 'siteinfo' ) { + $this->handleSiteInfo(); + } elseif ( $tag == 'page' ) { + $this->handlePage(); + } elseif ( $tag == 'logitem' ) { + $this->handleLogItem(); + } elseif ( $tag != '#text' ) { + $this->warn( "Unhandled top-level XML tag $tag" ); + + $skip = true; + } - if ( $skip ) { - $keepReading = $this->reader->next(); - $skip = false; - $this->debug( "Skip" ); - } else { - $keepReading = $this->reader->read(); + if ( $skip ) { + $keepReading = $this->reader->next(); + $skip = false; + $this->debug( "Skip" ); + } else { + $keepReading = $this->reader->read(); + } } + } catch ( Exception $ex ) { + $rethrow = $ex; } + // finally libxml_disable_entity_loader( $oldDisable ); + $this->reader->close(); + + if ( $rethrow ) { + throw $rethrow; + } + return true; } @@ -1532,7 +1586,6 @@ class WikiRevision { $this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" ); return false; } - $oldcountable = $page->isCountable(); } # @todo FIXME: Use original rev_id optionally (better for backups) @@ -1555,10 +1608,11 @@ class WikiRevision { if ( $changed !== false && !$this->mNoUpdates ) { wfDebug( __METHOD__ . ": running updates\n" ); + // countable/oldcountable stuff is handled in WikiImporter::finishImportPage $page->doEditUpdates( $revision, $userObj, - array( 'created' => $created, 'oldcountable' => $oldcountable ) + array( 'created' => $created, 'oldcountable' => 'no-change' ) ); } @@ -1621,6 +1675,7 @@ class WikiRevision { RepoGroup::singleton()->getLocalRepo(), $archiveName ); } else { $file = wfLocalFile( $this->getTitle() ); + $file->load( File::READ_LATEST ); wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" ); if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) { $archiveName = $file->getTimestamp() . '!' . $file->getName(); @@ -1692,7 +1747,7 @@ class WikiRevision { // @todo FIXME! $src = $this->getSrc(); - $data = Http::get( $src ); + $data = Http::get( $src, array(), __METHOD__ ); if ( !$data ) { wfDebug( "IMPORT: couldn't fetch source $src\n" ); fclose( $f ); @@ -1849,7 +1904,7 @@ class ImportStreamSource implements ImportSource { # quicker and sorts out user-agent problems which might # otherwise prevent importing from large sites, such # as the Wikimedia cluster, etc. - $data = Http::request( $method, $url, array( 'followRedirects' => true ) ); + $data = Http::request( $method, $url, array( 'followRedirects' => true ), __METHOD__ ); if ( $data !== false ) { $file = tmpfile(); fwrite( $file, $data );