private $config;
/** @var ImportTitleFactory */
private $importTitleFactory;
+ /** @var array */
+ private $countableCache = array();
/**
* Creates an ImportXMLReader drawing from the source provided
}
// Default callbacks
+ $this->setPageCallback( array( $this, 'beforeImportPage' ) );
$this->setRevisionCallback( array( $this, "importRevision" ) );
$this->setUploadCallback( array( $this, 'importUpload' ) );
$this->setLogItemCallback( array( $this, 'importLogItem' ) );
$this->mImportUploads = $import;
}
+ /**
+ * Default per-page callback. Sets up some things related to site statistics
+ * @param array $titleAndForeignTitle Two-element array, with Title object at
+ * index 0 and ForeignTitle object at index 1
+ * @return bool
+ */
+ public function beforeImportPage( $titleAndForeignTitle ) {
+ $title = $titleAndForeignTitle[0];
+ $page = WikiPage::factory( $title );
+ $this->countableCache['title_' . $title->getPrefixedText()] = $page->isCountable();
+ return true;
+ }
+
/**
* Default per-revision callback, performs the import.
* @param WikiRevision $revision
*/
public function finishImportPage( $title, $foreignTitle, $revCount,
$sRevCount, $pageInfo ) {
+
+ // Update article count statistics (T42009)
+ // The normal counting logic in WikiPage->doEditUpdates() is designed for
+ // one-revision-at-a-time editing, not bulk imports. In this situation it
+ // suffers from issues of slave lag. We let WikiPage handle the total page
+ // and revision count, and we implement our own custom logic for the
+ // article (content page) count.
+ $page = WikiPage::factory( $title );
+ $page->loadPageData( 'fromdbmaster' );
+ $content = $page->getContent();
+ if ( $content === null ) {
+ wfDebug( __METHOD__ . ': Skipping article count adjustment for ' . $title .
+ ' because WikiPage::getContent() returned null' );
+ } else {
+ $editInfo = $page->prepareContentForEdit( $content );
+ $countKey = 'title_' . $title->getPrefixedText();
+ $countable = $page->isCountable( $editInfo );
+ if ( array_key_exists( $countKey, $this->countableCache ) &&
+ $countable != $this->countableCache[ $countKey ] ) {
+ DeferredUpdates::addUpdate( SiteStatsUpdate::factory( array(
+ 'articles' => ( (int)$countable - (int)$this->countableCache[ $countKey ] )
+ ) ) );
+ }
+ }
+
$args = func_get_args();
return Hooks::run( 'AfterImportPage', $args );
}
/**
* Retrieves the contents of the named attribute of the current element.
* @param string $attr The name of the attribute
- * @return string The value of the attribute or an empty string if it is not set in the current element.
+ * @return string The value of the attribute or an empty string if it is not set in the current
+ * element.
*/
public function nodeAttribute( $attr ) {
return $this->reader->getAttribute( $attr );
$keepReading = $this->reader->read();
$skip = false;
- while ( $keepReading ) {
- $tag = $this->reader->name;
- $type = $this->reader->nodeType;
-
- if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
- // Do nothing
- } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
- break;
- } elseif ( $tag == 'siteinfo' ) {
- $this->handleSiteInfo();
- } elseif ( $tag == 'page' ) {
- $this->handlePage();
- } elseif ( $tag == 'logitem' ) {
- $this->handleLogItem();
- } elseif ( $tag != '#text' ) {
- $this->warn( "Unhandled top-level XML tag $tag" );
-
- $skip = true;
- }
+ $rethrow = null;
+ try {
+ while ( $keepReading ) {
+ $tag = $this->reader->name;
+ $type = $this->reader->nodeType;
+
+ if ( !Hooks::run( 'ImportHandleToplevelXMLTag', array( $this ) ) ) {
+ // Do nothing
+ } elseif ( $tag == 'mediawiki' && $type == XMLReader::END_ELEMENT ) {
+ break;
+ } elseif ( $tag == 'siteinfo' ) {
+ $this->handleSiteInfo();
+ } elseif ( $tag == 'page' ) {
+ $this->handlePage();
+ } elseif ( $tag == 'logitem' ) {
+ $this->handleLogItem();
+ } elseif ( $tag != '#text' ) {
+ $this->warn( "Unhandled top-level XML tag $tag" );
+
+ $skip = true;
+ }
- if ( $skip ) {
- $keepReading = $this->reader->next();
- $skip = false;
- $this->debug( "Skip" );
- } else {
- $keepReading = $this->reader->read();
+ if ( $skip ) {
+ $keepReading = $this->reader->next();
+ $skip = false;
+ $this->debug( "Skip" );
+ } else {
+ $keepReading = $this->reader->read();
+ }
}
+ } catch ( Exception $ex ) {
+ $rethrow = $ex;
}
+ // finally
libxml_disable_entity_loader( $oldDisable );
+ $this->reader->close();
+
+ if ( $rethrow ) {
+ throw $rethrow;
+ }
+
return true;
}
$this->title->getPrefixedText() . "]], timestamp " . $this->timestamp . "\n" );
return false;
}
- $oldcountable = $page->isCountable();
}
# @todo FIXME: Use original rev_id optionally (better for backups)
if ( $changed !== false && !$this->mNoUpdates ) {
wfDebug( __METHOD__ . ": running updates\n" );
+ // countable/oldcountable stuff is handled in WikiImporter::finishImportPage
$page->doEditUpdates(
$revision,
$userObj,
- array( 'created' => $created, 'oldcountable' => $oldcountable )
+ array( 'created' => $created, 'oldcountable' => 'no-change' )
);
}
RepoGroup::singleton()->getLocalRepo(), $archiveName );
} else {
$file = wfLocalFile( $this->getTitle() );
+ $file->load( File::READ_LATEST );
wfDebug( __METHOD__ . 'Importing new file as ' . $file->getName() . "\n" );
if ( $file->exists() && $file->getTimestamp() > $this->getTimestamp() ) {
$archiveName = $file->getTimestamp() . '!' . $file->getName();
// @todo FIXME!
$src = $this->getSrc();
- $data = Http::get( $src );
+ $data = Http::get( $src, array(), __METHOD__ );
if ( !$data ) {
wfDebug( "IMPORT: couldn't fetch source $src\n" );
fclose( $f );
# quicker and sorts out user-agent problems which might
# otherwise prevent importing from large sites, such
# as the Wikimedia cluster, etc.
- $data = Http::request( $method, $url, array( 'followRedirects' => true ) );
+ $data = Http::request( $method, $url, array( 'followRedirects' => true ), __METHOD__ );
if ( $data !== false ) {
$file = tmpfile();
fwrite( $file, $data );