Add an option to skip to the nth page when importing a dump
author: Mehmet Mert Yıldıran <mehmetmertyildiran@gmail.com>
Fri, 26 May 2017 01:54:32 +0000 (04:54 +0300)
committer: Mehmet Mert Yıldıran <mehmetmertyildiran@gmail.com>
Thu, 8 Jun 2017 22:01:05 +0000 (01:01 +0300)
Usage: php importDump.php --skip-to 271500 /path_to/dumpfile.xml.gz

If the import of a database dump crashes (for whatever reason)
after a certain number of pages, the "--skip-to" parameter
allows restarting the import at a given page instead of
starting the import from scratch.

Change-Id: Ib36063b69d6846fc197800bba44287493b0632c0

includes/import/WikiImporter.php
maintenance/importDump.php

index 06b579a..2fc9f5e 100644 (file)
@@ -39,6 +39,7 @@ class WikiImporter {
        private $mNoticeCallback, $mDebug;
        private $mImportUploads, $mImageBasePath;
        private $mNoUpdates = false;
+       private $pageOffset = 0;
        /** @var Config */
        private $config;
        /** @var ImportTitleFactory */
@@ -146,6 +147,16 @@ class WikiImporter {
                $this->mNoUpdates = $noupdates;
        }
 
+       /**
+        * Set the 1-based page offset at which importing starts: every element
+        * read before the $nthPage-th <page> tag is skipped; 0 skips nothing.
+        * @param int $nthPage Index of the first page to import
+        * @since 1.29
+        */
+       function setPageOffset( $nthPage ) {
+               $this->pageOffset = $nthPage;
+       }
+
        /**
         * Set a callback that displays notice messages
         *
@@ -562,9 +573,19 @@ class WikiImporter {
                $keepReading = $this->reader->read();
                $skip = false;
                $rethrow = null;
+               $pageCount = 0;
                try {
                        while ( $keepReading ) {
                                $tag = $this->reader->localName;
+                               if ( $this->pageOffset ) {
+                                       if ( $tag === 'page' ) {
+                                               $pageCount++;
+                                       }
+                                       if ( $pageCount < $this->pageOffset ) {
+                                               $keepReading = $this->reader->next();
+                                               continue;
+                                       }
+                               }
                                $type = $this->reader->nodeType;
 
                                if ( !Hooks::run( 'ImportHandleToplevelXMLTag', [ $this ] ) ) {
index 6717a8e..802619e 100644 (file)
@@ -80,6 +80,7 @@ TEXT
                        'Disable link table updates. Is faster but leaves the wiki in an inconsistent state'
                );
                $this->addOption( 'image-base-path', 'Import files from a specified path', false, true );
+               $this->addOption( 'skip-to', 'Start from nth page by skipping first n-1 pages', false, true );
                $this->addArg( 'file', 'Dump file to import [else use stdin]', false );
        }
 
@@ -301,6 +302,11 @@ TEXT
                                return false;
                        }
                }
+               if ( $this->hasOption( 'skip-to' ) ) {
+                       $nthPage = (int)$this->getOption( 'skip-to' );
+                       $importer->setPageOffset( $nthPage );
+                       $this->pageCount = $nthPage - 1;
+               }
                $importer->setPageCallback( [ $this, 'reportPage' ] );
                $this->importCallback = $importer->setRevisionCallback(
                        [ $this, 'handleRevision' ] );