Refactor dumpBackup.php and dumpTextPass.php to be Maintenance subclasses
author: This, that and the other <at.light@live.com.au>
Thu, 31 Dec 2015 09:46:54 +0000 (20:46 +1100)
committer: This, that and the other <at.light@live.com.au>
Thu, 31 Dec 2015 09:46:54 +0000 (20:46 +1100)
Use the Maintenance class's new $orderedOptions and support for
passing options multiple times. This allows for option "chaining".

The BackupDumper and TextPassDumper classes now extend Maintenance, but
should continue to function as before. The public function processArgs()
has been removed and replaced by processOptions(), which takes no
parameters. It is unlikely that users of these classes were calling
processArgs().

Inheritors of these classes that overrode processOption() will now need to
override processOptions() and use Maintenance::getOption() and friends.

The maintenance/backupTextPass.inc file has been deleted. Users should
include maintenance/dumpTextPass.php instead.

Bug: T122587
Change-Id: I2473ee119c185d1b2b00ac4b1e70ee8a6cafe4a3

RELEASE-NOTES-1.27
autoload.php
includes/export/DumpDBZip2Output.php [new file with mode: 0644]
maintenance/backup.inc
maintenance/backupTextPass.inc [deleted file]
maintenance/dumpBackup.php
maintenance/dumpTextPass.php
tests/phpunit/maintenance/backupTextPassTest.php
tests/phpunit/maintenance/backup_LogTest.php
tests/phpunit/maintenance/backup_PageTest.php

index 1b74f52..9860723 100644 (file)
@@ -192,6 +192,10 @@ changes to languages because of Phabricator reports.
 * User::editToken() was removed (deprecated since 1.19).
 * Removed --force-normal option of dumpBackup.php, as it no longer served
   any useful purpose since 1.22.
+* The functions processOption() and processArgs() on the BackupDumper and
+  TextPassDumper classes have been removed.
+* The maintenance/backupTextPass.inc file was deleted. You should include
+  maintenance/dumpTextPass.php instead.
 
 == Compatibility ==
 
index ac38fa5..92c5436 100644 (file)
@@ -354,7 +354,8 @@ $wgAutoloadLocalClasses = array(
        'DummyTermColorer' => __DIR__ . '/maintenance/term/MWTerm.php',
        'Dump7ZipOutput' => __DIR__ . '/includes/export/Dump7ZipOutput.php',
        'DumpBZip2Output' => __DIR__ . '/includes/export/DumpBZip2Output.php',
-       'DumpDBZip2Output' => __DIR__ . '/maintenance/backup.inc',
+       'DumpBackup' => __DIR__ . '/maintenance/dumpBackup.php',
+       'DumpDBZip2Output' => __DIR__ . '/includes/export/DumpDBZip2Output.php',
        'DumpFileOutput' => __DIR__ . '/includes/export/DumpFileOutput.php',
        'DumpFilter' => __DIR__ . '/includes/export/DumpFilter.php',
        'DumpGZipOutput' => __DIR__ . '/includes/export/DumpGZipOutput.php',
@@ -1252,7 +1253,7 @@ $wgAutoloadLocalClasses = array(
        'TestFileOpPerformance' => __DIR__ . '/maintenance/fileOpPerfTest.php',
        'TextContent' => __DIR__ . '/includes/content/TextContent.php',
        'TextContentHandler' => __DIR__ . '/includes/content/TextContentHandler.php',
-       'TextPassDumper' => __DIR__ . '/maintenance/backupTextPass.inc',
+       'TextPassDumper' => __DIR__ . '/maintenance/dumpTextPass.php',
        'TextStatsOutput' => __DIR__ . '/maintenance/language/StatOutputs.php',
        'TgConverter' => __DIR__ . '/languages/classes/LanguageTg.php',
        'ThrottledError' => __DIR__ . '/includes/exception/ThrottledError.php',
diff --git a/includes/export/DumpDBZip2Output.php b/includes/export/DumpDBZip2Output.php
new file mode 100644 (file)
index 0000000..5edde8f
--- /dev/null
@@ -0,0 +1,36 @@
+<?php
+/**
+ * Sends dump output via the dbzip2 compressor.
+ *
+ * Copyright © 2003, 2005, 2006 Brion Vibber <brion@pobox.com>
+ * https://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
+/**
+ * @ingroup Dump
+ */
+class DumpDBZip2Output extends DumpPipeOutput {
+       /**
+        * @param string $file
+        */
+       function __construct( $file ) {
+               parent::__construct( "dbzip2", $file );
+       }
+}
index 93010ae..ec59c60 100644 (file)
  * @ingroup Dump Maintenance
  */
 
-/**
- * @ingroup Dump Maintenance
- */
-class DumpDBZip2Output extends DumpPipeOutput {
-       function __construct( $file ) {
-               parent::__construct( "dbzip2", $file );
-       }
-}
+require_once __DIR__ . '/Maintenance.php';
+require_once __DIR__ . '/../includes/export/DumpFilter.php';
 
 /**
  * @ingroup Dump Maintenance
  */
-class BackupDumper {
+class BackupDumper extends Maintenance {
        public $reporting = true;
        public $pages = null; // all pages
        public $skipHeader = false; // don't output <mediawiki> and <siteinfo>
@@ -67,7 +61,7 @@ class BackupDumper {
         *
         * @var DatabaseBase|null
         *
-        * @see self::setDb
+        * @see self::setDB
         */
        protected $forcedDb = null;
 
@@ -77,7 +71,11 @@ class BackupDumper {
        // @todo Unused?
        private $stubText = false; // include rev_text_id instead of text; for 2-pass dump
 
-       function __construct( $args ) {
+       /**
+        * @param array $args For backward compatibility
+        */
+       function __construct( $args = null ) {
+               parent::__construct();
                $this->stderr = fopen( "php://stderr", "wt" );
 
                // Built-in output and filter plugins
@@ -91,7 +89,23 @@ class BackupDumper {
                $this->registerFilter( 'notalk', 'DumpNotalkFilter' );
                $this->registerFilter( 'namespace', 'DumpNamespaceFilter' );
 
-               $this->sink = $this->processArgs( $args );
+               // These three can be specified multiple times
+               $this->addOption( 'plugin', 'Load a dump plugin class. Specify as <class>[:<file>].',
+                       false, true, false, true );
+               $this->addOption( 'output', 'Begin a filtered output stream; Specify as <type>:<file>. ' .
+                       '<type>s: file, gzip, bzip2, 7zip, dbzip2', false, true, false, true );
+               $this->addOption( 'filter', 'Add a filter on an output branch. Specify as ' .
+                       '<type>[:<options>]. <types>s: latest, notalk, namespace', false, true, false, true );
+               $this->addOption( 'report', 'Report position and speed after every n pages processed. ' .
+                       'Default: 100.', false, true );
+               $this->addOption( 'server', 'Force reading from MySQL server', false, true );
+
+               if ( $args ) {
+                       // Args should be loaded and processed so that dump() can be called directly
+                       // instead of execute()
+                       $this->loadWithArgv( $args );
+                       $this->processOptions();
+               }
        }
 
        /**
@@ -125,77 +139,102 @@ class BackupDumper {
                call_user_func_array( $register, array( &$this ) );
        }
 
+       function execute() {
+               throw new MWException( 'execute() must be overridden in subclasses' );
+       }
+
        /**
-        * @param array $args
-        * @return array
+        * Processes arguments and sets $this->$sink accordingly
         */
-       function processArgs( $args ) {
+       function processOptions() {
                $sink = null;
                $sinks = array();
-               foreach ( $args as $arg ) {
-                       $matches = array();
-                       if ( preg_match( '/^--(.+?)(?:=(.+?)(?::(.+?))?)?$/', $arg, $matches ) ) {
-                               MediaWiki\suppressWarnings();
-                               list( /* $full */, $opt, $val, $param ) = $matches;
-                               MediaWiki\restoreWarnings();
-
-                               switch ( $opt ) {
-                                       case "plugin":
-                                               $this->loadPlugin( $val, $param );
-                                               break;
-                                       case "output":
-                                               if ( !is_null( $sink ) ) {
-                                                       $sinks[] = $sink;
-                                               }
-                                               if ( !isset( $this->outputTypes[$val] ) ) {
-                                                       $this->fatalError( "Unrecognized output sink type '$val'" );
-                                               }
-                                               $type = $this->outputTypes[$val];
-                                               $sink = new $type( $param );
-                                               break;
-                                       case "filter":
-                                               if ( is_null( $sink ) ) {
-                                                       $sink = new DumpOutput();
-                                               }
-                                               if ( !isset( $this->filterTypes[$val] ) ) {
-                                                       $this->fatalError( "Unrecognized filter type '$val'" );
-                                               }
-                                               $type = $this->filterTypes[$val];
-                                               $filter = new $type( $sink, $param );
-
-                                               // references are lame in php...
-                                               unset( $sink );
-                                               $sink = $filter;
-
-                                               break;
-                                       case "report":
-                                               $this->reportingInterval = intval( $val );
-                                               break;
-                                       case "server":
-                                               $this->server = $val;
-                                               break;
-                                       default:
-                                               $this->processOption( $opt, $val, $param );
-                               }
+
+               $options = $this->orderedOptions;
+               foreach ( $options as $arg ) {
+                       $opt = $arg[0];
+                       $param = $arg[1];
+
+                       switch ( $opt ) {
+                               case 'plugin':
+                                       $val = explode( ':', $param );
+
+                                       if ( count( $val ) === 1 ) {
+                                               $this->loadPlugin( $val[0] );
+                                       } elseif ( count( $val ) === 2 ) {
+                                               $this->loadPlugin( $val[0], $val[1] );
+                                       } else {
+                                               $this->fatalError( 'Invalid plugin parameter' );
+                                               return;
+                                       }
+
+                                       break;
+                               case 'output':
+                                       $split = explode( ':', $param, 2 );
+                                       if ( count( $split ) !== 2 ) {
+                                               $this->fatalError( 'Invalid output parameter' );
+                                       }
+                                       list( $type, $file ) = $split;
+                                       if ( !is_null( $sink ) ) {
+                                               $sinks[] = $sink;
+                                       }
+                                       if ( !isset( $this->outputTypes[$type] ) ) {
+                                               $this->fatalError( "Unrecognized output sink type '$type'" );
+                                       }
+                                       $class = $this->outputTypes[$type];
+                                       $sink = new $class( $file );
+
+                                       break;
+                               case 'filter':
+                                       if ( is_null( $sink ) ) {
+                                               $sink = new DumpOutput();
+                                       }
+
+                                       $split = explode( ':', $param );
+                                       $key = $split[0];
+
+                                       if ( !isset( $this->filterTypes[$key] ) ) {
+                                               $this->fatalError( "Unrecognized filter type '$key'" );
+                                       }
+
+                                       $type = $this->filterTypes[$key];
+
+                                       if ( count( $split ) === 1 ) {
+                                               $filter = new $type( $sink );
+                                       } elseif ( count( $split ) === 2 ) {
+                                               $filter = new $type( $sink, $split[1] );
+                                       } else {
+                                               $this->fatalError( 'Invalid filter parameter' );
+                                       }
+
+                                       // references are lame in php...
+                                       unset( $sink );
+                                       $sink = $filter;
+
+                                       break;
                        }
                }
 
+               if ( $this->hasOption( 'report' ) ) {
+                       $this->reportingInterval = intval( $this->getOption( 'report' ) );
+               }
+
+               if ( $this->hasOption( 'server' ) ) {
+                       $this->server = $this->getOption( 'server' );
+               }
+
                if ( is_null( $sink ) ) {
                        $sink = new DumpOutput();
                }
                $sinks[] = $sink;
 
                if ( count( $sinks ) > 1 ) {
-                       return new DumpMultiWriter( $sinks );
+                       $this->sink = new DumpMultiWriter( $sinks );
                } else {
-                       return $sink;
+                       $this->sink = $sink;
                }
        }
 
-       function processOption( $opt, $val, $param ) {
-               // extension point for subclasses to add options
-       }
-
        function dump( $history, $text = WikiExporter::TEXT ) {
                # Notice messages will foul up your XML output even if they're
                # relatively harmless.
@@ -292,7 +331,8 @@ class BackupDumper {
         * @param DatabaseBase|null $db (Optional) the database connection to use. If null, resort to
         *   use the globally provided ways to get database connections.
         */
-       function setDb( DatabaseBase $db = null ) {
+       function setDB( IDatabase $db = null ) {
+               parent::setDB( $db );
                $this->forcedDb = $db;
        }
 
@@ -365,12 +405,13 @@ class BackupDumper {
        }
 
        function progress( $string ) {
-               fwrite( $this->stderr, $string . "\n" );
+               if ( $this->reporting ) {
+                       fwrite( $this->stderr, $string . "\n" );
+               }
        }
 
        function fatalError( $msg ) {
-               $this->progress( "$msg\n" );
-               die( 1 );
+               $this->error( "$msg\n", 1 );
        }
 }
 
diff --git a/maintenance/backupTextPass.inc b/maintenance/backupTextPass.inc
deleted file mode 100644 (file)
index 0562333..0000000
+++ /dev/null
@@ -1,925 +0,0 @@
-<?php
-/**
- * BackupDumper that postprocesses XML dumps from dumpBackup.php to add page text
- *
- * Copyright (C) 2005 Brion Vibber <brion@pobox.com>
- * https://www.mediawiki.org/
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * You should have received a copy of the GNU General Public License along
- * with this program; if not, write to the Free Software Foundation, Inc.,
- * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
- * http://www.gnu.org/copyleft/gpl.html
- *
- * @file
- * @ingroup Maintenance
- */
-
-require_once __DIR__ . '/backup.inc';
-
-/**
- * @ingroup Maintenance
- */
-class TextPassDumper extends BackupDumper {
-       public $prefetch = null;
-
-       // when we spend more than maxTimeAllowed seconds on this run, we continue
-       // processing until we write out the next complete page, then save output file(s),
-       // rename it/them and open new one(s)
-       public $maxTimeAllowed = 0; // 0 = no limit
-
-       protected $input = "php://stdin";
-       protected $history = WikiExporter::FULL;
-       protected $fetchCount = 0;
-       protected $prefetchCount = 0;
-       protected $prefetchCountLast = 0;
-       protected $fetchCountLast = 0;
-
-       protected $maxFailures = 5;
-       protected $maxConsecutiveFailedTextRetrievals = 200;
-       protected $failureTimeout = 5; // Seconds to sleep after db failure
-
-       protected $bufferSize = 524288; // In bytes. Maximum size to read from the stub in on go.
-
-       protected $php = "php";
-       protected $spawn = false;
-
-       /**
-        * @var bool|resource
-        */
-       protected $spawnProc = false;
-
-       /**
-        * @var bool|resource
-        */
-       protected $spawnWrite = false;
-
-       /**
-        * @var bool|resource
-        */
-       protected $spawnRead = false;
-
-       /**
-        * @var bool|resource
-        */
-       protected $spawnErr = false;
-
-       protected $xmlwriterobj = false;
-
-       protected $timeExceeded = false;
-       protected $firstPageWritten = false;
-       protected $lastPageWritten = false;
-       protected $checkpointJustWritten = false;
-       protected $checkpointFiles = array();
-
-       /**
-        * @var DatabaseBase
-        */
-       protected $db;
-
-       /**
-        * Drop the database connection $this->db and try to get a new one.
-        *
-        * This function tries to get a /different/ connection if this is
-        * possible. Hence, (if this is possible) it switches to a different
-        * failover upon each call.
-        *
-        * This function resets $this->lb and closes all connections on it.
-        *
-        * @throws MWException
-        */
-       function rotateDb() {
-               // Cleaning up old connections
-               if ( isset( $this->lb ) ) {
-                       $this->lb->closeAll();
-                       unset( $this->lb );
-               }
-
-               if ( $this->forcedDb !== null ) {
-                       $this->db = $this->forcedDb;
-
-                       return;
-               }
-
-               if ( isset( $this->db ) && $this->db->isOpen() ) {
-                       throw new MWException( 'DB is set and has not been closed by the Load Balancer' );
-               }
-
-               unset( $this->db );
-
-               // Trying to set up new connection.
-               // We do /not/ retry upon failure, but delegate to encapsulating logic, to avoid
-               // individually retrying at different layers of code.
-
-               // 1. The LoadBalancer.
-               try {
-                       $this->lb = wfGetLBFactory()->newMainLB();
-               } catch ( Exception $e ) {
-                       throw new MWException( __METHOD__
-                               . " rotating DB failed to obtain new load balancer (" . $e->getMessage() . ")" );
-               }
-
-               // 2. The Connection, through the load balancer.
-               try {
-                       $this->db = $this->lb->getConnection( DB_SLAVE, 'dump' );
-               } catch ( Exception $e ) {
-                       throw new MWException( __METHOD__
-                               . " rotating DB failed to obtain new database (" . $e->getMessage() . ")" );
-               }
-       }
-
-       function initProgress( $history = WikiExporter::FULL ) {
-               parent::initProgress();
-               $this->timeOfCheckpoint = $this->startTime;
-       }
-
-       function dump( $history, $text = WikiExporter::TEXT ) {
-               // Notice messages will foul up your XML output even if they're
-               // relatively harmless.
-               if ( ini_get( 'display_errors' ) ) {
-                       ini_set( 'display_errors', 'stderr' );
-               }
-
-               $this->initProgress( $this->history );
-
-               // We are trying to get an initial database connection to avoid that the
-               // first try of this request's first call to getText fails. However, if
-               // obtaining a good DB connection fails it's not a serious issue, as
-               // getText does retry upon failure and can start without having a working
-               // DB connection.
-               try {
-                       $this->rotateDb();
-               } catch ( Exception $e ) {
-                       // We do not even count this as failure. Just let eventual
-                       // watchdogs know.
-                       $this->progress( "Getting initial DB connection failed (" .
-                               $e->getMessage() . ")" );
-               }
-
-               $this->egress = new ExportProgressFilter( $this->sink, $this );
-
-               // it would be nice to do it in the constructor, oh well. need egress set
-               $this->finalOptionCheck();
-
-               // we only want this so we know how to close a stream :-P
-               $this->xmlwriterobj = new XmlDumpWriter();
-
-               $input = fopen( $this->input, "rt" );
-               $this->readDump( $input );
-
-               if ( $this->spawnProc ) {
-                       $this->closeSpawn();
-               }
-
-               $this->report( true );
-       }
-
-       function processOption( $opt, $val, $param ) {
-               global $IP;
-               $url = $this->processFileOpt( $val, $param );
-
-               switch ( $opt ) {
-                       case 'buffersize':
-                               // Lower bound for xml reading buffer size is 4 KB
-                               $this->bufferSize = max( intval( $val ), 4 * 1024 );
-                               break;
-                       case 'prefetch':
-                               require_once "$IP/maintenance/backupPrefetch.inc";
-                               $this->prefetch = new BaseDump( $url );
-                               break;
-                       case 'stub':
-                               $this->input = $url;
-                               break;
-                       case 'maxtime':
-                               $this->maxTimeAllowed = intval( $val ) * 60;
-                               break;
-                       case 'checkpointfile':
-                               $this->checkpointFiles[] = $val;
-                               break;
-                       case 'current':
-                               $this->history = WikiExporter::CURRENT;
-                               break;
-                       case 'full':
-                               $this->history = WikiExporter::FULL;
-                               break;
-                       case 'spawn':
-                               $this->spawn = true;
-                               if ( $val ) {
-                                       $this->php = $val;
-                               }
-                               break;
-               }
-       }
-
-       function processFileOpt( $val, $param ) {
-               $fileURIs = explode( ';', $param );
-               foreach ( $fileURIs as $URI ) {
-                       switch ( $val ) {
-                               case "file":
-                                       $newURI = $URI;
-                                       break;
-                               case "gzip":
-                                       $newURI = "compress.zlib://$URI";
-                                       break;
-                               case "bzip2":
-                                       $newURI = "compress.bzip2://$URI";
-                                       break;
-                               case "7zip":
-                                       $newURI = "mediawiki.compress.7z://$URI";
-                                       break;
-                               default:
-                                       $newURI = $URI;
-                       }
-                       $newFileURIs[] = $newURI;
-               }
-               $val = implode( ';', $newFileURIs );
-
-               return $val;
-       }
-
-       /**
-        * Overridden to include prefetch ratio if enabled.
-        */
-       function showReport() {
-               if ( !$this->prefetch ) {
-                       parent::showReport();
-
-                       return;
-               }
-
-               if ( $this->reporting ) {
-                       $now = wfTimestamp( TS_DB );
-                       $nowts = microtime( true );
-                       $deltaAll = $nowts - $this->startTime;
-                       $deltaPart = $nowts - $this->lastTime;
-                       $this->pageCountPart = $this->pageCount - $this->pageCountLast;
-                       $this->revCountPart = $this->revCount - $this->revCountLast;
-
-                       if ( $deltaAll ) {
-                               $portion = $this->revCount / $this->maxCount;
-                               $eta = $this->startTime + $deltaAll / $portion;
-                               $etats = wfTimestamp( TS_DB, intval( $eta ) );
-                               if ( $this->fetchCount ) {
-                                       $fetchRate = 100.0 * $this->prefetchCount / $this->fetchCount;
-                               } else {
-                                       $fetchRate = '-';
-                               }
-                               $pageRate = $this->pageCount / $deltaAll;
-                               $revRate = $this->revCount / $deltaAll;
-                       } else {
-                               $pageRate = '-';
-                               $revRate = '-';
-                               $etats = '-';
-                               $fetchRate = '-';
-                       }
-                       if ( $deltaPart ) {
-                               if ( $this->fetchCountLast ) {
-                                       $fetchRatePart = 100.0 * $this->prefetchCountLast / $this->fetchCountLast;
-                               } else {
-                                       $fetchRatePart = '-';
-                               }
-                               $pageRatePart = $this->pageCountPart / $deltaPart;
-                               $revRatePart = $this->revCountPart / $deltaPart;
-                       } else {
-                               $fetchRatePart = '-';
-                               $pageRatePart = '-';
-                               $revRatePart = '-';
-                       }
-                       $this->progress( sprintf(
-                               "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
-                                       . "%d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% "
-                                       . "prefetched (all|curr), ETA %s [max %d]",
-                               $now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
-                               $pageRatePart, $this->revCount, $revRate, $revRatePart,
-                               $fetchRate, $fetchRatePart, $etats, $this->maxCount
-                       ) );
-                       $this->lastTime = $nowts;
-                       $this->revCountLast = $this->revCount;
-                       $this->prefetchCountLast = $this->prefetchCount;
-                       $this->fetchCountLast = $this->fetchCount;
-               }
-       }
-
-       function setTimeExceeded() {
-               $this->timeExceeded = true;
-       }
-
-       function checkIfTimeExceeded() {
-               if ( $this->maxTimeAllowed
-                       && ( $this->lastTime - $this->timeOfCheckpoint > $this->maxTimeAllowed )
-               ) {
-                       return true;
-               }
-
-               return false;
-       }
-
-       function finalOptionCheck() {
-               if ( ( $this->checkpointFiles && !$this->maxTimeAllowed )
-                       || ( $this->maxTimeAllowed && !$this->checkpointFiles )
-               ) {
-                       throw new MWException( "Options checkpointfile and maxtime must be specified together.\n" );
-               }
-               foreach ( $this->checkpointFiles as $checkpointFile ) {
-                       $count = substr_count( $checkpointFile, "%s" );
-                       if ( $count != 2 ) {
-                               throw new MWException( "Option checkpointfile must contain two '%s' "
-                                       . "for substitution of first and last pageids, count is $count instead, "
-                                       . "file is $checkpointFile.\n" );
-                       }
-               }
-
-               if ( $this->checkpointFiles ) {
-                       $filenameList = (array)$this->egress->getFilenames();
-                       if ( count( $filenameList ) != count( $this->checkpointFiles ) ) {
-                               throw new MWException( "One checkpointfile must be specified "
-                                       . "for each output option, if maxtime is used.\n" );
-                       }
-               }
-       }
-
-       /**
-        * @throws MWException Failure to parse XML input
-        * @param string $input
-        * @return bool
-        */
-       function readDump( $input ) {
-               $this->buffer = "";
-               $this->openElement = false;
-               $this->atStart = true;
-               $this->state = "";
-               $this->lastName = "";
-               $this->thisPage = 0;
-               $this->thisRev = 0;
-               $this->thisRevModel = null;
-               $this->thisRevFormat = null;
-
-               $parser = xml_parser_create( "UTF-8" );
-               xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
-
-               xml_set_element_handler(
-                       $parser,
-                       array( &$this, 'startElement' ),
-                       array( &$this, 'endElement' )
-               );
-               xml_set_character_data_handler( $parser, array( &$this, 'characterData' ) );
-
-               $offset = 0; // for context extraction on error reporting
-               do {
-                       if ( $this->checkIfTimeExceeded() ) {
-                               $this->setTimeExceeded();
-                       }
-                       $chunk = fread( $input, $this->bufferSize );
-                       if ( !xml_parse( $parser, $chunk, feof( $input ) ) ) {
-                               wfDebug( "TextDumpPass::readDump encountered XML parsing error\n" );
-
-                               $byte = xml_get_current_byte_index( $parser );
-                               $msg = wfMessage( 'xml-error-string',
-                                       'XML import parse failure',
-                                       xml_get_current_line_number( $parser ),
-                                       xml_get_current_column_number( $parser ),
-                                       $byte . ( is_null( $chunk ) ? null : ( '; "' . substr( $chunk, $byte - $offset, 16 ) . '"' ) ),
-                                       xml_error_string( xml_get_error_code( $parser ) ) )->escaped();
-
-                               xml_parser_free( $parser );
-
-                               throw new MWException( $msg );
-                       }
-                       $offset += strlen( $chunk );
-               } while ( $chunk !== false && !feof( $input ) );
-               if ( $this->maxTimeAllowed ) {
-                       $filenameList = (array)$this->egress->getFilenames();
-                       // we wrote some stuff after last checkpoint that needs renamed
-                       if ( file_exists( $filenameList[0] ) ) {
-                               $newFilenames = array();
-                               # we might have just written the header and footer and had no
-                               # pages or revisions written... perhaps they were all deleted
-                               # there's no pageID 0 so we use that. the caller is responsible
-                               # for deciding what to do with a file containing only the
-                               # siteinfo information and the mw tags.
-                               if ( !$this->firstPageWritten ) {
-                                       $firstPageID = str_pad( 0, 9, "0", STR_PAD_LEFT );
-                                       $lastPageID = str_pad( 0, 9, "0", STR_PAD_LEFT );
-                               } else {
-                                       $firstPageID = str_pad( $this->firstPageWritten, 9, "0", STR_PAD_LEFT );
-                                       $lastPageID = str_pad( $this->lastPageWritten, 9, "0", STR_PAD_LEFT );
-                               }
-
-                               $filenameCount = count( $filenameList );
-                               for ( $i = 0; $i < $filenameCount; $i++ ) {
-                                       $checkpointNameFilledIn = sprintf( $this->checkpointFiles[$i], $firstPageID, $lastPageID );
-                                       $fileinfo = pathinfo( $filenameList[$i] );
-                                       $newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointNameFilledIn;
-                               }
-                               $this->egress->closeAndRename( $newFilenames );
-                       }
-               }
-               xml_parser_free( $parser );
-
-               return true;
-       }
-
-       /**
-        * Applies applicable export transformations to $text.
-        *
-        * @param string $text
-        * @param string $model
-        * @param string|null $format
-        *
-        * @return string
-        */
-       private function exportTransform( $text, $model, $format = null ) {
-               try {
-                       $handler = ContentHandler::getForModelID( $model );
-                       $text = $handler->exportTransform( $text, $format );
-               }
-               catch ( MWException $ex ) {
-                       $this->progress(
-                               "Unable to apply export transformation for content model '$model': " .
-                               $ex->getMessage()
-                       );
-               }
-
-               return $text;
-       }
-
-       /**
-        * Tries to get the revision text for a revision id.
-        * Export transformations are applied if the content model can is given or can be
-        * determined from the database.
-        *
-        * Upon errors, retries (Up to $this->maxFailures tries each call).
-        * If still no good revision get could be found even after this retrying, "" is returned.
-        * If no good revision text could be returned for
-        * $this->maxConsecutiveFailedTextRetrievals consecutive calls to getText, MWException
-        * is thrown.
-        *
-        * @param string $id The revision id to get the text for
-        * @param string|bool|null $model The content model used to determine
-        *  applicable export transformations.
-        *  If $model is null, it will be determined from the database.
-        * @param string|null $format The content format used when applying export transformations.
-        *
-        * @throws MWException
-        * @return string The revision text for $id, or ""
-        */
-       function getText( $id, $model = null, $format = null ) {
-               global $wgContentHandlerUseDB;
-
-               $prefetchNotTried = true; // Whether or not we already tried to get the text via prefetch.
-               $text = false; // The candidate for a good text. false if no proper value.
-               $failures = 0; // The number of times, this invocation of getText already failed.
-
-               // The number of times getText failed without yielding a good text in between.
-               static $consecutiveFailedTextRetrievals = 0;
-
-               $this->fetchCount++;
-
-               // To allow to simply return on success and do not have to worry about book keeping,
-               // we assume, this fetch works (possible after some retries). Nevertheless, we koop
-               // the old value, so we can restore it, if problems occur (See after the while loop).
-               $oldConsecutiveFailedTextRetrievals = $consecutiveFailedTextRetrievals;
-               $consecutiveFailedTextRetrievals = 0;
-
-               if ( $model === null && $wgContentHandlerUseDB ) {
-                       $row = $this->db->selectRow(
-                               'revision',
-                               array( 'rev_content_model', 'rev_content_format' ),
-                               array( 'rev_id' => $this->thisRev ),
-                               __METHOD__
-                       );
-
-                       if ( $row ) {
-                               $model = $row->rev_content_model;
-                               $format = $row->rev_content_format;
-                       }
-               }
-
-               if ( $model === null || $model === '' ) {
-                       $model = false;
-               }
-
-               while ( $failures < $this->maxFailures ) {
-
-                       // As soon as we found a good text for the $id, we will return immediately.
-                       // Hence, if we make it past the try catch block, we know that we did not
-                       // find a good text.
-
-                       try {
-                               // Step 1: Get some text (or reuse from previous iteratuon if checking
-                               //         for plausibility failed)
-
-                               // Trying to get prefetch, if it has not been tried before
-                               if ( $text === false && isset( $this->prefetch ) && $prefetchNotTried ) {
-                                       $prefetchNotTried = false;
-                                       $tryIsPrefetch = true;
-                                       $text = $this->prefetch->prefetch( intval( $this->thisPage ),
-                                               intval( $this->thisRev ) );
-
-                                       if ( $text === null ) {
-                                               $text = false;
-                                       }
-
-                                       if ( is_string( $text ) && $model !== false ) {
-                                               // Apply export transformation to text coming from an old dump.
-                                               // The purpose of this transformation is to convert up from legacy
-                                               // formats, which may still be used in the older dump that is used
-                                               // for pre-fetching. Applying the transformation again should not
-                                               // interfere with content that is already in the correct form.
-                                               $text = $this->exportTransform( $text, $model, $format );
-                                       }
-                               }
-
-                               if ( $text === false ) {
-                                       // Fallback to asking the database
-                                       $tryIsPrefetch = false;
-                                       if ( $this->spawn ) {
-                                               $text = $this->getTextSpawned( $id );
-                                       } else {
-                                               $text = $this->getTextDb( $id );
-                                       }
-
-                                       if ( $text !== false && $model !== false ) {
-                                               // Apply export transformation to text coming from the database.
-                                               // Prefetched text should already have transformations applied.
-                                               $text = $this->exportTransform( $text, $model, $format );
-                                       }
-
-                                       // No more checks for texts from DB for now.
-                                       // If we received something that is not false,
-                                       // We treat it as good text, regardless of whether it actually is or is not
-                                       if ( $text !== false ) {
-                                               return $text;
-                                       }
-                               }
-
-                               if ( $text === false ) {
-                                       throw new MWException( "Generic error while obtaining text for id " . $id );
-                               }
-
-                               // We received a good candidate for the text of $id via some method
-
-                               // Step 2: Checking for plausibility and return the text if it is
-                               //         plausible
-                               $revID = intval( $this->thisRev );
-                               if ( !isset( $this->db ) ) {
-                                       throw new MWException( "No database available" );
-                               }
-
-                               if ( $model !== CONTENT_MODEL_WIKITEXT ) {
-                                       $revLength = strlen( $text );
-                               } else {
-                                       $revLength = $this->db->selectField( 'revision', 'rev_len', array( 'rev_id' => $revID ) );
-                               }
-
-                               if ( strlen( $text ) == $revLength ) {
-                                       if ( $tryIsPrefetch ) {
-                                               $this->prefetchCount++;
-                                       }
-
-                                       return $text;
-                               }
-
-                               $text = false;
-                               throw new MWException( "Received text is unplausible for id " . $id );
-                       } catch ( Exception $e ) {
-                               $msg = "getting/checking text " . $id . " failed (" . $e->getMessage() . ")";
-                               if ( $failures + 1 < $this->maxFailures ) {
-                                       $msg .= " (Will retry " . ( $this->maxFailures - $failures - 1 ) . " more times)";
-                               }
-                               $this->progress( $msg );
-                       }
-
-                       // Something went wrong; we did not a text that was plausible :(
-                       $failures++;
-
-                       // A failure in a prefetch hit does not warrant resetting db connection etc.
-                       if ( !$tryIsPrefetch ) {
-                               // After backing off for some time, we try to reboot the whole process as
-                               // much as possible to not carry over failures from one part to the other
-                               // parts
-                               sleep( $this->failureTimeout );
-                               try {
-                                       $this->rotateDb();
-                                       if ( $this->spawn ) {
-                                               $this->closeSpawn();
-                                               $this->openSpawn();
-                                       }
-                               } catch ( Exception $e ) {
-                                       $this->progress( "Rebooting getText infrastructure failed (" . $e->getMessage() . ")" .
-                                               " Trying to continue anyways" );
-                               }
-                       }
-               }
-
-               // Retirieving a good text for $id failed (at least) maxFailures times.
-               // We abort for this $id.
-
-               // Restoring the consecutive failures, and maybe aborting, if the dump
-               // is too broken.
-               $consecutiveFailedTextRetrievals = $oldConsecutiveFailedTextRetrievals + 1;
-               if ( $consecutiveFailedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals ) {
-                       throw new MWException( "Graceful storage failure" );
-               }
-
-               return "";
-       }
-
-       /**
-        * May throw a database error if, say, the server dies during query.
-        * @param int $id
-        * @return bool|string
-        * @throws MWException
-        */
-       private function getTextDb( $id ) {
-               global $wgContLang;
-               if ( !isset( $this->db ) ) {
-                       throw new MWException( __METHOD__ . "No database available" );
-               }
-               $row = $this->db->selectRow( 'text',
-                       array( 'old_text', 'old_flags' ),
-                       array( 'old_id' => $id ),
-                       __METHOD__ );
-               $text = Revision::getRevisionText( $row );
-               if ( $text === false ) {
-                       return false;
-               }
-               $stripped = str_replace( "\r", "", $text );
-               $normalized = $wgContLang->normalize( $stripped );
-
-               return $normalized;
-       }
-
-       private function getTextSpawned( $id ) {
-               MediaWiki\suppressWarnings();
-               if ( !$this->spawnProc ) {
-                       // First time?
-                       $this->openSpawn();
-               }
-               $text = $this->getTextSpawnedOnce( $id );
-               MediaWiki\restoreWarnings();
-
-               return $text;
-       }
-
-       function openSpawn() {
-               global $IP;
-
-               if ( file_exists( "$IP/../multiversion/MWScript.php" ) ) {
-                       $cmd = implode( " ",
-                               array_map( 'wfEscapeShellArg',
-                                       array(
-                                               $this->php,
-                                               "$IP/../multiversion/MWScript.php",
-                                               "fetchText.php",
-                                               '--wiki', wfWikiID() ) ) );
-               } else {
-                       $cmd = implode( " ",
-                               array_map( 'wfEscapeShellArg',
-                                       array(
-                                               $this->php,
-                                               "$IP/maintenance/fetchText.php",
-                                               '--wiki', wfWikiID() ) ) );
-               }
-               $spec = array(
-                       0 => array( "pipe", "r" ),
-                       1 => array( "pipe", "w" ),
-                       2 => array( "file", "/dev/null", "a" ) );
-               $pipes = array();
-
-               $this->progress( "Spawning database subprocess: $cmd" );
-               $this->spawnProc = proc_open( $cmd, $spec, $pipes );
-               if ( !$this->spawnProc ) {
-                       $this->progress( "Subprocess spawn failed." );
-
-                       return false;
-               }
-               list(
-                       $this->spawnWrite, // -> stdin
-                       $this->spawnRead, // <- stdout
-               ) = $pipes;
-
-               return true;
-       }
-
-       private function closeSpawn() {
-               MediaWiki\suppressWarnings();
-               if ( $this->spawnRead ) {
-                       fclose( $this->spawnRead );
-               }
-               $this->spawnRead = false;
-               if ( $this->spawnWrite ) {
-                       fclose( $this->spawnWrite );
-               }
-               $this->spawnWrite = false;
-               if ( $this->spawnErr ) {
-                       fclose( $this->spawnErr );
-               }
-               $this->spawnErr = false;
-               if ( $this->spawnProc ) {
-                       pclose( $this->spawnProc );
-               }
-               $this->spawnProc = false;
-               MediaWiki\restoreWarnings();
-       }
-
-       private function getTextSpawnedOnce( $id ) {
-               global $wgContLang;
-
-               $ok = fwrite( $this->spawnWrite, "$id\n" );
-               // $this->progress( ">> $id" );
-               if ( !$ok ) {
-                       return false;
-               }
-
-               $ok = fflush( $this->spawnWrite );
-               // $this->progress( ">> [flush]" );
-               if ( !$ok ) {
-                       return false;
-               }
-
-               // check that the text id they are sending is the one we asked for
-               // this avoids out of sync revision text errors we have encountered in the past
-               $newId = fgets( $this->spawnRead );
-               if ( $newId === false ) {
-                       return false;
-               }
-               if ( $id != intval( $newId ) ) {
-                       return false;
-               }
-
-               $len = fgets( $this->spawnRead );
-               // $this->progress( "<< " . trim( $len ) );
-               if ( $len === false ) {
-                       return false;
-               }
-
-               $nbytes = intval( $len );
-               // actual error, not zero-length text
-               if ( $nbytes < 0 ) {
-                       return false;
-               }
-
-               $text = "";
-
-               // Subprocess may not send everything at once, we have to loop.
-               while ( $nbytes > strlen( $text ) ) {
-                       $buffer = fread( $this->spawnRead, $nbytes - strlen( $text ) );
-                       if ( $buffer === false ) {
-                               break;
-                       }
-                       $text .= $buffer;
-               }
-
-               $gotbytes = strlen( $text );
-               if ( $gotbytes != $nbytes ) {
-                       $this->progress( "Expected $nbytes bytes from database subprocess, got $gotbytes " );
-
-                       return false;
-               }
-
-               // Do normalization in the dump thread...
-               $stripped = str_replace( "\r", "", $text );
-               $normalized = $wgContLang->normalize( $stripped );
-
-               return $normalized;
-       }
-
-       function startElement( $parser, $name, $attribs ) {
-               $this->checkpointJustWritten = false;
-
-               $this->clearOpenElement( null );
-               $this->lastName = $name;
-
-               if ( $name == 'revision' ) {
-                       $this->state = $name;
-                       $this->egress->writeOpenPage( null, $this->buffer );
-                       $this->buffer = "";
-               } elseif ( $name == 'page' ) {
-                       $this->state = $name;
-                       if ( $this->atStart ) {
-                               $this->egress->writeOpenStream( $this->buffer );
-                               $this->buffer = "";
-                               $this->atStart = false;
-                       }
-               }
-
-               if ( $name == "text" && isset( $attribs['id'] ) ) {
-                       $id = $attribs['id'];
-                       $model = trim( $this->thisRevModel );
-                       $format = trim( $this->thisRevFormat );
-
-                       $model = $model === '' ? null : $model;
-                       $format = $format === '' ? null : $format;
-
-                       $text = $this->getText( $id, $model, $format );
-                       $this->openElement = array( $name, array( 'xml:space' => 'preserve' ) );
-                       if ( strlen( $text ) > 0 ) {
-                               $this->characterData( $parser, $text );
-                       }
-               } else {
-                       $this->openElement = array( $name, $attribs );
-               }
-       }
-
-       function endElement( $parser, $name ) {
-               $this->checkpointJustWritten = false;
-
-               if ( $this->openElement ) {
-                       $this->clearOpenElement( "" );
-               } else {
-                       $this->buffer .= "</$name>";
-               }
-
-               if ( $name == 'revision' ) {
-                       $this->egress->writeRevision( null, $this->buffer );
-                       $this->buffer = "";
-                       $this->thisRev = "";
-                       $this->thisRevModel = null;
-                       $this->thisRevFormat = null;
-               } elseif ( $name == 'page' ) {
-                       if ( !$this->firstPageWritten ) {
-                               $this->firstPageWritten = trim( $this->thisPage );
-                       }
-                       $this->lastPageWritten = trim( $this->thisPage );
-                       if ( $this->timeExceeded ) {
-                               $this->egress->writeClosePage( $this->buffer );
-                               // nasty hack, we can't just write the chardata after the
-                               // page tag, it will include leading blanks from the next line
-                               $this->egress->sink->write( "\n" );
-
-                               $this->buffer = $this->xmlwriterobj->closeStream();
-                               $this->egress->writeCloseStream( $this->buffer );
-
-                               $this->buffer = "";
-                               $this->thisPage = "";
-                               // this could be more than one file if we had more than one output arg
-
-                               $filenameList = (array)$this->egress->getFilenames();
-                               $newFilenames = array();
-                               $firstPageID = str_pad( $this->firstPageWritten, 9, "0", STR_PAD_LEFT );
-                               $lastPageID = str_pad( $this->lastPageWritten, 9, "0", STR_PAD_LEFT );
-                               $filenamesCount = count( $filenameList );
-                               for ( $i = 0; $i < $filenamesCount; $i++ ) {
-                                       $checkpointNameFilledIn = sprintf( $this->checkpointFiles[$i], $firstPageID, $lastPageID );
-                                       $fileinfo = pathinfo( $filenameList[$i] );
-                                       $newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointNameFilledIn;
-                               }
-                               $this->egress->closeRenameAndReopen( $newFilenames );
-                               $this->buffer = $this->xmlwriterobj->openStream();
-                               $this->timeExceeded = false;
-                               $this->timeOfCheckpoint = $this->lastTime;
-                               $this->firstPageWritten = false;
-                               $this->checkpointJustWritten = true;
-                       } else {
-                               $this->egress->writeClosePage( $this->buffer );
-                               $this->buffer = "";
-                               $this->thisPage = "";
-                       }
-               } elseif ( $name == 'mediawiki' ) {
-                       $this->egress->writeCloseStream( $this->buffer );
-                       $this->buffer = "";
-               }
-       }
-
-       function characterData( $parser, $data ) {
-               $this->clearOpenElement( null );
-               if ( $this->lastName == "id" ) {
-                       if ( $this->state == "revision" ) {
-                               $this->thisRev .= $data;
-                       } elseif ( $this->state == "page" ) {
-                               $this->thisPage .= $data;
-                       }
-               } elseif ( $this->lastName == "model" ) {
-                       $this->thisRevModel .= $data;
-               } elseif ( $this->lastName == "format" ) {
-                       $this->thisRevFormat .= $data;
-               }
-
-               // have to skip the newline left over from closepagetag line of
-               // end of checkpoint files. nasty hack!!
-               if ( $this->checkpointJustWritten ) {
-                       if ( $data[0] == "\n" ) {
-                               $data = substr( $data, 1 );
-                       }
-                       $this->checkpointJustWritten = false;
-               }
-               $this->buffer .= htmlspecialchars( $data );
-       }
-
-       function clearOpenElement( $style ) {
-               if ( $this->openElement ) {
-                       $this->buffer .= Xml::element( $this->openElement[0], $this->openElement[1], $style );
-                       $this->openElement = false;
-               }
-       }
-}
index 18c78dc..6b5792a 100644 (file)
  * @ingroup Dump Maintenance
  */
 
-$originalDir = getcwd();
-
-$optionsWithArgs = array( 'pagelist', 'start', 'end', 'revstart', 'revend' );
-
-require_once __DIR__ . '/commandLine.inc';
 require_once __DIR__ . '/backup.inc';
 
-$dumper = new BackupDumper( $argv );
+class DumpBackup extends BackupDumper {
+       function __construct( $args = null ) {
+               parent::__construct();
 
-if ( isset( $options['quiet'] ) ) {
-       $dumper->reporting = false;
-}
-
-if ( isset( $options['pagelist'] ) ) {
-       $olddir = getcwd();
-       chdir( $originalDir );
-       $pages = file( $options['pagelist'] );
-       chdir( $olddir );
-       if ( $pages === false ) {
-               echo "Unable to open file {$options['pagelist']}\n";
-               die( 1 );
-       }
-       $pages = array_map( 'trim', $pages );
-       $dumper->pages = array_filter( $pages, create_function( '$x', 'return $x !== "";' ) );
-}
-
-if ( isset( $options['start'] ) ) {
-       $dumper->startId = intval( $options['start'] );
-}
-if ( isset( $options['end'] ) ) {
-       $dumper->endId = intval( $options['end'] );
-}
-
-if ( isset( $options['revstart'] ) ) {
-       $dumper->revStartId = intval( $options['revstart'] );
-}
-if ( isset( $options['revend'] ) ) {
-       $dumper->revEndId = intval( $options['revend'] );
-}
-$dumper->skipHeader = isset( $options['skip-header'] );
-$dumper->skipFooter = isset( $options['skip-footer'] );
-$dumper->dumpUploads = isset( $options['uploads'] );
-$dumper->dumpUploadFileContents = isset( $options['include-files'] );
-
-$textMode = isset( $options['stub'] ) ? WikiExporter::STUB : WikiExporter::TEXT;
-
-if ( isset( $options['full'] ) ) {
-       $dumper->dump( WikiExporter::FULL, $textMode );
-} elseif ( isset( $options['current'] ) ) {
-       $dumper->dump( WikiExporter::CURRENT, $textMode );
-} elseif ( isset( $options['stable'] ) ) {
-       $dumper->dump( WikiExporter::STABLE, $textMode );
-} elseif ( isset( $options['logs'] ) ) {
-       $dumper->dump( WikiExporter::LOGS );
-} elseif ( isset( $options['revrange'] ) ) {
-       $dumper->dump( WikiExporter::RANGE, $textMode );
-} else {
-       $dumper->progress( <<<ENDS
+               $this->mDescription = <<<TEXT
 This script dumps the wiki page or logging database into an
 XML interchange wrapper format for export or backup.
 
 XML output is sent to stdout; progress reports are sent to stderr.
 
 WARNING: this is not a full database dump! It is merely for public export
-                of your wiki. For full backup, see our online help at:
+         of your wiki. For full backup, see our online help at:
          https://www.mediawiki.org/wiki/Backup
+TEXT;
+               $this->stderr = fopen( "php://stderr", "wt" );
+               // Actions
+               $this->addOption( 'full', 'Dump all revisions of every page' );
+               $this->addOption( 'current', 'Dump only the latest revision of every page.' );
+               $this->addOption( 'logs', 'Dump all log events' );
+               $this->addOption( 'stable', 'Dump stable versions of pages' );
+               $this->addOption( 'revrange', 'Dump range of revisions specified by revstart and ' .
+                       'revend parameters' );
+               $this->addOption( 'pagelist',
+                       'Dump only pages included in the file', false, true );
+               // Options
+               $this->addOption( 'start', 'Start from page_id or log_id', false, true );
+               $this->addOption( 'end', 'Stop before page_id or log_id n (exclusive)', false, true );
+               $this->addOption( 'revstart', 'Start from rev_id', false, true );
+               $this->addOption( 'revend', 'Stop before rev_id n (exclusive)', false, true );
+               $this->addOption( 'skip-header', 'Don\'t output the <mediawiki> header' );
+               $this->addOption( 'skip-footer', 'Don\'t output the </mediawiki> footer' );
+               $this->addOption( 'stub', 'Don\'t perform old_text lookups; for 2-pass dump' );
+               $this->addOption( 'uploads', 'Include upload records without files' );
+               $this->addOption( 'include-files', 'Include files within the XML stream' );
+
+               if ( $args ) {
+                       $this->loadWithArgv( $args );
+                       $this->processOptions();
+               }
+       }
 
-Usage: php dumpBackup.php <action> [<options>]
-Actions:
-  --full      Dump all revisions of every page.
-  --current   Dump only the latest revision of every page.
-  --logs      Dump all log events.
-  --stable    Stable versions of pages?
-  --pagelist=<file>
-                         Where <file> is a list of page titles to be dumped
-  --revrange  Dump specified range of revisions, requires
-              revstart and revend options.
-Options:
-  --quiet     Don't dump status reports to stderr.
-  --report=n  Report position and speed after every n pages processed.
-                         (Default: 100)
-  --server=h  Force reading from MySQL server h
-  --start=n   Start from page_id or log_id n
-  --end=n     Stop before page_id or log_id n (exclusive)
-  --revstart=n  Start from rev_id n
-  --revend=n    Stop before rev_id n (exclusive)
-  --skip-header Don't output the <mediawiki> header
-  --skip-footer Don't output the </mediawiki> footer
-  --stub      Don't perform old_text lookups; for 2-pass dump
-  --uploads   Include upload records without files
-  --include-files Include files within the XML stream
-  --conf=<file> Use the specified configuration file (LocalSettings.php)
-
-  --wiki=<wiki>  Only back up the specified <wiki>
-
-Fancy stuff: (Works? Add examples please.)
-  --plugin=<class>[:<file>]   Load a dump plugin class
-  --output=<type>:<file>      Begin a filtered output stream;
-                              <type>s: file, gzip, bzip2, 7zip
-  --filter=<type>[:<options>] Add a filter on an output branch
-
-ENDS
-       );
+       /**
+        * Run the dump selected by the action option.
+        *
+        * The action options are checked in the order full, current, stable,
+        * logs, revrange; the first one present decides which dump is run.
+        * When none is present, error() is called with code 1.
+        */
+       function execute() {
+               $this->processOptions();
+
+               if ( $this->hasOption( 'stub' ) ) {
+                       $textMode = WikiExporter::STUB;
+               } else {
+                       $textMode = WikiExporter::TEXT;
+               }
+
+               // Action option => history constant, in order of precedence.
+               $actions = array(
+                       'full' => WikiExporter::FULL,
+                       'current' => WikiExporter::CURRENT,
+                       'stable' => WikiExporter::STABLE,
+                       'logs' => WikiExporter::LOGS,
+                       'revrange' => WikiExporter::RANGE,
+               );
+
+               foreach ( $actions as $action => $history ) {
+                       if ( !$this->hasOption( $action ) ) {
+                               continue;
+                       }
+
+                       if ( $action === 'logs' ) {
+                               // The log dump is started without an explicit text mode.
+                               $this->dump( $history );
+                       } else {
+                               $this->dump( $history, $textMode );
+                       }
+
+                       return;
+               }
+
+               $this->error( 'No valid action specified.', 1 );
+       }
+
+       /**
+        * Evaluate the command-line options specific to this class.
+        *
+        * Calls the parent implementation first, then copies this script's own
+        * options (quiet, pagelist, start/end, revstart/revend, skip-header,
+        * skip-footer, uploads, include-files) onto the dumper's properties.
+        * If the file named by --pagelist cannot be read, error() is called
+        * with code 1.
+        */
+       function processOptions() {
+               parent::processOptions();
+
+               // Evaluate options specific to this class
+               $this->reporting = !$this->hasOption( 'quiet' );
+
+               if ( $this->hasOption( 'pagelist' ) ) {
+                       // The list of page titles comes from the file given to --pagelist,
+                       // one title per line. The path is handed to file() as given.
+                       $filename = $this->getOption( 'pagelist' );
+                       $pages = file( $filename );
+                       if ( $pages === false ) {
+                               // Report through error() like execute() does, instead of
+                               // echoing the message into the output stream.
+                               $this->error( "Unable to open file {$filename}", 1 );
+                       }
+                       $pages = array_map( 'trim', $pages );
+                       // Discard lines that are empty after trimming.
+                       $this->pages = array_filter( $pages, function ( $x ) {
+                               return $x !== "";
+                       } );
+               }
+
+               if ( $this->hasOption( 'start' ) ) {
+                       $this->startId = intval( $this->getOption( 'start' ) );
+               }
+
+               if ( $this->hasOption( 'end' ) ) {
+                       $this->endId = intval( $this->getOption( 'end' ) );
+               }
+
+               if ( $this->hasOption( 'revstart' ) ) {
+                       $this->revStartId = intval( $this->getOption( 'revstart' ) );
+               }
+
+               if ( $this->hasOption( 'revend' ) ) {
+                       $this->revEndId = intval( $this->getOption( 'revend' ) );
+               }
+
+               // Plain flags: set when the option is present, cleared otherwise.
+               $this->skipHeader = $this->hasOption( 'skip-header' );
+               $this->skipFooter = $this->hasOption( 'skip-footer' );
+               $this->dumpUploads = $this->hasOption( 'uploads' );
+               $this->dumpUploadFileContents = $this->hasOption( 'include-files' );
+       }
 }
+
+$maintClass = 'DumpBackup';
+require_once RUN_MAINTENANCE_IF_MAIN;
index bde5a07..7511392 100644 (file)
@@ -1,6 +1,6 @@
 <?php
 /**
- * Script that postprocesses XML dumps from dumpBackup.php to add page text
+ * BackupDumper that postprocesses XML dumps from dumpBackup.php to add page text
  *
  * Copyright (C) 2005 Brion Vibber <brion@pobox.com>
  * https://www.mediawiki.org/
  * @ingroup Maintenance
  */
 
-$originalDir = getcwd();
+require_once __DIR__ . '/backup.inc';
+require_once __DIR__ . '/../includes/export/WikiExporter.php';
 
-require_once __DIR__ . '/commandLine.inc';
-require_once __DIR__ . '/backupTextPass.inc';
+/**
+ * @ingroup Maintenance
+ */
+class TextPassDumper extends BackupDumper {
+       public $prefetch = null;
+
+       // when we spend more than maxTimeAllowed seconds on this run, we continue
+       // processing until we write out the next complete page, then save output file(s),
+       // rename it/them and open new one(s)
+       public $maxTimeAllowed = 0; // 0 = no limit
+
+       protected $input = "php://stdin";
+       protected $history = WikiExporter::FULL;
+       protected $fetchCount = 0;
+       protected $prefetchCount = 0;
+       protected $prefetchCountLast = 0;
+       protected $fetchCountLast = 0;
+
+       protected $maxFailures = 5;
+       protected $maxConsecutiveFailedTextRetrievals = 200;
+       protected $failureTimeout = 5; // Seconds to sleep after db failure
+
+       protected $bufferSize = 524288; // In bytes. Maximum size to read from the stub in one go.
+
+       protected $php = "php";
+       protected $spawn = false;
+
+       /**
+        * @var bool|resource
+        */
+       protected $spawnProc = false;
 
-$dumper = new TextPassDumper( $argv );
+       /**
+        * @var bool|resource
+        */
+       protected $spawnWrite = false;
 
-if ( !isset( $options['help'] ) ) {
-       $dumper->dump( true );
-} else {
-       $dumper->progress( <<<ENDS
+       /**
+        * @var bool|resource
+        */
+       protected $spawnRead = false;
+
+       /**
+        * @var bool|resource
+        */
+       protected $spawnErr = false;
+
+       protected $xmlwriterobj = false;
+
+       protected $timeExceeded = false;
+       protected $firstPageWritten = false;
+       protected $lastPageWritten = false;
+       protected $checkpointJustWritten = false;
+       protected $checkpointFiles = array();
+
+       /**
+        * @var DatabaseBase
+        */
+       protected $db;
+
+       /**
+        * Set the script description and register the options of the text pass.
+        *
+        * @param array $args For backward compatibility: when given, the arguments
+        *  are loaded and the options are processed right away.
+        */
+       function __construct( $args = null ) {
+               parent::__construct();
+
+               $this->mDescription = <<<TEXT
 This script postprocesses XML dumps from dumpBackup.php to add
 page text which was stubbed out (using --stub).
 
 XML input is accepted on stdin.
 XML output is sent to stdout; progress reports are sent to stderr.
+TEXT;
+               $this->stderr = fopen( "php://stderr", "wt" );
+
+               // Help texts are concatenated from several literals; every fragment that is
+               // continued on the next line ends with a space so the words do not run together.
+               $this->addOption( 'stub', 'To load a compressed stub dump instead of stdin. ' .
+                       'Specify as --stub=<type>:<file>.', false, true );
+               $this->addOption( 'prefetch', 'Use a prior dump file as a text source, to save pressure on the ' .
+                       'database. (Requires the XMLReader extension). Specify as --prefetch=<type>:<file>',
+                       false, true );
+               $this->addOption( 'maxtime', 'Write out checkpoint file after this many minutes (writing ' .
+                       'out complete page, closing xml file properly, and opening new one ' .
+                       'with header).  This option requires the checkpointfile option.', false, true );
+               $this->addOption( 'checkpointfile', 'Use this string for checkpoint filenames, substituting ' .
+                       'first pageid written for the first %s (required) and the last pageid written for the ' .
+                       'second %s if it exists.', false, true, false, true ); // This can be specified multiple times
+               $this->addOption( 'quiet', 'Don\'t dump status reports to stderr.' );
+               $this->addOption( 'current', 'Base ETA on number of pages in database instead of all revisions' );
+               $this->addOption( 'spawn', 'Spawn a subprocess for loading text records' );
+               $this->addOption( 'buffersize', 'Buffer size in bytes to use for reading the stub. ' .
+                       '(Default: 512KB, Minimum: 4KB)', false, true );
+
+               if ( $args ) {
+                       $this->loadWithArgv( $args );
+                       $this->processOptions();
+               }
+       }
+
+       /**
+        * Entry point of the script: evaluate the options, then run the dump.
+        */
+       function execute() {
+               $this->processOptions();
+               $this->dump( true );
+       }
+
+       /**
+        * Translate the text-pass command line options into dumper state.
+        *
+        * The parent's options are processed first. Options that were not given
+        * leave the corresponding property at its current value.
+        */
+       function processOptions() {
+               global $IP;
+
+               parent::processOptions();
+
+               if ( $this->hasOption( 'buffersize' ) ) {
+                       // Never go below the documented 4KB minimum
+                       $requestedSize = intval( $this->getOption( 'buffersize' ) );
+                       $this->bufferSize = max( $requestedSize, 4 * 1024 );
+               }
+
+               if ( $this->hasOption( 'prefetch' ) ) {
+                       require_once "$IP/maintenance/backupPrefetch.inc";
+                       $prefetchSource = $this->processFileOpt( $this->getOption( 'prefetch' ) );
+                       $this->prefetch = new BaseDump( $prefetchSource );
+               }
+
+               if ( $this->hasOption( 'stub' ) ) {
+                       $stubSource = $this->getOption( 'stub' );
+                       $this->input = $this->processFileOpt( $stubSource );
+               }
+
+               if ( $this->hasOption( 'maxtime' ) ) {
+                       // Given in minutes, kept in seconds
+                       $minutes = intval( $this->getOption( 'maxtime' ) );
+                       $this->maxTimeAllowed = $minutes * 60;
+               }
+
+               if ( $this->hasOption( 'checkpointfile' ) ) {
+                       $this->checkpointFiles = $this->getOption( 'checkpointfile' );
+               }
+
+               // 'full' is evaluated after 'current', so it wins when both are present
+               if ( $this->hasOption( 'current' ) ) {
+                       $this->history = WikiExporter::CURRENT;
+               }
+               if ( $this->hasOption( 'full' ) ) {
+                       $this->history = WikiExporter::FULL;
+               }
+
+               if ( $this->hasOption( 'spawn' ) ) {
+                       $this->spawn = true;
+                       // NOTE(review): 1 is presumably what Maintenance stores for an option
+                       // passed without a value; any other value replaces the php binary.
+                       $phpBinary = $this->getOption( 'spawn' );
+                       if ( $phpBinary !== 1 ) {
+                               $this->php = $phpBinary;
+                       }
+               }
+       }
+
+       /**
+        * Drop the database connection $this->db and try to get a new one.
+        *
+        * This function tries to get a /different/ connection if this is
+        * possible. Hence, (if this is possible) it switches to a different
+        * failover upon each call.
+        *
+        * This function resets $this->lb and closes all connections on it.
+        *
+        * @throws MWException
+        */
+       function rotateDb() {
+               // Cleaning up old connections
+               if ( isset( $this->lb ) ) {
+                       $this->lb->closeAll();
+                       unset( $this->lb );
+               }
+
+               // A forced database takes precedence; no load balancer is created for it
+               if ( $this->forcedDb !== null ) {
+                       $this->db = $this->forcedDb;
+
+                       return;
+               }
+
+               // closeAll() above is expected to have closed the connection; a connection
+               // that is still open at this point is treated as an error.
+               if ( isset( $this->db ) && $this->db->isOpen() ) {
+                       throw new MWException( 'DB is set and has not been closed by the Load Balancer' );
+               }
+
+               unset( $this->db );
+
+               // Trying to set up new connection.
+               // We do /not/ retry upon failure, but delegate to encapsulating logic, to avoid
+               // individually retrying at different layers of code.
+
+               // 1. The LoadBalancer.
+               try {
+                       $this->lb = wfGetLBFactory()->newMainLB();
+               } catch ( Exception $e ) {
+                       // Any failure is rethrown as MWException, keeping the original message
+                       throw new MWException( __METHOD__
+                               . " rotating DB failed to obtain new load balancer (" . $e->getMessage() . ")" );
+               }
+
+               // 2. The Connection, through the load balancer.
+               try {
+                       $this->db = $this->lb->getConnection( DB_SLAVE, 'dump' );
+               } catch ( Exception $e ) {
+                       throw new MWException( __METHOD__
+                               . " rotating DB failed to obtain new database (" . $e->getMessage() . ")" );
+               }
+       }
+
+       /**
+        * Initialise progress reporting and start the checkpoint timer.
+        *
+        * @param int $history WikiExporter::FULL or WikiExporter::CURRENT; forwarded
+        *  to the parent so that the value chosen by the caller is not silently
+        *  replaced by the parent's default.
+        */
+       function initProgress( $history = WikiExporter::FULL ) {
+               parent::initProgress( $history );
+               // The first checkpoint interval starts together with the run
+               $this->timeOfCheckpoint = $this->startTime;
+       }
+
+       /**
+        * Run the text pass: read the stub dump from $this->input and pass it on
+        * through the progress filter.
+        *
+        * @param mixed $history Not used here; progress is initialised from $this->history
+        * @param mixed $text Not used here
+        * @throws MWException If the input cannot be opened, or from the option
+        *  check and the XML reading
+        */
+       function dump( $history, $text = WikiExporter::TEXT ) {
+               // Notice messages will foul up your XML output even if they're
+               // relatively harmless.
+               if ( ini_get( 'display_errors' ) ) {
+                       ini_set( 'display_errors', 'stderr' );
+               }
+
+               $this->initProgress( $this->history );
+
+               // We are trying to get an initial database connection to avoid that the
+               // first try of this request's first call to getText fails. However, if
+               // obtaining a good DB connection fails it's not a serious issue, as
+               // getText does retry upon failure and can start without having a working
+               // DB connection.
+               try {
+                       $this->rotateDb();
+               } catch ( Exception $e ) {
+                       // We do not even count this as failure. Just let eventual
+                       // watchdogs know.
+                       $this->progress( "Getting initial DB connection failed (" .
+                               $e->getMessage() . ")" );
+               }
+
+               $this->egress = new ExportProgressFilter( $this->sink, $this );
+
+               // it would be nice to do it in the constructor, oh well. need egress set
+               $this->finalOptionCheck();
+
+               // we only want this so we know how to close a stream :-P
+               $this->xmlwriterobj = new XmlDumpWriter();
+
+               $input = fopen( $this->input, "rt" );
+               if ( $input === false ) {
+                       // Without this check readDump() would be handed false instead of a stream
+                       throw new MWException( "Unable to open input {$this->input}" );
+               }
+               $this->readDump( $input );
+               // The stub has been read completely; release the handle
+               fclose( $input );
+
+               if ( $this->spawnProc ) {
+                       $this->closeSpawn();
+               }
+
+               $this->report( true );
+       }
+
+       /**
+        * Turn a "<type>:<file>[;<file>...]" option value into stream URIs.
+        *
+        * Each file gets the stream wrapper prefix belonging to the type; "file"
+        * and unknown types leave the file names untouched.
+        *
+        * @param string $opt Option value
+        * @return string The resulting URIs, joined by ';'
+        */
+       function processFileOpt( $opt ) {
+               $wrapperPrefixes = array(
+                       'gzip' => 'compress.zlib://',
+                       'bzip2' => 'compress.bzip2://',
+                       '7zip' => 'mediawiki.compress.7z://',
+               );
+
+               // Everything before the first ':' is the type, the rest is the file list
+               $pieces = explode( ':', $opt, 2 );
+               $type = $pieces[0];
+               $fileList = count( $pieces ) === 2 ? $pieces[1] : '';
+
+               $prefix = isset( $wrapperPrefixes[$type] ) ? $wrapperPrefixes[$type] : '';
+
+               $converted = array();
+               foreach ( explode( ';', $fileList ) as $file ) {
+                       $converted[] = $prefix . $file;
+               }
+
+               return implode( ';', $converted );
+       }
+
+       /**
+        * Overridden to include prefetch ratio if enabled.
+        *
+        * Without a prefetch source the parent's report is used. Otherwise, when
+        * reporting is on, one progress line with page, revision and prefetch rates
+        * (overall and since the previous report) and an ETA is written, and the
+        * "last" counters are updated for the next report.
+        */
+       function showReport() {
+               if ( !$this->prefetch ) {
+                       parent::showReport();
+
+                       return;
+               }
+
+               if ( $this->reporting ) {
+                       $now = wfTimestamp( TS_DB );
+                       $nowts = microtime( true );
+                       $deltaAll = $nowts - $this->startTime;
+                       $deltaPart = $nowts - $this->lastTime;
+                       // NOTE(review): pageCountLast is not updated in this method, unlike
+                       // revCountLast below - confirm whether that is intended.
+                       $this->pageCountPart = $this->pageCount - $this->pageCountLast;
+                       $this->revCountPart = $this->revCount - $this->revCountLast;
+
+                       if ( $deltaAll ) {
+                               // The ETA extrapolates from the share of revisions done so far. That
+                               // share is only usable as a divisor once at least one revision was
+                               // counted and the expected total is not zero.
+                               if ( $this->revCount && $this->maxCount ) {
+                                       $portion = $this->revCount / $this->maxCount;
+                                       $eta = $this->startTime + $deltaAll / $portion;
+                                       $etats = wfTimestamp( TS_DB, intval( $eta ) );
+                               } else {
+                                       $etats = '-';
+                               }
+                               if ( $this->fetchCount ) {
+                                       $fetchRate = 100.0 * $this->prefetchCount / $this->fetchCount;
+                               } else {
+                                       $fetchRate = '-';
+                               }
+                               $pageRate = $this->pageCount / $deltaAll;
+                               $revRate = $this->revCount / $deltaAll;
+                       } else {
+                               $pageRate = '-';
+                               $revRate = '-';
+                               $etats = '-';
+                               $fetchRate = '-';
+                       }
+                       if ( $deltaPart ) {
+                               if ( $this->fetchCountLast ) {
+                                       $fetchRatePart = 100.0 * $this->prefetchCountLast / $this->fetchCountLast;
+                               } else {
+                                       $fetchRatePart = '-';
+                               }
+                               $pageRatePart = $this->pageCountPart / $deltaPart;
+                               $revRatePart = $this->revCountPart / $deltaPart;
+                       } else {
+                               $fetchRatePart = '-';
+                               $pageRatePart = '-';
+                               $revRatePart = '-';
+                       }
+                       $this->progress( sprintf(
+                               "%s: %s (ID %d) %d pages (%0.1f|%0.1f/sec all|curr), "
+                                       . "%d revs (%0.1f|%0.1f/sec all|curr), %0.1f%%|%0.1f%% "
+                                       . "prefetched (all|curr), ETA %s [max %d]",
+                               $now, wfWikiID(), $this->ID, $this->pageCount, $pageRate,
+                               $pageRatePart, $this->revCount, $revRate, $revRatePart,
+                               $fetchRate, $fetchRatePart, $etats, $this->maxCount
+                       ) );
+                       $this->lastTime = $nowts;
+                       $this->revCountLast = $this->revCount;
+                       $this->prefetchCountLast = $this->prefetchCount;
+                       $this->fetchCountLast = $this->fetchCount;
+               }
+       }
+
+       /**
+        * Flag that the time limit for the current run has been exceeded.
+        */
+       function setTimeExceeded() {
+               $this->timeExceeded = true;
+       }
+
+       /**
+        * Tell whether more than maxTimeAllowed seconds lie between the last
+        * checkpoint and the last recorded report time.
+        *
+        * @return bool Always false when no time limit is set
+        */
+       function checkIfTimeExceeded() {
+               $sinceCheckpoint = $this->lastTime - $this->timeOfCheckpoint;
+
+               return $this->maxTimeAllowed && $sinceCheckpoint > $this->maxTimeAllowed;
+       }
+
+       /**
+        * Validate the combination of the maxtime and checkpointfile options.
+        *
+        * Needs $this->egress to be set, as the number of checkpoint file patterns
+        * is compared with the number of output file names.
+        *
+        * @throws MWException If the options are inconsistent
+        */
+       function finalOptionCheck() {
+               $hasCheckpointFiles = (bool)$this->checkpointFiles;
+               $hasTimeLimit = (bool)$this->maxTimeAllowed;
+
+               // Either both options are given or neither of them
+               if ( $hasCheckpointFiles !== $hasTimeLimit ) {
+                       throw new MWException( "Options checkpointfile and maxtime must be specified together.\n" );
+               }
+
+               foreach ( $this->checkpointFiles as $pattern ) {
+                       $count = substr_count( $pattern, "%s" );
+                       if ( $count == 2 ) {
+                               continue;
+                       }
+                       throw new MWException( "Option checkpointfile must contain two '%s' "
+                               . "for substitution of first and last pageids, count is $count instead, "
+                               . "file is $pattern.\n" );
+               }
+
+               if ( !$hasCheckpointFiles ) {
+                       return;
+               }
+
+               $outputNames = (array)$this->egress->getFilenames();
+               if ( count( $outputNames ) != count( $this->checkpointFiles ) ) {
+                       throw new MWException( "One checkpointfile must be specified "
+                               . "for each output option, if maxtime is used.\n" );
+               }
+       }
+
+       /**
+        * Parse the stub dump from $input in chunks of $this->bufferSize bytes.
+        *
+        * The XML events are handled by startElement(), endElement() and
+        * characterData(). When a time limit is set, the output written after the
+        * last checkpoint is closed and renamed to its checkpoint name at the end.
+        *
+        * @throws MWException Failure to parse XML input
+        * @param resource $input Stream to read the stub dump from
+        * @return bool
+        */
+       function readDump( $input ) {
+               $this->buffer = "";
+               $this->openElement = false;
+               $this->atStart = true;
+               $this->state = "";
+               $this->lastName = "";
+               $this->thisPage = 0;
+               $this->thisRev = 0;
+               $this->thisRevModel = null;
+               $this->thisRevFormat = null;
+
+               $parser = xml_parser_create( "UTF-8" );
+               xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
+
+               xml_set_element_handler(
+                       $parser,
+                       array( &$this, 'startElement' ),
+                       array( &$this, 'endElement' )
+               );
+               xml_set_character_data_handler( $parser, array( &$this, 'characterData' ) );
+
+               $offset = 0; // for context extraction on error reporting
+               do {
+                       if ( $this->checkIfTimeExceeded() ) {
+                               $this->setTimeExceeded();
+                       }
+                       $chunk = fread( $input, $this->bufferSize );
+                       if ( !xml_parse( $parser, $chunk, feof( $input ) ) ) {
+                               // __METHOD__ keeps the logged name in sync with the real class name
+                               wfDebug( __METHOD__ . " encountered XML parsing error\n" );
+
+                               $byte = xml_get_current_byte_index( $parser );
+                               $msg = wfMessage( 'xml-error-string',
+                                       'XML import parse failure',
+                                       xml_get_current_line_number( $parser ),
+                                       xml_get_current_column_number( $parser ),
+                                       $byte . ( is_null( $chunk ) ? null : ( '; "' . substr( $chunk, $byte - $offset, 16 ) . '"' ) ),
+                                       xml_error_string( xml_get_error_code( $parser ) ) )->escaped();
+
+                               xml_parser_free( $parser );
+
+                               throw new MWException( $msg );
+                       }
+                       $offset += strlen( $chunk );
+               } while ( $chunk !== false && !feof( $input ) );
+               if ( $this->maxTimeAllowed ) {
+                       $filenameList = (array)$this->egress->getFilenames();
+                       // we wrote some stuff after last checkpoint that needs renamed
+                       if ( file_exists( $filenameList[0] ) ) {
+                               $newFilenames = array();
+                               # we might have just written the header and footer and had no
+                               # pages or revisions written... perhaps they were all deleted
+                               # there's no pageID 0 so we use that. the caller is responsible
+                               # for deciding what to do with a file containing only the
+                               # siteinfo information and the mw tags.
+                               if ( !$this->firstPageWritten ) {
+                                       $firstPageID = str_pad( 0, 9, "0", STR_PAD_LEFT );
+                                       $lastPageID = str_pad( 0, 9, "0", STR_PAD_LEFT );
+                               } else {
+                                       $firstPageID = str_pad( $this->firstPageWritten, 9, "0", STR_PAD_LEFT );
+                                       $lastPageID = str_pad( $this->lastPageWritten, 9, "0", STR_PAD_LEFT );
+                               }
+
+                               // Each checkpoint file is placed in the directory of its output file
+                               $filenameCount = count( $filenameList );
+                               for ( $i = 0; $i < $filenameCount; $i++ ) {
+                                       $checkpointNameFilledIn = sprintf( $this->checkpointFiles[$i], $firstPageID, $lastPageID );
+                                       $fileinfo = pathinfo( $filenameList[$i] );
+                                       $newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointNameFilledIn;
+                               }
+                               $this->egress->closeAndRename( $newFilenames );
+                       }
+               }
+               xml_parser_free( $parser );
+
+               return true;
+       }
+
+       /**
+        * Applies applicable export transformations to $text.
+        *
+        * @param string $text
+        * @param string $model
+        * @param string|null $format
+        *
+        * @return string
+        */
+       private function exportTransform( $text, $model, $format = null ) {
+               try {
+                       $handler = ContentHandler::getForModelID( $model );
+                       $text = $handler->exportTransform( $text, $format );
+               }
+               catch ( MWException $ex ) {
+                       // Best effort: the problem is reported and $text is returned unchanged
+                       $this->progress(
+                               "Unable to apply export transformation for content model '$model': " .
+                               $ex->getMessage()
+                       );
+               }
 
-Usage: php dumpTextPass.php [<options>]
-Options:
-  --stub=<type>:<file> To load a compressed stub dump instead of stdin
-  --prefetch=<type>:<file> Use a prior dump file as a text source, to save
-                         pressure on the database.
-                         (Requires the XMLReader extension)
-  --maxtime=<minutes> Write out checkpoint file after this many minutes (writing
-                 out complete page, closing xml file properly, and opening new one
-                 with header).  This option requires the checkpointfile option.
-  --checkpointfile=<filenamepattern> Use this string for checkpoint filenames,
-                     substituting first pageid written for the first %s (required) and the
-              last pageid written for the second %s if it exists.
-  --quiet        Don't dump status reports to stderr.
-  --report=n  Report position and speed after every n pages processed.
-                         (Default: 100)
-  --server=h  Force reading from MySQL server h
-  --current      Base ETA on number of pages in database instead of all revisions
-  --spawn        Spawn a subprocess for loading text records
-  --buffersize=<size> Buffer size in bytes to use for reading the stub.
-              (Default: 512KB, Minimum: 4KB)
-  --help      Display this help message
-ENDS
-       );
+               return $text;
+       }
+
+       /**
+        * Tries to get the revision text for a revision id.
+        * Export transformations are applied if the content model can is given or can be
+        * determined from the database.
+        *
+        * Upon errors, retries (Up to $this->maxFailures tries each call).
+        * If still no good revision get could be found even after this retrying, "" is returned.
+        * If no good revision text could be returned for
+        * $this->maxConsecutiveFailedTextRetrievals consecutive calls to getText, MWException
+        * is thrown.
+        *
+        * @param string $id The revision id to get the text for
+        * @param string|bool|null $model The content model used to determine
+        *  applicable export transformations.
+        *  If $model is null, it will be determined from the database.
+        * @param string|null $format The content format used when applying export transformations.
+        *
+        * @throws MWException
+        * @return string The revision text for $id, or ""
+        */
+       function getText( $id, $model = null, $format = null ) {
+               global $wgContentHandlerUseDB;
+
+               $prefetchNotTried = true; // Whether or not we already tried to get the text via prefetch.
+               $text = false; // The candidate for a good text. false if no proper value.
+               $failures = 0; // The number of times, this invocation of getText already failed.
+
+               // The number of times getText failed without yielding a good text in between.
+               static $consecutiveFailedTextRetrievals = 0;
+
+               $this->fetchCount++;
+
+               // To allow to simply return on success and do not have to worry about book keeping,
+               // we assume, this fetch works (possible after some retries). Nevertheless, we koop
+               // the old value, so we can restore it, if problems occur (See after the while loop).
+               $oldConsecutiveFailedTextRetrievals = $consecutiveFailedTextRetrievals;
+               $consecutiveFailedTextRetrievals = 0;
+
+               if ( $model === null && $wgContentHandlerUseDB ) {
+                       $row = $this->db->selectRow(
+                               'revision',
+                               array( 'rev_content_model', 'rev_content_format' ),
+                               array( 'rev_id' => $this->thisRev ),
+                               __METHOD__
+                       );
+
+                       if ( $row ) {
+                               $model = $row->rev_content_model;
+                               $format = $row->rev_content_format;
+                       }
+               }
+
+               if ( $model === null || $model === '' ) {
+                       $model = false;
+               }
+
+               while ( $failures < $this->maxFailures ) {
+
+                       // As soon as we found a good text for the $id, we will return immediately.
+                       // Hence, if we make it past the try catch block, we know that we did not
+                       // find a good text.
+
+                       try {
+                               // Step 1: Get some text (or reuse from previous iteratuon if checking
+                               //         for plausibility failed)
+
+                               // Trying to get prefetch, if it has not been tried before
+                               if ( $text === false && isset( $this->prefetch ) && $prefetchNotTried ) {
+                                       $prefetchNotTried = false;
+                                       $tryIsPrefetch = true;
+                                       $text = $this->prefetch->prefetch( intval( $this->thisPage ),
+                                               intval( $this->thisRev ) );
+
+                                       if ( $text === null ) {
+                                               $text = false;
+                                       }
+
+                                       if ( is_string( $text ) && $model !== false ) {
+                                               // Apply export transformation to text coming from an old dump.
+                                               // The purpose of this transformation is to convert up from legacy
+                                               // formats, which may still be used in the older dump that is used
+                                               // for pre-fetching. Applying the transformation again should not
+                                               // interfere with content that is already in the correct form.
+                                               $text = $this->exportTransform( $text, $model, $format );
+                                       }
+                               }
+
+                               if ( $text === false ) {
+                                       // Fallback to asking the database
+                                       $tryIsPrefetch = false;
+                                       if ( $this->spawn ) {
+                                               $text = $this->getTextSpawned( $id );
+                                       } else {
+                                               $text = $this->getTextDb( $id );
+                                       }
+
+                                       if ( $text !== false && $model !== false ) {
+                                               // Apply export transformation to text coming from the database.
+                                               // Prefetched text should already have transformations applied.
+                                               $text = $this->exportTransform( $text, $model, $format );
+                                       }
+
+                                       // No more checks for texts from DB for now.
+                                       // If we received something that is not false,
+                                       // We treat it as good text, regardless of whether it actually is or is not
+                                       if ( $text !== false ) {
+                                               return $text;
+                                       }
+                               }
+
+                               if ( $text === false ) {
+                                       throw new MWException( "Generic error while obtaining text for id " . $id );
+                               }
+
+                               // We received a good candidate for the text of $id via some method
+
+                               // Step 2: Checking for plausibility and return the text if it is
+                               //         plausible
+                               $revID = intval( $this->thisRev );
+                               if ( !isset( $this->db ) ) {
+                                       throw new MWException( "No database available" );
+                               }
+
+                               if ( $model !== CONTENT_MODEL_WIKITEXT ) {
+                                       $revLength = strlen( $text );
+                               } else {
+                                       $revLength = $this->db->selectField( 'revision', 'rev_len', array( 'rev_id' => $revID ) );
+                               }
+
+                               if ( strlen( $text ) == $revLength ) {
+                                       if ( $tryIsPrefetch ) {
+                                               $this->prefetchCount++;
+                                       }
+
+                                       return $text;
+                               }
+
+                               $text = false;
+                               throw new MWException( "Received text is unplausible for id " . $id );
+                       } catch ( Exception $e ) {
+                               $msg = "getting/checking text " . $id . " failed (" . $e->getMessage() . ")";
+                               if ( $failures + 1 < $this->maxFailures ) {
+                                       $msg .= " (Will retry " . ( $this->maxFailures - $failures - 1 ) . " more times)";
+                               }
+                               $this->progress( $msg );
+                       }
+
+                       // Something went wrong; we did not a text that was plausible :(
+                       $failures++;
+
+                       // A failure in a prefetch hit does not warrant resetting db connection etc.
+                       if ( !$tryIsPrefetch ) {
+                               // After backing off for some time, we try to reboot the whole process as
+                               // much as possible to not carry over failures from one part to the other
+                               // parts
+                               sleep( $this->failureTimeout );
+                               try {
+                                       $this->rotateDb();
+                                       if ( $this->spawn ) {
+                                               $this->closeSpawn();
+                                               $this->openSpawn();
+                                       }
+                               } catch ( Exception $e ) {
+                                       $this->progress( "Rebooting getText infrastructure failed (" . $e->getMessage() . ")" .
+                                               " Trying to continue anyways" );
+                               }
+                       }
+               }
+
+               // Retrieving a good text for $id failed (at least) maxFailures times.
+               // We abort for this $id.
+
+               // Restoring the consecutive failures, and maybe aborting, if the dump
+               // is too broken.
+               $consecutiveFailedTextRetrievals = $oldConsecutiveFailedTextRetrievals + 1;
+               if ( $consecutiveFailedTextRetrievals > $this->maxConsecutiveFailedTextRetrievals ) {
+                       throw new MWException( "Graceful storage failure" );
+               }
+
+               return "";
+       }
+
+       /**
+        * Fetch one text straight from the database.
+        *
+        * The row is looked up in the text table by old_id, decoded through
+        * Revision::getRevisionText(), stripped of carriage returns and finally
+        * passed through $wgContLang->normalize().
+        *
+        * May throw a database error if, say, the server dies during query.
+        * @param int $id Value matched against old_id
+        * @return bool|string Normalized text, or false when
+        *   Revision::getRevisionText() returns false
+        * @throws MWException If no database connection is set on this object
+        */
+       private function getTextDb( $id ) {
+               global $wgContLang;
+               if ( !isset( $this->db ) ) {
+                       // Keep the method name and the message apart so the error is readable
+                       throw new MWException( __METHOD__ . ": No database available" );
+               }
+               $row = $this->db->selectRow( 'text',
+                       array( 'old_text', 'old_flags' ),
+                       array( 'old_id' => $id ),
+                       __METHOD__ );
+               $text = Revision::getRevisionText( $row );
+               if ( $text === false ) {
+                       return false;
+               }
+               $stripped = str_replace( "\r", "", $text );
+               $normalized = $wgContLang->normalize( $stripped );
+
+               return $normalized;
+       }
+
+       /**
+        * Fetch a text through the spawned subprocess, starting it on first use.
+        *
+        * Warnings are suppressed for the duration of the call.
+        *
+        * @param int|string $id Text id to request from the subprocess
+        * @return bool|string Result of getTextSpawnedOnce(), or false when no
+        *   subprocess is available
+        */
+       private function getTextSpawned( $id ) {
+               MediaWiki\suppressWarnings();
+               if ( !$this->spawnProc ) {
+                       // First time?
+                       $this->openSpawn();
+                       if ( !$this->spawnProc ) {
+                               // Spawning failed; do not try to talk to pipes that were never opened
+                               MediaWiki\restoreWarnings();
+
+                               return false;
+                       }
+               }
+               $text = $this->getTextSpawnedOnce( $id );
+               MediaWiki\restoreWarnings();
+
+               return $text;
+       }
+
+       /**
+        * Start the fetchText.php subprocess and remember its pipes.
+        *
+        * When a multiversion MWScript.php wrapper exists next to $IP the script is
+        * launched through it, otherwise maintenance/fetchText.php is run directly.
+        * The child's stdin and stdout are connected through pipes; its stderr is
+        * appended to /dev/null.
+        *
+        * @return bool True if the subprocess was started, false otherwise
+        */
+       function openSpawn() {
+               global $IP;
+
+               $wrapper = "$IP/../multiversion/MWScript.php";
+               if ( file_exists( $wrapper ) ) {
+                       $argv = array( $this->php, $wrapper, "fetchText.php", '--wiki', wfWikiID() );
+               } else {
+                       $argv = array( $this->php, "$IP/maintenance/fetchText.php", '--wiki', wfWikiID() );
+               }
+               // Escape every argument on its own, then join them into one command line
+               $cmd = implode( " ", array_map( 'wfEscapeShellArg', $argv ) );
+
+               $descriptors = array(
+                       0 => array( "pipe", "r" ),
+                       1 => array( "pipe", "w" ),
+                       2 => array( "file", "/dev/null", "a" ) );
+               $handles = array();
+
+               $this->progress( "Spawning database subprocess: $cmd" );
+               $this->spawnProc = proc_open( $cmd, $descriptors, $handles );
+               if ( !$this->spawnProc ) {
+                       $this->progress( "Subprocess spawn failed." );
+
+                       return false;
+               }
+
+               // Our write end feeds the child's stdin, our read end drains its stdout
+               $this->spawnWrite = $handles[0];
+               $this->spawnRead = $handles[1];
+
+               return true;
+       }
+
+       /**
+        * Shut down the spawned subprocess and release its handles.
+        *
+        * Every handle property that is set gets closed and is then reset to
+        * false. Warnings are suppressed while closing.
+        */
+       private function closeSpawn() {
+               MediaWiki\suppressWarnings();
+               if ( $this->spawnRead ) {
+                       fclose( $this->spawnRead );
+               }
+               $this->spawnRead = false;
+               if ( $this->spawnWrite ) {
+                       fclose( $this->spawnWrite );
+               }
+               $this->spawnWrite = false;
+               if ( $this->spawnErr ) {
+                       fclose( $this->spawnErr );
+               }
+               $this->spawnErr = false;
+               if ( $this->spawnProc ) {
+                       // The handle is created with proc_open(), so it must be released with
+                       // proc_close(); pclose() only accepts handles created by popen().
+                       // The pipes are closed beforehand so the child is not left blocked on them.
+                       proc_close( $this->spawnProc );
+               }
+               $this->spawnProc = false;
+               MediaWiki\restoreWarnings();
+       }
+
+       /**
+        * Perform a single request/response exchange with the spawned subprocess.
+        *
+        * The id is written on a line of its own. The reply is read as a line
+        * holding the id, a line holding the byte length and then that many bytes
+        * of text. The text is stripped of carriage returns and passed through
+        * $wgContLang->normalize() before being returned.
+        *
+        * @param int|string $id Text id to request
+        * @return bool|string Normalized text, or false if writing, reading or any
+        *   of the consistency checks failed
+        */
+       private function getTextSpawnedOnce( $id ) {
+               global $wgContLang;
+
+               $ok = fwrite( $this->spawnWrite, "$id\n" );
+               // $this->progress( ">> $id" );
+               if ( !$ok ) {
+                       return false;
+               }
+
+               $ok = fflush( $this->spawnWrite );
+               // $this->progress( ">> [flush]" );
+               if ( !$ok ) {
+                       return false;
+               }
+
+               // check that the text id they are sending is the one we asked for
+               // this avoids out of sync revision text errors we have encountered in the past
+               $newId = fgets( $this->spawnRead );
+               if ( $newId === false ) {
+                       return false;
+               }
+               if ( $id != intval( $newId ) ) {
+                       return false;
+               }
+
+               $len = fgets( $this->spawnRead );
+               // $this->progress( "<< " . trim( $len ) );
+               if ( $len === false ) {
+                       return false;
+               }
+
+               $nbytes = intval( $len );
+               // actual error, not zero-length text
+               if ( $nbytes < 0 ) {
+                       return false;
+               }
+
+               $text = "";
+
+               // Subprocess may not send everything at once, we have to loop.
+               while ( $nbytes > strlen( $text ) ) {
+                       $buffer = fread( $this->spawnRead, $nbytes - strlen( $text ) );
+                       // fread() yields an empty string once the pipe hits end-of-file (for
+                       // example when the subprocess died), so stop on that as well instead
+                       // of spinning forever; the length check below reports the short read.
+                       if ( $buffer === false || $buffer === '' ) {
+                               break;
+                       }
+                       $text .= $buffer;
+               }
+
+               $gotbytes = strlen( $text );
+               if ( $gotbytes != $nbytes ) {
+                       $this->progress( "Expected $nbytes bytes from database subprocess, got $gotbytes " );
+
+                       return false;
+               }
+
+               // Do normalization in the dump thread...
+               $stripped = str_replace( "\r", "", $text );
+               $normalized = $wgContLang->normalize( $stripped );
+
+               return $normalized;
+       }
+
+       /**
+        * XML parser callback for an opening tag.
+        *
+        * Flushes whatever is buffered at revision/page boundaries and remembers the
+        * new element as pending. A text element carrying an id attribute has its
+        * content fetched through getText() and fed into characterData().
+        *
+        * @param resource $parser Parser handle, passed on to characterData()
+        * @param string $name Element name
+        * @param array $attribs Element attributes
+        */
+       function startElement( $parser, $name, $attribs ) {
+               $this->checkpointJustWritten = false;
+
+               // Write out any element that is still pending as an open tag
+               $this->clearOpenElement( null );
+               $this->lastName = $name;
+
+               if ( $name == 'revision' || $name == 'page' ) {
+                       $this->state = $name;
+               }
+
+               if ( $name == 'revision' ) {
+                       $this->egress->writeOpenPage( null, $this->buffer );
+                       $this->buffer = "";
+               } elseif ( $name == 'page' && $this->atStart ) {
+                       // First page: everything buffered so far opens the stream
+                       $this->egress->writeOpenStream( $this->buffer );
+                       $this->buffer = "";
+                       $this->atStart = false;
+               }
+
+               if ( $name != "text" || !isset( $attribs['id'] ) ) {
+                       // Ordinary element: keep it pending with its attributes untouched
+                       $this->openElement = array( $name, $attribs );
+
+                       return;
+               }
+
+               $textId = $attribs['id'];
+
+               // Empty model/format strings are handed to getText() as null
+               $revModel = trim( $this->thisRevModel );
+               if ( $revModel === '' ) {
+                       $revModel = null;
+               }
+               $revFormat = trim( $this->thisRevFormat );
+               if ( $revFormat === '' ) {
+                       $revFormat = null;
+               }
+
+               $content = $this->getText( $textId, $revModel, $revFormat );
+               $this->openElement = array( $name, array( 'xml:space' => 'preserve' ) );
+               if ( strlen( $content ) > 0 ) {
+                       $this->characterData( $parser, $content );
+               }
+       }
+
+       /**
+        * XML parser callback for a closing tag.
+        *
+        * Hands the buffered output to $this->egress when a revision, page or
+        * mediawiki element ends. When $this->timeExceeded is set at the end of a
+        * page, the current stream is closed, the output file(s) are renamed from
+        * the checkpoint file patterns filled in with the first and last page id,
+        * and a new stream is opened.
+        *
+        * @param resource $parser Parser handle (not used here)
+        * @param string $name Element name
+        */
+       function endElement( $parser, $name ) {
+               $this->checkpointJustWritten = false;
+
+               // An element that is still pending is written by clearOpenElement( "" );
+               // otherwise an explicit closing tag is appended.
+               // NOTE(review): presumably the "" style yields a self-closed element — confirm
+               // against Xml::element().
+               if ( $this->openElement ) {
+                       $this->clearOpenElement( "" );
+               } else {
+                       $this->buffer .= "</$name>";
+               }
+
+               if ( $name == 'revision' ) {
+                       $this->egress->writeRevision( null, $this->buffer );
+                       $this->buffer = "";
+                       // Reset the per-revision values collected by characterData()
+                       $this->thisRev = "";
+                       $this->thisRevModel = null;
+                       $this->thisRevFormat = null;
+               } elseif ( $name == 'page' ) {
+                       // Track the first and the most recent page id written since the
+                       // last checkpoint
+                       if ( !$this->firstPageWritten ) {
+                               $this->firstPageWritten = trim( $this->thisPage );
+                       }
+                       $this->lastPageWritten = trim( $this->thisPage );
+                       if ( $this->timeExceeded ) {
+                               $this->egress->writeClosePage( $this->buffer );
+                               // nasty hack, we can't just write the chardata after the
+                               // page tag, it will include leading blanks from the next line
+                               $this->egress->sink->write( "\n" );
+
+                               $this->buffer = $this->xmlwriterobj->closeStream();
+                               $this->egress->writeCloseStream( $this->buffer );
+
+                               $this->buffer = "";
+                               $this->thisPage = "";
+                               // this could be more than one file if we had more than one output arg
+
+                               $filenameList = (array)$this->egress->getFilenames();
+                               $newFilenames = array();
+                               // Page ids are zero-padded on the left to nine characters
+                               $firstPageID = str_pad( $this->firstPageWritten, 9, "0", STR_PAD_LEFT );
+                               $lastPageID = str_pad( $this->lastPageWritten, 9, "0", STR_PAD_LEFT );
+                               $filenamesCount = count( $filenameList );
+                               for ( $i = 0; $i < $filenamesCount; $i++ ) {
+                                       // The i-th checkpoint pattern is a sprintf() format taking the two
+                                       // padded ids; the result is placed in the directory of the i-th
+                                       // current output file
+                                       $checkpointNameFilledIn = sprintf( $this->checkpointFiles[$i], $firstPageID, $lastPageID );
+                                       $fileinfo = pathinfo( $filenameList[$i] );
+                                       $newFilenames[] = $fileinfo['dirname'] . '/' . $checkpointNameFilledIn;
+                               }
+                               $this->egress->closeRenameAndReopen( $newFilenames );
+                               // Start the next output with a fresh stream header and reset the
+                               // checkpoint bookkeeping
+                               $this->buffer = $this->xmlwriterobj->openStream();
+                               $this->timeExceeded = false;
+                               $this->timeOfCheckpoint = $this->lastTime;
+                               $this->firstPageWritten = false;
+                               $this->checkpointJustWritten = true;
+                       } else {
+                               $this->egress->writeClosePage( $this->buffer );
+                               $this->buffer = "";
+                               $this->thisPage = "";
+                       }
+               } elseif ( $name == 'mediawiki' ) {
+                       $this->egress->writeCloseStream( $this->buffer );
+                       $this->buffer = "";
+               }
+       }
+
+       /**
+        * XML parser callback for character data.
+        *
+        * Accumulates the content of id, model and format elements into the
+        * matching tracking property and appends the HTML-escaped data to the
+        * output buffer.
+        *
+        * @param resource $parser Parser handle (not used here)
+        * @param string $data Character data
+        */
+       function characterData( $parser, $data ) {
+               $this->clearOpenElement( null );
+
+               switch ( $this->lastName ) {
+                       case "id":
+                               // An id belongs to whichever of revision/page was opened last
+                               if ( $this->state == "revision" ) {
+                                       $this->thisRev .= $data;
+                               } elseif ( $this->state == "page" ) {
+                                       $this->thisPage .= $data;
+                               }
+                               break;
+                       case "model":
+                               $this->thisRevModel .= $data;
+                               break;
+                       case "format":
+                               $this->thisRevFormat .= $data;
+                               break;
+               }
+
+               // have to skip the newline left over from closepagetag line of
+               // end of checkpoint files. nasty hack!!
+               if ( $this->checkpointJustWritten ) {
+                       $data = ( $data[0] == "\n" ) ? substr( $data, 1 ) : $data;
+                       $this->checkpointJustWritten = false;
+               }
+
+               $escaped = htmlspecialchars( $data );
+               $this->buffer .= $escaped;
+       }
+
+       /**
+        * Write the pending element, if any, to the buffer and forget it.
+        *
+        * @param string|null $style Passed through as the third argument of Xml::element()
+        */
+       function clearOpenElement( $style ) {
+               $pending = $this->openElement;
+               if ( !$pending ) {
+                       return;
+               }
+
+               $this->buffer .= Xml::element( $pending[0], $pending[1], $style );
+               $this->openElement = false;
+       }
 }
+
+// Name the maintenance class defined in this file, then include the file named by
+// RUN_MAINTENANCE_IF_MAIN.
+// NOTE(review): presumably that include instantiates and runs $maintClass when this
+// script is the entry point — confirm against the Maintenance framework.
+$maintClass = 'TextPassDumper';
+require_once RUN_MAINTENANCE_IF_MAIN;
index f5dd98b..893e4f9 100644 (file)
@@ -1,10 +1,15 @@
 <?php
 
-require_once __DIR__ . "/../../../maintenance/backupTextPass.inc";
+require_once __DIR__ . "/../../../maintenance/dumpTextPass.php";
 
 /**
  * Tests for TextPassDumper that rely on the database
  *
+ * Some of these tests use the old constructor for TextPassDumper
+ * and the dump() function, while others use the new loadWithArgv( $args )
+ * function and execute(). This is to ensure both the old and new methods
+ * work properly.
+ *
  * @group Database
  * @group Dump
  * @covers TextPassDumper
@@ -172,8 +177,10 @@ class TextPassDumperDatabaseTest extends DumpTestCase {
                // Setting up of the dump
                $nameStub = $this->setUpStub();
                $nameFull = $this->getNewTempFile();
-               $dumper = new TextPassDumper( array( "--stub=file:"
-                       . $nameStub, "--output=file:" . $nameFull ) );
+
+               $dumper = new TextPassDumper( array( "--stub=file:" . $nameStub,
+                       "--output=file:" . $nameFull ) );
+
                $dumper->prefetch = $prefetchMock;
                $dumper->reporting = false;
                $dumper->setDb( $this->db );
@@ -261,7 +268,8 @@ class TextPassDumperDatabaseTest extends DumpTestCase {
                        $this->assertTrue( wfMkdirParents( $nameOutputDir ),
                                "Creating temporary output directory " );
                        $this->setUpStub( $nameStub, $iterations );
-                       $dumper = new TextPassDumper( array( "--stub=file:" . $nameStub,
+                       $dumper = new TextPassDumper();
+                       $dumper->loadWithArgv( array( "--stub=file:" . $nameStub,
                                "--output=" . $checkpointFormat . ":" . $nameOutputDir . "/full",
                                "--maxtime=1" /*This is in minutes. Fixup is below*/,
                                "--buffersize=32768", // The default of 32 iterations fill up 32KB about twice
@@ -272,7 +280,7 @@ class TextPassDumperDatabaseTest extends DumpTestCase {
 
                        // The actual dump and taking time
                        $ts_before = microtime( true );
-                       $dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );
+                       $dumper->execute();
                        $ts_after = microtime( true );
                        $lastDuration = $ts_after - $ts_before;
 
@@ -634,7 +642,9 @@ class TextPassDumperDatabaselessTest extends MediaWikiLangTestCase {
         * @dataProvider bufferSizeProvider
         */
        function testBufferSizeSetting( $expected, $size, $msg ) {
-               $dumper = new TextPassDumperAccessor( array( "--buffersize=" . $size ) );
+               $dumper = new TextPassDumperAccessor();
+               $dumper->loadWithArgv( array( "--buffersize=" . $size ) );
+               $dumper->execute();
                $this->assertEquals( $expected, $dumper->getBufferSize(), $msg );
        }
 
@@ -674,4 +684,8 @@ class TextPassDumperAccessor extends TextPassDumper {
        public function getBufferSize() {
                return $this->bufferSize;
        }
+
+       /**
+        * Stub that performs no dump and just reports success.
+        * NOTE(review): presumably this lets tests call execute() to process the
+        * options without producing any output — confirm against
+        * TextPassDumper::execute().
+        *
+        * @param mixed $history Ignored
+        * @param mixed $text Ignored
+        * @return bool Always true
+        */
+       function dump( $history, $text = null ) {
+               return true;
+       }
 }
index 7ca4596..6629b67 100644 (file)
@@ -2,6 +2,11 @@
 /**
  * Tests for log dumps of BackupDumper
  *
+ * Some of these tests use the old constructor for TextPassDumper
+ * and the dump() function, while others use the new loadWithArgv( $args )
+ * function and execute(). This is to ensure both the old and new methods
+ * work properly.
+ *
  * @group Database
  * @group Dump
  * @covers BackupDumper
@@ -136,7 +141,8 @@ class BackupDumperLoggerTest extends DumpTestCase {
 
                // Preparing the dump
                $fname = $this->getNewTempFile();
-               $dumper = new BackupDumper( array( "--output=file:" . $fname ) );
+
+               $dumper = new DumpBackup( array( '--output=file:' . $fname ) );
                $dumper->startId = $this->logId1;
                $dumper->endId = $this->logId3 + 1;
                $dumper->reporting = false;
@@ -173,8 +179,10 @@ class BackupDumperLoggerTest extends DumpTestCase {
 
                // Preparing the dump
                $fname = $this->getNewTempFile();
-               $dumper = new BackupDumper( array( "--output=gzip:" . $fname,
-                       "--reporting=2" ) );
+
+               $dumper = new DumpBackup();
+               $dumper->loadWithArgv( array( '--logs', '--output=gzip:' . $fname,
+                       '--reporting=2' ) );
                $dumper->startId = $this->logId1;
                $dumper->endId = $this->logId3 + 1;
                $dumper->setDb( $this->db );
@@ -190,7 +198,7 @@ class BackupDumperLoggerTest extends DumpTestCase {
                }
 
                // Performing the dump
-               $dumper->dump( WikiExporter::LOGS, WikiExporter::TEXT );
+               $dumper->execute();
 
                $this->assertTrue( fclose( $dumper->stderr ), "Closing stderr handle" );
 
index 8b6221b..5781d1c 100644 (file)
@@ -6,6 +6,7 @@
  * @group Dump
  * @covers BackupDumper
  */
+
 class BackupDumperPageTest extends DumpTestCase {
 
        // We'll add several pages, revision and texts. The following variables hold the
@@ -98,14 +99,15 @@ class BackupDumperPageTest extends DumpTestCase {
        function testFullTextPlain() {
                // Preparing the dump
                $fname = $this->getNewTempFile();
-               $dumper = new BackupDumper( array( "--output=file:" . $fname ) );
+
+               $dumper = new DumpBackup();
+               $dumper->loadWithArgv( array( '--full', '--quiet', '--output', 'file:' . $fname ) );
                $dumper->startId = $this->pageId1;
                $dumper->endId = $this->pageId4 + 1;
-               $dumper->reporting = false;
                $dumper->setDb( $this->db );
 
                // Performing the dump
-               $dumper->dump( WikiExporter::FULL, WikiExporter::TEXT );
+               $dumper->execute();
 
                // Checking the dumped data
                $this->assertDumpStart( $fname );
@@ -153,14 +155,15 @@ class BackupDumperPageTest extends DumpTestCase {
        function testFullStubPlain() {
                // Preparing the dump
                $fname = $this->getNewTempFile();
-               $dumper = new BackupDumper( array( "--output=file:" . $fname ) );
+
+               $dumper = new DumpBackup();
+               $dumper->loadWithArgv( array( '--full', '--quiet', '--output', 'file:' . $fname, '--stub' ) );
                $dumper->startId = $this->pageId1;
                $dumper->endId = $this->pageId4 + 1;
-               $dumper->reporting = false;
                $dumper->setDb( $this->db );
 
                // Performing the dump
-               $dumper->dump( WikiExporter::FULL, WikiExporter::STUB );
+               $dumper->execute();
 
                // Checking the dumped data
                $this->assertDumpStart( $fname );
@@ -202,7 +205,8 @@ class BackupDumperPageTest extends DumpTestCase {
        function testCurrentStubPlain() {
                // Preparing the dump
                $fname = $this->getNewTempFile();
-               $dumper = new BackupDumper( array( "--output=file:" . $fname ) );
+
+               $dumper = new DumpBackup( array( '--output', 'file:' . $fname ) );
                $dumper->startId = $this->pageId1;
                $dumper->endId = $this->pageId4 + 1;
                $dumper->reporting = false;
@@ -247,7 +251,8 @@ class BackupDumperPageTest extends DumpTestCase {
 
                // Preparing the dump
                $fname = $this->getNewTempFile();
-               $dumper = new BackupDumper( array( "--output=gzip:" . $fname ) );
+
+               $dumper = new DumpBackup( array( '--output', 'gzip:' . $fname ) );
                $dumper->startId = $this->pageId1;
                $dumper->endId = $this->pageId4 + 1;
                $dumper->reporting = false;
@@ -306,7 +311,7 @@ class BackupDumperPageTest extends DumpTestCase {
                $fnameMetaCurrent = $this->getNewTempFile();
                $fnameArticles = $this->getNewTempFile();
 
-               $dumper = new BackupDumper( array( "--output=gzip:" . $fnameMetaHistory,
+               $dumper = new DumpBackup( array( "--full", "--stub", "--output=gzip:" . $fnameMetaHistory,
                        "--output=gzip:" . $fnameMetaCurrent, "--filter=latest",
                        "--output=gzip:" . $fnameArticles, "--filter=latest",
                        "--filter=notalk", "--filter=namespace:!NS_USER",