Merge "Removed some unnecessary code in LocalFileDeleteBatch"
[lhc/web/wiklou.git] / maintenance / backupTextPass.inc
index fd31844..85ebd51 100644 (file)
@@ -31,6 +31,12 @@ require_once __DIR__ . '/backup.inc';
  */
 class TextPassDumper extends BackupDumper {
        public $prefetch = null;
+
+       // when we spend more than maxTimeAllowed seconds on this run, we continue
+       // processing until we write out the next complete page, then save output file(s),
+       // rename it/them and open new one(s)
+       public $maxTimeAllowed = 0; // 0 = no limit
+
        protected $input = "php://stdin";
        protected $history = WikiExporter::FULL;
        protected $fetchCount = 0;
@@ -67,10 +73,6 @@ class TextPassDumper extends BackupDumper {
 
        protected $xmlwriterobj = false;
 
-       // when we spend more than maxTimeAllowed seconds on this run, we continue
-       // processing until we write out the next complete page, then save output file(s),
-       // rename it/them and open new one(s)
-       protected $maxTimeAllowed = 0;  // 0 = no limit
        protected $timeExceeded = false;
        protected $firstPageWritten = false;
        protected $lastPageWritten = false;
@@ -102,6 +104,7 @@ class TextPassDumper extends BackupDumper {
 
                if ( $this->forcedDb !== null ) {
                        $this->db = $this->forcedDb;
+
                        return;
                }
 
@@ -183,31 +186,31 @@ class TextPassDumper extends BackupDumper {
                $url = $this->processFileOpt( $val, $param );
 
                switch ( $opt ) {
-               case 'prefetch':
-                       require_once "$IP/maintenance/backupPrefetch.inc";
-                       $this->prefetch = new BaseDump( $url );
-                       break;
-               case 'stub':
-                       $this->input = $url;
-                       break;
-               case 'maxtime':
-                       $this->maxTimeAllowed = intval( $val ) * 60;
-                       break;
-               case 'checkpointfile':
-                       $this->checkpointFiles[] = $val;
-                       break;
-               case 'current':
-                       $this->history = WikiExporter::CURRENT;
-                       break;
-               case 'full':
-                       $this->history = WikiExporter::FULL;
-                       break;
-               case 'spawn':
-                       $this->spawn = true;
-                       if ( $val ) {
-                               $this->php = $val;
-                       }
-                       break;
+                       case 'prefetch':
+                               require_once "$IP/maintenance/backupPrefetch.inc";
+                               $this->prefetch = new BaseDump( $url );
+                               break;
+                       case 'stub':
+                               $this->input = $url;
+                               break;
+                       case 'maxtime':
+                               $this->maxTimeAllowed = intval( $val ) * 60;
+                               break;
+                       case 'checkpointfile':
+                               $this->checkpointFiles[] = $val;
+                               break;
+                       case 'current':
+                               $this->history = WikiExporter::CURRENT;
+                               break;
+                       case 'full':
+                               $this->history = WikiExporter::FULL;
+                               break;
+                       case 'spawn':
+                               $this->spawn = true;
+                               if ( $val ) {
+                                       $this->php = $val;
+                               }
+                               break;
                }
        }
 
@@ -233,6 +236,7 @@ class TextPassDumper extends BackupDumper {
                        $newFileURIs[] = $newURI;
                }
                $val = implode( ';', $newFileURIs );
+
                return $val;
        }
 
@@ -242,6 +246,7 @@ class TextPassDumper extends BackupDumper {
        function showReport() {
                if ( !$this->prefetch ) {
                        parent::showReport();
+
                        return;
                }
 
@@ -278,7 +283,6 @@ class TextPassDumper extends BackupDumper {
                                }
                                $pageRatePart = $this->pageCountPart / $deltaPart;
                                $revRatePart = $this->revCountPart / $deltaPart;
-
                        } else {
                                $fetchRatePart = '-';
                                $pageRatePart = '-';
@@ -314,12 +318,13 @@ class TextPassDumper extends BackupDumper {
        }
 
        function finalOptionCheck() {
-               if ( ( $this->checkpointFiles && ! $this->maxTimeAllowed ) ||
-                       ( $this->maxTimeAllowed && !$this->checkpointFiles ) ) {
+               if ( ( $this->checkpointFiles && !$this->maxTimeAllowed )
+                       || ( $this->maxTimeAllowed && !$this->checkpointFiles )
+               ) {
                        throw new MWException( "Options checkpointfile and maxtime must be specified together.\n" );
                }
                foreach ( $this->checkpointFiles as $checkpointFile ) {
-                       $count = substr_count ( $checkpointFile, "%s" );
+                       $count = substr_count( $checkpointFile, "%s" );
                        if ( $count != 2 ) {
                                throw new MWException( "Option checkpointfile must contain two '%s' "
                                        . "for substitution of first and last pageids, count is $count instead, "
@@ -338,6 +343,7 @@ class TextPassDumper extends BackupDumper {
 
        /**
         * @throws MWException Failure to parse XML input
+        * @param string $input
         * @return bool
         */
        function readDump( $input ) {
@@ -348,6 +354,8 @@ class TextPassDumper extends BackupDumper {
                $this->lastName = "";
                $this->thisPage = 0;
                $this->thisRev = 0;
+               $this->thisRevModel = null;
+               $this->thisRevFormat = null;
 
                $parser = xml_parser_create( "UTF-8" );
                xml_parser_set_option( $parser, XML_OPTION_CASE_FOLDING, false );
@@ -374,7 +382,7 @@ class TextPassDumper extends BackupDumper {
                                        'XML import parse failure',
                                        xml_get_current_line_number( $parser ),
                                        xml_get_current_column_number( $parser ),
-                                       $byte . ( is_null( $chunk ) ? null : ( '; "' . substr( $chunk, $byte -$offset, 16 ) . '"' ) ),
+                                       $byte . ( is_null( $chunk ) ? null : ( '; "' . substr( $chunk, $byte - $offset, 16 ) . '"' ) ),
                                        xml_error_string( xml_get_error_code( $parser ) ) )->escaped();
 
                                xml_parser_free( $parser );
@@ -393,7 +401,7 @@ class TextPassDumper extends BackupDumper {
                                # there's no pageID 0 so we use that. the caller is responsible
                                # for deciding what to do with a file containing only the
                                # siteinfo information and the mw tags.
-                               if ( ! $this->firstPageWritten ) {
+                               if ( !$this->firstPageWritten ) {
                                        $firstPageID = str_pad( 0, 9, "0", STR_PAD_LEFT );
                                        $lastPageID = str_pad( 0, 9, "0", STR_PAD_LEFT );
                                } else {
@@ -415,8 +423,34 @@ class TextPassDumper extends BackupDumper {
                return true;
        }
 
+       /**
+        * Applies applicable export transformations to $text.
+        * If an MWException is caught, it is reported via progress() and $text is returned as-is.
+        * @param string $text Text to transform
+        * @param string $model Content model ID passed to ContentHandler::getForModelID()
+        * @param string|null $format Content format passed on to the handler's exportTransform()
+        *
+        * @return string Result of the handler's exportTransform(), or the input $text on MWException
+        */
+       private function exportTransform( $text, $model, $format = null ) {
+               try {
+                       $handler = ContentHandler::getForModelID( $model );
+                       $text = $handler->exportTransform( $text, $format );
+               }
+               catch ( MWException $ex ) {
+                       $this->progress(
+                               "Unable to apply export transformation for content model '$model': " .
+                               $ex->getMessage()
+                       );
+               }
+
+               return $text;
+       }
+       }
+
        /**
         * Tries to get the revision text for a revision id.
+        * Export transformations are applied if the content model is given or can be
+        * determined from the database.
         *
         * Upon errors, retries (Up to $this->maxFailures tries each call).
         * If still no good revision get could be found even after this retrying, "" is returned.
@@ -425,11 +459,14 @@ class TextPassDumper extends BackupDumper {
         * is thrown.
         *
         * @param string $id The revision id to get the text for
+        * @param string|bool|null $model The content model used to determine applicable export transformations.
+        *      If $model is null and $wgContentHandlerUseDB is set, the model (and format) are read from the revision table.
+        * @param string|null $format The content format used when applying export transformations.
         *
-        * @return string The revision text for $id, or ""
         * @throws MWException
+        * @return string The revision text for $id, or ""
         */
-       function getText( $id ) {
+       function getText( $id, $model = null, $format = null ) {
                global $wgContentHandlerUseDB;
 
                $prefetchNotTried = true; // Whether or not we already tried to get the text via prefetch.
@@ -447,6 +484,24 @@ class TextPassDumper extends BackupDumper {
                $oldConsecutiveFailedTextRetrievals = $consecutiveFailedTextRetrievals;
                $consecutiveFailedTextRetrievals = 0;
 
+               if ( $model === null && $wgContentHandlerUseDB ) {
+                       $row = $this->db->selectRow(
+                               'revision',
+                               array( 'rev_content_model', 'rev_content_format' ),
+                               array( 'rev_id' => $this->thisRev ),
+                               __METHOD__
+                       );
+
+                       if ( $row ) {
+                               $model = $row->rev_content_model;
+                               $format = $row->rev_content_format;
+                       }
+               }
+
+               if ( $model === null || $model === '' ) {
+                       $model = false;
+               }
+
                while ( $failures < $this->maxFailures ) {
 
                        // As soon as we found a good text for the $id, we will return immediately.
@@ -463,9 +518,19 @@ class TextPassDumper extends BackupDumper {
                                        $tryIsPrefetch = true;
                                        $text = $this->prefetch->prefetch( intval( $this->thisPage ),
                                                intval( $this->thisRev ) );
+
                                        if ( $text === null ) {
                                                $text = false;
                                        }
+
+                                       if ( is_string( $text ) && $model !== false ) {
+                                               // Apply export transformation to text coming from an old dump.
+                                               // The purpose of this transformation is to convert up from legacy
+                                               // formats, which may still be used in the older dump that is used
+                                               // for pre-fetching. Applying the transformation again should not
+                                               // interfere with content that is already in the correct form.
+                                               $text = $this->exportTransform( $text, $model, $format );
+                                       }
                                }
 
                                if ( $text === false ) {
@@ -477,6 +542,12 @@ class TextPassDumper extends BackupDumper {
                                                $text = $this->getTextDb( $id );
                                        }
 
+                                       if ( $text !== false && $model !== false ) {
+                                               // Apply export transformation to text coming from the database.
+                                               // Prefetched text should already have transformations applied.
+                                               $text = $this->exportTransform( $text, $model, $format );
+                                       }
+
                                        // No more checks for texts from DB for now.
                                        // If we received something that is not false,
                                        // We treat it as good text, regardless of whether it actually is or is not
@@ -494,28 +565,13 @@ class TextPassDumper extends BackupDumper {
                                // Step 2: Checking for plausibility and return the text if it is
                                //         plausible
                                $revID = intval( $this->thisRev );
-                               if ( ! isset( $this->db ) ) {
+                               if ( !isset( $this->db ) ) {
                                        throw new MWException( "No database available" );
                                }
 
-                               $revLength = strlen( $text );
-                               if ( $wgContentHandlerUseDB ) {
-                                       $row = $this->db->selectRow(
-                                               'revision',
-                                               array( 'rev_len', 'rev_content_model' ),
-                                               array( 'rev_id' => $revID ),
-                                               __METHOD__
-                                       );
-                                       if ( $row ) {
-                                               // only check the length for the wikitext content handler,
-                                               // it's a wasted (and failed) check otherwise
-                                               if ( $row->rev_content_model == CONTENT_MODEL_WIKITEXT ) {
-                                                       $revLength = $row->rev_len;
-                                               }
-                                       }
-
-                               }
-                               else {
+                               if ( $model !== CONTENT_MODEL_WIKITEXT ) {
+                                       $revLength = strlen( $text );
+                               } else {
                                        $revLength = $this->db->selectField( 'revision', 'rev_len', array( 'rev_id' => $revID ) );
                                }
 
@@ -523,12 +579,12 @@ class TextPassDumper extends BackupDumper {
                                        if ( $tryIsPrefetch ) {
                                                $this->prefetchCount++;
                                        }
+
                                        return $text;
                                }
 
                                $text = false;
                                throw new MWException( "Received text is unplausible for id " . $id );
-
                        } catch ( Exception $e ) {
                                $msg = "getting/checking text " . $id . " failed (" . $e->getMessage() . ")";
                                if ( $failures + 1 < $this->maxFailures ) {
@@ -541,7 +597,7 @@ class TextPassDumper extends BackupDumper {
                        $failures++;
 
                        // A failure in a prefetch hit does not warrant resetting db connection etc.
-                       if ( ! $tryIsPrefetch ) {
+                       if ( !$tryIsPrefetch ) {
                                // After backing off for some time, we try to reboot the whole process as
                                // much as possible to not carry over failures from one part to the other
                                // parts
@@ -580,7 +636,7 @@ class TextPassDumper extends BackupDumper {
         */
        private function getTextDb( $id ) {
                global $wgContLang;
-               if ( ! isset( $this->db ) ) {
+               if ( !isset( $this->db ) ) {
                        throw new MWException( __METHOD__ . "No database available" );
                }
                $row = $this->db->selectRow( 'text',
@@ -593,6 +649,7 @@ class TextPassDumper extends BackupDumper {
                }
                $stripped = str_replace( "\r", "", $text );
                $normalized = $wgContLang->normalize( $stripped );
+
                return $normalized;
        }
 
@@ -604,6 +661,7 @@ class TextPassDumper extends BackupDumper {
                }
                $text = $this->getTextSpawnedOnce( $id );
                wfRestoreWarnings();
+
                return $text;
        }
 
@@ -618,8 +676,7 @@ class TextPassDumper extends BackupDumper {
                                                "$IP/../multiversion/MWScript.php",
                                                "fetchText.php",
                                                '--wiki', wfWikiID() ) ) );
-               }
-               else {
+               } else {
                        $cmd = implode( " ",
                                array_map( 'wfEscapeShellArg',
                                        array(
@@ -638,11 +695,12 @@ class TextPassDumper extends BackupDumper {
                if ( !$this->spawnProc ) {
                        // shit
                        $this->progress( "Subprocess spawn failed." );
+
                        return false;
                }
                list(
                        $this->spawnWrite, // -> stdin
-                       $this->spawnRead,  // <- stdout
+                       $this->spawnRead, // <- stdout
                ) = $pipes;
 
                return true;
@@ -720,12 +778,14 @@ class TextPassDumper extends BackupDumper {
                $gotbytes = strlen( $text );
                if ( $gotbytes != $nbytes ) {
                        $this->progress( "Expected $nbytes bytes from database subprocess, got $gotbytes " );
+
                        return false;
                }
 
                // Do normalization in the dump thread...
                $stripped = str_replace( "\r", "", $text );
                $normalized = $wgContLang->normalize( $stripped );
+
                return $normalized;
        }
 
@@ -749,7 +809,14 @@ class TextPassDumper extends BackupDumper {
                }
 
                if ( $name == "text" && isset( $attribs['id'] ) ) {
-                       $text = $this->getText( $attribs['id'] );
+                       $id = $attribs['id'];
+                       $model = trim( $this->thisRevModel );
+                       $format = trim( $this->thisRevFormat );
+
+                       $model = $model === '' ? null : $model;
+                       $format = $format === '' ? null : $format;
+
+                       $text = $this->getText( $id, $model, $format );
                        $this->openElement = array( $name, array( 'xml:space' => 'preserve' ) );
                        if ( strlen( $text ) > 0 ) {
                                $this->characterData( $parser, $text );
@@ -772,8 +839,10 @@ class TextPassDumper extends BackupDumper {
                        $this->egress->writeRevision( null, $this->buffer );
                        $this->buffer = "";
                        $this->thisRev = "";
+                       $this->thisRevModel = null;
+                       $this->thisRevFormat = null;
                } elseif ( $name == 'page' ) {
-                       if ( ! $this->firstPageWritten ) {
+                       if ( !$this->firstPageWritten ) {
                                $this->firstPageWritten = trim( $this->thisPage );
                        }
                        $this->lastPageWritten = trim( $this->thisPage );
@@ -811,7 +880,6 @@ class TextPassDumper extends BackupDumper {
                                $this->buffer = "";
                                $this->thisPage = "";
                        }
-
                } elseif ( $name == 'mediawiki' ) {
                        $this->egress->writeCloseStream( $this->buffer );
                        $this->buffer = "";
@@ -827,6 +895,13 @@ class TextPassDumper extends BackupDumper {
                                $this->thisPage .= $data;
                        }
                }
+               elseif ( $this->lastName == "model" ) {
+                       $this->thisRevModel .= $data;
+               }
+               elseif ( $this->lastName == "format" ) {
+                       $this->thisRevFormat .= $data;
+               }
+
                // have to skip the newline left over from closepagetag line of
                // end of checkpoint files. nasty hack!!
                if ( $this->checkpointJustWritten ) {