Merge "Add comments for ParserOptions::getUserLang regarding cache split"
[lhc/web/wiklou.git] / maintenance / backupPrefetch.inc
index f40bc89..7bfb734 100644 (file)
@@ -1,4 +1,28 @@
 <?php
+/**
+ * Helper class for the --prefetch option of dumpTextPass.php
+ *
+ * Copyright © 2005 Brion Vibber <brion@pobox.com>
+ * https://www.mediawiki.org/
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
 
 /**
  * Readahead helper for making large MediaWiki data dumps;
  * - text contents are immutable and should not change once
  *   recorded, so the previous dump is a reliable source
  *
- * Requires PHP 5 and the XMLReader PECL extension.
+ * @ingroup Maintenance
  */
 class BaseDump {
-       var $reader = null;
-       var $atEnd = false;
-       var $atPageEnd = false;
-       var $lastPage = 0;
-       var $lastRev = 0;
-       
-       function BaseDump( $infile ) {
+       protected $reader = null;
+       protected $atEnd = false;
+       protected $atPageEnd = false;
+       protected $lastPage = 0;
+       protected $lastRev = 0;
+       protected $infiles = null;
+
+       public function __construct( $infile ) {
+               $this->infiles = explode( ';', $infile );
                $this->reader = new XMLReader();
-               $this->reader->open( $infile );
+               $infile = array_shift( $this->infiles );
+               if ( defined( 'LIBXML_PARSEHUGE' ) ) {
+                       $this->reader->open( $infile, null, LIBXML_PARSEHUGE );
+               } else {
+                       $this->reader->open( $infile );
+               }
        }
-       
+
        /**
         * Attempts to fetch the text of a particular page revision
         * from the dump stream. May return null if the page is
@@ -34,132 +65,154 @@ class BaseDump {
         *
         * @param int $page ID number of page to read
         * @param int $rev ID number of revision to read
-        * @return string or null
+        * @return string|null
         */
        function prefetch( $page, $rev ) {
                $page = intval( $page );
                $rev = intval( $rev );
-               while( $this->lastPage < $page && !$this->atEnd ) {
+               while ( $this->lastPage < $page && !$this->atEnd ) {
                        $this->debug( "BaseDump::prefetch at page $this->lastPage, looking for $page" );
                        $this->nextPage();
                }
-               if( $this->lastPage > $page || $this->atEnd ) {
-                       $this->debug( "BaseDump::prefetch already past page $page looking for rev $rev  [$this->lastPage, $this->lastRev]" );
+               if ( $this->lastPage > $page || $this->atEnd ) {
+                       $this->debug( "BaseDump::prefetch already past page $page "
+                               . "looking for rev $rev  [$this->lastPage, $this->lastRev]" );
+
                        return null;
                }
-               while( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
-                       $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, looking for $page, $rev" );
+               while ( $this->lastRev < $rev && !$this->atEnd && !$this->atPageEnd ) {
+                       $this->debug( "BaseDump::prefetch at page $this->lastPage, rev $this->lastRev, "
+                               . "looking for $page, $rev" );
                        $this->nextRev();
                }
-               if( $this->lastRev == $rev && !$this->atEnd ) {
+               if ( $this->lastRev == $rev && !$this->atEnd ) {
                        $this->debug( "BaseDump::prefetch hit on $page, $rev [$this->lastPage, $this->lastRev]" );
+
                        return $this->nextText();
                } else {
-                       $this->debug( "BaseDump::prefetch already past rev $rev on page $page  [$this->lastPage, $this->lastRev]" );
+                       $this->debug( "BaseDump::prefetch already past rev $rev on page $page "
+                               . "[$this->lastPage, $this->lastRev]" );
+
                        return null;
                }
        }
-       
+
        function debug( $str ) {
                wfDebug( $str . "\n" );
-               //global $dumper;
-               //$dumper->progress( $str );
+               // global $dumper;
+               // $dumper->progress( $str );
        }
-       
+
        /**
         * @access private
         */
        function nextPage() {
-               if( $this->skipTo( 'page', 'mediawiki' ) ) {
-                       if( $this->skipTo( 'id' ) ) {
+               if ( $this->skipTo( 'page', 'mediawiki' ) ) {
+                       if ( $this->skipTo( 'id' ) ) {
                                $this->lastPage = intval( $this->nodeContents() );
                                $this->lastRev = 0;
                                $this->atPageEnd = false;
                        }
                } else {
-                       $this->atEnd = true;
+                       $this->close();
+                       if ( count( $this->infiles ) ) {
+                               $infile = array_shift( $this->infiles );
+                               $this->reader->open( $infile );
+                               $this->atEnd = false;
+                       }
                }
        }
-       
+
        /**
         * @access private
         */
        function nextRev() {
-               if( $this->skipTo( 'revision' ) ) {
-                       if( $this->skipTo( 'id' ) ) {
+               if ( $this->skipTo( 'revision' ) ) {
+                       if ( $this->skipTo( 'id' ) ) {
                                $this->lastRev = intval( $this->nodeContents() );
                        }
                } else {
                        $this->atPageEnd = true;
                }
        }
-       
+
        /**
         * @access private
+        * @return string
         */
        function nextText() {
                $this->skipTo( 'text' );
+
                return strval( $this->nodeContents() );
        }
-       
+
        /**
         * @access private
+        * @param string $name
+        * @param string $parent
+        * @return bool|null
         */
-       function skipTo( $name, $parent='page' ) {
-               if( $this->atEnd ) {
+       function skipTo( $name, $parent = 'page' ) {
+               if ( $this->atEnd ) {
                        return false;
                }
-               while( $this->reader->read() ) {
-                       if( $this->reader->nodeType == XMLREADER_ELEMENT &&
-                               $this->reader->name == $name ) {
+               while ( $this->reader->read() ) {
+                       if ( $this->reader->nodeType == XMLReader::ELEMENT
+                               && $this->reader->name == $name
+                       ) {
                                return true;
                        }
-                       if( $this->reader->nodeType == XMLREADER_END_ELEMENT &&
-                               $this->reader->name == $parent ) {
+                       if ( $this->reader->nodeType == XMLReader::END_ELEMENT
+                               && $this->reader->name == $parent
+                       ) {
                                $this->debug( "BaseDump::skipTo found </$parent> searching for <$name>" );
+
                                return false;
                        }
                }
+
                return $this->close();
        }
-       
+
        /**
         * Shouldn't something like this be built-in to XMLReader?
         * Fetches text contents of the current element, assuming
         * no sub-elements or such scary things.
+        *
         * @return string
         * @access private
         */
        function nodeContents() {
-               if( $this->atEnd ) {
+               if ( $this->atEnd ) {
                        return null;
                }
-               if( $this->reader->isEmptyElement ) {
+               if ( $this->reader->isEmptyElement ) {
                        return "";
                }
                $buffer = "";
-               while( $this->reader->read() ) {
-                       switch( $this->reader->nodeType ) {
-                       case XMLREADER_TEXT:
-//                     case XMLREADER_WHITESPACE:
-                       case XMLREADER_SIGNIFICANT_WHITESPACE:
-                               $buffer .= $this->reader->value;
-                               break;
-                       case XMLREADER_END_ELEMENT:
-                               return $buffer;
+               while ( $this->reader->read() ) {
+                       switch ( $this->reader->nodeType ) {
+                               case XMLReader::TEXT:
+                               //case XMLReader::WHITESPACE:
+                               case XMLReader::SIGNIFICANT_WHITESPACE:
+                                       $buffer .= $this->reader->value;
+                                       break;
+                               case XMLReader::END_ELEMENT:
+                                       return $buffer;
                        }
                }
+
                return $this->close();
        }
-       
+
        /**
         * @access private
+        * @return null
         */
        function close() {
                $this->reader->close();
                $this->atEnd = true;
+
                return null;
        }
 }
-
-?>