Merge "Follow up: I1b017e13 - Fix Strict Standards"
[lhc/web/wiklou.git] / includes / HistoryBlob.php
index 7489399..05c27fe 100644 (file)
@@ -1,8 +1,28 @@
 <?php
+/**
+ * Efficient concatenated text storage.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
 
 /**
- * Base class for general text storage via the "object" flag in old_flags, or 
- * two-part external storage URLs. Used for represent efficient concatenated 
+ * Base class for general text storage via the "object" flag in old_flags, or
+ * two-part external storage URLs. Used for represent efficient concatenated
  * storage, and migration-related pointer objects.
  */
 interface HistoryBlob
@@ -11,14 +31,21 @@ interface HistoryBlob
         * Adds an item of text, returns a stub object which points to the item.
         * You must call setLocation() on the stub object before storing it to the
         * database
-        * Returns the key for getItem()
+        *
+        * @param $text string
+        *
+        * @return String: the key for getItem()
         */
-       public function addItem( $text );
+       function addItem( $text );
 
        /**
         * Get item by key, or false if the key is not present
+        *
+        * @param $key string
+        *
+        * @return String or false
         */
-       public function getItem( $key );
+       function getItem( $key );
 
        /**
         * Set the "default text"
@@ -27,11 +54,15 @@ interface HistoryBlob
         * be other revisions in the same object.
         *
         * Default text is not required for two-part external storage URLs.
+        *
+        * @param $text string
         */
-       public function setText( $text );
+       function setText( $text );
 
        /**
         * Get default text. This is called from Revision::getRevisionText()
+        *
+        * @return String
         */
        function getText();
 }
@@ -48,12 +79,16 @@ class ConcatenatedGzipHistoryBlob implements HistoryBlob
        public $mMaxCount = 100;
 
        /** Constructor */
-       public function ConcatenatedGzipHistoryBlob() {
+       public function __construct() {
                if ( !function_exists( 'gzdeflate' ) ) {
                        throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
                }
        }
 
+       /**
+        * @param $text string
+        * @return string
+        */
        public function addItem( $text ) {
                $this->uncompress();
                $hash = md5( $text );
@@ -64,6 +99,10 @@ class ConcatenatedGzipHistoryBlob implements HistoryBlob
                return $hash;
        }
 
+       /**
+        * @param $hash string
+        * @return array|bool
+        */
        public function getItem( $hash ) {
                $this->uncompress();
                if ( array_key_exists( $hash, $this->mItems ) ) {
@@ -73,11 +112,18 @@ class ConcatenatedGzipHistoryBlob implements HistoryBlob
                }
        }
 
+       /**
+        * @param $text string
+        * @return void
+        */
        public function setText( $text ) {
                $this->uncompress();
                $this->mDefaultHash = $this->addItem( $text );
        }
 
+       /**
+        * @return array|bool
+        */
        public function getText() {
                $this->uncompress();
                return $this->getItem( $this->mDefaultHash );
@@ -85,6 +131,8 @@ class ConcatenatedGzipHistoryBlob implements HistoryBlob
 
        /**
         * Remove an item
+        *
+        * @param $hash string
         */
        public function removeItem( $hash ) {
                $this->mSize -= strlen( $this->mItems[$hash] );
@@ -111,7 +159,9 @@ class ConcatenatedGzipHistoryBlob implements HistoryBlob
                }
        }
 
-
+       /**
+        * @return array
+        */
        function __sleep() {
                $this->compress();
                return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
@@ -124,35 +174,35 @@ class ConcatenatedGzipHistoryBlob implements HistoryBlob
        /**
         * Helper function for compression jobs
         * Returns true until the object is "full" and ready to be committed
+        *
+        * @return bool
         */
        public function isHappy() {
-               return $this->mSize < $this->mMaxSize 
+               return $this->mSize < $this->mMaxSize
                        && count( $this->mItems ) < $this->mMaxCount;
        }
 }
 
 
-/**
- * One-step cache variable to hold base blobs; operations that
- * pull multiple revisions may often pull multiple times from
- * the same blob. By keeping the last-used one open, we avoid
- * redundant unserialization and decompression overhead.
- */
-global $wgBlobCache;
-$wgBlobCache = array();
-
-
 /**
  * Pointer object for an item within a CGZ blob stored in the text table.
  */
 class HistoryBlobStub {
+       /**
+        * One-step cache variable to hold base blobs; operations that
+        * pull multiple revisions may often pull multiple times from
+        * the same blob. By keeping the last-used one open, we avoid
+        * redundant unserialization and decompression overhead.
+        */
+       protected static $blobCache = array();
+
        var $mOldId, $mHash, $mRef;
 
        /**
-        * @param string $hash The content hash of the text
-        * @param integer $oldid The old_id for the CGZ object
+        * @param $hash string the content hash of the text
+        * @param $oldid Integer the old_id for the CGZ object
         */
-       function HistoryBlobStub( $hash = '', $oldid = 0 ) {
+       function __construct( $hash = '', $oldid = 0 ) {
                $this->mHash = $hash;
        }
 
@@ -178,11 +228,14 @@ class HistoryBlobStub {
                return $this->mRef;
        }
 
+       /**
+        * @return string
+        */
        function getText() {
                $fname = 'HistoryBlobStub::getText';
-               global $wgBlobCache;
-               if( isset( $wgBlobCache[$this->mOldId] ) ) {
-                       $obj = $wgBlobCache[$this->mOldId];
+
+               if( isset( self::$blobCache[$this->mOldId] ) ) {
+                       $obj = self::$blobCache[$this->mOldId];
                } else {
                        $dbr = wfGetDB( DB_SLAVE );
                        $row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
@@ -192,12 +245,12 @@ class HistoryBlobStub {
                        $flags = explode( ',', $row->old_flags );
                        if( in_array( 'external', $flags ) ) {
                                $url=$row->old_text;
-                               @list( /* $proto */ ,$path)=explode('://',$url,2);
-                               if ($path=="") {
+                               $parts = explode( '://', $url, 2 );
+                               if ( !isset( $parts[1] ) || $parts[1] == '' ) {
                                        wfProfileOut( $fname );
                                        return false;
                                }
-                               $row->old_text=ExternalStore::fetchFromUrl($url);
+                               $row->old_text = ExternalStore::fetchFromUrl($url);
 
                        }
                        if( !in_array( 'object', $flags ) ) {
@@ -220,13 +273,15 @@ class HistoryBlobStub {
                        // Save this item for reference; if pulling many
                        // items in a row we'll likely use it again.
                        $obj->uncompress();
-                       $wgBlobCache = array( $this->mOldId => $obj );
+                       self::$blobCache = array( $this->mOldId => $obj );
                }
                return $obj->getItem( $this->mHash );
        }
 
        /**
         * Get the content hash
+        *
+        * @return string
         */
        function getHash() {
                return $this->mHash;
@@ -246,20 +301,25 @@ class HistoryBlobCurStub {
        var $mCurId;
 
        /**
-        * @param integer $curid The cur_id pointed to
+        * @param $curid Integer: the cur_id pointed to
         */
-       function HistoryBlobCurStub( $curid = 0 ) {
+       function __construct( $curid = 0 ) {
                $this->mCurId = $curid;
        }
 
        /**
         * Sets the location (cur_id) of the main object to which this object
         * points
+        *
+        * @param $id int
         */
        function setLocation( $id ) {
                $this->mCurId = $id;
        }
 
+       /**
+        * @return string|bool
+        */
        function getText() {
                $dbr = wfGetDB( DB_SLAVE );
                $row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) );
@@ -281,12 +341,12 @@ class DiffHistoryBlob implements HistoryBlob {
        /** Total uncompressed size */
        var $mSize = 0;
 
-       /** 
-        * Array of diffs. If a diff D from A to B is notated D = B - A, and Z is 
+       /**
+        * Array of diffs. If a diff D from A to B is notated D = B - A, and Z is
         * an empty string:
         *
         *              { item[map[i]] - item[map[i-1]]   where i > 0
-        *    diff[i] = { 
+        *    diff[i] = {
         *              { item[map[i]] - Z                where i = 0
         */
        var $mDiffs;
@@ -320,15 +380,22 @@ class DiffHistoryBlob implements HistoryBlob {
         */
        var $mMaxCount = 100;
 
+       /** Constants from xdiff.h */
+       const XDL_BDOP_INS = 1;
+       const XDL_BDOP_CPY = 2;
+       const XDL_BDOP_INSB = 3;
+
        function __construct() {
-               if ( !function_exists( 'xdiff_string_bdiff' ) ){ 
-                       throw new MWException( "Need xdiff 1.5+ support to read or write DiffHistoryBlob\n" );
-               }
                if ( !function_exists( 'gzdeflate' ) ) {
                        throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
                }
        }
 
+       /**
+        * @throws MWException
+        * @param $text string
+        * @return int
+        */
        function addItem( $text ) {
                if ( $this->mFrozen ) {
                        throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" );
@@ -340,19 +407,35 @@ class DiffHistoryBlob implements HistoryBlob {
                return count( $this->mItems ) - 1;
        }
 
+       /**
+        * @param $key string
+        * @return string
+        */
        function getItem( $key ) {
                return $this->mItems[$key];
        }
 
+       /**
+        * @param $text string
+        */
        function setText( $text ) {
                $this->mDefaultKey = $this->addItem( $text );
        }
 
+       /**
+        * @return string
+        */
        function getText() {
                return $this->getItem( $this->mDefaultKey );
        }
 
+       /**
+        * @throws MWException
+        */
        function compress() {
+               if ( !function_exists( 'xdiff_string_rabdiff' ) ){
+                       throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
+               }
                if ( isset( $this->mDiffs ) ) {
                        // Already compressed
                        return;
@@ -421,20 +504,102 @@ class DiffHistoryBlob implements HistoryBlob {
                }
        }
 
+       /**
+        * @param $t1
+        * @param $t2
+        * @return string
+        */
        function diff( $t1, $t2 ) {
                # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
                # "String is not zero-terminated"
                wfSuppressWarnings();
-               $diff = xdiff_string_bdiff( $t1, $t2 ) . '';
+               $diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
                wfRestoreWarnings();
                return $diff;
        }
 
+       /**
+        * @param $base
+        * @param $diff
+        * @return bool|string
+        */
        function patch( $base, $diff ) {
-               wfSuppressWarnings();
-               $text = xdiff_string_bpatch( $base, $diff ) . '';
-               wfRestoreWarnings();
-               return $text;
+               if ( function_exists( 'xdiff_string_bpatch' ) ) {
+                       wfSuppressWarnings();
+                       $text = xdiff_string_bpatch( $base, $diff ) . '';
+                       wfRestoreWarnings();
+                       return $text;
+               }
+
+               # Pure PHP implementation
+
+               $header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
+
+               # Check the checksum if hash/mhash is available
+               $ofp = $this->xdiffAdler32( $base );
+               if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
+                       wfDebug( __METHOD__. ": incorrect base checksum\n" );
+                       return false;
+               }
+               if ( $header['csize'] != strlen( $base ) ) {
+                       wfDebug( __METHOD__. ": incorrect base length\n" );
+                       return false;
+               }
+
+               $p = 8;
+               $out = '';
+               while ( $p < strlen( $diff ) ) {
+                       $x = unpack( 'Cop', substr( $diff, $p, 1 ) );
+                       $op = $x['op'];
+                       ++$p;
+                       switch ( $op ) {
+                       case self::XDL_BDOP_INS:
+                               $x = unpack( 'Csize', substr( $diff, $p, 1 ) );
+                               $p++;
+                               $out .= substr( $diff, $p, $x['size'] );
+                               $p += $x['size'];
+                               break;
+                       case self::XDL_BDOP_INSB:
+                               $x = unpack( 'Vcsize', substr( $diff, $p, 4 ) );
+                               $p += 4;
+                               $out .= substr( $diff, $p, $x['csize'] );
+                               $p += $x['csize'];
+                               break;
+                       case self::XDL_BDOP_CPY:
+                               $x = unpack( 'Voff/Vcsize', substr( $diff, $p, 8 ) );
+                               $p += 8;
+                               $out .= substr( $base, $x['off'], $x['csize'] );
+                               break;
+                       default:
+                               wfDebug( __METHOD__.": invalid op\n" );
+                               return false;
+                       }
+               }
+               return $out;
+       }
+
+       /**
+        * Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e. with
+        * the bytes backwards and initialised with 0 instead of 1. See bug 34428.
+        *
+        * Returns false if no hashing library is available
+        */
+       function xdiffAdler32( $s ) {
+               static $init;
+               if ( $init === null ) {
+                       $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
+               }
+               // The real Adler-32 checksum of $init is zero, so it initialises the
+               // state to zero, as it is at the start of LibXDiff's checksum
+               // algorithm. Appending the subject string then simulates LibXDiff.
+               if ( function_exists( 'hash' ) ) {
+                       $hash = hash( 'adler32', $init . $s, true );
+               } elseif ( function_exists( 'mhash' ) ) {
+                       $hash = mhash( MHASH_ADLER32, $init . $s );
+               } else {
+                       return false;
+               }
+               return strrev( $hash );
        }
 
        function uncompress() {
@@ -450,6 +615,9 @@ class DiffHistoryBlob implements HistoryBlob {
                }
        }
 
+       /**
+        * @return array
+        */
        function __sleep() {
                $this->compress();
                if ( !count( $this->mItems ) ) {
@@ -492,23 +660,34 @@ class DiffHistoryBlob implements HistoryBlob {
                if ( isset( $info['default'] ) ) {
                        $this->mDefaultKey = $info['default'];
                }
-               $map = explode( ',', $info['map'] );
-               $cur = 0;
-               $this->mDiffMap = array();
-               foreach ( $map as $i ) {
-                       $cur += $i;
-                       $this->mDiffMap[] = $cur;
-               }
                $this->mDiffs = $info['diffs'];
+               if ( isset( $info['base'] ) ) {
+                       // Old format
+                       $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
+                       array_unshift( $this->mDiffs,
+                               pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
+                               $info['base'] );
+               } else {
+                       // New format
+                       $map = explode( ',', $info['map'] );
+                       $cur = 0;
+                       $this->mDiffMap = array();
+                       foreach ( $map as $i ) {
+                               $cur += $i;
+                               $this->mDiffMap[] = $cur;
+                       }
+               }
                $this->uncompress();
        }
 
        /**
         * Helper function for compression jobs
         * Returns true until the object is "full" and ready to be committed
+        *
+        * @return bool
         */
        function isHappy() {
-               return $this->mSize < $this->mMaxSize 
+               return $this->mSize < $this->mMaxSize
                        && count( $this->mItems ) < $this->mMaxCount;
        }