<?php
-
+/**
+ * Efficient concatenated text storage.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ */
+
/**
* Base class for general text storage via the "object" flag in old_flags, or
* two-part external storage URLs. Used for represent efficient concatenated
* Adds an item of text, returns a stub object which points to the item.
* You must call setLocation() on the stub object before storing it to the
* database
- * Returns the key for getItem()
+ *
+ * @param $text string
+ *
+ * @return String: the key for getItem()
*/
- public function addItem( $text );
+ function addItem( $text );
/**
* Get item by key, or false if the key is not present
+ *
+ * @param $key string
+ *
+ * @return String or false
*/
- public function getItem( $key );
+ function getItem( $key );
/**
* Set the "default text"
* be other revisions in the same object.
*
* Default text is not required for two-part external storage URLs.
+ *
+ * @param $text string
*/
- public function setText( $text );
+ function setText( $text );
/**
* Get default text. This is called from Revision::getRevisionText()
+ *
+ * @return String
*/
function getText();
}
public $mMaxCount = 100;
/** Constructor */
- public function ConcatenatedGzipHistoryBlob() {
+ public function __construct() {
if ( !function_exists( 'gzdeflate' ) ) {
throw new MWException( "Need zlib support to read or write this kind of history object (ConcatenatedGzipHistoryBlob)\n" );
}
}
+ /**
+ * @param $text string
+ * @return string
+ */
public function addItem( $text ) {
$this->uncompress();
$hash = md5( $text );
return $hash;
}
+ /**
+ * @param $hash string
+ * @return array|bool
+ */
public function getItem( $hash ) {
$this->uncompress();
if ( array_key_exists( $hash, $this->mItems ) ) {
}
}
+ /**
+ * @param $text string
+ * @return void
+ */
public function setText( $text ) {
$this->uncompress();
$this->mDefaultHash = $this->addItem( $text );
}
+ /**
+ * @return array|bool
+ */
public function getText() {
$this->uncompress();
return $this->getItem( $this->mDefaultHash );
/**
* Remove an item
+ *
+ * @param $hash string
*/
public function removeItem( $hash ) {
$this->mSize -= strlen( $this->mItems[$hash] );
}
}
-
+ /**
+ * @return array
+ */
function __sleep() {
$this->compress();
return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
/**
* Helper function for compression jobs
* Returns true until the object is "full" and ready to be committed
+ *
+ * @return bool
*/
public function isHappy() {
return $this->mSize < $this->mMaxSize
}
-/**
- * One-step cache variable to hold base blobs; operations that
- * pull multiple revisions may often pull multiple times from
- * the same blob. By keeping the last-used one open, we avoid
- * redundant unserialization and decompression overhead.
- */
-global $wgBlobCache;
-$wgBlobCache = array();
-
-
/**
* Pointer object for an item within a CGZ blob stored in the text table.
*/
class HistoryBlobStub {
+ /**
+ * One-step cache variable to hold base blobs; operations that
+ * pull multiple revisions may often pull multiple times from
+ * the same blob. By keeping the last-used one open, we avoid
+ * redundant unserialization and decompression overhead.
+ */
+ protected static $blobCache = array();
+
var $mOldId, $mHash, $mRef;
/**
- * @param string $hash The content hash of the text
- * @param integer $oldid The old_id for the CGZ object
+ * @param $hash string the content hash of the text
+ * @param $oldid Integer the old_id for the CGZ object
*/
- function HistoryBlobStub( $hash = '', $oldid = 0 ) {
+ function __construct( $hash = '', $oldid = 0 ) {
$this->mHash = $hash;
}
return $this->mRef;
}
+ /**
+ * @return string
+ */
function getText() {
$fname = 'HistoryBlobStub::getText';
- global $wgBlobCache;
- if( isset( $wgBlobCache[$this->mOldId] ) ) {
- $obj = $wgBlobCache[$this->mOldId];
+
+ if( isset( self::$blobCache[$this->mOldId] ) ) {
+ $obj = self::$blobCache[$this->mOldId];
} else {
$dbr = wfGetDB( DB_SLAVE );
$row = $dbr->selectRow( 'text', array( 'old_flags', 'old_text' ), array( 'old_id' => $this->mOldId ) );
$flags = explode( ',', $row->old_flags );
if( in_array( 'external', $flags ) ) {
$url=$row->old_text;
- @list( /* $proto */ ,$path)=explode('://',$url,2);
- if ($path=="") {
+ $parts = explode( '://', $url, 2 );
+ if ( !isset( $parts[1] ) || $parts[1] == '' ) {
wfProfileOut( $fname );
return false;
}
- $row->old_text=ExternalStore::fetchFromUrl($url);
+ $row->old_text = ExternalStore::fetchFromUrl($url);
}
if( !in_array( 'object', $flags ) ) {
// Save this item for reference; if pulling many
// items in a row we'll likely use it again.
$obj->uncompress();
- $wgBlobCache = array( $this->mOldId => $obj );
+ self::$blobCache = array( $this->mOldId => $obj );
}
return $obj->getItem( $this->mHash );
}
/**
* Get the content hash
+ *
+ * @return string
*/
function getHash() {
return $this->mHash;
var $mCurId;
/**
- * @param integer $curid The cur_id pointed to
+ * @param $curid Integer: the cur_id pointed to
*/
- function HistoryBlobCurStub( $curid = 0 ) {
+ function __construct( $curid = 0 ) {
$this->mCurId = $curid;
}
/**
* Sets the location (cur_id) of the main object to which this object
* points
+ *
+ * @param $id int
*/
function setLocation( $id ) {
$this->mCurId = $id;
}
+ /**
+ * @return string|bool
+ */
function getText() {
$dbr = wfGetDB( DB_SLAVE );
$row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) );
}
}
+ /**
+ * @throws MWException
+ * @param $text string
+ * @return int
+ */
function addItem( $text ) {
if ( $this->mFrozen ) {
throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" );
return count( $this->mItems ) - 1;
}
+ /**
+ * @param $key string
+ * @return string
+ */
function getItem( $key ) {
return $this->mItems[$key];
}
+ /**
+ * @param $text string
+ */
function setText( $text ) {
$this->mDefaultKey = $this->addItem( $text );
}
+ /**
+ * @return string
+ */
function getText() {
return $this->getItem( $this->mDefaultKey );
}
+ /**
+ * @throws MWException
+ */
function compress() {
- if ( !function_exists( 'xdiff_string_bdiff' ) ){
+ if ( !function_exists( 'xdiff_string_rabdiff' ) ){
throw new MWException( "Need xdiff 1.5+ support to write DiffHistoryBlob\n" );
}
if ( isset( $this->mDiffs ) ) {
}
}
+ /**
+ * @param $t1
+ * @param $t2
+ * @return string
+ */
function diff( $t1, $t2 ) {
# Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
# "String is not zero-terminated"
wfSuppressWarnings();
- $diff = xdiff_string_bdiff( $t1, $t2 ) . '';
+ $diff = xdiff_string_rabdiff( $t1, $t2 ) . '';
wfRestoreWarnings();
return $diff;
}
+ /**
+ * @param $base
+ * @param $diff
+ * @return bool|string
+ */
function patch( $base, $diff ) {
if ( function_exists( 'xdiff_string_bpatch' ) ) {
wfSuppressWarnings();
$header = unpack( 'Vofp/Vcsize', substr( $diff, 0, 8 ) );
- # Check the checksum if mhash is available
- if ( extension_loaded( 'mhash' ) ) {
- $ofp = mhash( MHASH_ADLER32, $base );
- if ( $ofp !== substr( $diff, 0, 4 ) ) {
- wfDebug( __METHOD__. ": incorrect base checksum\n" );
- return false;
- }
+ # Check the checksum if hash/mhash is available
+ $ofp = $this->xdiffAdler32( $base );
+ if ( $ofp !== false && $ofp !== substr( $diff, 0, 4 ) ) {
+ wfDebug( __METHOD__. ": incorrect base checksum\n" );
+ return false;
}
if ( $header['csize'] != strlen( $base ) ) {
- wfDebug( __METHOD__. ": incorrect base length {$header['csize']} -> {strlen($base)}\n" );
+ wfDebug( __METHOD__. ": incorrect base length\n" );
return false;
}
return $out;
}
+ /**
+ * Compute a binary "Adler-32" checksum as defined by LibXDiff, i.e. with
+ * the bytes backwards and initialised with 0 instead of 1. See bug 34428.
+ *
+ * Returns false if no hashing library is available
+ */
+ function xdiffAdler32( $s ) {
+ static $init;
+ if ( $init === null ) {
+ $init = str_repeat( "\xf0", 205 ) . "\xee" . str_repeat( "\xf0", 67 ) . "\x02";
+ }
+ // The real Adler-32 checksum of $init is zero, so it initialises the
+ // state to zero, as it is at the start of LibXDiff's checksum
+ // algorithm. Appending the subject string then simulates LibXDiff.
+ if ( function_exists( 'hash' ) ) {
+ $hash = hash( 'adler32', $init . $s, true );
+ } elseif ( function_exists( 'mhash' ) ) {
+ $hash = mhash( MHASH_ADLER32, $init . $s );
+ } else {
+ return false;
+ }
+ return strrev( $hash );
+ }
+
function uncompress() {
if ( !$this->mDiffs ) {
return;
}
}
+ /**
+ * @return array
+ */
function __sleep() {
$this->compress();
if ( !count( $this->mItems ) ) {
if ( isset( $info['default'] ) ) {
$this->mDefaultKey = $info['default'];
}
- $map = explode( ',', $info['map'] );
- $cur = 0;
- $this->mDiffMap = array();
- foreach ( $map as $i ) {
- $cur += $i;
- $this->mDiffMap[] = $cur;
- }
$this->mDiffs = $info['diffs'];
+ if ( isset( $info['base'] ) ) {
+ // Old format
+ $this->mDiffMap = range( 0, count( $this->mDiffs ) - 1 );
+ array_unshift( $this->mDiffs,
+ pack( 'VVCV', 0, 0, self::XDL_BDOP_INSB, strlen( $info['base'] ) ) .
+ $info['base'] );
+ } else {
+ // New format
+ $map = explode( ',', $info['map'] );
+ $cur = 0;
+ $this->mDiffMap = array();
+ foreach ( $map as $i ) {
+ $cur += $i;
+ $this->mDiffMap[] = $cur;
+ }
+ }
$this->uncompress();
}
/**
* Helper function for compression jobs
* Returns true until the object is "full" and ready to be committed
+ *
+ * @return bool
*/
function isHappy() {
return $this->mSize < $this->mMaxSize