<?php
/**
- * Base class for general text storage via the "object" flag in old_flags, or
- * two-part external storage URLs. Used to represent efficient concatenated
- * storage, and migration-related pointer objects.
+ * Pure virtual parent
+ * @todo document (needs a one-sentence top-level class description, that answers the question: "what is a HistoryBlob?")
*/
interface HistoryBlob
{
+ /**
+ * setMeta and getMeta currently aren't used for anything, I just thought
+ * they might be useful in the future.
+ * @param $meta String: a single string.
+ */
+ public function setMeta( $meta );
+
+ /**
+ * setMeta and getMeta currently aren't used for anything, I just thought
+ * they might be useful in the future.
+ * Gets the meta-value
+ */
+ public function getMeta();
+
/**
* Adds an item of text, returns a stub object which points to the item.
* You must call setLocation() on the stub object before storing it to the
* database
- * Returns the key for getItem()
*/
public function addItem( $text );
/**
- * Get item by key, or false if the key is not present
+ * Get item by hash
*/
- public function getItem( $key );
+ public function getItem( $hash );
- /**
- * Set the "default text"
- * This concept is an odd property of the current DB schema, whereby each text item has a revision
- * associated with it. The default text is the text of the associated revision. There may, however,
- * be other revisions in the same object.
- *
- * Default text is not required for two-part external storage URLs.
- */
+ # Set the "default text"
+ # This concept is an odd property of the current DB schema, whereby each text item has a revision
+ # associated with it. The default text is the text of the associated revision. There may, however,
+ # be other revisions in the same object
public function setText( $text );
/**
}
/**
- * Concatenated gzip (CGZ) storage
- * Improves compression ratio by concatenating like objects before gzipping
+ * The real object
+ * @todo document (needs one-sentence top-level class description + function descriptions).
*/
class ConcatenatedGzipHistoryBlob implements HistoryBlob
{
}
}
+ #
+ # HistoryBlob implementation:
+ #
+
+ /** @todo document */
+ public function setMeta( $metaData ) {
+ $this->uncompress();
+ $this->mItems['meta'] = $metaData;
+ }
+
+ /**
+  * Get the meta-value previously stored by setMeta().
+  *
+  * @return the stored value, or null if no meta-value has been set
+  */
+ public function getMeta() {
+     $this->uncompress();
+     // The 'meta' slot only exists once setMeta() has run; reading it
+     // unconditionally raised an undefined-index notice.
+     return isset( $this->mItems['meta'] ) ? $this->mItems['meta'] : null;
+ }
+
+ /** @todo document */
public function addItem( $text ) {
$this->uncompress();
$hash = md5( $text );
$this->mItems[$hash] = $text;
$this->mSize += strlen( $text );
- return $hash;
+ $stub = new HistoryBlobStub( $hash );
+ return $stub;
}
+ /** @todo document */
public function getItem( $hash ) {
$this->uncompress();
if ( array_key_exists( $hash, $this->mItems ) ) {
}
}
+ /** @todo document */
/**
 * Add $text as an item and remember its hash as the default text,
 * i.e. the item that getText() returns.
 *
 * @param $text String: the text to store
 */
public function setText( $text ) {
$this->uncompress();
// NOTE(review): this relies on addItem() returning an object that exposes
// mHash (a stub), not a bare hash string -- confirm against addItem().
$stub = $this->addItem( $text );
$this->mDefaultHash = $stub->mHash;
}
+ /** @todo document */
/**
 * Get the default text: the item stored under mDefaultHash.
 *
 * @return whatever getItem() returns for mDefaultHash
 */
public function getText() {
$this->uncompress();
return $this->getItem( $this->mDefaultHash );
}
- /**
- * Remove an item
- */
+ # HistoryBlob implemented.
+
+
+ /** @todo document */
/**
 * Remove an item and subtract its length from mSize.
 *
 * A hash that is not present is ignored.
 *
 * @param $hash String: key of the item in mItems, as accepted by getItem()
 */
public function removeItem( $hash ) {
	// Like the other accessors, make sure mItems is the uncompressed array
	// before indexing into it.
	$this->uncompress();
	if ( !array_key_exists( $hash, $this->mItems ) ) {
		return;
	}
	$this->mSize -= strlen( $this->mItems[$hash] );
	unset( $this->mItems[$hash] );
}
- /**
- * Compress the bulk data in the object
- */
+ /** @todo document */
/**
 * Compress the bulk data: unless mCompressed is already set, mItems is
 * replaced by the gzdeflate()d serialization of itself.
 */
public function compress() {
if ( !$this->mCompressed ) {
$this->mItems = gzdeflate( serialize( $this->mItems ) );
// NOTE(review): nothing visible in this excerpt sets mCompressed after
// compressing; a line may have been elided here -- confirm against the full file.
}
}
- /**
- * Uncompress bulk data
- */
+ /** @todo document */
public function uncompress() {
if ( $this->mCompressed ) {
$this->mItems = unserialize( gzinflate( $this->mItems ) );
}
+ /** @todo document */
/**
 * Serialization hook: compresses the items, then names the properties
 * that serialize() should store. mSize is not in the list.
 *
 * @return Array of property names
 */
function __sleep() {
$this->compress();
return array( 'mVersion', 'mCompressed', 'mItems', 'mDefaultHash' );
}
+ /** @todo document */
/**
 * Unserialization hook: calls uncompress() so the items are usable
 * immediately after unserialize().
 */
function __wakeup() {
$this->uncompress();
}
/**
- * Helper function for compression jobs
- * Returns true until the object is "full" and ready to be committed
+ * Determines if this object is happy
*/
public function isHappy( $maxFactor, $factorThreshold ) {
if ( count( $this->mItems ) == 0 ) {
/**
- * Pointer object for an item within a CGZ blob stored in the text table.
+ * @todo document (needs one-sentence top-level class description + some function descriptions).
*/
class HistoryBlobStub {
var $mOldId, $mHash, $mRef;
- /**
- * @param string $hash The content hash of the text
- * @param integer $oldid The old_id for the CGZ object
- */
+ /** @todo document */
/**
 * Constructor.
 *
 * @param $hash String: the content hash of the text; stored in mHash
 * @param $oldid Integer: not used by the visible constructor body
 *   (presumably the old_id of the containing object -- confirm)
 */
function HistoryBlobStub( $hash = '', $oldid = 0 ) {
$this->mHash = $hash;
}
return $this->mRef;
}
+ /** @todo document */
function getText() {
$fname = 'HistoryBlobStub::getText';
global $wgBlobCache;
return $obj->getItem( $this->mHash );
}
- /**
- * Get the content hash
- */
+ /** @todo document */
/**
 * Get the content hash held by this stub.
 *
 * @return the value of mHash as passed to the constructor
 */
function getHash() {
return $this->mHash;
}
class HistoryBlobCurStub {
var $mCurId;
- /**
- * @param integer $curid The cur_id pointed to
- */
+ /** @todo document */
/**
 * Constructor.
 *
 * @param $curid Integer: the cur_id this stub points to; stored in mCurId
 *   and used by getText() to look up the row
 */
function HistoryBlobCurStub( $curid = 0 ) {
$this->mCurId = $curid;
}
$this->mCurId = $id;
}
+ /** @todo document */
/**
 * Fetch the text this stub points to from the cur table.
 *
 * @return String the cur_text of the row whose cur_id is mCurId, or
 *   false if no such row was found
 */
function getText() {
	$dbr = wfGetDB( DB_SLAVE );
	$row = $dbr->selectRow( 'cur', array( 'cur_text' ), array( 'cur_id' => $this->mCurId ) );
	// selectRow() yields false when nothing matches; do not dereference it.
	if ( !$row ) {
		return false;
	}
	return $row->cur_text;
}
}
-
-/**
- * Diff-based history compression
- * Requires xdiff 1.5+ and zlib
- */
-class DiffHistoryBlob implements HistoryBlob {
- /** Uncompressed item cache */
- var $mItems = array();
-
- /**
- * Array of diffs, where $this->mDiffs[0] is the diff between
- * $this->mItems[0] and $this->mItems[1]
- */
- var $mDiffs = array();
-
- /**
- * The key for getText()
- */
- var $mDefaultKey;
-
- /**
- * Compressed storage
- */
- var $mCompressed;
-
- /**
- * True if the object is locked against further writes
- */
- var $mFrozen = false;
-
-
- function __construct() {
- if ( !function_exists( 'xdiff_string_bdiff' ) ){
- throw new MWException( "Need xdiff 1.5+ support to read or write DiffHistoryBlob\n" );
- }
- if ( !function_exists( 'gzdeflate' ) ) {
- throw new MWException( "Need zlib support to read or write DiffHistoryBlob\n" );
- }
- }
-
- function addItem( $text ) {
- if ( $this->mFrozen ) {
- throw new MWException( __METHOD__.": Cannot add more items after sleep/wakeup" );
- }
-
- $this->mItems[] = $text;
- $i = count( $this->mItems ) - 1;
- if ( $i > 0 ) {
- # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
- # "String is not zero-terminated"
- wfSuppressWarnings();
- $this->mDiffs[] = xdiff_string_bdiff( $this->mItems[$i-1], $text ) . '';
- wfRestoreWarnings();
- }
- return $i;
- }
-
- /**
-  * Get an item by the key that addItem() returned for it.
-  *
-  * Item 0 is kept verbatim; every later item is rebuilt on demand by
-  * applying the stored binary diffs in order, and cached in mItems.
-  *
-  * @param $key Integer: zero-based item index
-  * @return String the item text, or false if the key is out of range
-  */
- function getItem( $key ) {
-     $key = intval( $key );
-     // With a base item present, valid keys are 0..count(mDiffs): each diff
-     // contributes exactly one item after the base. The previous check
-     // compared against count(mDiffs) + 1 and so let one invalid key through.
-     if ( $key < 0 || !isset( $this->mItems[0] ) || $key > count( $this->mDiffs ) ) {
-         return false;
-     }
-     if ( $key == 0 ) {
-         return $this->mItems[0];
-     }
-
-     // mItems is filled contiguously from 0, so resume after the last cached item.
-     $last = count( $this->mItems ) - 1;
-     for ( $i = $last + 1; $i <= $key; $i++ ) {
-         # Need to do a null concatenation with warnings off, due to bugs in the current version of xdiff
-         # "String is not zero-terminated"
-         wfSuppressWarnings();
-         $this->mItems[$i] = xdiff_string_bpatch( $this->mItems[$i - 1], $this->mDiffs[$i - 1] ) . '';
-         wfRestoreWarnings();
-     }
-     return $this->mItems[$key];
- }
-
- function setText( $text ) {
- $this->mDefaultKey = $this->addItem( $text );
- }
-
- function getText() {
- return $this->getItem( $this->mDefaultKey );
- }
-
- function __sleep() {
- if ( !isset( $this->mItems[0] ) ) {
- // Empty object
- $info = false;
- } else {
- $info = array(
- 'base' => $this->mItems[0],
- 'diffs' => $this->mDiffs
- );
- }
- if ( isset( $this->mDefaultKey ) ) {
- $info['default'] = $this->mDefaultKey;
- }
- $this->mCompressed = gzdeflate( serialize( $info ) );
- return array( 'mCompressed' );
- }
-
- function __wakeup() {
- // addItem() doesn't work if mItems is partially filled from mDiffs
- $this->mFrozen = true;
- $info = unserialize( gzinflate( $this->mCompressed ) );
- unset( $this->mCompressed );
-
- if ( !$info ) {
- // Empty object
- return;
- }
-
- if ( isset( $info['default'] ) ) {
- $this->mDefaultKey = $info['default'];
- }
- $this->mItems[0] = $info['base'];
- $this->mDiffs = $info['diffs'];
- }
-}
+++ /dev/null
-<?php
-
-$optionsWithArgs = array( 'start', 'limit', 'type' );
-require( dirname(__FILE__).'/../commandLine.inc' );
-
-if ( !isset( $args[0] ) ) {
- echo "Usage: php testCompression.php [--type=<type>] [--start=<start-date>] [--limit=<num-revs>] <page-title>\n";
- exit( 1 );
-}
-
-$title = Title::newFromText( $args[0] );
-if ( isset( $options['start'] ) ) {
- $start = wfTimestamp( TS_MW, strtotime( $options['start'] ) );
- echo "Starting from " . $wgLang->timeanddate( $start ) . "\n";
-} else {
- $start = '19700101000000';
-}
-$limit = isset( $options['limit'] ) ? $options['limit'] : 10;
-$type = isset( $options['type'] ) ? $options['type'] : 'ConcatenatedGzipHistoryBlob';
-
-
-$dbr = wfGetDB( DB_SLAVE );
-$res = $dbr->select(
- array( 'page', 'revision', 'text' ),
- '*',
- array(
- 'page_namespace' => $title->getNamespace(),
- 'page_title' => $title->getDBkey(),
- 'page_id=rev_page',
- 'rev_timestamp > ' . $dbr->addQuotes( $dbr->timestamp( $start ) ),
- 'rev_text_id=old_id'
- ), __FILE__, array( 'LIMIT' => $limit )
-);
-
-$blob = new $type;
-$hashes = array();
-$keys = array();
-$uncompressedSize = 0;
-$t = -microtime( true );
-foreach ( $res as $row ) {
- $revision = new Revision( $row );
- $text = $revision->getText();
- $uncompressedSize += strlen( $text );
- $hashes[$row->rev_id] = md5( $text );
- $keys[$row->rev_id] = $blob->addItem( $text );
-}
-
-$serialized = serialize( $blob );
-$t += microtime( true );
-
-printf( "Compression ratio for %d revisions: %5.2f, %s -> %s\n",
- $res->numRows(),
- $uncompressedSize / strlen( $serialized ),
- $wgLang->formatSize( $uncompressedSize ),
- $wgLang->formatSize( strlen( $serialized ) )
-);
-printf( "Compression time: %5.2f ms\n", $t * 1000 );
-
-$t = -microtime( true );
-$blob = unserialize( $serialized );
-foreach ( $keys as $id => $key ) {
- $text = $blob->getItem( $key );
- if ( md5( $text ) != $hashes[$id] ) {
- echo "Content hash mismatch for rev_id $id\n";
- #var_dump( $text );
- }
-}
-$t += microtime( true );
-printf( "Decompression time: %5.2f ms\n", $t * 1000 );
-