*/
/** */
+require_once( 'Revision.php' );
+
+/** @todo document */
function compressOldPages( $start = 0 ) {
$fname = 'compressOldPages';
} while( true );
}
+/** @todo document */
function compressPage( $row ) {
$fname = 'compressPage';
if( false !== strpos( $row->old_flags, "gzip" ) ) {
/**
 * Revision load styles used by compressWithConcat().
 *
 * LS_INDIVIDUAL: the revision query fetches only rev_id / rev_text_id, and
 * the text row for each revision is then selected on its own (FOR UPDATE)
 * while the chunk is being built.
 */
define( 'LS_INDIVIDUAL', 0 );
/**
 * LS_CHUNKED: the revision query joins the text table (rev_text_id=old_id)
 * and fetches old_flags / old_text for every matching revision of the page
 * in a single FOR UPDATE select.
 */
define( 'LS_CHUNKED', 1 );
+/** @todo document */
function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorThreshold, $beginDate, $endDate )
{
$fname = 'compressWithConcat';
$loadStyle = LS_CHUNKED;
+ $dbr =& wfGetDB( DB_SLAVE );
$dbw =& wfGetDB( DB_MASTER );
- # First get a list of all pages
- $pageRes = $dbw->select( 'cur', array('cur_namespace', 'cur_title'), false, $fname );
+ # Get all articles by page_id
+ $maxPageId = $dbr->selectField( 'page', 'max(page_id)', '', $fname );
+ $pageConds = array();
- # For each of those, get a list of revisions which fit the criteria
- $conds = array();
+ if ( $exclude_ns0 ) {
+ print "Excluding main namespace\n";
+ $pageConds[] = 'page_namespace<>0';
+ }
+ if ( $queryExtra ) {
+ $pageConds[] = $queryExtra;
+ }
+
+ # For each article, get a list of revisions which fit the criteria
+ # No recompression, use a condition on old_flags
+ $conds = array("old_flags NOT LIKE '%object%'");
+
if ( $beginDate ) {
- $conds[] = "old_timestamp>'" . $beginDate . "'";
+ $conds[] = "rev_timestamp>'" . $beginDate . "'";
}
if ( $endDate ) {
- $conds[] = "old_timestamp<'" . $endDate . "'";
- }
- if ( $startId ) {
- $conds[] = 'old_id>=' . $startId;
+ $conds[] = "rev_timestamp<'" . $endDate . "'";
}
if ( $loadStyle == LS_CHUNKED ) {
- $fields = array( 'old_id', 'old_flags', 'old_text' );
+ $tables = array( 'revision', 'text' );
+ $fields = array( 'rev_id', 'rev_text_id', 'old_flags', 'old_text' );
+ $conds[] = 'rev_text_id=old_id';
$revLoadOptions = 'FOR UPDATE';
} else {
- $fields = array( 'old_id' );
+ $tables = array( 'revision' );
+ $fields = array( 'rev_id', 'rev_text_id' );
$revLoadOptions = array();
}
- while ( $pageRow = $dbw->fetchObject( $pageRes ) ) {
+ $oldReadsSinceLastSlaveWait = 0; #check slave lag periodically
+ $totalMatchingRevisions = 0;
+ $masterPos = false;
+ for ( $pageId = $startId; $pageId <= $maxPageId; $pageId++ ) {
+ $pageRes = $dbr->select( 'page', array('page_id', 'page_namespace', 'page_title'),
+ $pageConds + array('page_id' => $pageId), $fname );
+ if ( $dbr->numRows( $pageRes ) == 0 ) {
+ continue;
+ }
+ $pageRow = $dbr->fetchObject( $pageRes );
+
# Display progress
- $titleObj = Title::makeTitle( $pageRow->cur_namespace, $pageRow->cur_title );
- print $titleObj->getPrefixedDBkey() . " ";
+ $titleObj = Title::makeTitle( $pageRow->page_namespace, $pageRow->page_title );
+ print "$pageId\t" . $titleObj->getPrefixedDBkey() . " ";
# Load revisions
- $revRes = $dbw->select( 'old', $fields,
- array( 'old_namespace' => $pageRow->cur_namespace, 'old_title' => $pageRow->cur_title ) + $conds,
+ $revRes = $dbw->select( $tables, $fields,
+ array( 'rev_page' => $pageRow->page_id ) + $conds,
$fname,
$revLoadOptions
);
$stubs = array();
$dbw->begin();
$usedChunk = false;
- $primaryOldid = $revs[$i]->old_id;
+ $primaryOldid = $revs[$i]->rev_text_id;
# Get the text of each revision and add it to the object
for ( $j = 0; $j < $thisChunkSize && $chunk->isHappy( $maxChunkFactor, $factorThreshold ); $j++ ) {
- $oldid = $revs[$i + $j]->old_id;
+ $oldid = $revs[$i + $j]->rev_text_id;
# Get text
if ( $loadStyle == LS_INDIVIDUAL ) {
- $textRow = $dbw->selectRow( 'old',
+ $textRow = $dbw->selectRow( 'text',
array( 'old_flags', 'old_text' ),
array( 'old_id' => $oldid ),
$fname,
'FOR UPDATE'
);
- $text = Article::getRevisionText( $textRow );
+ $text = Revision::getRevisionText( $textRow );
} else {
- $text = Article::getRevisionText( $revs[$i + $j] );
+ $text = Revision::getRevisionText( $revs[$i + $j] );
}
if ( $text === false ) {
# If we couldn't actually use any stubs because the pages were too small, do nothing
if ( $usedChunk ) {
# Store the main object
- $dbw->update( 'old',
+ $dbw->update( 'text',
array( /* SET */
'old_text' => serialize( $chunk ),
'old_flags' => 'object',
for ( $j = 1; $j < $thisChunkSize; $j++ ) {
# Skip if not compressing
if ( $stubs[$j] !== false ) {
- $dbw->update( 'old',
+ $dbw->update( 'text',
array( /* SET */
'old_text' => $stubs[$j],
'old_flags' => 'object',
), array( /* WHERE */
- 'old_id' => $revs[$i + $j]->old_id
+ 'old_id' => $revs[$i + $j]->rev_text_id
)
);
}