fixed discussion namespace
[lhc/web/wiklou.git] / maintenance / compressOld.inc
index d5159ba..f6a9f43 100644 (file)
@@ -5,6 +5,9 @@
  */
 
 /** */
+require_once( 'Revision.php' );
+
+/** @todo document */
 function compressOldPages( $start = 0 ) {
        $fname = 'compressOldPages';
 
@@ -31,6 +34,7 @@ function compressOldPages( $start = 0 ) {
        } while( true );
 }
 
+/** @todo document */
 function compressPage( $row ) {
        $fname = 'compressPage';
        if( false !== strpos( $row->old_flags, "gzip" ) ) {
@@ -54,43 +58,66 @@ function compressPage( $row ) {
 define( 'LS_INDIVIDUAL', 0 );
 define( 'LS_CHUNKED', 1 );
 
+/** @todo document */
 function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorThreshold, $beginDate, $endDate )
 {
        $fname = 'compressWithConcat';
        $loadStyle = LS_CHUNKED;
        
+       $dbr =& wfGetDB( DB_SLAVE );
        $dbw =& wfGetDB( DB_MASTER );
 
-       # First get a list of all pages
-       $pageRes = $dbw->select( 'cur', array('cur_namespace', 'cur_title'), false, $fname );
+       # Find the highest page_id so every article can be visited by page_id
+       $maxPageId = $dbr->selectField( 'page', 'max(page_id)', '', $fname );
+       $pageConds = array();
 
-       # For each of those, get a list of revisions which fit the criteria
-       $conds = array();
+       if ( $exclude_ns0 ) {
+               print "Excluding main namespace\n";
+               $pageConds[] = 'page_namespace<>0';
+       }
+       if ( $queryExtra ) {
+                $pageConds[] = $queryExtra;
+       }
+
+       # For each article, get a list of revisions which fit the criteria
+       # Avoid recompression: skip text rows whose old_flags already contain 'object'
+       $conds = array("old_flags NOT LIKE '%object%'");
+       
        if ( $beginDate ) {
-               $conds[] = "old_timestamp>'" . $beginDate . "'";
+               $conds[] = "rev_timestamp>'" . $beginDate . "'";
        } 
        if ( $endDate )  {
-               $conds[] = "old_timestamp<'" . $endDate . "'";
-       }
-       if ( $startId ) {
-               $conds[] = 'old_id>=' . $startId;
+               $conds[] = "rev_timestamp<'" . $endDate . "'";
        }
        if ( $loadStyle == LS_CHUNKED ) {
-               $fields = array( 'old_id', 'old_flags', 'old_text' );
+               $tables = array( 'revision', 'text' );
+               $fields = array( 'rev_id', 'rev_text_id', 'old_flags', 'old_text' );
+               $conds[] = 'rev_text_id=old_id';
                $revLoadOptions = 'FOR UPDATE';
        } else {
-               $fields = array( 'old_id' );
+               $tables = array( 'revision' );
+               $fields = array( 'rev_id', 'rev_text_id' );
                $revLoadOptions = array();
        }
 
-       while ( $pageRow = $dbw->fetchObject( $pageRes ) ) {
+       $oldReadsSinceLastSlaveWait = 0;        #check slave lag periodically
+       $totalMatchingRevisions = 0;
+       $masterPos = false;
+       for ( $pageId = $startId; $pageId <= $maxPageId; $pageId++ ) {
+               $pageRes = $dbr->select( 'page', array('page_id', 'page_namespace', 'page_title'), 
+                       $pageConds + array('page_id' => $pageId), $fname );
+               if ( $dbr->numRows( $pageRes ) == 0 ) {
+                       continue;
+               }
+               $pageRow = $dbr->fetchObject( $pageRes );
+
                # Display progress
-               $titleObj = Title::makeTitle( $pageRow->cur_namespace, $pageRow->cur_title );
-               print $titleObj->getPrefixedDBkey() . " ";
+               $titleObj = Title::makeTitle( $pageRow->page_namespace, $pageRow->page_title );
+               print "$pageId\t" . $titleObj->getPrefixedDBkey() . " ";
 
                # Load revisions
-               $revRes = $dbw->select( 'old', $fields,
-                       array( 'old_namespace' => $pageRow->cur_namespace, 'old_title' => $pageRow->cur_title ) + $conds, 
+               $revRes = $dbw->select( $tables, $fields,
+                       array( 'rev_page' => $pageRow->page_id ) + $conds, 
                        $fname,
                        $revLoadOptions
                );
@@ -118,23 +145,23 @@ function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorTh
                        $stubs = array();
                        $dbw->begin();
                        $usedChunk = false;
-                       $primaryOldid = $revs[$i]->old_id;
+                       $primaryOldid = $revs[$i]->rev_text_id;
                        
                        # Get the text of each revision and add it to the object
                        for ( $j = 0; $j < $thisChunkSize && $chunk->isHappy( $maxChunkFactor, $factorThreshold ); $j++ ) {
-                               $oldid = $revs[$i + $j]->old_id;
+                               $oldid = $revs[$i + $j]->rev_text_id;
                                
                                # Get text
                                if ( $loadStyle == LS_INDIVIDUAL ) {
-                                       $textRow = $dbw->selectRow( 'old', 
+                                       $textRow = $dbw->selectRow( 'text', 
                                                array( 'old_flags', 'old_text' ),
                                                array( 'old_id' => $oldid ),
                                                $fname,
                                                'FOR UPDATE'
                                        );
-                                       $text = Article::getRevisionText( $textRow );
+                                       $text = Revision::getRevisionText( $textRow );
                                } else {
-                                       $text = Article::getRevisionText( $revs[$i + $j] );
+                                       $text = Revision::getRevisionText( $revs[$i + $j] );
                                }
 
                                if ( $text === false ) {
@@ -167,7 +194,7 @@ function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorTh
                        # If we couldn't actually use any stubs because the pages were too small, do nothing
                        if ( $usedChunk ) {
                                # Store the main object
-                               $dbw->update( 'old',
+                               $dbw->update( 'text',
                                        array( /* SET */
                                                'old_text' => serialize( $chunk ),
                                                'old_flags' => 'object',
@@ -180,12 +207,12 @@ function compressWithConcat( $startId, $maxChunkSize, $maxChunkFactor, $factorTh
                                for ( $j = 1; $j < $thisChunkSize; $j++ ) {
                                        # Skip if not compressing
                                        if ( $stubs[$j] !== false ) {
-                                               $dbw->update( 'old',
+                                               $dbw->update( 'text',
                                                        array( /* SET */
                                                                'old_text' => $stubs[$j],
                                                                'old_flags' => 'object',
                                                        ), array( /* WHERE */
-                                                               'old_id' => $revs[$i + $j]->old_id
+                                                               'old_id' => $revs[$i + $j]->rev_text_id
                                                        )
                                                );
                                        }