X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2Fexport%2FWikiExporter.php;h=6c7a449372513b156ddcebc1d1a5584a3510d6bb;hb=74af4f507c297d75c1c62cb82b8eb47d65d85630;hp=54de26d4dd5c756600c597144fac8b86d272f018;hpb=d716155c8b2d6e4a51a4110195cee7a1794846e8;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/export/WikiExporter.php b/includes/export/WikiExporter.php index 54de26d4dd..6c7a449372 100644 --- a/includes/export/WikiExporter.php +++ b/includes/export/WikiExporter.php @@ -27,6 +27,9 @@ * @defgroup Dump Dump */ +use Wikimedia\Rdbms\ResultWrapper; +use Wikimedia\Rdbms\IDatabase; + /** * @ingroup SpecialPage Dump */ @@ -88,8 +91,8 @@ class WikiExporter { * @param int $buffer One of WikiExporter::BUFFER or WikiExporter::STREAM * @param int $text One of WikiExporter::TEXT or WikiExporter::STUB */ - function __construct( $db, $history = WikiExporter::CURRENT, - $buffer = WikiExporter::BUFFER, $text = WikiExporter::TEXT ) { + function __construct( $db, $history = self::CURRENT, + $buffer = self::BUFFER, $text = self::TEXT ) { $this->db = $db; $this->history = $history; $this->buffer = $buffer; @@ -103,7 +106,7 @@ class WikiExporter { * various row objects and XML output for filtering. Filters * can be chained or used as callbacks. * - * @param DumpOutput $sink + * @param DumpOutput &$sink */ public function setOutputSink( &$sink ) { $this->sink =& $sink; @@ -134,13 +137,21 @@ class WikiExporter { * @param int $start Inclusive lower limit (this id is included) * @param int $end Exclusive upper limit (this id is not included) * If 0, no upper limit. + * @param bool $orderRevs order revisions within pages in ascending order */ - public function pagesByRange( $start, $end ) { - $condition = 'page_id >= ' . intval( $start ); - if ( $end ) { - $condition .= ' AND page_id < ' . intval( $end ); + public function pagesByRange( $start, $end, $orderRevs ) { + if ( $orderRevs ) { + $condition = 'rev_page >= ' . intval( $start ); + if ( $end ) { + $condition .= ' AND rev_page < ' . intval( $end ); + } + } else { + $condition = 'page_id >= ' . intval( $start ); + if ( $end ) { + $condition .= ' AND page_id < ' . intval( $end ); + } } - $this->dumpFrom( $condition ); + $this->dumpFrom( $condition, $orderRevs ); } /** @@ -216,15 +227,20 @@ class WikiExporter { $this->author_list = ""; // rev_deleted + $revQuery = Revision::getQueryInfo( [ 'page' ] ); $res = $this->db->select( - [ 'page', 'revision' ], - [ 'DISTINCT rev_user_text', 'rev_user' ], + $revQuery['tables'], + [ + 'rev_user_text' => $revQuery['fields']['rev_user_text'], + 'rev_user' => $revQuery['fields']['rev_user'], + ], [ $this->db->bitAnd( 'rev_deleted', Revision::DELETED_USER ) . ' = 0', $cond, - 'page_id = rev_id', ], - __METHOD__ + __METHOD__, + [ 'DISTINCT' ], + $revQuery['joins'] ); foreach ( $res as $row ) { @@ -242,13 +258,14 @@ class WikiExporter { /** * @param string $cond + * @param bool $orderRevs * @throws MWException * @throws Exception */ - protected function dumpFrom( $cond = '' ) { + protected function dumpFrom( $cond = '', $orderRevs = false ) { # For logging dumps... if ( $this->history & self::LOGS ) { - $where = [ 'user_id = log_user' ]; + $where = []; # Hide private logs $hideLogs = LogEventsList::getExcludeClause( $this->db ); if ( $hideLogs ) { @@ -261,19 +278,27 @@ class WikiExporter { # Get logging table name for logging.* clause $logging = $this->db->tableName( 'logging' ); - if ( $this->buffer == WikiExporter::STREAM ) { + if ( $this->buffer == self::STREAM ) { $prev = $this->db->bufferResults( false ); } $result = null; // Assuring $result is not undefined, if exception occurs early + + $commentQuery = CommentStore::getStore()->getJoin( 'log_comment' ); + $actorQuery = ActorMigration::newMigration()->getJoin( 'log_user' ); + try { - $result = $this->db->select( [ 'logging', 'user' ], - [ "{$logging}.*", 'user_name' ], // grab the user name + $result = $this->db->select( + array_merge( [ 'logging' ], $commentQuery['tables'], $actorQuery['tables'], [ 'user' ] ), + [ "{$logging}.*", 'user_name' ] + $commentQuery['fields'] + $actorQuery['fields'], $where, __METHOD__, - [ 'ORDER BY' => 'log_id', 'USE INDEX' => [ 'logging' => 'PRIMARY' ] ] + [ 'ORDER BY' => 'log_id', 'USE INDEX' => [ 'logging' => 'PRIMARY' ] ], + [ + 'user' => [ 'JOIN', 'user_id = ' . $actorQuery['fields']['log_user'] ] + ] + $commentQuery['joins'] + $actorQuery['joins'] ); $this->outputLogStream( $result ); - if ( $this->buffer == WikiExporter::STREAM ) { + if ( $this->buffer == self::STREAM ) { $this->db->bufferResults( $prev ); } } catch ( Exception $e ) { @@ -292,7 +317,7 @@ class WikiExporter { // Putting database back in previous buffer mode try { - if ( $this->buffer == WikiExporter::STREAM ) { + if ( $this->buffer == self::STREAM ) { $this->db->bufferResults( $prev ); } } catch ( Exception $e2 ) { @@ -305,13 +330,29 @@ class WikiExporter { } # For page dumps... } else { - $tables = [ 'page', 'revision' ]; + $revOpts = [ 'page' ]; + if ( $this->text != self::STUB ) { + $revOpts[] = 'text'; + } + $revQuery = Revision::getQueryInfo( $revOpts ); + + // We want page primary rather than revision + $tables = array_merge( [ 'page' ], array_diff( $revQuery['tables'], [ 'page' ] ) ); + $join = $revQuery['joins'] + [ + 'revision' => $revQuery['joins']['page'] + ]; + unset( $join['page'] ); + + $fields = array_merge( $revQuery['fields'], [ 'page_restrictions' ] ); + + $conds = []; + if ( $cond !== '' ) { + $conds[] = $cond; + } $opts = [ 'ORDER BY' => 'page_id ASC' ]; $opts['USE INDEX'] = []; - $join = []; if ( is_array( $this->history ) ) { # Time offset/limit for all pages/history... - $revJoin = 'page_id=rev_page'; # Set time order if ( $this->history['dir'] == 'asc' ) { $op = '>'; @@ -322,24 +363,30 @@ class WikiExporter { } # Set offset if ( !empty( $this->history['offset'] ) ) { - $revJoin .= " AND rev_timestamp $op " . + $conds[] = "rev_timestamp $op " . $this->db->addQuotes( $this->db->timestamp( $this->history['offset'] ) ); } - $join['revision'] = [ 'INNER JOIN', $revJoin ]; # Set query limit if ( !empty( $this->history['limit'] ) ) { $opts['LIMIT'] = intval( $this->history['limit'] ); } - } elseif ( $this->history & WikiExporter::FULL ) { + } elseif ( $this->history & self::FULL ) { # Full history dumps... - $join['revision'] = [ 'INNER JOIN', 'page_id=rev_page' ]; - } elseif ( $this->history & WikiExporter::CURRENT ) { + # query optimization for history stub dumps + if ( $this->text == self::STUB && $orderRevs ) { + $tables = $revQuery['tables']; + $opts['ORDER BY'] = [ 'rev_page ASC', 'rev_id ASC' ]; + $opts['USE INDEX']['revision'] = 'rev_page_id'; + unset( $join['revision'] ); + $join['page'] = [ 'INNER JOIN', 'rev_page=page_id' ]; + } + } elseif ( $this->history & self::CURRENT ) { # Latest revision dumps... if ( $this->list_authors && $cond != '' ) { // List authors, if so desired $this->do_list_authors( $cond ); } $join['revision'] = [ 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' ]; - } elseif ( $this->history & WikiExporter::STABLE ) { + } elseif ( $this->history & self::STABLE ) { # "Stable" revision dumps... # Default JOIN, to be overridden... $join['revision'] = [ 'INNER JOIN', 'page_id=rev_page AND page_latest=rev_id' ]; @@ -347,40 +394,35 @@ class WikiExporter { if ( Hooks::run( 'WikiExporter::dumpStableQuery', [ &$tables, &$opts, &$join ] ) ) { throw new MWException( __METHOD__ . " given invalid history dump type." ); } - } elseif ( $this->history & WikiExporter::RANGE ) { + } elseif ( $this->history & self::RANGE ) { # Dump of revisions within a specified range - $join['revision'] = [ 'INNER JOIN', 'page_id=rev_page' ]; $opts['ORDER BY'] = [ 'rev_page ASC', 'rev_id ASC' ]; } else { # Unknown history specification parameter? throw new MWException( __METHOD__ . " given invalid history dump type." ); } - # Query optimization hacks - if ( $cond == '' ) { - $opts[] = 'STRAIGHT_JOIN'; - $opts['USE INDEX']['page'] = 'PRIMARY'; - } - # Build text join options - if ( $this->text != WikiExporter::STUB ) { // 1-pass - $tables[] = 'text'; - $join['text'] = [ 'INNER JOIN', 'rev_text_id=old_id' ]; - } - if ( $this->buffer == WikiExporter::STREAM ) { + if ( $this->buffer == self::STREAM ) { $prev = $this->db->bufferResults( false ); } - $result = null; // Assuring $result is not undefined, if exception occurs early try { Hooks::run( 'ModifyExportQuery', [ $this->db, &$tables, &$cond, &$opts, &$join ] ); # Do the query! - $result = $this->db->select( $tables, '*', $cond, __METHOD__, $opts, $join ); + $result = $this->db->select( + $tables, + $fields, + $conds, + __METHOD__, + $opts, + $join + ); # Output dump results $this->outputPageStream( $result ); - if ( $this->buffer == WikiExporter::STREAM ) { + if ( $this->buffer == self::STREAM ) { $this->db->bufferResults( $prev ); } } catch ( Exception $e ) { @@ -399,7 +441,7 @@ class WikiExporter { // Putting database back in previous buffer mode try { - if ( $this->buffer == WikiExporter::STREAM ) { + if ( $this->buffer == self::STREAM ) { $this->db->bufferResults( $prev ); } } catch ( Exception $e2 ) {