From ac02f17a9f96f2960499b5a9ba194bf2e534ff0e Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Tue, 3 Feb 2015 11:53:27 -0500 Subject: [PATCH] API: Improve queries for prop=revisions in enum mode This reworks the queries to better use the indexes available, and at the same time sorts results by rev_timestamp like they always should have been rather than rev_id. See T88084 for details. This also takes the opportunity to replace !is_null with !== null, since it was annoying me while writing this. Bug: T88084 Bug: T91883 Change-Id: Ie175c6014e75848e9dda6b413175c8575d1ef6af --- RELEASE-NOTES-1.26 | 2 + includes/api/ApiQueryRevisions.php | 111 ++++++++++++++++++----------- 2 files changed, 70 insertions(+), 43 deletions(-) diff --git a/RELEASE-NOTES-1.26 b/RELEASE-NOTES-1.26 index 9426635060..57166efb71 100644 --- a/RELEASE-NOTES-1.26 +++ b/RELEASE-NOTES-1.26 @@ -23,6 +23,8 @@ production. tag is meant to be hidden from user interfaces. * action=import no longer allows both the namespace= and rootpage= parameters to be set. If they are both set, the value of rootpage= will be ignored. +* prop=revision output in enum mode is now sorted by timestamp rather than + revision ID. This usually won't make any difference. === Action API internal changes in 1.26 === diff --git a/includes/api/ApiQueryRevisions.php b/includes/api/ApiQueryRevisions.php index 552ca3b4e9..1a65fe38fa 100644 --- a/includes/api/ApiQueryRevisions.php +++ b/includes/api/ApiQueryRevisions.php @@ -91,10 +91,10 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { // Enum mode can only be used when exactly one page is provided. // Enumerating revisions on multiple pages make it extremely // difficult to manage continuations and require additional SQL indexes - $enumRevMode = ( !is_null( $params['user'] ) || !is_null( $params['excludeuser'] ) || - !is_null( $params['limit'] ) || !is_null( $params['startid'] ) || - !is_null( $params['endid'] ) || $params['dir'] === 'newer' || - !is_null( $params['start'] ) || !is_null( $params['end'] ) ); + $enumRevMode = ( $params['user'] !== null || $params['excludeuser'] !== null || + $params['limit'] !== null || $params['startid'] !== null || + $params['endid'] !== null || $params['dir'] === 'newer' || + $params['start'] !== null || $params['end'] !== null ); $pageSet = $this->getPageSet(); $pageCount = $pageSet->getGoodTitleCount(); @@ -149,7 +149,7 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { } } else { $this->limit = $this->getParameter( 'limit' ) ?: 10; - $this->addFields( array( 'rev_id', 'rev_page' ) ); + $this->addFields( array( 'rev_id', 'rev_timestamp', 'rev_page' ) ); } if ( $this->fld_tags ) { @@ -160,7 +160,7 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { $this->addFields( 'ts_tags' ); } - if ( !is_null( $params['tag'] ) ) { + if ( $params['tag'] !== null ) { $this->addTables( 'change_tag' ); $this->addJoinConds( array( 'change_tag' => array( 'INNER JOIN', array( 'rev_id=ct_rev_id' ) ) ) @@ -196,58 +196,77 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { } if ( $enumRevMode ) { + // Indexes targeted: + // page_timestamp if we don't have rvuser + // page_user_timestamp if we have a logged-in rvuser + // page_timestamp or usertext_timestamp if we have an IP rvuser + // This is mostly to prevent parameter errors (and optimize SQL?) - if ( !is_null( $params['startid'] ) && !is_null( $params['start'] ) ) { + if ( $params['startid'] !== null && $params['start'] !== null ) { $this->dieUsage( 'start and startid cannot be used together', 'badparams' ); } - if ( !is_null( $params['endid'] ) && !is_null( $params['end'] ) ) { + if ( $params['endid'] !== null && $params['end'] !== null ) { $this->dieUsage( 'end and endid cannot be used together', 'badparams' ); } - if ( !is_null( $params['user'] ) && !is_null( $params['excludeuser'] ) ) { + if ( $params['user'] !== null && $params['excludeuser'] !== null ) { $this->dieUsage( 'user and excludeuser cannot be used together', 'badparams' ); } - // Continuing effectively uses startid. But we can't use rvstartid - // directly, because there is no way to tell the client to ''not'' - // send rvstart if it sent it in the original query. So instead we - // send the continuation startid as rvcontinue, and ignore both - // rvstart and rvstartid when that is supplied. - if ( !is_null( $params['continue'] ) ) { - $params['startid'] = $params['continue']; - $params['start'] = null; + if ( $params['continue'] !== null ) { + $cont = explode( '|', $params['continue'] ); + $this->dieContinueUsageIf( count( $cont ) != 2 ); + $op = ( $params['dir'] === 'newer' ? '>' : '<' ); + $continueTimestamp = $db->addQuotes( $db->timestamp( $cont[0] ) ); + $continueId = (int)$cont[1]; + $this->dieContinueUsageIf( $continueId != $cont[1] ); + $this->addWhere( "rev_timestamp $op $continueTimestamp OR " . + "(rev_timestamp = $continueTimestamp AND " . + "rev_id $op= $continueId)" + ); } - // This code makes an assumption that sorting by rev_id and rev_timestamp produces - // the same result. This way users may request revisions starting at a given time, - // but to page through results use the rev_id returned after each page. - // Switching to rev_id removes the potential problem of having more than - // one row with the same timestamp for the same page. - // The order needs to be the same as start parameter to avoid SQL filesort. - if ( is_null( $params['startid'] ) && is_null( $params['endid'] ) ) { - $this->addTimestampWhereRange( 'rev_timestamp', $params['dir'], - $params['start'], $params['end'] ); - } else { - $this->addWhereRange( 'rev_id', $params['dir'], - $params['startid'], $params['endid'] ); - // One of start and end can be set - // If neither is set, this does nothing - $this->addTimestampWhereRange( 'rev_timestamp', $params['dir'], - $params['start'], $params['end'], false ); + // Query optimization: since we're targeting ranges of + // rev_timestamp,rev_id, if we're given an id then extract the + // corresponding timestamp from the DB. + // Note we don't use Revision::getTimestampFromId() since we don't + // have a Title to pass it and there's not any real need to create one. + if ( $params['startid'] !== null ) { + $params['start'] = $db->selectField( 'revision', 'rev_timestamp', + array( 'rev_id' => $params['startid'] ), __METHOD__ ); + } + if ( $params['endid'] !== null ) { + $params['end'] = $db->selectField( 'revision', 'rev_timestamp', + array( 'rev_id' => $params['endid'] ), __METHOD__ ); } + $this->addTimestampWhereRange( 'rev_timestamp', $params['dir'], + $params['start'], $params['end'] ); + $this->addWhereRange( 'rev_id', $params['dir'], + $params['startid'], $params['endid'] ); + // There is only one ID, use it $ids = array_keys( $pageSet->getGoodTitles() ); $this->addWhereFld( 'rev_page', reset( $ids ) ); - if ( !is_null( $params['user'] ) ) { - $this->addWhereFld( 'rev_user_text', $params['user'] ); - } elseif ( !is_null( $params['excludeuser'] ) ) { - $this->addWhere( 'rev_user_text != ' . - $db->addQuotes( $params['excludeuser'] ) ); + if ( $params['user'] !== null ) { + $user = User::newFromName( $params['user'] ); + if ( $user && $user->getId() > 0 ) { + $this->addWhereFld( 'rev_user', $user->getId() ); + } else { + $this->addWhereFld( 'rev_user_text', $params['user'] ); + } + } elseif ( $params['excludeuser'] !== null ) { + $user = User::newFromName( $params['excludeuser'] ); + if ( $user && $user->getId() > 0 ) { + $this->addWhere( 'rev_user != ' . $user->getId() ); + } else { + $this->addWhere( 'rev_user_text != ' . + $db->addQuotes( $params['excludeuser'] ) ); + } } - if ( !is_null( $params['user'] ) || !is_null( $params['excludeuser'] ) ) { + if ( $params['user'] !== null || $params['excludeuser'] !== null ) { // Paranoia: avoid brute force searches (bug 17342) if ( !$this->getUser()->isAllowed( 'deletedhistory' ) ) { $bitmask = Revision::DELETED_USER; @@ -261,16 +280,20 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { } } } elseif ( $revCount > 0 ) { + // Always targets the PRIMARY index + $revs = $pageSet->getLiveRevisionIDs(); // Get all revision IDs $this->addWhereFld( 'rev_id', array_keys( $revs ) ); - if ( !is_null( $params['continue'] ) ) { + if ( $params['continue'] !== null ) { $this->addWhere( 'rev_id >= ' . intval( $params['continue'] ) ); } $this->addOption( 'ORDER BY', 'rev_id' ); } elseif ( $pageCount > 0 ) { + // Always targets the rev_page_id index + $titles = $pageSet->getGoodTitles(); // When working in multi-page non-enumeration mode, @@ -282,7 +305,7 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { // Every time someone relies on equality propagation, god kills a kitten :) $this->addWhereFld( 'rev_page', array_keys( $titles ) ); - if ( !is_null( $params['continue'] ) ) { + if ( $params['continue'] !== null ) { $cont = explode( '|', $params['continue'] ); $this->dieContinueUsageIf( count( $cont ) != 2 ); $pageid = intval( $cont[0] ); @@ -312,7 +335,8 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { // We've reached the one extra which shows that there are // additional pages to be had. Stop here... if ( $enumRevMode ) { - $this->setContinueEnumParameter( 'continue', intval( $row->rev_id ) ); + $this->setContinueEnumParameter( 'continue', + $row->rev_timestamp . '|' . intval( $row->rev_id ) ); } elseif ( $revCount > 0 ) { $this->setContinueEnumParameter( 'continue', intval( $row->rev_id ) ); } else { @@ -344,7 +368,8 @@ class ApiQueryRevisions extends ApiQueryRevisionsBase { $fit = $this->addPageSubItem( $row->rev_page, $rev, 'rev' ); if ( !$fit ) { if ( $enumRevMode ) { - $this->setContinueEnumParameter( 'continue', intval( $row->rev_id ) ); + $this->setContinueEnumParameter( 'continue', + $row->rev_timestamp . '|' . intval( $row->rev_id ) ); } elseif ( $revCount > 0 ) { $this->setContinueEnumParameter( 'continue', intval( $row->rev_id ) ); } else { -- 2.20.1