X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=includes%2FRevision%2FRevisionStore.php;h=420fe652bf348d7e13a4ee906fdfc55912676f65;hb=748c5eae2fd5d897c94c48771161c259941a7488;hp=c444cc4e7d654e5df84b6a9515beeff8329c2f85;hpb=f725f4182a7b44a311761207cd95541c33f6abd7;p=lhc%2Fweb%2Fwiklou.git diff --git a/includes/Revision/RevisionStore.php b/includes/Revision/RevisionStore.php index c444cc4e7d..420fe652bf 100644 --- a/includes/Revision/RevisionStore.php +++ b/includes/Revision/RevisionStore.php @@ -1653,6 +1653,11 @@ class RevisionStore = $this->emulateContentId( intval( $row->rev_text_id ) ); } + // We may have a fake blob_data field from getSlotRowsForBatch(), use it! + if ( isset( $row->blob_data ) ) { + $slotContents[$row->content_address] = $row->blob_data; + } + $contentCallback = function ( SlotRecord $slot ) use ( $slotContents, $queryFlags ) { $blob = null; if ( isset( $slotContents[$slot->getAddress()] ) ) { @@ -1897,7 +1902,8 @@ class RevisionStore * @param array $options Supports the following options: * 'slots' - whether metadata about revision slots should be * loaded immediately. Supports falsy or truthy value as well - * as an explicit list of slot role names. + * as an explicit list of slot role names. The main slot will + * always be loaded. * 'content'- whether the actual content of the slots should be * preloaded. * @param int $queryFlags @@ -1918,7 +1924,7 @@ class RevisionStore $result = new StatusValue(); $rowsByRevId = []; - $pageIds = []; + $pageIdsToFetchTitles = []; $titlesByPageId = []; foreach ( $rows as $row ) { if ( isset( $rowsByRevId[$row->rev_id] ) ) { @@ -1931,8 +1937,17 @@ class RevisionStore throw new InvalidArgumentException( "Revision {$row->rev_id} doesn't belong to page {$title->getArticleID()}" ); + } elseif ( !$title && !isset( $titlesByPageId[ $row->rev_page ] ) ) { + if ( isset( $row->page_namespace ) && isset( $row->page_title ) && + // This should not happen, but just in case we don't have a page_id + // set or it doesn't match rev_page, let's fetch the title again. + isset( $row->page_id ) && $row->rev_page === $row->page_id + ) { + $titlesByPageId[ $row->rev_page ] = Title::newFromRow( $row ); + } else { + $pageIdsToFetchTitles[] = $row->rev_page; + } } - $pageIds[] = $row->rev_page; $rowsByRevId[$row->rev_id] = $row; } @@ -1944,9 +1959,9 @@ class RevisionStore // If the title is not supplied, batch-fetch Title objects. if ( $title ) { $titlesByPageId[$title->getArticleID()] = $title; - } else { - $pageIds = array_unique( $pageIds ); - foreach ( Title::newFromIDs( $pageIds ) as $t ) { + } elseif ( !empty( $pageIdsToFetchTitles ) ) { + $pageIdsToFetchTitles = array_unique( $pageIdsToFetchTitles ); + foreach ( Title::newFromIDs( $pageIdsToFetchTitles ) as $t ) { $titlesByPageId[$t->getArticleID()] = $t; } } @@ -1969,47 +1984,25 @@ class RevisionStore return $result; } - $slotQueryConds = [ 'slot_revision_id' => array_keys( $rowsByRevId ) ]; - if ( is_array( $options['slots'] ) ) { - $slotQueryConds['slot_role_id'] = array_map( function ( $slot_name ) { - return $this->slotRoleStore->getId( $slot_name ); - }, $options['slots'] ); - } - - // We need to set the `content` flag because newRevisionFromRowAndSlots requires content - // metadata to be loaded. - $slotQueryInfo = self::getSlotsQueryInfo( [ 'content' ] ); - $db = $this->getDBConnectionRefForQueryFlags( $queryFlags ); - $slotRows = $db->select( - $slotQueryInfo['tables'], - $slotQueryInfo['fields'], - $slotQueryConds, - __METHOD__, - [], - $slotQueryInfo['joins'] - ); + $slotRowOptions = [ + 'slots' => $options['slots'] ?? true, + 'blobs' => $options['content'] ?? false, + ]; - $slotRowsByRevId = []; - foreach ( $slotRows as $slotRow ) { - $slotRowsByRevId[$slotRow->slot_revision_id][] = $slotRow; + if ( is_array( $slotRowOptions['slots'] ) + && !in_array( SlotRecord::MAIN, $slotRowOptions['slots'] ) + ) { + // Make sure the main slot is always loaded, RevisionRecord requires this. + $slotRowOptions['slots'][] = SlotRecord::MAIN; } - $slotContents = null; - if ( $options['content'] ?? false ) { - $blobAddresses = []; - foreach ( $slotRows as $slotRow ) { - $blobAddresses[] = $slotRow->content_address; - } - $slotContentFetchStatus = $this->blobStore - ->getBlobBatch( $blobAddresses, $queryFlags ); - foreach ( $slotContentFetchStatus->getErrors() as $error ) { - $result->warning( $error['message'], ...$error['params'] ); - } - $slotContents = $slotContentFetchStatus->getValue(); - } + $slotRowsStatus = $this->getSlotRowsForBatch( $rowsByRevId, $slotRowOptions, $queryFlags ); + + $result->merge( $slotRowsStatus ); + $slotRowsByRevId = $slotRowsStatus->getValue(); $result->setResult( true, array_map( function ( $row ) use - ( $slotRowsByRevId, $queryFlags, $titlesByPageId, $slotContents, $result ) { + ( $slotRowsByRevId, $queryFlags, $titlesByPageId, $result ) { if ( !isset( $slotRowsByRevId[$row->rev_id] ) ) { $result->warning( 'internalerror', @@ -2025,8 +2018,7 @@ class RevisionStore $row->rev_id, $slotRowsByRevId[$row->rev_id], $queryFlags, - $titlesByPageId[$row->rev_page], - $slotContents + $titlesByPageId[$row->rev_page] ) ), $queryFlags, @@ -2040,6 +2032,174 @@ class RevisionStore return $result; } + /** + * Gets the slot rows associated with a batch of revisions. + * The serialized content of each slot can be included by setting the 'blobs' option. + * Callers are responsible for unserializing and interpreting the content blobs + * based on the model_name and role_name fields. + * + * @param Traversable|array $rowsOrIds list of revision ids, or revision rows from a db query. + * @param array $options Supports the following options: + * 'slots' - a list of slot role names to fetch. If omitted or true or null, + * all slots are fetched + * 'blobs'- whether the serialized content of each slot should be loaded. + * If true, the serialiezd content will be present in the slot row + * in the blob_data field. + * @param int $queryFlags + * + * @return StatusValue a status containing, if isOK() returns true, a two-level nested + * associative array, mapping from revision ID to an associative array that maps from + * role name to a database row object. The database row object will contain the fields + * defined by getSlotQueryInfo() with the 'content' flag set, plus the blob_data field + * if the 'blobs' is set in $options. The model_name and role_name fields will also be + * set. + */ + private function getSlotRowsForBatch( + $rowsOrIds, + array $options = [], + $queryFlags = 0 + ) { + $readNew = $this->hasMcrSchemaFlags( SCHEMA_COMPAT_READ_NEW ); + $result = new StatusValue(); + + $revIds = []; + foreach ( $rowsOrIds as $row ) { + $revIds[] = is_object( $row ) ? (int)$row->rev_id : (int)$row; + } + + // Nothing to do. + // Note that $rowsOrIds may not be "empty" even if $revIds is, e.g. if it's a ResultWrapper. + if ( empty( $revIds ) ) { + $result->setResult( true, [] ); + return $result; + } + + // We need to set the `content` flag to join in content meta-data + $slotQueryInfo = self::getSlotsQueryInfo( [ 'content' ] ); + $revIdField = $slotQueryInfo['keys']['rev_id']; + $slotQueryConds = [ $revIdField => $revIds ]; + + if ( $readNew && isset( $options['slots'] ) && is_array( $options['slots'] ) ) { + if ( empty( $options['slots'] ) ) { + // Degenerate case: return no slots for each revision. + $result->setResult( true, array_fill_keys( $revIds, [] ) ); + return $result; + } + + $roleIdField = $slotQueryInfo['keys']['role_id']; + $slotQueryConds[$roleIdField] = array_map( function ( $slot_name ) { + return $this->slotRoleStore->getId( $slot_name ); + }, $options['slots'] ); + } + + $db = $this->getDBConnectionRefForQueryFlags( $queryFlags ); + $slotRows = $db->select( + $slotQueryInfo['tables'], + $slotQueryInfo['fields'], + $slotQueryConds, + __METHOD__, + [], + $slotQueryInfo['joins'] + ); + + $slotContents = null; + if ( $options['blobs'] ?? false ) { + $blobAddresses = []; + foreach ( $slotRows as $slotRow ) { + $blobAddresses[] = $slotRow->content_address; + } + $slotContentFetchStatus = $this->blobStore + ->getBlobBatch( $blobAddresses, $queryFlags ); + foreach ( $slotContentFetchStatus->getErrors() as $error ) { + $result->warning( $error['message'], ...$error['params'] ); + } + $slotContents = $slotContentFetchStatus->getValue(); + } + + $slotRowsByRevId = []; + foreach ( $slotRows as $slotRow ) { + if ( $slotContents === null ) { + // nothing to do + } elseif ( isset( $slotContents[$slotRow->content_address] ) ) { + $slotRow->blob_data = $slotContents[$slotRow->content_address]; + } else { + $result->warning( + 'internalerror', + "Couldn't find blob data for rev {$slotRow->slot_revision_id}" + ); + $slotRow->blob_data = null; + } + + // conditional needed for SCHEMA_COMPAT_READ_OLD + if ( !isset( $slotRow->role_name ) && isset( $slotRow->slot_role_id ) ) { + $slotRow->role_name = $this->slotRoleStore->getName( (int)$slotRow->slot_role_id ); + } + + // conditional needed for SCHEMA_COMPAT_READ_OLD + if ( !isset( $slotRow->model_name ) && isset( $slotRow->content_model ) ) { + $slotRow->model_name = $this->contentModelStore->getName( (int)$slotRow->content_model ); + } + + $slotRowsByRevId[$slotRow->slot_revision_id][$slotRow->role_name] = $slotRow; + } + + $result->setResult( true, $slotRowsByRevId ); + return $result; + } + + /** + * Gets raw (serialized) content blobs for the given set of revisions. + * Callers are responsible for unserializing and interpreting the content blobs + * based on the model_name field and the slot role. + * + * This method is intended for bulk operations in maintenance scripts. + * It may be chosen over newRevisionsFromBatch by code that are only interested + * in raw content, as opposed to meta data. Code that needs to access meta data of revisions, + * slots, or content objects should use newRevisionsFromBatch() instead. + * + * @param Traversable|array $rowsOrIds list of revision ids, or revision rows from a db query. + * @param array|null $slots the role names for which to get slots. + * @param int $queryFlags + * + * @return StatusValue a status containing, if isOK() returns true, a two-level nested + * associative array, mapping from revision ID to an associative array that maps from + * role name to an anonymous object object containing two fields: + * - model_name: the name of the content's model + * - blob_data: serialized content data + */ + public function getContentBlobsForBatch( + $rowsOrIds, + $slots = null, + $queryFlags = 0 + ) { + $result = $this->getSlotRowsForBatch( + $rowsOrIds, + [ 'slots' => $slots, 'blobs' => true ], + $queryFlags + ); + + if ( $result->isOK() ) { + // strip out all internal meta data that we don't want to expose + foreach ( $result->value as $revId => $rowsByRole ) { + foreach ( $rowsByRole as $role => $slotRow ) { + if ( is_array( $slots ) && !in_array( $role, $slots ) ) { + // In SCHEMA_COMPAT_READ_OLD mode we may get the main slot even + // if we didn't ask for it. + unset( $result->value[$revId][$role] ); + continue; + } + + $result->value[$revId][$role] = (object)[ + 'blob_data' => $slotRow->blob_data, + 'model_name' => $slotRow->model_name, + ]; + } + } + } + + return $result; + } + /** * Constructs a new MutableRevisionRecord based on the given associative array following * the MW1.29 convention for the Revision constructor. @@ -2589,16 +2749,22 @@ class RevisionStore * - tables: (string[]) to include in the `$table` to `IDatabase->select()` * - fields: (string[]) to include in the `$vars` to `IDatabase->select()` * - joins: (array) to include in the `$join_conds` to `IDatabase->select()` + * - keys: (associative array) to look up fields to match against. + * In particular, the field that can be used to find slots by rev_id + * can be found in ['keys']['rev_id']. */ public function getSlotsQueryInfo( $options = [] ) { $ret = [ 'tables' => [], 'fields' => [], 'joins' => [], + 'keys' => [], ]; if ( $this->hasMcrSchemaFlags( SCHEMA_COMPAT_READ_OLD ) ) { $db = $this->getDBConnectionRef( DB_REPLICA ); + $ret['keys']['rev_id'] = 'rev_id'; + $ret['tables'][] = 'revision'; $ret['fields']['slot_revision_id'] = 'rev_id'; @@ -2622,6 +2788,9 @@ class RevisionStore } } } else { + $ret['keys']['rev_id'] = 'slot_revision_id'; + $ret['keys']['role_id'] = 'slot_role_id'; + $ret['tables'][] = 'slots'; $ret['fields'] = array_merge( $ret['fields'], [ 'slot_revision_id', @@ -2639,6 +2808,8 @@ class RevisionStore } if ( in_array( 'content', $options, true ) ) { + $ret['keys']['model_id'] = 'content_model'; + $ret['tables'][] = 'content'; $ret['fields'] = array_merge( $ret['fields'], [ 'content_size',