RevisionStore::newRevisionsFromBatch should use Title::newFromRow
[lhc/web/wiklou.git] / includes / Revision / RevisionStore.php
index 735a212..420fe65 100644 (file)
@@ -182,9 +182,9 @@ class RevisionStore
                        'Reading needs to be enabled for the old or the new schema.'
                );
                Assert::parameter(
-                       ( $mcrMigrationStage & SCHEMA_COMPAT_WRITE_BOTH ) !== 0,
+                       ( $mcrMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) !== 0,
                        '$mcrMigrationStage',
-                       'Writing needs to be enabled for the old or the new schema.'
+                       'Writing needs to be enabled for the new schema.'
                );
                Assert::parameter(
                        ( $mcrMigrationStage & SCHEMA_COMPAT_READ_OLD ) === 0
@@ -192,12 +192,6 @@ class RevisionStore
                        '$mcrMigrationStage',
                        'Cannot read the old schema when not also writing it.'
                );
-               Assert::parameter(
-                       ( $mcrMigrationStage & SCHEMA_COMPAT_READ_NEW ) === 0
-                       || ( $mcrMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) !== 0,
-                       '$mcrMigrationStage',
-                       'Cannot read the new schema when not also writing it.'
-               );
 
                $this->loadBalancer = $loadBalancer;
                $this->blobStore = $blobStore;
@@ -1623,10 +1617,18 @@ class RevisionStore
         * @param object[]|IResultWrapper $slotRows
         * @param int $queryFlags
         * @param Title $title
+        * @param array|null $slotContents a map from blobAddress to slot
+        *      content blob or Content object.
         *
         * @return SlotRecord[]
         */
-       private function constructSlotRecords( $revId, $slotRows, $queryFlags, Title $title ) {
+       private function constructSlotRecords(
+               $revId,
+               $slotRows,
+               $queryFlags,
+               Title $title,
+               $slotContents = null
+       ) {
                $slots = [];
 
                foreach ( $slotRows as $row ) {
@@ -1651,8 +1653,20 @@ class RevisionStore
                                        = $this->emulateContentId( intval( $row->rev_text_id ) );
                        }
 
-                       $contentCallback = function ( SlotRecord $slot ) use ( $queryFlags ) {
-                               return $this->loadSlotContent( $slot, null, null, null, $queryFlags );
+                       // We may have a fake blob_data field from getSlotRowsForBatch(), use it!
+                       if ( isset( $row->blob_data ) ) {
+                               $slotContents[$row->content_address] = $row->blob_data;
+                       }
+
+                       $contentCallback = function ( SlotRecord $slot ) use ( $slotContents, $queryFlags ) {
+                               $blob = null;
+                               if ( isset( $slotContents[$slot->getAddress()] ) ) {
+                                       $blob = $slotContents[$slot->getAddress()];
+                                       if ( $blob instanceof Content ) {
+                                               return $blob;
+                                       }
+                               }
+                               return $this->loadSlotContent( $slot, $blob, null, null, $queryFlags );
                        };
 
                        $slots[$row->role_name] = new SlotRecord( $row, $contentCallback );
@@ -1804,8 +1818,10 @@ class RevisionStore
 
        /**
         * @param object $row A database row generated from a query based on getQueryInfo()
-        * @param null|object[] $slotRows Database rows generated from a query based on
-        *        getSlotsQueryInfo with the 'content' flag set.
+        * @param null|object[]|RevisionSlots $slots
+        *      - Database rows generated from a query based on getSlotsQueryInfo
+        *        with the 'content' flag set, or
+        *      - a RevisionSlots instance.
         * @param int $queryFlags
         * @param Title|null $title
         * @param bool $fromCache if true, the returned RevisionRecord will ensure that no stale
@@ -1816,11 +1832,10 @@ class RevisionStore
         * @see RevisionFactory::newRevisionFromRow
         *
         * MCR migration note: this replaces Revision::newFromRow
-        *
         */
        public function newRevisionFromRowAndSlots(
                $row,
-               $slotRows,
+               $slots,
                $queryFlags = 0,
                Title $title = null,
                $fromCache = false
@@ -1857,7 +1872,9 @@ class RevisionStore
                // Legacy because $row may have come from self::selectFields()
                $comment = $this->commentStore->getCommentLegacy( $db, 'rev_comment', $row, true );
 
-               $slots = $this->newRevisionSlots( $row->rev_id, $row, $slotRows, $queryFlags, $title );
+               if ( !( $slots instanceof RevisionSlots ) ) {
+                       $slots = $this->newRevisionSlots( $row->rev_id, $row, $slots, $queryFlags, $title );
+               }
 
                // If this is a cached row, instantiate a cache-aware revision class to avoid stale data.
                if ( $fromCache ) {
@@ -1885,11 +1902,16 @@ class RevisionStore
         * @param array $options Supports the following options:
         *               'slots' - whether metadata about revision slots should be
         *               loaded immediately. Supports falsy or truthy value as well
-        *               as an explicit list of slot role names.
+        *               as an explicit list of slot role names. The main slot will
+        *               always be loaded.
         *               'content'- whether the actual content of the slots should be
-        *               preloaded. TODO: no supported yet.
+        *               preloaded.
         * @param int $queryFlags
-        * @param Title|null $title
+        * @param Title|null $title The title to which all the revision rows belong, if there
+        *        is such a title and the caller has it handy, so we don't have to look it up again.
+        *        If this parameter is given and any of the rows has a rev_page that is different
+        *        from $title->getArticleID(), an InvalidArgumentException is thrown.
+        *
         * @return StatusValue a status with a RevisionRecord[] of successfully fetched revisions
         *                                         and an array of errors for the revisions failed to fetch.
         */
@@ -1902,18 +1924,30 @@ class RevisionStore
                $result = new StatusValue();
 
                $rowsByRevId = [];
-               $pageIds = [];
+               $pageIdsToFetchTitles = [];
                $titlesByPageId = [];
                foreach ( $rows as $row ) {
                        if ( isset( $rowsByRevId[$row->rev_id] ) ) {
-                               throw new InvalidArgumentException( "Duplicate rows in newRevisionsFromBatch {$row->rev_id}" );
+                               $result->warning(
+                                       'internalerror',
+                                       "Duplicate rows in newRevisionsFromBatch, rev_id {$row->rev_id}"
+                               );
                        }
                        if ( $title && $row->rev_page != $title->getArticleID() ) {
                                throw new InvalidArgumentException(
                                        "Revision {$row->rev_id} doesn't belong to page {$title->getArticleID()}"
                                );
+                       } elseif ( !$title && !isset( $titlesByPageId[ $row->rev_page ] ) ) {
+                               if ( isset( $row->page_namespace ) && isset( $row->page_title ) &&
+                                       // This should not happen, but just in case we don't have a page_id
+                                       // set or it doesn't match rev_page, let's fetch the title again.
+                                       isset( $row->page_id ) && $row->rev_page === $row->page_id
+                               ) {
+                                       $titlesByPageId[ $row->rev_page ] = Title::newFromRow( $row );
+                               } else {
+                                       $pageIdsToFetchTitles[] = $row->rev_page;
+                               }
                        }
-                       $pageIds[] = $row->rev_page;
                        $rowsByRevId[$row->rev_id] = $row;
                }
 
@@ -1925,9 +1959,9 @@ class RevisionStore
                // If the title is not supplied, batch-fetch Title objects.
                if ( $title ) {
                        $titlesByPageId[$title->getArticleID()] = $title;
-               } else {
-                       $pageIds = array_unique( $pageIds );
-                       foreach ( Title::newFromIDs( $pageIds ) as $t ) {
+               } elseif ( !empty( $pageIdsToFetchTitles ) ) {
+                       $pageIdsToFetchTitles = array_unique( $pageIdsToFetchTitles );
+                       foreach ( Title::newFromIDs( $pageIdsToFetchTitles ) as $t ) {
                                $titlesByPageId[$t->getArticleID()] = $t;
                        }
                }
@@ -1950,29 +1984,23 @@ class RevisionStore
                        return $result;
                }
 
-               $slotQueryConds = [ 'slot_revision_id' => array_keys( $rowsByRevId ) ];
-               if ( is_array( $options['slots'] ) ) {
-                       $slotQueryConds['slot_role_id'] = array_map( function ( $slot_name ) {
-                               return $this->slotRoleStore->getId( $slot_name );
-                       }, $options['slots'] );
+               $slotRowOptions = [
+                       'slots' => $options['slots'] ?? true,
+                       'blobs' => $options['content'] ?? false,
+               ];
+
+               if ( is_array( $slotRowOptions['slots'] )
+                       && !in_array( SlotRecord::MAIN, $slotRowOptions['slots'] )
+               ) {
+                       // Make sure the main slot is always loaded, RevisionRecord requires this.
+                       $slotRowOptions['slots'][] = SlotRecord::MAIN;
                }
 
-               // TODO: Support optional fetching of the content
-               $queryInfo = self::getSlotsQueryInfo( [ 'content' ] );
-               $db = $this->getDBConnectionRefForQueryFlags( $queryFlags );
-               $slotRows = $db->select(
-                       $queryInfo['tables'],
-                       $queryInfo['fields'],
-                       $slotQueryConds,
-                       __METHOD__,
-                       [],
-                       $queryInfo['joins']
-               );
+               $slotRowsStatus = $this->getSlotRowsForBatch( $rowsByRevId, $slotRowOptions, $queryFlags );
+
+               $result->merge( $slotRowsStatus );
+               $slotRowsByRevId = $slotRowsStatus->getValue();
 
-               $slotRowsByRevId = [];
-               foreach ( $slotRows as $slotRow ) {
-                       $slotRowsByRevId[$slotRow->slot_revision_id][] = $slotRow;
-               }
                $result->setResult( true, array_map( function ( $row ) use
                        ( $slotRowsByRevId, $queryFlags, $titlesByPageId, $result ) {
                                if ( !isset( $slotRowsByRevId[$row->rev_id] ) ) {
@@ -1985,7 +2013,14 @@ class RevisionStore
                                try {
                                        return $this->newRevisionFromRowAndSlots(
                                                $row,
-                                               $slotRowsByRevId[$row->rev_id],
+                                               new RevisionSlots(
+                                                       $this->constructSlotRecords(
+                                                               $row->rev_id,
+                                                               $slotRowsByRevId[$row->rev_id],
+                                                               $queryFlags,
+                                                               $titlesByPageId[$row->rev_page]
+                                                       )
+                                               ),
                                                $queryFlags,
                                                $titlesByPageId[$row->rev_page]
                                        );
@@ -1997,6 +2032,174 @@ class RevisionStore
                return $result;
        }
 
+       /**
+        * Gets the slot rows associated with a batch of revisions.
+        * The serialized content of each slot can be included by setting the 'blobs' option.
+        * Callers are responsible for unserializing and interpreting the content blobs
+        * based on the model_name and role_name fields.
+        *
+        * @param Traversable|array $rowsOrIds list of revision ids, or revision rows from a db query.
+        * @param array $options Supports the following options:
+        *               'slots' - a list of slot role names to fetch. If omitted or true or null,
+        *                         all slots are fetched
+        *               'blobs' - whether the serialized content of each slot should be loaded.
+        *                        If true, the serialized content will be present in the slot row
+        *                        in the blob_data field.
+        * @param int $queryFlags
+        *
+        * @return StatusValue a status containing, if isOK() returns true, a two-level nested
+        *         associative array, mapping from revision ID to an associative array that maps from
+        *         role name to a database row object. The database row object will contain the fields
+        *         defined by getSlotsQueryInfo() with the 'content' flag set, plus the blob_data field
+        *         if 'blobs' is set in $options. The model_name and role_name fields will also be
+        *         set.
+        */
+       private function getSlotRowsForBatch(
+               $rowsOrIds,
+               array $options = [],
+               $queryFlags = 0
+       ) {
+               $readNew = $this->hasMcrSchemaFlags( SCHEMA_COMPAT_READ_NEW );
+               $result = new StatusValue();
+
+               $revIds = [];
+               foreach ( $rowsOrIds as $row ) {
+                       $revIds[] = is_object( $row ) ? (int)$row->rev_id : (int)$row;
+               }
+
+               // Nothing to do.
+               // Note that $rowsOrIds may not be "empty" even if $revIds is, e.g. if it's a ResultWrapper.
+               if ( empty( $revIds ) ) {
+                       $result->setResult( true, [] );
+                       return $result;
+               }
+
+               // We need to set the `content` flag to join in content meta-data
+               $slotQueryInfo = self::getSlotsQueryInfo( [ 'content' ] );
+               $revIdField = $slotQueryInfo['keys']['rev_id'];
+               $slotQueryConds = [ $revIdField => $revIds ];
+
+               if ( $readNew && isset( $options['slots'] ) && is_array( $options['slots'] ) ) {
+                       if ( empty( $options['slots'] ) ) {
+                               // Degenerate case: return no slots for each revision.
+                               $result->setResult( true, array_fill_keys( $revIds, [] ) );
+                               return $result;
+                       }
+
+                       $roleIdField = $slotQueryInfo['keys']['role_id'];
+                       $slotQueryConds[$roleIdField] = array_map( function ( $slot_name ) {
+                               return $this->slotRoleStore->getId( $slot_name );
+                       }, $options['slots'] );
+               }
+
+               $db = $this->getDBConnectionRefForQueryFlags( $queryFlags );
+               $slotRows = $db->select(
+                       $slotQueryInfo['tables'],
+                       $slotQueryInfo['fields'],
+                       $slotQueryConds,
+                       __METHOD__,
+                       [],
+                       $slotQueryInfo['joins']
+               );
+
+               $slotContents = null;
+               if ( $options['blobs'] ?? false ) {
+                       $blobAddresses = [];
+                       foreach ( $slotRows as $slotRow ) {
+                               $blobAddresses[] = $slotRow->content_address;
+                       }
+                       $slotContentFetchStatus = $this->blobStore
+                               ->getBlobBatch( $blobAddresses, $queryFlags );
+                       foreach ( $slotContentFetchStatus->getErrors() as $error ) {
+                               $result->warning( $error['message'], ...$error['params'] );
+                       }
+                       $slotContents = $slotContentFetchStatus->getValue();
+               }
+
+               $slotRowsByRevId = [];
+               foreach ( $slotRows as $slotRow ) {
+                       if ( $slotContents === null ) {
+                               // nothing to do
+                       } elseif ( isset( $slotContents[$slotRow->content_address] ) ) {
+                               $slotRow->blob_data = $slotContents[$slotRow->content_address];
+                       } else {
+                               $result->warning(
+                                       'internalerror',
+                                       "Couldn't find blob data for rev {$slotRow->slot_revision_id}"
+                               );
+                               $slotRow->blob_data = null;
+                       }
+
+                       // conditional needed for SCHEMA_COMPAT_READ_OLD
+                       if ( !isset( $slotRow->role_name ) && isset( $slotRow->slot_role_id ) ) {
+                               $slotRow->role_name = $this->slotRoleStore->getName( (int)$slotRow->slot_role_id );
+                       }
+
+                       // conditional needed for SCHEMA_COMPAT_READ_OLD
+                       if ( !isset( $slotRow->model_name ) && isset( $slotRow->content_model ) ) {
+                               $slotRow->model_name = $this->contentModelStore->getName( (int)$slotRow->content_model );
+                       }
+
+                       $slotRowsByRevId[$slotRow->slot_revision_id][$slotRow->role_name] = $slotRow;
+               }
+
+               $result->setResult( true, $slotRowsByRevId );
+               return $result;
+       }
+
+       /**
+        * Gets raw (serialized) content blobs for the given set of revisions.
+        * Callers are responsible for unserializing and interpreting the content blobs
+        * based on the model_name field and the slot role.
+        *
+        * This method is intended for bulk operations in maintenance scripts.
+        * It may be chosen over newRevisionsFromBatch by code that is only interested
+        * in raw content, as opposed to meta data. Code that needs to access meta data of revisions,
+        * slots, or content objects should use newRevisionsFromBatch() instead.
+        *
+        * @param Traversable|array $rowsOrIds list of revision ids, or revision rows from a db query.
+        * @param array|null $slots the role names for which to get slots.
+        * @param int $queryFlags
+        *
+        * @return StatusValue a status containing, if isOK() returns true, a two-level nested
+        *         associative array, mapping from revision ID to an associative array that maps from
+        *         role name to an anonymous object containing two fields:
+        *         - model_name: the name of the content's model
+        *         - blob_data: serialized content data
+        */
+       public function getContentBlobsForBatch(
+               $rowsOrIds,
+               $slots = null,
+               $queryFlags = 0
+       ) {
+               $result = $this->getSlotRowsForBatch(
+                       $rowsOrIds,
+                       [ 'slots' => $slots, 'blobs' => true ],
+                       $queryFlags
+               );
+
+               if ( $result->isOK() ) {
+                       // strip out all internal meta data that we don't want to expose
+                       foreach ( $result->value as $revId => $rowsByRole ) {
+                               foreach ( $rowsByRole as $role => $slotRow ) {
+                                       if ( is_array( $slots ) && !in_array( $role, $slots ) ) {
+                                               // In SCHEMA_COMPAT_READ_OLD mode we may get the main slot even
+                                               // if we didn't ask for it.
+                                               unset( $result->value[$revId][$role] );
+                                               continue;
+                                       }
+
+                                       $result->value[$revId][$role] = (object)[
+                                               'blob_data' => $slotRow->blob_data,
+                                               'model_name' => $slotRow->model_name,
+                                       ];
+                               }
+                       }
+               }
+
+               return $result;
+       }
+
        /**
         * Constructs a new MutableRevisionRecord based on the given associative array following
         * the MW1.29 convention for the Revision constructor.
@@ -2546,16 +2749,22 @@ class RevisionStore
         *  - tables: (string[]) to include in the `$table` to `IDatabase->select()`
         *  - fields: (string[]) to include in the `$vars` to `IDatabase->select()`
         *  - joins: (array) to include in the `$join_conds` to `IDatabase->select()`
+        *  - keys: (associative array) to look up fields to match against.
+        *          In particular, the field that can be used to find slots by rev_id
+        *          can be found in ['keys']['rev_id'].
         */
        public function getSlotsQueryInfo( $options = [] ) {
                $ret = [
                        'tables' => [],
                        'fields' => [],
                        'joins'  => [],
+                       'keys'  => [],
                ];
 
                if ( $this->hasMcrSchemaFlags( SCHEMA_COMPAT_READ_OLD ) ) {
                        $db = $this->getDBConnectionRef( DB_REPLICA );
+                       $ret['keys']['rev_id'] = 'rev_id';
+
                        $ret['tables'][] = 'revision';
 
                        $ret['fields']['slot_revision_id'] = 'rev_id';
@@ -2579,6 +2788,9 @@ class RevisionStore
                                }
                        }
                } else {
+                       $ret['keys']['rev_id'] = 'slot_revision_id';
+                       $ret['keys']['role_id'] = 'slot_role_id';
+
                        $ret['tables'][] = 'slots';
                        $ret['fields'] = array_merge( $ret['fields'], [
                                'slot_revision_id',
@@ -2596,6 +2808,8 @@ class RevisionStore
                        }
 
                        if ( in_array( 'content', $options, true ) ) {
+                               $ret['keys']['model_id'] = 'content_model';
+
                                $ret['tables'][] = 'content';
                                $ret['fields'] = array_merge( $ret['fields'], [
                                        'content_size',