Merge "build: Upgrade eslint to 5.x"
[lhc/web/wiklou.git] / maintenance / populateContentTables.php
1 <?php
2 /**
3 * This program is free software; you can redistribute it and/or modify
4 * it under the terms of the GNU General Public License as published by
5 * the Free Software Foundation; either version 2 of the License, or
6 * (at your option) any later version.
7 *
8 * This program is distributed in the hope that it will be useful,
9 * but WITHOUT ANY WARRANTY; without even the implied warranty of
10 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
11 * GNU General Public License for more details.
12 *
13 * You should have received a copy of the GNU General Public License along
14 * with this program; if not, write to the Free Software Foundation, Inc.,
15 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
16 * http://www.gnu.org/copyleft/gpl.html
17 *
18 * @file
19 * @ingroup Maintenance
20 */
21
22 use MediaWiki\MediaWikiServices;
23 use MediaWiki\Storage\NameTableStore;
24 use MediaWiki\Storage\SqlBlobStore;
25 use Wikimedia\Assert\Assert;
26 use Wikimedia\Rdbms\IDatabase;
27 use Wikimedia\Rdbms\ResultWrapper;
28
29 require_once __DIR__ . '/Maintenance.php';
30
31 /**
32 * Populate the content and slot tables.
33 * @since 1.32
34 */
class PopulateContentTables extends Maintenance {

	/** @var IDatabase Master connection, used for the batch reads and all inserts */
	private $dbw;

	/** @var NameTableStore Store used to turn content model names into numeric IDs */
	private $contentModelStore;

	/** @var int ID of the "main" slot role, acquired once in initServices() */
	private $mainRoleId;

	/**
	 * @var array|null Map "{$modelId}:{$address}" to content_id.
	 *  Stays null unless --reuse-content was given, in which case it is shared across batches.
	 */
	private $contentRowMap = null;

	/** @var int Number of slot rows inserted for the table currently being processed */
	private $count = 0;

	/** @var int Number of slot rows inserted over the whole run */
	private $totalCount = 0;

	public function __construct() {
		parent::__construct();

		$this->addDescription( 'Populate content and slot tables' );
		$this->addOption( 'table', 'revision or archive table, or `all` to populate both', false,
			true );
		$this->addOption( 'reuse-content',
			'Reuse content table rows when the address and model are the same. '
			. 'This will increase the script\'s time and memory usage, perhaps significantly.',
			false, false );
		$this->setBatchSize( 500 );
	}

	/**
	 * Fetch the database handle and the services needed by the population steps.
	 */
	private function initServices() {
		$services = MediaWikiServices::getInstance();

		$this->dbw = $this->getDB( DB_MASTER );
		$this->contentModelStore = $services->getContentModelStore();
		$this->mainRoleId = $services->getSlotRoleStore()->acquireId( 'main' );
	}

	/**
	 * Entry point: populate the content and slots tables for the selected source table(s).
	 *
	 * @return bool False if the schema migration stage does not allow writing the new
	 *  tables, true once all selected tables have been processed.
	 */
	public function execute() {
		global $wgMultiContentRevisionSchemaMigrationStage;

		$t0 = microtime( true );

		if ( ( $wgMultiContentRevisionSchemaMigrationStage & SCHEMA_COMPAT_WRITE_NEW ) === 0 ) {
			// Single-quoted string: "$" is never interpolated here, and a backslash in front
			// of it would be printed literally.
			$this->writeln(
				'...cannot update while $wgMultiContentRevisionSchemaMigrationStage '
				. 'does not have the SCHEMA_COMPAT_WRITE_NEW bit set.'
			);
			return false;
		}

		$this->initServices();

		if ( $this->getOption( 'reuse-content', false ) ) {
			$this->loadContentMap();
		}

		foreach ( $this->getTables() as $table ) {
			$this->populateTable( $table );
		}

		$elapsed = microtime( true ) - $t0;
		$this->writeln( "Done. Processed $this->totalCount rows in $elapsed seconds" );

		return true;
	}

	/**
	 * Resolve the --table option into the list of source tables to process.
	 * Aborts via fatalError() when the option value is not recognised.
	 *
	 * @return string[]
	 */
	private function getTables() {
		$table = $this->getOption( 'table', 'all' );
		$validTableOptions = [ 'all', 'revision', 'archive' ];

		// Strict comparison: only the exact option strings are acceptable.
		if ( !in_array( $table, $validTableOptions, true ) ) {
			$this->fatalError( 'Invalid table. Must be either `revision` or `archive` or `all`' );
		}

		return $table === 'all' ? [ 'revision', 'archive' ] : [ $table ];
	}

	/**
	 * Read every existing content row, in batches ordered by content_id, into
	 * $this->contentRowMap so that later inserts can reuse matching rows.
	 */
	private function loadContentMap() {
		$t0 = microtime( true );
		$this->writeln( "Loading existing content table rows..." );
		$this->contentRowMap = [];
		$dbr = $this->getDB( DB_REPLICA );

		// Highest content_id seen so far; null until the first batch has been read.
		$lastId = null;
		while ( true ) {
			$res = $dbr->select(
				'content',
				[ 'content_id', 'content_address', 'content_model' ],
				$lastId !== null ? "content_id > $lastId" : '',
				__METHOD__,
				[ 'ORDER BY' => 'content_id', 'LIMIT' => $this->getBatchSize() ]
			);
			if ( !$res || !$res->numRows() ) {
				break;
			}
			foreach ( $res as $row ) {
				$lastId = $row->content_id;
				$this->contentRowMap["{$row->content_model}:{$row->content_address}"] = $row->content_id;
			}
		}

		$elapsed = microtime( true ) - $t0;
		$this->writeln( "Loaded " . count( $this->contentRowMap ) . " rows in $elapsed seconds" );
	}

	/**
	 * Walk the given table in ID ranges of one batch size each and create content and
	 * slot rows for every revision in the range that has no slots row yet.
	 *
	 * @param string $table Either 'revision' or 'archive'
	 */
	private function populateTable( $table ) {
		$t0 = microtime( true );
		$this->count = 0;
		$this->writeln( "Populating $table..." );

		if ( $table === 'revision' ) {
			$idField = 'rev_id';
			$tables = [ 'revision', 'slots', 'page' ];
			$fields = [
				'rev_id',
				'len' => 'rev_len',
				'sha1' => 'rev_sha1',
				'text_id' => 'rev_text_id',
				'content_model' => 'rev_content_model',
				'namespace' => 'page_namespace',
				'title' => 'page_title',
			];
			$joins = [
				'slots' => [ 'LEFT JOIN', 'rev_id=slot_revision_id' ],
				'page' => [ 'LEFT JOIN', 'rev_page=page_id' ],
			];
		} else {
			$idField = 'ar_rev_id';
			$tables = [ 'archive', 'slots' ];
			$fields = [
				'rev_id' => 'ar_rev_id',
				'len' => 'ar_len',
				'sha1' => 'ar_sha1',
				'text_id' => 'ar_text_id',
				'content_model' => 'ar_content_model',
				'namespace' => 'ar_namespace',
				'title' => 'ar_title',
			];
			$joins = [
				'slots' => [ 'LEFT JOIN', 'ar_rev_id=slot_revision_id' ],
			];
		}

		$minmax = $this->dbw->selectRow(
			$table,
			[ 'min' => "MIN( $idField )", 'max' => "MAX( $idField )" ],
			'',
			__METHOD__
		);

		// MIN()/MAX() yield NULL when there is nothing to aggregate. Since `null <= null`
		// is true in PHP, the loop below would otherwise run once and build a condition
		// with an empty operand ("$idField >= "), which is not valid SQL.
		if ( !$minmax || $minmax->min === null || $minmax->max === null ) {
			$elapsed = microtime( true ) - $t0;
			$this->writeln(
				"Done populating $table table. Processed $this->count rows in $elapsed seconds"
			);
			return;
		}

		$batchSize = $this->getBatchSize();

		for ( $startId = $minmax->min; $startId <= $minmax->max; $startId += $batchSize ) {
			$endId = min( $startId + $batchSize - 1, $minmax->max );
			$rows = $this->dbw->select(
				$tables,
				$fields,
				[
					"$idField >= $startId",
					"$idField <= $endId",
					// Only revisions that have no slots row yet.
					'slot_revision_id IS NULL',
				],
				__METHOD__,
				[ 'ORDER BY' => 'rev_id' ],
				$joins
			);
			if ( $rows->numRows() !== 0 ) {
				$this->populateContentTablesForRowBatch( $rows, $startId, $table );
			}

			$elapsed = microtime( true ) - $t0;
			$this->writeln(
				"... $table processed up to revision id $endId of {$minmax->max}"
				. " ($this->count rows in $elapsed seconds)"
			);
		}

		$elapsed = microtime( true ) - $t0;
		$this->writeln( "Done populating $table table. Processed $this->count rows in $elapsed seconds" );
	}

	/**
	 * Insert the content and slot rows for one batch of revision/archive rows inside a
	 * single transaction. On any exception the transaction is rolled back and the
	 * script is aborted via fatalError().
	 *
	 * @param ResultWrapper $rows Rows selected by populateTable(); iterated twice
	 * @param int $startId First ID of the batch's range, used only for error reporting
	 * @param string $table Source table name, used only for error reporting
	 */
	private function populateContentTablesForRowBatch( ResultWrapper $rows, $startId, $table ) {
		$this->beginTransaction( $this->dbw, __METHOD__ );

		// Without --reuse-content the map only lives for this batch; with it, work directly
		// on the shared map so that rows inserted here are visible to later batches.
		if ( $this->contentRowMap === null ) {
			$map = [];
		} else {
			$map = &$this->contentRowMap;
		}
		$contentKeys = [];

		try {
			// Step 1: Figure out content rows needing insertion.
			$contentRows = [];
			foreach ( $rows as $row ) {
				$revisionId = $row->rev_id;

				Assert::invariant( $revisionId !== null, 'rev_id must not be null' );

				$modelId = $this->contentModelStore->acquireId( $this->getContentModel( $row ) );
				$address = SqlBlobStore::makeAddressFromTextId( $row->text_id );

				$key = "{$modelId}:{$address}";
				$contentKeys[$revisionId] = $key;

				if ( !isset( $map[$key] ) ) {
					// Placeholder: isset() is true for false, so a repeated key within this
					// batch is queued only once. Step 2 replaces it with the real content_id.
					$map[$key] = false;
					$contentRows[] = [
						'content_size' => (int)$row->len,
						'content_sha1' => $row->sha1,
						'content_model' => $modelId,
						'content_address' => $address,
					];
				}
			}

			// Step 2: Insert them, then read them back in for use in the next step.
			if ( $contentRows ) {
				$id = $this->dbw->selectField( 'content', 'MAX(content_id)', '', __METHOD__ );
				$this->dbw->insert( 'content', $contentRows, __METHOD__ );
				$res = $this->dbw->select(
					'content',
					[ 'content_id', 'content_model', 'content_address' ],
					'content_id > ' . (int)$id,
					__METHOD__
				);
				foreach ( $res as $row ) {
					$key = $row->content_model . ':' . $row->content_address;
					$map[$key] = $row->content_id;
				}
			}

			// Step 3: Insert the slot rows.
			$slotRows = [];
			foreach ( $rows as $row ) {
				$revisionId = $row->rev_id;
				$contentId = $map[$contentKeys[$revisionId]] ?? false;
				if ( $contentId === false ) {
					throw new \RuntimeException( "Content row for $revisionId not found after content insert" );
				}
				$slotRows[] = [
					'slot_revision_id' => $revisionId,
					'slot_role_id' => $this->mainRoleId,
					'slot_content_id' => $contentId,
					// There's no way to really know the previous revision, so assume no inheriting.
					// rev_parent_id can get changed on undeletions, and deletions can screw up
					// rev_timestamp ordering.
					'slot_origin' => $revisionId,
				];
			}
			$this->dbw->insert( 'slots', $slotRows, __METHOD__ );
			$this->count += count( $slotRows );
			$this->totalCount += count( $slotRows );
		} catch ( \Exception $e ) {
			$this->rollbackTransaction( $this->dbw, __METHOD__ );
			$this->fatalError( "Failed to populate content table $table row batch starting at $startId "
				. "due to exception: " . $e->__toString() );
		}

		$this->commitTransaction( $this->dbw, __METHOD__ );
	}

	/**
	 * Determine the content model for a row: the stored model if one is set, otherwise
	 * the default model for the title built from the row's namespace and title.
	 *
	 * @param \stdClass $row Row with content_model, namespace and title fields
	 * @return string
	 */
	private function getContentModel( $row ) {
		if ( isset( $row->content_model ) ) {
			return $row->content_model;
		}

		$title = Title::makeTitle( $row->namespace, $row->title );

		return ContentHandler::getDefaultModelFor( $title );
	}

	/**
	 * Output a message followed by a newline.
	 *
	 * @param string $msg
	 */
	private function writeln( $msg ) {
		$this->output( "$msg\n" );
	}
}
329
// Tell the maintenance runner which class to instantiate. ::class keeps the name
// checkable by static analysis and safe under renames, unlike a bare string.
$maintClass = PopulateContentTables::class;
require_once RUN_MAINTENANCE_IF_MAIN;