Merge "RevisionStoreDbTestBase, remove redundant needsDB override"
[lhc/web/wiklou.git] / includes / filerepo / LocalRepo.php
1 <?php
2 /**
3 * Local repository that stores files in the local filesystem and registers them
4 * in the wiki's own database.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup FileRepo
23 */
24
25 use MediaWiki\MediaWikiServices;
26 use Wikimedia\Rdbms\ResultWrapper;
27 use Wikimedia\Rdbms\Database;
28 use Wikimedia\Rdbms\IDatabase;
29
30 /**
31 * A repository that stores files in the local filesystem and registers them
32 * in the wiki's own database. This is the most commonly used repository class.
33 *
34 * @ingroup FileRepo
35 */
36 class LocalRepo extends FileRepo {
37 /** @var callable Points at LocalFile::newFromTitle; not invoked in this class (presumably used by FileRepo - TODO confirm) */
38 protected $fileFactory = [ LocalFile::class, 'newFromTitle' ];
39 /** @var callable Points at LocalFile::newFromKey; not invoked in this class (presumably used by FileRepo - TODO confirm) */
40 protected $fileFactoryKey = [ LocalFile::class, 'newFromKey' ];
41 /** @var callable Points at LocalFile::newFromRow; called by newFileFromRow() for rows that carry img_name */
42 protected $fileFromRowFactory = [ LocalFile::class, 'newFromRow' ];
43 /** @var callable Points at OldLocalFile::newFromRow; called by newFileFromRow() for rows that carry oi_name */
44 protected $oldFileFromRowFactory = [ OldLocalFile::class, 'newFromRow' ];
45 /** @var callable Points at OldLocalFile::newFromTitle; not invoked in this class (presumably used by FileRepo - TODO confirm) */
46 protected $oldFileFactory = [ OldLocalFile::class, 'newFromTitle' ];
47 /** @var callable Points at OldLocalFile::newFromKey; not invoked in this class (presumably used by FileRepo - TODO confirm) */
48 protected $oldFileFactoryKey = [ OldLocalFile::class, 'newFromKey' ];
49
/**
 * Set up the repository and, for sha1-layout storage, wrap the file backend.
 *
 * @param array|null $info Repository settings, handed unchanged to
 *   FileRepo::__construct(). A 'storageLayout' entry that is exactly 'sha1'
 *   turns on sha1 storage.
 */
function __construct( array $info = null ) {
	parent::__construct( $info );

	$layout = isset( $info['storageLayout'] ) ? $info['storageLayout'] : null;
	$this->hasSha1Storage = ( $layout === 'sha1' );

	if ( !$this->hasSha1Storage() ) {
		return;
	}

	// Replace the backend set up by the parent with a FileBackendDBRepoWrapper
	// around it, giving the wrapper this repo's name and DB handle factory.
	$wrapperConfig = [
		'backend' => $this->backend,
		'repoName' => $this->name,
		'dbHandleFactory' => $this->getDBFactory()
	];
	$this->backend = new FileBackendDBRepoWrapper( $wrapperConfig );
}
64
/**
 * Build a file object from a database row.
 *
 * A row with img_name set is passed to the fileFromRowFactory callable;
 * otherwise a row with oi_name set is passed to the oldFileFromRowFactory
 * callable. Both receive the row and this repo.
 *
 * @param stdClass $row
 * @return LocalFile
 * @throws MWException If the row has neither img_name nor oi_name set
 */
function newFileFromRow( $row ) {
	if ( isset( $row->img_name ) ) {
		$factory = $this->fileFromRowFactory;
	} elseif ( isset( $row->oi_name ) ) {
		$factory = $this->oldFileFromRowFactory;
	} else {
		throw new MWException( __METHOD__ . ': invalid row' );
	}

	return call_user_func( $factory, $row, $this );
}
79
/**
 * Create an old-version file object for a title and archive name.
 *
 * Thin wrapper around OldLocalFile::newFromArchiveName() that supplies
 * this repo as the owning repository.
 *
 * @param Title $title
 * @param string $archiveName
 * @return OldLocalFile
 */
function newFromArchiveName( $title, $archiveName ) {
	$oldFile = OldLocalFile::newFromArchiveName( $title, $this, $archiveName );

	return $oldFile;
}
88
/**
 * Remove files from the 'deleted' zone once no filearchive row and no
 * hidden oldimage row refers to their storage key.
 *
 * This lives in the repo because database row locks have to be interleaved
 * with file operations, which may be remote.
 *
 * Nothing is done (and a good Status is returned) when the repo uses sha1
 * storage. Otherwise each distinct key is handled in its own atomic section:
 * the usage checks lock the matching rows, and the file is deleted only if
 * both checks come back empty.
 *
 * Counter semantics: failCount is bumped when a delete operation fails,
 * successCount is bumped when a key is still in use and therefore kept.
 * A successful delete changes neither counter.
 *
 * @param string[] $storageKeys
 *
 * @return Status
 */
function cleanupDeletedBatch( array $storageKeys ) {
	if ( $this->hasSha1Storage() ) {
		wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );
		return Status::newGood();
	}

	$fileBackend = $this->backend;
	$deletedRoot = $this->getZonePath( 'deleted' );
	$dbw = $this->getMasterDB();
	$status = $this->newGood();

	foreach ( array_unique( $storageKeys ) as $key ) {
		$path = $deletedRoot . '/' . $this->getDeletedHashPath( $key ) . $key;

		$dbw->startAtomic( __METHOD__ );
		// Both lookups always run: besides reporting usage they lock the
		// rows for this key so it cannot be reused before we are done.
		$inFileArchive = $this->deletedFileHasKey( $key, 'lock' );
		$inHiddenOldImage = $this->hiddenFileHasKey( $key, 'lock' );

		if ( $inFileArchive || $inHiddenOldImage ) {
			wfDebug( __METHOD__ . ": $key still in use\n" );
			$status->successCount++;
		} else {
			wfDebug( __METHOD__ . ": deleting $key\n" );
			$deleteOp = [ 'op' => 'delete', 'src' => $path ];
			$opStatus = $fileBackend->doOperation( $deleteOp );
			if ( !$opStatus->isOK() ) {
				$status->error( 'undelete-cleanup-error', $path );
				$status->failCount++;
			}
		}
		$dbw->endAtomic( __METHOD__ );
	}

	return $status;
}
134
/**
 * Tell whether any filearchive row in the 'deleted' storage group uses
 * the given storage key.
 *
 * The lookup runs against the master DB.
 *
 * @param string $key File storage key (base-36 sha1 key with file extension)
 * @param string|null $lock Pass "lock" to add FOR UPDATE to the query
 * @return bool True if a matching row exists
 */
protected function deletedFileHasKey( $key, $lock = null ) {
	$selectOptions = [];
	if ( $lock === 'lock' ) {
		$selectOptions[] = 'FOR UPDATE';
	}

	$conds = [
		'fa_storage_group' => 'deleted',
		'fa_storage_key' => $key
	];
	$found = $this->getMasterDB()->selectField(
		'filearchive', '1', $conds, __METHOD__, $selectOptions
	);

	return (bool)$found;
}
152
/**
 * Tell whether any oldimage row with the File::DELETED_FILE bit set uses
 * the given storage key.
 *
 * The key is split at its first dot: the part before it is matched against
 * oi_sha1, and the normalized part after it must be the suffix (after a dot)
 * of oi_archive_name. The lookup runs against the master DB.
 *
 * @param string $key File storage key (base-36 sha1 key with file extension)
 * @param string|null $lock Pass "lock" to add FOR UPDATE to the query
 * @return bool True if a matching row exists
 */
protected function hiddenFileHasKey( $key, $lock = null ) {
	$selectOptions = [];
	if ( $lock === 'lock' ) {
		$selectOptions[] = 'FOR UPDATE';
	}

	$sha1 = self::getHashFromKey( $key );
	// Everything after the first dot is treated as the extension
	$rawExt = substr( $key, strcspn( $key, '.' ) + 1 );
	$ext = File::normalizeExtension( $rawExt );

	$dbw = $this->getMasterDB();

	$conds = [ 'oi_sha1' => $sha1 ];
	$conds[] = 'oi_archive_name ' . $dbw->buildLike( $dbw->anyString(), ".$ext" );
	$conds[$dbw->bitAnd( 'oi_deleted', File::DELETED_FILE )] = File::DELETED_FILE;

	$found = $dbw->selectField( 'oldimage', '1', $conds, __METHOD__, $selectOptions );

	return (bool)$found;
}
175
/**
 * Gets the SHA1 hash from a storage key
 *
 * The hash is everything before the first dot of the key. A key without
 * a dot is returned whole.
 *
 * @param string $key Storage key, i.e. a hash optionally followed by
 *   a dot and a file extension
 * @return string The hash part; an empty string if the key is empty or
 *   begins with a dot
 */
public static function getHashFromKey( $key ) {
	// Deliberately not strtok(): that function keeps a single shared
	// tokenizer state, so using it here would silently reset any strtok()
	// loop a caller has in progress. It also returns false for an empty
	// key and skips leading dots (".png" would yield "png").
	return explode( '.', (string)$key, 2 )[0];
}
185
/**
 * Look up whether the page for $title is a redirect to a file page.
 *
 * The answer is cached in the main WAN object cache. When a shared cache
 * key is available the entry is kept for a day (it gets invalidated);
 * otherwise a local key is used and the entry is kept for five minutes.
 * "No redirect" is cached too, as an empty string.
 *
 * @param Title $title Title of file
 * @return bool|Title Redirect target, or false if there is none
 */
function checkRedirect( Title $title ) {
	$title = File::normalizeTitle( $title, 'exception' );

	$keyHash = md5( $title->getDBkey() );
	$cacheKey = $this->getSharedCacheKey( 'image_redirect', $keyHash );
	if ( $cacheKey !== false ) {
		$cacheTtl = 86400; // has invalidation, 1 day
	} else {
		$cacheKey = $this->getLocalCacheKey( 'image_redirect', $keyHash );
		$cacheTtl = 300; // no invalidation, 5 minutes
	}

	$fname = __METHOD__;
	$fetchRedirectKey = function ( $oldValue, &$ttl, array &$setOpts ) use ( $fname, $title ) {
		$dbr = $this->getReplicaDB(); // possibly remote DB

		$setOpts += Database::getCacheSetOptions( $dbr );

		$row = false;
		if ( $title instanceof Title ) {
			$row = $dbr->selectRow(
				[ 'page', 'redirect' ],
				[ 'rd_namespace', 'rd_title' ],
				[
					'page_namespace' => $title->getNamespace(),
					'page_title' => $title->getDBkey(),
					'rd_from = page_id'
				],
				$fname
			);
		}

		if ( $row && $row->rd_namespace == NS_FILE ) {
			return Title::makeTitle( $row->rd_namespace, $row->rd_title )->getDBkey();
		}

		return ''; // negative cache
	};

	$wanCache = MediaWikiServices::getInstance()->getMainWANObjectCache();
	$redirDbKey = $wanCache->getWithSetCallback(
		$cacheKey,
		$cacheTtl,
		$fetchRedirectKey,
		[ 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);

	// @note: " " is also treated as "no redirect" for b/c
	if ( $redirDbKey === ' ' || strval( $redirDbKey ) === '' ) {
		return false; // no redirect
	}

	// Page is a redirect to another file
	return Title::newFromText( $redirDbKey, NS_FILE );
}
242
/**
 * Look up several files at once.
 *
 * Each entry of $items is either something File::normalizeTitle() accepts,
 * or an array of search parameters with a 'title' entry and, optionally:
 *   - 'time': timestamp the file version must have (compared with === against
 *      File::getTimestamp()); without it only non-old files match
 *   - 'private': if non-empty, files with File::DELETED_FILE set may match;
 *      if it is a User, that user is the one passed to File::userCan()
 *   - 'ignoreRedirect': if non-empty, no redirect lookup is done for the item
 * Entries whose title cannot be normalized are dropped.
 *
 * The lookup runs in three steps, each working only on the searches that are
 * still unmatched: the current-file query (LocalFile::getQueryInfo()), the
 * old-file query (OldLocalFile::getQueryInfo()) for searches with 'time' set,
 * and finally redirects via checkRedirect().
 *
 * @param array $items
 * @param int $flags With FileRepo::NAME_AND_TIME_ONLY set, each result is an
 *   array with 'title' and 'timestamp' entries instead of a File object
 * @return array Map of (searched DB key => File, or array as described above)
 */
243 public function findFiles( array $items, $flags = 0 ) {
244 $finalFiles = []; // map of (DB key => corresponding File, or title/timestamp array) for matches
245
246 $searchSet = []; // map of (normalized DB key => search params)
247 foreach ( $items as $item ) {
248 if ( is_array( $item ) ) {
249 $title = File::normalizeTitle( $item['title'] );
250 if ( $title ) {
251 $searchSet[$title->getDBkey()] = $item;
252 }
253 } else {
254 $title = File::normalizeTitle( $item );
255 if ( $title ) {
256 $searchSet[$title->getDBkey()] = [];
257 }
258 }
259 }
260
261 $fileMatchesSearch = function ( File $file, array $search ) {
262 // Note: file name comparison done elsewhere (to handle redirects)
263 $user = ( !empty( $search['private'] ) && $search['private'] instanceof User )
264 ? $search['private']
265 : null;
266
267 return (
268 $file->exists() &&
269 (
270 ( empty( $search['time'] ) && !$file->isOld() ) ||
271 ( !empty( $search['time'] ) && $search['time'] === $file->getTimestamp() )
272 ) &&
273 ( !empty( $search['private'] ) || !$file->isDeleted( File::DELETED_FILE ) ) &&
274 $file->userCan( File::DELETED_FILE, $user )
275 );
276 };
277
278 $applyMatchingFiles = function ( ResultWrapper $res, &$searchSet, &$finalFiles )
279 use ( $fileMatchesSearch, $flags )
280 {
281 $contLang = MediaWikiServices::getInstance()->getContentLanguage();
282 $info = $this->getInfo();
283 foreach ( $res as $row ) {
284 $file = $this->newFileFromRow( $row );
285 // There must have been a search for this DB key, but this has to handle the
286 // cases where title capitalization is different on the client and repo wikis.
287 $dbKeysLook = [ strtr( $file->getName(), ' ', '_' ) ];
288 if ( !empty( $info['initialCapital'] ) ) {
289 // Search keys for "hi.png" and "Hi.png" should use the "Hi.png" file
290 $dbKeysLook[] = $contLang->lcfirst( $file->getName() );
291 }
292 foreach ( $dbKeysLook as $dbKey ) {
293 if ( isset( $searchSet[$dbKey] )
294 && $fileMatchesSearch( $file, $searchSet[$dbKey] )
295 ) {
296 $finalFiles[$dbKey] = ( $flags & FileRepo::NAME_AND_TIME_ONLY )
297 ? [ 'title' => $dbKey, 'timestamp' => $file->getTimestamp() ]
298 : $file;
299 unset( $searchSet[$dbKey] );
300 }
301 }
302 }
303 };
304
305 $dbr = $this->getReplicaDB();
306
307 // Query image table
308 $imgNames = [];
309 foreach ( array_keys( $searchSet ) as $dbKey ) {
310 $imgNames[] = $this->getNameFromTitle( File::normalizeTitle( $dbKey ) );
311 }
312
313 if ( count( $imgNames ) ) {
314 $fileQuery = LocalFile::getQueryInfo();
315 $res = $dbr->select( $fileQuery['tables'], $fileQuery['fields'], [ 'img_name' => $imgNames ],
316 __METHOD__, [], $fileQuery['joins'] );
317 $applyMatchingFiles( $res, $searchSet, $finalFiles );
318 }
319
320 // Query old image table (only for still-unmatched searches that specify a 'time')
321 $oiConds = []; // WHERE clause array for each file
322 foreach ( $searchSet as $dbKey => $search ) {
323 if ( isset( $search['time'] ) ) {
324 $oiConds[] = $dbr->makeList(
325 [
326 'oi_name' => $this->getNameFromTitle( File::normalizeTitle( $dbKey ) ),
327 'oi_timestamp' => $dbr->timestamp( $search['time'] )
328 ],
329 LIST_AND
330 );
331 }
332 }
333
334 if ( count( $oiConds ) ) {
335 $fileQuery = OldLocalFile::getQueryInfo();
336 $res = $dbr->select( $fileQuery['tables'], $fileQuery['fields'],
337 $dbr->makeList( $oiConds, LIST_OR ),
338 __METHOD__, [], $fileQuery['joins'] );
339 $applyMatchingFiles( $res, $searchSet, $finalFiles );
340 }
341
342 // Check for redirects among the searches that are still unmatched...
343 foreach ( $searchSet as $dbKey => $search ) {
344 if ( !empty( $search['ignoreRedirect'] ) ) {
345 continue;
346 }
347
348 $title = File::normalizeTitle( $dbKey );
349 $redir = $this->checkRedirect( $title ); // hopefully hits memcached
350
351 if ( $redir && $redir->getNamespace() == NS_FILE ) {
352 $file = $this->newFile( $redir );
353 if ( $file && $fileMatchesSearch( $file, $search ) ) {
354 $file->redirectedFrom( $title->getDBkey() );
355 if ( $flags & FileRepo::NAME_AND_TIME_ONLY ) {
356 $finalFiles[$dbKey] = [
357 'title' => $file->getTitle()->getDBkey(),
358 'timestamp' => $file->getTimestamp()
359 ];
360 } else {
361 $finalFiles[$dbKey] = $file;
362 }
363 }
364 }
365 }
366
367 return $finalFiles;
368 }
369
/**
 * Find every file whose img_sha1 equals the given hash.
 *
 * Rows are read from the replica DB, ordered by img_name, and turned into
 * file objects with newFileFromRow().
 *
 * @param string $hash A sha1 hash to look for
 * @return LocalFile[]
 */
function findBySha1( $hash ) {
	$queryInfo = LocalFile::getQueryInfo();
	$rows = $this->getReplicaDB()->select(
		$queryInfo['tables'],
		$queryInfo['fields'],
		[ 'img_sha1' => $hash ],
		__METHOD__,
		[ 'ORDER BY' => 'img_name' ],
		$queryInfo['joins']
	);

	$files = [];
	foreach ( $rows as $row ) {
		$files[] = $this->newFileFromRow( $row );
	}
	$rows->free();

	return $files;
}
397
/**
 * Find the files matching any of several SHA-1 content hashes, using a
 * single query.
 *
 * Overrides the generic implementation in FileRepo for performance reasons.
 *
 * @param string[] $hashes An array of hashes
 * @return array[] Map of (sha1 as reported by the file => list of file
 *   objects), with files in img_name order; empty if $hashes is empty
 */
function findBySha1s( array $hashes ) {
	if ( $hashes === [] ) {
		return []; // empty parameter
	}

	$queryInfo = LocalFile::getQueryInfo();
	$rows = $this->getReplicaDB()->select(
		$queryInfo['tables'],
		$queryInfo['fields'],
		[ 'img_sha1' => $hashes ],
		__METHOD__,
		[ 'ORDER BY' => 'img_name' ],
		$queryInfo['joins']
	);

	$filesByHash = [];
	foreach ( $rows as $row ) {
		$file = $this->newFileFromRow( $row );
		$filesByHash[$file->getSha1()][] = $file;
	}
	$rows->free();

	return $filesByHash;
}
432
/**
 * List files whose img_name begins with $prefix, in img_name order.
 *
 * @param string $prefix The prefix to search for
 * @param int $limit The maximum amount of files to return (run through intval())
 * @return LocalFile[]
 */
public function findFilesByPrefix( $prefix, $limit ) {
	$dbr = $this->getReplicaDB();
	$queryInfo = LocalFile::getQueryInfo();

	$prefixCond = 'img_name ' . $dbr->buildLike( $prefix, $dbr->anyString() );
	$rows = $dbr->select(
		$queryInfo['tables'],
		$queryInfo['fields'],
		$prefixCond,
		__METHOD__,
		[ 'ORDER BY' => 'img_name', 'LIMIT' => intval( $limit ) ],
		$queryInfo['joins']
	);

	$matches = [];
	foreach ( $rows as $row ) {
		$matches[] = $this->newFileFromRow( $row );
	}

	return $matches;
}
463
/**
 * Get the DB_REPLICA connection from wfGetDB().
 *
 * @return IDatabase
 */
function getReplicaDB() {
	$dbr = wfGetDB( DB_REPLICA );

	return $dbr;
}
471
/**
 * Old name for getReplicaDB(); simply forwards to it.
 *
 * @return IDatabase
 * @deprecated Since 1.29
 */
function getSlaveDB() {
	$dbr = $this->getReplicaDB();

	return $dbr;
}
481
/**
 * Get the DB_MASTER connection from wfGetDB().
 *
 * @return IDatabase
 */
function getMasterDB() {
	$dbw = wfGetDB( DB_MASTER );

	return $dbw;
}
489
/**
 * Get a closure that hands out DB handles: called with an index
 * (DB_REPLICA/DB_MASTER), it returns wfGetDB() for that index.
 *
 * @return Closure
 */
protected function getDBFactory() {
	$factory = function ( $index ) {
		return wfGetDB( $index );
	};

	return $factory;
}
499
/**
 * Get a key on the primary cache for this repository.
 * The parameters are the parts of the key, as for wfMemcKey().
 *
 * This implementation always forwards its arguments to wfMemcKey().
 * Callers in this class nevertheless cope with a false return, meaning
 * the repository's cache is not accessible at this site (presumably
 * produced by overriding implementations - TODO confirm).
 *
 * @return string|bool
 */
function getSharedCacheKey( /*...*/ ) {
	return call_user_func_array( 'wfMemcKey', func_get_args() );
}
512
/**
 * Drop the cached image redirect lookup for a title.
 *
 * Does nothing if no shared cache key is available. Otherwise the cache
 * delete is not issued right away: it is registered on the master DB via
 * onTransactionPreCommitOrIdle().
 *
 * @param Title $title Title of page
 * @return void
 */
function invalidateImageRedirect( Title $title ) {
	$cacheKey = $this->getSharedCacheKey( 'image_redirect', md5( $title->getDBkey() ) );
	if ( !$cacheKey ) {
		return;
	}

	$purge = function () use ( $cacheKey ) {
		MediaWikiServices::getInstance()->getMainWANObjectCache()->delete( $cacheKey );
	};
	$this->getMasterDB()->onTransactionPreCommitOrIdle( $purge, __METHOD__ );
}
530
/**
 * Return information about the repository: everything the parent reports,
 * plus a 'favicon' entry holding the expanded $wgFavicon URL.
 *
 * @return array
 * @since 1.22
 */
function getInfo() {
	global $wgFavicon;

	$baseInfo = parent::getInfo();
	$localInfo = [
		'favicon' => wfExpandUrl( $wgFavicon ),
	];

	return array_merge( $baseInfo, $localInfo );
}
544
/**
 * Run FileRepo::store() with the arguments as given, unless the repo uses
 * sha1 storage, in which case nothing is written (see
 * skipWriteOperationIfSha1()).
 *
 * @param mixed $srcPath Forwarded unchanged to the parent
 * @param mixed $dstZone Forwarded unchanged to the parent
 * @param mixed $dstRel Forwarded unchanged to the parent
 * @param int $flags Forwarded unchanged to the parent
 * @return Status
 */
public function store( $srcPath, $dstZone, $dstRel, $flags = 0 ) {
	$passedArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
}
548
/**
 * Run FileRepo::storeBatch() with the arguments as given, unless the repo
 * uses sha1 storage, in which case nothing is written (see
 * skipWriteOperationIfSha1()).
 *
 * @param array $triplets Forwarded unchanged to the parent
 * @param int $flags Forwarded unchanged to the parent
 * @return Status
 */
public function storeBatch( array $triplets, $flags = 0 ) {
	$passedArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
}
552
/**
 * Run FileRepo::cleanupBatch() with the arguments as given, unless the repo
 * uses sha1 storage, in which case nothing is done (see
 * skipWriteOperationIfSha1()).
 *
 * @param array $files Forwarded unchanged to the parent
 * @param int $flags Forwarded unchanged to the parent
 * @return Status
 */
public function cleanupBatch( array $files, $flags = 0 ) {
	$passedArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
}
556
/**
 * Run FileRepo::publish() with the arguments as given, unless the repo uses
 * sha1 storage, in which case nothing is written (see
 * skipWriteOperationIfSha1()).
 *
 * @param mixed $src Forwarded unchanged to the parent
 * @param mixed $dstRel Forwarded unchanged to the parent
 * @param mixed $archiveRel Forwarded unchanged to the parent
 * @param int $flags Forwarded unchanged to the parent
 * @param array $options Forwarded unchanged to the parent
 * @return Status
 */
public function publish(
	$src,
	$dstRel,
	$archiveRel,
	$flags = 0,
	array $options = []
) {
	$passedArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
}
566
/**
 * Run FileRepo::publishBatch() with the arguments as given, unless the repo
 * uses sha1 storage, in which case nothing is written (see
 * skipWriteOperationIfSha1()).
 *
 * @param array $ntuples Forwarded unchanged to the parent
 * @param int $flags Forwarded unchanged to the parent
 * @return Status
 */
public function publishBatch( array $ntuples, $flags = 0 ) {
	$passedArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
}
570
/**
 * Run FileRepo::delete() with the arguments as given, unless the repo uses
 * sha1 storage, in which case nothing is done (see
 * skipWriteOperationIfSha1()).
 *
 * @param mixed $srcRel Forwarded unchanged to the parent
 * @param mixed $archiveRel Forwarded unchanged to the parent
 * @return Status
 */
public function delete( $srcRel, $archiveRel ) {
	$passedArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
}
574
/**
 * Run FileRepo::deleteBatch() with the arguments as given, unless the repo
 * uses sha1 storage, in which case nothing is done (see
 * skipWriteOperationIfSha1()).
 *
 * @param array $sourceDestPairs Forwarded unchanged to the parent
 * @return Status
 */
public function deleteBatch( array $sourceDestPairs ) {
	$passedArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $passedArgs );
}
578
/**
 * Common gate for the write operations of this class.
 *
 * Always asserts that the repo is writable first. For sha1-based storage
 * the operation is then skipped and a good Status returned; otherwise the
 * parent class method named $function is called with $args.
 *
 * @param string $function Name of the parent method to run
 * @param array $args Arguments to pass to that method
 * @return Status
 */
protected function skipWriteOperationIfSha1( $function, array $args ) {
	$this->assertWritableRepo(); // fail out if read-only

	if ( !$this->hasSha1Storage() ) {
		return parent::$function( ...$args );
	}

	wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );

	return Status::newGood();
}
596 }