Merge "Add attributes parameter to ShowSearchHitTitle"
[lhc/web/wiklou.git] / includes / filerepo / LocalRepo.php
1 <?php
2 /**
3 * Local repository that stores files in the local filesystem and registers them
4 * in the wiki's own database.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup FileRepo
23 */
24
25 use Wikimedia\Rdbms\ResultWrapper;
26 use Wikimedia\Rdbms\Database;
27 use Wikimedia\Rdbms\IDatabase;
28
29 /**
30 * A repository that stores files in the local filesystem and registers them
31 * in the wiki's own database. This is the most commonly used repository class.
32 *
33 * @ingroup FileRepo
34 */
35 class LocalRepo extends FileRepo {
36 /** @var callable Factory callback, by default LocalFile::newFromTitle */
37 protected $fileFactory = [ 'LocalFile', 'newFromTitle' ];
38 /** @var callable Factory callback, by default LocalFile::newFromKey */
39 protected $fileFactoryKey = [ 'LocalFile', 'newFromKey' ];
40 /** @var callable Factory callback used by newFileFromRow() for rows that have img_name */
41 protected $fileFromRowFactory = [ 'LocalFile', 'newFromRow' ];
42 /** @var callable Factory callback used by newFileFromRow() for rows that have oi_name */
43 protected $oldFileFromRowFactory = [ 'OldLocalFile', 'newFromRow' ];
44 /** @var callable Factory callback, by default OldLocalFile::newFromTitle */
45 protected $oldFileFactory = [ 'OldLocalFile', 'newFromTitle' ];
46 /** @var callable Factory callback, by default OldLocalFile::newFromKey */
47 protected $oldFileFactoryKey = [ 'OldLocalFile', 'newFromKey' ];
48
/**
 * Set up the repository from its configuration array.
 *
 * When the configuration asks for the 'sha1' storage layout, the backend
 * created by the parent constructor is wrapped in a FileBackendDBRepoWrapper.
 *
 * @param array|null $info Repository configuration, handed on to FileRepo
 */
function __construct( array $info = null ) {
	parent::__construct( $info );

	$layout = isset( $info['storageLayout'] ) ? $info['storageLayout'] : null;
	$this->hasSha1Storage = ( $layout === 'sha1' );

	if ( !$this->hasSha1Storage() ) {
		return;
	}

	$wrapperConfig = [
		'backend' => $this->backend,
		'repoName' => $this->name,
		'dbHandleFactory' => $this->getDBFactory()
	];
	$this->backend = new FileBackendDBRepoWrapper( $wrapperConfig );
}
63
/**
 * Build a file object from a database row.
 *
 * A row that has img_name is passed to $fileFromRowFactory; otherwise a row
 * that has oi_name is passed to $oldFileFromRowFactory.
 *
 * @param stdClass $row
 * @return LocalFile
 * @throws MWException If the row has neither img_name nor oi_name
 */
function newFileFromRow( $row ) {
	if ( isset( $row->img_name ) ) {
		$factory = $this->fileFromRowFactory;
	} elseif ( isset( $row->oi_name ) ) {
		$factory = $this->oldFileFromRowFactory;
	} else {
		throw new MWException( __METHOD__ . ': invalid row' );
	}

	return call_user_func( $factory, $row, $this );
}
78
/**
 * Create an old-version file object of this repository from an archive name.
 *
 * @param Title $title
 * @param string $archiveName
 * @return OldLocalFile
 */
function newFromArchiveName( $title, $archiveName ) {
	$oldFile = OldLocalFile::newFromArchiveName( $title, $this, $archiveName );

	return $oldFile;
}
87
/**
 * Delete files in the deleted directory if they are not referenced in the
 * filearchive table. This needs to be done in the repo because it needs to
 * interleave database locks with file operations, which is potentially a
 * remote operation.
 *
 * Nothing is done (and a good Status is returned) when the repository uses
 * sha1 storage.
 *
 * In the returned Status, successCount counts every key that was handled
 * without error (either deleted from the backend or left alone because it is
 * still in use) and failCount counts every key whose backend delete failed.
 *
 * @param array $storageKeys
 *
 * @return Status
 */
function cleanupDeletedBatch( array $storageKeys ) {
	if ( $this->hasSha1Storage() ) {
		wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );
		return Status::newGood();
	}

	$backend = $this->backend; // convenience
	$root = $this->getZonePath( 'deleted' );
	$dbw = $this->getMasterDB();
	$status = $this->newGood();
	// Each key only needs to be looked at once
	$storageKeys = array_unique( $storageKeys );
	foreach ( $storageKeys as $key ) {
		$hashPath = $this->getDeletedHashPath( $key );
		$path = "$root/$hashPath$key";
		$dbw->startAtomic( __METHOD__ );
		// Check for usage in deleted/hidden files and preemptively
		// lock the key to avoid any future use until we are finished.
		$deleted = $this->deletedFileHasKey( $key, 'lock' );
		$hidden = $this->hiddenFileHasKey( $key, 'lock' );
		if ( !$deleted && !$hidden ) { // not in use now
			wfDebug( __METHOD__ . ": deleting $key\n" );
			$op = [ 'op' => 'delete', 'src' => $path ];
			if ( $backend->doOperation( $op )->isOK() ) {
				// A completed delete is a success too; previously only
				// the "still in use" branch was counted.
				$status->successCount++;
			} else {
				$status->error( 'undelete-cleanup-error', $path );
				$status->failCount++;
			}
		} else {
			wfDebug( __METHOD__ . ": $key still in use\n" );
			$status->successCount++;
		}
		$dbw->endAtomic( __METHOD__ );
	}

	return $status;
}
133
/**
 * Tell whether a filearchive row in the 'deleted' storage group uses this key.
 *
 * The lookup is made on the master DB connection.
 *
 * @param string $key File storage key (base-36 sha1 key with file extension)
 * @param string|null $lock Use "lock" to lock the row via FOR UPDATE
 * @return bool File with this key is in use
 */
protected function deletedFileHasKey( $key, $lock = null ) {
	$options = [];
	if ( $lock === 'lock' ) {
		$options[] = 'FOR UPDATE';
	}

	$conds = [
		'fa_storage_group' => 'deleted',
		'fa_storage_key' => $key
	];
	$found = $this->getMasterDB()->selectField(
		'filearchive', '1', $conds, __METHOD__, $options
	);

	return (bool)$found;
}
151
/**
 * Tell whether an oldimage row with the File::DELETED_FILE bit set uses this key.
 *
 * The key is split at its first dot: the part before it is matched against
 * oi_sha1, and the normalized part after it must be the extension at the end
 * of oi_archive_name. The lookup is made on the master DB connection.
 *
 * @param string $key File storage key (base-36 sha1 key with file extension)
 * @param string|null $lock Use "lock" to lock the row via FOR UPDATE
 * @return bool File with this key is in use
 */
protected function hiddenFileHasKey( $key, $lock = null ) {
	$options = [];
	if ( $lock === 'lock' ) {
		$options[] = 'FOR UPDATE';
	}

	$sha1 = self::getHashFromKey( $key );
	// Everything after the first dot is taken as the extension
	$rawExt = substr( $key, strcspn( $key, '.' ) + 1 );
	$ext = File::normalizeExtension( $rawExt );

	$dbw = $this->getMasterDB();
	$conds = [
		'oi_sha1' => $sha1,
		'oi_archive_name ' . $dbw->buildLike( $dbw->anyString(), ".$ext" ),
		$dbw->bitAnd( 'oi_deleted', File::DELETED_FILE ) => File::DELETED_FILE
	];
	$found = $dbw->selectField( 'oldimage', '1', $conds, __METHOD__, $options );

	return (bool)$found;
}
174
/**
 * Gets the SHA1 hash from a storage key
 *
 * The hash is the part of the key before its first dot; a key without a dot
 * is returned whole. An empty key, or one that starts with a dot, gives an
 * empty string.
 *
 * @param string $key
 * @return string
 */
public static function getHashFromKey( $key ) {
	// explode() instead of strtok(): strtok() keeps process-wide tokenizer
	// state (so calling it here would break a caller that is in the middle of
	// its own strtok() loop), skips leading dots, and returns false for ''.
	$parts = explode( '.', (string)$key, 2 );

	return $parts[0];
}
184
/**
 * Checks if there is a redirect named as $title
 *
 * The answer is cached in the main WAN cache: for one day under the shared
 * cache key, or for five minutes under the local cache key when no shared
 * key is available (that key is never invalidated). "No redirect" is cached
 * as an empty string.
 *
 * @param Title $title Title of file
 * @return bool|Title The redirect target, or false if there is no redirect
 */
function checkRedirect( Title $title ) {
	$title = File::normalizeTitle( $title, 'exception' );
	$titleHash = md5( $title->getDBkey() );

	$cacheKey = $this->getSharedCacheKey( 'image_redirect', $titleHash );
	$expiry = 86400; // has invalidation, 1 day
	if ( $cacheKey === false ) {
		$cacheKey = $this->getLocalCacheKey( 'image_redirect', $titleHash );
		$expiry = 300; // no invalidation, 5 minutes
	}

	$method = __METHOD__;
	// Cache-miss callback: look the redirect up in the database
	$lookup = function ( $oldValue, &$ttl, array &$setOpts ) use ( $method, $title ) {
		$dbr = $this->getReplicaDB(); // possibly remote DB

		$setOpts += Database::getCacheSetOptions( $dbr );

		$row = false;
		if ( $title instanceof Title ) {
			$row = $dbr->selectRow(
				[ 'page', 'redirect' ],
				[ 'rd_namespace', 'rd_title' ],
				[
					'page_namespace' => $title->getNamespace(),
					'page_title' => $title->getDBkey(),
					'rd_from = page_id'
				],
				$method
			);
		}

		if ( $row && $row->rd_namespace == NS_FILE ) {
			return Title::makeTitle( $row->rd_namespace, $row->rd_title )->getDBkey();
		}

		return ''; // negative cache
	};

	$redirDbKey = ObjectCache::getMainWANInstance()->getWithSetCallback(
		$cacheKey,
		$expiry,
		$lookup,
		[ 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);

	// @note: also checks " " for b/c
	if ( $redirDbKey === ' ' || strval( $redirDbKey ) === '' ) {
		return false; // no redirect
	}

	// Page is a redirect to another file
	return Title::newFromText( $redirDbKey, NS_FILE );
}
241
/**
 * Find many files at once.
 *
 * Current versions are looked up in one query, old versions (searches that
 * give a 'time') in a second one, and whatever is still unmatched afterwards
 * is checked for file redirects.
 *
 * @param array $items Each item is either something File::normalizeTitle()
 *   accepts, or an array of search parameters with the keys:
 *     - title: as above (required)
 *     - time: timestamp of the wanted version; empty means the current one
 *     - private: if non-empty, files with File::DELETED_FILE set may match; a
 *       User object given here is the user passed to File::userCan()
 *     - ignoreRedirect: if non-empty, redirects are not followed for this item
 * @param int $flags Bitfield; with FileRepo::NAME_AND_TIME_ONLY each match is
 *   returned as [ 'title' => DB key, 'timestamp' => timestamp ] instead of
 *   a File object
 * @return array Map of (searched DB key => match) for the items that were found
 */
public function findFiles( array $items, $flags = 0 ) {
	$finalFiles = []; // map of (DB key => corresponding File) for matches

	$searchSet = []; // map of (normalized DB key => search params)
	foreach ( $items as $item ) {
		if ( is_array( $item ) ) {
			$title = File::normalizeTitle( $item['title'] );
			if ( $title ) {
				$searchSet[$title->getDBkey()] = $item;
			}
		} else {
			$title = File::normalizeTitle( $item );
			if ( $title ) {
				$searchSet[$title->getDBkey()] = [];
			}
		}
	}

	$fileMatchesSearch = function ( File $file, array $search ) {
		// Note: file name comparison done elsewhere (to handle redirects)
		$user = ( !empty( $search['private'] ) && $search['private'] instanceof User )
			? $search['private']
			: null;

		return (
			$file->exists() &&
			(
				( empty( $search['time'] ) && !$file->isOld() ) ||
				( !empty( $search['time'] ) && $search['time'] === $file->getTimestamp() )
			) &&
			( !empty( $search['private'] ) || !$file->isDeleted( File::DELETED_FILE ) ) &&
			$file->userCan( File::DELETED_FILE, $user )
		);
	};

	// Moves every row that satisfies a pending search from $searchSet to $finalFiles
	$applyMatchingFiles = function ( ResultWrapper $res, &$searchSet, &$finalFiles )
		use ( $fileMatchesSearch, $flags )
	{
		global $wgContLang;
		$info = $this->getInfo();
		foreach ( $res as $row ) {
			$file = $this->newFileFromRow( $row );
			// There must have been a search for this DB key, but this has to handle the
			// cases where title capitalization is different on the client and repo wikis.
			$nameKey = strtr( $file->getName(), ' ', '_' );
			$dbKeysLook = [ $nameKey ];
			if ( !empty( $info['initialCapital'] ) ) {
				// Search keys for "hi.png" and "Hi.png" should use the "Hi.png file".
				// Built from the underscore form so both keys are normalized alike.
				$dbKeysLook[] = $wgContLang->lcfirst( $nameKey );
			}
			foreach ( $dbKeysLook as $dbKey ) {
				if ( isset( $searchSet[$dbKey] )
					&& $fileMatchesSearch( $file, $searchSet[$dbKey] )
				) {
					$finalFiles[$dbKey] = ( $flags & FileRepo::NAME_AND_TIME_ONLY )
						? [ 'title' => $dbKey, 'timestamp' => $file->getTimestamp() ]
						: $file;
					unset( $searchSet[$dbKey] );
				}
			}
		}
	};

	$dbr = $this->getReplicaDB();

	// Query image table
	$imgNames = [];
	foreach ( array_keys( $searchSet ) as $dbKey ) {
		$imgNames[] = $this->getNameFromTitle( File::normalizeTitle( $dbKey ) );
	}

	if ( count( $imgNames ) ) {
		$fileQuery = LocalFile::getQueryInfo();
		$res = $dbr->select( $fileQuery['tables'], $fileQuery['fields'], [ 'img_name' => $imgNames ],
			__METHOD__, [], $fileQuery['joins'] );
		$applyMatchingFiles( $res, $searchSet, $finalFiles );
	}

	// Query old image table
	$oiConds = []; // WHERE clause array for each file
	foreach ( $searchSet as $dbKey => $search ) {
		// Same emptiness test as $fileMatchesSearch: an empty 'time' asks for the
		// current version, so it must not trigger an old-version lookup.
		if ( !empty( $search['time'] ) ) {
			$oiConds[] = $dbr->makeList(
				[
					'oi_name' => $this->getNameFromTitle( File::normalizeTitle( $dbKey ) ),
					'oi_timestamp' => $dbr->timestamp( $search['time'] )
				],
				LIST_AND
			);
		}
	}

	if ( count( $oiConds ) ) {
		$fileQuery = OldLocalFile::getQueryInfo();
		$res = $dbr->select( $fileQuery['tables'], $fileQuery['fields'],
			$dbr->makeList( $oiConds, LIST_OR ),
			__METHOD__, [], $fileQuery['joins'] );
		$applyMatchingFiles( $res, $searchSet, $finalFiles );
	}

	// Check for redirects...
	foreach ( $searchSet as $dbKey => $search ) {
		if ( !empty( $search['ignoreRedirect'] ) ) {
			continue;
		}

		$title = File::normalizeTitle( $dbKey );
		$redir = $this->checkRedirect( $title ); // hopefully hits memcached

		if ( $redir && $redir->getNamespace() == NS_FILE ) {
			$file = $this->newFile( $redir );
			if ( $file && $fileMatchesSearch( $file, $search ) ) {
				$file->redirectedFrom( $title->getDBkey() );
				if ( $flags & FileRepo::NAME_AND_TIME_ONLY ) {
					$finalFiles[$dbKey] = [
						'title' => $file->getTitle()->getDBkey(),
						'timestamp' => $file->getTimestamp()
					];
				} else {
					$finalFiles[$dbKey] = $file;
				}
			}
		}
	}

	return $finalFiles;
}
368
/**
 * Get the files whose SHA-1 content hash is $hash, ordered by img_name.
 *
 * @param string $hash A sha1 hash to look for
 * @return File[]
 */
function findBySha1( $hash ) {
	$dbr = $this->getReplicaDB();
	$query = LocalFile::getQueryInfo();
	$conds = [ 'img_sha1' => $hash ];
	$options = [ 'ORDER BY' => 'img_name' ];

	$rows = $dbr->select(
		$query['tables'], $query['fields'], $conds, __METHOD__, $options, $query['joins']
	);

	$files = [];
	foreach ( $rows as $row ) {
		$files[] = $this->newFileFromRow( $row );
	}
	$rows->free();

	return $files;
}
396
/**
 * Get the files that have any of the given SHA-1 content hashes, grouped
 * by hash.
 *
 * Overrides generic implementation in FileRepo for performance reason
 *
 * @param array $hashes An array of hashes
 * @return array Map of (hash => list of file objects); hashes without any
 *   file are absent
 */
function findBySha1s( array $hashes ) {
	if ( $hashes === [] ) {
		return []; // empty parameter
	}

	$dbr = $this->getReplicaDB();
	$query = LocalFile::getQueryInfo();
	$conds = [ 'img_sha1' => $hashes ];
	$options = [ 'ORDER BY' => 'img_name' ];

	$rows = $dbr->select(
		$query['tables'], $query['fields'], $conds, __METHOD__, $options, $query['joins']
	);

	$filesByHash = [];
	foreach ( $rows as $row ) {
		$file = $this->newFileFromRow( $row );
		$filesByHash[$file->getSha1()][] = $file;
	}
	$rows->free();

	return $filesByHash;
}
431
/**
 * Return the files whose name starts with $prefix, ordered by name.
 *
 * @param string $prefix The prefix to search for
 * @param int $limit The maximum amount of files to return
 * @return array
 */
public function findFilesByPrefix( $prefix, $limit ) {
	$options = [
		'ORDER BY' => 'img_name',
		'LIMIT' => intval( $limit )
	];

	// Query database
	$dbr = $this->getReplicaDB();
	$query = LocalFile::getQueryInfo();
	$nameCond = 'img_name ' . $dbr->buildLike( $prefix, $dbr->anyString() );
	$rows = $dbr->select(
		$query['tables'], $query['fields'], $nameCond, __METHOD__, $options, $query['joins']
	);

	// Build file objects
	$matches = [];
	foreach ( $rows as $row ) {
		$matches[] = $this->newFileFromRow( $row );
	}

	return $matches;
}
462
/**
 * Get a replica DB connection (wfGetDB( DB_REPLICA ))
 * @return IDatabase
 */
function getReplicaDB() {
	$dbr = wfGetDB( DB_REPLICA );

	return $dbr;
}
470
/**
 * Old name of getReplicaDB(), to which this simply forwards
 *
 * @return IDatabase
 * @deprecated Since 1.29
 */
function getSlaveDB() {
	$dbr = $this->getReplicaDB();

	return $dbr;
}
480
/**
 * Get a master DB connection (wfGetDB( DB_MASTER ))
 * @return IDatabase
 */
function getMasterDB() {
	$dbw = wfGetDB( DB_MASTER );

	return $dbw;
}
488
/**
 * Get a callback that returns wfGetDB() for the index it is given
 * (DB_REPLICA/DB_MASTER)
 * @return Closure
 */
protected function getDBFactory() {
	$factory = function ( $dbIndex ) {
		return wfGetDB( $dbIndex );
	};

	return $factory;
}
498
/**
 * Get a key on the primary cache for this repository.
 * The parameters are the parts of the key, as for wfMemcKey(), to which
 * they are passed unchanged.
 *
 * Callers such as checkRedirect() treat a false return as "the repository's
 * cache is not accessible at this site".
 *
 * @return string|bool
 */
function getSharedCacheKey( /*...*/ ) {
	$keyParts = func_get_args();
	$key = call_user_func_array( 'wfMemcKey', $keyParts );

	return $key;
}
511
/**
 * Invalidates image redirect cache related to that image
 *
 * The cache entry is deleted from a callback registered with the master DB
 * connection through onTransactionPreCommitOrIdle(). Nothing is done when
 * there is no shared cache key.
 *
 * @param Title $title Title of page
 * @return void
 */
function invalidateImageRedirect( Title $title ) {
	$key = $this->getSharedCacheKey( 'image_redirect', md5( $title->getDBkey() ) );
	if ( !$key ) {
		return;
	}

	$purge = function () use ( $key ) {
		ObjectCache::getMainWANInstance()->delete( $key );
	};
	$this->getMasterDB()->onTransactionPreCommitOrIdle( $purge, __METHOD__ );
}
529
/**
 * Return information about the repository: the parent's information plus a
 * 'favicon' entry holding the expanded $wgFavicon URL.
 *
 * @return array
 * @since 1.22
 */
function getInfo() {
	global $wgFavicon;

	$baseInfo = parent::getInfo();
	$localInfo = [
		'favicon' => wfExpandUrl( $wgFavicon ),
	];

	return array_merge( $baseInfo, $localInfo );
}
543
/**
 * Hands the call to skipWriteOperationIfSha1(), which runs the parent's
 * store() unless the repository uses sha1 storage.
 *
 * @param string $srcPath
 * @param string $dstZone
 * @param string $dstRel
 * @param int $flags
 * @return Status
 */
public function store( $srcPath, $dstZone, $dstRel, $flags = 0 ) {
	$args = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $args );
}
547
/**
 * Hands the call to skipWriteOperationIfSha1(), which runs the parent's
 * storeBatch() unless the repository uses sha1 storage.
 *
 * @param array $triplets
 * @param int $flags
 * @return Status
 */
public function storeBatch( array $triplets, $flags = 0 ) {
	$args = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $args );
}
551
/**
 * Hands the call to skipWriteOperationIfSha1(), which runs the parent's
 * cleanupBatch() unless the repository uses sha1 storage.
 *
 * @param array $files
 * @param int $flags
 * @return Status
 */
public function cleanupBatch( array $files, $flags = 0 ) {
	$args = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $args );
}
555
/**
 * Hands the call to skipWriteOperationIfSha1(), which runs the parent's
 * publish() unless the repository uses sha1 storage.
 *
 * @param mixed $src
 * @param string $dstRel
 * @param string $archiveRel
 * @param int $flags
 * @param array $options
 * @return Status
 */
public function publish( $src, $dstRel, $archiveRel, $flags = 0, array $options = [] ) {
	$args = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $args );
}
565
/**
 * Hands the call to skipWriteOperationIfSha1(), which runs the parent's
 * publishBatch() unless the repository uses sha1 storage.
 *
 * @param array $ntuples
 * @param int $flags
 * @return Status
 */
public function publishBatch( array $ntuples, $flags = 0 ) {
	$args = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $args );
}
569
/**
 * Hands the call to skipWriteOperationIfSha1(), which runs the parent's
 * delete() unless the repository uses sha1 storage.
 *
 * @param mixed $srcRel
 * @param mixed $archiveRel
 * @return Status
 */
public function delete( $srcRel, $archiveRel ) {
	$args = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $args );
}
573
/**
 * Hands the call to skipWriteOperationIfSha1(), which runs the parent's
 * deleteBatch() unless the repository uses sha1 storage.
 *
 * @param array $sourceDestPairs
 * @return Status
 */
public function deleteBatch( array $sourceDestPairs ) {
	$args = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $args );
}
577
/**
 * Run the parent class's implementation of a write operation, unless the
 * storage is sha1-based, in which case the operation is skipped and a good
 * Status is returned.
 *
 * The repository is asserted to be writable first, in both cases.
 *
 * @param string $function Name of the parent method to run
 * @param array $args Arguments to pass to that method
 * @return Status
 */
protected function skipWriteOperationIfSha1( $function, array $args ) {
	$this->assertWritableRepo(); // fail out if read-only

	if ( !$this->hasSha1Storage() ) {
		return call_user_func_array( 'parent::' . $function, $args );
	}

	wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );

	return Status::newGood();
}
595 }