Merge "Handle missing namespace prefix in XML dumps more gracefully"
[lhc/web/wiklou.git] / includes / filerepo / LocalRepo.php
1 <?php
2 /**
3 * Local repository that stores files in the local filesystem and registers them
4 * in the wiki's own database.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup FileRepo
23 */
24
25 use Wikimedia\Rdbms\ResultWrapper;
26
27 /**
28 * A repository that stores files in the local filesystem and registers them
29 * in the wiki's own database. This is the most commonly used repository class.
30 *
31 * @ingroup FileRepo
32 */
33 class LocalRepo extends FileRepo {
34 /** @var callable */
35 protected $fileFactory = [ 'LocalFile', 'newFromTitle' ];
36 /** @var callable */
37 protected $fileFactoryKey = [ 'LocalFile', 'newFromKey' ];
38 /** @var callable */
39 protected $fileFromRowFactory = [ 'LocalFile', 'newFromRow' ];
40 /** @var callable */
41 protected $oldFileFromRowFactory = [ 'OldLocalFile', 'newFromRow' ];
42 /** @var callable */
43 protected $oldFileFactory = [ 'OldLocalFile', 'newFromTitle' ];
44 /** @var callable */
45 protected $oldFileFactoryKey = [ 'OldLocalFile', 'newFromKey' ];
46
/**
 * Set up the repository and, when it is configured for sha1 storage,
 * wrap the file backend in a FileBackendDBRepoWrapper.
 *
 * @param array|null $info Repository settings; passed unchanged to the parent
 *   constructor. Only the 'storageLayout' entry is read here.
 */
function __construct( array $info = null ) {
	parent::__construct( $info );

	// Only the exact string 'sha1' switches sha1 storage on
	$layout = isset( $info['storageLayout'] ) ? $info['storageLayout'] : null;
	$this->hasSha1Storage = ( $layout === 'sha1' );

	if ( !$this->hasSha1Storage() ) {
		return;
	}

	$wrapperConfig = [
		'backend' => $this->backend,
		'repoName' => $this->name,
		'dbHandleFactory' => $this->getDBFactory()
	];
	$this->backend = new FileBackendDBRepoWrapper( $wrapperConfig );
}
61
/**
 * Build a file object from a database row.
 *
 * A row carrying an img_name field is handed to the current-file factory,
 * a row carrying an oi_name field to the old-file factory.
 *
 * @param stdClass $row
 * @return LocalFile
 * @throws MWException If the row has neither an img_name nor an oi_name field
 */
function newFileFromRow( $row ) {
	if ( isset( $row->img_name ) ) {
		$factory = $this->fileFromRowFactory;
	} elseif ( isset( $row->oi_name ) ) {
		$factory = $this->oldFileFromRowFactory;
	} else {
		throw new MWException( __METHOD__ . ': invalid row' );
	}

	return call_user_func( $factory, $row, $this );
}
76
/**
 * Create an old-version file object for this repository from a title and
 * an archive name.
 *
 * @param Title $title
 * @param string $archiveName
 * @return OldLocalFile
 */
function newFromArchiveName( $title, $archiveName ) {
	$oldFile = OldLocalFile::newFromArchiveName( $title, $this, $archiveName );

	return $oldFile;
}
85
/**
 * Remove files from the 'deleted' zone once neither the filearchive table
 * nor a hidden oldimage row refers to their storage key.
 *
 * This lives in the repo because database locks have to be interleaved with
 * file operations, which may be remote.
 *
 * Each distinct key is handled inside its own atomic section on the master
 * DB. successCount is incremented for keys that are still in use; failCount
 * (together with an 'undelete-cleanup-error' message) for keys whose backend
 * delete did not succeed.
 *
 * @param array $storageKeys
 *
 * @return Status
 */
function cleanupDeletedBatch( array $storageKeys ) {
	if ( $this->hasSha1Storage() ) {
		wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );
		return Status::newGood();
	}

	$fileBackend = $this->backend;
	$deletedRoot = $this->getZonePath( 'deleted' );
	$dbw = $this->getMasterDB();
	$status = $this->newGood();

	foreach ( array_unique( $storageKeys ) as $key ) {
		$path = $deletedRoot . '/' . $this->getDeletedHashPath( $key ) . $key;

		$dbw->startAtomic( __METHOD__ );
		// Both lookups always run: besides answering "is it used?" they take
		// row locks that keep the key from being reused until we are done.
		$isDeleted = $this->deletedFileHasKey( $key, 'lock' );
		$isHidden = $this->hiddenFileHasKey( $key, 'lock' );
		if ( $isDeleted || $isHidden ) {
			wfDebug( __METHOD__ . ": $key still in use\n" );
			$status->successCount++;
		} else {
			wfDebug( __METHOD__ . ": deleting $key\n" );
			$opStatus = $fileBackend->doOperation( [ 'op' => 'delete', 'src' => $path ] );
			if ( !$opStatus->isOK() ) {
				$status->error( 'undelete-cleanup-error', $path );
				$status->failCount++;
			}
		}
		$dbw->endAtomic( __METHOD__ );
	}

	return $status;
}
131
/**
 * Tell whether a filearchive row in the 'deleted' storage group uses this
 * storage key. The query is run against the master DB.
 *
 * @param string $key File storage key (base-36 sha1 key with file extension)
 * @param string|null $lock Use "lock" to lock the row via FOR UPDATE
 * @return bool File with this key is in use
 */
protected function deletedFileHasKey( $key, $lock = null ) {
	$options = [];
	if ( $lock === 'lock' ) {
		$options[] = 'FOR UPDATE';
	}

	$conds = [ 'fa_storage_group' => 'deleted', 'fa_storage_key' => $key ];
	$found = $this->getMasterDB()->selectField(
		'filearchive', '1', $conds, __METHOD__, $options
	);

	return (bool)$found;
}
149
/**
 * Tell whether an oldimage row whose file is hidden (File::DELETED_FILE bit
 * set in oi_deleted) matches this storage key.
 *
 * The key is split at its first '.': the part before it is matched against
 * oi_sha1, the normalized part after it against the end of oi_archive_name.
 * The query is run against the master DB.
 *
 * @param string $key File storage key (base-36 sha1 key with file extension)
 * @param string|null $lock Use "lock" to lock the row via FOR UPDATE
 * @return bool File with this key is in use
 */
protected function hiddenFileHasKey( $key, $lock = null ) {
	$options = [];
	if ( $lock === 'lock' ) {
		$options[] = 'FOR UPDATE';
	}

	$sha1 = self::getHashFromKey( $key );
	// Everything after the first '.' is treated as the extension
	$rawExt = substr( $key, strcspn( $key, '.' ) + 1 );
	$ext = File::normalizeExtension( $rawExt );

	$dbw = $this->getMasterDB();
	$conds = [
		'oi_sha1' => $sha1,
		'oi_archive_name ' . $dbw->buildLike( $dbw->anyString(), ".$ext" ),
		$dbw->bitAnd( 'oi_deleted', File::DELETED_FILE ) => File::DELETED_FILE
	];

	return (bool)$dbw->selectField( 'oldimage', '1', $conds, __METHOD__, $options );
}
172
/**
 * Gets the SHA1 hash from a storage key
 *
 * The hash is the run of characters up to the first '.' in the key. Leading
 * dots are skipped first, so ".abc.png" yields "abc".
 *
 * @param string $key
 * @return string|bool The hash part, or false when the key is empty or
 *   consists only of dots
 */
public static function getHashFromKey( $key ) {
	// Deliberately not strtok(): it keeps one process-wide cursor, so calling
	// this helper in the middle of a caller's own strtok() loop would reset
	// that loop. The expressions below return the same values without
	// touching any shared state.
	$trimmed = ltrim( $key, '.' );
	if ( $trimmed === '' ) {
		return false;
	}

	return substr( $trimmed, 0, strcspn( $trimmed, '.' ) );
}
182
/**
 * Checks if there is a redirect named as $title
 *
 * The answer is looked up through the main WAN cache. On a cache miss the
 * page and redirect tables are queried on the replica DB, and an empty
 * string is stored when no redirect into the file namespace exists.
 *
 * @param Title $title Title of file
 * @return bool|Title The redirect target, or false if there is no redirect
 */
function checkRedirect( Title $title ) {
	$title = File::normalizeTitle( $title, 'exception' );
	$keyHash = md5( $title->getDBkey() );

	$cacheKey = $this->getSharedCacheKey( 'image_redirect', $keyHash );
	if ( $cacheKey !== false ) {
		$cacheTtl = 86400; // has invalidation, 1 day
	} else {
		$cacheKey = $this->getLocalCacheKey( 'image_redirect', $keyHash );
		$cacheTtl = 300; // no invalidation, 5 minutes
	}

	// Captured here because __METHOD__ inside the closure would name the closure
	$fname = __METHOD__;
	$regenerate = function ( $oldValue, &$ttl, array &$setOpts ) use ( $fname, $title ) {
		$dbr = $this->getReplicaDB(); // possibly remote DB

		$setOpts += Database::getCacheSetOptions( $dbr );

		$row = false;
		if ( $title instanceof Title ) {
			$row = $dbr->selectRow(
				[ 'page', 'redirect' ],
				[ 'rd_namespace', 'rd_title' ],
				[
					'page_namespace' => $title->getNamespace(),
					'page_title' => $title->getDBkey(),
					'rd_from = page_id'
				],
				$fname
			);
		}

		if ( $row && $row->rd_namespace == NS_FILE ) {
			return Title::makeTitle( $row->rd_namespace, $row->rd_title )->getDBkey();
		}

		return ''; // negative cache
	};

	$redirDbKey = ObjectCache::getMainWANInstance()->getWithSetCallback(
		$cacheKey,
		$cacheTtl,
		$regenerate,
		[ 'pcTTL' => WANObjectCache::TTL_PROC_LONG ]
	);

	// @note: also checks " " for b/c
	if ( $redirDbKey === ' ' || strval( $redirDbKey ) === '' ) {
		return false; // no redirect
	}

	// Page is a redirect to another file
	return Title::newFromText( $redirDbKey, NS_FILE );
}
239
/**
 * Look up several files at once.
 *
 * The image table is searched first, then the oldimage table for searches
 * that carry a 'time', and finally redirects are followed for whatever is
 * still unmatched.
 *
 * @param array $items Each entry is either something File::normalizeTitle()
 *   accepts, or an array with a 'title' entry plus the optional entries
 *   'time', 'private' and 'ignoreRedirect' that are read below
 * @param int $flags Bitfield; FileRepo::NAME_AND_TIME_ONLY makes each result
 *   a [ 'title' => ..., 'timestamp' => ... ] array instead of a File object
 * @return array Map of (searched DB key => File object or title/timestamp array)
 */
public function findFiles( array $items, $flags = 0 ) {
	$found = []; // map of (DB key => corresponding File) for matches

	$pending = []; // map of (normalized DB key => search params)
	foreach ( $items as $item ) {
		$hasParams = is_array( $item );
		$title = File::normalizeTitle( $hasParams ? $item['title'] : $item );
		if ( $title ) {
			$pending[$title->getDBkey()] = $hasParams ? $item : [];
		}
	}

	$matchesSearch = function ( File $file, array $search ) {
		// Note: file name comparison done elsewhere (to handle redirects)
		$wantsPrivate = !empty( $search['private'] );
		$wantsTime = !empty( $search['time'] );
		$user = ( $wantsPrivate && $search['private'] instanceof User )
			? $search['private']
			: null;

		if ( !$file->exists() ) {
			return false;
		}
		if ( $wantsTime ) {
			// A specific version was requested: timestamps must be identical
			if ( $search['time'] !== $file->getTimestamp() ) {
				return false;
			}
		} elseif ( $file->isOld() ) {
			// No version requested: only the current version qualifies
			return false;
		}
		if ( !$wantsPrivate && $file->isDeleted( File::DELETED_FILE ) ) {
			return false;
		}

		return (bool)$file->userCan( File::DELETED_FILE, $user );
	};

	$applyRows = function ( ResultWrapper $res, &$searchSet, &$finalFiles )
		use ( $matchesSearch, $flags )
	{
		global $wgContLang;
		$info = $this->getInfo();
		foreach ( $res as $row ) {
			$file = $this->newFileFromRow( $row );
			// There must have been a search for this DB key, but this has to handle the
			// cases where title capitalization is different on the client and repo wikis.
			$candidateKeys = [ strtr( $file->getName(), ' ', '_' ) ];
			if ( !empty( $info['initialCapital'] ) ) {
				// Search keys for "hi.png" and "Hi.png" should use the "Hi.png" file
				$candidateKeys[] = $wgContLang->lcfirst( $file->getName() );
			}
			foreach ( $candidateKeys as $dbKey ) {
				if ( !isset( $searchSet[$dbKey] )
					|| !$matchesSearch( $file, $searchSet[$dbKey] )
				) {
					continue;
				}
				if ( $flags & FileRepo::NAME_AND_TIME_ONLY ) {
					$finalFiles[$dbKey] = [
						'title' => $dbKey,
						'timestamp' => $file->getTimestamp()
					];
				} else {
					$finalFiles[$dbKey] = $file;
				}
				unset( $searchSet[$dbKey] );
			}
		}
	};

	$dbr = $this->getReplicaDB();

	// Query image table
	$imgNames = [];
	foreach ( array_keys( $pending ) as $dbKey ) {
		$imgNames[] = $this->getNameFromTitle( File::normalizeTitle( $dbKey ) );
	}

	if ( $imgNames ) {
		$res = $dbr->select(
			'image',
			LocalFile::selectFields(),
			[ 'img_name' => $imgNames ],
			__METHOD__
		);
		$applyRows( $res, $pending, $found );
	}

	// Query old image table
	$oiConds = []; // WHERE clause array for each file
	foreach ( $pending as $dbKey => $search ) {
		if ( !isset( $search['time'] ) ) {
			continue;
		}
		$oiConds[] = $dbr->makeList(
			[
				'oi_name' => $this->getNameFromTitle( File::normalizeTitle( $dbKey ) ),
				'oi_timestamp' => $dbr->timestamp( $search['time'] )
			],
			LIST_AND
		);
	}

	if ( $oiConds ) {
		$res = $dbr->select(
			'oldimage',
			OldLocalFile::selectFields(),
			$dbr->makeList( $oiConds, LIST_OR ),
			__METHOD__
		);
		$applyRows( $res, $pending, $found );
	}

	// Check for redirects...
	foreach ( $pending as $dbKey => $search ) {
		if ( !empty( $search['ignoreRedirect'] ) ) {
			continue;
		}

		$title = File::normalizeTitle( $dbKey );
		$redir = $this->checkRedirect( $title ); // hopefully hits memcached
		if ( !$redir || $redir->getNamespace() != NS_FILE ) {
			continue;
		}

		$file = $this->newFile( $redir );
		if ( !$file || !$matchesSearch( $file, $search ) ) {
			continue;
		}

		$file->redirectedFrom( $title->getDBkey() );
		$found[$dbKey] = ( $flags & FileRepo::NAME_AND_TIME_ONLY )
			? [
				'title' => $file->getTitle()->getDBkey(),
				'timestamp' => $file->getTimestamp()
			]
			: $file;
	}

	return $found;
}
364
/**
 * Collect file objects for every image row whose img_sha1 equals the given
 * hash, ordered by img_name.
 *
 * @param string $hash A sha1 hash to look for
 * @return File[]
 */
function findBySha1( $hash ) {
	$res = $this->getReplicaDB()->select(
		'image',
		LocalFile::selectFields(),
		[ 'img_sha1' => $hash ],
		__METHOD__,
		[ 'ORDER BY' => 'img_name' ]
	);

	$files = [];
	foreach ( $res as $row ) {
		$files[] = $this->newFileFromRow( $row );
	}
	$res->free();

	return $files;
}
390
/**
 * Collect file objects for a set of SHA-1 content hashes with one query on
 * the image table, grouping the results by each file's hash.
 *
 * Overrides generic implementation in FileRepo for performance reason
 *
 * @param array $hashes An array of hashes
 * @return array Map of (hash => list of file objects); empty when no hashes
 *   were given
 */
function findBySha1s( array $hashes ) {
	if ( $hashes === [] ) {
		return []; // empty parameter
	}

	$res = $this->getReplicaDB()->select(
		'image',
		LocalFile::selectFields(),
		[ 'img_sha1' => $hashes ],
		__METHOD__,
		[ 'ORDER BY' => 'img_name' ]
	);

	$filesByHash = [];
	foreach ( $res as $row ) {
		$file = $this->newFileFromRow( $row );
		$filesByHash[$file->getSha1()][] = $file;
	}
	$res->free();

	return $filesByHash;
}
423
/**
 * Return file objects for image rows whose img_name starts with $prefix,
 * ordered by img_name.
 *
 * @param string $prefix The prefix to search for
 * @param int $limit The maximum amount of files to return
 * @return array
 */
public function findFilesByPrefix( $prefix, $limit ) {
	$dbr = $this->getReplicaDB();

	$nameCond = 'img_name ' . $dbr->buildLike( $prefix, $dbr->anyString() );
	$queryOptions = [ 'ORDER BY' => 'img_name', 'LIMIT' => intval( $limit ) ];
	$res = $dbr->select(
		'image',
		LocalFile::selectFields(),
		$nameCond,
		__METHOD__,
		$queryOptions
	);

	// Turn every row into a file object
	$matches = [];
	foreach ( $res as $row ) {
		$matches[] = $this->newFileFromRow( $row );
	}

	return $matches;
}
452
/**
 * Fetch the replica DB connection via wfGetDB( DB_REPLICA ).
 *
 * @return IDatabase
 */
function getReplicaDB() {
	$dbr = wfGetDB( DB_REPLICA );

	return $dbr;
}
460
/**
 * Old name for getReplicaDB(); simply forwards to it.
 *
 * @return IDatabase
 * @deprecated Since 1.29
 */
function getSlaveDB() {
	$dbr = $this->getReplicaDB();

	return $dbr;
}
470
/**
 * Fetch the master DB connection via wfGetDB( DB_MASTER ).
 *
 * @return IDatabase
 */
function getMasterDB() {
	$dbw = wfGetDB( DB_MASTER );

	return $dbw;
}
478
/**
 * Build a callback that returns a DB handle for a given index
 * (DB_REPLICA/DB_MASTER) by passing it to wfGetDB().
 *
 * @return Closure
 */
protected function getDBFactory() {
	$factory = function( $index ) {
		return wfGetDB( $index );
	};

	return $factory;
}
488
/**
 * Get a key on the primary cache for this repository.
 * The parameters are the parts of the key, as for wfMemcKey(); this
 * implementation passes them straight to that function.
 *
 * Callers in this class also handle a false return value (meaning the
 * repository's cache is not accessible at this site).
 *
 * @return string
 */
function getSharedCacheKey( /*...*/ ) {
	$keyParts = func_get_args();

	return call_user_func_array( 'wfMemcKey', $keyParts );
}
501
/**
 * Invalidates image redirect cache related to that image
 *
 * The cache deletion is not done immediately: it is registered on the master
 * DB connection through onTransactionPreCommitOrIdle(). Nothing happens when
 * no shared cache key is available.
 *
 * @param Title $title Title of page
 * @return void
 */
function invalidateImageRedirect( Title $title ) {
	$key = $this->getSharedCacheKey( 'image_redirect', md5( $title->getDBkey() ) );
	if ( !$key ) {
		return;
	}

	$purge = function () use ( $key ) {
		ObjectCache::getMainWANInstance()->delete( $key );
	};
	$this->getMasterDB()->onTransactionPreCommitOrIdle( $purge, __METHOD__ );
}
519
/**
 * Return information about the repository: the parent's info merged with a
 * 'favicon' entry holding the expanded $wgFavicon URL.
 *
 * @return array
 * @since 1.22
 */
function getInfo() {
	global $wgFavicon;

	$baseInfo = parent::getInfo();
	$localInfo = [
		'favicon' => wfExpandUrl( $wgFavicon ),
	];

	return array_merge( $baseInfo, $localInfo );
}
533
/**
 * Forward to skipWriteOperationIfSha1() with this method's name and the
 * arguments it was called with.
 *
 * @param mixed $srcPath
 * @param mixed $dstZone
 * @param mixed $dstRel
 * @param int $flags
 * @return Status
 */
public function store( $srcPath, $dstZone, $dstRel, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
537
/**
 * Forward to skipWriteOperationIfSha1() with this method's name and the
 * arguments it was called with.
 *
 * @param array $triplets
 * @param int $flags
 * @return Status
 */
public function storeBatch( array $triplets, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
541
/**
 * Forward to skipWriteOperationIfSha1() with this method's name and the
 * arguments it was called with.
 *
 * @param array $files
 * @param int $flags
 * @return Status
 */
public function cleanupBatch( array $files, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
545
/**
 * Forward to skipWriteOperationIfSha1() with this method's name and the
 * arguments it was called with.
 *
 * @param mixed $src
 * @param mixed $dstRel
 * @param mixed $archiveRel
 * @param int $flags
 * @param array $options
 * @return Status
 */
public function publish(
	$src,
	$dstRel,
	$archiveRel,
	$flags = 0,
	array $options = []
) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
555
/**
 * Forward to skipWriteOperationIfSha1() with this method's name and the
 * arguments it was called with.
 *
 * @param array $ntuples
 * @param int $flags
 * @return Status
 */
public function publishBatch( array $ntuples, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
559
/**
 * Forward to skipWriteOperationIfSha1() with this method's name and the
 * arguments it was called with.
 *
 * @param mixed $srcRel
 * @param mixed $archiveRel
 * @return Status
 */
public function delete( $srcRel, $archiveRel ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
563
/**
 * Forward to skipWriteOperationIfSha1() with this method's name and the
 * arguments it was called with.
 *
 * @param array $sourceDestPairs
 * @return Status
 */
public function deleteBatch( array $sourceDestPairs ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
567
/**
 * Run the parent class's implementation of a write operation, unless the
 * storage is sha1-based, in which case the operation is skipped and a good
 * Status is returned.
 *
 * The writability check runs first in both cases.
 *
 * @param string $function Name of the parent method to invoke
 * @param array $args Arguments to pass to that method
 * @return Status
 */
protected function skipWriteOperationIfSha1( $function, array $args ) {
	$this->assertWritableRepo(); // fail out if read-only

	if ( !$this->hasSha1Storage() ) {
		return call_user_func_array( 'parent::' . $function, $args );
	}

	wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );

	return Status::newGood();
}
585 }