Added $opts to WANObjectCache::set() to detect snapshot lag
[lhc/web/wiklou.git] / includes / filerepo / LocalRepo.php
1 <?php
2 /**
3 * Local repository that stores files in the local filesystem and registers them
4 * in the wiki's own database.
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License along
17 * with this program; if not, write to the Free Software Foundation, Inc.,
18 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
19 * http://www.gnu.org/copyleft/gpl.html
20 *
21 * @file
22 * @ingroup FileRepo
23 */
24
25 /**
26 * A repository that stores files in the local filesystem and registers them
27 * in the wiki's own database. This is the most commonly used repository class.
28 *
29 * @ingroup FileRepo
30 */
31 class LocalRepo extends FileRepo {
/** @var bool True when the repo settings specify the 'sha1' storage layout (set in the constructor) */
protected $hasSha1Storage = false;

/**
 * @var array Callable naming LocalFile::newFromTitle. Not referenced in this
 *   class; presumably consumed by the parent FileRepo -- TODO confirm.
 */
protected $fileFactory = array( 'LocalFile', 'newFromTitle' );

/**
 * @var array Callable naming LocalFile::newFromKey. Not referenced in this
 *   class; presumably consumed by the parent FileRepo -- TODO confirm.
 */
protected $fileFactoryKey = array( 'LocalFile', 'newFromKey' );

/** @var array Callable used by newFileFromRow() for rows that carry img_name */
protected $fileFromRowFactory = array( 'LocalFile', 'newFromRow' );

/** @var array Callable used by newFileFromRow() for rows that carry oi_name */
protected $oldFileFromRowFactory = array( 'OldLocalFile', 'newFromRow' );

/**
 * @var array Callable naming OldLocalFile::newFromTitle. Not referenced in this
 *   class; presumably consumed by the parent FileRepo -- TODO confirm.
 */
protected $oldFileFactory = array( 'OldLocalFile', 'newFromTitle' );

/**
 * @var array Callable naming OldLocalFile::newFromKey. Not referenced in this
 *   class; presumably consumed by the parent FileRepo -- TODO confirm.
 */
protected $oldFileFactoryKey = array( 'OldLocalFile', 'newFromKey' );
52
/**
 * Set up the repository and, for the 'sha1' storage layout, wrap the backend.
 *
 * @param array|null $info Repository settings, passed unchanged to the parent
 *   constructor. The only entry read here is the optional 'storageLayout'.
 */
function __construct( array $info = null ) {
	parent::__construct( $info );

	$layout = isset( $info['storageLayout'] ) ? $info['storageLayout'] : null;
	$this->hasSha1Storage = ( $layout === 'sha1' );

	if ( !$this->hasSha1Storage() ) {
		return;
	}

	// Replace the plain backend with a wrapper that receives the original
	// backend, the repo name and a factory for DB handles.
	$wrapperConfig = array(
		'backend' => $this->backend,
		'repoName' => $this->name,
		'dbHandleFactory' => $this->getDBFactory()
	);
	$this->backend = new FileBackendDBRepoWrapper( $wrapperConfig );
}
66
/**
 * Build a file object from a database row.
 *
 * Rows that carry img_name go through $fileFromRowFactory, rows that carry
 * oi_name go through $oldFileFromRowFactory. The factory is called with the
 * row and this repo.
 *
 * @param stdClass $row
 * @return LocalFile
 * @throws MWException If the row has neither img_name nor oi_name
 */
function newFileFromRow( $row ) {
	if ( isset( $row->img_name ) ) {
		$factory = $this->fileFromRowFactory;
	} elseif ( isset( $row->oi_name ) ) {
		$factory = $this->oldFileFromRowFactory;
	} else {
		throw new MWException( __METHOD__ . ': invalid row' );
	}

	return call_user_func( $factory, $row, $this );
}
81
/**
 * Create an old-version file object for the given title and archive name,
 * bound to this repo.
 *
 * @param Title $title
 * @param string $archiveName
 * @return OldLocalFile
 */
function newFromArchiveName( $title, $archiveName ) {
	$oldFile = OldLocalFile::newFromArchiveName( $title, $this, $archiveName );

	return $oldFile;
}
90
/**
 * Delete files in the deleted directory if they are not referenced in the
 * filearchive table. This needs to be done in the repo because it needs to
 * interleave database locks with file operations, which is potentially a
 * remote operation.
 *
 * Each key is handled inside its own atomic section on the master DB. A key
 * counts as a success when it is still referenced (nothing to do) or when its
 * file was deleted; a failed delete adds an 'undelete-cleanup-error' and
 * counts as a failure.
 *
 * With sha1 storage nothing is done and a good Status is returned.
 *
 * @param array $storageKeys Storage keys to check; duplicates are ignored
 *
 * @return FileRepoStatus|Status Plain Status::newGood() when sha1 storage is
 *   in use, otherwise the status object created by $this->newGood()
 */
function cleanupDeletedBatch( array $storageKeys ) {
	if ( $this->hasSha1Storage() ) {
		wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );
		return Status::newGood();
	}

	$backend = $this->backend; // convenience
	$root = $this->getZonePath( 'deleted' );
	$dbw = $this->getMasterDB();
	$status = $this->newGood();
	$storageKeys = array_unique( $storageKeys );
	foreach ( $storageKeys as $key ) {
		$hashPath = $this->getDeletedHashPath( $key );
		$path = "$root/$hashPath$key";
		$dbw->startAtomic( __METHOD__ );
		// Check for usage in deleted/hidden files and preemptively
		// lock the key to avoid any future use until we are finished.
		$deleted = $this->deletedFileHasKey( $key, 'lock' );
		$hidden = $this->hiddenFileHasKey( $key, 'lock' );
		if ( !$deleted && !$hidden ) { // not in use now
			wfDebug( __METHOD__ . ": deleting $key\n" );
			$op = array( 'op' => 'delete', 'src' => $path );
			if ( $backend->doOperation( $op )->isOK() ) {
				// Count successful deletions too, so that successCount and
				// failCount together account for every key processed.
				$status->successCount++;
			} else {
				$status->error( 'undelete-cleanup-error', $path );
				$status->failCount++;
			}
		} else {
			wfDebug( __METHOD__ . ": $key still in use\n" );
			$status->successCount++;
		}
		$dbw->endAtomic( __METHOD__ );
	}

	return $status;
}
136
/**
 * Tell whether any filearchive row in the 'deleted' storage group uses the
 * given storage key. The lookup runs on the master DB.
 *
 * @param string $key File storage key (base-36 sha1 key with file extension)
 * @param string|null $lock Pass "lock" to add FOR UPDATE to the query
 * @return bool True if a row with this key exists
 */
protected function deletedFileHasKey( $key, $lock = null ) {
	$options = array();
	if ( $lock === 'lock' ) {
		$options[] = 'FOR UPDATE';
	}

	$conds = array( 'fa_storage_group' => 'deleted', 'fa_storage_key' => $key );
	$found = $this->getMasterDB()->selectField(
		'filearchive', '1', $conds, __METHOD__, $options
	);

	return (bool)$found;
}
154
/**
 * Tell whether any oldimage row that has the File::DELETED_FILE bit set uses
 * the given storage key. The lookup runs on the master DB.
 *
 * The key is split at its first dot: the part before it is matched against
 * oi_sha1, and the normalized part after it must be the extension at the end
 * of oi_archive_name.
 *
 * @param string $key File storage key (base-36 sha1 key with file extension)
 * @param string|null $lock Pass "lock" to add FOR UPDATE to the query
 * @return bool True if a matching row exists
 */
protected function hiddenFileHasKey( $key, $lock = null ) {
	$options = array();
	if ( $lock === 'lock' ) {
		$options[] = 'FOR UPDATE';
	}

	$sha1 = self::getHashFromKey( $key );
	$firstDot = strcspn( $key, '.' );
	$ext = File::normalizeExtension( substr( $key, $firstDot + 1 ) );

	$dbw = $this->getMasterDB();

	$conds = array(
		'oi_sha1' => $sha1,
		'oi_archive_name ' . $dbw->buildLike( $dbw->anyString(), ".$ext" ),
	);
	// The bitwise-AND SQL expression is the array key; the value compared
	// against is the flag itself, so the bit must be set.
	$conds[$dbw->bitAnd( 'oi_deleted', File::DELETED_FILE )] = File::DELETED_FILE;

	$found = $dbw->selectField( 'oldimage', '1', $conds, __METHOD__, $options );

	return (bool)$found;
}
177
/**
 * Gets the SHA1 hash from a storage key
 *
 * The hash is the part of the key before its first dot. Leading dots are
 * skipped, matching what the earlier strtok()-based implementation did. An
 * empty key (or one made only of dots) yields an empty string.
 *
 * This deliberately avoids strtok(): that function keeps one process-wide
 * tokenizer, so calling it here would silently reset any tokenization a
 * caller had in progress.
 *
 * @param string $key
 * @return string
 */
public static function getHashFromKey( $key ) {
	$parts = explode( '.', ltrim( $key, '.' ), 2 );

	return $parts[0];
}
187
/**
 * Checks if there is a redirect named as $title
 *
 * The answer is cached in the main WAN cache. The shared cache key is used
 * with a one-day expiry; if no shared key is available (false), a local key
 * with a five-minute expiry is used instead. Cached values:
 *  - ' '  : written when the page does not exist
 *  - ''   : written when there is no redirect row pointing into NS_FILE
 *  - other non-empty string except ' PURGED': the DB key of the target
 * Anything else (false, null, ' PURGED') is treated as a cache miss.
 *
 * @param Title $title Title of file
 * @return bool|Title Target title, or false if there is no file redirect
 */
function checkRedirect( Title $title ) {
	$cache = ObjectCache::getMainWANInstance();

	$title = File::normalizeTitle( $title, 'exception' );
	$titleHash = md5( $title->getDBkey() );

	$expiry = 86400; // has invalidation, 1 day
	$memcKey = $this->getSharedCacheKey( 'image_redirect', $titleHash );
	if ( $memcKey === false ) {
		$memcKey = $this->getLocalCacheKey( 'image_redirect', $titleHash );
		$expiry = 300; // no invalidation, 5 minutes
	}

	$cachedValue = $cache->get( $memcKey );
	if ( $cachedValue === ' ' || $cachedValue === '' ) {
		// Cached negative answer
		return false;
	}
	if ( strval( $cachedValue ) !== '' && $cachedValue !== ' PURGED' ) {
		// Cached positive answer
		return Title::newFromText( $cachedValue, NS_FILE );
	}
	// Otherwise: cache miss, fall through to the database

	// Passed to every set() below together with the value
	$opts = array( 'since' => $this->getSlaveDB()->trxTimestamp() );

	$id = $this->getArticleID( $title );
	if ( !$id ) {
		$cache->set( $memcKey, " ", $expiry, $opts );

		return false;
	}

	$row = $this->getSlaveDB()->selectRow(
		'redirect',
		array( 'rd_title', 'rd_namespace' ),
		array( 'rd_from' => $id ),
		__METHOD__
	);

	if ( !$row || $row->rd_namespace != NS_FILE ) {
		$cache->set( $memcKey, '', $expiry, $opts );

		return false;
	}

	$targetTitle = Title::makeTitle( $row->rd_namespace, $row->rd_title );
	$cache->set( $memcKey, $targetTitle->getDBkey(), $expiry, $opts );

	return $targetTitle;
}
242
/**
 * Function like Title::getArticleID().
 * A Title object cannot be told which database to use, so the lookup is
 * duplicated here against this repo's slave DB.
 *
 * @param Title $title
 * @return bool|int|mixed 0 if $title is not a Title; otherwise whatever
 *   selectField() returns for page_id
 */
protected function getArticleID( $title ) {
	if ( !( $title instanceof Title ) ) {
		return 0;
	}

	$conds = array(
		'page_namespace' => $title->getNamespace(),
		'page_title' => $title->getDBkey(),
	);

	return $this->getSlaveDB()->selectField( 'page', 'page_id', $conds, __METHOD__ );
}
267
/**
 * Find several files at once.
 *
 * Lookup order: the image table, then the oldimage table (only for searches
 * that specify a time), then file redirects for whatever is still unmatched.
 *
 * @param array $items Each item is either something File::normalizeTitle()
 *   accepts, or an array of search params with a 'title' entry and these
 *   optional entries:
 *    - time: timestamp the file version must have; if empty, only a
 *      non-old file matches
 *    - private: if non-empty, files with File::DELETED_FILE set may match;
 *      if it is a User, that user is used for the userCan() check
 *    - ignoreRedirect: if non-empty, redirects are not followed
 * @param int $flags If FileRepo::NAME_AND_TIME_ONLY is set, each match is an
 *   array with 'title' and 'timestamp' instead of a File object
 * @return array Map of (searched DB key => File or title/timestamp array)
 */
public function findFiles( array $items, $flags = 0 ) {
	$finalFiles = array(); // map of (DB key => corresponding File) for matches

	$searchSet = array(); // map of (normalized DB key => search params)
	foreach ( $items as $item ) {
		if ( is_array( $item ) ) {
			$title = File::normalizeTitle( $item['title'] );
			if ( $title ) {
				$searchSet[$title->getDBkey()] = $item;
			}
		} else {
			$title = File::normalizeTitle( $item );
			if ( $title ) {
				$searchSet[$title->getDBkey()] = array();
			}
		}
	}

	$fileMatchesSearch = function ( File $file, array $search ) {
		// Note: file name comparison done elsewhere (to handle redirects)
		$user = ( !empty( $search['private'] ) && $search['private'] instanceof User )
			? $search['private']
			: null;

		return (
			$file->exists() &&
			(
				( empty( $search['time'] ) && !$file->isOld() ) ||
				( !empty( $search['time'] ) && $search['time'] === $file->getTimestamp() )
			) &&
			( !empty( $search['private'] ) || !$file->isDeleted( File::DELETED_FILE ) ) &&
			$file->userCan( File::DELETED_FILE, $user )
		);
	};

	// $this is passed in explicitly as $that for use inside the closure
	$that = $this;
	$applyMatchingFiles = function ( ResultWrapper $res, &$searchSet, &$finalFiles )
		use ( $that, $fileMatchesSearch, $flags )
	{
		global $wgContLang;
		$info = $that->getInfo();
		foreach ( $res as $row ) {
			$file = $that->newFileFromRow( $row );
			// There must have been a search for this DB key, but this has to handle the
			// cases where title capitalization is different on the client and repo wikis.
			$dbKeysLook = array( strtr( $file->getName(), ' ', '_' ) );
			if ( !empty( $info['initialCapital'] ) ) {
				// Search keys for "hi.png" and "Hi.png" should use the "Hi.png" file
				$dbKeysLook[] = $wgContLang->lcfirst( $file->getName() );
			}
			foreach ( $dbKeysLook as $dbKey ) {
				if ( isset( $searchSet[$dbKey] )
					&& $fileMatchesSearch( $file, $searchSet[$dbKey] )
				) {
					$finalFiles[$dbKey] = ( $flags & FileRepo::NAME_AND_TIME_ONLY )
						? array( 'title' => $dbKey, 'timestamp' => $file->getTimestamp() )
						: $file;
					// Matched keys are removed so later stages skip them
					unset( $searchSet[$dbKey] );
				}
			}
		}
	};

	$dbr = $this->getSlaveDB();

	// Query image table
	$imgNames = array();
	foreach ( array_keys( $searchSet ) as $dbKey ) {
		$imgNames[] = $this->getNameFromTitle( File::normalizeTitle( $dbKey ) );
	}

	if ( count( $imgNames ) ) {
		$res = $dbr->select( 'image',
			LocalFile::selectFields(), array( 'img_name' => $imgNames ), __METHOD__ );
		$applyMatchingFiles( $res, $searchSet, $finalFiles );
	}

	// Query old image table
	$oiConds = array(); // WHERE clause array for each file
	foreach ( $searchSet as $dbKey => $search ) {
		// Use the same "has a time" test as $fileMatchesSearch: with an empty
		// time (e.g. false) only a non-old file can match, so querying
		// oldimage for that search would be wasted work.
		if ( !empty( $search['time'] ) ) {
			$oiConds[] = $dbr->makeList(
				array(
					'oi_name' => $this->getNameFromTitle( File::normalizeTitle( $dbKey ) ),
					'oi_timestamp' => $dbr->timestamp( $search['time'] )
				),
				LIST_AND
			);
		}
	}

	if ( count( $oiConds ) ) {
		$res = $dbr->select( 'oldimage',
			OldLocalFile::selectFields(), $dbr->makeList( $oiConds, LIST_OR ), __METHOD__ );
		$applyMatchingFiles( $res, $searchSet, $finalFiles );
	}

	// Check for redirects...
	foreach ( $searchSet as $dbKey => $search ) {
		if ( !empty( $search['ignoreRedirect'] ) ) {
			continue;
		}

		$title = File::normalizeTitle( $dbKey );
		$redir = $this->checkRedirect( $title ); // hopefully hits memcached

		if ( $redir && $redir->getNamespace() == NS_FILE ) {
			$file = $this->newFile( $redir );
			if ( $file && $fileMatchesSearch( $file, $search ) ) {
				$file->redirectedFrom( $title->getDBkey() );
				if ( $flags & FileRepo::NAME_AND_TIME_ONLY ) {
					$finalFiles[$dbKey] = array(
						'title' => $file->getTitle()->getDBkey(),
						'timestamp' => $file->getTimestamp()
					);
				} else {
					$finalFiles[$dbKey] = $file;
				}
			}
		}
	}

	return $finalFiles;
}
392
/**
 * List the current files (image table rows) whose img_sha1 equals the given
 * hash, ordered by img_name.
 *
 * @param string $hash A sha1 hash to look for
 * @return File[]
 */
function findBySha1( $hash ) {
	$res = $this->getSlaveDB()->select(
		'image',
		LocalFile::selectFields(),
		array( 'img_sha1' => $hash ),
		__METHOD__,
		array( 'ORDER BY' => 'img_name' )
	);

	$files = array();
	foreach ( $res as $row ) {
		$files[] = $this->newFileFromRow( $row );
	}
	$res->free();

	return $files;
}
418
/**
 * Look up current files for several SHA-1 content hashes with one query.
 *
 * Overrides the generic implementation in FileRepo for performance reasons.
 *
 * @param array $hashes An array of hashes
 * @return array Map of (sha1 reported by the file => list of file objects),
 *   rows ordered by img_name; an empty array if $hashes is empty
 */
function findBySha1s( array $hashes ) {
	if ( count( $hashes ) === 0 ) {
		return array(); // empty parameter
	}

	$res = $this->getSlaveDB()->select(
		'image',
		LocalFile::selectFields(),
		array( 'img_sha1' => $hashes ),
		__METHOD__,
		array( 'ORDER BY' => 'img_name' )
	);

	$filesByHash = array();
	foreach ( $res as $row ) {
		$file = $this->newFileFromRow( $row );
		$filesByHash[$file->getSha1()][] = $file;
	}
	$res->free();

	return $filesByHash;
}
451
/**
 * List current files whose img_name begins with $prefix, ordered by name.
 *
 * @param string $prefix The prefix to search for
 * @param int $limit The maximum amount of files to return (passed through intval())
 * @return array List of file objects
 */
public function findFilesByPrefix( $prefix, $limit ) {
	$dbr = $this->getSlaveDB();

	// Prefix match: the literal prefix followed by any string
	$nameCond = 'img_name ' . $dbr->buildLike( $prefix, $dbr->anyString() );
	$res = $dbr->select(
		'image',
		LocalFile::selectFields(),
		$nameCond,
		__METHOD__,
		array( 'ORDER BY' => 'img_name', 'LIMIT' => intval( $limit ) )
	);

	$matches = array();
	foreach ( $res as $row ) {
		$matches[] = $this->newFileFromRow( $row );
	}

	return $matches;
}
480
/**
 * Get the DB handle that wfGetDB() provides for DB_SLAVE.
 * @return DatabaseBase
 */
function getSlaveDB() {
	$dbr = wfGetDB( DB_SLAVE );

	return $dbr;
}
488
/**
 * Get the DB handle that wfGetDB() provides for DB_MASTER.
 * @return DatabaseBase
 */
function getMasterDB() {
	$dbw = wfGetDB( DB_MASTER );

	return $dbw;
}
496
/**
 * Get a callback that maps a DB index (DB_SLAVE/DB_MASTER) to the handle
 * returned by wfGetDB() for that index.
 * @return Closure
 */
protected function getDBFactory() {
	$factory = function( $index ) {
		return wfGetDB( $index );
	};

	return $factory;
}
506
/**
 * Get a key on the primary cache for this repository.
 * The parameters are the parts of the key, as for wfMemcKey().
 *
 * This implementation always returns what wfMemcKey() builds from the
 * arguments. Callers in this class (e.g. checkRedirect()) also handle a false
 * return; presumably that is for repositories whose cache is not accessible
 * at this site -- confirm against the subclasses.
 *
 * @return string|bool
 */
function getSharedCacheKey( /*...*/ ) {
	return call_user_func_array( 'wfMemcKey', func_get_args() );
}
519
/**
 * Invalidates image redirect cache related to that image
 *
 * The delete is not issued immediately: it is registered on the master DB
 * handle through onTransactionPreCommitOrIdle(). Nothing happens if no
 * shared cache key is available.
 *
 * @param Title $title Title of page
 * @return void
 */
function invalidateImageRedirect( Title $title ) {
	$key = $this->getSharedCacheKey( 'image_redirect', md5( $title->getDBkey() ) );
	if ( !$key ) {
		return;
	}

	$purge = function() use ( $key ) {
		ObjectCache::getMainWANInstance()->delete( $key );
	};
	$this->getMasterDB()->onTransactionPreCommitOrIdle( $purge );
}
534
/**
 * Return information about the repository: the parent's info plus a
 * 'favicon' entry holding $wgFavicon expanded by wfExpandUrl().
 *
 * @return array
 * @since 1.22
 */
function getInfo() {
	global $wgFavicon;

	$info = parent::getInfo();
	$localInfo = array(
		'favicon' => wfExpandUrl( $wgFavicon ),
	);

	return array_merge( $info, $localInfo );
}
548
/**
 * Override of the parent's store(): delegates to skipWriteOperationIfSha1(),
 * which either skips the write (sha1 storage) or calls the parent method.
 *
 * Only the arguments actually passed are forwarded, so an omitted $flags
 * falls back to the parent's own default.
 *
 * @param mixed $srcPath Forwarded unchanged
 * @param mixed $dstZone Forwarded unchanged
 * @param mixed $dstRel Forwarded unchanged
 * @param int $flags Forwarded unchanged
 * @return FileRepoStatus|Status
 */
public function store( $srcPath, $dstZone, $dstRel, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
552
/**
 * Override of the parent's storeBatch(): delegates to
 * skipWriteOperationIfSha1(), which either skips the write (sha1 storage) or
 * calls the parent method with the arguments actually passed.
 *
 * @param array $triplets Forwarded unchanged
 * @param int $flags Forwarded unchanged
 * @return FileRepoStatus|Status
 */
public function storeBatch( array $triplets, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
556
/**
 * Override of the parent's cleanupBatch(): delegates to
 * skipWriteOperationIfSha1(), which either skips the write (sha1 storage) or
 * calls the parent method with the arguments actually passed.
 *
 * @param array $files Forwarded unchanged
 * @param int $flags Forwarded unchanged
 * @return FileRepoStatus|Status
 */
public function cleanupBatch( array $files, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
560
/**
 * Override of the parent's publish(): delegates to
 * skipWriteOperationIfSha1(), which either skips the write (sha1 storage) or
 * calls the parent method with the arguments actually passed.
 *
 * @param mixed $srcPath Forwarded unchanged
 * @param mixed $dstRel Forwarded unchanged
 * @param mixed $archiveRel Forwarded unchanged
 * @param int $flags Forwarded unchanged
 * @param array $options Forwarded unchanged
 * @return FileRepoStatus|Status
 */
public function publish(
	$srcPath,
	$dstRel,
	$archiveRel,
	$flags = 0,
	array $options = array()
) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
570
/**
 * Override of the parent's publishBatch(): delegates to
 * skipWriteOperationIfSha1(), which either skips the write (sha1 storage) or
 * calls the parent method with the arguments actually passed.
 *
 * @param array $ntuples Forwarded unchanged
 * @param int $flags Forwarded unchanged
 * @return FileRepoStatus|Status
 */
public function publishBatch( array $ntuples, $flags = 0 ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
574
/**
 * Override of the parent's delete(): delegates to
 * skipWriteOperationIfSha1(), which either skips the write (sha1 storage) or
 * calls the parent method with the arguments actually passed.
 *
 * @param mixed $srcRel Forwarded unchanged
 * @param mixed $archiveRel Forwarded unchanged
 * @return FileRepoStatus|Status
 */
public function delete( $srcRel, $archiveRel ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
578
/**
 * Override of the parent's deleteBatch(): delegates to
 * skipWriteOperationIfSha1(), which either skips the write (sha1 storage) or
 * calls the parent method with the arguments actually passed.
 *
 * @param array $sourceDestPairs Forwarded unchanged
 * @return FileRepoStatus|Status
 */
public function deleteBatch( array $sourceDestPairs ) {
	$callArgs = func_get_args();

	return $this->skipWriteOperationIfSha1( __FUNCTION__, $callArgs );
}
582
/**
 * Run a write operation of the parent class unless storage is sha1-based.
 *
 * assertWritableRepo() is always called first. With sha1 storage the
 * operation is skipped and a good Status is returned; otherwise the parent
 * class method named by $function is called with $args and its result is
 * returned.
 *
 * @param string $function Name of the parent method to call
 * @param array $args Arguments for that method
 * @return FileRepoStatus|Status
 */
protected function skipWriteOperationIfSha1( $function, array $args ) {
	$this->assertWritableRepo(); // fail out if read-only

	if ( !$this->hasSha1Storage() ) {
		// NOTE(review): callables of the form 'parent::method' are deprecated
		// as of PHP 8.2 -- revisit if the minimum PHP version is raised.
		return call_user_func_array( 'parent::' . $function, $args );
	}

	wfDebug( __METHOD__ . ": skipped because storage uses sha1 paths\n" );

	return Status::newGood();
}
600 }