* Added FileBackendBase::getFileContents() function with a default FileBackend version.
[lhc/web/wiklou.git] / includes / filerepo / backend / FileBackend.php
1 <?php
2 /**
3 * @file
4 * @ingroup FileBackend
5 * @author Aaron Schulz
6 */
7
8 /**
9 * Base class for all file backend classes (including multi-write backends).
10 * This class defines the methods as abstract that subclasses must implement.
11 * Outside callers can assume that all backends will have these functions.
12 *
13 * All "storage paths" are of the format "mwstore://backend/container/path".
14 * The paths use typical file system (FS) notation, though any particular backend may
15 * not actually be using a local filesystem. Therefore, the paths are only virtual.
16 *
17 * FS-based backends are somewhat more restrictive due to the existence of real
18 * directory files; a regular file cannot have the same name as a directory. Other
19 * backends with virtual directories may not have this limitation. Callers should
20 * store files in such a way that no file and directory share the same path.
21 *
22 * Methods should avoid throwing exceptions at all costs.
23 * As a corollary, external dependencies should be kept to a minimum.
24 *
25 * @ingroup FileBackend
26 * @since 1.19
27 */
28 abstract class FileBackendBase {
29 protected $name; // unique backend name
30 protected $wikiId; // unique wiki name
31 protected $readOnly; // string
32 /** @var LockManager */
33 protected $lockManager;
34
35 /**
36 * Build a new object from configuration.
37 * This should only be called from within FileBackendGroup.
38 *
39 * $config includes:
40 * 'name' : The unique name of this backend
41 * 'wikiId' : Prefix to container names that is unique to this wiki
42 * 'lockManager' : Registered name of a file lock manager to use
43 * 'readOnly' : Write operations are disallowed if this is a non-empty string.
44 * It should be an explanation for the backend being read-only.
45 *
46 * @param $config Array
47 */
public function __construct( array $config ) {
	// The backend name is read unconditionally from the configuration.
	$this->name = $config['name'];

	// Use the configured wiki ID when given, otherwise the current wiki's ID.
	if ( isset( $config['wikiId'] ) ) {
		$this->wikiId = $config['wikiId'];
	} else {
		$this->wikiId = wfWikiID();
	}

	// Resolve the lock manager from its registered name.
	$lockManagers = LockManagerGroup::singleton();
	$this->lockManager = $lockManagers->get( $config['lockManager'] );

	// A non-empty string puts the backend in read-only mode; default is writable.
	if ( isset( $config['readOnly'] ) ) {
		$this->readOnly = (string)$config['readOnly'];
	} else {
		$this->readOnly = '';
	}
}
58
59 /**
60 * Get the unique backend name.
61 *
62 * We may have multiple different backends of the same type.
63 * For example, we can have two Swift backends using different proxies.
64 *
65 * @return string
66 */
final public function getName() {
	// Plain accessor for the name assigned in the constructor.
	$backendName = $this->name;
	return $backendName;
}
70
71 /**
72 * This is the main entry point into the backend for write operations.
73 * Callers supply an ordered list of operations to perform as a transaction.
74 * If any serious errors occur, all attempted operations will be rolled back.
75 *
76 * $ops is an array of arrays. The outer array holds a list of operations.
77 * Each inner array is a set of key value pairs that specify an operation.
78 *
79 * Supported operations and their parameters:
80 * a) Create a new file in storage with the contents of a string
81 * array(
82 * 'op' => 'create',
83 * 'dst' => <storage path>,
84 * 'content' => <string of new file contents>,
85 * 'overwriteDest' => <boolean>,
86 * 'overwriteSame' => <boolean>
87 * )
88 * b) Copy a file system file into storage
89 * array(
90 * 'op' => 'store',
91 * 'src' => <file system path>,
92 * 'dst' => <storage path>,
93 * 'overwriteDest' => <boolean>,
94 * 'overwriteSame' => <boolean>
95 * )
96 * c) Copy a file within storage
97 * array(
98 * 'op' => 'copy',
99 * 'src' => <storage path>,
100 * 'dst' => <storage path>,
101 * 'overwriteDest' => <boolean>,
102 * 'overwriteSame' => <boolean>
103 * )
104 * d) Move a file within storage
105 * array(
106 * 'op' => 'move',
107 * 'src' => <storage path>,
108 * 'dst' => <storage path>,
109 * 'overwriteDest' => <boolean>,
110 * 'overwriteSame' => <boolean>
111 * )
112 * e) Delete a file within storage
113 * array(
114 * 'op' => 'delete',
115 * 'src' => <storage path>,
116 * 'ignoreMissingSource' => <boolean>
117 * )
118 * f) Concatenate a list of files within storage into a single temp file
119 * array(
120 * 'op' => 'concatenate',
121 * 'srcs' => <ordered array of storage paths>,
122 * 'dst' => <file system path to 0-byte temp file>
123 * )
124 * g) Do nothing (no-op)
125 * array(
126 * 'op' => 'null',
127 * )
128 *
129 * Boolean flags for operations (operation-specific):
130 * 'ignoreMissingSource' : The operation will simply succeed and do
131 * nothing if the source file does not exist.
132 * 'overwriteDest' : Any destination file will be overwritten.
133 * 'overwriteSame' : An error will not be given if a file already
134 * exists at the destination that has the same
135 * contents as the new contents to be written there.
136 *
137 * $opts is an associative array of boolean flags, including:
138 * 'ignoreErrors' : Errors that would normally cause a rollback do not.
139 * The remaining operations are still attempted if any fail.
140 * 'nonLocking' : No locks are acquired for the operations.
141 * This can increase performance for non-critical writes.
142 * This has no effect unless the 'ignoreErrors' flag is set.
143 * 'allowStale' : Don't require the latest available data.
144 * This can increase performance for non-critical writes.
145 * This has no effect unless the 'ignoreErrors' flag is set.
146 *
147 * Return value:
148 * This returns a Status, which contains all warnings and fatals that occurred
149 * during the operation. The 'failCount', 'successCount', and 'success' members
150 * will reflect each operation attempted. The status will be "OK" unless any
151 * of the operations failed and the 'ignoreErrors' parameter was not set.
152 *
153 * @param $ops Array List of operations to execute in order
154 * @param $opts Array Batch operation options
155 * @return Status
156 */
final public function doOperations( array $ops, array $opts = array() ) {
	// Reject every write while a read-only reason is configured.
	$isReadOnly = ( $this->readOnly != '' );
	if ( $isReadOnly ) {
		return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
	}
	// 'nonLocking' and 'allowStale' are only honoured together with
	// 'ignoreErrors'; strip them otherwise (sanity).
	if ( empty( $opts['ignoreErrors'] ) ) {
		unset( $opts['nonLocking'], $opts['allowStale'] );
	}
	// Hand the sanitised batch to the subclass implementation.
	return $this->doOperationsInternal( $ops, $opts );
}
167
168 /**
169 * @see FileBackendBase::doOperations()
170 */
171 abstract protected function doOperationsInternal( array $ops, array $opts );
172
173 /**
174 * Same as doOperations() except it takes a single operation.
175 * If you are doing a batch of operations that should either
176 * all succeed or all fail, then use that function instead.
177 *
178 * @see FileBackendBase::doOperations()
179 *
180 * @param $op Array Operation
181 * @param $opts Array Operation options
182 * @return Status
183 */
final public function doOperation( array $op, array $opts = array() ) {
	// A single operation is simply a batch containing one item.
	$batch = array( $op );
	return $this->doOperations( $batch, $opts );
}
187
188 /**
189 * Performs a single store operation.
190 * This sets $params['op'] to 'store' and passes it to doOperation().
191 *
192 * @see FileBackendBase::doOperation()
193 *
194 * @param $params Array Operation parameters
195 * @param $opts Array Operation options
196 * @return Status
197 */
final public function store( array $params, array $opts = array() ) {
	// Tag the parameters as a 'store' operation and run it as a one-item batch.
	$params['op'] = 'store';
	$batch = array( $params );
	return $this->doOperations( $batch, $opts );
}
202
203 /**
204 * Performs a single copy operation.
205 * This sets $params['op'] to 'copy' and passes it to doOperation().
206 *
207 * @see FileBackendBase::doOperation()
208 *
209 * @param $params Array Operation parameters
210 * @param $opts Array Operation options
211 * @return Status
212 */
final public function copy( array $params, array $opts = array() ) {
	// Tag the parameters as a 'copy' operation and run it as a one-item batch.
	$params['op'] = 'copy';
	$batch = array( $params );
	return $this->doOperations( $batch, $opts );
}
217
218 /**
219 * Performs a single move operation.
220 * This sets $params['op'] to 'move' and passes it to doOperation().
221 *
222 * @see FileBackendBase::doOperation()
223 *
224 * @param $params Array Operation parameters
225 * @param $opts Array Operation options
226 * @return Status
227 */
final public function move( array $params, array $opts = array() ) {
	// Tag the parameters as a 'move' operation and run it as a one-item batch.
	$params['op'] = 'move';
	$batch = array( $params );
	return $this->doOperations( $batch, $opts );
}
232
233 /**
234 * Performs a single delete operation.
235 * This sets $params['op'] to 'delete' and passes it to doOperation().
236 *
237 * @see FileBackendBase::doOperation()
238 *
239 * @param $params Array Operation parameters
240 * @param $opts Array Operation options
241 * @return Status
242 */
final public function delete( array $params, array $opts = array() ) {
	// Tag the parameters as a 'delete' operation and run it as a one-item batch.
	$params['op'] = 'delete';
	$batch = array( $params );
	return $this->doOperations( $batch, $opts );
}
247
248 /**
249 * Performs a single create operation.
250 * This sets $params['op'] to 'create' and passes it to doOperation().
251 *
252 * @see FileBackendBase::doOperation()
253 *
254 * @param $params Array Operation parameters
255 * @param $opts Array Operation options
256 * @return Status
257 */
final public function create( array $params, array $opts = array() ) {
	// Tag the parameters as a 'create' operation and run it as a one-item batch.
	$params['op'] = 'create';
	$batch = array( $params );
	return $this->doOperations( $batch, $opts );
}
262
263 /**
264 * Performs a single concatenate operation.
265 * This sets $params['op'] to 'concatenate' and passes it to doOperation().
266 *
267 * @see FileBackendBase::doOperation()
268 *
269 * @param $params Array Operation parameters
270 * @param $opts Array Operation options
271 * @return Status
272 */
final public function concatenate( array $params, array $opts = array() ) {
	// Tag the parameters as a 'concatenate' operation and run it as a one-item batch.
	$params['op'] = 'concatenate';
	$batch = array( $params );
	return $this->doOperations( $batch, $opts );
}
277
278 /**
279 * Prepare a storage path for usage. This will create containers
280 * that don't yet exist or, on FS backends, create parent directories.
281 *
282 * $params include:
283 * dir : storage directory
284 *
285 * @param $params Array
286 * @return Status
287 */
288 abstract public function prepare( array $params );
289
290 /**
291 * Take measures to block web access to a directory and
292 * the container it belongs to. FS backends might add .htaccess
293 * files, whereas backends like Swift might restrict container
294 * access to the backend user that represents end-users in web requests.
295 * This is not guaranteed to actually do anything.
296 *
297 * $params include:
298 * dir : storage directory
299 * noAccess : try to deny file access
300 * noListing : try to deny file listing
301 *
302 * @param $params Array
303 * @return Status
304 */
305 abstract public function secure( array $params );
306
307 /**
308 * Clean up an empty storage directory.
309 * On FS backends, the directory will be deleted. Others may do nothing.
310 *
311 * $params include:
312 * dir : storage directory
313 *
314 * @param $params Array
315 * @return Status
316 */
317 abstract public function clean( array $params );
318
319 /**
320 * Check if a file exists at a storage path in the backend.
321 *
322 * $params include:
323 * src : source storage path
324 * latest : use the latest available data
325 *
326 * @param $params Array
327 * @return bool|null Returns null on failure
328 */
329 abstract public function fileExists( array $params );
330
331 /**
332 * Get the last-modified timestamp of the file at a storage path.
333 *
334 * $params include:
335 * src : source storage path
336 * latest : use the latest available data
337 *
338 * @param $params Array
339 * @return string|false TS_MW timestamp or false on failure
340 */
341 abstract public function getFileTimestamp( array $params );
342
343 /**
344 * Get the contents of a file at a storage path in the backend.
345 * This should be avoided for potentially large files.
346 *
347 * $params include:
348 * src : source storage path
349 * latest : use the latest available data
350 *
351 * @param $params Array
352 * @return string|false Returns false on failure
353 */
354 abstract public function getFileContents( array $params );
355
356 /**
357 * Get a SHA-1 hash of the file at a storage path in the backend.
358 *
359 * $params include:
360 * src : source storage path
361 * latest : use the latest available data
362 *
363 * @param $params Array
364 * @return string|false Hash string or false on failure
365 */
366 abstract public function getFileSha1Base36( array $params );
367
368 /**
369 * Get the properties of the file at a storage path in the backend.
370 * Returns FSFile::placeholderProps() on failure.
371 *
372 * $params include:
373 * src : source storage path
374 * latest : use the latest available data
375 *
376 * @param $params Array
377 * @return Array
378 */
379 abstract public function getFileProps( array $params );
380
381 /**
382 * Stream the file at a storage path in the backend.
383 * Appropriate HTTP headers (Status, Content-Type, Content-Length)
384 * must be sent if streaming began, while none should be sent otherwise.
385 * Implementations should flush the output buffer before sending data.
386 *
387 * $params include:
388 * src : source storage path
389 * headers : additional HTTP headers to send on success
390 * latest : use the latest available data
391 *
392 * @param $params Array
393 * @return Status
394 */
395 abstract public function streamFile( array $params );
396
397 /**
398 * Returns a file system file, identical to the file at a storage path.
399 * The file returned is either:
400 * a) A local copy of the file at a storage path in the backend.
401 * The temporary copy will have the same extension as the source.
402 * b) An original of the file at a storage path in the backend.
403 * Temporary files may be purged when the file object falls out of scope.
404 *
405 * Write operations should *never* be done on this file as some backends
406 * may do internal tracking or may be instances of FileBackendMultiWrite.
407 * In that latter case, there are copies of the file that must stay in sync.
408 *
409 * $params include:
410 * src : source storage path
411 * latest : use the latest available data
412 *
413 * @param $params Array
414 * @return FSFile|null Returns null on failure
415 */
416 abstract public function getLocalReference( array $params );
417
418 /**
419 * Get a local copy on disk of the file at a storage path in the backend.
420 * The temporary copy will have the same file extension as the source.
421 * Temporary files may be purged when the file object falls out of scope.
422 *
423 * $params include:
424 * src : source storage path
425 * latest : use the latest available data
426 *
427 * @param $params Array
428 * @return TempFSFile|null Returns null on failure
429 */
430 abstract public function getLocalCopy( array $params );
431
432 /**
433 * Get an iterator to list out all object files under a storage directory.
434 * If the directory is of the form "mwstore://container", then all items in
435 * the container should be listed. If of the form "mwstore://container/dir",
436 * then all items under that container directory should be listed.
437 * Results should be storage paths relative to the given directory.
438 *
439 * $params include:
440 * dir : storage path directory
441 *
442 * @return Traversable|Array|null Returns null on failure
443 */
444 abstract public function getFileList( array $params );
445
446 /**
447 * Lock the files at the given storage paths in the backend.
448 * This will either lock all the files or none (on failure).
449 *
450 * Callers should consider using getScopedFileLocks() instead.
451 *
452 * @param $paths Array Storage paths
453 * @param $type integer LockManager::LOCK_* constant
454 * @return Status
455 */
final public function lockFiles( array $paths, $type ) {
	// Delegate straight to the lock manager chosen in the constructor.
	$lockStatus = $this->lockManager->lock( $paths, $type );
	return $lockStatus;
}
459
460 /**
461 * Unlock the files at the given storage paths in the backend.
462 *
463 * @param $paths Array Storage paths
464 * @param $type integer LockManager::LOCK_* constant
465 * @return Status
466 */
final public function unlockFiles( array $paths, $type ) {
	// Delegate straight to the lock manager chosen in the constructor.
	$unlockStatus = $this->lockManager->unlock( $paths, $type );
	return $unlockStatus;
}
470
471 /**
472 * Lock the files at the given storage paths in the backend.
473 * This will either lock all the files or none (on failure).
474 * On failure, the status object will be updated with errors.
475 *
476 * Once the return value goes out of scope, the locks will be released and
477 * the status updated. Unlock fatals will not change the status "OK" value.
478 *
479 * @param $paths Array Storage paths
480 * @param $type integer LockManager::LOCK_* constant
481 * @param $status Status Status to update on lock/unlock
482 * @return ScopedLock|null Returns null on failure
483 */
final public function getScopedFileLocks( array $paths, $type, Status $status ) {
	// ScopedLock::factory() does the locking and records problems in $status.
	$scopedLock = ScopedLock::factory( $this->lockManager, $paths, $type, $status );
	return $scopedLock;
}
487 }
488
489 /**
490 * Base class for all single-write backends.
491 * This class defines the methods as abstract that subclasses must implement.
492 *
493 * @ingroup FileBackend
494 * @since 1.19
495 */
496 abstract class FileBackend extends FileBackendBase {
497 /** @var Array */
498 protected $cache = array(); // (storage path => key => value)
499 protected $maxCacheSize = 50; // integer; max paths with entries
500 /** @var Array */
501 protected $shardViaHashLevels = array(); // (container name => integer)
502
503 /**
504 * Create a file in the backend with the given contents.
505 * Do not call this function from places outside FileBackend and FileOp.
506 * $params include:
507 * content : the raw file contents
508 * dst : destination storage path
509 * overwriteDest : overwrite any file that exists at the destination
510 *
511 * @param $params Array
512 * @return Status
513 */
final public function createInternal( array $params ) {
	// Let the subclass perform the write...
	$result = $this->doCreateInternal( $params );
	// ...then forget anything cached about the destination, whatever the outcome.
	$dstPath = $params['dst'];
	$this->clearCache( array( $dstPath ) );
	return $result;
}
519
520 /**
521 * @see FileBackend::createInternal()
522 */
523 abstract protected function doCreateInternal( array $params );
524
525 /**
526 * Store a file into the backend from a file on disk.
527 * Do not call this function from places outside FileBackend and FileOp.
528 * $params include:
529 * src : source path on disk
530 * dst : destination storage path
531 * overwriteDest : overwrite any file that exists at the destination
532 *
533 * @param $params Array
534 * @return Status
535 */
final public function storeInternal( array $params ) {
	// Let the subclass perform the write...
	$result = $this->doStoreInternal( $params );
	// ...then forget anything cached about the destination, whatever the outcome.
	$dstPath = $params['dst'];
	$this->clearCache( array( $dstPath ) );
	return $result;
}
541
542 /**
543 * @see FileBackend::storeInternal()
544 */
545 abstract protected function doStoreInternal( array $params );
546
547 /**
548 * Copy a file from one storage path to another in the backend.
549 * Do not call this function from places outside FileBackend and FileOp.
550 * $params include:
551 * src : source storage path
552 * dst : destination storage path
553 * overwriteDest : overwrite any file that exists at the destination
554 *
555 * @param $params Array
556 * @return Status
557 */
final public function copyInternal( array $params ) {
	// Let the subclass perform the copy...
	$result = $this->doCopyInternal( $params );
	// ...then forget anything cached about the destination, whatever the outcome.
	$dstPath = $params['dst'];
	$this->clearCache( array( $dstPath ) );
	return $result;
}
563
564 /**
565 * @see FileBackend::copyInternal()
566 */
567 abstract protected function doCopyInternal( array $params );
568
569 /**
570 * Delete a file at the storage path.
571 * Do not call this function from places outside FileBackend and FileOp.
572 * $params include:
573 * src : source storage path
574 * ignoreMissingSource : do nothing if the source file does not exist
575 *
576 * @param $params Array
577 * @return Status
578 */
final public function deleteInternal( array $params ) {
	// Let the subclass perform the deletion...
	$result = $this->doDeleteInternal( $params );
	// ...then forget anything cached about the source, whatever the outcome.
	$srcPath = $params['src'];
	$this->clearCache( array( $srcPath ) );
	return $result;
}
584
585 /**
586 * @see FileBackend::deleteInternal()
587 */
588 abstract protected function doDeleteInternal( array $params );
589
590 /**
591 * Move a file from one storage path to another in the backend.
592 * Do not call this function from places outside FileBackend and FileOp.
593 * $params include:
594 * src : source storage path
595 * dst : destination storage path
596 * overwriteDest : overwrite any file that exists at the destination
597 *
598 * @param $params Array
599 * @return Status
600 */
final public function moveInternal( array $params ) {
	// Perform the move (copy + delete by default, see doMoveInternal())...
	$result = $this->doMoveInternal( $params );
	// ...then forget anything cached about either end of the move.
	$touchedPaths = array( $params['src'], $params['dst'] );
	$this->clearCache( $touchedPaths );
	return $result;
}
606
607 /**
608 * @see FileBackend::moveInternal()
609 */
protected function doMoveInternal( array $params ) {
	// Default move: copy the source to the destination, then delete the source.
	$status = $this->copyInternal( $params );
	if ( $status->isOK() ) {
		// Delete source (only fails due to races or medium going down)
		$deleteStatus = $this->deleteInternal( array( 'src' => $params['src'] ) );
		$status->merge( $deleteStatus );
		// The copy succeeded, so force the result to OK: delete errors are ignored.
		$status->setResult( true, $status->value );
	}
	return $status;
}
621
622 /**
623 * Combines files from several storage paths into a new file in the backend.
624 * Do not call this function from places outside FileBackend and FileOp.
625 * $params include:
626 * srcs : ordered source storage paths (e.g. chunk1, chunk2, ...)
627 * dst : file system path to 0-byte temp file
628 * overwriteDest : overwrite any file that exists at the destination
629 *
630 * @param $params Array
631 * @return Status
632 */
final public function concatenateInternal( array $params ) {
	// No cache handling here; simply forward to the (overridable) implementation.
	return $this->doConcatenateInternal( $params );
}
637
638 /**
639 * @see FileBackend::concatenateInternal()
640 */
protected function doConcatenateInternal( array $params ) {
	// Default implementation: append every source chunk, in order, to the
	// caller-supplied empty temp file at $params['dst'].
	//
	// $params include:
	//     srcs : ordered source storage paths
	//     dst  : file system path to an existing 0-byte temp file
	//
	// Returns a Status; it is fatal if the temp file is missing or non-empty,
	// if a chunk cannot be fetched or opened, or if writing/closing fails.
	$status = Status::newGood();
	$tmpPath = $params['dst']; // convenience

	// Check that the specified temp file is valid...
	wfSuppressWarnings();
	$ok = ( is_file( $tmpPath ) && !filesize( $tmpPath ) );
	wfRestoreWarnings();
	if ( !$ok ) { // not present or not empty
		$status->fatal( 'backend-fail-opentemp', $tmpPath );
		return $status;
	}

	// Build up the temp file using the source chunks (in order)...
	// Binary mode keeps the bytes untouched on every platform.
	$tmpHandle = fopen( $tmpPath, 'ab' );
	if ( $tmpHandle === false ) {
		$status->fatal( 'backend-fail-opentemp', $tmpPath );
		return $status;
	}
	foreach ( $params['srcs'] as $virtualSource ) {
		// Get a local FS version of the chunk
		$tmpFile = $this->getLocalReference( array( 'src' => $virtualSource ) );
		if ( !$tmpFile ) {
			fclose( $tmpHandle ); // do not leak the destination handle
			$status->fatal( 'backend-fail-read', $virtualSource );
			return $status;
		}
		// Get a handle to the local FS version
		$sourceHandle = fopen( $tmpFile->getPath(), 'rb' );
		if ( $sourceHandle === false ) {
			fclose( $tmpHandle );
			$status->fatal( 'backend-fail-read', $virtualSource );
			return $status;
		}
		// Append the chunk to the temp file
		$copied = stream_copy_to_stream( $sourceHandle, $tmpHandle );
		// Copying zero bytes is only a failure when the chunk is not really empty;
		// an empty chunk is valid input and must not abort the concatenation.
		$sourceStat = fstat( $sourceHandle );
		$isEmptyChunk = ( $sourceStat !== false && $sourceStat['size'] === 0 );
		fclose( $sourceHandle );
		if ( $copied === false || ( $copied === 0 && !$isEmptyChunk ) ) {
			fclose( $tmpHandle );
			$status->fatal( 'backend-fail-writetemp', $tmpPath );
			return $status;
		}
	}
	if ( !fclose( $tmpHandle ) ) {
		$status->fatal( 'backend-fail-closetemp', $tmpPath );
		return $status;
	}

	return $status;
}
690
691 /**
692 * @see FileBackendBase::prepare()
693 */
final public function prepare( array $params ) {
	$status = Status::newGood();

	$resolved = $this->resolveStoragePath( $params['dir'] );
	list( $containerBase, $relDir, $shardSuffix ) = $resolved;
	if ( $relDir === null ) {
		// invalid storage path
		$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
		return $status;
	}

	if ( $shardSuffix === null ) {
		// The directory is spread over several shards: prepare each of them.
		wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
		$pathParts = self::splitStoragePath( $params['dir'] );
		foreach ( $this->getContainerSuffixes( $pathParts[1] ) as $suffix ) {
			$status->merge( $this->doPrepare( $containerBase . $suffix, $relDir, $params ) );
		}
		return $status;
	}

	// The directory is confined to a single container/shard.
	$status->merge( $this->doPrepare( $containerBase, $relDir, $params ) );
	return $status;
}
712
713 /**
714 * @see FileBackend::prepare()
715 */
protected function doPrepare( $container, $dir, array $params ) {
	// Default: nothing to do, report success. Subclasses may override.
	$nothingToDo = Status::newGood();
	return $nothingToDo;
}
719
720 /**
721 * @see FileBackendBase::secure()
722 */
final public function secure( array $params ) {
	$status = Status::newGood();

	$resolved = $this->resolveStoragePath( $params['dir'] );
	list( $containerBase, $relDir, $shardSuffix ) = $resolved;
	if ( $relDir === null ) {
		// invalid storage path
		$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
		return $status;
	}

	if ( $shardSuffix === null ) {
		// The directory is spread over several shards: secure each of them.
		wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
		$pathParts = self::splitStoragePath( $params['dir'] );
		foreach ( $this->getContainerSuffixes( $pathParts[1] ) as $suffix ) {
			$status->merge( $this->doSecure( $containerBase . $suffix, $relDir, $params ) );
		}
		return $status;
	}

	// The directory is confined to a single container/shard.
	$status->merge( $this->doSecure( $containerBase, $relDir, $params ) );
	return $status;
}
741
742 /**
743 * @see FileBackend::secure()
744 */
protected function doSecure( $container, $dir, array $params ) {
	// Default: nothing to do, report success. Subclasses may override.
	$nothingToDo = Status::newGood();
	return $nothingToDo;
}
748
749 /**
750 * @see FileBackendBase::clean()
751 */
final public function clean( array $params ) {
	$status = Status::newGood();

	$resolved = $this->resolveStoragePath( $params['dir'] );
	list( $containerBase, $relDir, $shardSuffix ) = $resolved;
	if ( $relDir === null ) {
		// invalid storage path
		$status->fatal( 'backend-fail-invalidpath', $params['dir'] );
		return $status;
	}

	if ( $shardSuffix === null ) {
		// The directory is spread over several shards: clean each of them.
		wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
		$pathParts = self::splitStoragePath( $params['dir'] );
		foreach ( $this->getContainerSuffixes( $pathParts[1] ) as $suffix ) {
			$status->merge( $this->doClean( $containerBase . $suffix, $relDir, $params ) );
		}
		return $status;
	}

	// The directory is confined to a single container/shard.
	$status->merge( $this->doClean( $containerBase, $relDir, $params ) );
	return $status;
}
770
771 /**
772 * @see FileBackend::clean()
773 */
protected function doClean( $container, $dir, array $params ) {
	// Default: nothing to do, report success. Subclasses may override.
	$nothingToDo = Status::newGood();
	return $nothingToDo;
}
777
778 /**
779 * @see FileBackendBase::getFileContents()
780 */
public function getFileContents( array $params ) {
	// Default implementation: obtain a local file and read it whole.
	$localFile = $this->getLocalReference( $params );
	if ( $localFile ) {
		// Read failures are reported through the false return value, not warnings.
		wfSuppressWarnings();
		$contents = file_get_contents( $localFile->getPath() );
		wfRestoreWarnings();
		return $contents;
	}
	return false;
}
791
792 /**
793 * @see FileBackendBase::getFileSha1Base36()
794 */
public function getFileSha1Base36( array $params ) {
	$srcPath = $params['src'];

	// Serve a previously computed hash when one is cached for this path.
	if ( isset( $this->cache[$srcPath]['sha1'] ) ) {
		return $this->cache[$srcPath]['sha1'];
	}

	$localFile = $this->getLocalReference( $params );
	if ( !$localFile ) {
		return false;
	}

	$hash = $localFile->getSha1Base36();
	if ( $hash === false ) {
		// don't cache negatives
		return $hash;
	}

	$this->trimCache(); // limit memory
	$this->cache[$srcPath]['sha1'] = $hash;
	return $hash;
}
812
813 /**
814 * @see FileBackendBase::getFileProps()
815 */
public function getFileProps( array $params ) {
	// Fall back to placeholder properties when no local file can be obtained.
	$localFile = $this->getLocalReference( $params );
	return $localFile
		? $localFile->getProps()
		: FSFile::placeholderProps();
}
824
825 /**
826 * @see FileBackendBase::getLocalReference()
827 */
public function getLocalReference( array $params ) {
	// Default: a local copy doubles as the local reference.
	$localCopy = $this->getLocalCopy( $params );
	return $localCopy;
}
831
832 /**
833 * @see FileBackendBase::streamFile()
834 */
public function streamFile( array $params ) {
	$status = Status::newGood();

	$streamed = false;
	$localFile = $this->getLocalReference( $params );
	if ( $localFile ) {
		// Optional extra HTTP headers supplied by the caller.
		$extraHeaders = array();
		if ( isset( $params['headers'] ) ) {
			$extraHeaders = $params['headers'];
		}
		$streamed = StreamFile::stream( $localFile->getPath(), $extraHeaders, false );
	}

	// A missing local file and a failed stream are reported identically.
	if ( !$streamed ) {
		$status->fatal( 'backend-fail-stream', $params['src'] );
	}

	return $status;
}
856
857 /**
858 * @see FileBackendBase::getFileList()
859 */
final public function getFileList( array $params ) {
	$resolved = $this->resolveStoragePath( $params['dir'] );
	list( $containerBase, $relDir, $shardSuffix ) = $resolved;
	if ( $relDir === null ) {
		return null; // invalid storage path
	}

	if ( $shardSuffix === null ) {
		// File listing spans multiple containers/shards
		wfDebug( __METHOD__ . ": iterating over all container shards.\n" );
		$pathParts = self::splitStoragePath( $params['dir'] );
		$suffixes = $this->getContainerSuffixes( $pathParts[1] );
		return new ContainerShardListIterator( $this, $containerBase, $suffixes, $params );
	}

	// File listing is confined to a single container/shard
	return $this->getFileListInternal( $containerBase, $relDir, $params );
}
876
877 /**
878 * Do not call this function from places outside FileBackend and ContainerFileListIterator
879 *
880 * @param $container string Resolved container name
881 * @param $dir string Resolved path relative to container
882 * @param $params Array
883 * @see FileBackend::getFileList()
884 */
885 abstract public function getFileListInternal( $container, $dir, array $params );
886
887 /**
888 * Get the list of supported operations and their corresponding FileOp classes.
889 *
890 * @return Array
891 */
protected function supportedOperations() {
	// Map of operation name => name of the FileOp class that implements it.
	$opClasses = array();
	$opClasses['store'] = 'StoreFileOp';
	$opClasses['copy'] = 'CopyFileOp';
	$opClasses['move'] = 'MoveFileOp';
	$opClasses['delete'] = 'DeleteFileOp';
	$opClasses['concatenate'] = 'ConcatenateFileOp';
	$opClasses['create'] = 'CreateFileOp';
	$opClasses['null'] = 'NullFileOp';
	return $opClasses;
}
903
904 /**
905 * Return a list of FileOp objects from a list of operations.
906 * Do not call this function from places outside FileBackend.
907 *
908 * The result must have the same number of items as the input.
909 * An exception is thrown if an unsupported operation is requested.
910 *
911 * @param $ops Array Same format as doOperations()
912 * @return Array List of FileOp objects
913 * @throws MWException
914 */
final public function getOperations( array $ops ) {
	$opClasses = $this->supportedOperations();

	// Build up an ordered array of FileOp objects, one per requested operation...
	$fileOps = array();
	foreach ( $ops as $opParams ) {
		$opName = $opParams['op'];
		if ( !isset( $opClasses[$opName] ) ) {
			throw new MWException( "Operation `$opName` is not supported." );
		}
		// The whole operation array is passed on as the FileOp's parameters.
		$opClass = $opClasses[$opName];
		$fileOps[] = new $opClass( $this, $opParams );
	}

	return $fileOps;
}
935
936 /**
937 * @see FileBackendBase::doOperationsInternal()
938 */
protected function doOperationsInternal( array $ops, array $opts ) {
	$status = Status::newGood();

	// Turn the raw operation arrays into FileOp objects...
	$fileOps = $this->getOperations( $ops );

	// Acquire any locks as needed...
	$useLocks = empty( $opts['nonLocking'] );
	if ( $useLocks ) {
		// Collect the paths each operation reads and changes...
		$readPaths = array();
		$changedPaths = array();
		foreach ( $fileOps as $op ) {
			$readPaths = array_merge( $readPaths, $op->storagePathsRead() );
			$changedPaths = array_merge( $changedPaths, $op->storagePathsChanged() );
		}
		// Optimization: a path that gets an EX lock anyway needs no second lock
		$readPaths = array_diff( $readPaths, $changedPaths );
		// The lock objects must stay in local variables: they are scoped locks,
		// so the locks are held until this function returns.
		$readLock = $this->getScopedFileLocks( $readPaths, LockManager::LOCK_UW, $status );
		$writeLock = $this->getScopedFileLocks( $changedPaths, LockManager::LOCK_EX, $status );
		if ( !$status->isOK() ) {
			return $status; // abort
		}
	}

	// Clear any cache entries (after locks acquired)
	$this->clearCache();
	// Actually attempt the operation batch...
	$batchStatus = FileOp::attemptBatch( $fileOps, $opts );
	$status->merge( $batchStatus );

	return $status;
}
970
971 /**
972 * Invalidate the file existence and property cache
973 *
974 * @param $paths Array Clear cache for specific files
975 * @return void
976 */
final public function clearCache( array $paths = null ) {
	if ( $paths !== null ) {
		// Targeted invalidation: forget only the listed paths
		foreach ( $paths as $storagePath ) {
			unset( $this->cache[$storagePath] );
		}
		return;
	}
	// No list given: forget everything
	$this->cache = array();
}
986
987 /**
988 * Prune the cache if it is too big to add an item
989 *
990 * @return void
991 */
protected function trimCache() {
	if ( count( $this->cache ) < $this->maxCacheSize ) {
		return; // still room for another entry
	}
	// Evict the entry that comes first in the array's internal order
	reset( $this->cache );
	$firstKey = key( $this->cache );
	unset( $this->cache[$firstKey] );
}
999
1000 /**
1001 * Check if a given path is a mwstore:// path.
1002 * This does not do any actual validation or existence checks.
1003 *
1004 * @param $path string
1005 * @return bool
1006 */
final public static function isStoragePath( $path ) {
	// The scheme has to sit at offset 0; a later occurrence does not count
	$schemeOffset = strpos( $path, 'mwstore://' );

	return $schemeOffset === 0;
}
1010
1011 /**
1012 * Split a storage path (e.g. "mwstore://backend/container/path/to/object")
1013 * into a backend name, a container name, and a relative object path.
1014 *
1015 * @param $storagePath string
1016 * @return Array (backend, container, rel object) or (null, null, null)
1017 */
final public static function splitStoragePath( $storagePath ) {
	$invalid = array( null, null, null );
	if ( !self::isStoragePath( $storagePath ) ) {
		return $invalid;
	}
	// Skip the 10-character 'mwstore://' prefix and cut into at most three
	// pieces, so that slashes inside the object path are kept intact.
	$segments = explode( '/', substr( $storagePath, 10 ), 3 );
	switch ( count( $segments ) ) {
		case 3:
			// "backend/container/path"
			return $segments;
		case 2:
			// "backend/container": the relative path is empty
			return array( $segments[0], $segments[1], '' );
		default:
			// Only a backend name (or nothing) was given
			return $invalid;
	}
}
1030
1031 /**
1032 * Check if a container name is valid.
 * This checks for length and illegal characters.
1034 *
1035 * @param $container string
1036 * @return bool
1037 */
final protected static function isValidContainerName( $container ) {
	// This accounts for Swift and S3 restrictions. Also note
	// that these urlencode to the same string, which is useful
	// since the Swift size limit is *after* URL encoding.
	// Limit to 200 to leave room for '.shard-XX' or '.segment'.
	//
	// The "D" modifier makes "$" match only at the very end of the string.
	// Without it, "$" also matches just before a trailing newline, so a
	// name such as "images\n" would wrongly be accepted.
	// The cast gives the documented bool instead of preg_match()'s int|false.
	return (bool)preg_match( '/^[a-zA-Z0-9._-]{1,200}$/uD', $container );
}
1045
1046 /**
1047 * Validate and normalize a relative storage path.
1048 * Null is returned if the path involves directory traversal.
1049 * Traversal is insecure for FS backends and broken for others.
1050 *
1051 * @param $path string
1052 * @return string|null
1053 */
final protected static function normalizeStoragePath( $path ) {
	// Normalize directory separators
	$path = strtr( $path, '\\', '/' );
	// Reject any path that has "." or ".." as a whole path segment.
	// Wrapping the path in slashes lets two substring checks cover a segment
	// at the start, in the middle, at the end, or as the entire path. The
	// trailing case (e.g. "a/..") was previously let through.
	if ( strpos( $path, '.' ) !== false ) {
		$delimited = "/{$path}/";
		if (
			strpos( $delimited, '/./' ) !== false ||
			strpos( $delimited, '/../' ) !== false
		) {
			return null;
		}
	}
	// Dots that are merely part of a name (e.g. "a/file.tar.gz") are fine
	return $path;
}
1072
1073 /**
1074 * Splits a storage path into an internal container name,
1075 * an internal relative object name, and a container shard suffix.
1076 * Any shard suffix is already appended to the internal container name.
1077 * This also checks that the storage path is valid and within this backend.
1078 *
1079 * If the container is sharded but a suffix could not be determined,
1080 * this means that the path can only refer to a directory and can only
1081 * be scanned by looking in all the container shards.
1082 *
1083 * @param $storagePath string
1084 * @return Array (container, path, container suffix) or (null, null, null) if invalid
1085 */
final protected function resolveStoragePath( $storagePath ) {
	$invalid = array( null, null, null );

	list( $backendName, $containerName, $objectPath ) = self::splitStoragePath( $storagePath );
	if ( $backendName !== $this->name ) {
		return $invalid; // path belongs to some other backend
	}

	$objectPath = self::normalizeStoragePath( $objectPath );
	if ( $objectPath === null ) {
		return $invalid; // directory traversal
	}

	// The shard is derived from the normalized path, before the
	// backend-specific path resolution rewrites it
	$shardSuffix = $this->getContainerShard( $containerName, $objectPath );

	// Backend-specific validation/sanitizing of the relative path
	$objectPath = $this->resolveContainerPath( $containerName, $objectPath );
	if ( $objectPath === null ) {
		return $invalid;
	}

	// Add the wiki ID prefix, then check the generic naming rules
	$containerName = $this->fullContainerName( $containerName );
	if ( !self::isValidContainerName( $containerName ) ) {
		return $invalid;
	}

	// Backend-specific validation/sanitizing of the name with its shard suffix
	$containerName = $this->resolveContainerName( "{$containerName}{$shardSuffix}" );
	if ( $containerName === null ) {
		return $invalid;
	}

	return array( $containerName, $objectPath, $shardSuffix );
}
1110
1111 /**
1112 * Like resolveStoragePath() except null values are returned if
1113 * the container is sharded and the shard could not be determined.
1114 *
1115 * @see FileBackend::resolveStoragePath()
1116 *
1117 * @param $storagePath string
1118 * @return Array (container, path) or (null, null) if invalid
1119 */
final protected function resolveStoragePathReal( $storagePath ) {
	list( $containerName, $objectPath, $shardSuffix ) = $this->resolveStoragePath( $storagePath );

	// A null suffix covers both an invalid path and an undeterminable shard
	return ( $shardSuffix === null )
		? array( null, null )
		: array( $containerName, $objectPath );
}
1127
1128 /**
1129 * Get the container name shard suffix for a given path.
 * An empty suffix means the container is not sharded.
1131 *
1132 * @param $container string Container name
 * @param $relPath string Storage path relative to the container
1134 * @return string|null Returns null if shard could not be determined
1135 */
final protected function getContainerShard( $container, $relPath ) {
	$levels = $this->getContainerHashLevels( $container );
	if ( $levels !== 1 && $levels !== 2 ) {
		return ''; // no sharding
	}

	// Sub-pattern for the hash directories; the "shard" group captures the
	// hex digits that name the shard
	$hashDirPattern = ( $levels === 1 )
		? '(?P<shard>[0-9a-f])' // 16 shards per container
		: '[0-9a-f]/(?P<shard>[0-9a-f]{2})'; // 256 shards per container

	// Directories of 2+ characters may precede the hash directories (e.g.
	// "archive/a/ab" or "temp/a/ab"); the length rule keeps them from being
	// confused with a one-character hash directory.
	$pathPattern = '!^(?:[^/]{2,}/)*' . $hashDirPattern . '(?:/|$)!';

	$matches = array();
	if ( !preg_match( $pathPattern, $relPath, $matches ) ) {
		return null; // failed to match
	}

	return '.shard-' . str_pad( $matches['shard'], $levels, '0', STR_PAD_LEFT );
}
1153
1154 /**
1155 * Get the number of hash levels for a container.
1156 * If greater than 0, then all file storage paths within
1157 * the container are required to be hashed accordingly.
1158 *
1159 * @param $container string
1160 * @return integer
1161 */
final protected function getContainerHashLevels( $container ) {
	if ( !isset( $this->shardViaHashLevels[$container] ) ) {
		return 0; // no sharding
	}
	$levels = (int)$this->shardViaHashLevels[$container];

	// Only 0, 1 and 2 are accepted; anything else counts as "no sharding"
	return ( $levels >= 0 && $levels <= 2 ) ? $levels : 0;
}
1171
1172 /**
1173 * Get a list of full container shard suffixes for a container
1174 *
1175 * @param $container string
1176 * @return Array
1177 */
final protected function getContainerSuffixes( $container ) {
	$suffixes = array();
	$hexDigits = $this->getContainerHashLevels( $container );
	if ( $hexDigits <= 0 ) {
		return $suffixes; // container is not sharded
	}

	// Each hex digit contributes 4 bits: 16 shards for 1 digit, 256 for 2
	$shardCount = 1 << ( 4 * $hexDigits );
	$shardIndex = 0;
	while ( $shardIndex < $shardCount ) {
		// Zero-pad so every suffix has the same number of hex digits
		$suffixes[] = '.shard-' . str_pad( dechex( $shardIndex ), $hexDigits, '0', STR_PAD_LEFT );
		++$shardIndex;
	}

	return $suffixes;
}
1189
1190 /**
1191 * Get the full container name, including the wiki ID prefix
1192 *
1193 * @param $container string
1194 * @return string
1195 */
final protected function fullContainerName( $container ) {
	// Without a wiki ID the name is used as-is; otherwise it becomes "<wikiId>-<container>"
	return ( $this->wikiId != '' )
		? $this->wikiId . '-' . $container
		: $container;
}
1203
1204 /**
1205 * Resolve a container name, checking if it's allowed by the backend.
1206 * This is intended for internal use, such as encoding illegal chars.
1207 * Subclasses can override this to be more restrictive.
1208 *
1209 * @param $container string
1210 * @return string|null
1211 */
protected function resolveContainerName( $container ) {
	// Default hook: every name is accepted and returned unchanged
	$resolvedName = $container;

	return $resolvedName;
}
1215
1216 /**
1217 * Resolve a relative storage path, checking if it's allowed by the backend.
1218 * This is intended for internal use, such as encoding illegal chars or perhaps
1219 * getting absolute paths (e.g. FS based backends). Note that the relative path
1220 * may be the empty string (e.g. the path is simply to the container).
1221 *
1222 * @param $container string Container name
1223 * @param $relStoragePath string Storage path relative to the container
1224 * @return string|null Path or null if not valid
1225 */
protected function resolveContainerPath( $container, $relStoragePath ) {
	// Default hook: the container is not consulted and the relative path
	// is returned unchanged
	$resolvedPath = $relStoragePath;

	return $resolvedPath;
}
1229
1230 /**
1231 * Get the final extension from a storage or FS path
1232 *
1233 * @param $path string
1234 * @return string
1235 */
final public static function extensionFromPath( $path ) {
	$dotPos = strrpos( $path, '.' );
	if ( !$dotPos ) {
		// No dot at all, or the only dot is the first character
		return '';
	}
	// If a directory separator follows the last dot, that dot belongs to a
	// directory name (e.g. "dir.v2/README") and the file has no extension.
	// Previously "v2/readme" was returned for such a path.
	$slashPos = strrpos( $path, '/' );
	if ( $slashPos !== false && $slashPos > $dotPos ) {
		return '';
	}
	// Backslashes are treated as separators too, for FS paths
	$backslashPos = strrpos( $path, '\\' );
	if ( $backslashPos !== false && $backslashPos > $dotPos ) {
		return '';
	}
	// Cast guards against substr() returning false for a trailing dot
	return strtolower( (string)substr( $path, $dotPos + 1 ) );
}
1240 }
1241
1242 /**
1243 * FileBackend helper function to handle file listings that span container shards.
1244 * Do not use this class from places outside of FileBackend.
1245 *
1246 * @ingroup FileBackend
1247 */
class ContainerShardListIterator implements Iterator {
	/** @var FileBackend */
	protected $backend;
	/** @var Array */
	protected $params;
	/** @var Array */
	protected $shardSuffixes;
	protected $container; // string
	protected $directory; // string

	/** @var Traversable|Array|null Listing of the current shard */
	protected $iter;
	protected $curShard = 0; // integer; index into $shardSuffixes
	protected $pos = 0; // integer; number of items passed, used as the key

	/**
	 * @param $backend FileBackend
	 * @param $container string Full storage container name
	 * @param $dir string Storage directory relative to container
	 * @param $suffixes Array List of container shard suffixes
	 * @param $params Array
	 */
	public function __construct(
		FileBackend $backend, $container, $dir, array $suffixes, array $params
	) {
		$this->backend = $backend;
		$this->container = $container;
		$this->directory = $dir;
		$this->shardSuffixes = $suffixes;
		$this->params = $params;
	}

	/**
	 * Get the current item of the current shard listing.
	 *
	 * @return mixed The item, or false if there is no current item
	 */
	public function current() {
		if ( !$this->iter ) {
			return false; // no listing (empty, failed, or not rewound yet)
		} elseif ( is_array( $this->iter ) ) {
			return current( $this->iter );
		} else {
			return $this->iter->current();
		}
	}

	/**
	 * @return integer Position of the current item across all shards
	 */
	public function key() {
		return $this->pos;
	}

	/**
	 * Advance to the next item, moving on to later shards as needed.
	 *
	 * @return void
	 */
	public function next() {
		++$this->pos;
		if ( !$this->iter ) {
			// Nothing to advance; avoids calling a method on a missing listing
		} elseif ( is_array( $this->iter ) ) {
			next( $this->iter );
		} else {
			$this->iter->next();
		}
		// Find the next non-empty shard if no elements are left
		$this->nextShardIteratorIfNotValid();
	}

	/**
	 * If the iterator for this container shard is out of items,
	 * then move on to the next container shard that has items.
	 *
	 * @return void
	 */
	protected function nextShardIteratorIfNotValid() {
		while ( !$this->valid() ) {
			if ( ++$this->curShard >= count( $this->shardSuffixes ) ) {
				break; // no more container shards
			}
			$this->setIteratorFromCurrentShard();
		}
	}

	/**
	 * Load the file listing for the shard at index $curShard.
	 * The listing is cleared if there is no such shard, so that an
	 * empty suffix list never results in a listing request.
	 *
	 * @return void
	 */
	protected function setIteratorFromCurrentShard() {
		if ( !isset( $this->shardSuffixes[$this->curShard] ) ) {
			$this->iter = null;
			return;
		}
		$suffix = $this->shardSuffixes[$this->curShard];
		$this->iter = $this->backend->getFileListInternal(
			"{$this->container}{$suffix}", $this->directory, $this->params );
	}

	/**
	 * Restart from the first item of the first non-empty shard.
	 *
	 * @return void
	 */
	public function rewind() {
		$this->pos = 0;
		$this->curShard = 0;
		$this->setIteratorFromCurrentShard();
		// Find the next non-empty shard if this one has no elements
		$this->nextShardIteratorIfNotValid();
	}

	/**
	 * @return bool Whether there is a current item
	 */
	public function valid() {
		if ( !$this->iter ) {
			return false; // no listing, or an empty array listing
		} elseif ( is_array( $this->iter ) ) {
			return ( current( $this->iter ) !== false ); // no paths can have this value
		} else {
			return $this->iter->valid();
		}
	}
}