[FileBackend] Made doOperations() Status handling align with documentation as well...
[lhc/web/wiklou.git] / includes / filerepo / backend / FileBackend.php
1 <?php
2 /**
3 * @defgroup FileBackend File backend
4 * @ingroup FileRepo
5 *
6 * File backend is used to interact with file storage systems,
7 * such as the local file system, NFS, or cloud storage systems.
8 */
9
10 /**
11 * @file
12 * @ingroup FileBackend
13 * @author Aaron Schulz
14 */
15
16 /**
17 * @brief Base class for all file backend classes (including multi-write backends).
18 *
19 * This class defines the methods as abstract that subclasses must implement.
20 * Outside callers can assume that all backends will have these functions.
21 *
22 * All "storage paths" are of the format "mwstore://<backend>/<container>/<path>".
23 * The <path> portion is a relative path that uses UNIX file system (FS) notation,
24 * though any particular backend may not actually be using a local filesystem.
25 * Therefore, the relative paths are only virtual.
26 *
27 * Backend contents are stored under wiki-specific container names by default.
28 * For legacy reasons, this has no effect for the FS backend class, and per-wiki
29 * segregation must be done by setting the container paths appropriately.
30 *
31 * FS-based backends are somewhat more restrictive due to the existence of real
32 * directory files; a regular file cannot have the same name as a directory. Other
33 * backends with virtual directories may not have this limitation. Callers should
34 * store files in such a way that no files and directories are under the same path.
35 *
36 * Methods should avoid throwing exceptions at all costs.
37 * As a corollary, external dependencies should be kept to a minimum.
38 *
39 * @ingroup FileBackend
40 * @since 1.19
41 */
abstract class FileBackend {
	/** @var string Unique backend name */
	protected $name;
	/** @var string Unique wiki name (used as a prefix for container names) */
	protected $wikiId;
	/** @var string Read-only explanation message; the empty string if writable */
	protected $readOnly;
	/** @var LockManager */
	protected $lockManager;

	/**
	 * Create a new backend instance from configuration.
	 * This should only be called from within FileBackendGroup.
	 *
	 * $config includes:
	 *     'name'        : The unique name of this backend.
	 *                     This should consist of alphanumeric, '-', and '_' characters.
	 *                     This name should not be changed after use.
	 *     'wikiId'      : Prefix to container names that is unique to this wiki.
	 *                     This should consist of alphanumeric, '-', and '_' characters.
	 *                     Defaults to the value of wfWikiID() when not given.
	 *     'lockManager' : Either a LockManager instance or the registered name
	 *                     of a file lock manager to use.
	 *     'readOnly'    : Write operations are disallowed if this is a non-empty string.
	 *                     It should be an explanation for the backend being read-only.
	 *
	 * @param $config Array
	 * @throws MWException If the backend name is invalid
	 */
	public function __construct( array $config ) {
		$this->name = $config['name'];
		// The "D" modifier stops "$" from matching just before a trailing newline,
		// so a name like "backend\n" is rejected. The hyphen is placed last in the
		// character class so that it is unambiguously a literal.
		if ( !preg_match( '!^[a-zA-Z0-9_-]{1,255}$!D', $this->name ) ) {
			throw new MWException( "Backend name `{$this->name}` is invalid." );
		}
		$this->wikiId = isset( $config['wikiId'] )
			? $config['wikiId']
			: wfWikiID(); // e.g. "my_wiki-en_"
		// Accept either a ready-made lock manager or the name of a registered one
		$this->lockManager = ( $config['lockManager'] instanceof LockManager )
			? $config['lockManager']
			: LockManagerGroup::singleton()->get( $config['lockManager'] );
		$this->readOnly = isset( $config['readOnly'] )
			? (string)$config['readOnly']
			: '';
	}

	/**
	 * Get the unique backend name.
	 * We may have multiple different backends of the same type.
	 * For example, we can have two Swift backends using different proxies.
	 *
	 * @return string
	 */
	final public function getName() {
		return $this->name;
	}

	/**
	 * Check if this backend is read-only
	 *
	 * @return bool
	 */
	final public function isReadOnly() {
		return ( $this->readOnly != '' );
	}

	/**
	 * Get an explanatory message if this backend is read-only
	 *
	 * @return string|bool Returns false if the backend is not read-only
	 */
	final public function getReadOnlyReason() {
		return ( $this->readOnly != '' ) ? $this->readOnly : false;
	}

	/**
	 * This is the main entry point into the backend for write operations.
	 * Callers supply an ordered list of operations to perform as a transaction.
	 * Files will be locked, the stat cache cleared, and then the operations attempted.
	 * If any serious errors occur, all attempted operations will be rolled back.
	 *
	 * $ops is an array of arrays. The outer array holds a list of operations.
	 * Each inner array is a set of key value pairs that specify an operation.
	 *
	 * Supported operations and their parameters:
	 * a) Create a new file in storage with the contents of a string
	 *     array(
	 *         'op'                  => 'create',
	 *         'dst'                 => <storage path>,
	 *         'content'             => <string of new file contents>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>
	 *     )
	 * b) Copy a file system file into storage
	 *     array(
	 *         'op'                  => 'store',
	 *         'src'                 => <file system path>,
	 *         'dst'                 => <storage path>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>
	 *     )
	 * c) Copy a file within storage
	 *     array(
	 *         'op'                  => 'copy',
	 *         'src'                 => <storage path>,
	 *         'dst'                 => <storage path>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>
	 *     )
	 * d) Move a file within storage
	 *     array(
	 *         'op'                  => 'move',
	 *         'src'                 => <storage path>,
	 *         'dst'                 => <storage path>,
	 *         'overwrite'           => <boolean>,
	 *         'overwriteSame'       => <boolean>
	 *     )
	 * e) Delete a file within storage
	 *     array(
	 *         'op'                  => 'delete',
	 *         'src'                 => <storage path>,
	 *         'ignoreMissingSource' => <boolean>
	 *     )
	 * f) Do nothing (no-op)
	 *     array(
	 *         'op'                  => 'null',
	 *     )
	 *
	 * Boolean flags for operations (operation-specific):
	 * 'ignoreMissingSource' : The operation will simply succeed and do
	 *                         nothing if the source file does not exist.
	 * 'overwrite'           : Any destination file will be overwritten.
	 * 'overwriteSame'       : An error will not be given if a file already
	 *                         exists at the destination that has the same
	 *                         contents as the new contents to be written there.
	 *
	 * $opts is an associative array of boolean flags, including:
	 * 'force'               : Errors that would normally cause a rollback do not.
	 *                         The remaining operations are still attempted if any fail.
	 * 'nonLocking'          : No locks are acquired for the operations.
	 *                         This can increase performance for non-critical writes.
	 *                         This has no effect unless the 'force' flag is set.
	 * 'allowStale'          : Don't require the latest available data.
	 *                         This can increase performance for non-critical writes.
	 *                         This has no effect unless the 'force' flag is set.
	 *
	 * Remarks on locking:
	 * File system paths given to operations should refer to files that are
	 * already locked or otherwise safe from modification from other processes.
	 * Normally these files will be new temp files, which should be adequate.
	 *
	 * Return value:
	 * This returns a Status, which contains all warnings and fatals that occurred
	 * during the operation. The 'failCount', 'successCount', and 'success' members
	 * will reflect each operation attempted. The status will be "OK" unless:
	 *     a) unexpected operation errors occurred (network partitions, disk full...)
	 *     b) significant operation errors occurred and 'force' was not set
	 *
	 * If the backend is read-only, a fatal Status is returned immediately
	 * and no operation is attempted.
	 *
	 * @param $ops Array List of operations to execute in order
	 * @param $opts Array Batch operation options
	 * @return Status
	 */
	final public function doOperations( array $ops, array $opts = array() ) {
		if ( $this->isReadOnly() ) {
			return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
		}
		if ( empty( $opts['force'] ) ) { // sanity
			// These flags are documented as having no effect without 'force'
			unset( $opts['nonLocking'] );
			unset( $opts['allowStale'] );
		}
		return $this->doOperationsInternal( $ops, $opts );
	}

	/**
	 * @see FileBackend::doOperations()
	 *
	 * @param $ops Array List of operations to execute in order
	 * @param $opts Array Batch operation options
	 * @return Status
	 */
	abstract protected function doOperationsInternal( array $ops, array $opts );

	/**
	 * Same as doOperations() except it takes a single operation.
	 * If you are doing a batch of operations that should either
	 * all succeed or all fail, then use that function instead.
	 *
	 * @see FileBackend::doOperations()
	 *
	 * @param $op Array Operation
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function doOperation( array $op, array $opts = array() ) {
		return $this->doOperations( array( $op ), $opts );
	}

	/**
	 * Performs a single create operation.
	 * This sets $params['op'] to 'create' and passes it to doOperation().
	 *
	 * @see FileBackend::doOperation()
	 *
	 * @param $params Array Operation parameters
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function create( array $params, array $opts = array() ) {
		$params['op'] = 'create';
		return $this->doOperation( $params, $opts );
	}

	/**
	 * Performs a single store operation.
	 * This sets $params['op'] to 'store' and passes it to doOperation().
	 *
	 * @see FileBackend::doOperation()
	 *
	 * @param $params Array Operation parameters
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function store( array $params, array $opts = array() ) {
		$params['op'] = 'store';
		return $this->doOperation( $params, $opts );
	}

	/**
	 * Performs a single copy operation.
	 * This sets $params['op'] to 'copy' and passes it to doOperation().
	 *
	 * @see FileBackend::doOperation()
	 *
	 * @param $params Array Operation parameters
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function copy( array $params, array $opts = array() ) {
		$params['op'] = 'copy';
		return $this->doOperation( $params, $opts );
	}

	/**
	 * Performs a single move operation.
	 * This sets $params['op'] to 'move' and passes it to doOperation().
	 *
	 * @see FileBackend::doOperation()
	 *
	 * @param $params Array Operation parameters
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function move( array $params, array $opts = array() ) {
		$params['op'] = 'move';
		return $this->doOperation( $params, $opts );
	}

	/**
	 * Performs a single delete operation.
	 * This sets $params['op'] to 'delete' and passes it to doOperation().
	 *
	 * @see FileBackend::doOperation()
	 *
	 * @param $params Array Operation parameters
	 * @param $opts Array Operation options
	 * @return Status
	 */
	final public function delete( array $params, array $opts = array() ) {
		$params['op'] = 'delete';
		return $this->doOperation( $params, $opts );
	}

	/**
	 * Concatenate a list of storage files into a single file system file.
	 * The target path should refer to a file that is already locked or
	 * otherwise safe from modification from other processes. Normally,
	 * the file will be a new temp file, which should be adequate.
	 * $params include:
	 *     srcs : ordered source storage paths (e.g. chunk1, chunk2, ...)
	 *     dst  : file system path to 0-byte temp file
	 *
	 * @param $params Array Operation parameters
	 * @return Status
	 */
	abstract public function concatenate( array $params );

	/**
	 * Prepare a storage directory for usage.
	 * This will create any required containers and parent directories.
	 * Backends using key/value stores only need to create the container.
	 *
	 * $params include:
	 *     dir : storage directory
	 *
	 * @param $params Array
	 * @return Status Fatal status if the backend is read-only
	 */
	final public function prepare( array $params ) {
		if ( $this->isReadOnly() ) {
			return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
		}
		return $this->doPrepare( $params );
	}

	/**
	 * @see FileBackend::prepare()
	 *
	 * @param $params Array
	 * @return Status
	 */
	abstract protected function doPrepare( array $params );

	/**
	 * Take measures to block web access to a storage directory and
	 * the container it belongs to. FS backends might add .htaccess
	 * files whereas key/value store backends might restrict container
	 * access to the auth user that represents end-users in web request.
	 * This is not guaranteed to actually do anything.
	 *
	 * $params include:
	 *     dir       : storage directory
	 *     noAccess  : try to deny file access
	 *     noListing : try to deny file listing
	 *
	 * @param $params Array
	 * @return Status Fatal status if the backend is read-only
	 */
	final public function secure( array $params ) {
		if ( $this->isReadOnly() ) {
			return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
		}
		$status = $this->doPrepare( $params ); // dir must exist to restrict it
		if ( $status->isOK() ) {
			$status->merge( $this->doSecure( $params ) );
		}
		return $status;
	}

	/**
	 * @see FileBackend::secure()
	 *
	 * @param $params Array
	 * @return Status
	 */
	abstract protected function doSecure( array $params );

	/**
	 * Delete a storage directory if it is empty.
	 * Backends using key/value stores may do nothing unless the directory
	 * is that of an empty container, in which case it should be deleted.
	 *
	 * $params include:
	 *     dir : storage directory
	 *
	 * @param $params Array
	 * @return Status Fatal status if the backend is read-only
	 */
	final public function clean( array $params ) {
		if ( $this->isReadOnly() ) {
			return Status::newFatal( 'backend-fail-readonly', $this->name, $this->readOnly );
		}
		return $this->doClean( $params );
	}

	/**
	 * @see FileBackend::clean()
	 *
	 * @param $params Array
	 * @return Status
	 */
	abstract protected function doClean( array $params );

	/**
	 * Check if a file exists at a storage path in the backend.
	 * This returns false if only a directory exists at the path.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return bool|null Returns null on failure
	 */
	abstract public function fileExists( array $params );

	/**
	 * Get the last-modified timestamp of the file at a storage path.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return string|bool TS_MW timestamp or false on failure
	 */
	abstract public function getFileTimestamp( array $params );

	/**
	 * Get the contents of a file at a storage path in the backend.
	 * This should be avoided for potentially large files.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return string|bool Returns false on failure
	 */
	abstract public function getFileContents( array $params );

	/**
	 * Get the size (bytes) of a file at a storage path in the backend.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return integer|bool Returns false on failure
	 */
	abstract public function getFileSize( array $params );

	/**
	 * Get quick information about a file at a storage path in the backend.
	 * If the file does not exist, then this returns false.
	 * Otherwise, the result is an associative array that includes:
	 *     mtime : the last-modified timestamp (TS_MW)
	 *     size  : the file size (bytes)
	 * Additional values may be included for internal use only.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return Array|bool|null Returns null on failure
	 */
	abstract public function getFileStat( array $params );

	/**
	 * Get a SHA-1 hash of the file at a storage path in the backend.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return string|bool Hash string or false on failure
	 */
	abstract public function getFileSha1Base36( array $params );

	/**
	 * Get the properties of the file at a storage path in the backend.
	 * Returns FSFile::placeholderProps() on failure.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return Array
	 */
	abstract public function getFileProps( array $params );

	/**
	 * Stream the file at a storage path in the backend.
	 * If the file does not exist, a 404 error will be given.
	 * Appropriate HTTP headers (Status, Content-Type, Content-Length)
	 * must be sent if streaming began, while none should be sent otherwise.
	 * Implementations should flush the output buffer before sending data.
	 *
	 * $params include:
	 *     src     : source storage path
	 *     headers : additional HTTP headers to send on success
	 *     latest  : use the latest available data
	 *
	 * @param $params Array
	 * @return Status
	 */
	abstract public function streamFile( array $params );

	/**
	 * Returns a file system file, identical to the file at a storage path.
	 * The file returned is either:
	 * a) A local copy of the file at a storage path in the backend.
	 *    The temporary copy will have the same extension as the source.
	 * b) An original of the file at a storage path in the backend.
	 * Temporary files may be purged when the file object falls out of scope.
	 *
	 * Write operations should *never* be done on this file as some backends
	 * may do internal tracking or may be instances of FileBackendMultiWrite.
	 * In the latter case, there are copies of the file that must stay in sync.
	 * Additionally, further calls to this function may return the same file.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return FSFile|null Returns null on failure
	 */
	abstract public function getLocalReference( array $params );

	/**
	 * Get a local copy on disk of the file at a storage path in the backend.
	 * The temporary copy will have the same file extension as the source.
	 * Temporary files may be purged when the file object falls out of scope.
	 *
	 * $params include:
	 *     src    : source storage path
	 *     latest : use the latest available data
	 *
	 * @param $params Array
	 * @return TempFSFile|null Returns null on failure
	 */
	abstract public function getLocalCopy( array $params );

	/**
	 * Get an iterator to list out all stored files under a storage directory.
	 * If the directory is of the form "mwstore://backend/container",
	 * then all files in the container should be listed.
	 * If the directory is of form "mwstore://backend/container/dir",
	 * then all files under that container directory should be listed.
	 * Results should be storage paths relative to the given directory.
	 *
	 * Storage backends with eventual consistency might return stale data.
	 *
	 * $params include:
	 *     dir : storage path directory
	 *
	 * @param $params Array
	 * @return Traversable|Array|null Returns null on failure
	 */
	abstract public function getFileList( array $params );

	/**
	 * Invalidate any in-process file existence and property cache.
	 * If $paths is given, then only the cache for those files will be cleared.
	 *
	 * This base implementation does nothing.
	 *
	 * @param $paths Array Storage paths (optional)
	 * @return void
	 */
	public function clearCache( array $paths = null ) {}

	/**
	 * Lock the files at the given storage paths in the backend.
	 * This will either lock all the files or none (on failure).
	 *
	 * Callers should consider using getScopedFileLocks() instead.
	 *
	 * @param $paths Array Storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	final public function lockFiles( array $paths, $type ) {
		return $this->lockManager->lock( $paths, $type );
	}

	/**
	 * Unlock the files at the given storage paths in the backend.
	 *
	 * @param $paths Array Storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @return Status
	 */
	final public function unlockFiles( array $paths, $type ) {
		return $this->lockManager->unlock( $paths, $type );
	}

	/**
	 * Lock the files at the given storage paths in the backend.
	 * This will either lock all the files or none (on failure).
	 * On failure, the status object will be updated with errors.
	 *
	 * Once the return value goes out of scope, the locks will be released and
	 * the status updated. Unlock fatals will not change the status "OK" value.
	 *
	 * @param $paths Array Storage paths
	 * @param $type integer LockManager::LOCK_* constant
	 * @param $status Status Status to update on lock/unlock
	 * @return ScopedLock|null Returns null on failure
	 */
	final public function getScopedFileLocks( array $paths, $type, Status $status ) {
		return ScopedLock::factory( $this->lockManager, $paths, $type, $status );
	}

	/**
	 * Check if a given path is a "mwstore://" path.
	 * This does not do any further validation or any existence checks.
	 *
	 * @param $path string
	 * @return bool
	 */
	final public static function isStoragePath( $path ) {
		return ( strpos( $path, 'mwstore://' ) === 0 );
	}

	/**
	 * Split a storage path into a backend name, a container name,
	 * and a relative file path. The relative path may be the empty string.
	 * This does not do any path normalization or traversal checks.
	 *
	 * @param $storagePath string
	 * @return Array (backend, container, rel object) or (null, null, null)
	 */
	final public static function splitStoragePath( $storagePath ) {
		if ( self::isStoragePath( $storagePath ) ) {
			// Remove the "mwstore://" prefix (10 bytes) and split the path;
			// the limit of 3 keeps any further slashes in the relative path
			$parts = explode( '/', substr( $storagePath, 10 ), 3 );
			if ( count( $parts ) >= 2 && $parts[0] != '' && $parts[1] != '' ) {
				if ( count( $parts ) == 3 ) {
					return $parts; // e.g. "backend/container/path"
				} else {
					return array( $parts[0], $parts[1], '' ); // e.g. "backend/container"
				}
			}
		}
		return array( null, null, null );
	}

	/**
	 * Normalize a storage path by cleaning up directory separators.
	 * Returns null if the path is not of the format of a valid storage path
	 * or if its relative portion involves directory traversal.
	 *
	 * @param $storagePath string
	 * @return string|null
	 */
	final public static function normalizeStoragePath( $storagePath ) {
		list( $backend, $container, $relPath ) = self::splitStoragePath( $storagePath );
		if ( $relPath !== null ) { // path was a well-formed storage path
			$relPath = self::normalizeContainerPath( $relPath );
			if ( $relPath !== null ) {
				return ( $relPath != '' )
					? "mwstore://{$backend}/{$container}/{$relPath}"
					: "mwstore://{$backend}/{$container}";
			}
		}
		return null;
	}

	/**
	 * Get the parent storage directory of a storage path.
	 * This returns a path like "mwstore://backend/container",
	 * "mwstore://backend/container/...", or null if there is no parent.
	 *
	 * @param $storagePath string
	 * @return string|null
	 */
	final public static function parentStoragePath( $storagePath ) {
		$storagePath = dirname( $storagePath );
		// Only the relative part is needed, to tell whether the parent
		// is itself still a well-formed storage path
		list( , , $rel ) = self::splitStoragePath( $storagePath );
		return ( $rel === null ) ? null : $storagePath;
	}

	/**
	 * Get the final extension from a storage or FS path.
	 * The result is lower-cased. The empty string is returned if the path has
	 * no dot, if its only dot is the first character, or if the final dot
	 * belongs to a directory name rather than to the file name.
	 *
	 * @param $path string
	 * @return string
	 */
	final public static function extensionFromPath( $path ) {
		$i = strrpos( $path, '.' );
		$ext = $i ? (string)substr( $path, $i + 1 ) : '';
		// A directory separator after the final dot means that the dot is part
		// of a directory name (e.g. "dir.v2/README"), so there is no extension
		if ( $ext !== '' && strpbrk( $ext, '/\\' ) !== false ) {
			$ext = '';
		}
		return strtolower( $ext );
	}

	/**
	 * Validate and normalize a relative storage path.
	 * Null is returned if the path involves directory traversal.
	 * Traversal is insecure for FS backends and broken for others.
	 *
	 * @param $path string Storage path relative to a container
	 * @return string|null
	 */
	final protected static function normalizeContainerPath( $path ) {
		// Normalize directory separators
		$path = strtr( $path, '\\', '/' );
		// Collapse any consecutive directory separators
		$path = preg_replace( '![/]{2,}!', '/', $path );
		// Remove any leading directory separator
		$path = ltrim( $path, '/' );
		// Reject "." and ".." as a path segment in any position. This includes
		// a trailing "/." or "/..", which would otherwise slip through a check
		// that only looks at the start and the middle of the path.
		if ( strpos( $path, '.' ) !== false ) {
			foreach ( explode( '/', $path ) as $segment ) {
				if ( $segment === '.' || $segment === '..' ) {
					return null;
				}
			}
		}
		return $path;
	}
}