[FileBackend] Added getLocalCopyMulti() and getLocalReferenceMulti().
authorAaron <aschulz@wikimedia.org>
Tue, 18 Sep 2012 18:21:30 +0000 (11:21 -0700)
committerAaron Schulz <aschulz@wikimedia.org>
Sat, 22 Sep 2012 04:10:54 +0000 (21:10 -0700)
* Optimized these in Swift to use pipelined GETs.
* This can also be used to improve concatenate().

Change-Id: Ibeb5df7532f9f5c16736b20c28b7c0d9ddfb412f

includes/filebackend/FSFileBackend.php
includes/filebackend/FileBackend.php
includes/filebackend/FileBackendMultiWrite.php
includes/filebackend/FileBackendStore.php
includes/filebackend/SwiftFileBackend.php
tests/phpunit/includes/filerepo/FileBackendTest.php

index 9349534..0922919 100644 (file)
@@ -644,7 +644,7 @@ class FSFileBackend extends FileBackendStore {
 
        /**
         * @see FileBackendStore::getFileListInternal()
-        * @return array|FSFileBackendFileList|null
+        * @return Array|FSFileBackendFileList|null
         */
        public function getFileListInternal( $fullCont, $dirRel, array $params ) {
                list( $b, $shortCont, $r ) = FileBackend::splitStoragePath( $params['dir'] );
@@ -662,44 +662,56 @@ class FSFileBackend extends FileBackendStore {
        }
 
        /**
-        * @see FileBackendStore::getLocalReference()
-        * @return FSFile|null
+        * @see FileBackendStore::doGetLocalReferenceMulti()
+        * @return Array
         */
-       public function getLocalReference( array $params ) {
-               $source = $this->resolveToFSPath( $params['src'] );
-               if ( $source === null ) {
-                       return null;
+       protected function doGetLocalReferenceMulti( array $params ) {
+               $fsFiles = array(); // (path => FSFile)
+
+               foreach ( $params['srcs'] as $src ) {
+                       $source = $this->resolveToFSPath( $src );
+                       if ( $source === null ) {
+                               $fsFiles[$src] = null; // invalid path
+                       } else {
+                               $fsFiles[$src] = new FSFile( $source );
+                       }
                }
-               return new FSFile( $source );
+
+               return $fsFiles;
        }
 
        /**
-        * @see FileBackendStore::getLocalCopy()
-        * @return null|TempFSFile
+        * @see FileBackendStore::doGetLocalCopyMulti()
+        * @return Array
         */
-       public function getLocalCopy( array $params ) {
-               $source = $this->resolveToFSPath( $params['src'] );
-               if ( $source === null ) {
-                       return null;
-               }
+       protected function doGetLocalCopyMulti( array $params ) {
+               $tmpFiles = array(); // (path => TempFSFile)
 
-               // Create a new temporary file with the same extension...
-               $ext = FileBackend::extensionFromPath( $params['src'] );
-               $tmpFile = TempFSFile::factory( 'localcopy_', $ext );
-               if ( !$tmpFile ) {
-                       return null;
-               }
-               $tmpPath = $tmpFile->getPath();
-
-               // Copy the source file over the temp file
-               $ok = copy( $source, $tmpPath );
-               if ( !$ok ) {
-                       return null;
+               foreach ( $params['srcs'] as $src ) {
+                       $source = $this->resolveToFSPath( $src );
+                       if ( $source === null ) {
+                               $tmpFiles[$src] = null; // invalid path
+                       } else {
+                               // Create a new temporary file with the same extension...
+                               $ext = FileBackend::extensionFromPath( $src );
+                               $tmpFile = TempFSFile::factory( 'localcopy_', $ext );
+                               if ( !$tmpFile ) {
+                                       $tmpFiles[$src] = null;
+                               } else {
+                                       $tmpPath = $tmpFile->getPath();
+                                       // Copy the source file over the temp file
+                                       $ok = copy( $source, $tmpPath );
+                                       if ( !$ok ) {
+                                               $tmpFiles[$src] = null;
+                                       } else {
+                                               $this->chmod( $tmpPath );
+                                               $tmpFiles[$src] = $tmpFile;
+                                       }
+                               }
+                       }
                }
 
-               $this->chmod( $tmpPath );
-
-               return $tmpFile;
+               return $tmpFiles;
        }
 
        /**
index 042cb67..94c068c 100644 (file)
@@ -807,7 +807,29 @@ abstract class FileBackend {
         *   - latest : use the latest available data
         * @return FSFile|null Returns null on failure
         */
-       abstract public function getLocalReference( array $params );
+       final public function getLocalReference( array $params ) {
+               $fsFiles = $this->getLocalReferenceMulti(
+                       array( 'srcs' => array( $params['src'] ) ) + $params );
+
+               return $fsFiles[$params['src']];
+       }
+
+       /**
+        * Like getLocalReference() except it takes an array of storage paths
+        * and returns a map of storage paths to FSFile objects (or null on failure).
+        * The map keys (paths) are in the same order as the provided list of paths.
+        *
+        * @see FileBackend::getLocalReference()
+        *
+        * @param $params Array
+        * $params include:
+        *   - srcs        : list of source storage paths
+        *   - latest      : use the latest available data
+        *   - parallelize : try to do operations in parallel when possible
+        * @return Array Map of (path name => FSFile or null on failure)
+        * @since 1.20
+        */
+       abstract public function getLocalReferenceMulti( array $params );
 
        /**
         * Get a local copy on disk of the file at a storage path in the backend.
@@ -820,7 +842,29 @@ abstract class FileBackend {
         *   - latest : use the latest available data
         * @return TempFSFile|null Returns null on failure
         */
-       abstract public function getLocalCopy( array $params );
+       final public function getLocalCopy( array $params ) {
+               $tmpFiles = $this->getLocalCopyMulti(
+                       array( 'srcs' => array( $params['src'] ) ) + $params );
+
+               return $tmpFiles[$params['src']];
+       }
+
+       /**
+        * Like getLocalCopy() except it takes an array of storage paths and
+        * returns a map of storage paths to TempFSFile objects (or null on failure).
+        * The map keys (paths) are in the same order as the provided list of paths.
+        *
+        * @see FileBackend::getLocalCopy()
+        *
+        * @param $params Array
+        * $params include:
+        *   - srcs        : list of source storage paths
+        *   - latest      : use the latest available data
+        *   - parallelize : try to do operations in parallel when possible
+        * @return Array Map of (path name => TempFSFile or null on failure)
+        * @since 1.20
+        */
+       abstract public function getLocalCopyMulti( array $params );
 
        /**
         * Check if a directory exists at a given storage path.
index 4be0323..4203878 100644 (file)
@@ -612,23 +612,35 @@ class FileBackendMultiWrite extends FileBackend {
        }
 
        /**
-        * @see FileBackend::getLocalReference()
+        * @see FileBackend::getLocalReferenceMulti()
         * @param $params array
         * @return FSFile|null
         */
-       public function getLocalReference( array $params ) {
+       public function getLocalReferenceMulti( array $params ) {
                $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
-               return $this->backends[$this->masterIndex]->getLocalReference( $realParams );
+               $fsFilesM = $this->backends[$this->masterIndex]->getLocalReferenceMulti( $realParams );
+
+               $fsFiles = array(); // (path => FSFile) mapping using the proxy backend's name
+               foreach ( $fsFilesM as $path => $fsFile ) {
+                       $fsFiles[$this->unsubstPaths( $path )] = $fsFile;
+               }
+               return $fsFiles;
        }
 
        /**
-        * @see FileBackend::getLocalCopy()
+        * @see FileBackend::getLocalCopyMulti()
         * @param $params array
         * @return null|TempFSFile
         */
-       public function getLocalCopy( array $params ) {
+       public function getLocalCopyMulti( array $params ) {
                $realParams = $this->substOpPaths( $params, $this->backends[$this->masterIndex] );
-               return $this->backends[$this->masterIndex]->getLocalCopy( $realParams );
+               $tempFilesM = $this->backends[$this->masterIndex]->getLocalCopyMulti( $realParams );
+
+               $tempFiles = array(); // (path => TempFSFile) mapping using the proxy backend's name
+               foreach ( $tempFilesM as $path => $tempFile ) {
+                       $tempFiles[$this->unsubstPaths( $path )] = $tempFile;
+               }
+               return $tempFiles;
        }
 
        /**
index 5823326..21832be 100644 (file)
@@ -338,7 +338,7 @@ abstract class FileBackendStore extends FileBackend {
                                return $status;
                        }
                        // Get a handle to the local FS version
-                       $sourceHandle = fopen( $tmpFile->getPath(), 'r' );
+                       $sourceHandle = fopen( $tmpFile->getPath(), 'rb' );
                        if ( $sourceHandle === false ) {
                                fclose( $tmpHandle );
                                $status->fatal( 'backend-fail-read', $virtualSource );
@@ -720,37 +720,76 @@ abstract class FileBackendStore extends FileBackend {
        }
 
        /**
-        * @see FileBackend::getLocalReference()
-        * @return TempFSFile|null
+        * @see FileBackend::getLocalReferenceMulti()
+        * @return Array
         */
-       public function getLocalReference( array $params ) {
-               $path = self::normalizeStoragePath( $params['src'] );
-               if ( $path === null ) {
-                       return null; // invalid storage path
-               }
+       final public function getLocalReferenceMulti( array $params ) {
                wfProfileIn( __METHOD__ );
                wfProfileIn( __METHOD__ . '-' . $this->name );
+
+               $params = $this->setConcurrencyFlags( $params );
+
+               $fsFiles = array(); // (path => FSFile)
                $latest = !empty( $params['latest'] ); // use latest data?
-               if ( $this->expensiveCache->has( $path, 'localRef' ) ) {
-                       $val = $this->expensiveCache->get( $path, 'localRef' );
-                       // If we want the latest data, check that this cached
-                       // value was in fact fetched with the latest available data.
-                       if ( !$latest || $val['latest'] ) {
-                               wfProfileOut( __METHOD__ . '-' . $this->name );
-                               wfProfileOut( __METHOD__ );
-                               return $val['object'];
+               // Reuse any files already in process cache...
+               foreach ( $params['srcs'] as $src ) {
+                       $path = self::normalizeStoragePath( $src );
+                       if ( $path === null ) {
+                               $fsFiles[$src] = null; // invalid storage path
+                       } elseif ( $this->expensiveCache->has( $path, 'localRef' ) ) {
+                               $val = $this->expensiveCache->get( $path, 'localRef' );
+                               // If we want the latest data, check that this cached
+                               // value was in fact fetched with the latest available data.
+                               if ( !$latest || $val['latest'] ) {
+                                       $fsFiles[$src] = $val['object'];
+                               }
                        }
                }
-               $tmpFile = $this->getLocalCopy( $params );
-               if ( $tmpFile ) { // don't cache negatives
-                       $this->expensiveCache->set( $path, 'localRef',
-                               array( 'object' => $tmpFile, 'latest' => $latest ) );
+               // Fetch local references of any remaning files...
+               $params['srcs'] = array_diff( $params['srcs'], array_keys( $fsFiles ) );
+               foreach ( $this->doGetLocalReferenceMulti( $params ) as $path => $fsFile ) {
+                       $fsFiles[$path] = $fsFile;
+                       if ( $fsFile ) { // update the process cache...
+                               $this->expensiveCache->set( $path, 'localRef',
+                                       array( 'object' => $fsFile, 'latest' => $latest ) );
+                       }
                }
+
                wfProfileOut( __METHOD__ . '-' . $this->name );
                wfProfileOut( __METHOD__ );
-               return $tmpFile;
+               return $fsFiles;
        }
 
+       /**
+        * @see FileBackendStore::getLocalReferenceMulti()
+        * @return Array
+        */
+       protected function doGetLocalReferenceMulti( array $params ) {
+               return $this->doGetLocalCopyMulti( $params );
+       }
+
+       /**
+        * @see FileBackend::getLocalCopyMulti()
+        * @return Array
+        */
+       final public function getLocalCopyMulti( array $params ) {
+               wfProfileIn( __METHOD__ );
+               wfProfileIn( __METHOD__ . '-' . $this->name );
+
+               $params = $this->setConcurrencyFlags( $params );
+               $tmpFiles = $this->doGetLocalCopyMulti( $params );
+
+               wfProfileOut( __METHOD__ . '-' . $this->name );
+               wfProfileOut( __METHOD__ );
+               return $tmpFiles;
+       }
+
+       /**
+        * @see FileBackendStore::getLocalCopyMulti()
+        * @return Array
+        */
+       abstract protected function doGetLocalCopyMulti( array $params );
+
        /**
         * @see FileBackend::streamFile()
         * @return Status
index abe834a..0b74f56 100644 (file)
@@ -1029,44 +1029,77 @@ class SwiftFileBackend extends FileBackendStore {
        }
 
        /**
-        * @see FileBackendStore::getLocalCopy()
+        * @see FileBackendStore::doGetLocalCopyMulti()
         * @return null|TempFSFile
         */
-       public function getLocalCopy( array $params ) {
-               list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] );
-               if ( $srcRel === null ) {
-                       return null;
-               }
+       protected function doGetLocalCopyMulti( array $params ) {
+               $tmpFiles = array();
+
+               $ep = array_diff_key( $params, array( 'srcs' => 1 ) ); // for error logging
+               // Blindly create tmp files and stream to them, catching any exception if the file does
+               // not exist. Doing a stat here is useless causes infinite loops in addMissingMetadata().
+               foreach ( array_chunk( $params['srcs'], $params['concurrency'] ) as $pathBatch ) {
+                       $cfOps = array(); // (path => CF_Async_Op)
+
+                       $handles = array(); // open file handles for async ops
+                       foreach ( $pathBatch as $path ) { // each path in this concurrent batch
+                               list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $path );
+                               if ( $srcRel === null ) {
+                                       $tmpFiles[$path] = null;
+                                       continue;
+                               }
+                               $tmpFile = null;
+                               try {
+                                       $sContObj = $this->getContainer( $srcCont );
+                                       $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD
+                                       // Get source file extension
+                                       $ext = FileBackend::extensionFromPath( $path );
+                                       // Create a new temporary file...
+                                       $tmpFile = TempFSFile::factory( 'localcopy_', $ext );
+                                       if ( $tmpFile ) {
+                                               $handle = fopen( $tmpFile->getPath(), 'wb' );
+                                               if ( $handle ) {
+                                                       $headers = $this->headersFromParams( $params );
+                                                       if ( count( $pathBatch ) > 1 ) {
+                                                               $cfOps[$path] = $obj->stream_async( $handle, $headers );
+                                                               $handles[] = $handle; // close this later
+                                                       } else {
+                                                               $obj->stream( $handle, $headers );
+                                                               fclose( $handle );
+                                                       }
+                                               } else {
+                                                       $tmpFile = null;
+                                               }
+                                       }
+                               } catch ( NoSuchContainerException $e ) {
+                                       $tmpFile = null;
+                               } catch ( NoSuchObjectException $e ) {
+                                       $tmpFile = null;
+                               } catch ( CloudFilesException $e ) { // some other exception?
+                                       $tmpFile = null;
+                                       $this->handleException( $e, null, __METHOD__, array( 'src' => $path ) + $ep );
+                               }
+                               $tmpFiles[$path] = $tmpFile;
+                       }
 
-               // Blindly create a tmp file and stream to it, catching any exception if the file does
-               // not exist. Also, doing a stat here will cause infinite loops in addMissingMetadata().
-               $tmpFile = null;
-               try {
-                       $sContObj = $this->getContainer( $srcCont );
-                       $obj = new CF_Object( $sContObj, $srcRel, false, false ); // skip HEAD
-                       // Get source file extension
-                       $ext = FileBackend::extensionFromPath( $srcRel );
-                       // Create a new temporary file...
-                       $tmpFile = TempFSFile::factory( 'localcopy_', $ext );
-                       if ( $tmpFile ) {
-                               $handle = fopen( $tmpFile->getPath(), 'wb' );
-                               if ( $handle ) {
-                                       $obj->stream( $handle, $this->headersFromParams( $params ) );
-                                       fclose( $handle );
-                               } else {
-                                       $tmpFile = null; // couldn't open temp file
+                       $batch = new CF_Async_Op_Batch( $cfOps );
+                       $cfOps = $batch->execute();
+                       foreach ( $cfOps as $path => $cfOp ) {
+                               try {
+                                       $cfOp->getLastResponse();
+                               } catch ( NoSuchContainerException $e ) {
+                                       $tmpFiles[$path] = null;
+                               } catch ( NoSuchObjectException $e ) {
+                                       $tmpFiles[$path] = null;
+                               } catch ( CloudFilesException $e ) { // some other exception?
+                                       $tmpFiles[$path] = null;
+                                       $this->handleException( $e, null, __METHOD__, array( 'src' => $path ) + $ep );
                                }
                        }
-               } catch ( NoSuchContainerException $e ) {
-                       $tmpFile = null;
-               } catch ( NoSuchObjectException $e ) {
-                       $tmpFile = null;
-               } catch ( CloudFilesException $e ) { // some other exception?
-                       $tmpFile = null;
-                       $this->handleException( $e, null, __METHOD__, $params );
+                       array_map( 'fclose', $handles ); // close open handles
                }
 
-               return $tmpFile;
+               return $tmpFiles;
        }
 
        /**
index a2dc5c6..33f1727 100644 (file)
@@ -982,19 +982,33 @@ class FileBackendTest extends MediaWikiTestCase {
        private function doTestGetLocalCopy( $source, $content ) {
                $backendName = $this->backendClass();
 
-               $this->prepare( array( 'dir' => dirname( $source ) ) );
-
-               $status = $this->backend->doOperation(
-                       array( 'op' => 'create', 'content' => $content, 'dst' => $source ) );
-               $this->assertGoodStatus( $status,
-                       "Creation of file at $source succeeded ($backendName)." );
-
-               $tmpFile = $this->backend->getLocalCopy( array( 'src' => $source ) );
-               $this->assertNotNull( $tmpFile,
-                       "Creation of local copy of $source succeeded ($backendName)." );
+               $srcs = (array)$source;
+               foreach ( $srcs as $src ) {
+                       $this->prepare( array( 'dir' => dirname( $src ) ) );
+                       $status = $this->backend->doOperation(
+                               array( 'op' => 'create', 'content' => $content, 'dst' => $src ) );
+                       $this->assertGoodStatus( $status,
+                               "Creation of file at $src succeeded ($backendName)." );
+               }
 
-               $contents = file_get_contents( $tmpFile->getPath() );
-               $this->assertNotEquals( false, $contents, "Local copy of $source exists ($backendName)." );
+               if ( is_array( $source ) ) {
+                       $tmpFiles = $this->backend->getLocalCopyMulti( array( 'srcs' => $source ) );
+                       foreach ( $tmpFiles as $path => $tmpFile ) {
+                               $this->assertNotNull( $tmpFile,
+                                       "Creation of local copy of $path succeeded ($backendName)." );
+                               $contents = file_get_contents( $tmpFile->getPath() );
+                               $this->assertNotEquals( false, $contents, "Local copy of $path exists ($backendName)." );
+                               $this->assertEquals( $content, $contents, "Local copy of $path is correct ($backendName)." );
+                       }
+                       $this->assertEquals( $source, array_keys( $tmpFiles ), "Local copies in right order ($backendName)." );
+               } else {
+                       $tmpFile = $this->backend->getLocalCopy( array( 'src' => $source ) );
+                       $this->assertNotNull( $tmpFile,
+                               "Creation of local copy of $source succeeded ($backendName)." );
+                       $contents = file_get_contents( $tmpFile->getPath() );
+                       $this->assertNotEquals( false, $contents, "Local copy of $source exists ($backendName)." );
+                       $this->assertEquals( $content, $contents, "Local copy of $source is correct ($backendName)." );
+               }
        }
 
        function provider_testGetLocalCopy() {
@@ -1003,6 +1017,11 @@ class FileBackendTest extends MediaWikiTestCase {
                $base = $this->baseStorePath();
                $cases[] = array( "$base/unittest-cont1/e/a/z/some_file.txt", "some file contents" );
                $cases[] = array( "$base/unittest-cont1/e/a/some-other_file.txt", "more file contents" );
+               $cases[] = array( "$base/unittest-cont1/e/a/\$odd&.txt", "test file contents" );
+               $cases[] = array(
+                       array( "$base/unittest-cont1/e/a/x.txt", "$base/unittest-cont1/e/a/y.txt",
+                                "$base/unittest-cont1/e/a/z.txt" ),
+                       "clone file contents" );
 
                return $cases;
        }
@@ -1025,18 +1044,33 @@ class FileBackendTest extends MediaWikiTestCase {
        private function doTestGetLocalReference( $source, $content ) {
                $backendName = $this->backendClass();
 
-               $this->prepare( array( 'dir' => dirname( $source ) ) );
-
-               $status = $this->create( array( 'content' => $content, 'dst' => $source ) );
-               $this->assertGoodStatus( $status,
-                       "Creation of file at $source succeeded ($backendName)." );
-
-               $tmpFile = $this->backend->getLocalReference( array( 'src' => $source ) );
-               $this->assertNotNull( $tmpFile,
-                       "Creation of local copy of $source succeeded ($backendName)." );
+               $srcs = (array)$source;
+               foreach ( $srcs as $src ) {
+                       $this->prepare( array( 'dir' => dirname( $src ) ) );
+                       $status = $this->backend->doOperation(
+                               array( 'op' => 'create', 'content' => $content, 'dst' => $src ) );
+                       $this->assertGoodStatus( $status,
+                               "Creation of file at $src succeeded ($backendName)." );
+               }
 
-               $contents = file_get_contents( $tmpFile->getPath() );
-               $this->assertNotEquals( false, $contents, "Local copy of $source exists ($backendName)." );
+               if ( is_array( $source ) ) {
+                       $tmpFiles = $this->backend->getLocalReferenceMulti( array( 'srcs' => $source ) );
+                       foreach ( $tmpFiles as $path => $tmpFile ) {
+                               $this->assertNotNull( $tmpFile,
+                                       "Creation of local copy of $path succeeded ($backendName)." );
+                               $contents = file_get_contents( $tmpFile->getPath() );
+                               $this->assertNotEquals( false, $contents, "Local ref of $path exists ($backendName)." );
+                               $this->assertEquals( $content, $contents, "Local ref of $path is correct ($backendName)." );
+                       }
+                       $this->assertEquals( $source, array_keys( $tmpFiles ), "Local refs in right order ($backendName)." );
+               } else {
+                       $tmpFile = $this->backend->getLocalReference( array( 'src' => $source ) );
+                       $this->assertNotNull( $tmpFile,
+                               "Creation of local copy of $source succeeded ($backendName)." );
+                       $contents = file_get_contents( $tmpFile->getPath() );
+                       $this->assertNotEquals( false, $contents, "Local ref of $source exists ($backendName)." );
+                       $this->assertEquals( $content, $contents, "Local ref of $source is correct ($backendName)." );
+               }
        }
 
        function provider_testGetLocalReference() {
@@ -1045,6 +1079,11 @@ class FileBackendTest extends MediaWikiTestCase {
                $base = $this->baseStorePath();
                $cases[] = array( "$base/unittest-cont1/e/a/z/some_file.txt", "some file contents" );
                $cases[] = array( "$base/unittest-cont1/e/a/some-other_file.txt", "more file contents" );
+               $cases[] = array( "$base/unittest-cont1/e/a/\$odd&.txt", "test file contents" );
+               $cases[] = array(
+                       array( "$base/unittest-cont1/e/a/x.txt", "$base/unittest-cont1/e/a/y.txt",
+                                "$base/unittest-cont1/e/a/z.txt" ),
+                       "clone file contents" );
 
                return $cases;
        }