X-Git-Url: https://git.heureux-cyclage.org/?a=blobdiff_plain;f=maintenance%2FcopyFileBackend.php;h=b39ff55eda1b68697dfc8677b3b2b1d0cbbf12c4;hb=78b6bde01f1ff2af9c20bf624908f65666347639;hp=09b9295e1a16e57bbe2f3d8c167f654663c81e71;hpb=93131fcd385cdae84ecb7ccb6faa1c7340214211;p=lhc%2Fweb%2Fwiklou.git diff --git a/maintenance/copyFileBackend.php b/maintenance/copyFileBackend.php index 09b9295e1a..b39ff55eda 100644 --- a/maintenance/copyFileBackend.php +++ b/maintenance/copyFileBackend.php @@ -35,7 +35,7 @@ require_once __DIR__ . '/Maintenance.php'; * @ingroup Maintenance */ class CopyFileBackend extends Maintenance { - /** @var Array|null (path sha1 => stat) Pre-computed dst stat entries from listings */ + /** @var array|null (path sha1 => stat) Pre-computed dst stat entries from listings */ protected $statCache = null; public function __construct() { @@ -226,8 +226,8 @@ class CopyFileBackend extends Maintenance { } $t_start = microtime( true ); $fsFiles = $src->getLocalReferenceMulti( array( 'srcs' => $srcPaths, 'latest' => 1 ) ); - $ellapsed_ms = floor( ( microtime( true ) - $t_start ) * 1000 ); - $this->output( "\n\tDownloaded these file(s) [{$ellapsed_ms}ms]:\n\t" . + $elapsed_ms = floor( ( microtime( true ) - $t_start ) * 1000 ); + $this->output( "\n\tDownloaded these file(s) [{$elapsed_ms}ms]:\n\t" . implode( "\n\t", $srcPaths ) . "\n\n" ); } @@ -281,12 +281,12 @@ class CopyFileBackend extends Maintenance { sleep( 10 ); // wait and retry copy again $status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) ); } - $ellapsed_ms = floor( ( microtime( true ) - $t_start ) * 1000 ); + $elapsed_ms = floor( ( microtime( true ) - $t_start ) * 1000 ); if ( !$status->isOK() ) { $this->error( print_r( $status->getErrorsArray(), true ) ); $this->error( "$wikiId: Could not copy file batch.", 1 ); // die } elseif ( count( $copiedRel ) ) { - $this->output( "\n\tCopied these file(s) [{$ellapsed_ms}ms]:\n\t" . + $this->output( "\n\tCopied these file(s) [{$elapsed_ms}ms]:\n\t" . implode( "\n\t", $copiedRel ) . "\n\n" ); } } @@ -318,12 +318,12 @@ class CopyFileBackend extends Maintenance { sleep( 10 ); // wait and retry copy again $status = $dst->doQuickOperations( $ops, array( 'bypassReadOnly' => 1 ) ); } - $ellapsed_ms = floor( ( microtime( true ) - $t_start ) * 1000 ); + $elapsed_ms = floor( ( microtime( true ) - $t_start ) * 1000 ); if ( !$status->isOK() ) { $this->error( print_r( $status->getErrorsArray(), true ) ); $this->error( "$wikiId: Could not delete file batch.", 1 ); // die } elseif ( count( $deletedRel ) ) { - $this->output( "\n\tDeleted these file(s) [{$ellapsed_ms}ms]:\n\t" . + $this->output( "\n\tDeleted these file(s) [{$elapsed_ms}ms]:\n\t" . implode( "\n\t", $deletedRel ) . "\n\n" ); } } @@ -347,15 +347,34 @@ class CopyFileBackend extends Maintenance { } else { $dstStat = $dst->getFileStat( array( 'src' => $dPath ) ); } - return ( + // Initial fast checks to see if files are obviously different + $sameFast = ( is_array( $srcStat ) // sanity check that source exists && is_array( $dstStat ) // dest exists && $srcStat['size'] === $dstStat['size'] - && ( !$skipHash || $srcStat['mtime'] <= $dstStat['mtime'] ) - && ( $skipHash || $src->getFileSha1Base36( array( 'src' => $sPath, 'latest' => 1 ) ) - === $dst->getFileSha1Base36( array( 'src' => $dPath, 'latest' => 1 ) ) - ) ); + // More thorough checks against files + if ( !$sameFast ) { + $same = false; // no need to look farther + } elseif ( isset( $srcStat['md5'] ) && isset( $dstStat['md5'] ) ) { + // If MD5 was already in the stat info, just use it. + // This is useful as many objects stores can return this in object listing, + // so we can use it to avoid slow per-file HEADs. + $same = ( $srcStat['md5'] === $dstStat['md5'] ); + } elseif ( $skipHash ) { + // This mode is good for copying to a backup location or resyncing clone + // backends in FileBackendMultiWrite (since they get writes second, they have + // higher timestamps). However, when copying the other way, this hits loads of + // false positives (possibly 100%) and wastes a bunch of time on GETs/PUTs. + $same = ( $srcStat['mtime'] <= $dstStat['mtime'] ); + } else { + // This is the slowest method which does many per-file HEADs (unless an object + // store tracks SHA-1 in listings). + $same = ( $src->getFileSha1Base36( array( 'src' => $sPath, 'latest' => 1 ) ) + === $dst->getFileSha1Base36( array( 'src' => $dPath, 'latest' => 1 ) ) ); + } + + return $same; } }