Make FindOrphanedFiles handle files with ! properly
authorAaron Schulz <aschulz@wikimedia.org>
Wed, 4 Nov 2015 22:51:38 +0000 (14:51 -0800)
committerAaron Schulz <aschulz@wikimedia.org>
Wed, 4 Nov 2015 23:32:05 +0000 (15:32 -0800)
Previously, the showed up in the "missing" list

Change-Id: Ia094d0c58f6d15671b0c25440ddcc6b51544368a

maintenance/findOrphanedFiles.php

index eac277a..a1e274c 100644 (file)
@@ -55,37 +55,41 @@ class FindOrphanedFiles extends Maintenance {
                        $this->error( "Could not get file listing.", 1 );
                }
 
-               $nameBatch = array();
+               $pathBatch = array();
                foreach ( $list as $path ) {
                        if ( preg_match( '#^(thumb|deleted)/#', $path ) ) {
                                continue; // handle ugly nested containers on stock installs
                        }
 
-                       $nameBatch[] = basename( $path );
-                       if ( count( $nameBatch ) >= $this->mBatchSize ) {
-                               $this->checkFiles( $repo, $nameBatch, $verbose );
-                               $nameBatch = array();
+                       $pathBatch[] = $path;
+                       if ( count( $pathBatch ) >= $this->mBatchSize ) {
+                               $this->checkFiles( $repo, $pathBatch, $verbose );
+                               $pathBatch = array();
                        }
                }
-               $this->checkFiles( $repo, $nameBatch, $verbose );
+               $this->checkFiles( $repo, $pathBatch, $verbose );
        }
 
-       protected function checkFiles( LocalRepo $repo, array $names, $verbose ) {
-               if ( !count( $names ) ) {
+       protected function checkFiles( LocalRepo $repo, array $paths, $verbose ) {
+               if ( !count( $paths ) ) {
                        return;
                }
 
                $dbr = $repo->getSlaveDB();
 
+               $curNames = array();
+               $oldNames = array();
                $imgIN = array();
                $oiWheres = array();
-               foreach ( $names as $name ) {
-                       if ( strpos( $name, '!' ) !== false ) {
+               foreach ( $paths as $path ) {
+                       $name = basename( $path );
+                       if ( preg_match( '#^archive/#', $path ) ) {
                                if ( $verbose ) {
                                        $this->output( "Checking old file $name\n" );
                                }
 
-                               list( , $base ) = explode( '!', $name ); // <TS_MW>!<img_name>
+                               $oldNames[] = $name;
+                               list( , $base ) = explode( '!', $name, 2 ); // <TS_MW>!<img_name>
                                $oiWheres[] = $dbr->makeList(
                                        array( 'oi_name' => $base, 'oi_archive_name' => $name ),
                                        LIST_AND
@@ -95,6 +99,7 @@ class FindOrphanedFiles extends Maintenance {
                                        $this->output( "Checking current file $name\n" );
                                }
 
+                               $curNames[] = $name;
                                $imgIN[] = $name;
                        }
                }
@@ -104,12 +109,12 @@ class FindOrphanedFiles extends Maintenance {
                                array(
                                        $dbr->selectSQLText(
                                                'image',
-                                               array( 'name' => 'img_name' ),
+                                               array( 'name' => 'img_name', 'old' => 0 ),
                                                $imgIN ? array( 'img_name' => $imgIN ) : '1=0'
                                        ),
                                        $dbr->selectSQLText(
                                                'oldimage',
-                                               array( 'name' => 'oi_archive_name' ),
+                                               array( 'name' => 'oi_archive_name', 'old' => 1 ),
                                                $oiWheres ? $dbr->makeList( $oiWheres, LIST_OR ) : '1=0'
                                        )
                                ),
@@ -118,20 +123,26 @@ class FindOrphanedFiles extends Maintenance {
                        __METHOD__
                );
 
-               $namesFound = array();
+               $curNamesFound = array();
+               $oldNamesFound = array();
                foreach ( $res as $row ) {
-                       $namesFound[] = $row->name;
+                       if ( $row->old ) {
+                               $oldNamesFound[] = $row->name;
+                       } else {
+                               $curNamesFound[] = $row->name;
+                       }
                }
 
-               $namesOrphans = array_diff( $names, $namesFound );
-               foreach ( $namesOrphans as $name ) {
+               foreach ( array_diff( $curNames, $curNamesFound ) as $name ) {
+                       $file = $repo->newFile( $name );
+                       // Print name and public URL to ease recovery
+                       $this->output( $name . "\n" . $file->getCanonicalUrl() . "\n\n" );
+               }
+
+               foreach ( array_diff( $oldNames, $oldNamesFound ) as $name ) {
+                       list( , $base ) = explode( '!', $name, 2 ); // <TS_MW>!<img_name>
+                       $file = $repo->newFromArchiveName( Title::makeTitle( NS_FILE, $base ), $name );
                        // Print name and public URL to ease recovery
-                       if ( strpos( $name, '!' ) !== false ) {
-                               list( , $base ) = explode( '!', $name ); // <TS_MW>!<img_name>
-                               $file = $repo->newFromArchiveName( Title::makeTitle( NS_FILE, $base ), $name );
-                       } else {
-                               $file = $repo->newFile( $name );
-                       }
                        $this->output( $name . "\n" . $file->getCanonicalUrl() . "\n\n" );
                }
        }