Merge "resourceloader: Release saveFileDependencies() lock on rollback"
authorjenkins-bot <jenkins-bot@gerrit.wikimedia.org>
Tue, 19 Jul 2016 19:41:43 +0000 (19:41 +0000)
committerGerrit Code Review <gerrit@wikimedia.org>
Tue, 19 Jul 2016 19:41:43 +0000 (19:41 +0000)
19 files changed:
RELEASE-NOTES-1.28
autoload.php
img_auth.php
includes/Category.php
includes/HttpFunctions.php
includes/StreamFile.php
includes/deferred/LinksDeletionUpdate.php
includes/filebackend/FileBackend.php
includes/filebackend/FileBackendStore.php
includes/filebackend/MemoryFileBackend.php
includes/filebackend/SwiftFileBackend.php
includes/filerepo/FileRepo.php
includes/installer/DatabaseUpdater.php
includes/libs/MultiHttpClient.php
includes/page/WikiPage.php
maintenance/cleanupEmptyCategories.php [new file with mode: 0644]
maintenance/mssql/tables.sql
maintenance/tables.sql
tests/phpunit/includes/filebackend/FileBackendTest.php

index ff8e038..831ad58 100644 (file)
@@ -6,6 +6,9 @@ MediaWiki 1.28 is an alpha-quality branch and is not recommended for use in
 production.
 
 === Configuration changes in 1.28 ===
+* BREAKING CHANGE: $wgHTTPProxy is now *required* for all external requests
+  made by MediaWiki via a proxy. Relying on the http_proxy environment
+  variable is no longer supported.
 * The load.php entry point now enforces the existing policy of not allowing
   access to session data, which includes the session user and the session
   user's language. If such access is attempted, an exception will be thrown.
index b139399..ecbc9b3 100644 (file)
@@ -245,6 +245,7 @@ $wgAutoloadLocalClasses = [
        'ClassCollector' => __DIR__ . '/includes/utils/AutoloadGenerator.php',
        'CleanupAncientTables' => __DIR__ . '/maintenance/cleanupAncientTables.php',
        'CleanupBlocks' => __DIR__ . '/maintenance/cleanupBlocks.php',
+       'CleanupEmptyCategories' => __DIR__ . '/maintenance/cleanupEmptyCategories.php',
        'CleanupPreferences' => __DIR__ . '/maintenance/cleanupPreferences.php',
        'CleanupRemovedModules' => __DIR__ . '/maintenance/cleanupRemovedModules.php',
        'CleanupSpam' => __DIR__ . '/maintenance/cleanupSpam.php',
index d636188..fa1609f 100644 (file)
@@ -162,13 +162,21 @@ function wfImageAuthMain() {
                }
        }
 
+       $options = []; // HTTP header options
+       if ( isset( $_SERVER['HTTP_RANGE'] ) ) {
+               $options['range'] = $_SERVER['HTTP_RANGE'];
+       }
+       if ( isset( $_SERVER['HTTP_IF_MODIFIED_SINCE'] ) ) {
+               $options['if-modified-since'] = $_SERVER['HTTP_IF_MODIFIED_SINCE'];
+       }
+
        if ( $request->getCheck( 'download' ) ) {
                $headers[] = 'Content-Disposition: attachment';
        }
 
        // Stream the requested file
        wfDebugLog( 'img_auth', "Streaming `" . $filename . "`." );
-       $repo->streamFile( $filename, $headers );
+       $repo->streamFile( $filename, $headers, $options );
 }
 
 /**
index 28b566a..531e0be 100644 (file)
@@ -79,6 +79,11 @@ class Category {
                                $this->mSubcats = 0;
                                $this->mFiles = 0;
 
+                               # If the title exists, call refreshCounts to add a row for it.
+                               if ( $this->mTitle->exists() ) {
+                                       DeferredUpdates::addCallableUpdate( [ $this, 'refreshCounts' ] );
+                               }
+
                                return true;
                        } else {
                                return false; # Fail
@@ -331,21 +336,35 @@ class Category {
                        [ 'LOCK IN SHARE MODE' ]
                );
 
+               $shouldExist = $result->pages > 0 || $this->getTitle()->exists();
+
                if ( $this->mID ) {
-                       # The category row already exists, so do a plain UPDATE instead
-                       # of INSERT...ON DUPLICATE KEY UPDATE to avoid creating a gap
-                       # in the cat_id sequence. The row may or may not be "affected".
-                       $dbw->update(
-                               'category',
-                               [
-                                       'cat_pages' => $result->pages,
-                                       'cat_subcats' => $result->subcats,
-                                       'cat_files' => $result->files
-                               ],
-                               [ 'cat_title' => $this->mName ],
-                               __METHOD__
-                       );
-               } else {
+                       if ( $shouldExist ) {
+                               # The category row already exists, so do a plain UPDATE instead
+                               # of INSERT...ON DUPLICATE KEY UPDATE to avoid creating a gap
+                               # in the cat_id sequence. The row may or may not be "affected".
+                               $dbw->update(
+                                       'category',
+                                       [
+                                               'cat_pages' => $result->pages,
+                                               'cat_subcats' => $result->subcats,
+                                               'cat_files' => $result->files
+                                       ],
+                                       [ 'cat_title' => $this->mName ],
+                                       __METHOD__
+                               );
+                       } else {
+                               # The category is empty and has no description page, delete it
+                               $dbw->delete(
+                                       'category',
+                                       [ 'cat_title' => $this->mName ],
+                                       __METHOD__
+                               );
+                               $this->mID = false;
+                       }
+               } elseif ( $shouldExist ) {
+                       # The category row doesn't exist but should, so create it. Use
+                       # upsert in case of races.
                        $dbw->upsert(
                                'category',
                                [
@@ -362,6 +381,8 @@ class Category {
                                ],
                                __METHOD__
                        );
+                       // @todo: Should we update $this->mID here? Or not since Category
+                       // objects tend to be short lived enough to not matter?
                }
 
                $dbw->endAtomic( __METHOD__ );
index b12f49f..694bbb5 100644 (file)
@@ -194,7 +194,7 @@ class Http {
        }
 
        /**
-        * Gets the relevant proxy from $wgHTTPProxy/http_proxy (when set).
+        * Gets the relevant proxy from $wgHTTPProxy
         *
         * @return mixed The proxy address or an empty string if not set.
         */
@@ -205,11 +205,6 @@ class Http {
                        return $wgHTTPProxy;
                }
 
-               $envHttpProxy = getenv( "http_proxy" );
-               if ( $envHttpProxy ) {
-                       return $envHttpProxy;
-               }
-
                return "";
        }
 }
@@ -393,7 +388,7 @@ class MWHttpRequest {
                        return;
                }
 
-               // Otherwise, fallback to $wgHTTPProxy/http_proxy (when set) if this is not a machine
+               // Otherwise, fall back to $wgHTTPProxy if this is not a machine
                // local URL and proxies are not disabled
                if ( Http::isLocalURL( $this->url ) || $this->noProxy ) {
                        $this->proxy = '';
index 8d0b8f1..0fc7980 100644 (file)
  * Functions related to the output of file content
  */
 class StreamFile {
-       const READY_STREAM = 1;
-       const NOT_MODIFIED = 2;
+       // Do not send any HTTP headers unless requested by caller (e.g. body only)
+       const STREAM_HEADLESS = 1;
+       // Do not try to tear down any PHP output buffers
+       const STREAM_ALLOW_OB = 2;
 
        /**
         * Stream a file to the browser, adding all the headers and fun stuff.
@@ -33,107 +35,183 @@ class StreamFile {
         * and Content-Disposition.
         *
         * @param string $fname Full name and path of the file to stream
-        * @param array $headers Any additional headers to send
+        * @param array $headers Any additional headers to send if the file exists
         * @param bool $sendErrors Send error messages if errors occur (like 404)
+        * @param array $optHeaders HTTP request header map (e.g. "range") (use lowercase keys)
+        * @param integer $flags Bitfield of STREAM_* constants
         * @throws MWException
         * @return bool Success
         */
-       public static function stream( $fname, $headers = [], $sendErrors = true ) {
+       public static function stream(
+               $fname, $headers = [], $sendErrors = true, $optHeaders = [], $flags = 0
+       ) {
+               $section = new ProfileSection( __METHOD__ );
 
                if ( FileBackend::isStoragePath( $fname ) ) { // sanity
                        throw new MWException( __FUNCTION__ . " given storage path '$fname'." );
                }
 
-               MediaWiki\suppressWarnings();
-               $stat = stat( $fname );
-               MediaWiki\restoreWarnings();
-
-               $res = self::prepareForStream( $fname, $stat, $headers, $sendErrors );
-               if ( $res == self::NOT_MODIFIED ) {
-                       $ok = true; // use client cache
-               } elseif ( $res == self::READY_STREAM ) {
-                       $ok = readfile( $fname );
-               } else {
-                       $ok = false; // failed
+               // Don't stream it out as text/html if there was a PHP error
+               if ( ( ( $flags & self::STREAM_HEADLESS ) == 0 || $headers ) && headers_sent() ) {
+                       echo "Headers already sent, terminating.\n";
+                       return false;
                }
 
-               return $ok;
-       }
+               $headerFunc = ( $flags & self::STREAM_HEADLESS )
+                       ? function ( $header ) {
+                                // no-op
+                       }
+                       : function ( $header ) {
+                               is_int( $header ) ? HttpStatus::header( $header ) : header( $header );
+                       };
+
+               MediaWiki\suppressWarnings();
+               $info = stat( $fname );
+               MediaWiki\restoreWarnings();
 
-       /**
-        * Call this function used in preparation before streaming a file.
-        * This function does the following:
-        * (a) sends Last-Modified, Content-type, and Content-Disposition headers
-        * (b) cancels any PHP output buffering and automatic gzipping of output
-        * (c) sends Content-Length header based on HTTP_IF_MODIFIED_SINCE check
-        *
-        * @param string $path Storage path or file system path
-        * @param array|bool $info File stat info with 'mtime' and 'size' fields
-        * @param array $headers Additional headers to send
-        * @param bool $sendErrors Send error messages if errors occur (like 404)
-        * @return int|bool READY_STREAM, NOT_MODIFIED, or false on failure
-        */
-       public static function prepareForStream(
-               $path, $info, $headers = [], $sendErrors = true
-       ) {
                if ( !is_array( $info ) ) {
                        if ( $sendErrors ) {
-                               HttpStatus::header( 404 );
-                               header( 'Cache-Control: no-cache' );
-                               header( 'Content-Type: text/html; charset=utf-8' );
-                               $encFile = htmlspecialchars( $path );
-                               $encScript = htmlspecialchars( $_SERVER['SCRIPT_NAME'] );
-                               echo "<html><body>
-                                       <h1>File not found</h1>
-                                       <p>Although this PHP script ($encScript) exists, the file requested for output
-                                       ($encFile) does not.</p>
-                                       </body></html>
-                                       ";
+                               self::send404Message( $fname, $flags );
                        }
                        return false;
                }
 
-               // Sent Last-Modified HTTP header for client-side caching
-               header( 'Last-Modified: ' . wfTimestamp( TS_RFC2822, $info['mtime'] ) );
+               // Send Last-Modified HTTP header for client-side caching
+               $headerFunc( 'Last-Modified: ' . wfTimestamp( TS_RFC2822, $info['mtime'] ) );
 
-               // Cancel output buffering and gzipping if set
-               wfResetOutputBuffers();
+               if ( ( $flags & self::STREAM_ALLOW_OB ) == 0 ) {
+                       // Cancel output buffering and gzipping if set
+                       wfResetOutputBuffers();
+               }
 
-               $type = self::contentTypeFromPath( $path );
+               $type = self::contentTypeFromPath( $fname );
                if ( $type && $type != 'unknown/unknown' ) {
-                       header( "Content-type: $type" );
+                       $headerFunc( "Content-type: $type" );
                } else {
                        // Send a content type which is not known to Internet Explorer, to
                        // avoid triggering IE's content type detection. Sending a standard
                        // unknown content type here essentially gives IE license to apply
                        // whatever content type it likes.
-                       header( 'Content-type: application/x-wiki' );
+                       $headerFunc( 'Content-type: application/x-wiki' );
                }
 
-               // Don't stream it out as text/html if there was a PHP error
-               if ( headers_sent() ) {
-                       echo "Headers already sent, terminating.\n";
-                       return false;
+               // Don't send if client has up to date cache
+               if ( isset( $optHeaders['if-modified-since'] ) ) {
+                       $modsince = preg_replace( '/;.*$/', '', $optHeaders['if-modified-since'] );
+                       if ( wfTimestamp( TS_UNIX, $info['mtime'] ) <= strtotime( $modsince ) ) {
+                               ini_set( 'zlib.output_compression', 0 );
+                               $headerFunc( 304 );
+                               return true; // ok
+                       }
                }
 
                // Send additional headers
                foreach ( $headers as $header ) {
-                       header( $header );
+                       header( $header ); // always use header(); specifically requested
                }
 
-               // Don't send if client has up to date cache
-               if ( !empty( $_SERVER['HTTP_IF_MODIFIED_SINCE'] ) ) {
-                       $modsince = preg_replace( '/;.*$/', '', $_SERVER['HTTP_IF_MODIFIED_SINCE'] );
-                       if ( wfTimestamp( TS_UNIX, $info['mtime'] ) <= strtotime( $modsince ) ) {
-                               ini_set( 'zlib.output_compression', 0 );
-                               HttpStatus::header( 304 );
-                               return self::NOT_MODIFIED; // ok
+               if ( isset( $optHeaders['range'] ) ) {
+                       $range = self::parseRange( $optHeaders['range'], $info['size'] );
+                       if ( is_array( $range ) ) {
+                               $headerFunc( 206 );
+                               $headerFunc( 'Content-Length: ' . $range[2] );
+                               $headerFunc( "Content-Range: bytes {$range[0]}-{$range[1]}/{$info['size']}" );
+                       } elseif ( $range === 'invalid' ) {
+                               if ( $sendErrors ) {
+                                       $headerFunc( 416 );
+                                       $headerFunc( 'Cache-Control: no-cache' );
+                                       $headerFunc( 'Content-Type: text/html; charset=utf-8' );
+                                       $headerFunc( 'Content-Range: bytes */' . $info['size'] );
+                               }
+                               return false;
+                       } else { // unsupported Range request (e.g. multiple ranges)
+                               $range = null;
+                               $headerFunc( 'Content-Length: ' . $info['size'] );
+                       }
+               } else {
+                       $range = null;
+                       $headerFunc( 'Content-Length: ' . $info['size'] );
+               }
+
+               if ( is_array( $range ) ) {
+                       $handle = fopen( $fname, 'rb' );
+                       if ( $handle ) {
+                               $ok = true;
+                               fseek( $handle, $range[0] );
+                               $remaining = $range[2];
+                               while ( $remaining > 0 && $ok ) {
+                                       $bytes = min( $remaining, 8 * 1024 );
+                                       $data = fread( $handle, $bytes );
+                                       $remaining -= $bytes;
+                                       $ok = ( $data !== false );
+                                       print $data;
+                               }
+                       } else {
+                               return false;
                        }
+               } else {
+                       return readfile( $fname ) !== false; // faster
                }
 
-               header( 'Content-Length: ' . $info['size'] );
+               return true;
+       }
+
+       /**
+        * Send out a standard 404 message for a file
+        *
+        * @param string $fname Full name and path of the file to stream
+        * @param integer $flags Bitfield of STREAM_* constants
+        * @since 1.24
+        */
+       public static function send404Message( $fname, $flags = 0 ) {
+               if ( ( $flags & self::STREAM_HEADLESS ) == 0 ) {
+                       HttpStatus::header( 404 );
+                       header( 'Cache-Control: no-cache' );
+                       header( 'Content-Type: text/html; charset=utf-8' );
+               }
+               $encFile = htmlspecialchars( $fname );
+               $encScript = htmlspecialchars( $_SERVER['SCRIPT_NAME'] );
+               echo "<!DOCTYPE html><html><body>
+                       <h1>File not found</h1>
+                       <p>Although this PHP script ($encScript) exists, the file requested for output
+                       ($encFile) does not.</p>
+                       </body></html>
+                       ";
+       }
 
-               return self::READY_STREAM; // ok
+       /**
+        * Convert a Range header value to an absolute (start, end) range tuple
+        *
+        * @param string $range Range header value
+        * @param integer $size File size
+        * @return array|string Tuple of (start, end, length), or an error string ('invalid' or 'unrecognized') on failure
+        * @since 1.24
+        */
+       public static function parseRange( $range, $size ) {
+               $m = [];
+               if ( preg_match( '#^bytes=(\d*)-(\d*)$#', $range, $m ) ) {
+                       list( , $start, $end ) = $m;
+                       if ( $start === '' && $end === '' ) {
+                               $absRange = [ 0, $size - 1 ];
+                       } elseif ( $start === '' ) {
+                               $absRange = [ $size - $end, $size - 1 ];
+                       } elseif ( $end === '' ) {
+                               $absRange = [ $start, $size - 1 ];
+                       } else {
+                               $absRange = [ $start, $end ];
+                       }
+                       if ( $absRange[0] >= 0 && $absRange[1] >= $absRange[0] ) {
+                               if ( $absRange[0] < $size ) {
+                                       $absRange[1] = min( $absRange[1], $size - 1 ); // stop at EOF
+                                       $absRange[2] = $absRange[1] - $absRange[0] + 1;
+                                       return $absRange;
+                               } elseif ( $absRange[0] == 0 && $size == 0 ) {
+                                       return 'unrecognized'; // the whole file should just be sent
+                               }
+                       }
+                       return 'invalid';
+               }
+               return 'unrecognized';
        }
 
        /**
index a7c39ca..b60ed8a 100644 (file)
@@ -61,6 +61,8 @@ class LinksDeletionUpdate extends SqlDataUpdate implements EnqueueableDataUpdate
                // This handles the case when updates have to batched into several COMMITs.
                $scopedLock = LinksUpdate::acquirePageLock( $this->mDb, $id );
 
+               $title = $this->page->getTitle();
+
                // Delete restrictions for it
                $this->mDb->delete( 'page_restrictions', [ 'pr_page' => $id ], __METHOD__ );
 
@@ -80,6 +82,20 @@ class LinksDeletionUpdate extends SqlDataUpdate implements EnqueueableDataUpdate
                        }
                }
 
+               // Refresh the category table entry if it seems to have no pages. Check
+               // master for the most up-to-date cat_pages count.
+               if ( $title->getNamespace() === NS_CATEGORY ) {
+                       $row = $this->mDb->selectRow(
+                               'category',
+                               [ 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats', 'cat_files' ],
+                               [ 'cat_title' => $title->getDBkey(), 'cat_pages <= 0' ],
+                               __METHOD__
+                       );
+                       if ( $row ) {
+                               $cat = Category::newFromRow( $row, $title )->refreshCounts();
+                       }
+               }
+
                // If using cascading deletes, we can skip some explicit deletes
                if ( !$this->mDb->cascadingDeletes() ) {
                        // Delete outgoing links
@@ -132,7 +148,6 @@ class LinksDeletionUpdate extends SqlDataUpdate implements EnqueueableDataUpdate
 
                // If using cleanup triggers, we can skip some manual deletes
                if ( !$this->mDb->cleanupTriggers() ) {
-                       $title = $this->page->getTitle();
                        // Find recentchanges entries to clean up...
                        $rcIdsForTitle = $this->mDb->selectFieldValues(
                                'recentchanges',
index 03974f7..10183f4 100644 (file)
@@ -1005,15 +1005,21 @@ abstract class FileBackend {
 
        /**
         * Stream the file at a storage path in the backend.
+        *
         * If the file does not exist, an HTTP 404 error will be given.
         * Appropriate HTTP headers (Status, Content-Type, Content-Length)
         * will be sent if streaming began, while none will be sent otherwise.
         * Implementations should flush the output buffer before sending data.
         *
         * @param array $params Parameters include:
-        *   - src     : source storage path
-        *   - headers : list of additional HTTP headers to send on success
-        *   - latest  : use the latest available data
+        *   - src      : source storage path
+        *   - headers  : list of additional HTTP headers to send if the file exists
+        *   - options  : HTTP request header map with lower case keys (since 1.28). Supports:
+        *                range             : format is "bytes=(\d*-\d*)"
+        *                if-modified-since : format is an HTTP date
+        *   - headless : only include the body (and headers from "headers") (since 1.28)
+        *   - latest   : use the latest available data
+        *   - allowOB  : preserve any output buffers (since 1.28)
         * @return Status
         */
        abstract public function streamFile( array $params );
index 4d9587e..a29119c 100644 (file)
@@ -844,30 +844,19 @@ abstract class FileBackendStore extends FileBackend {
                $ps = Profiler::instance()->scopedProfileIn( __METHOD__ . "-{$this->name}" );
                $status = Status::newGood();
 
-               $info = $this->getFileStat( $params );
-               if ( !$info ) { // let StreamFile handle the 404
-                       $status->fatal( 'backend-fail-notexists', $params['src'] );
-               }
-
-               // Set output buffer and HTTP headers for stream
-               $extraHeaders = isset( $params['headers'] ) ? $params['headers'] : [];
-               $res = StreamFile::prepareForStream( $params['src'], $info, $extraHeaders );
-               if ( $res == StreamFile::NOT_MODIFIED ) {
-                       // do nothing; client cache is up to date
-               } elseif ( $res == StreamFile::READY_STREAM ) {
-                       $status = $this->doStreamFile( $params );
-                       if ( !$status->isOK() ) {
-                               // Per bug 41113, nasty things can happen if bad cache entries get
-                               // stuck in cache. It's also possible that this error can come up
-                               // with simple race conditions. Clear out the stat cache to be safe.
-                               $this->clearCache( [ $params['src'] ] );
-                               $this->deleteFileCache( $params['src'] );
-                               trigger_error( "Bad stat cache or race condition for file {$params['src']}." );
-                       }
-               } else {
+               // Always set some fields for subclass convenience
+               $params['options'] = isset( $params['options'] ) ? $params['options'] : [];
+               $params['headers'] = isset( $params['headers'] ) ? $params['headers'] : [];
+
+               // Don't stream it out as text/html if there was a PHP error
+               if ( ( empty( $params['headless'] ) || $params['headers'] ) && headers_sent() ) {
+                       print "Headers already sent, terminating.\n";
                        $status->fatal( 'backend-fail-stream', $params['src'] );
+                       return $status;
                }
 
+               $status->merge( $this->doStreamFile( $params ) );
+
                return $status;
        }
 
@@ -879,10 +868,21 @@ abstract class FileBackendStore extends FileBackend {
        protected function doStreamFile( array $params ) {
                $status = Status::newGood();
 
+               $flags = 0;
+               $flags |= !empty( $params['headless'] ) ? StreamFile::STREAM_HEADLESS : 0;
+               $flags |= !empty( $params['allowOB'] ) ? StreamFile::STREAM_ALLOW_OB : 0;
+
                $fsFile = $this->getLocalReference( $params );
-               if ( !$fsFile ) {
-                       $status->fatal( 'backend-fail-stream', $params['src'] );
-               } elseif ( !readfile( $fsFile->getPath() ) ) {
+
+               if ( $fsFile ) {
+                       $res = StreamFile::stream( $fsFile->getPath(),
+                               $params['headers'], true, $params['options'], $flags );
+               } else {
+                       $res = false;
+                       StreamFile::send404Message( $params['src'], $flags );
+               }
+
+               if ( !$res ) {
                        $status->fatal( 'backend-fail-stream', $params['src'] );
                }
 
index 6e32c62..e2c1ede 100644 (file)
@@ -183,21 +183,6 @@ class MemoryFileBackend extends FileBackendStore {
                return $tmpFiles;
        }
 
-       protected function doStreamFile( array $params ) {
-               $status = Status::newGood();
-
-               $src = $this->resolveHashKey( $params['src'] );
-               if ( $src === null || !isset( $this->files[$src] ) ) {
-                       $status->fatal( 'backend-fail-stream', $params['src'] );
-
-                       return $status;
-               }
-
-               print $this->files[$src]['data'];
-
-               return $status;
-       }
-
        protected function doDirectoryExists( $container, $dir, array $params ) {
                $prefix = rtrim( "$container/$dir", '/' ) . '/';
                foreach ( $this->files as $path => $data ) {
index 0f7e4b5..2adf934 100644 (file)
@@ -1045,32 +1045,62 @@ class SwiftFileBackend extends FileBackendStore {
        protected function doStreamFile( array $params ) {
                $status = Status::newGood();
 
+               $flags = !empty( $params['headless'] ) ? StreamFile::STREAM_HEADLESS : 0;
+
                list( $srcCont, $srcRel ) = $this->resolveStoragePathReal( $params['src'] );
                if ( $srcRel === null ) {
+                       StreamFile::send404Message( $params['src'], $flags );
                        $status->fatal( 'backend-fail-invalidpath', $params['src'] );
+
+                       return $status;
                }
 
                $auth = $this->getAuthentication();
                if ( !$auth || !is_array( $this->getContainerStat( $srcCont ) ) ) {
+                       StreamFile::send404Message( $params['src'], $flags );
                        $status->fatal( 'backend-fail-stream', $params['src'] );
 
                        return $status;
                }
 
-               $handle = fopen( 'php://output', 'wb' );
+               // If "headers" is set, we only want to send them if the file is there.
+               // Do not bother checking if the file exists if headers are not set though.
+               if ( $params['headers'] && !$this->fileExists( $params ) ) {
+                       StreamFile::send404Message( $params['src'], $flags );
+                       $status->fatal( 'backend-fail-stream', $params['src'] );
 
+                       return $status;
+               }
+
+               // Send the requested additional headers
+               foreach ( $params['headers'] as $header ) {
+                       header( $header ); // always send
+               }
+
+               if ( empty( $params['allowOB'] ) ) {
+                       // Cancel output buffering and gzipping if set
+                       wfResetOutputBuffers();
+               }
+
+               $handle = fopen( 'php://output', 'wb' );
                list( $rcode, $rdesc, $rhdrs, $rbody, $rerr ) = $this->http->run( [
                        'method' => 'GET',
                        'url' => $this->storageUrl( $auth, $srcCont, $srcRel ),
                        'headers' => $this->authTokenHeaders( $auth )
-                               + $this->headersFromParams( $params ),
+                               + $this->headersFromParams( $params ) + $params['options'],
                        'stream' => $handle,
+                       'flags'  => [ 'relayResponseHeaders' => empty( $params['headless'] ) ]
                ] );
 
                if ( $rcode >= 200 && $rcode <= 299 ) {
                        // good
                } elseif ( $rcode === 404 ) {
                        $status->fatal( 'backend-fail-stream', $params['src'] );
+                       // Per bug 41113, nasty things can happen if bad cache entries get
+                       // stuck in cache. It's also possible that this error can come up
+                       // with simple race conditions. Clear out the stat cache to be safe.
+                       $this->clearCache( [ $params['src'] ] );
+                       $this->deleteFileCache( $params['src'] );
                } else {
                        $this->onError( $status, __METHOD__, $params, $rerr, $rcode, $rdesc );
                }
index d7559d0..4ab913d 100644 (file)
@@ -1585,12 +1585,13 @@ class FileRepo {
         *
         * @param string $virtualUrl
         * @param array $headers Additional HTTP headers to send on success
+        * @param array $optHeaders HTTP request headers (if-modified-since, range, ...)
         * @return Status
         * @since 1.27
         */
-       public function streamFileWithStatus( $virtualUrl, $headers = [] ) {
+       public function streamFileWithStatus( $virtualUrl, $headers = [], $optHeaders = [] ) {
                $path = $this->resolveToStoragePath( $virtualUrl );
-               $params = [ 'src' => $path, 'headers' => $headers ];
+               $params = [ 'src' => $path, 'headers' => $headers, 'options' => $optHeaders ];
 
                return $this->backend->streamFile( $params );
        }
index 6a20abc..0d8137c 100644 (file)
@@ -75,6 +75,7 @@ abstract class DatabaseUpdater {
                PopulateFilearchiveSha1::class,
                PopulateBacklinkNamespace::class,
                FixDefaultJsonContentPages::class,
+               CleanupEmptyCategories::class,
        ];
 
        /**
index 0371f24..320a0b6 100644 (file)
@@ -35,6 +35,8 @@
  *                use application/x-www-form-urlencoded (headers sent automatically)
  *   - stream   : resource to stream the HTTP response body to
  *   - proxy    : HTTP proxy to use
+ *   - flags    : map of boolean flags which supports:
+ *                  - relayResponseHeaders : write out header via header()
  * Request maps can use integer index 0 instead of 'method' and 1 instead of 'url'.
  *
  * @author Aaron Schulz
@@ -172,6 +174,7 @@ class MultiHttpClient {
                                $req['body'] = '';
                                $req['headers']['content-length'] = 0;
                        }
+                       $req['flags'] = isset( $req['flags'] ) ? $req['flags'] : [];
                        $handles[$index] = $this->getCurlHandle( $req, $opts );
                        if ( count( $reqs ) > 1 ) {
                                // https://github.com/guzzle/guzzle/issues/349
@@ -373,6 +376,9 @@ class MultiHttpClient {
 
                curl_setopt( $ch, CURLOPT_HEADERFUNCTION,
                        function ( $ch, $header ) use ( &$req ) {
+                               if ( !empty( $req['flags']['relayResponseHeaders'] ) ) {
+                                       header( $header );
+                               }
                                $length = strlen( $header );
                                $matches = [];
                                if ( preg_match( "/^(HTTP\/1\.[01]) (\d{3}) (.*)/", $header, $matches ) ) {
index b06b519..b64604e 100644 (file)
@@ -3279,6 +3279,14 @@ class WikiPage implements Page, IDBAccessObject {
                $title->touchLinks();
                $title->purgeSquid();
                $title->deleteTitleProtection();
+
+               if ( $title->getNamespace() == NS_CATEGORY ) {
+                       // Load the Category object, which will schedule a job to create
+                       // the category table row if necessary. Checking a slave is ok
+                       // here; in the worst case it'll run an unnecessary recount job on
+                       // a category that probably doesn't have many members.
+                       Category::newFromTitle( $title )->getID();
+               }
        }
 
        /**
@@ -3525,6 +3533,22 @@ class WikiPage implements Page, IDBAccessObject {
                                        $cat = Category::newFromName( $catName );
                                        Hooks::run( 'CategoryAfterPageRemoved', [ $cat, $this, $id ] );
                                }
+
+                               // Refresh counts on categories that should be empty now, to
+                               // trigger possible deletion. Check master for the most
+                               // up-to-date cat_pages.
+                               if ( count( $deleted ) ) {
+                                       $rows = $dbw->select(
+                                               'category',
+                                               [ 'cat_id', 'cat_title', 'cat_pages', 'cat_subcats', 'cat_files' ],
+                                               [ 'cat_title' => $deleted, 'cat_pages <= 0' ],
+                                               $method
+                                       );
+                                       foreach ( $rows as $row ) {
+                                               $cat = Category::newFromRow( $row );
+                                               $cat->refreshCounts();
+                                       }
+                               }
                        }
                );
        }
diff --git a/maintenance/cleanupEmptyCategories.php b/maintenance/cleanupEmptyCategories.php
new file mode 100644 (file)
index 0000000..b8a246e
--- /dev/null
@@ -0,0 +1,204 @@
+<?php
+/**
+ * Clean up empty categories in the category table.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+ * http://www.gnu.org/copyleft/gpl.html
+ *
+ * @file
+ * @ingroup Maintenance
+ */
+
+require_once __DIR__ . '/Maintenance.php';
+
+/**
+ * Maintenance script to clean up empty categories in the category table.
+ *
+ * @ingroup Maintenance
+ * @since 1.28
+ */
+class CleanupEmptyCategories extends LoggedUpdateMaintenance {
+
+       /**
+        * Register the script description and the --mode, --begin and
+        * --throttle command line options (all optional, all taking a value).
+        */
+       public function __construct() {
+               parent::__construct();
+               $this->addDescription(
+                       <<<TEXT
+This script will clean up the category table by removing entries for empty
+categories without a description page and adding entries for empty categories
+with a description page. It will print out progress indicators every batch. The
+script is perfectly safe to run on large, live wikis, and running it multiple
+times is harmless. You may want to use the throttling options if it's causing
+too much load; they will not affect correctness.
+
+If the script is stopped and later resumed, you can use the --mode and --begin
+options with the last printed progress indicator to pick up where you left off.
+
+When the script has finished, it will make a note of this in the database, and
+will not run again without the --force option.
+TEXT
+               );
+
+               $this->addOption(
+                       'mode',
+                       '"add" empty categories with description pages, "remove" empty categories '
+                       . 'without description pages, or "both"',
+                       false,
+                       true
+               );
+               $this->addOption(
+                       'begin',
+                       'Only do categories whose names are alphabetically after the provided name',
+                       false,
+                       true
+               );
+               $this->addOption(
+                       'throttle',
+                       'Wait this many milliseconds after each batch. Default: 0',
+                       false,
+                       true
+               );
+       }
+
+       /**
+        * @return string Key naming this update; presumably what the parent class
+        *  uses to record that the script already ran (see the script description)
+        */
+       protected function getUpdateKey() {
+               return 'cleanup empty categories';
+       }
+
+       /**
+        * Run the cleanup pass(es) selected by --mode.
+        *
+        * The "add" pass walks category description pages that have no matching
+        * row in the category table; the "remove" pass walks category rows with
+        * cat_pages = 0 that have no description page. Both passes work in
+        * batches of $this->mBatchSize ordered by title, call
+        * Category::refreshCounts() for every title found, print a resumable
+        * progress indicator, and then wait for replication plus the optional
+        * throttle delay. "both" runs "add" first and then "remove".
+        *
+        * @return bool False if --mode is not one of the accepted values,
+        *  true once the selected passes have finished
+        */
+       protected function doDBUpdates() {
+               $mode = $this->getOption( 'mode', 'both' );
+               $begin = $this->getOption( 'begin', '' );
+               // usleep() rejects negative values, so clamp the delay to zero.
+               $throttle = max( 0, intval( $this->getOption( 'throttle', 0 ) ) );
+
+               if ( !in_array( $mode, [ 'add', 'remove', 'both' ], true ) ) {
+                       $this->output( "--mode must be 'add', 'remove', or 'both'.\n" );
+                       return false;
+               }
+
+               $dbw = $this->getDB( DB_MASTER );
+
+               if ( $mode === 'add' || $mode === 'both' ) {
+                       if ( $begin !== '' ) {
+                               $where = [ 'page_title > ' . $dbw->addQuotes( $begin ) ];
+                       } else {
+                               $where = [];
+                       }
+
+                       $this->output( "Adding empty categories with description pages...\n" );
+                       while ( true ) {
+                               # Find category pages that have no category table row
+                               $rows = $dbw->select(
+                                       [ 'page', 'category' ],
+                                       'page_title',
+                                       array_merge( $where, [
+                                               'page_namespace' => NS_CATEGORY,
+                                               'cat_title' => null,
+                                       ] ),
+                                       __METHOD__,
+                                       [
+                                               'ORDER BY' => 'page_title',
+                                               'LIMIT' => $this->mBatchSize,
+                                       ],
+                                       [
+                                               'category' => [ 'LEFT JOIN', 'page_title = cat_title' ],
+                                       ]
+                               );
+                               if ( !$rows || $rows->numRows() <= 0 ) {
+                                       # Done, hopefully.
+                                       break;
+                               }
+
+                               foreach ( $rows as $row ) {
+                                       $name = $row->page_title;
+                                       # The next batch starts after the last title seen
+                                       $where = [ 'page_title > ' . $dbw->addQuotes( $name ) ];
+
+                                       # Use the row to update the category count
+                                       $cat = Category::newFromName( $name );
+                                       if ( !is_object( $cat ) ) {
+                                               $this->output( "The category named $name is not valid?!\n" );
+                                       } else {
+                                               $cat->refreshCounts();
+                                       }
+                               }
+                               # Progress indicator; these values can be passed back in to resume
+                               $this->output( "--mode=$mode --begin=$name\n" );
+
+                               wfWaitForSlaves();
+                               if ( $throttle > 0 ) {
+                                       usleep( $throttle * 1000 );
+                               }
+                       }
+
+                       # --begin only applies to the first pass; the remove pass starts over
+                       $begin = '';
+               }
+
+               if ( $mode === 'remove' || $mode === 'both' ) {
+                       if ( $begin !== '' ) {
+                               $where = [ 'cat_title > ' . $dbw->addQuotes( $begin ) ];
+                       } else {
+                               $where = [];
+                       }
+
+                       $this->output( "Removing empty categories without description pages...\n" );
+                       while ( true ) {
+                               # Find empty category rows that have no category page
+                               $rows = $dbw->select(
+                                       [ 'category', 'page' ],
+                                       'cat_title',
+                                       array_merge( $where, [
+                                               'page_title' => null,
+                                               'cat_pages' => 0,
+                                       ] ),
+                                       __METHOD__,
+                                       [
+                                               'ORDER BY' => 'cat_title',
+                                               'LIMIT' => $this->mBatchSize,
+                                       ],
+                                       [
+                                               'page' => [ 'LEFT JOIN', [
+                                                       'page_namespace' => NS_CATEGORY, 'page_title = cat_title'
+                                               ] ],
+                                       ]
+                               );
+                               if ( !$rows || $rows->numRows() <= 0 ) {
+                                       # Done, hopefully.
+                                       break;
+                               }
+                               foreach ( $rows as $row ) {
+                                       $name = $row->cat_title;
+                                       # The next batch starts after the last title seen
+                                       $where = [ 'cat_title > ' . $dbw->addQuotes( $name ) ];
+
+                                       # Use the row to update the category count
+                                       $cat = Category::newFromName( $name );
+                                       if ( !is_object( $cat ) ) {
+                                               $this->output( "The category named $name is not valid?!\n" );
+                                       } else {
+                                               $cat->refreshCounts();
+                                       }
+                               }
+
+                               # Progress indicator; these values can be passed back in to resume
+                               $this->output( "--mode=remove --begin=$name\n" );
+
+                               wfWaitForSlaves();
+                               if ( $throttle > 0 ) {
+                                       usleep( $throttle * 1000 );
+                               }
+                       }
+               }
+
+               $this->output( "Category cleanup complete.\n" );
+
+               return true;
+       }
+}
+
+$maintClass = 'CleanupEmptyCategories';
+require_once RUN_MAINTENANCE_IF_MAIN;
index 12cfed8..48b2250 100644 (file)
@@ -336,9 +336,9 @@ CREATE INDEX /*i*/cl_timestamp ON /*_*/categorylinks (cl_to,cl_timestamp);
 CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, cl_type, cl_from);
 
 --
--- Track all existing categories.  Something is a category if 1) it has an en-
--- try somewhere in categorylinks, or 2) it once did.  Categories might not
--- have corresponding pages, so they need to be tracked separately.
+-- Track all existing categories. Something is a category if 1) it has an entry
+-- somewhere in categorylinks, or 2) it has a description page. Categories
+-- might not have corresponding pages, so they need to be tracked separately.
 --
 CREATE TABLE /*_*/category (
   -- Primary key
index 89aeb9c..9c9bdfb 100644 (file)
@@ -623,9 +623,9 @@ CREATE INDEX /*i*/cl_timestamp ON /*_*/categorylinks (cl_to,cl_timestamp);
 CREATE INDEX /*i*/cl_collation_ext ON /*_*/categorylinks (cl_collation, cl_to, cl_type, cl_from);
 
 --
--- Track all existing categories.  Something is a category if 1) it has an en-
--- try somewhere in categorylinks, or 2) it once did.  Categories might not
--- have corresponding pages, so they need to be tracked separately.
+-- Track all existing categories. Something is a category if 1) it has an entry
+-- somewhere in categorylinks, or 2) it has a description page. Categories
+-- might not have corresponding pages, so they need to be tracked separately.
 --
 CREATE TABLE /*_*/category (
   -- Primary key
index af5d3ac..254cfbd 100644 (file)
@@ -1139,16 +1139,16 @@ class FileBackendTest extends MediaWikiTestCase {
                $this->tearDownFiles();
                $this->doTestStreamFile( $path, $content, $alreadyExists );
                $this->tearDownFiles();
+
+               $this->backend = $this->multiBackend;
+               $this->tearDownFiles();
+               $this->doTestStreamFile( $path, $content, $alreadyExists );
+               $this->tearDownFiles();
        }
 
        private function doTestStreamFile( $path, $content ) {
                $backendName = $this->backendClass();
 
-               // Test doStreamFile() directly to avoid header madness
-               $class = new ReflectionClass( $this->backend );
-               $method = $class->getMethod( 'doStreamFile' );
-               $method->setAccessible( true );
-
                if ( $content !== null ) {
                        $this->prepare( [ 'dir' => dirname( $path ) ] );
                        $status = $this->create( [ 'dst' => $path, 'content' => $content ] );
@@ -1156,18 +1156,19 @@ class FileBackendTest extends MediaWikiTestCase {
                                "Creation of file at $path succeeded ($backendName)." );
 
                        ob_start();
-                       $method->invokeArgs( $this->backend, [ [ 'src' => $path ] ] );
+                       $this->backend->streamFile( [ 'src' => $path, 'headless' => 1, 'allowOB' => 1 ] );
                        $data = ob_get_contents();
                        ob_end_clean();
 
                        $this->assertEquals( $content, $data, "Correct content streamed from '$path'" );
                } else { // 404 case
                        ob_start();
-                       $method->invokeArgs( $this->backend, [ [ 'src' => $path ] ] );
+                       $this->backend->streamFile( [ 'src' => $path, 'headless' => 1, 'allowOB' => 1 ] );
                        $data = ob_get_contents();
                        ob_end_clean();
 
-                       $this->assertEquals( '', $data, "Correct content streamed from '$path' ($backendName)" );
+                       $this->assertRegExp( '#<h1>File not found</h1>#', $data,
+                               "Correct content streamed from '$path' ($backendName)" );
                }
        }
 
@@ -1181,6 +1182,53 @@ class FileBackendTest extends MediaWikiTestCase {
                return $cases;
        }
 
+       /**
+        * Run the ranged streaming checks against the single backend first and
+        * then the multi backend, clearing test files before and after each run.
+        */
+       public function testStreamFileRange() {
+               foreach ( [ $this->singleBackend, $this->multiBackend ] as $backendUnderTest ) {
+                       $this->backend = $backendUnderTest;
+                       $this->tearDownFiles();
+                       $this->doTestStreamFileRange();
+                       $this->tearDownFiles();
+               }
+       }
+
+       /**
+        * Create a 16-byte file on the current backend, stream it once per
+        * 'range' option value and compare the captured output with the
+        * expected slice of the content.
+        */
+       private function doTestStreamFileRange() {
+               $backendName = $this->backendClass();
+
+               $base = self::baseStorePath();
+               $path = "$base/unittest-cont1/e/b/z/range_file.txt";
+               $content = "0123456789ABCDEF";
+
+               $this->prepare( [ 'dir' => dirname( $path ) ] );
+               $this->assertGoodStatus(
+                       $this->create( [ 'dst' => $path, 'content' => $content ] ),
+                       "Creation of file at $path succeeded ($backendName)."
+               );
+
+               // Range request value => bytes expected in the streamed output
+               $expectedChunks = [
+                       'bytes=0-0'   => '0',
+                       'bytes=0-3'   => '0123',
+                       'bytes=4-8'   => '45678',
+                       'bytes=15-15' => 'F',
+                       'bytes=14-15' => 'EF',
+                       'bytes=-5'    => 'BCDEF',
+                       'bytes=-1'    => 'F',
+                       'bytes=10-16' => 'ABCDEF',
+                       'bytes=10-99' => 'ABCDEF',
+               ];
+
+               foreach ( $expectedChunks as $range => $chunk ) {
+                       $streamParams = [
+                               'src' => $path,
+                               'headless' => 1,
+                               'allowOB' => 1,
+                               'options' => [ 'range' => $range ],
+                       ];
+
+                       // Capture whatever the backend writes to the output stream
+                       ob_start();
+                       $this->backend->streamFile( $streamParams );
+                       $data = ob_get_clean();
+
+                       $this->assertEquals( $chunk, $data, "Correct chunk streamed from '$path' for '$range'" );
+               }
+       }
+
        /**
         * @dataProvider provider_testGetFileContents
         * @covers FileBackend::getFileContents