* (bug 29174) Fix regression in upload-by-URL: files larger than PHP memory limit...
authorBrion Vibber <brion@users.mediawiki.org>
Fri, 27 May 2011 22:31:48 +0000 (22:31 +0000)
committerBrion Vibber <brion@users.mediawiki.org>
Fri, 27 May 2011 22:31:48 +0000 (22:31 +0000)
r65152 switched upload-by-URL ($wgAllowCopyUploads) to use Http / MwHttpRequest class instead of CURL directly.
While this is mostly nice, it switched from saving large files directly to a temp file to buffering them in memory, causing large files to fail when they hit the PHP memory limit.

Fix uses MwHttpRequest's callback capability to override the read handler; now appending it to the temporary file as we go, and can confirm that largish files work again; was able to upload a 64mb .ogv that previously didn't work for me: http://prototype.wikimedia.org/tmh/images/b/b2/File-Arborophila_brunneopectus_pair_feeding_-_Kaeng_Krachan.ogv

Also expanded the documentation on MwHttpRequest::setCallback to clarify the function parameters and return value for the callback (which currently matches the low-level CURL handler's callback directly).
Note that the non-CURL implementation doesn't abort the read if the callback doesn't return the expected number of bytes, but this is an immediate fatal end of request on the Curl backend. May want further cleanup.

includes/HttpFunctions.php
includes/upload/UploadFromUrl.php

index 841b341..1ec083c 100644 (file)
@@ -316,11 +316,26 @@ class MWHttpRequest {
        }
 
        /**
-        * Set the callback
+        * Set a read callback to accept data read from the HTTP request.
+        * By default, data is appended to an internal buffer which can be
+        * retrieved through $req->getContent().
+        *
+        * To handle data as it comes in -- especially for large files that
+        * would not fit in memory -- you can instead set your own callback,
+        * in the form function($resource, $buffer) where the first parameter
+        * is the low-level resource being read (implementation specific),
+        * and the second parameter is the data buffer.
+        *
+        * You MUST return the number of bytes handled in the buffer; if fewer
+        * bytes are reported handled than were passed to you, the HTTP fetch
+        * will be aborted.
         *
         * @param $callback Callback
         */
        public function setCallback( $callback ) {
+               if ( !is_callable( $callback ) ) {
+                       throw new MWException( 'Invalid MwHttpRequest callback' );
+               }
                $this->callback = $callback;
        }
 
index ed5ad5e..e8178cd 100644 (file)
@@ -105,41 +105,62 @@ class UploadFromUrl extends UploadBase {
        protected function makeTemporaryFile() {
                return tempnam( wfTempDir(), 'URL' );
        }
+
        /**
-        * Save the result of a HTTP request to the temporary file
+        * Callback: save a chunk of the result of a HTTP request to the temporary file
         *
-        * @param $req MWHttpRequest
-        * @return Status
+        * @param $req mixed
+        * @param $buffer string
+        * @return int number of bytes handled
         */
-       private function saveTempFile( $req ) {
-               if ( $this->mTempPath === false ) {
-                       return Status::newFatal( 'tmp-create-error' );
+       public function saveTempFileChunk( $req, $buffer ) {
+               $nbytes = fwrite( $this->mTmpHandle, $buffer );
+
+               if ( $nbytes == strlen( $buffer ) ) {
+                       $this->mFileSize += $nbytes;
+               } else {
+                       // Well... that's not good!
+                       fclose( $this->mTmpHandle );
+                       $this->mTmpHandle = false;
                }
-               if ( file_put_contents( $this->mTempPath, $req->getContent() ) === false ) {
-                       return Status::newFatal( 'tmp-write-error' );
-               }
-
-               $this->mFileSize = filesize( $this->mTempPath );
 
-               return Status::newGood();
+               return $nbytes;
        }
+
        /**
         * Download the file, save it to the temporary file and update the file
         * size and set $mRemoveTempFile to true.
         */
        protected function reallyFetchFile() {
+               if ( $this->mTempPath === false ) {
+                       return Status::newFatal( 'tmp-create-error' );
+               }
+
+               // Note the temporary file should already be created by makeTemporaryFile()
+               $this->mTmpHandle = fopen( $this->mTempPath, 'wb' );
+               if ( !$this->mTmpHandle ) {
+                       return Status::newFatal( 'tmp-create-error' );
+               }
+
+               $this->mRemoveTempFile = true;
+               $this->mFileSize = 0;
+
                $req = MWHttpRequest::factory( $this->mUrl );
+               $req->setCallback( array( $this, 'saveTempFileChunk' ) );
                $status = $req->execute();
 
-               if ( !$status->isOk() ) {
-                       return $status;
+               if ( $this->mTmpHandle ) {
+                       // File got written ok...
+                       fclose( $this->mTmpHandle );
+                       $this->mTmpHandle = null;
+               } else {
+                       // We encountered a write error during the download...
+                       return Status::newFatal( 'tmp-write-error' );
                }
 
-               $status = $this->saveTempFile( $req );
-               if ( !$status->isGood() ) {
+               if ( !$status->isOk() ) {
                        return $status;
                }
-               $this->mRemoveTempFile = true;
 
                return $status;
        }