ApiFormatBase: Encode filenames in Content-Disposition
authorBrad Jorsch <bjorsch@wikimedia.org>
Wed, 7 Feb 2018 20:12:33 +0000 (15:12 -0500)
committerBrad Jorsch <bjorsch@wikimedia.org>
Wed, 7 Feb 2018 20:12:33 +0000 (15:12 -0500)
The return value for ApiFormatBase::getFilename() was formerly
documented as "must be encoded for inclusion in a Content-Disposition
header's filename parameter." While this is ok for the common use case
where the module is returning a constant string or can assume whatever
it gets back from getExtensionsForType() is ok, it's not in general a
good idea to make all callers handle that.

Further, it's not possible to represent characters outside of the
ISO-8859-1 character set in a 'filename' parameter. You have to use
'filename*' to do that (see RFC 5987 and RFC 6266).

So, this patch changes the definition of getFilename() to remove the
encoding requirement, and adds code to properly convert and escape the
value for the 'filename' and (if necessary) 'filename*' parameters.

Note this may give unexpected results (double encoding) if any module
actually is returning an encoded filename. I don't see any such cases in
core or in extensions in Gerrit.

Change-Id: I0c2749a847b639f228efff4e1917a61612a1f7d1

includes/api/ApiFormatBase.php
tests/phpunit/includes/api/format/ApiFormatBaseTest.php

index 471ce21..18c36de 100644 (file)
@@ -65,8 +65,7 @@ abstract class ApiFormatBase extends ApiBase {
         * @note If $this->getIsWrappedHtml() || $this->getIsHtml(), you'll very
         *  likely want to fall back to this class's version.
         * @since 1.27
         * @note If $this->getIsWrappedHtml() || $this->getIsHtml(), you'll very
         *  likely want to fall back to this class's version.
         * @since 1.27
-        * @return string Generally this should be "api-result.$ext", and must be
-        *  encoded for inclusion in a Content-Disposition header's filename parameter.
+        * @return string Generally this should be "api-result.$ext"
         */
        public function getFilename() {
                if ( $this->getIsWrappedHtml() ) {
         */
        public function getFilename() {
                if ( $this->getIsWrappedHtml() ) {
@@ -212,10 +211,25 @@ abstract class ApiFormatBase extends ApiBase {
 
                // Set a Content-Disposition header so something downloading an API
                // response uses a halfway-sensible filename (T128209).
 
                // Set a Content-Disposition header so something downloading an API
                // response uses a halfway-sensible filename (T128209).
+               $header = 'Content-Disposition: inline';
                $filename = $this->getFilename();
                $filename = $this->getFilename();
-               $this->getMain()->getRequest()->response()->header(
-                       "Content-Disposition: inline; filename=\"{$filename}\""
-               );
+               $compatFilename = mb_convert_encoding( $filename, 'ISO-8859-1' );
+               if ( preg_match( '/^[0-9a-zA-Z!#$%&\'*+\-.^_`|~]+$/', $compatFilename ) ) {
+                       $header .= '; filename=' . $compatFilename;
+               } else {
+                       $header .= '; filename="'
+                               . preg_replace( '/([\0-\x1f"\x5c\x7f])/', '\\\\$1', $compatFilename ) . '"';
+               }
+               if ( $compatFilename !== $filename ) {
+                       $value = "UTF-8''" . rawurlencode( $filename );
+                       // rawurlencode() encodes more characters than RFC 5987 specifies. Unescape the ones it allows.
+                       $value = strtr( $value, [
+                               '%21' => '!', '%23' => '#', '%24' => '$', '%26' => '&', '%2B' => '+', '%5E' => '^',
+                               '%60' => '`', '%7C' => '|',
+                       ] );
+                       $header .= '; filename*=' . $value;
+               }
+               $this->getMain()->getRequest()->response()->header( $header );
        }
 
        /**
        }
 
        /**
index d6a1390..55f760f 100644 (file)
@@ -82,7 +82,7 @@ class ApiFormatBaseTest extends ApiFormatTestBase {
                $this->assertSame( "$ct; charset=utf-8", strtolower( $response->getHeader( 'Content-Type' ) ) );
                $this->assertSame( 'DENY', $response->getHeader( 'X-Frame-Options' ) );
                $this->assertSame( $file, $printer->getFilename() );
                $this->assertSame( "$ct; charset=utf-8", strtolower( $response->getHeader( 'Content-Type' ) ) );
                $this->assertSame( 'DENY', $response->getHeader( 'X-Frame-Options' ) );
                $this->assertSame( $file, $printer->getFilename() );
-               $this->assertSame( "inline; filename=\"$file\"", $response->getHeader( 'Content-Disposition' ) );
+               $this->assertSame( "inline; filename=$file", $response->getHeader( 'Content-Disposition' ) );
                $this->assertSame( $status, $response->getStatusCode() );
 
                return $text;
                $this->assertSame( $status, $response->getStatusCode() );
 
                return $text;
@@ -144,6 +144,49 @@ class ApiFormatBaseTest extends ApiFormatTestBase {
                ];
        }
 
                ];
        }
 
+       /**
+        * @dataProvider provideFilenameEncoding
+        */
+       public function testFilenameEncoding( $filename, $expect ) {
+               $ret = parent::encodeData( [], [], [
+                       'name' => 'mock',
+                       'class' => ApiFormatBase::class,
+                       'factory' => function ( ApiMain $main, $format ) use ( $filename ) {
+                               $mock = $this->getMockFormatter( $main, $format, [ 'getFilename' ] );
+                               $mock->method( 'getFilename' )->willReturn( $filename );
+                               return $mock;
+                       },
+                       'returnPrinter' => true,
+               ] );
+               $response = $ret['printer']->getMain()->getRequest()->response();
+
+               $this->assertSame( "inline; $expect", $response->getHeader( 'Content-Disposition' ) );
+       }
+
+       public static function provideFilenameEncoding() {
+               return [
+                       'something simple' => [
+                               'foo.xyz', 'filename=foo.xyz'
+                       ],
+                       'more complicated, but still simple' => [
+                               'foo.!#$%&\'*+-^_`|~', 'filename=foo.!#$%&\'*+-^_`|~'
+                       ],
+                       'Needs quoting' => [
+                               'foo\\bar.xyz', 'filename="foo\\\\bar.xyz"'
+                       ],
+                       'Needs quoting (2)' => [
+                               'foo (bar).xyz', 'filename="foo (bar).xyz"'
+                       ],
+                       'Needs quoting (3)' => [
+                               "foo\t\"b\x5car\"\0.xyz", "filename=\"foo\x5c\t\x5c\"b\x5c\x5car\x5c\"\x5c\0.xyz\""
+                       ],
+                       'Non-ASCII characters' => [
+                               'fóo bár.🙌!',
+                               "filename=\"f\xF3o b\xE1r.?!\"; filename*=UTF-8''f%C3%B3o%20b%C3%A1r.%F0%9F%99%8C!"
+                       ]
+               ];
+       }
+
        public function testBasics() {
                $printer = $this->getMockFormatter( null, 'mock' );
                $this->assertTrue( $printer->canPrintErrors() );
        public function testBasics() {
                $printer = $this->getMockFormatter( null, 'mock' );
                $this->assertTrue( $printer->canPrintErrors() );
@@ -220,7 +263,7 @@ class ApiFormatBaseTest extends ApiFormatTestBase {
                );
                $this->assertSame( 'DENY', $response->getHeader( 'X-Frame-Options' ) );
                $this->assertSame(
                );
                $this->assertSame( 'DENY', $response->getHeader( 'X-Frame-Options' ) );
                $this->assertSame(
-                       'inline; filename="api-result.html"', $response->getHeader( 'Content-Disposition' )
+                       'inline; filename=api-result.html', $response->getHeader( 'Content-Disposition' )
                );
        }
 
                );
        }