From 7bc541a4a70899475b88daaf2f26db1e1fc5e18a Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Wed, 7 Feb 2018 15:12:33 -0500 Subject: [PATCH 1/1] ApiFormatBase: Encode filenames in Content-Disposition The return value for ApiFormatBase::getFilename() was formerly documented as "must be encoded for inclusion in a Content-Disposition header's filename parameter." While this is ok for the common use case where the module is returning a constant string or can assume whatever it gets back from getExtensionsForType() is ok, it's not in general a good idea to make all callers handle that. Further, it's not possible to represent characters outside of the ISO-8859-1 character set in a 'filename' parameter. You have to use 'filename*' to do that (see RFC 5987 and RFC 6266). So, this patch changes the definition of getFilename() to remove the encoding requirement, and adds code to properly convert and escape the value for the 'filename' and (if necessary) 'filename*' parameters. Note this may give unexpected results (double encoding) if any module actually is returning an encoded filename. I don't see any such cases in core or in extensions in Gerrit. Change-Id: I0c2749a847b639f228efff4e1917a61612a1f7d1 --- includes/api/ApiFormatBase.php | 24 ++++++++-- .../includes/api/format/ApiFormatBaseTest.php | 47 ++++++++++++++++++- 2 files changed, 64 insertions(+), 7 deletions(-) diff --git a/includes/api/ApiFormatBase.php b/includes/api/ApiFormatBase.php index 471ce21553..18c36deb07 100644 --- a/includes/api/ApiFormatBase.php +++ b/includes/api/ApiFormatBase.php @@ -65,8 +65,7 @@ abstract class ApiFormatBase extends ApiBase { * @note If $this->getIsWrappedHtml() || $this->getIsHtml(), you'll very * likely want to fall back to this class's version. * @since 1.27 - * @return string Generally this should be "api-result.$ext", and must be - * encoded for inclusion in a Content-Disposition header's filename parameter. + * @return string Generally this should be "api-result.$ext" */ public function getFilename() { if ( $this->getIsWrappedHtml() ) { @@ -212,10 +211,25 @@ abstract class ApiFormatBase extends ApiBase { // Set a Content-Disposition header so something downloading an API // response uses a halfway-sensible filename (T128209). + $header = 'Content-Disposition: inline'; $filename = $this->getFilename(); - $this->getMain()->getRequest()->response()->header( - "Content-Disposition: inline; filename=\"{$filename}\"" - ); + $compatFilename = mb_convert_encoding( $filename, 'ISO-8859-1' ); + if ( preg_match( '/^[0-9a-zA-Z!#$%&\'*+\-.^_`|~]+$/', $compatFilename ) ) { + $header .= '; filename=' . $compatFilename; + } else { + $header .= '; filename="' + . preg_replace( '/([\0-\x1f"\x5c\x7f])/', '\\\\$1', $compatFilename ) . '"'; + } + if ( $compatFilename !== $filename ) { + $value = "UTF-8''" . rawurlencode( $filename ); + // rawurlencode() encodes more characters than RFC 5987 specifies. Unescape the ones it allows. + $value = strtr( $value, [ + '%21' => '!', '%23' => '#', '%24' => '$', '%26' => '&', '%2B' => '+', '%5E' => '^', + '%60' => '`', '%7C' => '|', + ] ); + $header .= '; filename*=' . $value; + } + $this->getMain()->getRequest()->response()->header( $header ); } /** diff --git a/tests/phpunit/includes/api/format/ApiFormatBaseTest.php b/tests/phpunit/includes/api/format/ApiFormatBaseTest.php index d6a13904eb..55f760f64d 100644 --- a/tests/phpunit/includes/api/format/ApiFormatBaseTest.php +++ b/tests/phpunit/includes/api/format/ApiFormatBaseTest.php @@ -82,7 +82,7 @@ class ApiFormatBaseTest extends ApiFormatTestBase { $this->assertSame( "$ct; charset=utf-8", strtolower( $response->getHeader( 'Content-Type' ) ) ); $this->assertSame( 'DENY', $response->getHeader( 'X-Frame-Options' ) ); $this->assertSame( $file, $printer->getFilename() ); - $this->assertSame( "inline; filename=\"$file\"", $response->getHeader( 'Content-Disposition' ) ); + $this->assertSame( "inline; filename=$file", $response->getHeader( 'Content-Disposition' ) ); $this->assertSame( $status, $response->getStatusCode() ); return $text; @@ -144,6 +144,49 @@ class ApiFormatBaseTest extends ApiFormatTestBase { ]; } + /** + * @dataProvider provideFilenameEncoding + */ + public function testFilenameEncoding( $filename, $expect ) { + $ret = parent::encodeData( [], [], [ + 'name' => 'mock', + 'class' => ApiFormatBase::class, + 'factory' => function ( ApiMain $main, $format ) use ( $filename ) { + $mock = $this->getMockFormatter( $main, $format, [ 'getFilename' ] ); + $mock->method( 'getFilename' )->willReturn( $filename ); + return $mock; + }, + 'returnPrinter' => true, + ] ); + $response = $ret['printer']->getMain()->getRequest()->response(); + + $this->assertSame( "inline; $expect", $response->getHeader( 'Content-Disposition' ) ); + } + + public static function provideFilenameEncoding() { + return [ + 'something simple' => [ + 'foo.xyz', 'filename=foo.xyz' + ], + 'more complicated, but still simple' => [ + 'foo.!#$%&\'*+-^_`|~', 'filename=foo.!#$%&\'*+-^_`|~' + ], + 'Needs quoting' => [ + 'foo\\bar.xyz', 'filename="foo\\\\bar.xyz"' + ], + 'Needs quoting (2)' => [ + 'foo (bar).xyz', 'filename="foo (bar).xyz"' + ], + 'Needs quoting (3)' => [ + "foo\t\"b\x5car\"\0.xyz", "filename=\"foo\x5c\t\x5c\"b\x5c\x5car\x5c\"\x5c\0.xyz\"" + ], + 'Non-ASCII characters' => [ + 'fóo bár.🙌!', + "filename=\"f\xF3o b\xE1r.?!\"; filename*=UTF-8''f%C3%B3o%20b%C3%A1r.%F0%9F%99%8C!" + ] + ]; + } + public function testBasics() { $printer = $this->getMockFormatter( null, 'mock' ); $this->assertTrue( $printer->canPrintErrors() ); @@ -220,7 +263,7 @@ class ApiFormatBaseTest extends ApiFormatTestBase { ); $this->assertSame( 'DENY', $response->getHeader( 'X-Frame-Options' ) ); $this->assertSame( - 'inline; filename="api-result.html"', $response->getHeader( 'Content-Disposition' ) + 'inline; filename=api-result.html', $response->getHeader( 'Content-Disposition' ) ); } -- 2.20.1