Issue 301 redirects for Special:Search/searchterm
authorErik Bernhardson <ebernhardson@wikimedia.org>
Wed, 17 Aug 2016 18:35:54 +0000 (11:35 -0700)
committerErik Bernhardson <ebernhardson@wikimedia.org>
Thu, 18 Aug 2016 22:31:45 +0000 (15:31 -0700)
Including the search term, which is PII, in the page title allows for
leaking this information through page view dumps. Instead of happily
handling these issue a 301 redirect to tell clients they should not
be issueing these requests, and should instead use the search query
parameter. Dumps at wikimedia remove 30[123] response codes from the
dump output so this will also stop leaking the PII.

Change-Id: Icce7cc4585e90742a8dd3513e7c9f7276e479cd7

includes/specials/SpecialSearch.php
tests/phpunit/specials/SpecialSearchTest.php [new file with mode: 0644]

index 9690d45..26b86f9 100644 (file)
@@ -100,6 +100,25 @@ class SpecialSearch extends SpecialPage {
         * @param string $par
         */
        public function execute( $par ) {
+               $request = $this->getRequest();
+
+               // Fetch the search term
+               $search = str_replace( "\n", " ", $request->getText( 'search' ) );
+
+               // Historically search terms have been accepted not only in the search query
+               // parameter, but also as part of the primary url. This can have PII implications
+               // in releasing page view data. As such issue a 301 redirect to the correct
+               // URL.
+               if ( strlen( $par ) && !strlen( $search ) ) {
+                       $query = $request->getValues();
+                       unset( $query['title'] );
+                       // Strip underscores from title parameter; most of the time we'll want
+                       // text form here. But don't strip underscores from actual text params!
+                       $query['search'] = str_replace( '_', ' ', $par );
+                       $this->getOutput()->redirect( $this->getPageTitle()->getFullURL( $query ), 301 );
+                       return;
+               }
+
                $this->setHeaders();
                $this->outputHeader();
                $out = $this->getOutput();
@@ -110,15 +129,6 @@ class SpecialSearch extends SpecialPage {
                ] );
                $this->addHelpLink( 'Help:Searching' );
 
-               // Strip underscores from title parameter; most of the time we'll want
-               // text form here. But don't strip underscores from actual text params!
-               $titleParam = str_replace( '_', ' ', $par );
-
-               $request = $this->getRequest();
-
-               // Fetch the search term
-               $search = str_replace( "\n", " ", $request->getText( 'search', $titleParam ) );
-
                $this->load();
                if ( !is_null( $request->getVal( 'nsRemember' ) ) ) {
                        $this->saveNamespaces();
diff --git a/tests/phpunit/specials/SpecialSearchTest.php b/tests/phpunit/specials/SpecialSearchTest.php
new file mode 100644 (file)
index 0000000..20e88f5
--- /dev/null
@@ -0,0 +1,23 @@
+<?php
+
+class SpecialSearchText extends \PHPUnit_Framework_TestCase {
+       public function testSubPageRedirect() {
+               $ctx = new RequestContext;
+
+               SpecialPageFactory::executePath(
+                       Title::newFromText( 'Special:Search/foo_bar' ),
+                       $ctx
+               );
+               $url = $ctx->getOutput()->getRedirect();
+               // some older versions of hhvm have a bug that doesn't parse relative
+               // urls with a port, so help it out a little bit.
+               // https://github.com/facebook/hhvm/issues/7136
+               $url = wfExpandUrl( $url, PROTO_CURRENT );
+
+               $parts = parse_url( $url );
+               $this->assertEquals( '/w/index.php', $parts['path'] );
+               parse_str( $parts['query'], $query );
+               $this->assertEquals( 'Special:Search', $query['title'] );
+               $this->assertEquals( 'foo bar', $query['search'] );
+       }
+}