Implement path routing code.
authorDaniel Friesen <dantman@users.mediawiki.org>
Sat, 26 Nov 2011 16:29:25 +0000 (16:29 +0000)
committerDaniel Friesen <dantman@users.mediawiki.org>
Sat, 26 Nov 2011 16:29:25 +0000 (16:29 +0000)
- Makes extending paths with extensions simpler.
- Should fix bug 32621 by parsing paths based on pattern weight rather than pattern order.

RELEASE-NOTES-1.19
docs/hooks.txt
includes/AutoLoader.php
includes/PathRouter.php [new file with mode: 0644]
includes/WebRequest.php
tests/phpunit/includes/PathRouterTest.php [new file with mode: 0644]

index ee8a54b..5085fda 100644 (file)
@@ -93,6 +93,8 @@ production.
 * (bug 24037) Add byte length of revision to Special:Contributions.
 * (bug 1672) Added $wgDisableUploadScriptChecks to allow uploading of files
   containing HTML or JS. DISABLING THESE CHECKS IS VERY DANGEROUS.
+* New path mappings can be added using the WebRequestPathInfoRouter hook
+  and adding paths to the PathRouter.
 
 === Bug fixes in 1.19 ===
 * $wgUploadNavigationUrl should be used for file redlinks if.
index 72f6576..4e1cae8 100644 (file)
@@ -2186,14 +2186,8 @@ $title: Title object
 $redirect: whether the page is a redirect
 $skin: Skin object
 
-'WebRequestGetPathInfoRequestURI': while extracting path info from REQUEST_URI.
-       Allows an extension to extend the extraction of titles from paths.
-       Implementing hooks should follow the pattern used in core:
-       * Use the `$matches = WebRequest::extractTitle` pattern
-       * Ensure that you test `if ( !$matches ) {` before you try extracting a title
-         from the path so that you don't override an already found match.
-$path: The request path to extract a title from.
-&$matches: The array to apply matches to.
+'WebRequestPathInfoRouter': While building the PathRouter to parse the REQUEST_URI.
+$router: The PathRouter instance
 
 'WikiExporter::dumpStableQuery': Get the SELECT query for "stable" revisions
 dumps
index 307326a..e7a9577 100644 (file)
@@ -163,6 +163,7 @@ $wgAutoloadLocalClasses = array(
        'PageQueryPage' => 'includes/PageQueryPage.php',
        'Pager' => 'includes/Pager.php',
        'PasswordError' => 'includes/User.php',
+       'PathRouter' => 'includes/PathRouter.php',
        'PermissionsError' => 'includes/Exception.php',
        'PhpHttpRequest' => 'includes/HttpFunctions.php',
        'PoolCounter' => 'includes/PoolCounter.php',
diff --git a/includes/PathRouter.php b/includes/PathRouter.php
new file mode 100644 (file)
index 0000000..5dbcfb1
--- /dev/null
@@ -0,0 +1,269 @@
+<?php
+/**
+ * PathRouter class.
+ * This class can take patterns such as /wiki/$1 and use them to
+ * parse query parameters out of REQUEST_URI paths.
+ *
+ * $router->add( "/wiki/$1" );
+ *   - Matches /wiki/Foo style urls and extracts the title
+ * $router->add( array( 'edit' => "/edit/$1" ), array( 'action' => '$key' ) );
+ *   - Matches /edit/Foo style urls and sets action=edit
+ * $router->add( '/$2/$1',
+ *   array( 'variant' => '$2' ),
+ *   array( '$2' => array( 'zh-hant', 'zh-hans' )
+ * );
+ *   - Matches /zh-hant/Foo or /zh-hans/Foo
+ * $router->addStrict( "/foo/Bar", array( 'title' => 'Baz' ) );
+ *   - Matches /foo/Bar explicitly and uses "Baz" as the title
+ * $router->add( '/help/$1', array( 'title' => 'Help:$1' ) );
+ *   - Matches /help/Foo with "Help:Foo" as the title
+ * $router->add( '/help/$1', array( 'title' => 'Help:$1' ) );
+ *   - Matches 
+ * $router->add( '/$1', array( 'foo' => array( 'value' => 'bar$2' ) );
+ *   - Matches /Foo and sets 'foo=bar$2' without $2 being replaced
+ * $router->add( '/$1', array( 'data:foo' => 'bar' ), array( 'callback' => 'functionname' ) );
+ *   - Matches /Foo, adds the key 'foo' with the value 'bar' to the data array
+ *     and calls functionname( &$matches, $data );
+ *
+ * Path patterns:
+ *   - Paths may contain $# patterns such as $1, $2, etc...
+ *   - $1 will match 0 or more while the rest will match 1 or more
+ *   - Unless you use addStrict "/wiki" and "/wiki/" will be expanded to "/wiki/$1"
+ *
+ * Params:
+ *   - In a pattern $1, $2, etc... will be replaced with the relevant contents
+ *   - If you used a keyed array as a path pattern $key will be replaced with the relevant contents
+ *   - The default behavior is equivalent to `array( 'title' => '$1' )`, if you don't want the title parameter you can explicitly use `array( 'title' => false )`
+ *   - You can specify a value that won't have replacements in it using `'foo' => array( 'value' => 'bar' );`
+ *
+ * Options:
+ *   - The option keys $1, $2, etc... can be specified to restrict the possible values of that variable.
+ *     A string can be used for a single value, or an array for multiple.
+ *   - When the option key 'strict' is set (Using addStrict is simpler than doing this directly)
+ *     the path won't have $1 implicitly added to it.
+ *   - The option key 'callback' can specify a callback that will be run when a path is matched.
+ *     The callback will have the arguments ( &$matches, $data ) and the matches array can be modified.
+ *
+ * @since 1.19
+ * @author Daniel Friesen
+ */
+class PathRouter {
+       
+       protected function doAdd( $path, $params, $options, $key = null ) {
+               if ( $path[0] !== '/' ) {
+                       $path = '/' . $path;
+               }
+
+               if ( !isset( $options['strict'] ) || !$options['strict'] ) {
+                       // Unless this is a strict path make sure that the path has a $1
+                       if ( strpos( $path, '$1' ) === false ) {
+                               if ( substr( $path, -1 ) !== '/' ) {
+                                       $path .= '/';
+                               }
+                               $path .= '$1';
+                       }
+               }
+
+               if ( !isset( $params['title'] ) && strpos( $path, '$1' ) !== false ) {
+                       $params['title'] = '$1';
+               }
+               if ( isset( $params['title'] ) && $params['title'] === false ) {
+                       unset( $params['title'] );
+               }
+
+               foreach ( $params as $paramName => $paramData ) {
+                       if ( is_string( $paramData ) ) {
+                               if ( preg_match( '/\$(\d+|key)/', $paramData ) ) {
+                                       $paramArrKey = 'pattern';
+                               } else {
+                                       // If there's no replacement use a value instead
+                                       // of a pattern for a little more efficiency
+                                       $paramArrKey = 'value';
+                               }
+                               $params[$paramName] = array(
+                                       $paramArrKey => $paramData
+                               );
+                       }
+               }
+
+               foreach ( $options as $optionName => $optionData ) {
+                       if ( preg_match( '/^\$\d+$/', $optionName ) ) {
+                               if ( !is_array( $optionData ) ) {
+                                       $options[$optionName] = array( $optionData );
+                               }
+                       }
+               }
+
+               $pattern = (object)array(
+                       'path' => $path,
+                       'params' => $params,
+                       'options' => $options,
+                       'key' => $key,
+               );
+               $pattern->weight = self::makeWeight( $pattern );
+               $this->patterns[] = $pattern;
+       }
+
+       /**
+        * Add a new path pattern to the path router
+        *
+        * @param $path The path pattern to add
+        * @param $params The params for this path pattern
+        * @param $options The options for this path pattern
+        */
+       public function add( $path, $params = array(), $options = array() ) {
+               if ( is_array( $path ) ) {
+                       foreach ( $path as $key => $onePath ) {
+                               $this->doAdd( $onePath, $params, $options, $key );
+                       }
+               } else {
+                       $this->doAdd( $path, $params, $options );
+               }
+       }
+
+       /**
+        * Add a new path pattern to the path router with the strict option on
+        * @see self::add
+        */
+       public function addStrict( $path, $params = array(), $options = array() ) {
+               $options['strict'] = true;
+               $this->add( $path, $params, $options );
+       }
+
+       protected function sortByWeight() {
+               $weights = array();
+               foreach( $this->patterns as $key => $pattern ) {
+                       $weights[$key] = $pattern->weight;
+               }
+               array_multisort( $weights, SORT_DESC, SORT_NUMERIC, $this->patterns );
+       }
+
+       public static function makeWeight( $pattern ) {
+               # Start with a weight of 0
+               $weight = 0;
+
+               // Explode the path to work with
+               $path = explode( '/', $pattern->path );
+
+               # For each level of the path
+               foreach( $path as $piece ) {
+                       if ( preg_match( '/^\$(\d+|key)$/', $piece ) ) {
+                               # For a piece that is only a $1 variable add 1 points of weight
+                               $weight += 1;
+                       } elseif ( preg_match( '/\$(\d+|key)/', $piece ) ) {
+                               # For a piece that simply contains a $1 variable add 2 points of weight
+                               $weight += 2;
+                       } else {
+                               # For a solid piece add a full 3 points of weight
+                               $weight += 3;
+                       }
+               }
+
+               foreach ( $pattern->options as $key => $option ) {
+                       if ( preg_match( '/^\$\d+$/', $key ) ) {
+                               # Add 0.5 for restrictions to values
+                               # This way given two separate "/$2/$1" patterns the
+                               # one with a limited set of $2 values will dominate
+                               # the one that'll match more loosely
+                               $weight += 0.5;
+                       }
+               }
+
+               return $weight;
+       }
+
+       /**
+        * Parse a path and return the query matches for the path
+        *
+        * @param $path The path to parse
+        * @return Array The array of matches for the path
+        */
+       public function parse( $path ) {
+               $this->sortByWeight();
+               
+               $matches = null;
+
+               foreach ( $this->patterns as $pattern ) {
+                       $matches = self::extractTitle( $path, $pattern );
+                       if ( !is_null( $matches ) ) {
+                               break;
+                       }
+               }
+
+               return is_null( $matches ) ? array() : $matches;
+       }
+
+       protected static function extractTitle( $path, $pattern ) {
+               $regexp = preg_quote( $pattern->path, '#' );
+               $regexp = preg_replace( '#\\\\\$1#', '(?P<par1>.*)', $regexp );
+               $regexp = preg_replace( '#\\\\\$(\d+)#', '(?P<par$1>.+?)', $regexp );
+               $regexp = "#^{$regexp}$#";
+
+               $matches = array();
+               $data = array();
+
+               if ( preg_match( $regexp, $path, $m ) ) {
+                       foreach ( $pattern->options as $key => $option ) {
+                               if ( preg_match( '/^\$\d+$/', $key ) ) {
+                                       $n = intval( substr( $key, 1 ) );
+                                       $value = $m["par{$n}"];
+                                       if ( !in_array( $value, $option ) ) {
+                                               return null;
+                                       }
+                               }
+                       }
+
+                       foreach ( $m as $matchKey => $matchValue ) {
+                               if ( preg_match( '/^par\d+$/', $matchKey ) ) {
+                                       $n = intval( substr( $matchKey, 3 ) );
+                                       $data['$'.$n] = $matchValue;
+                               }
+                       }
+
+                       foreach ( $pattern->params as $paramName => $paramData ) {
+                               $value = null;
+                               if ( preg_match( '/^data:/', $paramName ) ) {
+                                       $isData = true;
+                                       $key = substr( $paramName, 5 );
+                               } else {
+                                       $isData = false;
+                                       $key = $paramName;
+                               }
+
+                               if ( isset( $paramData['value'] ) ) {
+                                       $value = $paramData['value'];
+                               } elseif ( isset( $paramData['pattern'] ) ) {
+                                       $value = $paramData['pattern'];
+                                       foreach ( $m as $matchKey => $matchValue ) {
+                                               if ( preg_match( '/^par\d+$/', $matchKey ) ) {
+                                                       $n = intval( substr( $matchKey, 3 ) );
+                                                       $value = str_replace( '$' . $n, $matchValue, $value );
+                                               }
+                                       }
+                                       if ( isset( $pattern->key ) ) {
+                                               $value = str_replace( '$key', $pattern->key, $value );
+                                       }
+                                       if ( preg_match( '/\$(\d+|key)/', $value ) ) {
+                                               // Still contains $# or $key patterns after replacement
+                                               // Seams like we don't have all the data, abort
+                                               return null;
+                                       }
+                               }
+
+                               if ( $isData ) {
+                                       $data[$key] = $value;
+                               } else {
+                                       $matches[$key] = $value;
+                               }
+                       }
+
+                       if ( isset( $pattern->options['callback'] ) ) {
+                               call_user_func_array( $pattern->options['callback'], array( &$matches, $data ) );
+                       }
+               } else {
+                       return null;
+               }
+               return $matches;
+       }
+
+}
index 9f6d277..7b43c8d 100644 (file)
@@ -93,40 +93,49 @@ class WebRequest {
                                        // Abort to keep from breaking...
                                        return $matches;
                                }
+                               
+                               $router = new PathRouter;
+
                                // Raw PATH_INFO style
-                               $matches = self::extractTitle( $path, "$wgScript/$1" );
+                               $router->add( "$wgScript/$1" );
 
-                               if( !$matches
-                                       && isset( $_SERVER['SCRIPT_NAME'] )
+                               if( isset( $_SERVER['SCRIPT_NAME'] )
                                        && preg_match( '/\.php5?/', $_SERVER['SCRIPT_NAME'] ) )
                                {
                                        # Check for SCRIPT_NAME, we handle index.php explicitly
                                        # But we do have some other .php files such as img_auth.php
                                        # Don't let root article paths clober the parsing for them
-                                       $matches = self::extractTitle( $path, $_SERVER['SCRIPT_NAME'] . "/$1" );
+                                       $router->add( $_SERVER['SCRIPT_NAME'] . "/$1" );
                                }
 
                                global $wgArticlePath;
-                               if( !$matches && $wgArticlePath ) {
-                                       $matches = self::extractTitle( $path, $wgArticlePath );
+                               if( $wgArticlePath ) {
+                                       $router->add( $wgArticlePath );
                                }
 
                                global $wgActionPaths;
-                               if( !$matches && $wgActionPaths ) {
-                                       $matches = self::extractTitle( $path, $wgActionPaths, 'action' );
+                               if( $wgActionPaths ) {
+                                       $router->add( $wgActionPaths, array( 'action' => '$key' ) );
                                }
 
                                global $wgVariantArticlePath, $wgContLang;
-                               if( !$matches && $wgVariantArticlePath ) {
-                                       $variantPaths = array();
+                               if( $wgVariantArticlePath ) {
+                                       /*$variantPaths = array();
                                        foreach( $wgContLang->getVariants() as $variant ) {
                                                $variantPaths[$variant] =
                                                        str_replace( '$2', $variant, $wgVariantArticlePath );
                                        }
-                                       $matches = self::extractTitle( $path, $variantPaths, 'variant' );
+                                       $router->add( $variantPaths, array( 'parameter' => 'variant' ) );*/
+                                       // Maybe actually this?
+                                       $router->add( $wgVariantArticlePath,
+                                               array( 'variant' => '$2'),
+                                               array( '$2' => $wgContLang->getVariants() )
+                                       );
                                }
 
-                               wfRunHooks( 'WebRequestGetPathInfoRequestURI', array( $path, &$matches ) );
+                               wfRunHooks( 'WebRequestPathInfoRouter', array( $router ) );
+
+                               $matches = $router->parse( $path );
                        }
                } elseif ( isset( $_SERVER['ORIG_PATH_INFO'] ) && $_SERVER['ORIG_PATH_INFO'] != '' ) {
                        // Mangled PATH_INFO
diff --git a/tests/phpunit/includes/PathRouterTest.php b/tests/phpunit/includes/PathRouterTest.php
new file mode 100644 (file)
index 0000000..07fa65d
--- /dev/null
@@ -0,0 +1,155 @@
+<?php
+/**
+ * Tests for the PathRouter parsing
+ */
+
+class PathRouterTest extends MediaWikiTestCase {
+
+       /**
+        * Test basic path parsing
+        */
+       public function testBasic() {
+               $router = new PathRouter;
+               $router->add("/$1");
+               $matches = $router->parse( "/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "Foo" ) );
+       }
+
+       /**
+        * Test loose path auto-$1
+        */
+       public function testLoose() {
+               $router = new PathRouter;
+               $router->add("/"); # Should be the same as "/$1"
+               $matches = $router->parse( "/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "Foo" ) );
+
+               $router = new PathRouter;
+               $router->add("/wiki"); # Should be the same as /wiki/$1
+               $matches = $router->parse( "/wiki/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "Foo" ) );
+
+               $router = new PathRouter;
+               $router->add("/wiki/"); # Should be the same as /wiki/$1
+               $matches = $router->parse( "/wiki/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "Foo" ) );
+       }
+
+       /**
+        * Test to ensure that path is based on specifity, not order
+        */
+       public function testOrder() {
+               $router = new PathRouter;
+               $router->add("/$1");
+               $router->add("/a/$1");
+               $router->add("/b/$1");
+               $matches = $router->parse( "/a/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "Foo" ) );
+
+               $router = new PathRouter;
+               $router->add("/b/$1");
+               $router->add("/a/$1");
+               $router->add("/$1");
+               $matches = $router->parse( "/a/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "Foo" ) );
+       }
+
+       /**
+        * Test the handling of key based arrays with a url parameter
+        */
+       public function testKeyParameter() {
+               $router = new PathRouter;
+               $router->add( array( 'edit' => "/edit/$1" ), array( 'action' => '$key' ) );
+               $matches = $router->parse( "/edit/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "Foo", 'action' => 'edit' ) );
+       }
+
+       /**
+        * Test the handling of $2 inside paths
+        */
+       public function testAdditionalParameter() {
+               // Basic $2
+               $router = new PathRouter;
+               $router->add( '/$2/$1', array( 'test' => '$2' ) );
+               $matches = $router->parse( "/asdf/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "Foo", 'test' => 'asdf' ) );
+       }
+
+       /**
+        * Test additional restricted value parameter
+        */
+       public function testRestrictedValue() {
+               $router = new PathRouter;
+               $router->add( '/$2/$1',
+                       array( 'test' => '$2' ),
+                       array( '$2' => array( 'a', 'b' ) )
+               );
+               $router->add( '/$2/$1',
+                       array( 'test2' => '$2' ),
+                       array( '$2' => 'c' )
+               );
+               $router->add( '/$1' );
+
+               $matches = $router->parse( "/asdf/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "asdf/Foo" ) );
+
+               $matches = $router->parse( "/a/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "Foo", 'test' => 'a' ) );
+
+               $matches = $router->parse( "/c/Foo" );
+               $this->assertEquals( $matches, array( 'title' => "Foo", 'test2' => 'c' ) );
+       }
+
+       public function callbackForTest( &$matches, $data ) {
+               $matches['x'] = $data['$1'];
+               $matches['foo'] = $data['foo'];
+       }
+
+       public function testCallback() {
+               $router = new PathRouter;
+               $router->add( "/$1",
+                       array( 'a' => 'b', 'data:foo' => 'bar' ),
+                       array( 'callback' => array( $this, 'callbackForTest' ) )
+               );
+               $matches = $router->parse( '/Foo');
+               $this->assertEquals( $matches, array(
+                       'title' => "Foo",
+                       'x' => 'Foo',
+                       'a' => 'b',
+                       'foo' => 'bar'
+               ) );
+       }
+
+       /**
+        * Test to ensure weight of paths is handled correctly
+        */
+       public function testWeight() {
+               $router = new PathRouter;
+               $router->addStrict( "/Bar", array( 'ping' => 'pong' ) );
+               $router->add( "/asdf-$1", array( 'title' => 'qwerty-$1' ) );
+               $router->add( "/$1" );
+               $router->add( "/qwerty-$1", array( 'title' => 'asdf-$1' ) );
+               $router->addStrict( "/Baz", array( 'marco' => 'polo' ) );
+               $router->add( "/a/$1" );
+               $router->add( "/asdf/$1" );
+               $router->add( "/$2/$1", array( 'unrestricted' => '$2' ) );
+               $router->add( array( 'qwerty' => "/qwerty/$1" ), array( 'qwerty' => '$key' ) );
+               $router->add( "/$2/$1", array( 'restricted-to-y' => '$2' ), array( '$2' => 'y' ) );
+
+               foreach( array(
+                       "/Foo" => array( 'title' => "Foo" ),
+                       "/Bar" => array( 'ping' => 'pong' ),
+                       "/Baz" => array( 'marco' => 'polo' ),
+                       "/asdf-foo" => array( 'title' => "qwerty-foo" ),
+                       "/qwerty-bar" => array( 'title' => "asdf-bar" ),
+                       "/a/Foo" => array( 'title' => "Foo" ),
+                       "/asdf/Foo" => array( 'title' => "Foo" ),
+                       "/qwerty/Foo" => array( 'title' => "Foo", 'qwerty' => 'qwerty' ),
+                       "/baz/Foo" => array( 'title' => "Foo", 'unrestricted' => 'baz' ),
+                       "/y/Foo" => array( 'title' => "Foo", 'restricted-to-y' => 'y' ),
+               ) as $path => $result ) {
+                       $this->assertEquals( $router->parse( $path ), $result );
+               }
+       }
+
+}