Database: Support parenthesized JOINs
authorBrad Jorsch <bjorsch@wikimedia.org>
Fri, 13 Oct 2017 17:51:43 +0000 (13:51 -0400)
committerAnomie <bjorsch@wikimedia.org>
Fri, 13 Oct 2017 19:12:16 +0000 (19:12 +0000)
SQL supports parentheses for grouping in the FROM clause.[1] This is
useful when you want to left-join against a join of other tables.

For example, say you have tables 'a', 'b', and 'c'. You want all rows
from 'a', along with rows from 'b' + 'c' only where both of those
exist.

 SELECT * FROM a LEFT JOIN b ON (a_b = b_id) JOIN c ON (b_c = c_id)

doesn't work, it'll only give you the rows where 'c' exists.

 SELECT * FROM a LEFT JOIN b ON (a_b = b_id) LEFT JOIN c ON (b_c = c_id)

doesn't work either, it'll give you rows from 'b' without a
corresponding row in 'c'. What you need to do is

 SELECT * FROM a LEFT JOIN (b JOIN c ON (b_c = c_id)) ON (a_b = b_id)

This patch implements this by extending the syntax for the $table
parameter to IDatabase::select(). When passing an array of tables, if a
value in the array is itself an array that is interpreted as a request
for a parenthesized join. To produce the example above, you'd do
something like

 $db->select(
     [ 'a', 'nest' => [ 'b', 'c' ] ],
     '*',
     [],
     __METHOD__,
     [],
     [
         'c' => [ 'JOIN', 'b_c = c_id ],
         'nest' => [ 'LEFT JOIN', 'a_b = b_id' ],
     ]
 );

[1]: In standards as far back as SQL-1992 (I couldn't find an earlier
 version), and it seems to be supported by at least MySQL 5.6, MariaDB
 10.1.28, PostgreSQL 9.3, PostgreSQL 10.0, Oracle 11g R2, SQLite 3.20.1,
 and MSSQL 2014 (from local testing and sqlfiddle.com).

Change-Id: I1e0a77381e06d885650a94f53847fb82f01c2694

RELEASE-NOTES-1.31
includes/libs/rdbms/database/Database.php
includes/libs/rdbms/database/DatabaseMssql.php
includes/libs/rdbms/database/DatabasePostgres.php
includes/libs/rdbms/database/IDatabase.php
tests/phpunit/includes/libs/rdbms/database/DatabaseTest.php

index efadf9a..7597e13 100644 (file)
@@ -11,7 +11,8 @@ production.
   essential.
 
 === New features in 1.31 ===
-* …
+* Wikimedia\Rdbms\IDatabase->select() and similar methods now support
+  joins with parentheses for grouping.
 
 === External library changes in 1.31 ===
 
index bc1454b..c04e167 100644 (file)
@@ -2015,11 +2015,21 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware
                                // No alias? Set it equal to the table name
                                $alias = $table;
                        }
+
+                       if ( is_array( $table ) ) {
+                               // A parenthesized group
+                               $joinedTable = '('
+                                       . $this->tableNamesWithIndexClauseOrJOIN( $table, $use_index, $ignore_index, $join_conds )
+                                       . ')';
+                       } else {
+                               $joinedTable = $this->tableNameWithAlias( $table, $alias );
+                       }
+
                        // Is there a JOIN clause for this table?
                        if ( isset( $join_conds[$alias] ) ) {
                                list( $joinType, $conds ) = $join_conds[$alias];
                                $tableClause = $joinType;
-                               $tableClause .= ' ' . $this->tableNameWithAlias( $table, $alias );
+                               $tableClause .= ' ' . $joinedTable;
                                if ( isset( $use_index[$alias] ) ) { // has USE INDEX?
                                        $use = $this->useIndexClause( implode( ',', (array)$use_index[$alias] ) );
                                        if ( $use != '' ) {
@@ -2041,7 +2051,7 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware
                                $retJOIN[] = $tableClause;
                        } elseif ( isset( $use_index[$alias] ) ) {
                                // Is there an INDEX clause for this table?
-                               $tableClause = $this->tableNameWithAlias( $table, $alias );
+                               $tableClause = $joinedTable;
                                $tableClause .= ' ' . $this->useIndexClause(
                                                implode( ',', (array)$use_index[$alias] )
                                        );
@@ -2049,14 +2059,14 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware
                                $ret[] = $tableClause;
                        } elseif ( isset( $ignore_index[$alias] ) ) {
                                // Is there an INDEX clause for this table?
-                               $tableClause = $this->tableNameWithAlias( $table, $alias );
+                               $tableClause = $joinedTable;
                                $tableClause .= ' ' . $this->ignoreIndexClause(
                                                implode( ',', (array)$ignore_index[$alias] )
                                        );
 
                                $ret[] = $tableClause;
                        } else {
-                               $tableClause = $this->tableNameWithAlias( $table, $alias );
+                               $tableClause = $joinedTable;
 
                                $ret[] = $tableClause;
                        }
index 8a69eec..53beb65 100644 (file)
@@ -440,8 +440,14 @@ class DatabaseMssql extends Database {
                if ( strpos( $sql, 'MAX(' ) !== false || strpos( $sql, 'MIN(' ) !== false ) {
                        $bitColumns = [];
                        if ( is_array( $table ) ) {
-                               foreach ( $table as $t ) {
-                                       $bitColumns += $this->getBitColumns( $this->tableName( $t ) );
+                               $tables = $table;
+                               while ( $tables ) {
+                                       $t = array_pop( $tables );
+                                       if ( is_array( $t ) ) {
+                                               $tables = array_merge( $tables, $t );
+                                       } else {
+                                               $bitColumns += $this->getBitColumns( $this->tableName( $t ) );
+                                       }
                                }
                        } else {
                                $bitColumns = $this->getBitColumns( $this->tableName( $table ) );
index 5a7da49..8c21d72 100644 (file)
@@ -532,26 +532,30 @@ __INDEXATTR__;
                                unset( $options[$forUpdateKey] );
                                $options['FOR UPDATE'] = [];
 
-                               // All tables not in $join_conds are good
-                               foreach ( $table as $alias => $name ) {
-                                       if ( is_numeric( $alias ) ) {
+                               $toCheck = $table;
+                               reset( $toCheck );
+                               while ( $toCheck ) {
+                                       $alias = key( $toCheck );
+                                       $name = $toCheck[$alias];
+                                       unset( $toCheck[$alias] );
+
+                                       $hasAlias = !is_numeric( $alias );
+                                       if ( !$hasAlias && is_string( $name ) ) {
                                                $alias = $name;
                                        }
-                                       if ( !isset( $join_conds[$alias] ) ) {
-                                               $options['FOR UPDATE'][] = $alias;
-                                       }
-                               }
 
-                               foreach ( $join_conds as $table_cond => $join_cond ) {
-                                       if ( 0 === preg_match( '/^(?:LEFT|RIGHT|FULL)(?: OUTER)? JOIN$/i', $join_cond[0] ) ) {
-                                               $options['FOR UPDATE'][] = $table_cond;
+                                       if ( !isset( $join_conds[$alias] ) ||
+                                               !preg_match( '/^(?:LEFT|RIGHT|FULL)(?: OUTER)? JOIN$/i', $join_conds[$alias][0] )
+                                       ) {
+                                               if ( is_array( $name ) ) {
+                                                       // It's a parenthesized group, process all the tables inside the group.
+                                                       $toCheck = array_merge( $toCheck, $name );
+                                               } else {
+                                                       // Quote alias names so $this->tableName() won't mangle them
+                                                       $options['FOR UPDATE'][] = $hasAlias ? $this->addIdentifierQuotes( $alias ) : $alias;
+                                               }
                                        }
                                }
-
-                               // Quote alias names so $this->tableName() won't mangle them
-                               $options['FOR UPDATE'] = array_map( function ( $name ) use ( $table ) {
-                                       return isset( $table[$name] ) ? $this->addIdentifierQuotes( $name ) : $name;
-                               }, $options['FOR UPDATE'] );
                        }
 
                        if ( isset( $options['ORDER BY'] ) && $options['ORDER BY'] == 'NULL' ) {
index 67e8e85..868c2d4 100644 (file)
@@ -620,6 +620,19 @@ interface IDatabase {
         * This includes the user table in the query, with the alias "a" available
         * for use in field names (e.g. a.user_name).
         *
+        * Joins using parentheses for grouping (since MediaWiki 1.31) may be
+        * constructed using nested arrays. For example,
+        *
+        *    [ 'tableA', 'nestedB' => [ 'tableB', 'b2' => 'tableB2' ] ]
+        *
+        * along with `$join_conds` like
+        *
+        *    [ 'b2' => [ 'JOIN', 'b_id = b2_id' ], 'nestedB' => [ 'LEFT JOIN', 'b_a = a_id' ] ]
+        *
+        * will produce SQL something like
+        *
+        *    FROM tableA LEFT JOIN (tableB JOIN tableB2 AS b2 ON (b_id = b2_id)) ON (b_a = a_id)
+        *
         * All of the table names given here are automatically run through
         * Database::tableName(), which causes the table prefix (if any) to be
         * added, and various other table name mappings to be performed.
index 70b6c36..ee7ad2f 100644 (file)
@@ -94,6 +94,45 @@ class DatabaseTest extends PHPUnit_Framework_TestCase {
                );
        }
 
+       public function provideTableNamesWithIndexClauseOrJOIN() {
+               return [
+                       'one-element array' => [
+                               [ 'table' ], [], 'table '
+                       ],
+                       'comma join' => [
+                               [ 'table1', 'table2' ], [], 'table1,table2 '
+                       ],
+                       'real join' => [
+                               [ 'table1', 'table2' ],
+                               [ 'table2' => [ 'LEFT JOIN', 't1_id = t2_id' ] ],
+                               'table1 LEFT JOIN table2 ON ((t1_id = t2_id))'
+                       ],
+                       'real join with multiple conditionals' => [
+                               [ 'table1', 'table2' ],
+                               [ 'table2' => [ 'LEFT JOIN', [ 't1_id = t2_id', 't2_x = \'X\'' ] ] ],
+                               'table1 LEFT JOIN table2 ON ((t1_id = t2_id) AND (t2_x = \'X\'))'
+                       ],
+                       'join with parenthesized group' => [
+                               [ 'table1', 'n' => [ 'table2', 'table3' ] ],
+                               [
+                                       'table3' => [ 'JOIN', 't2_id = t3_id' ],
+                                       'n' => [ 'LEFT JOIN', 't1_id = t2_id' ],
+                               ],
+                               'table1 LEFT JOIN (table2 JOIN table3 ON ((t2_id = t3_id))) ON ((t1_id = t2_id))'
+                       ],
+               ];
+       }
+
+       /**
+        * @dataProvider provideTableNamesWithIndexClauseOrJOIN
+        * @covers Wikimedia\Rdbms\Database::tableNamesWithIndexClauseOrJOIN
+        */
+       public function testTableNamesWithIndexClauseOrJOIN( $tables, $join_conds, $expect ) {
+               $clause = TestingAccessWrapper::newFromObject( $this->db )
+                       ->tableNamesWithIndexClauseOrJOIN( $tables, [], [], $join_conds );
+               $this->assertSame( $expect, $clause );
+       }
+
        /**
         * @covers Wikimedia\Rdbms\Database::onTransactionIdle
         * @covers Wikimedia\Rdbms\Database::runOnTransactionIdleCallbacks