From 4bd3e80e7ac013fc225f295a08c7709954889f6e Mon Sep 17 00:00:00 2001 From: Brad Jorsch Date: Fri, 13 Oct 2017 13:51:43 -0400 Subject: [PATCH] Database: Support parenthesized JOINs SQL supports parentheses for grouping in the FROM clause.[1] This is useful when you want to left-join against a join of other tables. For example, say you have tables 'a', 'b', and 'c'. You want all rows from 'a', along with rows from 'b' + 'c' only where both of those exist. SELECT * FROM a LEFT JOIN b ON (a_b = b_id) JOIN c ON (b_c = c_id) doesn't work, it'll only give you the rows where 'c' exists. SELECT * FROM a LEFT JOIN b ON (a_b = b_id) LEFT JOIN c ON (b_c = c_id) doesn't work either, it'll give you rows from 'b' without a corresponding row in 'c'. What you need to do is SELECT * FROM a LEFT JOIN (b JOIN c ON (b_c = c_id)) ON (a_b = b_id) This patch implements this by extending the syntax for the $table parameter to IDatabase::select(). When passing an array of tables, if a value in the array is itself an array that is interpreted as a request for a parenthesized join. To produce the example above, you'd do something like $db->select( [ 'a', 'nest' => [ 'b', 'c' ] ], '*', [], __METHOD__, [], [ 'c' => [ 'JOIN', 'b_c = c_id ], 'nest' => [ 'LEFT JOIN', 'a_b = b_id' ], ] ); [1]: In standards as far back as SQL-1992 (I couldn't find an earlier version), and it seems to be supported by at least MySQL 5.6, MariaDB 10.1.28, PostgreSQL 9.3, PostgreSQL 10.0, Oracle 11g R2, SQLite 3.20.1, and MSSQL 2014 (from local testing and sqlfiddle.com). Change-Id: I1e0a77381e06d885650a94f53847fb82f01c2694 --- RELEASE-NOTES-1.31 | 3 +- includes/libs/rdbms/database/Database.php | 18 +++++++-- .../libs/rdbms/database/DatabaseMssql.php | 10 ++++- .../libs/rdbms/database/DatabasePostgres.php | 34 +++++++++------- includes/libs/rdbms/database/IDatabase.php | 13 +++++++ .../libs/rdbms/database/DatabaseTest.php | 39 +++++++++++++++++++ 6 files changed, 95 insertions(+), 22 deletions(-) diff --git a/RELEASE-NOTES-1.31 b/RELEASE-NOTES-1.31 index efadf9abb1..7597e13463 100644 --- a/RELEASE-NOTES-1.31 +++ b/RELEASE-NOTES-1.31 @@ -11,7 +11,8 @@ production. essential. === New features in 1.31 === -* … +* Wikimedia\Rdbms\IDatabase->select() and similar methods now support + joins with parentheses for grouping. === External library changes in 1.31 === diff --git a/includes/libs/rdbms/database/Database.php b/includes/libs/rdbms/database/Database.php index bc1454baec..c04e167738 100644 --- a/includes/libs/rdbms/database/Database.php +++ b/includes/libs/rdbms/database/Database.php @@ -2015,11 +2015,21 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware // No alias? Set it equal to the table name $alias = $table; } + + if ( is_array( $table ) ) { + // A parenthesized group + $joinedTable = '(' + . $this->tableNamesWithIndexClauseOrJOIN( $table, $use_index, $ignore_index, $join_conds ) + . ')'; + } else { + $joinedTable = $this->tableNameWithAlias( $table, $alias ); + } + // Is there a JOIN clause for this table? if ( isset( $join_conds[$alias] ) ) { list( $joinType, $conds ) = $join_conds[$alias]; $tableClause = $joinType; - $tableClause .= ' ' . $this->tableNameWithAlias( $table, $alias ); + $tableClause .= ' ' . $joinedTable; if ( isset( $use_index[$alias] ) ) { // has USE INDEX? $use = $this->useIndexClause( implode( ',', (array)$use_index[$alias] ) ); if ( $use != '' ) { @@ -2041,7 +2051,7 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware $retJOIN[] = $tableClause; } elseif ( isset( $use_index[$alias] ) ) { // Is there an INDEX clause for this table? - $tableClause = $this->tableNameWithAlias( $table, $alias ); + $tableClause = $joinedTable; $tableClause .= ' ' . $this->useIndexClause( implode( ',', (array)$use_index[$alias] ) ); @@ -2049,14 +2059,14 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware $ret[] = $tableClause; } elseif ( isset( $ignore_index[$alias] ) ) { // Is there an INDEX clause for this table? - $tableClause = $this->tableNameWithAlias( $table, $alias ); + $tableClause = $joinedTable; $tableClause .= ' ' . $this->ignoreIndexClause( implode( ',', (array)$ignore_index[$alias] ) ); $ret[] = $tableClause; } else { - $tableClause = $this->tableNameWithAlias( $table, $alias ); + $tableClause = $joinedTable; $ret[] = $tableClause; } diff --git a/includes/libs/rdbms/database/DatabaseMssql.php b/includes/libs/rdbms/database/DatabaseMssql.php index 8a69eec428..53beb65f9a 100644 --- a/includes/libs/rdbms/database/DatabaseMssql.php +++ b/includes/libs/rdbms/database/DatabaseMssql.php @@ -440,8 +440,14 @@ class DatabaseMssql extends Database { if ( strpos( $sql, 'MAX(' ) !== false || strpos( $sql, 'MIN(' ) !== false ) { $bitColumns = []; if ( is_array( $table ) ) { - foreach ( $table as $t ) { - $bitColumns += $this->getBitColumns( $this->tableName( $t ) ); + $tables = $table; + while ( $tables ) { + $t = array_pop( $tables ); + if ( is_array( $t ) ) { + $tables = array_merge( $tables, $t ); + } else { + $bitColumns += $this->getBitColumns( $this->tableName( $t ) ); + } } } else { $bitColumns = $this->getBitColumns( $this->tableName( $table ) ); diff --git a/includes/libs/rdbms/database/DatabasePostgres.php b/includes/libs/rdbms/database/DatabasePostgres.php index 5a7da4976f..8c21d726b0 100644 --- a/includes/libs/rdbms/database/DatabasePostgres.php +++ b/includes/libs/rdbms/database/DatabasePostgres.php @@ -532,26 +532,30 @@ __INDEXATTR__; unset( $options[$forUpdateKey] ); $options['FOR UPDATE'] = []; - // All tables not in $join_conds are good - foreach ( $table as $alias => $name ) { - if ( is_numeric( $alias ) ) { + $toCheck = $table; + reset( $toCheck ); + while ( $toCheck ) { + $alias = key( $toCheck ); + $name = $toCheck[$alias]; + unset( $toCheck[$alias] ); + + $hasAlias = !is_numeric( $alias ); + if ( !$hasAlias && is_string( $name ) ) { $alias = $name; } - if ( !isset( $join_conds[$alias] ) ) { - $options['FOR UPDATE'][] = $alias; - } - } - foreach ( $join_conds as $table_cond => $join_cond ) { - if ( 0 === preg_match( '/^(?:LEFT|RIGHT|FULL)(?: OUTER)? JOIN$/i', $join_cond[0] ) ) { - $options['FOR UPDATE'][] = $table_cond; + if ( !isset( $join_conds[$alias] ) || + !preg_match( '/^(?:LEFT|RIGHT|FULL)(?: OUTER)? JOIN$/i', $join_conds[$alias][0] ) + ) { + if ( is_array( $name ) ) { + // It's a parenthesized group, process all the tables inside the group. + $toCheck = array_merge( $toCheck, $name ); + } else { + // Quote alias names so $this->tableName() won't mangle them + $options['FOR UPDATE'][] = $hasAlias ? $this->addIdentifierQuotes( $alias ) : $alias; + } } } - - // Quote alias names so $this->tableName() won't mangle them - $options['FOR UPDATE'] = array_map( function ( $name ) use ( $table ) { - return isset( $table[$name] ) ? $this->addIdentifierQuotes( $name ) : $name; - }, $options['FOR UPDATE'] ); } if ( isset( $options['ORDER BY'] ) && $options['ORDER BY'] == 'NULL' ) { diff --git a/includes/libs/rdbms/database/IDatabase.php b/includes/libs/rdbms/database/IDatabase.php index 67e8e85518..868c2d4b00 100644 --- a/includes/libs/rdbms/database/IDatabase.php +++ b/includes/libs/rdbms/database/IDatabase.php @@ -620,6 +620,19 @@ interface IDatabase { * This includes the user table in the query, with the alias "a" available * for use in field names (e.g. a.user_name). * + * Joins using parentheses for grouping (since MediaWiki 1.31) may be + * constructed using nested arrays. For example, + * + * [ 'tableA', 'nestedB' => [ 'tableB', 'b2' => 'tableB2' ] ] + * + * along with `$join_conds` like + * + * [ 'b2' => [ 'JOIN', 'b_id = b2_id' ], 'nestedB' => [ 'LEFT JOIN', 'b_a = a_id' ] ] + * + * will produce SQL something like + * + * FROM tableA LEFT JOIN (tableB JOIN tableB2 AS b2 ON (b_id = b2_id)) ON (b_a = a_id) + * * All of the table names given here are automatically run through * Database::tableName(), which causes the table prefix (if any) to be * added, and various other table name mappings to be performed. diff --git a/tests/phpunit/includes/libs/rdbms/database/DatabaseTest.php b/tests/phpunit/includes/libs/rdbms/database/DatabaseTest.php index 70b6c36032..ee7ad2f2a2 100644 --- a/tests/phpunit/includes/libs/rdbms/database/DatabaseTest.php +++ b/tests/phpunit/includes/libs/rdbms/database/DatabaseTest.php @@ -94,6 +94,45 @@ class DatabaseTest extends PHPUnit_Framework_TestCase { ); } + public function provideTableNamesWithIndexClauseOrJOIN() { + return [ + 'one-element array' => [ + [ 'table' ], [], 'table ' + ], + 'comma join' => [ + [ 'table1', 'table2' ], [], 'table1,table2 ' + ], + 'real join' => [ + [ 'table1', 'table2' ], + [ 'table2' => [ 'LEFT JOIN', 't1_id = t2_id' ] ], + 'table1 LEFT JOIN table2 ON ((t1_id = t2_id))' + ], + 'real join with multiple conditionals' => [ + [ 'table1', 'table2' ], + [ 'table2' => [ 'LEFT JOIN', [ 't1_id = t2_id', 't2_x = \'X\'' ] ] ], + 'table1 LEFT JOIN table2 ON ((t1_id = t2_id) AND (t2_x = \'X\'))' + ], + 'join with parenthesized group' => [ + [ 'table1', 'n' => [ 'table2', 'table3' ] ], + [ + 'table3' => [ 'JOIN', 't2_id = t3_id' ], + 'n' => [ 'LEFT JOIN', 't1_id = t2_id' ], + ], + 'table1 LEFT JOIN (table2 JOIN table3 ON ((t2_id = t3_id))) ON ((t1_id = t2_id))' + ], + ]; + } + + /** + * @dataProvider provideTableNamesWithIndexClauseOrJOIN + * @covers Wikimedia\Rdbms\Database::tableNamesWithIndexClauseOrJOIN + */ + public function testTableNamesWithIndexClauseOrJOIN( $tables, $join_conds, $expect ) { + $clause = TestingAccessWrapper::newFromObject( $this->db ) + ->tableNamesWithIndexClauseOrJOIN( $tables, [], [], $join_conds ); + $this->assertSame( $expect, $clause ); + } + /** * @covers Wikimedia\Rdbms\Database::onTransactionIdle * @covers Wikimedia\Rdbms\Database::runOnTransactionIdleCallbacks -- 2.20.1