From d395dfb039fce787c9e4acbfeecbfe0ec372a9be Mon Sep 17 00:00:00 2001 From: Aaron Schulz Date: Wed, 14 Feb 2018 19:46:04 -0800 Subject: [PATCH] rdbms: make selectRowCount() use $var argument to exclude NULLs If the $var argument is provided, then it will make the resulting count exclude rows where the value for that column is NULL. Also add buildSelectSubquery() method and Subquery wrapper class for use with select() for calculated tables. Change-Id: I549d629af99afdf370602de095f7fba6d1546c37 --- autoload.php | 1 + includes/libs/rdbms/database/DBConnRef.php | 7 + includes/libs/rdbms/database/Database.php | 135 +++++++++++++++--- .../libs/rdbms/database/DatabaseMssql.php | 12 +- .../libs/rdbms/database/DatabaseMysqlBase.php | 12 +- .../libs/rdbms/database/DatabasePostgres.php | 12 +- includes/libs/rdbms/database/IDatabase.php | 38 ++++- includes/libs/rdbms/encasing/Subquery.php | 44 ++++++ .../libs/rdbms/database/DatabaseSQLTest.php | 98 ++++++++++++- 9 files changed, 323 insertions(+), 36 deletions(-) create mode 100644 includes/libs/rdbms/encasing/Subquery.php diff --git a/autoload.php b/autoload.php index b5f3e4a067..6386a5e095 100644 --- a/autoload.php +++ b/autoload.php @@ -1727,6 +1727,7 @@ $wgAutoloadLocalClasses = [ 'Wikimedia\\Rdbms\\SQLiteField' => __DIR__ . '/includes/libs/rdbms/field/SQLiteField.php', 'Wikimedia\\Rdbms\\SavepointPostgres' => __DIR__ . '/includes/libs/rdbms/database/utils/SavepointPostgres.php', 'Wikimedia\\Rdbms\\SessionConsistentConnectionManager' => __DIR__ . '/includes/libs/rdbms/connectionmanager/SessionConsistentConnectionManager.php', + 'Wikimedia\\Rdbms\\Subquery' => __DIR__ . '/includes/libs/rdbms/encasing/Subquery.php', 'Wikimedia\\Rdbms\\TransactionProfiler' => __DIR__ . '/includes/libs/rdbms/TransactionProfiler.php', 'WikitextContent' => __DIR__ . '/includes/content/WikitextContent.php', 'WikitextContentHandler' => __DIR__ . '/includes/content/WikitextContentHandler.php', diff --git a/includes/libs/rdbms/database/DBConnRef.php b/includes/libs/rdbms/database/DBConnRef.php index acb21ed9b6..4910219b62 100644 --- a/includes/libs/rdbms/database/DBConnRef.php +++ b/includes/libs/rdbms/database/DBConnRef.php @@ -362,6 +362,13 @@ class DBConnRef implements IDatabase { return $this->__call( __FUNCTION__, func_get_args() ); } + public function buildSelectSubquery( + $table, $vars, $conds = '', $fname = __METHOD__, + $options = [], $join_conds = [] + ) { + return $this->__call( __FUNCTION__, func_get_args() ); + } + public function databasesAreIndependent() { return $this->__call( __FUNCTION__, func_get_args() ); } diff --git a/includes/libs/rdbms/database/Database.php b/includes/libs/rdbms/database/Database.php index 97ea26645c..5ef1744b97 100644 --- a/includes/libs/rdbms/database/Database.php +++ b/includes/libs/rdbms/database/Database.php @@ -1588,8 +1588,14 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware } public function estimateRowCount( - $table, $vars = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] + $table, $var = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] ) { + $conds = $this->normalizeConditions( $conds, $fname ); + $column = $this->extractSingleFieldFromList( $var ); + if ( is_string( $column ) && !in_array( $column, [ '*', '1' ] ) ) { + $conds[] = "$column IS NOT NULL"; + } + $res = $this->select( $table, [ 'rowcount' => 'COUNT(*)' ], $conds, $fname, $options, $join_conds ); @@ -1599,21 +1605,76 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware } public function selectRowCount( - $tables, $vars = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] + $tables, $var = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] ) { - $rows = 0; - $sql = $this->selectSQLText( $tables, '1', $conds, $fname, $options, $join_conds ); - // The identifier quotes is primarily for MSSQL. - $rowCountCol = $this->addIdentifierQuotes( "rowcount" ); - $tableName = $this->addIdentifierQuotes( "tmp_count" ); - $res = $this->query( "SELECT COUNT(*) AS $rowCountCol FROM ($sql) $tableName", $fname ); + $conds = $this->normalizeConditions( $conds, $fname ); + $column = $this->extractSingleFieldFromList( $var ); + if ( is_string( $column ) && !in_array( $column, [ '*', '1' ] ) ) { + $conds[] = "$column IS NOT NULL"; + } + + $res = $this->select( + [ + 'tmp_count' => $this->buildSelectSubquery( + $tables, + '1', + $conds, + $fname, + $options, + $join_conds + ) + ], + [ 'rowcount' => 'COUNT(*)' ], + [], + $fname + ); + $row = $res ? $this->fetchRow( $res ) : []; + + return isset( $row['rowcount'] ) ? (int)$row['rowcount'] : 0; + } + + /** + * @param array|string $conds + * @param string $fname + * @return array + */ + final protected function normalizeConditions( $conds, $fname ) { + if ( $conds === null || $conds === false ) { + $this->queryLogger->warning( + __METHOD__ + . ' called from ' + . $fname + . ' with incorrect parameters: $conds must be a string or an array' + ); + $conds = ''; + } - if ( $res ) { - $row = $this->fetchRow( $res ); - $rows = ( isset( $row['rowcount'] ) ) ? (int)$row['rowcount'] : 0; + if ( !is_array( $conds ) ) { + $conds = ( $conds === '' ) ? [] : [ $conds ]; } - return $rows; + return $conds; + } + + /** + * @param array|string $var Field parameter in the style of select() + * @return string|null Column name or null; ignores aliases + * @throws DBUnexpectedError Errors out if multiple columns are given + */ + final protected function extractSingleFieldFromList( $var ) { + if ( is_array( $var ) ) { + if ( !$var ) { + $column = null; + } elseif ( count( $var ) == 1 ) { + $column = isset( $var[0] ) ? $var[0] : reset( $var ); + } else { + throw new DBUnexpectedError( $this, __METHOD__ . ': got multiple columns.' ); + } + } else { + $column = $var; + } + + return $column; } /** @@ -1963,6 +2024,15 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware return 'CAST( ' . $field . ' AS INTEGER )'; } + public function buildSelectSubquery( + $table, $vars, $conds = '', $fname = __METHOD__, + $options = [], $join_conds = [] + ) { + return new Subquery( + $this->selectSQLText( $table, $vars, $conds, $fname, $options, $join_conds ) + ); + } + public function databasesAreIndependent() { return false; } @@ -1985,6 +2055,13 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware } public function tableName( $name, $format = 'quoted' ) { + if ( $name instanceof Subquery ) { + throw new DBUnexpectedError( + $this, + __METHOD__ . ': got Subquery instance when expecting a string.' + ); + } + # Skip the entire process when we have a string quoted on both ends. # Note that we check the end so that we will still quote any use of # use of `database`.table. But won't break things if someone wants @@ -2001,6 +2078,11 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware # any remote case where a word like on may be inside of a table name # surrounded by symbols which may be considered word breaks. if ( preg_match( '/(^|\s)(DISTINCT|JOIN|ON|AS)(\s|$)/i', $name ) !== 0 ) { + $this->queryLogger->warning( + __METHOD__ . ": use of subqueries is not supported this way.", + [ 'trace' => ( new RuntimeException() )->getTraceAsString() ] + ); + return $name; } @@ -2105,17 +2187,32 @@ abstract class Database implements IDatabase, IMaintainableDatabase, LoggerAware /** * Get an aliased table name - * e.g. tableName AS newTableName * - * @param string $name Table name, see tableName() - * @param string|bool $alias Alias (optional) + * This returns strings like "tableName AS newTableName" for aliased tables + * and "(SELECT * from tableA) newTablename" for subqueries (e.g. derived tables) + * + * @see Database::tableName() + * @param string|Subquery $table Table name or object with a 'sql' field + * @param string|bool $alias Table alias (optional) * @return string SQL name for aliased table. Will not alias a table to its own name */ - protected function tableNameWithAlias( $name, $alias = false ) { - if ( !$alias || $alias == $name ) { - return $this->tableName( $name ); + protected function tableNameWithAlias( $table, $alias = false ) { + if ( is_string( $table ) ) { + $quotedTable = $this->tableName( $table ); + } elseif ( $table instanceof Subquery ) { + $quotedTable = (string)$table; + } else { + throw new InvalidArgumentException( "Table must be a string or Subquery." ); + } + + if ( !strlen( $alias ) || $alias === $table ) { + if ( $table instanceof Subquery ) { + throw new InvalidArgumentException( "Subquery table missing alias." ); + } + + return $quotedTable; } else { - return $this->tableName( $name ) . ' ' . $this->addIdentifierQuotes( $alias ); + return $quotedTable . ' ' . $this->addIdentifierQuotes( $alias ); } } diff --git a/includes/libs/rdbms/database/DatabaseMssql.php b/includes/libs/rdbms/database/DatabaseMssql.php index 885880a1d3..1f6132b85d 100644 --- a/includes/libs/rdbms/database/DatabaseMssql.php +++ b/includes/libs/rdbms/database/DatabaseMssql.php @@ -505,20 +505,26 @@ class DatabaseMssql extends Database { * Returns -1 if count cannot be found * Takes same arguments as Database::select() * @param string $table - * @param string $vars + * @param string $var * @param string $conds * @param string $fname * @param array $options * @param array $join_conds * @return int */ - public function estimateRowCount( $table, $vars = '*', $conds = '', + public function estimateRowCount( $table, $var = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] ) { + $conds = $this->normalizeConditions( $conds, $fname ); + $column = $this->extractSingleFieldFromList( $var ); + if ( is_string( $column ) && !in_array( $column, [ '*', '1' ] ) ) { + $conds[] = "$column IS NOT NULL"; + } + // http://msdn2.microsoft.com/en-us/library/aa259203.aspx $options['EXPLAIN'] = true; $options['FOR COUNT'] = true; - $res = $this->select( $table, $vars, $conds, $fname, $options, $join_conds ); + $res = $this->select( $table, $var, $conds, $fname, $options, $join_conds ); $rows = -1; if ( $res ) { diff --git a/includes/libs/rdbms/database/DatabaseMysqlBase.php b/includes/libs/rdbms/database/DatabaseMysqlBase.php index b7778b4bea..dbfaaed2d6 100644 --- a/includes/libs/rdbms/database/DatabaseMysqlBase.php +++ b/includes/libs/rdbms/database/DatabaseMysqlBase.php @@ -558,18 +558,24 @@ abstract class DatabaseMysqlBase extends Database { * Takes same arguments as Database::select() * * @param string|array $table - * @param string|array $vars + * @param string|array $var * @param string|array $conds * @param string $fname * @param string|array $options * @param array $join_conds * @return bool|int */ - public function estimateRowCount( $table, $vars = '*', $conds = '', + public function estimateRowCount( $table, $var = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] ) { + $conds = $this->normalizeConditions( $conds, $fname ); + $column = $this->extractSingleFieldFromList( $var ); + if ( is_string( $column ) && !in_array( $column, [ '*', '1' ] ) ) { + $conds[] = "$column IS NOT NULL"; + } + $options['EXPLAIN'] = true; - $res = $this->select( $table, $vars, $conds, $fname, $options, $join_conds ); + $res = $this->select( $table, $var, $conds, $fname, $options, $join_conds ); if ( $res === false ) { return false; } diff --git a/includes/libs/rdbms/database/DatabasePostgres.php b/includes/libs/rdbms/database/DatabasePostgres.php index 7b2ef831e5..c945c35e5e 100644 --- a/includes/libs/rdbms/database/DatabasePostgres.php +++ b/includes/libs/rdbms/database/DatabasePostgres.php @@ -409,18 +409,24 @@ class DatabasePostgres extends Database { * Takes same arguments as Database::select() * * @param string $table - * @param string $vars + * @param string $var * @param string $conds * @param string $fname * @param array $options * @param array $join_conds * @return int */ - public function estimateRowCount( $table, $vars = '*', $conds = '', + public function estimateRowCount( $table, $var = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] ) { + $conds = $this->normalizeConditions( $conds, $fname ); + $column = $this->extractSingleFieldFromList( $var ); + if ( is_string( $column ) && !in_array( $column, [ '*', '1' ] ) ) { + $conds[] = "$column IS NOT NULL"; + } + $options['EXPLAIN'] = true; - $res = $this->select( $table, $vars, $conds, $fname, $options, $join_conds ); + $res = $this->select( $table, $var, $conds, $fname, $options, $join_conds ); $rows = -1; if ( $res ) { $row = $this->fetchRow( $res ); diff --git a/includes/libs/rdbms/database/IDatabase.php b/includes/libs/rdbms/database/IDatabase.php index 09abaa8b82..8af73e07c7 100644 --- a/includes/libs/rdbms/database/IDatabase.php +++ b/includes/libs/rdbms/database/IDatabase.php @@ -624,6 +624,11 @@ interface IDatabase { * This includes the user table in the query, with the alias "a" available * for use in field names (e.g. a.user_name). * + * A derived table, defined by the result of selectSQLText(), requires an alias + * key and a Subquery instance value which wraps the SQL query, for example: + * + * [ 'c' => new Subquery( 'SELECT ...' ) ] + * * Joins using parentheses for grouping (since MediaWiki 1.31) may be * constructed using nested arrays. For example, * @@ -773,15 +778,15 @@ interface IDatabase { * doing UNION queries, where the SQL text of each query is needed. In general, * however, callers outside of Database classes should just use select(). * + * @see IDatabase::select() + * * @param string|array $table Table name * @param string|array $vars Field names * @param string|array $conds Conditions * @param string $fname Caller function name * @param string|array $options Query options * @param string|array $join_conds Join conditions - * - * @return string SQL query string. - * @see IDatabase::select() + * @return string SQL query string */ public function selectSQLText( $table, $vars, $conds = '', $fname = __METHOD__, @@ -821,7 +826,7 @@ interface IDatabase { * Takes the same arguments as IDatabase::select(). * * @param string $table Table name - * @param string $vars Unused + * @param string $var Column for which NULL values are not counted [default "*"] * @param array|string $conds Filters on the table * @param string $fname Function name for profiling * @param array $options Options for select @@ -830,7 +835,7 @@ interface IDatabase { * @throws DBError */ public function estimateRowCount( - $table, $vars = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] + $table, $var = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] ); /** @@ -843,7 +848,7 @@ interface IDatabase { * @since 1.27 Added $join_conds parameter * * @param array|string $tables Table names - * @param string $vars Unused + * @param string $var Column for which NULL values are not counted [default "*"] * @param array|string $conds Filters on the table * @param string $fname Function name for profiling * @param array $options Options for select @@ -852,7 +857,7 @@ interface IDatabase { * @throws DBError */ public function selectRowCount( - $tables, $vars = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] + $tables, $var = '*', $conds = '', $fname = __METHOD__, $options = [], $join_conds = [] ); /** @@ -1083,6 +1088,25 @@ interface IDatabase { */ public function buildIntegerCast( $field ); + /** + * Equivalent to IDatabase::selectSQLText() except wraps the result in Subqyery + * + * @see IDatabase::selectSQLText() + * + * @param string|array $table Table name + * @param string|array $vars Field names + * @param string|array $conds Conditions + * @param string $fname Caller function name + * @param string|array $options Query options + * @param string|array $join_conds Join conditions + * @return Subquery + * @since 1.31 + */ + public function buildSelectSubquery( + $table, $vars, $conds = '', $fname = __METHOD__, + $options = [], $join_conds = [] + ); + /** * Returns true if DBs are assumed to be on potentially different servers * diff --git a/includes/libs/rdbms/encasing/Subquery.php b/includes/libs/rdbms/encasing/Subquery.php new file mode 100644 index 0000000000..fc118d0ad9 --- /dev/null +++ b/includes/libs/rdbms/encasing/Subquery.php @@ -0,0 +1,44 @@ +sql = $sql; + } + + /** + * @return string Original SQL query + */ + public function __toString() { + return '(' . $this->sql . ')'; + } +} diff --git a/tests/phpunit/includes/libs/rdbms/database/DatabaseSQLTest.php b/tests/phpunit/includes/libs/rdbms/database/DatabaseSQLTest.php index 009580bdd4..b883c11ba3 100644 --- a/tests/phpunit/includes/libs/rdbms/database/DatabaseSQLTest.php +++ b/tests/phpunit/includes/libs/rdbms/database/DatabaseSQLTest.php @@ -1,6 +1,7 @@ database->selectRowCount( + $sql['tables'], + $sql['field'], + isset( $sql['conds'] ) ? $sql['conds'] : [], + __METHOD__, + isset( $sql['options'] ) ? $sql['options'] : [], + isset( $sql['join_conds'] ) ? $sql['join_conds'] : [] + ); + $this->assertLastSql( $sqlText ); + } + + public static function provideSelectRowCount() { + return [ + [ + [ + 'tables' => 'table', + 'field' => [ '*' ], + 'conds' => [ 'field' => 'text' ], + ], + "SELECT COUNT(*) AS rowcount FROM " . + "(SELECT 1 FROM table WHERE field = 'text' ) tmp_count" + ], + [ + [ + 'tables' => 'table', + 'field' => [ 'column' ], + 'conds' => [ 'field' => 'text' ], + ], + "SELECT COUNT(*) AS rowcount FROM " . + "(SELECT 1 FROM table WHERE field = 'text' AND (column IS NOT NULL) ) tmp_count" + ], + [ + [ + 'tables' => 'table', + 'field' => [ 'alias' => 'column' ], + 'conds' => [ 'field' => 'text' ], + ], + "SELECT COUNT(*) AS rowcount FROM " . + "(SELECT 1 FROM table WHERE field = 'text' AND (column IS NOT NULL) ) tmp_count" + ], + [ + [ + 'tables' => 'table', + 'field' => [ 'alias' => 'column' ], + 'conds' => '', + ], + "SELECT COUNT(*) AS rowcount FROM " . + "(SELECT 1 FROM table WHERE (column IS NOT NULL) ) tmp_count" + ], + [ + [ + 'tables' => 'table', + 'field' => [ 'alias' => 'column' ], + 'conds' => false, + ], + "SELECT COUNT(*) AS rowcount FROM " . + "(SELECT 1 FROM table WHERE (column IS NOT NULL) ) tmp_count" + ], + [ + [ + 'tables' => 'table', + 'field' => [ 'alias' => 'column' ], + 'conds' => null, + ], + "SELECT COUNT(*) AS rowcount FROM " . + "(SELECT 1 FROM table WHERE (column IS NOT NULL) ) tmp_count" + ], + [ + [ + 'tables' => 'table', + 'field' => [ 'alias' => 'column' ], + 'conds' => '1', + ], + "SELECT COUNT(*) AS rowcount FROM " . + "(SELECT 1 FROM table WHERE (1) AND (column IS NOT NULL) ) tmp_count" + ], + [ + [ + 'tables' => 'table', + 'field' => [ 'alias' => 'column' ], + 'conds' => '0', + ], + "SELECT COUNT(*) AS rowcount FROM " . + "(SELECT 1 FROM table WHERE (0) AND (column IS NOT NULL) ) tmp_count" + ], + ]; + } + /** * @dataProvider provideUpdate * @covers Wikimedia\Rdbms\Database::update -- 2.20.1