[Memcached_DataObject] Change how multiGet achieves an ordered result

The previous approach could send the key values to the database twice (once
in the IN filter and again in the ORDER BY expression), which doubles the
query size for large sets.

As an optional feature of this approach, multiGet can now return tuples in
exactly the order and number of the requested key values (see the new
$preserve parameter).
This commit is contained in:
Alexei Sorokin 2020-09-03 18:11:12 +03:00 committed by Diogo Peralta Cordeiro
parent b2e009bcd3
commit 590891139f
2 changed files with 132 additions and 78 deletions

View File

@ -77,13 +77,23 @@ abstract class Managed_DataObject extends Memcached_DataObject
* *
* @param string $keyCol name of column for key * @param string $keyCol name of column for key
* @param array $keyVals key values to fetch * @param array $keyVals key values to fetch
* @param boolean $skipNulls return only non-null results? * @param bool $skipNulls return only non-null results
* * @param bool $preserve return the same tuples as input
* @return array Array of objects, in order * @return object An object with tuples to be fetched, in order
*/ */
public static function multiGet($keyCol, array $keyVals, $skipNulls = true) public static function multiGet(
{ string $keyCol,
return parent::multiGetClass(get_called_class(), $keyCol, $keyVals, $skipNulls); array $keyVals,
bool $skipNulls = true,
bool $preserve = false
): object {
return parent::multiGetClass(
get_called_class(),
$keyCol,
$keyVals,
$skipNulls,
$preserve
);
} }
/** /**
@ -312,11 +322,6 @@ abstract class Managed_DataObject extends Memcached_DataObject
return $ckeys; return $ckeys;
} }
public function escapedTableName()
{
return common_database_tablename($this->tableName());
}
/** /**
* Returns an object by looking at the primary key column(s). * Returns an object by looking at the primary key column(s).
* *

View File

@ -69,84 +69,128 @@ class Memcached_DataObject extends Safe_DataObject
* @param string $cls Class to fetch * @param string $cls Class to fetch
* @param string $keyCol name of column for key * @param string $keyCol name of column for key
* @param array $keyVals key values to fetch * @param array $keyVals key values to fetch
* @param boolean $skipNulls skip provided null values * @param bool $skipNulls return only non-null results
* * @param bool $preserve return the same tuples as input
* @return array Array of objects, in order * @return object An object with tuples to be fetched, in order
*/ */
public static function multiGetClass($cls, $keyCol, array $keyVals, $skipNulls = true) public static function multiGetClass(
{ string $cls,
$obj = new $cls; string $keyCol,
array $keyVals,
bool $skipNulls,
bool $preserve
): object {
$obj = new $cls();
// PHP compatible datatype for settype() below // Do not select anything extra
$colType = $obj->columnType($keyCol); $obj->selectAdd();
$obj->selectAdd($obj->escapedTableName() . '.*');
if (!in_array($colType, array('integer', 'int'))) { // A PHP-compatible datatype to check against
// This is because I'm afraid to escape strings incorrectly $col_type = $obj->columnType($keyCol);
// in the way we use them below in FIND_IN_SET for MariaDB
throw new ServerException('Cannot do multiGet on anything but integer columns'); // The code below assumes one of the two results
if (!in_array($col_type, ['int', 'string'])) {
throw new ServerException(
'Cannot do multiGet on anything but integer or string columns'
);
} }
if ($skipNulls) { // Actually need to know if MariaDB or Oracle MySQL this time
foreach ($keyVals as $key => $val) { $db_type = common_config('db', 'type');
if (is_null($val)) { if ($db_type === 'mysql') {
unset($keyVals[$key]); $tmp_obj = new $cls();
$tmp_obj->query('SELECT 0 /*M! + 1 */ AS is_mariadb;');
if ($tmp_obj->fetch() && $tmp_obj->is_mariadb) {
$db_type = 'mariadb';
} }
} }
}
$obj->whereAddIn($keyCol, $keyVals, $colType);
// Since we're inputting straight to a query: format and escape // Since we're inputting straight to a query: format and escape
foreach ($keyVals as $key => $val) { $vals_escaped = [];
settype($val, $colType); foreach (array_values($keyVals) as $i => $val) {
$keyVals[$key] = $obj->escape($val); if (is_null($val)) {
} $val_escaped = 'NULL';
} elseif ($col_type === 'int') {
// Check if values are ordered, makes sorting in SQL easier $val_escaped = (string)(int) $val;
$prev_val = reset($keyVals);
$order_asc = $order_desc = true;
foreach ($keyVals as $val) {
if ($val < $prev_val) {
$order_asc = false;
}
if ($val > $prev_val) {
$order_desc = false;
}
if ($order_asc === false && $order_desc === false) {
break;
}
$prev_val = $val;
}
if ($order_asc) {
$obj->orderBy($keyCol);
} elseif ($order_desc) {
$obj->orderBy("{$keyCol} DESC");
} else { } else {
switch (common_config('db', 'type')) { $val_escaped = "'{$obj->escape($val)}'";
}
if ($db_type !== 'mariadb') {
$vals_escaped[] = $val_escaped;
} else {
// A completely different approach for MariaDB (see below)
$vals_escaped[] = "({$val_escaped},{$i})";
}
}
// One way to guarantee that there is no name collision
$join_tablename = common_database_tablename(
$obj->tableName() . '_vals'
);
$join_keyword = ($preserve ? 'RIGHT' : 'LEFT') . ' JOIN';
$vals_cast_type = ($col_type === 'int') ? 'INTEGER' : 'TEXT';
// A lot of magic to ensure we get an ordered reply with the same exact
// values as on input.
switch ($db_type) {
case 'pgsql': case 'pgsql':
// "position" will make sure we keep the desired order // Explicit casting is done to cast empty arrays
$obj->orderBy(sprintf( $obj->_join = "\n" . sprintf(
"position(',' || CAST(%s AS text) || ',' IN ',%s,')", <<<END
$keyCol, {$join_keyword} unnest(
implode(',', $keyVals) CAST(ARRAY[%s] AS {$vals_cast_type}[])
)); ) WITH ORDINALITY
AS {$join_tablename} ({$keyCol}, {$keyCol}_pos)
USING ({$keyCol})
END,
implode(',', $vals_escaped)
);
break;
case 'mariadb':
// Delivers an empty set
if (count($vals_escaped) == 0) {
$vals_escaped[] = '(NULL,0) LIMIT 0';
}
// MariaDB doesn't support JSON_TABLE, but Oracle MySQL does,
// which doesn't support VALUES without a ROW keyword though.
$obj->_join = "\n" . sprintf(
<<<END
{$join_keyword} (
WITH t1 ({$keyCol}, {$keyCol}_pos) AS (VALUES %s)
SELECT * FROM t1
) AS {$join_tablename} USING ({$keyCol})
END,
implode(',', $vals_escaped)
);
break; break;
case 'mysql': case 'mysql':
// "find_in_set" will make sure we keep the desired order
$obj->orderBy(sprintf(
"find_in_set(%s, '%s')",
$keyCol,
implode(',', $keyVals)
));
break;
default: default:
throw new ServerException('Unknown DB type selected.'); $obj->_join = "\n" . sprintf(
<<<END
{$join_keyword} JSON_TABLE(
JSON_ARRAY(%s), '$[*]' COLUMNS (
{$keyCol} {$vals_cast_type} PATH '$',
{$keyCol}_pos FOR ORDINALITY
)
) AS {$join_tablename} USING ({$keyCol})
END,
implode(',', $vals_escaped)
);
} }
if (!$preserve) {
// Implements a left semi-join
$obj->whereAdd("{$join_tablename}.{$keyCol}_pos IS NOT NULL");
} }
// Filters both NULLs requested and non-matching NULLs
if ($skipNulls) {
$obj->whereAdd("{$obj->escapedTableName()}.{$keyCol} IS NOT NULL");
}
$obj->orderBy("{$join_tablename}.{$keyCol}_pos");
$obj->find(); $obj->find();
return $obj; return $obj;
} }
@ -400,6 +444,11 @@ class Memcached_DataObject extends Safe_DataObject
return $result; return $result;
} }
public function escapedTableName()
{
return common_database_tablename($this->tableName());
}
public function columnType($columnName) public function columnType($columnName)
{ {
$keys = $this->table(); $keys = $this->table();
@ -410,7 +459,7 @@ class Memcached_DataObject extends Safe_DataObject
$def = $keys[$columnName]; $def = $keys[$columnName];
if ($def & DB_DATAOBJECT_INT) { if ($def & DB_DATAOBJECT_INT) {
return 'integer'; return 'int';
} else { } else {
return 'string'; return 'string';
} }