[DATABASE] Change collation handling

Until now, table definitions could specify collations only for MariaDB, using
MariaDB's collation names directly.
Now, instead, definitions use a slightly more abstract collation name syntax
that supports only the collations utf8mb4_bin and utf8mb4_unicode_(cs|ci)
(wrapped as utf8_bin and utf8_general_(cs|ci)), because those are the ones that
have practical use for GNU social.

This also means that on MariaDB the formerly used utf8mb4_general_(cs|ci)
collations have been superseded by utf8mb4_unicode_(cs|ci), as the latter are
the more modern replacement.

Introduce collation support on PostgreSQL, which uses the C (POSIX) collation
for utf8_bin and the und-x-icu collation for utf8_general_cs.
Unfortunately, utf8_general_ci is also mapped to und-x-icu, which makes it
case-sensitive.
This commit is contained in:
Alexei Sorokin
2020-08-16 23:41:28 +03:00
parent 5c21816b22
commit 341e34b766
5 changed files with 141 additions and 36 deletions

View File

@@ -132,10 +132,13 @@ class PgsqlSchema extends Schema
) {
$field['auto_increment'] = true;
} elseif (array_key_exists($name, $enum_info)) {
$field['type'] = $type = 'enum';
$field['enum'] = $enum_info[$name];
}
if (!empty($row['collation_name'])) {
$field['collate'] = $row['collation_name'];
}
$def['fields'][$name] = $field;
}
@@ -415,6 +418,7 @@ class PgsqlSchema extends Schema
'integer' => 'int',
'char' => 'bpchar',
'datetime' => 'timestamp',
'enum' => 'text',
'blob' => 'bytea'
];
@@ -442,6 +446,49 @@ class PgsqlSchema extends Schema
return $type;
}
/**
 * Translate one of our abstract collation names into the collation name
 * that is handed to PostgreSQL
 *
 * Recognised names are utf8_bin, utf8_general_cs and utf8_general_ci.
 * Any other name is reported through common_log() at LOG_ERR level and
 * then treated as if utf8_bin had been requested.
 *
 * @param string $collate Collation name in our own format
 * @return string "C" for utf8_bin, "und-x-icu" for both utf8_general_* names
 */
protected function collationToPostgreSQL(string $collate): string
{
    // @fixme No case-insensitivity support: utf8_general_ci ends up on the
    // very same collation as utf8_general_cs
    $pgsqlNames = [
        'utf8_bin'        => 'C',
        'utf8_general_cs' => 'und-x-icu',
        'utf8_general_ci' => 'und-x-icu',
    ];

    if (array_key_exists($collate, $pgsqlNames)) {
        return $pgsqlNames[$collate];
    }

    // Unknown name: complain, then fall back to the binary collation
    common_log(
        LOG_ERR,
        'Collation not supported: "' . $collate . '"'
    );

    return $pgsqlNames['utf8_bin'];
}
/**
 * Build the type part of a column definition, adding a COLLATE clause
 * for string columns that carry a non-empty 'collate' entry
 *
 * The base text comes from the parent implementation; the collation name
 * is emitted as-is, wrapped in double quotes.
 *
 * @param string $name   Column name, passed through to the parent
 * @param array  $column Column definition; 'collate' is read when present
 * @return mixed Whatever the parent returns, with ' COLLATE "..."' appended
 *               when applicable (presumably an SQL string -- verify
 *               against Schema::typeAndSize)
 */
public function typeAndSize(string $name, array $column)
{
    $sql = parent::typeAndSize($name, $column);

    // Collations only make sense on string columns that request one
    if (!$this->isStringType($column) || empty($column['collate'])) {
        return $sql;
    }

    return $sql . ' COLLATE "' . $column['collate'] . '"';
}
/**
* Append an SQL statement with an index definition for a full-text search
* index over one or more columns on a table.
@@ -475,14 +522,16 @@ class PgsqlSchema extends Schema
foreach ($tableDef['fields'] as $name => &$col) {
// No convenient support for field descriptions
unset($col['description']);
// @fixme Nor for MariaDB-specific collations
unset($col['collate']);
if ($col['type'] === 'serial') {
$col['type'] = 'int';
$col['auto_increment'] = true;
}
if (!empty($col['collate'])) {
$col['collate'] = $this->collationToPostgreSQL($col['collate']);
}
$col['type'] = $this->mapType($col);
unset($col['size']);
}