[DATABASE] Change collation handling
Until now, table definitions could specify collations only for MariaDB, using MariaDB's collation names directly. Definitions now use a slightly more abstract collation name syntax that supports only the collations utf8mb4_bin and utf8mb4_unicode_(cs|ci) (exposed as utf8_bin and utf8_general_(cs|ci)), because those are the ones that have practical use for GNU social. This also means that on MariaDB the formerly used utf8mb4_general_(cs|ci) collations have been superseded by utf8mb4_unicode_(cs|ci), as they are the more modern replacement. Collation support is also introduced on PostgreSQL: utf8_bin maps to the C (POSIX) collation and utf8_general_cs maps to the und-x-icu collation. Unfortunately, utf8_general_ci is also mapped to und-x-icu, which makes it case-sensitive on PostgreSQL.
This commit is contained in:
parent
5c21816b22
commit
341e34b766
@ -79,7 +79,7 @@ class Notice extends Managed_DataObject
|
|||||||
'id' => array('type' => 'serial', 'not null' => true, 'description' => 'unique identifier'),
|
'id' => array('type' => 'serial', 'not null' => true, 'description' => 'unique identifier'),
|
||||||
'profile_id' => array('type' => 'int', 'not null' => true, 'description' => 'who made the update'),
|
'profile_id' => array('type' => 'int', 'not null' => true, 'description' => 'who made the update'),
|
||||||
'uri' => array('type' => 'varchar', 'length' => 191, 'description' => 'universally unique identifier, usually a tag URI'),
|
'uri' => array('type' => 'varchar', 'length' => 191, 'description' => 'universally unique identifier, usually a tag URI'),
|
||||||
'content' => array('type' => 'text', 'description' => 'update content', 'collate' => 'utf8mb4_general_ci'),
|
'content' => array('type' => 'text', 'description' => 'update content', 'collate' => 'utf8_general_ci'),
|
||||||
'rendered' => array('type' => 'text', 'description' => 'HTML version of the content'),
|
'rendered' => array('type' => 'text', 'description' => 'HTML version of the content'),
|
||||||
'url' => array('type' => 'varchar', 'length' => 191, 'description' => 'URL of any attachment (image, video, bookmark, whatever)'),
|
'url' => array('type' => 'varchar', 'length' => 191, 'description' => 'URL of any attachment (image, video, bookmark, whatever)'),
|
||||||
'created' => array('type' => 'datetime', 'description' => 'date this record was created'),
|
'created' => array('type' => 'datetime', 'description' => 'date this record was created'),
|
||||||
|
@ -46,12 +46,12 @@ class Profile extends Managed_DataObject
|
|||||||
'description' => 'local and remote users have profiles',
|
'description' => 'local and remote users have profiles',
|
||||||
'fields' => array(
|
'fields' => array(
|
||||||
'id' => array('type' => 'serial', 'not null' => true, 'description' => 'unique identifier'),
|
'id' => array('type' => 'serial', 'not null' => true, 'description' => 'unique identifier'),
|
||||||
'nickname' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'nickname or username', 'collate' => 'utf8mb4_general_ci'),
|
'nickname' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'nickname or username', 'collate' => 'utf8_general_ci'),
|
||||||
'fullname' => array('type' => 'text', 'description' => 'display name', 'collate' => 'utf8mb4_general_ci'),
|
'fullname' => array('type' => 'text', 'description' => 'display name', 'collate' => 'utf8_general_ci'),
|
||||||
'profileurl' => array('type' => 'text', 'description' => 'URL, cached so we dont regenerate'),
|
'profileurl' => array('type' => 'text', 'description' => 'URL, cached so we dont regenerate'),
|
||||||
'homepage' => array('type' => 'text', 'description' => 'identifying URL', 'collate' => 'utf8mb4_general_ci'),
|
'homepage' => array('type' => 'text', 'description' => 'identifying URL', 'collate' => 'utf8_general_ci'),
|
||||||
'bio' => array('type' => 'text', 'description' => 'descriptive biography', 'collate' => 'utf8mb4_general_ci'),
|
'bio' => array('type' => 'text', 'description' => 'descriptive biography', 'collate' => 'utf8_general_ci'),
|
||||||
'location' => array('type' => 'text', 'description' => 'physical location', 'collate' => 'utf8mb4_general_ci'),
|
'location' => array('type' => 'text', 'description' => 'physical location', 'collate' => 'utf8_general_ci'),
|
||||||
'lat' => array('type' => 'numeric', 'precision' => 10, 'scale' => 7, 'description' => 'latitude'),
|
'lat' => array('type' => 'numeric', 'precision' => 10, 'scale' => 7, 'description' => 'latitude'),
|
||||||
'lon' => array('type' => 'numeric', 'precision' => 10, 'scale' => 7, 'description' => 'longitude'),
|
'lon' => array('type' => 'numeric', 'precision' => 10, 'scale' => 7, 'description' => 'longitude'),
|
||||||
'location_id' => array('type' => 'int', 'description' => 'location id if possible'),
|
'location_id' => array('type' => 'int', 'description' => 'location id if possible'),
|
||||||
|
@ -147,10 +147,8 @@ class MysqlSchema extends Schema
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
$table_props = $this->getTableProperties($table, ['TABLE_COLLATION']);
|
if (!empty($row['COLLATION_NAME'])) {
|
||||||
$collate = $row['COLLATION_NAME'];
|
$field['collate'] = $row['COLLATION_NAME'];
|
||||||
if (!empty($collate) && $collate !== $table_props['TABLE_COLLATION']) {
|
|
||||||
$field['collate'] = $collate;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$def['fields'][$name] = $field;
|
$def['fields'][$name] = $field;
|
||||||
@ -471,17 +469,6 @@ class MysqlSchema extends Schema
|
|||||||
return in_array(strtolower($cd['type']), $ints);
|
return in_array(strtolower($cd['type']), $ints);
|
||||||
}
|
}
|
||||||
|
|
||||||
/**
|
|
||||||
* Is this column a string type?
|
|
||||||
* @param array $cd
|
|
||||||
* @return bool
|
|
||||||
*/
|
|
||||||
private function isStringType(array $cd): bool
|
|
||||||
{
|
|
||||||
$strings = ['char', 'varchar', 'text'];
|
|
||||||
return in_array(strtolower($cd['type']), $strings);
|
|
||||||
}
|
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the proper SQL for creating or
|
* Return the proper SQL for creating or
|
||||||
* altering a column.
|
* altering a column.
|
||||||
@ -547,6 +534,34 @@ class MysqlSchema extends Schema
|
|||||||
return $type;
|
return $type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Translate one of our abstract collation names into its MariaDB name.
 *
 * Only 'utf8_bin', 'utf8_general_cs' and 'utf8_general_ci' are recognised.
 * Any other value is reported through common_log() and then treated as
 * 'utf8_bin'.
 *
 * NOTE(review): the case-sensitive name resolves to 'utf8mb4_unicode_cs';
 * confirm that the targeted MariaDB/MySQL releases actually provide a
 * collation with that name.
 *
 * @param string $collate abstract collation name
 * @return string collation name with the utf8mb4 prefix
 */
protected function collationToMySQL(string $collate): string
{
    // "general" is served by the "unicode" family; every name gains
    // the utf8mb4 prefix
    $native = [
        'utf8_bin'        => 'utf8mb4_bin',
        'utf8_general_cs' => 'utf8mb4_unicode_cs',
        'utf8_general_ci' => 'utf8mb4_unicode_ci',
    ];

    if (!isset($native[$collate])) {
        common_log(
            LOG_ERR,
            'Collation not supported: "' . $collate . '"'
        );
        // Unrecognised names degrade to the binary collation
        $collate = 'utf8_bin';
    }

    return $native[$collate];
}
|
||||||
|
|
||||||
public function typeAndSize(string $name, array $column)
|
public function typeAndSize(string $name, array $column)
|
||||||
{
|
{
|
||||||
if ($column['type'] === 'enum') {
|
if ($column['type'] === 'enum') {
|
||||||
@ -581,15 +596,6 @@ class MysqlSchema extends Schema
|
|||||||
{
|
{
|
||||||
$tableDef = parent::filterDef($tableName, $tableDef);
|
$tableDef = parent::filterDef($tableName, $tableDef);
|
||||||
|
|
||||||
// Get existing table collation if the table exists.
|
|
||||||
// To know if collation that's been set is unique for the table.
|
|
||||||
try {
|
|
||||||
$table_props = $this->getTableProperties($tableName, ['TABLE_COLLATION']);
|
|
||||||
$table_collate = $table_props['TABLE_COLLATION'];
|
|
||||||
} catch (SchemaTableMissingException $e) {
|
|
||||||
$table_collate = null;
|
|
||||||
}
|
|
||||||
|
|
||||||
foreach ($tableDef['fields'] as $name => &$col) {
|
foreach ($tableDef['fields'] as $name => &$col) {
|
||||||
switch ($col['type']) {
|
switch ($col['type']) {
|
||||||
case 'serial':
|
case 'serial':
|
||||||
@ -603,9 +609,8 @@ class MysqlSchema extends Schema
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!empty($col['collate'])
|
if (!empty($col['collate'])) {
|
||||||
&& $col['collate'] === $table_collate) {
|
$col['collate'] = $this->collationToMySQL($col['collate']);
|
||||||
unset($col['collate']);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
$col['type'] = $this->mapType($col);
|
$col['type'] = $this->mapType($col);
|
||||||
|
@ -132,10 +132,13 @@ class PgsqlSchema extends Schema
|
|||||||
) {
|
) {
|
||||||
$field['auto_increment'] = true;
|
$field['auto_increment'] = true;
|
||||||
} elseif (array_key_exists($name, $enum_info)) {
|
} elseif (array_key_exists($name, $enum_info)) {
|
||||||
$field['type'] = $type = 'enum';
|
|
||||||
$field['enum'] = $enum_info[$name];
|
$field['enum'] = $enum_info[$name];
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!empty($row['collation_name'])) {
|
||||||
|
$field['collate'] = $row['collation_name'];
|
||||||
|
}
|
||||||
|
|
||||||
$def['fields'][$name] = $field;
|
$def['fields'][$name] = $field;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -415,6 +418,7 @@ class PgsqlSchema extends Schema
|
|||||||
'integer' => 'int',
|
'integer' => 'int',
|
||||||
'char' => 'bpchar',
|
'char' => 'bpchar',
|
||||||
'datetime' => 'timestamp',
|
'datetime' => 'timestamp',
|
||||||
|
'enum' => 'text',
|
||||||
'blob' => 'bytea'
|
'blob' => 'bytea'
|
||||||
];
|
];
|
||||||
|
|
||||||
@ -442,6 +446,49 @@ class PgsqlSchema extends Schema
|
|||||||
return $type;
|
return $type;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Translate one of our abstract collation names into a PostgreSQL collation.
 *
 * Only 'utf8_bin', 'utf8_general_cs' and 'utf8_general_ci' are recognised.
 * Any other value is reported through common_log() and then treated as
 * 'utf8_bin'.
 *
 * @param string $collate abstract collation name
 * @return string either 'C' or 'und-x-icu'
 */
protected function collationToPostgreSQL(string $collate): string
{
    // @fixme No case-insensitivity support: both "general" variants
    // resolve to the same collation
    $native = [
        'utf8_bin'        => 'C',
        'utf8_general_cs' => 'und-x-icu',
        'utf8_general_ci' => 'und-x-icu',
    ];

    if (!isset($native[$collate])) {
        common_log(
            LOG_ERR,
            'Collation not supported: "' . $collate . '"'
        );
        // Unrecognised names degrade to the binary collation
        $collate = 'utf8_bin';
    }

    return $native[$collate];
}
|
||||||
|
|
||||||
|
/**
 * Build the type portion of a column definition, with its collation.
 *
 * The parent implementation produces the base SQL; a quoted COLLATE
 * clause is appended when the column is a string type and has a
 * non-empty 'collate' entry.
 *
 * @param string $name column name, handed to the parent implementation
 * @param array $column column definition
 * @return mixed the parent's result, with ' COLLATE "..."' appended when applicable
 */
public function typeAndSize(string $name, array $column)
{
    $sql = parent::typeAndSize($name, $column);

    // Nothing to add for non-string columns or when no collation is set
    if (!$this->isStringType($column) || empty($column['collate'])) {
        return $sql;
    }

    return $sql . ' COLLATE "' . $column['collate'] . '"';
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Append an SQL statement with an index definition for a full-text search
|
* Append an SQL statement with an index definition for a full-text search
|
||||||
* index over one or more columns on a table.
|
* index over one or more columns on a table.
|
||||||
@ -475,14 +522,16 @@ class PgsqlSchema extends Schema
|
|||||||
foreach ($tableDef['fields'] as $name => &$col) {
|
foreach ($tableDef['fields'] as $name => &$col) {
|
||||||
// No convenient support for field descriptions
|
// No convenient support for field descriptions
|
||||||
unset($col['description']);
|
unset($col['description']);
|
||||||
// @fixme Nor for MariaDB-specific collations
|
|
||||||
unset($col['collate']);
|
|
||||||
|
|
||||||
if ($col['type'] === 'serial') {
|
if ($col['type'] === 'serial') {
|
||||||
$col['type'] = 'int';
|
$col['type'] = 'int';
|
||||||
$col['auto_increment'] = true;
|
$col['auto_increment'] = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (!empty($col['collate'])) {
|
||||||
|
$col['collate'] = $this->collationToPostgreSQL($col['collate']);
|
||||||
|
}
|
||||||
|
|
||||||
$col['type'] = $this->mapType($col);
|
$col['type'] = $this->mapType($col);
|
||||||
unset($col['size']);
|
unset($col['size']);
|
||||||
}
|
}
|
||||||
|
@ -891,6 +891,48 @@ class Schema
|
|||||||
return null;
|
return null;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/**
 * Tell whether a column definition describes a string-typed column.
 *
 * The 'type' entry is lower-cased before it is looked up, so the check
 * does not depend on letter case.
 *
 * @param array $cd column definition; only its 'type' entry is read
 * @return bool true for char, varchar, text, bpchar and enum
 */
protected function isStringType(array $cd): bool
{
    $type = strtolower($cd['type']);

    return in_array($type, [
        'char',
        'varchar',
        'text',
        'bpchar', // PostgreSQL
        'enum',   // MariaDB
    ], true);
}
|
||||||
|
|
||||||
|
/**
 * Collation in our format from MariaDB format
 *
 * A leading 'utf8mb4_' is shortened to 'utf8_', and a leading
 * 'utf8_unicode_' is then renamed to 'utf8_general_'. If the result is
 * not one of 'utf8_bin', 'utf8_general_cs' or 'utf8_general_ci', an
 * error is logged and 'utf8_bin' is returned instead.
 *
 * @param string $collate collation name, in MariaDB or in our own format
 * @return string one of the three supported abstract collation names
 */
protected function collationFromMySQL(string $collate): string
{
    // Kept for the log message: the rewritten form below can be a name
    // that appears neither in a definition nor on the server
    $requested = $collate;

    if (substr($collate, 0, 8) === 'utf8mb4_') {
        $collate = 'utf8_' . substr($collate, 8);
    }
    if (substr($collate, 0, 13) === 'utf8_unicode_') {
        $collate = 'utf8_general_' . substr($collate, 13);
    }

    $supported = [
        'utf8_bin',
        'utf8_general_cs',
        'utf8_general_ci',
    ];
    if (!in_array($collate, $supported, true)) {
        common_log(
            LOG_ERR,
            'Collation not supported: "' . $requested . '"'
        );
        return 'utf8_bin';
    }

    return $collate;
}
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Return the proper SQL for creating or
|
* Return the proper SQL for creating or
|
||||||
* altering a column.
|
* altering a column.
|
||||||
@ -1059,6 +1101,15 @@ class Schema
|
|||||||
if (array_key_exists('not null', $col) && $col['not null'] !== true) {
|
if (array_key_exists('not null', $col) && $col['not null'] !== true) {
|
||||||
unset($col['not null']);
|
unset($col['not null']);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if ($this->isStringType($col)) {
|
||||||
|
// Default collation
|
||||||
|
if (empty($col['collate'])) {
|
||||||
|
$col['collate'] = 'utf8_bin';
|
||||||
|
}
|
||||||
|
// Migration from direct MariaDB collations
|
||||||
|
$col['collate'] = $this->collationFromMySQL($col['collate']);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (common_config('search', 'type') !== 'fulltext') {
|
if (common_config('search', 'type') !== 'fulltext') {
|
||||||
|
Loading…
Reference in New Issue
Block a user