diff --git a/classes/Notice.php b/classes/Notice.php
index 3ddb7b5526..8ef56ece03 100644
--- a/classes/Notice.php
+++ b/classes/Notice.php
@@ -79,7 +79,7 @@ class Notice extends Managed_DataObject
                 'id' => array('type' => 'serial', 'not null' => true, 'description' => 'unique identifier'),
                 'profile_id' => array('type' => 'int', 'not null' => true, 'description' => 'who made the update'),
                 'uri' => array('type' => 'varchar', 'length' => 191, 'description' => 'universally unique identifier, usually a tag URI'),
-                'content' => array('type' => 'text', 'description' => 'update content', 'collate' => 'utf8mb4_general_ci'),
+                'content' => array('type' => 'text', 'description' => 'update content', 'collate' => 'utf8_general_ci'),
                 'rendered' => array('type' => 'text', 'description' => 'HTML version of the content'),
                 'url' => array('type' => 'varchar', 'length' => 191, 'description' => 'URL of any attachment (image, video, bookmark, whatever)'),
                 'created' => array('type' => 'datetime', 'description' => 'date this record was created'),
diff --git a/classes/Profile.php b/classes/Profile.php
index a1f9567161..d9d5d77d07 100644
--- a/classes/Profile.php
+++ b/classes/Profile.php
@@ -46,12 +46,12 @@ class Profile extends Managed_DataObject
             'description' => 'local and remote users have profiles',
             'fields' => array(
                 'id' => array('type' => 'serial', 'not null' => true, 'description' => 'unique identifier'),
-                'nickname' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'nickname or username', 'collate' => 'utf8mb4_general_ci'),
-                'fullname' => array('type' => 'text', 'description' => 'display name', 'collate' => 'utf8mb4_general_ci'),
+                'nickname' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'nickname or username', 'collate' => 'utf8_general_ci'),
+                'fullname' => array('type' => 'text', 'description' => 'display name', 'collate' => 'utf8_general_ci'),
                 'profileurl' => array('type' => 'text', 'description' => 'URL, cached so we dont regenerate'),
-                'homepage' => array('type' => 'text', 'description' => 'identifying URL', 'collate' => 'utf8mb4_general_ci'),
-                'bio' => array('type' => 'text', 'description' => 'descriptive biography', 'collate' => 'utf8mb4_general_ci'),
-                'location' => array('type' => 'text', 'description' => 'physical location', 'collate' => 'utf8mb4_general_ci'),
+                'homepage' => array('type' => 'text', 'description' => 'identifying URL', 'collate' => 'utf8_general_ci'),
+                'bio' => array('type' => 'text', 'description' => 'descriptive biography', 'collate' => 'utf8_general_ci'),
+                'location' => array('type' => 'text', 'description' => 'physical location', 'collate' => 'utf8_general_ci'),
                 'lat' => array('type' => 'numeric', 'precision' => 10, 'scale' => 7, 'description' => 'latitude'),
                 'lon' => array('type' => 'numeric', 'precision' => 10, 'scale' => 7, 'description' => 'longitude'),
                 'location_id' => array('type' => 'int', 'description' => 'location id if possible'),
diff --git a/lib/database/mysqlschema.php b/lib/database/mysqlschema.php
index ae1a1ce57a..4f0c461040 100644
--- a/lib/database/mysqlschema.php
+++ b/lib/database/mysqlschema.php
@@ -147,10 +147,8 @@ class MysqlSchema extends Schema
                 }
             }

-            $table_props = $this->getTableProperties($table, ['TABLE_COLLATION']);
-            $collate = $row['COLLATION_NAME'];
-            if (!empty($collate) && $collate !== $table_props['TABLE_COLLATION']) {
-                $field['collate'] = $collate;
+            if (!empty($row['COLLATION_NAME'])) {
+                $field['collate'] = $row['COLLATION_NAME'];
             }

             $def['fields'][$name] = $field;
@@ -471,17 +469,6 @@ class MysqlSchema extends Schema
         return in_array(strtolower($cd['type']), $ints);
     }

-    /**
-     * Is this column a string type?
-     * @param array $cd
-     * @return bool
-     */
-    private function isStringType(array $cd): bool
-    {
-        $strings = ['char', 'varchar', 'text'];
-        return in_array(strtolower($cd['type']), $strings);
-    }
-
     /**
      * Return the proper SQL for creating or
      * altering a column.
@@ -547,6 +534,34 @@ class MysqlSchema extends Schema
         return $type;
     }

+    /**
+     * Collation in MariaDB format from our format
+     * NOTE(review): utf8_general_cs becomes utf8mb4_unicode_cs; confirm the server has it
+     * @param string $collate One of utf8_bin, utf8_general_cs, utf8_general_ci
+     * @return string MariaDB collation name (unsupported input is logged, then treated as utf8_bin)
+     */
+    protected function collationToMySQL(string $collate): string
+    {
+        if (!in_array($collate, [
+            'utf8_bin',
+            'utf8_general_cs',
+            'utf8_general_ci',
+        ], true)) {
+            common_log(
+                LOG_ERR,
+                'Collation not supported: "' . $collate . '"'
+            );
+            $collate = 'utf8_bin';
+        }
+
+        if (substr($collate, 0, 13) === 'utf8_general_') {
+            $collate = 'utf8mb4_unicode_' . substr($collate, 13);
+        } elseif (substr($collate, 0, 5) === 'utf8_') {
+            $collate = 'utf8mb4_' . substr($collate, 5);
+        }
+        return $collate;
+    }
+
     public function typeAndSize(string $name, array $column)
     {
         if ($column['type'] === 'enum') {
@@ -581,15 +596,6 @@ class MysqlSchema extends Schema
     {
         $tableDef = parent::filterDef($tableName, $tableDef);

-        // Get existing table collation if the table exists.
-        // To know if collation that's been set is unique for the table.
-        try {
-            $table_props = $this->getTableProperties($tableName, ['TABLE_COLLATION']);
-            $table_collate = $table_props['TABLE_COLLATION'];
-        } catch (SchemaTableMissingException $e) {
-            $table_collate = null;
-        }
-
         foreach ($tableDef['fields'] as $name => &$col) {
             switch ($col['type']) {
                 case 'serial':
@@ -603,9 +609,8 @@ class MysqlSchema extends Schema
                     break;
             }

-            if (!empty($col['collate'])
-                && $col['collate'] === $table_collate) {
-                unset($col['collate']);
+            if (!empty($col['collate'])) {
+                $col['collate'] = $this->collationToMySQL($col['collate']);
             }

             $col['type'] = $this->mapType($col);
diff --git a/lib/database/pgsqlschema.php b/lib/database/pgsqlschema.php
index ceb966b381..1f47e69755 100644
--- a/lib/database/pgsqlschema.php
+++ b/lib/database/pgsqlschema.php
@@ -132,10 +132,13 @@ class PgsqlSchema extends Schema
             ) {
                 $field['auto_increment'] = true;
             } elseif (array_key_exists($name, $enum_info)) {
-                $field['type'] = $type = 'enum';
                 $field['enum'] = $enum_info[$name];
             }

+            if (!empty($row['collation_name'])) {
+                $field['collate'] = $row['collation_name'];
+            }
+
             $def['fields'][$name] = $field;
         }

@@ -415,6 +418,7 @@ class PgsqlSchema extends Schema
             'integer' => 'int',
             'char' => 'bpchar',
             'datetime' => 'timestamp',
+            'enum' => 'text',
             'blob' => 'bytea'
         ];

@@ -442,6 +446,49 @@ class PgsqlSchema extends Schema
         return $type;
     }

+    /**
+     * Collation in PostgreSQL format from our format
+     *
+     * @param string $collate One of utf8_bin, utf8_general_cs, utf8_general_ci
+     * @return string "und-x-icu" for utf8_general_*, "C" for utf8_bin (also the fallback)
+     */
+    protected function collationToPostgreSQL(string $collate): string
+    {
+        if (!in_array($collate, [
+            'utf8_bin',
+            'utf8_general_cs',
+            'utf8_general_ci',
+        ], true)) {
+            common_log(
+                LOG_ERR,
+                'Collation not supported: "' . $collate . '"'
+            );
+            $collate = 'utf8_bin';
+        }
+
+        // @fixme No case-insensitivity support
+        if (substr($collate, 0, 13) === 'utf8_general_') {
+            $collate = 'und-x-icu';
+        } elseif ($collate === 'utf8_bin') {
+            $collate = 'C';
+        }
+
+        return $collate;
+    }
+
+    public function typeAndSize(string $name, array $column)
+    {
+        $col = parent::typeAndSize($name, $column);
+
+        if ($this->isStringType($column)) {
+            if (!empty($column['collate'])) {
+                $col .= ' COLLATE "' . $column['collate'] . '"';
+            }
+        }
+
+        return $col;
+    }
+
     /**
      * Append an SQL statement with an index definition for a full-text search
      * index over one or more columns on a table.
@@ -475,14 +522,16 @@ class PgsqlSchema extends Schema
         foreach ($tableDef['fields'] as $name => &$col) {
             // No convenient support for field descriptions
             unset($col['description']);
-            // @fixme Nor for MariaDB-specific collations
-            unset($col['collate']);

             if ($col['type'] === 'serial') {
                 $col['type'] = 'int';
                 $col['auto_increment'] = true;
             }

+            if (!empty($col['collate'])) {
+                $col['collate'] = $this->collationToPostgreSQL($col['collate']);
+            }
+
             $col['type'] = $this->mapType($col);
             unset($col['size']);
         }
diff --git a/lib/database/schema.php b/lib/database/schema.php
index b5d34dce2b..6a085d343a 100644
--- a/lib/database/schema.php
+++ b/lib/database/schema.php
@@ -891,6 +891,48 @@ class Schema
         return null;
     }

+    /**
+     * Is this column a string type?
+     *
+     * @param array $cd Column definition; only its 'type' key is read
+     * @return bool True when the type is char, varchar, text, bpchar or enum (case-insensitive)
+     */
+    protected function isStringType(array $cd): bool
+    {
+        $strings = ['char', 'varchar', 'text'];
+        $strings[] = 'bpchar'; // PostgreSQL
+        $strings[] = 'enum'; // MariaDB
+        return in_array(strtolower($cd['type']), $strings, true);
+    }
+
+    /**
+     * Collation in our format from MariaDB format
+     *
+     * @param string $collate Collation name, with or without the utf8mb4_ prefix
+     * @return string One of utf8_bin, utf8_general_cs, utf8_general_ci (utf8_bin when unrecognised)
+     */
+    protected function collationFromMySQL(string $collate): string
+    {
+        if (substr($collate, 0, 8) === 'utf8mb4_') {
+            $collate = 'utf8_' . substr($collate, 8);
+        }
+        if (substr($collate, 0, 13) === 'utf8_unicode_') {
+            $collate = 'utf8_general_' . substr($collate, 13);
+        }
+        if (!in_array($collate, [
+            'utf8_bin',
+            'utf8_general_cs',
+            'utf8_general_ci',
+        ], true)) {
+            common_log(
+                LOG_ERR,
+                'Collation not supported: "' . $collate . '"'
+            );
+            $collate = 'utf8_bin';
+        }
+        return $collate;
+    }
+
     /**
      * Return the proper SQL for creating or
      * altering a column.
@@ -1059,6 +1101,15 @@ class Schema
             if (array_key_exists('not null', $col) && $col['not null'] !== true) {
                 unset($col['not null']);
             }
+
+            if ($this->isStringType($col)) {
+                // Default collation
+                if (empty($col['collate'])) {
+                    $col['collate'] = 'utf8_bin';
+                }
+                // Migration from direct MariaDB collations
+                $col['collate'] = $this->collationFromMySQL($col['collate']);
+            }
         }

         if (common_config('search', 'type') !== 'fulltext') {