forked from GNUsocial/gnu-social
		
	[DATABASE] Change collation handling
Until now, table definitions could specify collations only for MariaDB, using MariaDB's collation names directly. Definitions now use a slightly more abstract collation name syntax that supports only the collations utf8mb4_bin and utf8mb4_unicode_(cs|ci) (exposed as utf8_bin and utf8_general_(cs|ci)), because those are the ones of practical use to GNU social. As a consequence, on MariaDB the formerly used utf8mb4_general_(cs|ci) collations are superseded by utf8mb4_unicode_(cs|ci), their more modern replacement. This commit also introduces collation support on PostgreSQL: utf8_bin maps to the C (POSIX) collation and utf8_general_cs maps to the und-x-icu collation. Unfortunately, utf8_general_ci is also mapped to und-x-icu, which makes it case-sensitive.
This commit is contained in:
		| @@ -79,7 +79,7 @@ class Notice extends Managed_DataObject | ||||
|                 'id' => array('type' => 'serial', 'not null' => true, 'description' => 'unique identifier'), | ||||
|                 'profile_id' => array('type' => 'int', 'not null' => true, 'description' => 'who made the update'), | ||||
|                 'uri' => array('type' => 'varchar', 'length' => 191, 'description' => 'universally unique identifier, usually a tag URI'), | ||||
|                 'content' => array('type' => 'text', 'description' => 'update content', 'collate' => 'utf8mb4_general_ci'), | ||||
|                 'content' => array('type' => 'text', 'description' => 'update content', 'collate' => 'utf8_general_ci'), | ||||
|                 'rendered' => array('type' => 'text', 'description' => 'HTML version of the content'), | ||||
|                 'url' => array('type' => 'varchar', 'length' => 191, 'description' => 'URL of any attachment (image, video, bookmark, whatever)'), | ||||
|                 'created' => array('type' => 'datetime', 'description' => 'date this record was created'), | ||||
|   | ||||
| @@ -46,12 +46,12 @@ class Profile extends Managed_DataObject | ||||
|             'description' => 'local and remote users have profiles', | ||||
|             'fields' => array( | ||||
|                 'id' => array('type' => 'serial', 'not null' => true, 'description' => 'unique identifier'), | ||||
|                 'nickname' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'nickname or username', 'collate' => 'utf8mb4_general_ci'), | ||||
|                 'fullname' => array('type' => 'text', 'description' => 'display name', 'collate' => 'utf8mb4_general_ci'), | ||||
|                 'nickname' => array('type' => 'varchar', 'length' => 64, 'not null' => true, 'description' => 'nickname or username', 'collate' => 'utf8_general_ci'), | ||||
|                 'fullname' => array('type' => 'text', 'description' => 'display name', 'collate' => 'utf8_general_ci'), | ||||
|                 'profileurl' => array('type' => 'text', 'description' => 'URL, cached so we dont regenerate'), | ||||
|                 'homepage' => array('type' => 'text', 'description' => 'identifying URL', 'collate' => 'utf8mb4_general_ci'), | ||||
|                 'bio' => array('type' => 'text', 'description' => 'descriptive biography', 'collate' => 'utf8mb4_general_ci'), | ||||
|                 'location' => array('type' => 'text', 'description' => 'physical location', 'collate' => 'utf8mb4_general_ci'), | ||||
|                 'homepage' => array('type' => 'text', 'description' => 'identifying URL', 'collate' => 'utf8_general_ci'), | ||||
|                 'bio' => array('type' => 'text', 'description' => 'descriptive biography', 'collate' => 'utf8_general_ci'), | ||||
|                 'location' => array('type' => 'text', 'description' => 'physical location', 'collate' => 'utf8_general_ci'), | ||||
|                 'lat' => array('type' => 'numeric', 'precision' => 10, 'scale' => 7, 'description' => 'latitude'), | ||||
|                 'lon' => array('type' => 'numeric', 'precision' => 10, 'scale' => 7, 'description' => 'longitude'), | ||||
|                 'location_id' => array('type' => 'int', 'description' => 'location id if possible'), | ||||
|   | ||||
| @@ -147,10 +147,8 @@ class MysqlSchema extends Schema | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             $table_props = $this->getTableProperties($table, ['TABLE_COLLATION']); | ||||
|             $collate = $row['COLLATION_NAME']; | ||||
|             if (!empty($collate) && $collate !== $table_props['TABLE_COLLATION']) { | ||||
|                 $field['collate'] = $collate; | ||||
|             if (!empty($row['COLLATION_NAME'])) { | ||||
|                 $field['collate'] = $row['COLLATION_NAME']; | ||||
|             } | ||||
|  | ||||
|             $def['fields'][$name] = $field; | ||||
| @@ -471,17 +469,6 @@ class MysqlSchema extends Schema | ||||
|         return in_array(strtolower($cd['type']), $ints); | ||||
|     } | ||||
|  | ||||
/**
 * Tell whether a column definition describes a string column.
 *
 * The definition's 'type' entry is lower-cased and compared against
 * 'char', 'varchar' and 'text'.
 *
 * @param array $cd column definition; its 'type' key is read
 * @return bool true when the type is one of the string types above
 */
private function isStringType(array $cd): bool
{
    $type = strtolower($cd['type']);

    return $type === 'char'
        || $type === 'varchar'
        || $type === 'text';
}
|  | ||||
|     /** | ||||
|      * Return the proper SQL for creating or | ||||
|      * altering a column. | ||||
| @@ -547,6 +534,34 @@ class MysqlSchema extends Schema | ||||
|         return $type; | ||||
|     } | ||||
|  | ||||
/**
 * Translate one of our abstract collation names into MariaDB's name.
 *
 * 'utf8_bin' becomes 'utf8mb4_bin' and 'utf8_general_(cs|ci)' becomes
 * 'utf8mb4_unicode_(cs|ci)'. Any other name is logged as an error and
 * handled as if 'utf8_bin' had been given.
 *
 * @param string $collate collation in our format
 * @return string collation in MariaDB format
 */
protected function collationToMySQL(string $collate): string
{
    // NOTE(review): confirm the target server really provides a
    // collation named utf8mb4_unicode_cs before relying on the cs entry.
    $translations = [
        'utf8_bin'        => 'utf8mb4_bin',
        'utf8_general_cs' => 'utf8mb4_unicode_cs',
        'utf8_general_ci' => 'utf8mb4_unicode_ci',
    ];

    if (!isset($translations[$collate])) {
        common_log(
            LOG_ERR,
            'Collation not supported: "' . $collate . '"'
        );
        // Unknown names fall back to the binary collation
        return $translations['utf8_bin'];
    }

    return $translations[$collate];
}
|  | ||||
|     public function typeAndSize(string $name, array $column) | ||||
|     { | ||||
|         if ($column['type'] === 'enum') { | ||||
| @@ -581,15 +596,6 @@ class MysqlSchema extends Schema | ||||
|     { | ||||
|         $tableDef = parent::filterDef($tableName, $tableDef); | ||||
|  | ||||
|         // Get existing table collation if the table exists. | ||||
|         // To know if collation that's been set is unique for the table. | ||||
|         try { | ||||
|             $table_props = $this->getTableProperties($tableName, ['TABLE_COLLATION']); | ||||
|             $table_collate = $table_props['TABLE_COLLATION']; | ||||
|         } catch (SchemaTableMissingException $e) { | ||||
|             $table_collate = null; | ||||
|         } | ||||
|  | ||||
|         foreach ($tableDef['fields'] as $name => &$col) { | ||||
|             switch ($col['type']) { | ||||
|                 case 'serial': | ||||
| @@ -603,9 +609,8 @@ class MysqlSchema extends Schema | ||||
|                     break; | ||||
|             } | ||||
|  | ||||
|             if (!empty($col['collate']) | ||||
|                 && $col['collate'] === $table_collate) { | ||||
|                 unset($col['collate']); | ||||
|             if (!empty($col['collate'])) { | ||||
|                 $col['collate'] = $this->collationToMySQL($col['collate']); | ||||
|             } | ||||
|  | ||||
|             $col['type'] = $this->mapType($col); | ||||
|   | ||||
| @@ -132,10 +132,13 @@ class PgsqlSchema extends Schema | ||||
|             ) { | ||||
|                 $field['auto_increment'] = true; | ||||
|             } elseif (array_key_exists($name, $enum_info)) { | ||||
|                 $field['type'] = $type = 'enum'; | ||||
|                 $field['enum'] = $enum_info[$name]; | ||||
|             } | ||||
|  | ||||
|             if (!empty($row['collation_name'])) { | ||||
|                 $field['collate'] = $row['collation_name']; | ||||
|             } | ||||
|  | ||||
|             $def['fields'][$name] = $field; | ||||
|         } | ||||
|  | ||||
| @@ -415,6 +418,7 @@ class PgsqlSchema extends Schema | ||||
|             'integer'  => 'int', | ||||
|             'char'     => 'bpchar', | ||||
|             'datetime' => 'timestamp', | ||||
|             'enum'     => 'text', | ||||
|             'blob'     => 'bytea' | ||||
|         ]; | ||||
|  | ||||
| @@ -442,6 +446,49 @@ class PgsqlSchema extends Schema | ||||
|         return $type; | ||||
|     } | ||||
|  | ||||
/**
 * Translate one of our abstract collation names into PostgreSQL's name.
 *
 * 'utf8_bin' becomes 'C'; both 'utf8_general_cs' and 'utf8_general_ci'
 * become 'und-x-icu'. Any other name is logged as an error and handled
 * as if 'utf8_bin' had been given.
 *
 * @param string $collate collation in our format
 * @return string collation in PostgreSQL format
 */
protected function collationToPostgreSQL(string $collate): string
{
    // @fixme No case-insensitivity support: the ci variant maps to the
    // same collation as the cs variant
    $translations = [
        'utf8_bin'        => 'C',
        'utf8_general_cs' => 'und-x-icu',
        'utf8_general_ci' => 'und-x-icu',
    ];

    if (!isset($translations[$collate])) {
        common_log(
            LOG_ERR,
            'Collation not supported: "' . $collate . '"'
        );
        // Unknown names fall back to the binary collation
        return $translations['utf8_bin'];
    }

    return $translations[$collate];
}
|  | ||||
/**
 * Build the type-and-size SQL for a column, appending a double-quoted
 * COLLATE clause when a string column carries a non-empty 'collate'.
 *
 * @param string $name column name, passed through to the parent
 * @param array $column column definition; 'type' and 'collate' are read
 * @return string the parent's SQL, with the COLLATE clause if applicable
 */
public function typeAndSize(string $name, array $column)
{
    $sql = parent::typeAndSize($name, $column);

    // Collations are only emitted for string columns that name one
    if (!$this->isStringType($column) || empty($column['collate'])) {
        return $sql;
    }

    return $sql . ' COLLATE "' . $column['collate'] . '"';
}
|  | ||||
|     /** | ||||
|      * Append an SQL statement with an index definition for a full-text search | ||||
|      * index over one or more columns on a table. | ||||
| @@ -475,14 +522,16 @@ class PgsqlSchema extends Schema | ||||
|         foreach ($tableDef['fields'] as $name => &$col) { | ||||
|             // No convenient support for field descriptions | ||||
|             unset($col['description']); | ||||
|             // @fixme Nor for MariaDB-specific collations | ||||
|             unset($col['collate']); | ||||
|  | ||||
|             if ($col['type'] === 'serial') { | ||||
|                 $col['type'] = 'int'; | ||||
|                 $col['auto_increment'] = true; | ||||
|             } | ||||
|  | ||||
|             if (!empty($col['collate'])) { | ||||
|                 $col['collate'] = $this->collationToPostgreSQL($col['collate']); | ||||
|             } | ||||
|  | ||||
|             $col['type'] = $this->mapType($col); | ||||
|             unset($col['size']); | ||||
|         } | ||||
|   | ||||
| @@ -891,6 +891,48 @@ class Schema | ||||
|         return null; | ||||
|     } | ||||
|  | ||||
/**
 * Tell whether a column definition describes a string column.
 *
 * The definition's 'type' entry is lower-cased and looked up in the
 * list of string types, which also covers 'bpchar' (PostgreSQL) and
 * 'enum' (MariaDB).
 *
 * @param array $cd column definition; its 'type' key is read
 * @return bool true when the type is one of the listed string types
 */
protected function isStringType(array $cd): bool
{
    $stringTypes = [
        'char',
        'varchar',
        'text',
        'bpchar', // PostgreSQL
        'enum',   // MariaDB
    ];

    return in_array(strtolower($cd['type']), $stringTypes, true);
}
|  | ||||
/**
 * Translate a MariaDB collation name into our abstract format.
 *
 * A leading 'utf8mb4_' is shortened to 'utf8_', then a leading
 * 'utf8_unicode_' is rewritten to 'utf8_general_'. If the result is not
 * 'utf8_bin', 'utf8_general_cs' or 'utf8_general_ci', an error is logged
 * and 'utf8_bin' is returned instead.
 *
 * @param string $collate collation in MariaDB format (or already ours)
 * @return string collation in our format
 */
protected function collationFromMySQL(string $collate): string
{
    // Step 1: drop the "mb4" part of the character set prefix
    if (strncmp($collate, 'utf8mb4_', 8) === 0) {
        $collate = 'utf8_' . substr($collate, 8);
    }

    // Step 2: the unicode family is exposed under the "general" name
    if (strncmp($collate, 'utf8_unicode_', 13) === 0) {
        $collate = 'utf8_general_' . substr($collate, 13);
    }

    $supported = ['utf8_bin', 'utf8_general_cs', 'utf8_general_ci'];
    if (in_array($collate, $supported, true)) {
        return $collate;
    }

    common_log(
        LOG_ERR,
        'Collation not supported: "' . $collate . '"'
    );
    return 'utf8_bin';
}
|  | ||||
|     /** | ||||
|      * Return the proper SQL for creating or | ||||
|      * altering a column. | ||||
| @@ -1059,6 +1101,15 @@ class Schema | ||||
|             if (array_key_exists('not null', $col) && $col['not null'] !== true) { | ||||
|                 unset($col['not null']); | ||||
|             } | ||||
|  | ||||
|             if ($this->isStringType($col)) { | ||||
|                 // Default collation | ||||
|                 if (empty($col['collate'])) { | ||||
|                     $col['collate'] = 'utf8_bin'; | ||||
|                 } | ||||
|                 // Migration from direct MariaDB collations | ||||
|                 $col['collate'] = $this->collationFromMySQL($col['collate']); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         if (common_config('search', 'type') !== 'fulltext') { | ||||
|   | ||||
		Reference in New Issue
	
	Block a user