Set the character set before making a connection

Ideally the character set should be set with the connection, and so this is
exactly what's being done now.

And now the character set code is attempted to be generalised.
This commit is contained in:
Alexei Sorokin 2020-09-16 19:34:49 +03:00 committed by Diogo Peralta Cordeiro
parent 0b4a49d262
commit 318d2cb6ca
9 changed files with 118 additions and 54 deletions

View File

@ -863,9 +863,6 @@ class Memcached_DataObject extends Safe_DataObject
return $string;
}
// We overload so that 'SET NAMES "utf8mb4"' is called for
// each connection
public function _connect()
{
global $_DB_DATAOBJECT, $_PEAR;
@ -908,13 +905,10 @@ class Memcached_DataObject extends Safe_DataObject
if ($result && !$exists) {
// Required to make timestamp values usefully comparable.
// And set the character set to UTF-8.
if (common_config('db', 'type') !== 'mysql') {
parent::_query("SET TIME ZONE INTERVAL '+00:00' HOUR TO MINUTE");
parent::_query("SET NAMES 'UTF8'");
} else {
parent::_query("SET time_zone = '+0:00'");
parent::_query("SET NAMES 'utf8mb4'");
}
}

View File

@ -361,16 +361,36 @@ class MysqlSchema extends Schema
*
* @param string $name
* @param array $def
* @return string;
*
* @return string
*/
public function endCreateTable($name, array $def)
{
$engine = $this->preferredEngine($def);
return ") ENGINE=$engine CHARACTER SET utf8mb4 COLLATE utf8mb4_bin";
$engine = self::storageEngine($def);
$charset = self::charset();
return ") ENGINE '{$engine}' "
. "DEFAULT CHARACTER SET '{$charset}' "
. "DEFAULT COLLATE '{$charset}_bin'";
}
public function preferredEngine($def)
/**
* Returns the character set of choice for MariaDB.
* Overrides default standard "UTF8".
*
* @return string
*/
public static function charset(): string
{
return 'utf8mb4';
}
/**
* Returns the storage engine of choice for the supplied definition.
*
* @param array $def
* @return string
*/
protected static function storageEngine(array $def): string
{
return 'InnoDB';
}
@ -432,18 +452,18 @@ class MysqlSchema extends Schema
*/
public function appendAlterExtras(array &$phrase, $tableName, array $def)
{
// Check for table properties: make sure we're using a sane
// engine type and collation.
// @fixme make the default engine configurable?
// Check for table properties: make sure we are using sane
// storage engine, character set and collation.
$oldProps = $this->getTableProperties($tableName, ['ENGINE', 'TABLE_COLLATION']);
$engine = $this->preferredEngine($def);
if (strtolower($oldProps['ENGINE']) != strtolower($engine)) {
$phrase[] = "ENGINE=$engine";
$engine = self::storageEngine($def);
$charset = self::charset();
if (mb_strtolower($oldProps['ENGINE']) !== mb_strtolower($engine)) {
$phrase[] = "ENGINE '{$engine}'";
}
if (strtolower($oldProps['TABLE_COLLATION']) != 'utf8mb4_bin') {
$phrase[] = 'CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_bin';
$phrase[] = 'DEFAULT CHARACTER SET = utf8mb4';
$phrase[] = 'DEFAULT COLLATE = utf8mb4_bin';
if (strtolower($oldProps['TABLE_COLLATION']) !== "{$charset}_bin") {
$phrase[] = "CONVERT TO CHARACTER SET '{$charset} COLLATE '{$charset}_bin'";
$phrase[] = "DEFAULT CHARACTER SET '{$charset}'";
$phrase[] = "DEFAULT COLLATE '{$charset}_bin'";
}
}
@ -573,11 +593,11 @@ class MysqlSchema extends Schema
foreach ($column['enum'] as &$val) {
$vals[] = "'{$val}'";
}
return 'enum(' . implode(',', $vals) . ')';
return 'ENUM(' . implode(',', $vals) . ')';
} elseif ($this->isStringType($column)) {
$col = parent::typeAndSize($name, $column);
if (!empty($column['collate'])) {
$col .= ' COLLATE ' . $column['collate'];
$col .= " COLLATE '{$column['collate']}'";
}
return $col;
} else {

View File

@ -456,6 +456,16 @@ class Schema
return true;
}
/**
* Returns the character set of choice for this DBMS (in practice, UTF-8).
*
* @return string
*/
public static function charset(): string
{
return 'UTF8';
}
/**
* Modifies a column in the schema.
*

View File

@ -54,7 +54,6 @@ class GNUsocial
$moduleclass = "{$name}Module";
if (!class_exists($moduleclass)) {
$files = [
"modules/{$moduleclass}.php",
"modules/{$name}/{$moduleclass}.php"
@ -120,7 +119,6 @@ class GNUsocial
$moduleclass = "{$name}Plugin";
if (!class_exists($moduleclass)) {
$files = [
"local/plugins/{$moduleclass}.php",
"local/plugins/{$name}/{$moduleclass}.php",
@ -466,18 +464,30 @@ class GNUsocial
}
if (!self::$have_config) {
throw new NoConfigException("No configuration file found.",
$config_files);
throw new NoConfigException(
'No configuration file found.',
$config_files
);
}
$dsn = $config['db']['database'];
// Check for database server; must exist!
if (empty($config['db']['database'])) {
throw new ServerException("No database server for this site.");
}
if (empty($dsn)) {
throw new ServerException('No database server for this site.');
}
static function fillConfigVoids()
$database = MDB2::parseDSN($dsn);
if (empty($database['phptype'])) {
throw new ServerException('Database server for this site is invalid.');
}
$database['charset'] = common_database_charset();
$config['db']['database'] = $database;
}
public static function fillConfigVoids()
{
// special cases on empty configuration options
if (!common_config('thumbnail', 'dir')) {
@ -492,7 +502,7 @@ class GNUsocial
* Might make changes to the filesystem, to created dirs, but will
* not make database changes.
*/
static function verifyLoadedConfig()
public static function verifyLoadedConfig()
{
$mkdirs = [];
@ -533,7 +543,7 @@ class GNUsocial
* @return boolean true if we're running with HTTPS; else false
*/
static function isHTTPS()
public static function isHTTPS()
{
if (common_config('site', 'sslproxy')) {
return true;
@ -551,7 +561,7 @@ class GNUsocial
/**
* Can we use HTTPS? Then do! Only return false if it's not configured ("never").
*/
static function useHTTPS()
public static function useHTTPS()
{
return self::isHTTPS() || common_config('site', 'ssl') != 'never';
}
@ -561,7 +571,7 @@ class NoConfigException extends Exception
{
public $configFiles;
function __construct($msg, $configFiles)
public function __construct($msg, $configFiles)
{
parent::__construct($msg);
$this->configFiles = $configFiles;

View File

@ -67,11 +67,13 @@ abstract class Installer
'name' => 'MariaDB 10.3+',
'check_module' => 'mysqli',
'scheme' => 'mysqli', // DSN prefix for MDB2
'charset' => 'utf8mb4',
],
'pgsql' => [
'name' => 'PostgreSQL 11+',
'check_module' => 'pgsql',
'scheme' => 'pgsql', // DSN prefix for MDB2
'charset' => 'UTF8',
]
];
@ -296,14 +298,12 @@ abstract class Installer
$dsn = "{$scheme}://{$this->username}{$auth}@{$this->host}/{$this->database}";
$this->updateStatus('Checking database...');
$conn = $this->connectDatabase($dsn);
$charset = self::$dbModules[$this->dbtype]['charset'];
$conn = $this->connectDatabase($dsn, $charset);
$charset = ($this->dbtype !== 'mysql') ? 'UTF8' : 'utf8mb4';
$server_charset = $this->getDatabaseCharset($conn, $this->dbtype);
// Ensure the database server character set is UTF-8.
$conn->exec("SET NAMES '{$charset}'");
if ($server_charset !== $charset) {
$this->updateStatus(
'GNU social requires the "' . $charset . '" character set. '
@ -346,11 +346,17 @@ abstract class Installer
* Open a connection to the database.
*
* @param string $dsn
* @param string $charset
* @return MDB2_Driver_Common
* @throws Exception
*/
protected function connectDatabase(string $dsn)
protected function connectDatabase(string $dsn, string $charset)
{
$dsn = MDB2::parseDSN($dsn);
// Ensure the database client character set is UTF-8.
$dsn['charset'] = $charset;
$conn = MDB2::connect($dsn);
if (MDB2::isError($conn)) {
throw new Exception(
@ -375,25 +381,31 @@ abstract class Installer
switch ($dbtype) {
case 'pgsql':
$res = $conn->query('SHOW server_encoding');
if (MDB2::isError($res)) {
throw new Exception($res->getMessage());
}
$ret = $res->fetchOne();
break;
case 'mysql':
$res = $conn->query(
"SHOW SESSION VARIABLES LIKE 'character_set_database'"
$stmt = $conn->prepare(
<<<END
SELECT DEFAULT_CHARACTER_SET_NAME
FROM INFORMATION_SCHEMA.SCHEMATA
WHERE SCHEMA_NAME = ?
END,
['text'],
MDB2_PREPARE_RESULT
);
if (MDB2::isError($res)) {
throw new Exception($res->getMessage());
if (MDB2::isError($stmt)) {
return null;
}
[, $ret] = $res->fetchRow(MDB2_FETCHMODE_ORDERED);
$res = $stmt->execute([$database]);
break;
default:
throw new Exception('Unknown DB type selected.');
}
if (MDB2::isError($res)) {
throw new Exception($res->getMessage());
}
$ret = $res->fetchOne();
if (MDB2::isError($ret)) {
throw new Exception($ret->getMessage());
}

View File

@ -2390,6 +2390,15 @@ function common_compatible_license($from, $to)
return ($from == $to);
}
/**
* returns the character set name used for the current DBMS
*/
function common_database_charset(): string
{
$schema = Schema::get();
return $schema->charset();
}
/**
* returns a quoted table name
*/

View File

@ -49,6 +49,11 @@ class SQLStore_DB_Connection extends Auth_OpenID_DatabaseConnection
}
$dsn['new_link'] = true;
if (array_key_exists('charset', $options)) {
$dsn['charset'] = $options['charset'];
unset($options['charset']);
}
// To create a new Database connection is an absolute must, because
// php-openid code delays its transactions commitment.
// Is a must because our Internal Session Handler uses the database

View File

@ -45,6 +45,9 @@ function oid_store()
if (!is_array($options)) {
$options = [];
}
// A special case, carry the charset.
$options['charset'] = common_database_charset();
$dbconn = new SQLStore_DB_Connection($dsn, $options);
switch (common_config('db', 'type')) {

View File

@ -39,11 +39,11 @@ END_OF_TRIM_HELP;
require_once INSTALLDIR . '/scripts/commandline.inc';
require dirname(__FILE__) . '/sphinx-utils.php';
$base = $base ?? '/usr/local';
$timestamp = date('r');
print <<<END
echo <<<END
#
# Sphinx configuration for StatusNet
# Sphinx configuration for GNU social
# Generated {$timestamp}
#
@ -109,8 +109,9 @@ function sphinx_template($sn, $table, $query, $query_info)
{
$base = sphinx_base();
$dbtype = common_config('db', 'type');
$charset = common_database_charset();
print <<<END
echo <<<END
#
# {$sn->sitename}
@ -122,7 +123,7 @@ source {$sn->dbname}_src_{$table}
sql_user = {$sn->dbuser}
sql_pass = {$sn->dbpass}
sql_db = {$sn->dbname}
sql_query_pre = SET NAMES utf8;
sql_query_pre = SET NAMES '{$charset}';
sql_query = {$query}
sql_query_info = {$query_info}
sql_attr_timestamp = created_ts