Set the character set before making a connection

Ideally the character set should be set as part of establishing the
connection, and that is exactly what is done now.

This commit also attempts to generalise the character set code, so that each DBMS schema class reports its own character set.
This commit is contained in:
Alexei Sorokin 2020-09-16 19:34:49 +03:00
parent c2508f8fa2
commit aed2344bd4
9 changed files with 118 additions and 54 deletions

View File

@ -863,9 +863,6 @@ class Memcached_DataObject extends Safe_DataObject
return $string; return $string;
} }
// We overload so that 'SET NAMES "utf8mb4"' is called for
// each connection
public function _connect() public function _connect()
{ {
global $_DB_DATAOBJECT, $_PEAR; global $_DB_DATAOBJECT, $_PEAR;
@ -908,13 +905,10 @@ class Memcached_DataObject extends Safe_DataObject
if ($result && !$exists) { if ($result && !$exists) {
// Required to make timestamp values usefully comparable. // Required to make timestamp values usefully comparable.
// And set the character set to UTF-8.
if (common_config('db', 'type') !== 'mysql') { if (common_config('db', 'type') !== 'mysql') {
parent::_query("SET TIME ZONE INTERVAL '+00:00' HOUR TO MINUTE"); parent::_query("SET TIME ZONE INTERVAL '+00:00' HOUR TO MINUTE");
parent::_query("SET NAMES 'UTF8'");
} else { } else {
parent::_query("SET time_zone = '+0:00'"); parent::_query("SET time_zone = '+0:00'");
parent::_query("SET NAMES 'utf8mb4'");
} }
} }

View File

@ -361,16 +361,36 @@ class MysqlSchema extends Schema
* *
* @param string $name * @param string $name
* @param array $def * @param array $def
* @return string;
* *
* @return string
*/ */
public function endCreateTable($name, array $def) public function endCreateTable($name, array $def)
{ {
$engine = $this->preferredEngine($def); $engine = self::storageEngine($def);
return ") ENGINE=$engine CHARACTER SET utf8mb4 COLLATE utf8mb4_bin"; $charset = self::charset();
return ") ENGINE '{$engine}' "
. "DEFAULT CHARACTER SET '{$charset}' "
. "DEFAULT COLLATE '{$charset}_bin'";
} }
public function preferredEngine($def) /**
* Returns the character set of choice for MariaDB.
* Overrides default standard "UTF8".
*
* @return string
*/
public static function charset(): string
{
return 'utf8mb4';
}
/**
* Returns the storage engine of choice for the supplied definition.
*
* @param array $def
* @return string
*/
protected static function storageEngine(array $def): string
{ {
return 'InnoDB'; return 'InnoDB';
} }
@ -432,18 +452,18 @@ class MysqlSchema extends Schema
*/ */
public function appendAlterExtras(array &$phrase, $tableName, array $def) public function appendAlterExtras(array &$phrase, $tableName, array $def)
{ {
// Check for table properties: make sure we're using a sane // Check for table properties: make sure we are using sane
// engine type and collation. // storage engine, character set and collation.
// @fixme make the default engine configurable?
$oldProps = $this->getTableProperties($tableName, ['ENGINE', 'TABLE_COLLATION']); $oldProps = $this->getTableProperties($tableName, ['ENGINE', 'TABLE_COLLATION']);
$engine = $this->preferredEngine($def); $engine = self::storageEngine($def);
if (strtolower($oldProps['ENGINE']) != strtolower($engine)) { $charset = self::charset();
$phrase[] = "ENGINE=$engine"; if (mb_strtolower($oldProps['ENGINE']) !== mb_strtolower($engine)) {
$phrase[] = "ENGINE '{$engine}'";
} }
if (strtolower($oldProps['TABLE_COLLATION']) != 'utf8mb4_bin') { if (strtolower($oldProps['TABLE_COLLATION']) !== "{$charset}_bin") {
$phrase[] = 'CONVERT TO CHARACTER SET utf8mb4 COLLATE utf8mb4_bin'; $phrase[] = "CONVERT TO CHARACTER SET '{$charset} COLLATE '{$charset}_bin'";
$phrase[] = 'DEFAULT CHARACTER SET = utf8mb4'; $phrase[] = "DEFAULT CHARACTER SET '{$charset}'";
$phrase[] = 'DEFAULT COLLATE = utf8mb4_bin'; $phrase[] = "DEFAULT COLLATE '{$charset}_bin'";
} }
} }
@ -573,11 +593,11 @@ class MysqlSchema extends Schema
foreach ($column['enum'] as &$val) { foreach ($column['enum'] as &$val) {
$vals[] = "'{$val}'"; $vals[] = "'{$val}'";
} }
return 'enum(' . implode(',', $vals) . ')'; return 'ENUM(' . implode(',', $vals) . ')';
} elseif ($this->isStringType($column)) { } elseif ($this->isStringType($column)) {
$col = parent::typeAndSize($name, $column); $col = parent::typeAndSize($name, $column);
if (!empty($column['collate'])) { if (!empty($column['collate'])) {
$col .= ' COLLATE ' . $column['collate']; $col .= " COLLATE '{$column['collate']}'";
} }
return $col; return $col;
} else { } else {

View File

@ -456,6 +456,16 @@ class Schema
return true; return true;
} }
/**
* Returns the character set of choice for this DBMS (in practice, UTF-8).
*
* @return string
*/
public static function charset(): string
{
return 'UTF8';
}
/** /**
* Modifies a column in the schema. * Modifies a column in the schema.
* *

View File

@ -54,7 +54,6 @@ class GNUsocial
$moduleclass = "{$name}Module"; $moduleclass = "{$name}Module";
if (!class_exists($moduleclass)) { if (!class_exists($moduleclass)) {
$files = [ $files = [
"modules/{$moduleclass}.php", "modules/{$moduleclass}.php",
"modules/{$name}/{$moduleclass}.php" "modules/{$name}/{$moduleclass}.php"
@ -120,7 +119,6 @@ class GNUsocial
$moduleclass = "{$name}Plugin"; $moduleclass = "{$name}Plugin";
if (!class_exists($moduleclass)) { if (!class_exists($moduleclass)) {
$files = [ $files = [
"local/plugins/{$moduleclass}.php", "local/plugins/{$moduleclass}.php",
"local/plugins/{$name}/{$moduleclass}.php", "local/plugins/{$name}/{$moduleclass}.php",
@ -466,18 +464,30 @@ class GNUsocial
} }
if (!self::$have_config) { if (!self::$have_config) {
throw new NoConfigException("No configuration file found.", throw new NoConfigException(
$config_files); 'No configuration file found.',
$config_files
);
} }
$dsn = $config['db']['database'];
// Check for database server; must exist! // Check for database server; must exist!
if (empty($dsn)) {
if (empty($config['db']['database'])) { throw new ServerException('No database server for this site.');
throw new ServerException("No database server for this site.");
} }
$database = MDB2::parseDSN($dsn);
if (empty($database['phptype'])) {
throw new ServerException('Database server for this site is invalid.');
}
$database['charset'] = common_database_charset();
$config['db']['database'] = $database;
} }
static function fillConfigVoids() public static function fillConfigVoids()
{ {
// special cases on empty configuration options // special cases on empty configuration options
if (!common_config('thumbnail', 'dir')) { if (!common_config('thumbnail', 'dir')) {
@ -492,7 +502,7 @@ class GNUsocial
* Might make changes to the filesystem, to created dirs, but will * Might make changes to the filesystem, to created dirs, but will
* not make database changes. * not make database changes.
*/ */
static function verifyLoadedConfig() public static function verifyLoadedConfig()
{ {
$mkdirs = []; $mkdirs = [];
@ -533,7 +543,7 @@ class GNUsocial
* @return boolean true if we're running with HTTPS; else false * @return boolean true if we're running with HTTPS; else false
*/ */
static function isHTTPS() public static function isHTTPS()
{ {
if (common_config('site', 'sslproxy')) { if (common_config('site', 'sslproxy')) {
return true; return true;
@ -551,7 +561,7 @@ class GNUsocial
/** /**
* Can we use HTTPS? Then do! Only return false if it's not configured ("never"). * Can we use HTTPS? Then do! Only return false if it's not configured ("never").
*/ */
static function useHTTPS() public static function useHTTPS()
{ {
return self::isHTTPS() || common_config('site', 'ssl') != 'never'; return self::isHTTPS() || common_config('site', 'ssl') != 'never';
} }
@ -561,7 +571,7 @@ class NoConfigException extends Exception
{ {
public $configFiles; public $configFiles;
function __construct($msg, $configFiles) public function __construct($msg, $configFiles)
{ {
parent::__construct($msg); parent::__construct($msg);
$this->configFiles = $configFiles; $this->configFiles = $configFiles;

View File

@ -67,11 +67,13 @@ abstract class Installer
'name' => 'MariaDB 10.3+', 'name' => 'MariaDB 10.3+',
'check_module' => 'mysqli', 'check_module' => 'mysqli',
'scheme' => 'mysqli', // DSN prefix for MDB2 'scheme' => 'mysqli', // DSN prefix for MDB2
'charset' => 'utf8mb4',
], ],
'pgsql' => [ 'pgsql' => [
'name' => 'PostgreSQL 11+', 'name' => 'PostgreSQL 11+',
'check_module' => 'pgsql', 'check_module' => 'pgsql',
'scheme' => 'pgsql', // DSN prefix for MDB2 'scheme' => 'pgsql', // DSN prefix for MDB2
'charset' => 'UTF8',
] ]
]; ];
@ -296,14 +298,12 @@ abstract class Installer
$dsn = "{$scheme}://{$this->username}{$auth}@{$this->host}/{$this->database}"; $dsn = "{$scheme}://{$this->username}{$auth}@{$this->host}/{$this->database}";
$this->updateStatus('Checking database...'); $this->updateStatus('Checking database...');
$conn = $this->connectDatabase($dsn); $charset = self::$dbModules[$this->dbtype]['charset'];
$conn = $this->connectDatabase($dsn, $charset);
$charset = ($this->dbtype !== 'mysql') ? 'UTF8' : 'utf8mb4';
$server_charset = $this->getDatabaseCharset($conn, $this->dbtype); $server_charset = $this->getDatabaseCharset($conn, $this->dbtype);
// Ensure the database server character set is UTF-8. // Ensure the database server character set is UTF-8.
$conn->exec("SET NAMES '{$charset}'");
if ($server_charset !== $charset) { if ($server_charset !== $charset) {
$this->updateStatus( $this->updateStatus(
'GNU social requires the "' . $charset . '" character set. ' 'GNU social requires the "' . $charset . '" character set. '
@ -346,11 +346,17 @@ abstract class Installer
* Open a connection to the database. * Open a connection to the database.
* *
* @param string $dsn * @param string $dsn
* @param string $charset
* @return MDB2_Driver_Common * @return MDB2_Driver_Common
* @throws Exception * @throws Exception
*/ */
protected function connectDatabase(string $dsn) protected function connectDatabase(string $dsn, string $charset)
{ {
$dsn = MDB2::parseDSN($dsn);
// Ensure the database client character set is UTF-8.
$dsn['charset'] = $charset;
$conn = MDB2::connect($dsn); $conn = MDB2::connect($dsn);
if (MDB2::isError($conn)) { if (MDB2::isError($conn)) {
throw new Exception( throw new Exception(
@ -375,25 +381,31 @@ abstract class Installer
switch ($dbtype) { switch ($dbtype) {
case 'pgsql': case 'pgsql':
$res = $conn->query('SHOW server_encoding'); $res = $conn->query('SHOW server_encoding');
if (MDB2::isError($res)) {
throw new Exception($res->getMessage());
}
$ret = $res->fetchOne();
break; break;
case 'mysql': case 'mysql':
$res = $conn->query( $stmt = $conn->prepare(
"SHOW SESSION VARIABLES LIKE 'character_set_database'" <<<END
SELECT DEFAULT_CHARACTER_SET_NAME
FROM INFORMATION_SCHEMA.SCHEMATA
WHERE SCHEMA_NAME = ?
END,
['text'],
MDB2_PREPARE_RESULT
); );
if (MDB2::isError($res)) { if (MDB2::isError($stmt)) {
throw new Exception($res->getMessage()); return null;
} }
[, $ret] = $res->fetchRow(MDB2_FETCHMODE_ORDERED); $res = $stmt->execute([$database]);
break; break;
default: default:
throw new Exception('Unknown DB type selected.'); throw new Exception('Unknown DB type selected.');
} }
if (MDB2::isError($res)) {
throw new Exception($res->getMessage());
}
$ret = $res->fetchOne();
if (MDB2::isError($ret)) { if (MDB2::isError($ret)) {
throw new Exception($ret->getMessage()); throw new Exception($ret->getMessage());
} }

View File

@ -2390,6 +2390,15 @@ function common_compatible_license($from, $to)
return ($from == $to); return ($from == $to);
} }
/**
* returns the character set name used for the current DBMS
*/
function common_database_charset(): string
{
$schema = Schema::get();
return $schema->charset();
}
/** /**
* returns a quoted table name * returns a quoted table name
*/ */

View File

@ -49,6 +49,11 @@ class SQLStore_DB_Connection extends Auth_OpenID_DatabaseConnection
} }
$dsn['new_link'] = true; $dsn['new_link'] = true;
if (array_key_exists('charset', $options)) {
$dsn['charset'] = $options['charset'];
unset($options['charset']);
}
// To create a new Database connection is an absolute must, because // To create a new Database connection is an absolute must, because
// php-openid code delays its transactions commitment. // php-openid code delays its transactions commitment.
// Is a must because our Internal Session Handler uses the database // Is a must because our Internal Session Handler uses the database

View File

@ -45,6 +45,9 @@ function oid_store()
if (!is_array($options)) { if (!is_array($options)) {
$options = []; $options = [];
} }
// A special case, carry the charset.
$options['charset'] = common_database_charset();
$dbconn = new SQLStore_DB_Connection($dsn, $options); $dbconn = new SQLStore_DB_Connection($dsn, $options);
switch (common_config('db', 'type')) { switch (common_config('db', 'type')) {

View File

@ -39,11 +39,11 @@ END_OF_TRIM_HELP;
require_once INSTALLDIR . '/scripts/commandline.inc'; require_once INSTALLDIR . '/scripts/commandline.inc';
require dirname(__FILE__) . '/sphinx-utils.php'; require dirname(__FILE__) . '/sphinx-utils.php';
$base = $base ?? '/usr/local';
$timestamp = date('r'); $timestamp = date('r');
print <<<END echo <<<END
# #
# Sphinx configuration for StatusNet # Sphinx configuration for GNU social
# Generated {$timestamp} # Generated {$timestamp}
# #
@ -109,8 +109,9 @@ function sphinx_template($sn, $table, $query, $query_info)
{ {
$base = sphinx_base(); $base = sphinx_base();
$dbtype = common_config('db', 'type'); $dbtype = common_config('db', 'type');
$charset = common_database_charset();
print <<<END echo <<<END
# #
# {$sn->sitename} # {$sn->sitename}
@ -122,7 +123,7 @@ source {$sn->dbname}_src_{$table}
sql_user = {$sn->dbuser} sql_user = {$sn->dbuser}
sql_pass = {$sn->dbpass} sql_pass = {$sn->dbpass}
sql_db = {$sn->dbname} sql_db = {$sn->dbname}
sql_query_pre = SET NAMES utf8; sql_query_pre = SET NAMES '{$charset}';
sql_query = {$query} sql_query = {$query}
sql_query_info = {$query_info} sql_query_info = {$query_info}
sql_attr_timestamp = created_ts sql_attr_timestamp = created_ts