diff --git a/README b/README index 9207f3e900..db912f2016 100644 --- a/README +++ b/README @@ -694,6 +694,13 @@ to users on a remote site. (Or not... it's not well tested.) The Upgrading ========= +IMPORTANT NOTE: Laconica 0.7.4 introduced a fix for some +incorrectly-stored international characters ("UTF-8"). For new +installations, it will now store non-ASCII characters correctly. +However, older installations will have the incorrect storage, and will +consequently show up "wrong" in browsers. See below for how to deal +with this situation. + If you've been using Laconica 0.6, 0.5 or lower, or if you've been tracking the "git" version of the software, you will probably want to upgrade and keep your existing data. There is no automated upgrade @@ -783,6 +790,29 @@ problem. 3. When fixup_inboxes is finished, you can set the enabled flag to 'true'. +UTF-8 Database +-------------- + +Laconica 0.7.4 introduced a fix for some incorrectly-stored +international characters ("UTF-8"). This fix is not +backwards-compatible; installations from before 0.7.4 will show +non-ASCII characters of old notices incorrectly. This section explains +what to do. + +0. You can disable the new behaviour by setting the 'db''utf8' config + option to "false". You should only do this until you're ready to + convert your DB to the new format. +1. When you're ready to convert, you can run the fixup_utf8.php script + in the scripts/ subdirectory. If you've had the "new behaviour" + enabled (probably a good idea), you can give the ID of the first + "new" notice as a parameter, and only notices before that one will + be converted. Notices are converted in reverse chronological order, + so the most recent (and visible) ones will be converted first. The + script should work whether or not you have the 'db''utf8' config + option enabled. +2. When you're ready, set $config['db']['utf8'] to true, so that + new notices will be stored correctly. + Configuration options ===================== @@ -910,6 +940,10 @@ mirror: you can set this to an array of DSNs, like the above and adding the slaves to this array. Note that if you want some requests to go to the 'database' (master) server, you'll need to include it in this array, too. +utf8: whether to talk to the database in UTF-8 mode. This is the default + with new installations, but older sites may want to turn it off + until they get their databases fixed up. See "UTF-8 database" + above for details. syslog ------ diff --git a/classes/Memcached_DataObject.php b/classes/Memcached_DataObject.php index 5f71f716b3..52ad4100fc 100644 --- a/classes/Memcached_DataObject.php +++ b/classes/Memcached_DataObject.php @@ -227,4 +227,22 @@ class Memcached_DataObject extends DB_DataObject $c->set($ckey, $cached, MEMCACHE_COMPRESSED, $expiry); return new ArrayWrapper($cached); } + + // We overload so that 'SET NAMES "utf8"' is called for + // each connection + + function _connect() + { + global $_DB_DATAOBJECT; + $exists = !empty($this->_database_dsn_md5) && + isset($_DB_DATAOBJECT['CONNECTIONS'][$this->_database_dsn_md5]); + $result = parent::_connect(); + if (!$exists) { + $DB = &$_DB_DATAOBJECT['CONNECTIONS'][$this->_database_dsn_md5]; + if (common_config('db', 'utf8')) { + $DB->query('SET NAMES "utf8"'); + } + } + return $result; + } } diff --git a/db/notice_source.sql b/db/notice_source.sql index 1508af1ec8..d5a280b82c 100644 --- a/db/notice_source.sql +++ b/db/notice_source.sql @@ -2,6 +2,7 @@ INSERT INTO notice_source (code, name, url, created) VALUES ('adium', 'Adium', 'http://www.adiumx.com/', now()), + ('AgentSolo.com','AgentSolo.com','http://www.agentsolo.com/', now()), ('betwittered','BeTwittered','http://www.32hours.com/betwitteredinfo/', now()), ('bti','bti','http://gregkh.github.com/bti/', now()), ('cliqset', 'Cliqset', 'http://www.cliqset.com/', now()), @@ -29,6 +30,7 @@ VALUES ('pingvine','PingVine','http://pingvine.com/', now()), ('pocketwit','PockeTwit','http://code.google.com/p/pocketwit/', now()), ('posty','Posty','http://spreadingfunkyness.com/posty/', now()), + ('qtwitter','qTwitter','http://qtwitter.ayoy.net/', now()), ('royalewithcheese','Royale With Cheese','http://p.hellyeah.org/', now()), ('rssdent','rssdent','http://github.com/zcopley/rssdent/tree/master', now()), ('rygh.no','rygh.no','http://rygh.no/', now()), diff --git a/lib/common.php b/lib/common.php index 0ce46442de..4a98741e8e 100644 --- a/lib/common.php +++ b/lib/common.php @@ -174,6 +174,7 @@ $config['db'] = 'require_prefix' => 'classes/', 'class_prefix' => '', 'mirror' => null, + 'utf8' => true, 'db_driver' => 'DB', # XXX: JanRain libs only work with DB 'quote_identifiers' => false, 'type' => 'mysql' ); diff --git a/scripts/fixup_utf8.php b/scripts/fixup_utf8.php new file mode 100644 index 0000000000..e5021ff343 --- /dev/null +++ b/scripts/fixup_utf8.php @@ -0,0 +1,141 @@ +#!/usr/bin/env php +. + */ + +# Abort if called from a web server +if (isset($_SERVER) && array_key_exists('REQUEST_METHOD', $_SERVER)) { + print "This script must be run from the command line\n"; + exit(1); +} + +ini_set("max_execution_time", "0"); +ini_set("max_input_time", "0"); +set_time_limit(0); +mb_internal_encoding('UTF-8'); + +define('INSTALLDIR', realpath(dirname(__FILE__) . '/..')); +define('LACONICA', true); + +require_once(INSTALLDIR . '/lib/common.php'); +require_once('DB.php'); + +function fixup_utf8($id) { + + $dbl = doConnect('latin1'); + + if (empty($dbl)) { + return; + } + + $dbu = doConnect('utf8'); + + if (empty($dbu)) { + return; + } + + // Do a separate DB connection + + $sth = $dbu->prepare("UPDATE notice SET content = UNHEX(?), rendered = UNHEX(?) WHERE id = ?"); + + if (PEAR::isError($sth)) { + echo "ERROR: " . $sth->getMessage() . "\n"; + return; + } + + $sql = 'SELECT id, content, rendered FROM notice ' . + 'WHERE LENGTH(content) != CHAR_LENGTH(content)'; + + if (!empty($id)) { + $sql .= ' AND id < ' . $id; + } + + $sql .= ' ORDER BY id DESC'; + + $rn = $dbl->query($sql); + + if (PEAR::isError($rn)) { + echo "ERROR: " . $rn->getMessage() . "\n"; + return; + } + + echo "Number of rows: " . $rn->numRows() . "\n"; + + $notice = array(); + + while (DB_OK == $rn->fetchInto($notice)) { + + $id = ($notice[0])+0; + $content = bin2hex($notice[1]); + $rendered = bin2hex($notice[2]); + + echo "$id..."; + + $result =& $dbu->execute($sth, array($content, $rendered, $id)); + + if (PEAR::isError($result)) { + echo "ERROR: " . $result->getMessage() . "\n"; + continue; + } + + $cnt = $dbu->affectedRows(); + + if ($cnt != 1) { + echo "ERROR: 0 rows affected\n"; + continue; + } + + $notice = Notice::staticGet('id', $id); + $notice->decache(); + + echo "OK\n"; + } +} + +function doConnect($charset) +{ + $db = DB::connect(common_config('db', 'database'), + array('persistent' => false)); + + if (PEAR::isError($db)) { + echo "ERROR: " . $db->getMessage() . "\n"; + return NULL; + } + + $result = $db->query("SET NAMES $charset"); + + if (PEAR::isError($result)) { + echo "ERROR: " . $result->getMessage() . "\n"; + $db->disconnect(); + return NULL; + } + + $result = $db->autoCommit(true); + + if (PEAR::isError($result)) { + echo "ERROR: " . $result->getMessage() . "\n"; + $db->disconnect(); + return NULL; + } + + return $db; +} + +$id = ($argc > 1) ? $argv[1] : null; + +fixup_utf8($id);