Ticket #1281: JID validation now more or less follows spec instead of calling e-mail validator

Basic splitting/validation code submitted via http://status.net/wiki/XMPP/JID_validation -- Copyright 2009 Patrick Georgi <patrick@georgi-clan.de> Licensed under ISC-L, which is compatible with everything else that keeps the copyright notice intact.

Added PEAR Net_IDNA package to extlib to handle IDN normalization (also used by Validate's email verifier if present).

* added test suite, supplemented my own test cases with JID validation and normalization test cases from libpurple
* follows XMPP rules for validation of name part
* fixes for normalization with non-ASCII names
* performs domain checks if $config['email']['check_domain'] is on, looking for an XMPP-server SRV record and falling back to any DNS record for the domain. (We don't actually need to contact those servers directly, though.)
* some more obscure stringprep validation rules aren't quite followed yet, but we err on the side of permissiveness.
* we still don't let you save your address with a resource on it, because we strip resources when looking up users who've sent us presence or message updates. I would recommend saving the outgoing resource as a separate field if/when we add that.
This commit is contained in:
Brion Vibber 2010-03-30 17:35:27 -07:00
parent bfb2ac4910
commit 0841fa712e
6 changed files with 3735 additions and 12 deletions

View File

@ -292,7 +292,7 @@ class ImsettingsAction extends ConnectSettingsAction
$this->showForm(_('Cannot normalize that Jabber ID'));
return;
}
if (!jabber_valid_base_jid($jabber)) {
if (!jabber_valid_base_jid($jabber, common_config('email', 'domain_check'))) {
$this->showForm(_('Not a valid Jabber ID'));
return;
} else if ($user->jabber == $jabber) {

100
extlib/Net/IDNA.php Normal file
View File

@ -0,0 +1,100 @@
<?php
// {{{ license
/* vim: set expandtab tabstop=4 shiftwidth=4 softtabstop=4 foldmethod=marker: */
//
// +----------------------------------------------------------------------+
// | This library is free software; you can redistribute it and/or modify |
// | it under the terms of the GNU Lesser General Public License as |
// | published by the Free Software Foundation; either version 2.1 of the |
// | License, or (at your option) any later version. |
// | |
// | This library is distributed in the hope that it will be useful, but |
// | WITHOUT ANY WARRANTY; without even the implied warranty of |
// | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU |
// | Lesser General Public License for more details. |
// | |
// | You should have received a copy of the GNU Lesser General Public |
// | License along with this library; if not, write to the Free Software |
// | Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 |
// | USA. |
// +----------------------------------------------------------------------+
//
// }}}
/**
* Encode/decode Internationalized Domain Names.
* Factory class to get correct implementation either for php4 or php5.
*
* @author Markus Nix <mnix@docuverse.de>
* @author Matthias Sommerfeld <mso@phlylabs.de>
* @package Net
* @version $Id: IDNA.php 284681 2009-07-24 04:24:27Z clockwerx $
*/
class Net_IDNA
{
    // {{{ factory
    /**
     * Attempts to return a concrete IDNA instance for either php4 or php5.
     *
     * The implementation class is Net_IDNA_php5 when the PHP major version
     * is greater than 4 and Net_IDNA_php4 otherwise. The method is declared
     * static because it is invoked as Net_IDNA::getInstance(); PHP 8 raises
     * an Error when a non-static method is called statically.
     *
     * @param array $params Set of parameters handed to the implementation's constructor
     * @return object|false The newly created concrete IDNA instance, or
     *                      false if no implementation class is available.
     * @access public
     */
    public static function getInstance($params = array())
    {
        $version   = explode('.', phpversion());
        $handler   = ((int)$version[0] > 4) ? 'php5' : 'php4';
        $class     = 'Net_IDNA_' . $handler;
        $classfile = 'Net/IDNA/' . $handler . '.php';

        /*
         * Attempt to include our version of the named class, but don't treat
         * a failure as fatal. The caller may have already included their own
         * version of the named class.
         */
        @include_once $classfile;

        /* If the class exists, return a new instance of it. */
        if (class_exists($class)) {
            return new $class($params);
        }

        return false;
    }
    // }}}

    // {{{ singleton
    /**
     * Attempts to return a concrete IDNA instance for either php4 or php5,
     * only creating a new instance if no IDNA instance with the same
     * parameters currently exists.
     *
     * Instances are cached per serialized parameter set. A failed lookup
     * (false) is cached as well, so it is not retried within the request.
     *
     * @param array $params Set of parameters
     * @return object|false The cached or newly created concrete IDNA
     *                      instance, or false on an error.
     * @access public
     */
    public static function singleton($params = array())
    {
        static $instances = array();

        $signature = serialize($params);
        if (!isset($instances[$signature])) {
            $instances[$signature] = self::getInstance($params);
        }

        return $instances[$signature];
    }
    // }}}
}
?>

3269
extlib/Net/IDNA/php5.php Normal file

File diff suppressed because it is too large Load Diff

View File

@ -34,38 +34,197 @@ if (!defined('STATUSNET') && !defined('LACONICA')) {
require_once 'XMPPHP/XMPP.php';
/**
 * Splits a Jabber ID (JID) into node, domain, and resource portions.
 *
 * Based on validation routine submitted by:
 * @copyright 2009 Patrick Georgi <patrick@georgi-clan.de>
 * @license Licensed under ISC-L, which is compatible with everything else that keeps the copyright notice intact.
 *
 * The node and domain are returned lowercased; the resource is returned
 * unchanged. "node" is null when the JID contains no "@". "resource" is
 * null when the JID contains no "/", and '' when nothing follows the "/".
 *
 * @param string $jid string to check
 *
 * @return array with "node", "domain", and "resource" indices
 * @throws Exception if input is not valid
 */
function jabber_split_jid($jid)
{
    // Fragments of a PCRE character class. Single-quoted so that the
    // \x{...} escapes reach PCRE verbatim and are resolved under the /u flag.
    $chars = '';
    /* the following definitions come from stringprep, Appendix C,
       which is used in its entirety by nodeprep, Chapter 5, "Prohibited Output" */
    /* C1.1 ASCII space characters */
    $chars .= '\x{20}';
    /* C1.2 Non-ASCII space characters */
    $chars .= '\x{a0}\x{1680}\x{2000}-\x{200b}\x{202f}\x{205f}\x{3000}';
    /* C2.1 ASCII control characters */
    $chars .= '\x{00}-\x{1f}\x{7f}';
    /* C2.2 Non-ASCII control characters */
    $chars .= '\x{80}-\x{9f}\x{6dd}\x{70f}\x{180e}\x{200c}\x{200d}\x{2028}\x{2029}\x{2060}-\x{2063}\x{206a}-\x{206f}\x{feff}\x{fff9}-\x{fffc}\x{1d173}-\x{1d17a}';
    /* C3 - Private Use */
    $chars .= '\x{e000}-\x{f8ff}\x{f0000}-\x{ffffd}\x{100000}-\x{10fffd}';
    /* C4 - Non-character code points */
    $chars .= '\x{fdd0}-\x{fdef}\x{fffe}\x{ffff}\x{1fffe}\x{1ffff}\x{2fffe}\x{2ffff}\x{3fffe}\x{3ffff}\x{4fffe}\x{4ffff}\x{5fffe}\x{5ffff}\x{6fffe}\x{6ffff}\x{7fffe}\x{7ffff}\x{8fffe}\x{8ffff}\x{9fffe}\x{9ffff}\x{afffe}\x{affff}\x{bfffe}\x{bffff}\x{cfffe}\x{cffff}\x{dfffe}\x{dffff}\x{efffe}\x{effff}\x{ffffe}\x{fffff}\x{10fffe}\x{10ffff}';
    /* C5 - Surrogate codes (U+D800-U+DFFF) are intentionally not listed:
       PCRE refuses to compile \x{d800}-\x{dfff} in UTF mode, which would make
       every preg_match() below fail. Surrogates cannot occur in well-formed
       UTF-8, and malformed input is rejected by the preg_match() failure
       check instead. */
    /* C6 - Inappropriate for plain text */
    $chars .= '\x{fff9}-\x{fffd}';
    /* C7 - Inappropriate for canonical representation */
    $chars .= '\x{2ff0}-\x{2ffb}';
    /* C8 - Change display properties or are deprecated */
    $chars .= '\x{340}\x{341}\x{200e}\x{200f}\x{202a}-\x{202e}\x{206a}-\x{206f}';
    /* C9 - Tagging characters */
    $chars .= '\x{e0001}\x{e0020}-\x{e007f}';

    /* Nodeprep forbids some more characters: " & ' / : < > @ */
    $nodeprepchars = $chars . '\x{22}\x{26}\x{27}\x{2f}\x{3a}\x{3c}\x{3e}\x{40}';

    // Everything after the first "/" is the resource; it may itself
    // contain further "/" or "@" characters.
    $parts = explode('/', $jid, 2);
    // Warning: an empty resource ('' after a trailing "/") isn't legit,
    // but if we're normalizing we may as well take it; callers that care
    // check for '' themselves.
    $resource = (count($parts) > 1) ? $parts[1] : null;

    $address = explode('@', $parts[0]);
    if (count($address) > 2) {
        throw new Exception("Invalid JID: too many @s");
    }
    if (count($address) == 1) {
        // No "@": the whole address is a bare domain.
        $node = null;
        $domain = $address[0];
    } else {
        $node = $address[0];
        $domain = $address[1];
        if ($node === '') {
            throw new Exception("Invalid JID: @ but no node");
        }
    }

    // Length limits per http://xmpp.org/rfcs/rfc3920.html#addressing
    // strlen() counts bytes, not characters.
    if ($node !== null) {
        if (strlen($node) > 1023) {
            throw new Exception("Invalid JID: node too long.");
        }
        // preg_match() returns false on error (e.g. malformed UTF-8);
        // treat that as invalid rather than as "no forbidden characters".
        if (preg_match('/[' . $nodeprepchars . ']/u', $node) !== 0) {
            throw new Exception("Invalid JID node '$node'");
        }
    }

    if (strlen($domain) > 1023) {
        throw new Exception("Invalid JID: domain too long.");
    }
    if (!common_valid_domain($domain)) {
        throw new Exception("Invalid JID domain name '$domain'");
    }

    if ($resource !== null) {
        if (strlen($resource) > 1023) {
            throw new Exception("Invalid JID: resource too long.");
        }
        if (preg_match('/[' . $chars . ']/u', $resource) !== 0) {
            throw new Exception("Invalid JID resource '$resource'");
        }
    }

    // The patterns above treat the input as UTF-8, so lowercase it as UTF-8
    // too instead of relying on the ambient mb_internal_encoding().
    return array('node' => is_null($node) ? null : mb_strtolower($node, 'UTF-8'),
                 'domain' => mb_strtolower($domain, 'UTF-8'),
                 'resource' => $resource);
}
/**
 * Checks whether a string is a syntactically valid Jabber ID (JID),
 * either with or without a resource.
 *
 * Note that a bare domain can be a valid JID.
 *
 * @param string $jid          string to check
 * @param bool   $check_domain whether the domain must also pass jabber_check_domain()
 *
 * @return boolean whether the string is a valid JID
 */
function jabber_valid_full_jid($jid, $check_domain=false)
{
    try {
        $pieces = jabber_split_jid($jid);
        if ($check_domain && !jabber_check_domain($pieces['domain'])) {
            return false;
        }
        // A resource may be absent (null) or non-empty; a bare trailing
        // "/" leaves an empty resource, which ain't kosher.
        $emptyResource = ($pieces['resource'] === '');
        return !$emptyResource;
    } catch (Exception $e) {
        // Any parsing or validation failure means the JID is not valid.
        return false;
    }
}
/**
 * Checks whether a string is a syntactically valid base Jabber ID (JID).
 * A base JID won't include a resource specifier on the end; since we
 * take it off when reading input we can't really use them reliably
 * to direct outgoing messages yet (sorry guys!)
 *
 * Note that a bare domain can be a valid JID.
 *
 * @param string $jid          string to check
 * @param bool   $check_domain whether the domain must also pass jabber_check_domain()
 *
 * @return boolean whether the string is a valid JID
 */
function jabber_valid_base_jid($jid, $check_domain=false)
{
    try {
        $pieces = jabber_split_jid($jid);
        if ($check_domain && !jabber_check_domain($pieces['domain'])) {
            return false;
        }
        // Only a missing resource qualifies; an empty one ain't kosher.
        return is_null($pieces['resource']);
    } catch (Exception $e) {
        // Any parsing or validation failure means the JID is not valid.
        return false;
    }
}
/**
 * Normalizes a Jabber ID for comparison, dropping the resource component if any.
 *
 * @param string $jid JID to check
 *
 * @return string|null an equivalent JID in normalized (lowercase) form,
 *                     or null if the input cannot be split into a valid JID
 */
function jabber_normalize_jid($jid)
{
    try {
        $parts = jabber_split_jid($jid);
    } catch (Exception $e) {
        return null;
    }

    // A bare domain is a legitimate JID, so only prepend "node@" when a
    // node is actually present.
    if ($parts['node'] !== null) {
        return $parts['node'] . '@' . $parts['domain'];
    }
    return $parts['domain'];
}
/**
 * Check if this domain's got some legit DNS record.
 *
 * Looks first for an XMPP server-to-server SRV record
 * (_xmpp-server._tcp.<domain>) and, failing that, for any record at all
 * on the domain itself.
 *
 * @param string $domain domain name to look up
 *
 * @return boolean true if either lookup finds a record
 */
function jabber_check_domain($domain)
{
    $srvName = "_xmpp-server._tcp." . $domain;

    // Short-circuits: the generic lookup only runs when no SRV record exists.
    return checkdnsrr($srvName, "SRV") || checkdnsrr($domain, "ANY");
}
/**
* the JID of the Jabber daemon for this StatusNet instance
*

View File

@ -1397,6 +1397,55 @@ function common_valid_tag($tag)
return false;
}
/**
 * Determine if given domain or address literal is valid
 * eg for use in JIDs and URLs. Does not check if the domain
 * exists!
 *
 * Accepted forms are a dotted-quad IPv4 address, a bracketed IPv6 literal
 * (hex groups only; the mixed IPv4-suffix notation is not recognized), or
 * a host name, which is IDNA-encoded before being matched when an IDNA
 * implementation can be loaded.
 *
 * @param string $domain
 * @return boolean valid or not
 */
function common_valid_domain($domain)
{
    $octet = "(?:25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])";
    $ipv4 = "(?:$octet(?:\.$octet){3})";
    if (preg_match("/^$ipv4$/u", $domain)) {
        return true;
    }

    // http://tools.ietf.org/html/rfc3513#section-2.2
    // Both runs are captured as possibly-empty strings so that every
    // $matches index always exists.
    $group = "(?:[0-9a-f]{1,4})";
    $run = "(?:$group(?::$group)*)?";
    if (preg_match("/^\[($run)(::)?($run)\]$/ui", $domain, $matches)) {
        $compressed = ($matches[2] === '::');

        // Count the explicitly written 16-bit groups. An empty run holds
        // zero groups (explode() on '' would wrongly report one).
        $explicit = 0;
        foreach (array($matches[1], $matches[3]) as $side) {
            if ($side !== '') {
                $explicit += substr_count($side, ':') + 1;
            }
        }

        if ($compressed) {
            // "::" stands in for at least one group, leaving at most seven.
            return $explicit <= 7;
        }
        // Without "::" all eight groups must be spelled out.
        return $explicit === 8;
    }

    try {
        require_once "Net/IDNA.php";
        $idn = Net_IDNA::getInstance();
        // getInstance() returns false when no implementation can be loaded;
        // in that case the name is matched as-is, so only plain ASCII host
        // names can pass.
        if (is_object($idn)) {
            $domain = $idn->encode($domain);
        }
    } catch (Exception $e) {
        return false;
    }

    $subdomain = "(?:[a-z0-9][a-z0-9-]*)"; // @fixme
    $fqdn = "(?:$subdomain(?:\.$subdomain)*\.?)";
    return preg_match("/^$fqdn$/ui", $domain) === 1;
}
/* Following functions are copied from MediaWiki GlobalFunctions.php
* and written by Evan Prodromou. */

146
tests/JidValidateTest.php Normal file
View File

@ -0,0 +1,146 @@
<?php
// Refuse to run when a REQUEST_METHOD is present, i.e. when the script is
// reached through a web request rather than the command line.
if (isset($_SERVER) && array_key_exists('REQUEST_METHOD', $_SERVER)) {
print "This script must be run from the command line\n";
exit();
}
// INSTALLDIR is the parent of the directory holding this test file.
define('INSTALLDIR', realpath(dirname(__FILE__) . '/..'));
// Defined before loading the libraries below; presumably they check for
// one of these constants before running -- TODO confirm.
define('STATUSNET', true);
define('LACONICA', true);
// The test data contains non-ASCII JIDs, so the mb_* functions must work in UTF-8.
mb_internal_encoding('UTF-8'); // @fixme this probably belongs in common.php?
// Code under test.
require_once INSTALLDIR . '/lib/common.php';
require_once INSTALLDIR . '/lib/jabber.php';
/**
 * Tests for the JID helpers: jabber_valid_full_jid(), jabber_valid_base_jid(),
 * jabber_normalize_jid() and jabber_check_domain().
 */
class JidValidateTest extends PHPUnit_Framework_TestCase
{
    /**
     * Each JID must validate as expected both as a full JID (resource
     * optional) and as a base JID (no resource allowed).
     *
     * @dataProvider validationCases
     *
     */
    public function testValidate($jid, $validFull, $validBase)
    {
        // assertSame: the validators must return real booleans.
        $this->assertSame($validFull, jabber_valid_full_jid($jid), "validating as full or base JID");
        $this->assertSame($validBase, jabber_valid_base_jid($jid), "validating as base JID only");
    }

    /**
     * Normalization lowercases node and domain and drops the resource.
     *
     * @dataProvider normalizationCases
     *
     */
    public function testNormalize($jid, $expected)
    {
        $this->assertSame($expected, jabber_normalize_jid($jid));
    }

    /**
     * Performs live DNS lookups, so it needs working name resolution.
     *
     * @dataProvider domainCheckCases
     */
    public function testDomainCheck($domain, $expected, $note)
    {
        $this->assertSame($expected, jabber_check_domain($domain), $note);
    }

    /**
     * @return array rows of (jid, valid as full JID, valid as base JID)
     */
    static public function validationCases()
    {
        // Strings of exactly 1023 and 1024 bytes, to probe the per-part length limit.
        $long1023 = "long1023" . str_repeat('x', 1023 - 8);
        $long1024 = "long1024" . str_repeat('x', 1024 - 8);
        return array(
            // Our own test cases for standard things & those mentioned in bug reports
            // (jid, valid_full, valid_base)
            array('user@example.com', true, true),
            array('user@example.com/resource', true, false),
            array('user with spaces@example.com', false, false), // not kosher
            array('user.@example.com', true, true), // "common in intranets"
            array('example.com', true, true),
            array('example.com/resource', true, false),
            array('jabchat', true, true),
            array("$long1023@$long1023/$long1023", true, false), // max 1023 "bytes" per portion per spec. Do they really mean bytes though?
            array("$long1024@$long1023/$long1023", false, false),
            array("$long1023@$long1024/$long1023", false, false),
            array("$long1023@$long1023/$long1024", false, false),
            // Borrowed from test_jabber_jutil.c in libpurple
            array("gmail.com", true, true),
            array("gmail.com/Test", true, false),
            array("gmail.com/Test@", true, false),
            array("gmail.com/@", true, false),
            array("gmail.com/Test@alkjaweflkj", true, false),
            array("mark.doliner@gmail.com", true, true),
            array("mark.doliner@gmail.com/Test12345", true, false),
            array("mark.doliner@gmail.com/Test@12345", true, false),
            array("mark.doliner@gmail.com/Te/st@12@//345", true, false),
            array("わいど@conference.jabber.org", true, true),
            array("まりるーむ@conference.jabber.org", true, true),
            array("mark.doliner@gmail.com/まりるーむ", true, false),
            array("mark.doliner@gmail/stuff.org", true, false),
            array("stuart@nödåtXäYZ.se", true, true),
            array("stuart@nödåtXäYZ.se/まりるーむ", true, false),
            array("mark.doliner@わいど.org", true, true),
            array("nick@まつ.おおかみ.net", true, true),
            array("paul@10.0.42.230/s", true, false),
            array("paul@[::1]", true, true), /* IPv6 */
            array("paul@[2001:470:1f05:d58::2]", true, true),
            array("paul@[2001:470:1f05:d58::2]/foo", true, false),
            array("pa=ul@10.0.42.230", true, true),
            array("pa,ul@10.0.42.230", true, true),
            array("@gmail.com", false, false),
            array("@@gmail.com", false, false),
            array("mark.doliner@@gmail.com/Test12345", false, false),
            array("mark@doliner@gmail.com/Test12345", false, false),
            array("@gmail.com/Test@12345", false, false),
            array("/Test@12345", false, false),
            array("mark.doliner@", false, false),
            array("mark.doliner/", false, false),
            array("mark.doliner@gmail_stuff.org", false, false),
            array("mark.doliner@gmail[stuff.org", false, false),
            array("mark.doliner@gmail\\stuff.org", false, false),
            array("paul@[::1]124", false, false),
            array("paul@2[::1]124/as", false, false),
            array("paul@まつ.おおかみ/\x01", false, false),
            /*
             * RFC 3454 Section 6 reads, in part,
             * "If a string contains any RandALCat character, the
             * string MUST NOT contain any LCat character."
             * The character is U+066D (ARABIC FIVE POINTED STAR).
             */
            // Leaving this one commented out for the moment
            // as it shouldn't hurt anything for our purposes.
            //array("foo@example.com/٭simplexe٭", false, false)
        );
    }

    /**
     * @return array rows of (input jid, expected normalized jid)
     */
    static public function normalizationCases()
    {
        return array(
            // Borrowed from test_jabber_jutil.c in libpurple
            array('PaUL@DaRkRain42.org', 'paul@darkrain42.org'),
            array('PaUL@DaRkRain42.org/', 'paul@darkrain42.org'),
            array('PaUL@DaRkRain42.org/resource', 'paul@darkrain42.org'),
            // Also adapted from libpurple tests...
            array('Ф@darkrain42.org', 'ф@darkrain42.org'),
            array('paul@Өarkrain.org', 'paul@өarkrain.org'),
        );
    }

    /**
     * @return array rows of (domain, expected lookup result, note used as the assertion message)
     */
    static public function domainCheckCases()
    {
        return array(
            array('gmail.com', true, 'known SRV record'),
            array('jabber.org', true, 'known SRV record'),
            array('status.net', true, 'known SRV record'),
            array('status.leuksman.com', true, 'known no SRV record but valid domain'),
        );
    }
}