Improved the URL tests and the matcher so that more tests pass. I believe the remaining failing tests are themselves incorrect.
This commit is contained in:
parent
9ec3911020
commit
6a3a25b5a2
24
lib/util.php
24
lib/util.php
@ -412,30 +412,34 @@ function common_render_text($text)
|
||||
function common_replace_urls_callback($text, $callback, $notice_id = null) {
|
||||
// Start off with a regex
|
||||
$regex = '#'.
|
||||
'(?:^|[\s\(\)\[\]\{\}]+)'.
|
||||
'('.
|
||||
'(?:^|[\s\(\)\[\]\{\}\\\'\\\";]+)(?![\@\!\#])'.
|
||||
'('.
|
||||
'(?:'.
|
||||
'(?:'. //Known protocols
|
||||
'(?:'.
|
||||
'(?:https?|ftps?|mms|rtsp|gopher|news|nntp|telnet|wais|file|prospero|webcal|irc)://'.
|
||||
'|'.
|
||||
'(?:mailto|aim|tel|xmpp):'.
|
||||
')[^\s\/]+'.
|
||||
')'.
|
||||
'(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@
|
||||
'[\pN\pL\-\_\:\.]+(?<![\.\:])'. //dns
|
||||
')'.
|
||||
'|(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)'. //IPv4
|
||||
'|(?:'. //IPv6
|
||||
'(?:(?:(?:[0-9A-Fa-f]{1,4}:){7}(?:(?:[0-9A-Fa-f]{1,4})|:))|(?:(?:[0-9A-Fa-f]{1,4}:){6}(?::|(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})|(?::[0-9A-Fa-f]{1,4})))|(?:(?:[0-9A-Fa-f]{1,4}:){5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){4}(?::[0-9A-Fa-f]{1,4}){0,1}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){3}(?::[0-9A-Fa-f]{1,4}){0,2}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:){2}(?::[0-9A-Fa-f]{1,4}){0,3}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:[0-9A-Fa-f]{1,4}:)(?::[0-9A-Fa-f]{1,4}){0,4}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?::(?::[0-9A-Fa-f]{1,4}){0,5}(?:(?::(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})?)|(?:(?::[0-9A-Fa-f]{1,4}){1,2})))|(?:(?:(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})(?:\.(?:25[0-5]|2[0-4]\d|[01]?\d{1,2})){3})))'.
|
||||
')|(?:'. //DNS
|
||||
'\S+\.(?:museum|travel|onion|local|[a-z]{2,4})'.
|
||||
')'.
|
||||
'(?:[\pN\pL\-\_\+]+(?:\:[\pN\pL\-\_\+]+)?\@)?'. //user:pass@
|
||||
'[\pN\pL\-\_]+(?:\.[\pN\pL\-\_]+)*\.(?:museum|travel|onion|local|[a-z]{2,4})'.
|
||||
')(?![\pN\pL\-\_])'.
|
||||
')'.
|
||||
'(?:'.
|
||||
'$|(?:'.
|
||||
'/[^\s\(\)\[\]\{\}]*'.
|
||||
')'.
|
||||
')'.
|
||||
'(?:\:\d+)?'. //:port
|
||||
'(?:/[\pN\pL$\!\(\)\.\-\_\+\/\=\&\;]*)?'. // /path
|
||||
'(?:\?[\pN\pL\$\!\(\)\.\-\_\+\/\=\&\;\/]*)?'. // ?query string
|
||||
'(?:\#[\pN\pL$\!\(\)\.\-\_\+\/\=\&\;\/\?\#]*)?'. // #fragment
|
||||
')(?<![\?\.\,\#\)])'.
|
||||
')'.
|
||||
'#ix';
|
||||
'#ixu';
|
||||
return preg_replace_callback($regex, curry(callback_helper,$callback,$notice_id) ,$text);
|
||||
}
|
||||
|
||||
|
@ -25,10 +25,50 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase
|
||||
static public function provider()
|
||||
{
|
||||
return array(
|
||||
array('127.0.0.1',
|
||||
'<a href="http://127.0.0.1/" rel="external">127.0.0.1</a>'),
|
||||
array('127.0.0.1/test.php',
|
||||
'<a href="http://127.0.0.1/test.php" rel="external">127.0.0.1/test.php</a>'),
|
||||
array('http://::1/test.php',
|
||||
'<a href="http://::1/test.php" rel="external">http://::1/test.php</a>'),
|
||||
array('http://::1',
|
||||
'<a href="http://::1/" rel="external">http://::1</a>'),
|
||||
array('2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php',
|
||||
'<a href="http://2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php" rel="external">2001:4978:1b5:0:21d:e0ff:fe66:59ab/test.php</a>'),
|
||||
array('2001:4978:1b5:0:21d:e0ff:fe66:59ab',
|
||||
'<a href="http://2001:4978:1b5:0:21d:e0ff:fe66:59ab/" rel="external">2001:4978:1b5:0:21d:e0ff:fe66:59ab</a>'),
|
||||
array('http://127.0.0.1',
|
||||
'<a href="http://127.0.0.1/" rel="external">http://127.0.0.1</a>'),
|
||||
array('example.com',
|
||||
'<a href="http://example.com/" rel="external">example.com</a>'),
|
||||
array('example.com',
|
||||
'<a href="http://example.com/" rel="external">example.com</a>'),
|
||||
array('http://example.com',
|
||||
'<a href="http://example.com/" rel="external">http://example.com</a>'),
|
||||
array('http://example.com.',
|
||||
'<a href="http://example.com/" rel="external">http://example.com</a>.'),
|
||||
array('/var/lib/example.so',
|
||||
'/var/lib/example.so'),
|
||||
array('example',
|
||||
'example'),
|
||||
array('user@example.com',
|
||||
'<a href="mailto:user@example.com" rel="external">user@example.com</a>'),
|
||||
array('user_name+other@example.com',
|
||||
'<a href="mailto:user_name+other@example.com" rel="external">user_name+other@example.com</a>'),
|
||||
array('mailto:user@example.com',
|
||||
'<a href="mailto:user@example.com" rel="external">mailto:user@example.com</a>'),
|
||||
array('mailto:user@example.com?subject=test',
|
||||
'<a href="mailto:user@example.com?subject=test" rel="external">mailto:user@example.com?subject=test</a>'),
|
||||
array('#example',
|
||||
'#<span class="tag"><a href="' . common_local_url('tag', array('tag' => common_canonical_tag('example'))) . '" rel="tag">example</a></span>'),
|
||||
array('#example.com',
|
||||
'#<span class="tag"><a href="' . common_local_url('tag', array('tag' => common_canonical_tag('example.com'))) . '" rel="tag">example.com</a></span>'),
|
||||
array('#.net',
|
||||
'#<span class="tag"><a href="' . common_local_url('tag', array('tag' => common_canonical_tag('.net'))) . '" rel="tag">.net</a></span>'),
|
||||
array('http://example',
|
||||
'<a href="http://example/" rel="external">http://example</a>'),
|
||||
array('http://3xampl3',
|
||||
'<a href="http://3xampl3/" rel="external">http://3xampl3</a>'),
|
||||
array('http://example/',
|
||||
'<a href="http://example/" rel="external">http://example/</a>'),
|
||||
array('http://example/path',
|
||||
@ -47,6 +87,10 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase
|
||||
'<a href="http://user:pass@example.com/" rel="external">http://user:pass@example.com</a>'),
|
||||
array('http://example.com:8080',
|
||||
'<a href="http://example.com:8080/" rel="external">http://example.com:8080</a>'),
|
||||
array('http://example.com:8080/test.php',
|
||||
'<a href="http://example.com:8080/test.php" rel="external">http://example.com:8080/test.php</a>'),
|
||||
array('example.com:8080/test.php',
|
||||
'<a href="http://example.com:8080/test.php" rel="external">example.com:8080/test.php</a>'),
|
||||
array('http://www.example.com',
|
||||
'<a href="http://www.example.com/" rel="external">http://www.example.com</a>'),
|
||||
array('http://example.com/',
|
||||
@ -59,60 +103,65 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase
|
||||
'<a href="http://example.com/path.html#fragment" rel="external">http://example.com/path.html#fragment</a>'),
|
||||
array('http://example.com/path.php?foo=bar&bar=foo',
|
||||
'<a href="http://example.com/path.php?foo=bar&bar=foo" rel="external">http://example.com/path.php?foo=bar&bar=foo</a>'),
|
||||
array('http://müllärör.de',
|
||||
'<a href="http://müllärör.de" rel="external">http://müllärör.de</a>'),
|
||||
array('http://ﺱﺲﺷ.com',
|
||||
'<a href="http://ﺱﺲﺷ.com" rel="external">http://ﺱﺲﺷ.com</a>'),
|
||||
array('http://сделаткартинки.com',
|
||||
'<a href="http://сделаткартинки.com" rel="external">http://сделаткартинки.com</a>'),
|
||||
array('http://tūdaliņ.lv',
|
||||
'<a href="http://tūdaliņ.lv" rel="external">http://tūdaliņ.lv</a>'),
|
||||
array('http://brændendekærlighed.com',
|
||||
'<a href="http://brændendekærlighed.com" rel="external">http://brændendekærlighed.com</a>'),
|
||||
array('http://あーるいん.com',
|
||||
'<a href="http://あーるいん.com" rel="external">http://あーるいん.com</a>'),
|
||||
array('http://예비교사.com',
|
||||
'<a href="http://예비교사.com" rel="external">http://예비교사.com</a>'),
|
||||
array('http://example.com.',
|
||||
'<a href="http://example.com" rel="external">http://example.com</a>.'),
|
||||
'<a href="http://example.com/" rel="external">http://example.com</a>.'),
|
||||
array('http://müllärör.de',
|
||||
'<a href="http://müllärör.de/" rel="external">http://müllärör.de</a>'),
|
||||
array('http://ﺱﺲﺷ.com',
|
||||
'<a href="http://ﺱﺲﺷ.com/" rel="external">http://ﺱﺲﺷ.com</a>'),
|
||||
array('http://сделаткартинки.com',
|
||||
'<a href="http://сделаткартинки.com/" rel="external">http://сделаткартинки.com</a>'),
|
||||
array('http://tūdaliņ.lv',
|
||||
'<a href="http://tūdaliņ.lv/" rel="external">http://tūdaliņ.lv</a>'),
|
||||
array('http://brændendekærlighed.com',
|
||||
'<a href="http://brændendekærlighed.com/" rel="external">http://brændendekærlighed.com</a>'),
|
||||
array('http://あーるいん.com',
|
||||
'<a href="http://あーるいん.com/" rel="external">http://あーるいん.com</a>'),
|
||||
array('http://예비교사.com',
|
||||
'<a href="http://예비교사.com/" rel="external">http://예비교사.com</a>'),
|
||||
array('http://example.com.',
|
||||
'<a href="http://example.com/" rel="external">http://example.com</a>.'),
|
||||
array('http://example.com?',
|
||||
'<a href="http://example.com" rel="external">http://example.com</a>?'),
|
||||
'<a href="http://example.com/" rel="external">http://example.com</a>?'),
|
||||
array('http://example.com!',
|
||||
'<a href="http://example.com" rel="external">http://example.com</a>!'),
|
||||
'<a href="http://example.com/" rel="external">http://example.com</a>!'),
|
||||
array('http://example.com,',
|
||||
'<a href="http://example.com" rel="external">http://example.com</a>,'),
|
||||
'<a href="http://example.com/" rel="external">http://example.com</a>,'),
|
||||
array('http://example.com;',
|
||||
'<a href="http://example.com" rel="external">http://example.com</a>;'),
|
||||
'<a href="http://example.com/" rel="external">http://example.com</a>;'),
|
||||
array('http://example.com:',
|
||||
'<a href="http://example.com" rel="external">http://example.com</a>:'),
|
||||
'<a href="http://example.com/" rel="external">http://example.com</a>:'),
|
||||
array('\'http://example.com\'',
|
||||
'\'<a href="http://example.com" rel="external">http://example.com</a>\''),
|
||||
'\'<a href="http://example.com/" rel="external">http://example.com</a>\''),
|
||||
array('"http://example.com"',
|
||||
'"<a href="http://example.com" rel="external">http://example.com</a>"'),
|
||||
array('http://example.com
',
|
||||
'<a href="http://example.com" rel="external">http://example.com</a>'),
|
||||
'"<a href="http://example.com/" rel="external">http://example.com</a>"'),
|
||||
array('http://example.com',
|
||||
'<a href="http://example.com/" rel="external">http://example.com</a>'),
|
||||
array('(http://example.com)',
|
||||
'(<a href="http://example.com" rel="external">http://example.com</a>)'),
|
||||
'(<a href="http://example.com/" rel="external">http://example.com</a>)'),
|
||||
array('[http://example.com]',
|
||||
'[<a href="http://example.com" rel="external">http://example.com</a>]'),
|
||||
'[<a href="http://example.com/" rel="external">http://example.com</a>]'),
|
||||
array('<http://example.com>',
|
||||
'<<a href="http://example.com" rel="external">http://example.com</a>>'),
|
||||
'<<a href="http://example.com/" rel="external">http://example.com</a>>'),
|
||||
array('http://example.com/path/(foo)/bar',
|
||||
'<a href="http://example.com/path/(foo)/bar" rel="external">http://example.com/path/(foo)/bar</a>'),
|
||||
//Not a valid url - urls cannot contain unencoded square brackets
|
||||
array('http://example.com/path/[foo]/bar',
|
||||
'<a href="http://example.com/path/[foo]/bar" rel="external">http://example.com/path/[foo]/bar</a>'),
|
||||
array('http://example.com/path/foo/(bar)',
|
||||
'<a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>'),
|
||||
array('http://example.com/path/foo/[bar]',
|
||||
'<a href="http://example.com/path/foo/[bar]" rel="external">http://example.com/path/foo/[bar]</a>'),
|
||||
//Not a valid url - urls cannot contain unencoded square brackets
|
||||
//array('http://example.com/path/foo/[bar]',
|
||||
// '<a href="http://example.com/path/foo/[bar]" rel="external">http://example.com/path/foo/[bar]</a>'),
|
||||
array('Hey, check out my cool site http://example.com okay?',
|
||||
'Hey, check out my cool site <a href="http://example.com" rel="external">http://example.com</a> okay?'),
|
||||
'Hey, check out my cool site <a href="http://example.com/" rel="external">http://example.com</a> okay?'),
|
||||
array('What about parens (e.g. http://example.com/path/foo/(bar))?',
|
||||
'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>)?'),
|
||||
array('What about parens (e.g. http://example.com/path/foo/(bar)?',
|
||||
'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>?'),
|
||||
array('What about parens (e.g. http://example.com/path/foo/(bar).)?',
|
||||
'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>.)?'),
|
||||
//Not a valid url - urls cannot contain unencoded commas
|
||||
array('What about parens (e.g. http://example.com/path/(foo,bar)?',
|
||||
'What about parens (e.g. <a href="http://example.com/path/(foo,bar)" rel="external">http://example.com/path/(foo,bar)</a>?'),
|
||||
array('Unbalanced too (e.g. http://example.com/path/((((foo)/bar)?',
|
||||
@ -124,51 +173,51 @@ class URLDetectionTest extends PHPUnit_Framework_TestCase
|
||||
array('Unbalanced too (e.g. http://example.com/path/foo/(bar))))?',
|
||||
'Unbalanced too (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">http://example.com/path/foo/(bar)</a>)))?'),
|
||||
array('example.com',
|
||||
'<a href="http://example.com" rel="external">example.com</a>'),
|
||||
'<a href="http://example.com/" rel="external">example.com</a>'),
|
||||
array('example.org',
|
||||
'<a href="http://example.org" rel="external">example.org</a>'),
|
||||
'<a href="http://example.org/" rel="external">example.org</a>'),
|
||||
array('example.co.uk',
|
||||
'<a href="http://example.co.uk" rel="external">example.co.uk</a>'),
|
||||
'<a href="http://example.co.uk/" rel="external">example.co.uk</a>'),
|
||||
array('www.example.co.uk',
|
||||
'<a href="http://www.example.co.uk" rel="external">www.example.co.uk</a>'),
|
||||
'<a href="http://www.example.co.uk/" rel="external">www.example.co.uk</a>'),
|
||||
array('farm1.images.example.co.uk',
|
||||
'<a href="http://farm1.images.example.co.uk" rel="external">farm1.images.example.co.uk</a>'),
|
||||
'<a href="http://farm1.images.example.co.uk/" rel="external">farm1.images.example.co.uk</a>'),
|
||||
array('example.museum',
|
||||
'<a href="http://example.museum" rel="external">example.museum</a>'),
|
||||
'<a href="http://example.museum/" rel="external">example.museum</a>'),
|
||||
array('example.travel',
|
||||
'<a href="http://example.travel" rel="external">example.travel</a>'),
|
||||
'<a href="http://example.travel/" rel="external">example.travel</a>'),
|
||||
array('example.com.',
|
||||
'<a href="http://example.com" rel="external">example.com</a>.'),
|
||||
'<a href="http://example.com/" rel="external">example.com</a>.'),
|
||||
array('example.com?',
|
||||
'<a href="http://example.com" rel="external">example.com</a>?'),
|
||||
'<a href="http://example.com/" rel="external">example.com</a>?'),
|
||||
array('example.com!',
|
||||
'<a href="http://example.com" rel="external">example.com</a>!'),
|
||||
'<a href="http://example.com/" rel="external">example.com</a>!'),
|
||||
array('example.com,',
|
||||
'<a href="http://example.com" rel="external">example.com</a>,'),
|
||||
'<a href="http://example.com/" rel="external">example.com</a>,'),
|
||||
array('example.com;',
|
||||
'<a href="http://example.com" rel="external">example.com</a>;'),
|
||||
'<a href="http://example.com/" rel="external">example.com</a>;'),
|
||||
array('example.com:',
|
||||
'<a href="http://example.com" rel="external">example.com</a>:'),
|
||||
'<a href="http://example.com/" rel="external">example.com</a>:'),
|
||||
array('\'example.com\'',
|
||||
'\'<a href="http://example.com" rel="external">example.com</a>\''),
|
||||
'\'<a href="http://example.com/" rel="external">example.com</a>\''),
|
||||
array('"example.com"',
|
||||
'"<a href="http://example.com" rel="external">example.com</a>"'),
|
||||
array('example.com
',
|
||||
'<a href="http://example.com" rel="external">example.com</a>'),
|
||||
'"<a href="http://example.com/" rel="external">example.com</a>"'),
|
||||
array('example.com',
|
||||
'<a href="http://example.com/" rel="external">example.com</a>'),
|
||||
array('(example.com)',
|
||||
'(<a href="http://example.com" rel="external">example.com</a>)'),
|
||||
'(<a href="http://example.com/" rel="external">example.com</a>)'),
|
||||
array('[example.com]',
|
||||
'[<a href="http://example.com" rel="external">example.com</a>]'),
|
||||
'[<a href="http://example.com/" rel="external">example.com</a>]'),
|
||||
array('<example.com>',
|
||||
'<<a href="http://example.com" rel="external">example.com</a>>'),
|
||||
'<<a href="http://example.com/" rel="external">example.com</a>>'),
|
||||
array('Hey, check out my cool site example.com okay?',
|
||||
'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a> okay?'),
|
||||
'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a> okay?'),
|
||||
array('Hey, check out my cool site example.com.I made it.',
|
||||
'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a>.I made it.'),
|
||||
'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a>.I made it.'),
|
||||
array('Hey, check out my cool site example.com.Funny thing...',
|
||||
'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a>.Funny thing...'),
|
||||
'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a>.Funny thing...'),
|
||||
array('Hey, check out my cool site example.com.You will love it.',
|
||||
'Hey, check out my cool site <a href="http://example.com" rel="external">example.com</a>.You will love it.'),
|
||||
'Hey, check out my cool site <a href="http://example.com/" rel="external">example.com</a>.You will love it.'),
|
||||
array('What about parens (e.g. example.com/path/foo/(bar))?',
|
||||
'What about parens (e.g. <a href="http://example.com/path/foo/(bar)" rel="external">example.com/path/foo/(bar)</a>)?'),
|
||||
array('What about parens (e.g. example.com/path/foo/(bar)?',
|
||||
|
Loading…
Reference in New Issue
Block a user