Harmonize webfinger formatting and enable variable pre-mention character

This commit is contained in:
Mikael Nordfeldth 2017-08-10 11:06:52 +02:00
parent 20801a32f7
commit 8dd06cd8d8
3 changed files with 17 additions and 9 deletions

View File

@ -54,7 +54,10 @@ class Nickname
* We could probably use an email regex here, but mainly we are interested
* in matching it in our URLs, like https://social.example/user@example.com
*/
const WEBFINGER_FMT = '[0-9a-zA-Z_]{1,64}\@[0-9a-zA-Z_-.]{3,255}';
const WEBFINGER_FMT = '(?:\w+[\w\-\_\.]*)?\w+\@'.URL_REGEX_DOMAIN_NAME;
// old one without support for -_. in nickname part:
// const WEBFINGER_FMT = '[0-9a-zA-Z_]{1,64}\@[0-9a-zA-Z_-.]{3,255}';
/**
* Regex fragment for checking a canonical nickname.

View File

@ -768,7 +768,7 @@ function common_find_mentions($text, Profile $sender, Notice $parent=null)
}
}
$matches = common_find_mentions_raw($text);
$matches = common_find_mentions_raw($text, '@');
foreach ($matches as $match) {
try {
@ -879,9 +879,10 @@ function common_find_mentions($text, Profile $sender, Notice $parent=null)
* Should generally not be called directly; for use in common_find_mentions.
*
* @param string $text
* @param string $preMention Character(s) that signals a mention ('@', '!'...)
* @return array of PCRE match arrays
*/
function common_find_mentions_raw($text)
function common_find_mentions_raw($text, $preMention='@')
{
$tmatches = array();
preg_match_all('/^T (' . Nickname::DISPLAY_FMT . ') /',
@ -891,7 +892,7 @@ function common_find_mentions_raw($text)
$atmatches = array();
// the regexp's "(?!\@)" makes sure it doesn't match the single "@remote" in "@remote@server.com"
preg_match_all('/'.Nickname::BEFORE_MENTIONS.'@(' . Nickname::DISPLAY_FMT . ')\b(?!\@)/',
preg_match_all('/'.Nickname::BEFORE_MENTIONS.preg_quote($preMention, '/').'(' . Nickname::DISPLAY_FMT . ')\b(?!\@)/',
$text,
$atmatches,
PREG_OFFSET_CAPTURE);

View File

@ -258,14 +258,16 @@ class OStatusPlugin extends Plugin
/**
* Webfinger matches: @user@example.com or even @user--one.george_orwell@1984.biz
* @param string $text The text from which to extract webfinger IDs
* @param string $preMention Character(s) that signals a mention ('@', '!'...)
*
* @return array The matching IDs (without the leading pre-mention character(s) or acct:) and each respective position in the given string.
*/
static function extractWebfingerIds($text)
static function extractWebfingerIds($text, $preMention='@')
{
$wmatches = array();
// The ID part of the pattern is shared with lib/nickname.php through Nickname::WEBFINGER_FMT
$result = preg_match_all('/(?<!\S)@((?:\w+[\w\-\_\.]*)?\w+@'.URL_REGEX_DOMAIN_NAME.')/',
$result = preg_match_all('/(?<!\S)'.preg_quote($preMention, '/').'('.Nickname::WEBFINGER_FMT.')/',
$text,
$wmatches,
PREG_OFFSET_CAPTURE);
@ -279,15 +281,17 @@ class OStatusPlugin extends Plugin
/**
* Profile URL matches: @example.com/mublog/user
* @param string $text The text from which to extract URL mentions
* @param string $preMention Character(s) that signals a mention ('@', '!'...)
*
* @return array The matching URLs (without the leading pre-mention character(s) or acct:) and each respective position in the given string.
*/
static function extractUrlMentions($text)
static function extractUrlMentions($text, $preMention='@')
{
$wmatches = array();
// In the regexp below we need to match / _before_ URL_REGEX_VALID_PATH_CHARS because it otherwise gets merged
// with the TLD before (but / is in URL_REGEX_VALID_PATH_CHARS anyway, it's just its positioning that is important)
$result = preg_match_all('/(?:^|\s+)@('.URL_REGEX_DOMAIN_NAME.'(?:\/['.URL_REGEX_VALID_PATH_CHARS.']*)*)/',
$result = preg_match_all('/(?:^|\s+)'.preg_quote($preMention, '/').'('.URL_REGEX_DOMAIN_NAME.'(?:\/['.URL_REGEX_VALID_PATH_CHARS.']*)*)/',
$text,
$wmatches,
PREG_OFFSET_CAPTURE);
@ -312,7 +316,7 @@ class OStatusPlugin extends Plugin
{
$matches = array();
foreach (self::extractWebfingerIds($text) as $wmatch) {
foreach (self::extractWebfingerIds($text, '@') as $wmatch) {
list($target, $pos) = $wmatch;
$this->log(LOG_INFO, "Checking webfinger '$target'");
$profile = null;