Harmonize webfinger formatting and enable variable pre-mention character

This commit is contained in:
Mikael Nordfeldth 2017-08-10 11:06:52 +02:00
parent 20801a32f7
commit 8dd06cd8d8
3 changed files with 17 additions and 9 deletions

View File

@ -54,7 +54,10 @@ class Nickname
* We could probably use an email regex here, but mainly we are interested * We could probably use an email regex here, but mainly we are interested
* in matching it in our URLs, like https://social.example/user@example.com * in matching it in our URLs, like https://social.example/user@example.com
*/ */
const WEBFINGER_FMT = '[0-9a-zA-Z_]{1,64}\@[0-9a-zA-Z_-.]{3,255}'; const WEBFINGER_FMT = '(?:\w+[\w\-\_\.]*)?\w+\@'.URL_REGEX_DOMAIN_NAME;
// old one without support for -_. in nickname part:
// const WEBFINGER_FMT = '[0-9a-zA-Z_]{1,64}\@[0-9a-zA-Z_-.]{3,255}';
/** /**
* Regex fragment for checking a canonical nickname. * Regex fragment for checking a canonical nickname.

View File

@ -768,7 +768,7 @@ function common_find_mentions($text, Profile $sender, Notice $parent=null)
} }
} }
$matches = common_find_mentions_raw($text); $matches = common_find_mentions_raw($text, '@');
foreach ($matches as $match) { foreach ($matches as $match) {
try { try {
@ -879,9 +879,10 @@ function common_find_mentions($text, Profile $sender, Notice $parent=null)
* Should generally not be called directly; for use in common_find_mentions. * Should generally not be called directly; for use in common_find_mentions.
* *
* @param string $text * @param string $text
* @param string $preMention Character(s) that signals a mention ('@', '!'...)
* @return array of PCRE match arrays * @return array of PCRE match arrays
*/ */
function common_find_mentions_raw($text) function common_find_mentions_raw($text, $preMention='@')
{ {
$tmatches = array(); $tmatches = array();
preg_match_all('/^T (' . Nickname::DISPLAY_FMT . ') /', preg_match_all('/^T (' . Nickname::DISPLAY_FMT . ') /',
@ -891,7 +892,7 @@ function common_find_mentions_raw($text)
$atmatches = array(); $atmatches = array();
// the regexp's "(?!\@)" makes sure it doesn't match the single "@remote" in "@remote@server.com" // the regexp's "(?!\@)" makes sure it doesn't match the single "@remote" in "@remote@server.com"
preg_match_all('/'.Nickname::BEFORE_MENTIONS.'@(' . Nickname::DISPLAY_FMT . ')\b(?!\@)/', preg_match_all('/'.Nickname::BEFORE_MENTIONS.preg_quote($preMention, '/').'(' . Nickname::DISPLAY_FMT . ')\b(?!\@)/',
$text, $text,
$atmatches, $atmatches,
PREG_OFFSET_CAPTURE); PREG_OFFSET_CAPTURE);

View File

@ -258,14 +258,16 @@ class OStatusPlugin extends Plugin
/** /**
* Webfinger matches: @user@example.com or even @user--one.george_orwell@1984.biz * Webfinger matches: @user@example.com or even @user--one.george_orwell@1984.biz
* @param string $text The text from which to extract webfinger IDs
* @param string $preMention Character(s) that signals a mention ('@', '!'...)
* *
* @return array The matching IDs (without @ or acct:) and each respective position in the given string. * @return array The matching IDs (without @ or acct:) and each respective position in the given string.
*/ */
static function extractWebfingerIds($text) static function extractWebfingerIds($text, $preMention='@')
{ {
$wmatches = array(); $wmatches = array();
// Maybe this should harmonize with lib/nickname.php and Nickname::WEBFINGER_FMT // Maybe this should harmonize with lib/nickname.php and Nickname::WEBFINGER_FMT
$result = preg_match_all('/(?<!\S)@((?:\w+[\w\-\_\.]*)?\w+@'.URL_REGEX_DOMAIN_NAME.')/', $result = preg_match_all('/(?<!\S)'.preg_quote($preMention, '/').'('.Nickname::WEBFINGER_FMT.')/',
$text, $text,
$wmatches, $wmatches,
PREG_OFFSET_CAPTURE); PREG_OFFSET_CAPTURE);
@ -279,15 +281,17 @@ class OStatusPlugin extends Plugin
/** /**
* Profile URL matches: @example.com/mublog/user * Profile URL matches: @example.com/mublog/user
* @param string $text The text from which to extract URL mentions
* @param string $preMention Character(s) that signals a mention ('@', '!'...)
* *
* @return array The matching URLs (without @ or acct:) and each respective position in the given string. * @return array The matching URLs (without @ or acct:) and each respective position in the given string.
*/ */
static function extractUrlMentions($text) static function extractUrlMentions($text, $preMention='@')
{ {
$wmatches = array(); $wmatches = array();
// In the regexp below we need to match / _before_ URL_REGEX_VALID_PATH_CHARS because it otherwise gets merged // In the regexp below we need to match / _before_ URL_REGEX_VALID_PATH_CHARS because it otherwise gets merged
// with the TLD before (but / is in URL_REGEX_VALID_PATH_CHARS anyway, it's just its positioning that is important) // with the TLD before (but / is in URL_REGEX_VALID_PATH_CHARS anyway, it's just its positioning that is important)
$result = preg_match_all('/(?:^|\s+)@('.URL_REGEX_DOMAIN_NAME.'(?:\/['.URL_REGEX_VALID_PATH_CHARS.']*)*)/', $result = preg_match_all('/(?:^|\s+)'.preg_quote($preMention, '/').'('.URL_REGEX_DOMAIN_NAME.'(?:\/['.URL_REGEX_VALID_PATH_CHARS.']*)*)/',
$text, $text,
$wmatches, $wmatches,
PREG_OFFSET_CAPTURE); PREG_OFFSET_CAPTURE);
@ -312,7 +316,7 @@ class OStatusPlugin extends Plugin
{ {
$matches = array(); $matches = array();
foreach (self::extractWebfingerIds($text) as $wmatch) { foreach (self::extractWebfingerIds($text, '@') as $wmatch) {
list($target, $pos) = $wmatch; list($target, $pos) = $wmatch;
$this->log(LOG_INFO, "Checking webfinger '$target'"); $this->log(LOG_INFO, "Checking webfinger '$target'");
$profile = null; $profile = null;