heuristics for paren linking (thanks @brion!)
darcs-hash:20081102090241-5ed1f-f5278c69aa0c7427294b40b91f8957530b07062f.gz
This commit is contained in:
parent
3b62407067
commit
854e208c05
30
lib/util.php
30
lib/util.php
@ -711,12 +711,40 @@ function common_render_text($text) {
|
||||
$r = htmlspecialchars($text);
|
||||
|
||||
$r = preg_replace('/[\x{0}-\x{8}\x{b}-\x{c}\x{e}-\x{19}]/', '', $r);
|
||||
$r = preg_replace('@https?://[^)\]>\s]+@', '<a href="\0" class="extlink">\0</a>', $r);
|
||||
$r = preg_replace_callback('@https?://[^\]>\s]+@', 'common_render_uri_thingy', $r);
|
||||
$r = preg_replace('/(^|\s+)#([A-Za-z0-9_\-\.]{1,64})/e', "'\\1#'.common_tag_link('\\2')", $r);
|
||||
# XXX: machine tags
|
||||
return $r;
|
||||
}
|
||||
|
||||
/**
 * Turn one matched URI into an HTML link, moving surrounding punctuation
 * out of the link target.
 *
 * Intended as a preg_replace_callback() callback: only $matches[0] (the
 * full match) is used.
 *
 * Heuristics, applied in order:
 *  1. Any run of trailing , . : " ' characters is treated as sentence
 *     punctuation and moved after the link.
 *  2. If the URI then ends in ')' or ']' and contains more closers than
 *     openers of that kind, the final closer is assumed to belong to the
 *     surrounding text and is moved after the link as well.
 *
 * NOTE(review): the URI is inserted into the markup as-is; this presumably
 * relies on the caller having HTML-escaped the text beforehand -- confirm.
 *
 * @param array $matches regex match array; $matches[0] is the candidate URI
 *
 * @return string anchor element for the URI followed by any stripped trailer
 */
function common_render_uri_thingy($matches) {
    $uri = $matches[0];
    $trailer = '';

    # Some heuristics for extracting URIs from surrounding punctuation
    # Strip from trailing text...
    # The prefix is matched lazily so the second group captures the WHOLE
    # run of trailing punctuation; a greedy prefix would leave only the
    # last character for it (e.g. "foo..." would keep "foo..").
    if (preg_match('/^(.*?)([,.:"\']+)$/', $uri, $parts)) {
        $uri = $parts[1];
        $trailer = $parts[2];
    }

    $pairs = array(
        ']' => '[', # technically disallowed in URIs, but used in Java docs
        ')' => '(', # far too frequent in Wikipedia and MSDN
    );

    $final = substr($uri, -1, 1);
    if (isset($pairs[$final])) {
        $openers = substr_count($uri, $pairs[$final]);
        $closers = substr_count($uri, $final);
        if ($closers > $openers) {
            // Assume the paren was opened outside the URI
            $uri = substr($uri, 0, -1);
            $trailer = $final . $trailer;
        }
    }

    return '<a href="' . $uri . '" class="extlink">' . $uri . '</a>' . $trailer;
}
|
||||
|
||||
function common_tag_link($tag) {
|
||||
$canonical = common_canonical_tag($tag);
|
||||
$url = common_local_url('tag', array('tag' => $canonical));
|
||||
|
Loading…
Reference in New Issue
Block a user