2008-08-22 14:17:14 +01:00
<?php
#
# Markdown - A text-to-HTML conversion tool for web writers
#
# PHP Markdown
# Copyright (c) 2004-2008 Michel Fortin
# <http://www.michelf.com/projects/php-markdown/>
#
# Original Markdown
# Copyright (c) 2004-2006 John Gruber
# <http://daringfireball.net/projects/markdown/>
#
# Library version string.
define( 'MARKDOWN_VERSION',  "1.0.1m" ); # Sat 21 Jun 2008

#
# Global default settings:
#
# Every setting below is only defined when the including application has
# not defined it already, so a site can override a value by defining the
# constant before this file is loaded.
#

# Suffix appended to empty elements (it is concatenated directly after
# "<hr", "<br" and "<img ..." in this file, hence the leading space).
# Change to ">" for HTML output
defined( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX' ) || define( 'MARKDOWN_EMPTY_ELEMENT_SUFFIX', " />" );

# Define the width of a tab for code blocks.
defined( 'MARKDOWN_TAB_WIDTH' ) || define( 'MARKDOWN_TAB_WIDTH', 4 );

#
# WordPress settings:
#

# Change to false to remove Markdown from posts and/or comments.
defined( 'MARKDOWN_WP_POSTS' ) || define( 'MARKDOWN_WP_POSTS', true );
defined( 'MARKDOWN_WP_COMMENTS' ) || define( 'MARKDOWN_WP_COMMENTS', true );

### Standard Function Interface ###

# Name of the class instantiated by Markdown().
defined( 'MARKDOWN_PARSER_CLASS' ) || define( 'MARKDOWN_PARSER_CLASS', 'Markdown_Parser' );
function Markdown($text) {
#
# Convert $text with a shared parser instance and return the result of
# the parser's transform() method.
#
	# The parser is created on the first call only and then kept in a
	# function-static variable. Its class is named by MARKDOWN_PARSER_CLASS.
	static $parser;
	if (isset($parser)) {
		return $parser->transform($text);
	}

	$class_name = MARKDOWN_PARSER_CLASS;
	$parser = new $class_name;
	return $parser->transform($text);
}
### WordPress Plugin Interface ###
/*
Plugin Name: Markdown
Plugin URI: http://www.michelf.com/projects/php-markdown/
Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
Version: 1.0.1m
Author: Michel Fortin
Author URI: http://www.michelf.com/
*/
if ( isset ( $wp_version )) {
# More details about how it works here:
# <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
# Post content and excerpts
# - Remove WordPress paragraph generator.
# - Run Markdown on excerpt, then remove all tags.
# - Add paragraph tag around the excerpt, but remove it for the excerpt rss.
# Filter setup for post content and excerpts; skipped entirely when
# MARKDOWN_WP_POSTS is false.
if ( MARKDOWN_WP_POSTS ) {
# Detach WordPress's paragraph generator from content, RSS content and
# excerpts.
remove_filter ( 'the_content' , 'wpautop' );
remove_filter ( 'the_content_rss' , 'wpautop' );
remove_filter ( 'the_excerpt' , 'wpautop' );
# Run Markdown at priority 6 on content, RSS content and excerpts.
add_filter ( 'the_content' , 'Markdown' , 6 );
add_filter ( 'the_content_rss' , 'Markdown' , 6 );
add_filter ( 'get_the_excerpt' , 'Markdown' , 6 );
# Trim the converted excerpt (priority 7, i.e. after Markdown at 6).
add_filter ( 'get_the_excerpt' , 'trim' , 7 );
# Wrap the displayed excerpt in <p>, but strip <p> tags for the RSS excerpt.
add_filter ( 'the_excerpt' , 'mdwp_add_p' );
add_filter ( 'the_excerpt_rss' , 'mdwp_strip_p' );
# Move tag balancing from save time to display time: it is removed from
# the *_save_pre hooks and attached to the output hooks instead.
remove_filter ( 'content_save_pre' , 'balanceTags' , 50 );
remove_filter ( 'excerpt_save_pre' , 'balanceTags' , 50 );
add_filter ( 'the_content' , 'balanceTags' , 50 );
add_filter ( 'get_the_excerpt' , 'balanceTags' , 9 );
}
# Comments
# - Remove WordPress paragraph generator.
# - Remove WordPress auto-link generator.
# - Scramble important tags before passing them to the kses filter.
# - Run Markdown on excerpt then remove paragraph tags.
# Filter setup for comments; skipped entirely when MARKDOWN_WP_COMMENTS
# is false.
if ( MARKDOWN_WP_COMMENTS ) {
# Detach WordPress's paragraph generator and auto-linker from comment text.
remove_filter ( 'comment_text' , 'wpautop' , 30 );
remove_filter ( 'comment_text' , 'make_clickable' );
# On incoming comments: convert with Markdown (6), swap the listed tags for
# placeholders (8), then swap them back (12).
# NOTE(review): presumably the kses filter runs between priorities 8 and 12,
# which is why the tags are hidden in between -- confirm against WordPress.
add_filter ( 'pre_comment_content' , 'Markdown' , 6 );
add_filter ( 'pre_comment_content' , 'mdwp_hide_tags' , 8 );
add_filter ( 'pre_comment_content' , 'mdwp_show_tags' , 12 );
# On display: convert comment text and excerpts; excerpts also lose their
# <p> tags (priority 7, after Markdown at 6).
add_filter ( 'get_comment_text' , 'Markdown' , 6 );
add_filter ( 'get_comment_excerpt' , 'Markdown' , 6 );
add_filter ( 'get_comment_excerpt' , 'mdwp_strip_p' , 7 );
# Parallel lists used by mdwp_hide_tags() and mdwp_show_tags(): entry N of
# $mdwp_placeholders stands in for entry N of $mdwp_hidden_tags. Both lists
# have ten space-separated entries; the placeholders are the ROT13 of the
# literal below.
global $mdwp_hidden_tags , $mdwp_placeholders ;
$mdwp_hidden_tags = explode ( ' ' ,
'<p> </p> <pre> </pre> <ol> </ol> <ul> </ul> <li> </li>' );
$mdwp_placeholders = explode ( ' ' , str_rot13 (
'pEj07ZbbBZ U1kqgh4w4p pre2zmeN6K QTi31t9pre ol0MP1jzJR ' .
'ML5IjmbRol ulANi1NsGY J7zRLJqPul liA8ctl16T K9nhooUHli' ));
}
function mdwp_add_p($text) {
#
# Wrap an excerpt in paragraph tags.
#
# $text is returned untouched when it is empty or already starts with one
# of the block tags <p>, <ul>, <ol>, <dl>, <pre> or <blockquote>
# (case-insensitive, without attributes). Otherwise it is wrapped in
# <p>...</p> and every run of two or more newlines becomes a paragraph
# break.
#
	if (!preg_match('{^$|^<(p|ul|ol|dl|pre|blockquote)>}i', $text)) {
		$text = '<p>'.$text.'</p>';
		# Close the current paragraph and open the next one, separated
		# by exactly one blank line.
		$text = preg_replace('{\n{2,}}', "</p>\n\n<p>", $text);
	}
	return $text;
}
function mdwp_strip_p($t) {
	# Delete every attribute-less <p> and </p> tag (case-insensitive),
	# keeping the text between them.
	$without_paragraph_tags = preg_replace('{</?p>}i', '', $t);
	return $without_paragraph_tags;
}
function mdwp_hide_tags($text) {
	# Replace each tag listed in the global $mdwp_hidden_tags with the
	# placeholder at the same position in the global $mdwp_placeholders.
	global $mdwp_placeholders, $mdwp_hidden_tags;
	$scrambled = str_replace($mdwp_hidden_tags, $mdwp_placeholders, $text);
	return $scrambled;
}
function mdwp_show_tags($text) {
	# Inverse of mdwp_hide_tags(): replace each placeholder listed in the
	# global $mdwp_placeholders with the tag at the same position in the
	# global $mdwp_hidden_tags.
	global $mdwp_placeholders, $mdwp_hidden_tags;
	$restored = str_replace($mdwp_placeholders, $mdwp_hidden_tags, $text);
	return $restored;
}
}
### bBlog Plugin Info ###
function identify_modifier_markdown() {
	# Describe this file as a bBlog "modifier" plugin. The version entry
	# is taken from MARKDOWN_VERSION; every other entry is a fixed string.
	$info = array();
	$info['name']        = 'markdown';
	$info['type']        = 'modifier';
	$info['nicename']    = 'Markdown';
	$info['description'] = 'A text-to-HTML conversion tool for web writers';
	$info['authors']     = 'Michel Fortin and John Gruber';
	$info['licence']     = 'BSD-like';
	$info['version']     = MARKDOWN_VERSION;
	$info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';
	return $info;
}
### Smarty Modifier Interface ###
function smarty_modifier_markdown($text) {
	# Smarty modifier entry point: hands $text to Markdown() and returns
	# its result unchanged.
	$html = Markdown($text);
	return $html;
}
### Textile Compatibility Mode ###
# Rename this file to "classTextile.php" and it can replace Textile everywhere.
# The compatibility class is only declared when this file itself is named
# "classTextile.php" (compared case-insensitively against the last 16
# characters of __FILE__, which is exactly the length of that name).
if (strcasecmp(substr(__FILE__, -16), "classTextile.php") == 0) {
	# Try to include PHP SmartyPants. Should be in the same directory.
	# Failure is deliberately silenced: SmartyPants is optional and is
	# only used below when its function exists.
	@include_once 'smartypants.php';

	# Fake Textile class. It calls Markdown instead.
	class Textile {
		# Convert $text with Markdown, then with SmartyPants when available.
		# Markdown is only applied when both $lite and $encode are empty.
		function TextileThis($text, $lite='', $encode='') {
			if ($lite == '' && $encode == '')    $text = Markdown($text);
			if (function_exists('SmartyPants'))  $text = SmartyPants($text);
			return $text;
		}
		# Fake restricted version: restrictions are not supported for now.
		# $noimage is accepted for signature compatibility and ignored.
		function TextileRestricted($text, $lite='', $noimage='') {
			return $this->TextileThis($text, $lite);
		}
		# Workaround to ensure compatibility with TextPattern 4.0.3.
		# Returns $text unchanged.
		function blockLite($text) { return $text; }
	}
}
#
# Markdown Parser Class
#
class Markdown_Parser {
# Regexes to match balanced [brackets] and balanced (parentheses) in URLs.
# PCRE patterns here are built by repetition, so a maximum nesting depth
# is needed; the constructor expands each depth into the matching *_re
# string.
var $nested_brackets_depth = 6 ;
var $nested_brackets_re ;
var $nested_url_parenthesis_depth = 4 ;
var $nested_url_parenthesis_re ;
# Characters that can be backslash-escaped. The constructor turns this
# list into the character class stored in $escape_chars_re.
var $escape_chars = '\`*_{}[]()>#+-.!' ;
var $escape_chars_re ;
# Suffix appended to empty elements. Change to ">" for HTML output.
var $empty_element_suffix = MARKDOWN_EMPTY_ELEMENT_SUFFIX ;
# Width of a tab, in spaces.
var $tab_width = MARKDOWN_TAB_WIDTH ;
# Change to `true` to disallow markup or entities.
# When $no_markup is true, hashHTMLBlocks() returns its input unchanged.
var $no_markup = false ;
var $no_entities = false ;
# Predefined urls and titles for reference links and images.
# setup() copies them into $urls and $titles before each transformation.
var $predef_urls = array ();
var $predef_titles = array ();
function __construct() {
#
# Constructor entry point for PHP 5 and later. It delegates to the
# class-named method below, which PHP 8 no longer calls automatically on
# `new`. Declared first so that it is the constructor PHP picks up.
#
	$this->Markdown_Parser();
}

function Markdown_Parser() {
#
# Constructor function. Initialize appropriate member variables.
# Kept under its original name so existing callers that invoke it
# explicitly (e.g. parent::Markdown_Parser() in a subclass) still work.
#
	$this->_initDetab();
	$this->prepareItalicsAndBold();

	# Balanced-bracket matcher: the opening fragment repeated `depth`
	# times followed by the closing fragment repeated `depth` times.
	$this->nested_brackets_re =
		str_repeat('(?>[^\[\]]+|\[', $this->nested_brackets_depth).
		str_repeat('\])*', $this->nested_brackets_depth);

	# Same construction for parentheses inside URLs.
	$this->nested_url_parenthesis_re =
		str_repeat('(?>[^()\s]+|\(', $this->nested_url_parenthesis_depth).
		str_repeat('(?>\)))*', $this->nested_url_parenthesis_depth);

	# Character class matching any single escapable character.
	$this->escape_chars_re = '['.preg_quote($this->escape_chars).']';

	# Sort document, block, and span gamut in ascendent priority order.
	asort($this->document_gamut);
	asort($this->block_gamut);
	asort($this->span_gamut);
}
# Internal hashes used during transformation.
# $urls and $titles map a lower-cased link id to its URL / title;
# $html_hashes maps a placeholder token to the text it stands for.
var $urls = array ();
var $titles = array ();
var $html_hashes = array ();
# Status flag to avoid invalid nesting: doAnchors() returns its input
# untouched while this is true.
var $in_anchor = false ;
function setup() {
#
# Called before the transformation process starts to setup parser
# states.
#
	# Start from the predefined reference links and an empty hash table.
	$this->urls = $this->predef_urls;
	$this->titles = $this->predef_titles;
	$this->html_hashes = array();

	# Reset the nesting flag on the object itself, so a previous
	# transformation that ended inside an anchor cannot leak into this one.
	$this->in_anchor = false;
}
function teardown() {
#
# Called once the transformation is finished: empties the per-document
# tables (urls, titles, html_hashes) so they do not keep memory in use.
#
	$this->urls = $this->titles = $this->html_hashes = array();
}
function transform($text) {
#
# Main function. Performs some preprocessing on the input text
# and pass it through the document gamut.
#
# Returns the transformed text followed by one newline.
#
	$this->setup();

	# Remove UTF-8 BOM and marker character in input, if present.
	# (\x1A is the character hashPart() uses inside its tokens.)
	$text = preg_replace('{^\xEF\xBB\xBF|\x1A}', '', $text);

	# Standardize line endings:
	#   DOS to Unix and Mac to Unix
	$text = preg_replace('{\r\n?}', "\n", $text);

	# Make sure $text ends with a couple of newlines:
	$text .= "\n\n";

	# Convert all tabs to spaces.
	$text = $this->detab($text);

	# Turn block-level HTML blocks into hash entries
	$text = $this->hashHTMLBlocks($text);

	# Strip any lines consisting only of spaces and tabs.
	# This makes subsequent regexen easier to write, because we can
	# match consecutive blank lines with /\n+/ instead of something
	# contorted like /[ ]*\n+/ .
	$text = preg_replace('/^[ ]+$/m', '', $text);

	# Run document gamut methods, in the order of the (sorted) table.
	foreach ($this->document_gamut as $method => $priority) {
		$text = $this->$method($text);
	}

	$this->teardown();

	return $text . "\n";
}
# Document-level passes run by transform(): method name => priority.
# Keys must be exact method names because they are invoked as
# $this->$method($text); the constructor sorts the table by priority.
var $document_gamut = array(
	# Strip link definitions, store in hashes.
	"stripLinkDefinitions" => 20,

	"runBasicBlockGamut"   => 30,
	);
function stripLinkDefinitions($text) {
#
# Strips link definitions from text, stores the URLs and titles in
# hash references.
#
# Each definition is removed from $text; the callback records it in
# $this->urls / $this->titles. Returns the remaining text.
#
	# A definition may be indented by fewer spaces than one tab.
	$less_than_tab = $this->tab_width - 1;

	# Link defs are in the form: ^[id]: url "optional title"
	$text = preg_replace_callback('{
						^[ ]{0,'.$less_than_tab.'}\[(.+)\][ ]?:	# id = $1
						  [ ]*
						  \n?				# maybe *one* newline
						  [ ]*
						<?(\S+?)>?			# url = $2
						  [ ]*
						  \n?				# maybe one newline
						  [ ]*
						(?:
							(?<=\s)			# lookbehind for whitespace
							["(]
							(.*?)			# title = $3
							[")]
							[ ]*
						)?	# title is optional
						(?:\n+|\Z)
		}xm',
		array(&$this, '_stripLinkDefinitions_callback'),
		$text);
	return $text;
}
function _stripLinkDefinitions_callback($matches) {
#
# Record one link definition: $matches[1] is the id (stored lower-cased
# as the key), $matches[2] the URL and $matches[3] the optional title.
# The definition is replaced by an empty string in the text.
#
	$id = strtolower($matches[1]);

	# Bound by reference so that a missing title group yields a null
	# entry instead of an undefined-offset notice.
	$this->titles[$id] =& $matches[3];
	$this->urls[$id] = $matches[2];

	return '';
}
function hashHTMLBlocks($text) {
#
# Replace every block-level HTML element found in $text by a hash token
# (see _hashHTMLBlocks_callback) so later passes leave it alone.
# Returns $text unchanged when $this->no_markup is set.
#
	if ($this->no_markup)  return $text;

	# A block may be indented by fewer spaces than one tab.
	$less_than_tab = $this->tab_width - 1;

	# Hashify HTML blocks:
	# We only want to do this for block-level HTML tags, such as headers,
	# lists, and tables. That's because we still want to wrap <p>s around
	# "paragraphs" that are wrapped in non-block-level tags, such as anchors,
	# phrase emphasis, and spans. The list of tags we're looking for is
	# hard-coded:
	#
	# *  List "a" is made of tags which can be both inline or block-level.
	#    These will be treated block-level when the start tag is alone on
	#    its line, otherwise they're not matched here and will be taken as
	#    inline later.
	# *  List "b" is made of tags which are always block-level;
	#
	$block_tags_a_re = 'ins|del';
	$block_tags_b_re = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|address|'.
					   'script|noscript|form|fieldset|iframe|math';

	# Regular expression for the content of a block tag.
	$nested_tags_level = 4;
	$attr = '
		(?>				# optional tag attributes
		  \s			# starts with whitespace
		  (?>
			[^>"/]+		# text outside quotes
		  |
			/+(?!>)		# slash not followed by ">"
		  |
			"[^"]*"		# text inside double quotes (tolerate ">")
		  |
			\'[^\']*\'	# text inside single quotes (tolerate ">")
		  )*
		)?
		';
	# The opening half is repeated once per nesting level, then the
	# closing half the same number of times, giving a fixed-depth
	# recursive pattern. \2 refers to the tag name captured by the caller.
	$content =
		str_repeat('
			(?>
			  [^<]+			# content without tag
			|
			  <\2			# nested opening tag
				'.$attr.'	# attributes
				(?>
				  />
				|
				  >', $nested_tags_level).	# end of opening tag
				  '.*?'.					# last level nested tag content
		str_repeat('
				  </\2\s*>	# closing nested tag
				)
			  |
				<(?!/\2\s*>	# other tags with a different name
			  )
			)*',
			$nested_tags_level);
	# Same pattern for group "a" tags, whose name is captured in $3.
	$content2 = str_replace('\2', '\3', $content);

	# First, look for nested blocks, e.g.:
	# 	<div>
	# 		<div>
	# 		tags for inner block must be indented.
	# 		</div>
	# 	</div>
	#
	# The outermost tags must start at the left margin for this to match, and
	# the inner nested divs must be indented.
	# We need to do this before the next, more liberal match, because the next
	# match will start at the first `<div>` and stop at the first `</div>`.
	$text = preg_replace_callback('{(?>
		(?>
			(?<=\n\n)		# Starting after a blank line
			|				# or
			\A\n?			# the beginning of the doc
		)
		(						# save in $1

		  # Match from `\n<tag>` to `</tag>\n`, handling nested tags
		  # in between.

					[ ]{0,'.$less_than_tab.'}
					<('.$block_tags_b_re.')# start tag = $2
					'.$attr.'>			# attributes followed by > and \n
					'.$content.'		# content, support nesting
					</\2>				# the matching end tag
					[ ]*				# trailing spaces/tabs
					(?=\n+|\Z)	# followed by a newline or end of document

		| # Special version for tags of group a.

					[ ]{0,'.$less_than_tab.'}
					<('.$block_tags_a_re.')# start tag = $3
					'.$attr.'>[ ]*\n	# attributes followed by >
					'.$content2.'		# content, support nesting
					</\3>				# the matching end tag
					[ ]*				# trailing spaces/tabs
					(?=\n+|\Z)	# followed by a newline or end of document

		| # Special case just for <hr />. It was easier to make a special
		  # case than to make the other regex more complicated.

					[ ]{0,'.$less_than_tab.'}
					<(hr)				# start tag = $2
					'.$attr.'			# attributes
					/?>					# the matching end tag
					[ ]*
					(?=\n{2,}|\Z)		# followed by a blank line or end of document

		| # Special case for standalone HTML comments:

				[ ]{0,'.$less_than_tab.'}
				(?s:
					<!-- .*? -->
				)
				[ ]*
				(?=\n{2,}|\Z)		# followed by a blank line or end of document

		| # PHP and ASP-style processor instructions (<? and <%)

				[ ]{0,'.$less_than_tab.'}
				(?s:
					<([?%])			# $2
					.*?
					\2>
				)
				[ ]*
				(?=\n{2,}|\Z)		# followed by a blank line or end of document

		)
		)}Sxmi',
		array(&$this, '_hashHTMLBlocks_callback'),
		$text);

	return $text;
}
function _hashHTMLBlocks_callback($matches) {
#
# Replace one matched HTML block ($matches[1]) by its block-level hash
# token, set off by a blank line on each side.
#
	$text = $matches[1];
	$key  = $this->hashBlock($text);
	return "\n\n$key\n\n";
}
function hashPart($text, $boundary = 'X') {
#
# Called whenever a tag must be hashed when a function inserts an atomic
# element in the text stream. Passing $text through this function gives
# a unique text-token which will be reverted back when calling unhash.
#
# The $boundary argument specifies what character should be used to
# surround the token. By convention, "B" is used for block elements that
# do not need to be wrapped into paragraph tags at the end, ":" is used
# for elements that are word separators and "X" is used in the general
# case.
#
# Returns the token, which has the form:
#   <boundary> \x1A <counter> <boundary>
# with no other characters in between. transform() removes \x1A from the
# input text, so a token cannot appear in the source document.
#
	# Swap back any tag hash found in $text so we do not have to `unhash`
	# multiple times at the end.
	$text = $this->unhash($text);

	# Then hash the block. The counter is a function-static, so it keeps
	# increasing across calls.
	static $i = 0;
	$key = "$boundary\x1A" . ++$i . $boundary;
	$this->html_hashes[$key] = $text;
	return $key; # String that will replace the tag.
}
function hashBlock($text) {
#
# Hash $text as a block-level element: same as hashPart() with the
# 'B' boundary character. Returns the token.
#
	$block_token = $this->hashPart($text, 'B');
	return $block_token;
}
# Block-level passes run by runBasicBlockGamut(): method name => priority.
# Keys must be exact method names because they are invoked as
# $this->$method($text); the constructor sorts the table by priority.
var $block_gamut = array(
#
# These are all the transformations that form block-level
# tags like paragraphs, headers, and list items.
#
	"doHeaders"         => 10,
	"doHorizontalRules" => 20,

	"doLists"           => 40,
	"doCodeBlocks"      => 50,
	"doBlockQuotes"     => 60,
	);
function runBlockGamut($text) {
#
# Run the block gamut on $text after hashing its raw HTML blocks.
#
# HTML blocks are hashed here, for each block, and not only once at the
# start of the document: a hashed block can sit inside a list item where
# it was indented, and an indented block would have looked like a code
# block during an earlier hashHTMLBlocks pass.
#
	$hashed = $this->hashHTMLBlocks($text);
	$result = $this->runBasicBlockGamut($hashed);
	return $result;
}
function runBasicBlockGamut($text) {
#
# Apply every block gamut method to $text, in table order, without
# hashing HTML blocks first. Meant for text whose HTML blocks are already
# hashed, such as the first whole-document pass.
#
	$method_names = array_keys($this->block_gamut);
	foreach ($method_names as $method_name) {
		$text = $this->$method_name($text);
	}

	# Last step: form paragraphs and restore hashed blocks.
	return $this->formParagraphs($text);
}
function doHorizontalRules($text) {
#
# Replace each horizontal-rule line by a hashed <hr> element on a line
# of its own.
#
# A rule is a line made of three or more identical markers (-, * or _),
# optionally indented by up to three spaces, with up to two spaces
# between markers. The replacement is built once, before matching, so
# every rule in $text is replaced by the same token.
#
	return preg_replace(
		'{
			^[ ]{0,3}	# Leading space
			([-*_])		# $1: First marker
			(?>			# Repeated marker group
				[ ]{0,2}	# Zero, one, or two spaces.
				\1			# Marker character
			){2,}		# Group repeated at least twice
			[ ]*		# Tailing spaces
			$			# End of line.
		}mx',
		"\n".$this->hashBlock("<hr$this->empty_element_suffix")."\n",
		$text);
}
# Span-level passes run by runSpanGamut(): method name => priority.
# Keys must be exact method names because they are invoked as
# $this->$method($text); the constructor sorts the table by priority.
var $span_gamut = array(
#
# These are all the transformations that occur *within* block-level
# tags like paragraphs, headers, and list items.
#
	# Process character escapes, code spans, and inline HTML
	# in one shot.
	"parseSpan"           => -30,

	# Process anchor and image tags. Images must come first,
	# because ![foo][f] looks like an anchor.
	"doImages"            =>  10,
	"doAnchors"           =>  20,

	# Make links out of things like `<http://example.com/>`
	# Must come after doAnchors, because you can use < and >
	# delimiters in inline links like [this](<url>).
	"doAutoLinks"         =>  30,
	"encodeAmpsAndAngles" =>  40,

	"doItalicsAndBold"    =>  50,
	"doHardBreaks"        =>  60,
	);
function runSpanGamut($text) {
#
# Apply every span gamut method to $text, in table order, and return
# the result.
#
	$method_names = array_keys($this->span_gamut);
	foreach ($method_names as $method_name) {
		$text = $this->$method_name($text);
	}
	return $text;
}
function doHardBreaks($text) {
#
# Turn every "two or more spaces followed by a newline" sequence into a
# hard line break, produced by _doHardBreaks_callback.
#
	$callback = array(&$this, '_doHardBreaks_callback');
	$result = preg_replace_callback('/ {2,}\n/', $callback, $text);
	return $result;
}
function _doHardBreaks_callback($matches) {
#
# Return the hash token for a <br> element (closed with the configured
# empty-element suffix) followed by a newline. $matches is not used.
#
	return $this->hashPart("<br$this->empty_element_suffix\n");
}
function doAnchors($text) {
#
# Turn Markdown link shortcuts into XHTML <a> tags.
#
# Returns $text unchanged when called while an anchor is already being
# processed ($this->in_anchor), which prevents links nested in links.
#
	if ($this->in_anchor) return $text;
	$this->in_anchor = true;

	#
	# First, handle reference-style links: [link text] [id]
	#
	$text = preg_replace_callback('{
		(					# wrap whole match in $1
		  \[
			('.$this->nested_brackets_re.')	# link text = $2
		  \]

		  [ ]?				# one optional space
		  (?:\n[ ]*)?		# one optional newline followed by spaces

		  \[
			(.*?)		# id = $3
		  \]
		)
		}xs',
		array(&$this, '_doAnchors_reference_callback'), $text);

	#
	# Next, inline-style links: [link text](url "optional title")
	#
	$text = preg_replace_callback('{
		(				# wrap whole match in $1
		  \[
			('.$this->nested_brackets_re.')	# link text = $2
		  \]
		  \(			# literal paren
			[ ]*
			(?:
				<(\S*)>	# href = $3
			|
				('.$this->nested_url_parenthesis_re.')	# href = $4
			)
			[ ]*
			(			# $5
			  ([\'"])	# quote char = $6
			  (.*?)		# Title = $7
			  \6		# matching quote
			  [ ]*	# ignore any spaces/tabs between closing quote and )
			)?			# title is optional
		  \)
		)
		}xs',
		array(&$this, '_doAnchors_inline_callback'), $text);

	#
	# Last, handle reference-style shortcuts: [link text]
	# These must come last in case you've also got [link test][1]
	# or [link test](/foo)
	#
	# (This pass is disabled; the code is kept for reference.)
//	$text = preg_replace_callback('{
//		(					# wrap whole match in $1
//		  \[
//			([^\[\]]+)		# link text = $2; can\'t contain [ or ]
//		  \]
//		)
//		}xs',
//		array(&$this, '_doAnchors_reference_callback'), $text);

	$this->in_anchor = false;
	return $text;
}
function _doAnchors_reference_callback($matches) {
#
# Build the <a> element for one reference-style link.
#
# $matches[1] is the whole match, $matches[2] the link text and
# $matches[3] the link id. Returns the hash token of the generated
# anchor, or the whole match unchanged when the id is not defined in
# $this->urls.
#
	$whole_match =  $matches[1];
	$link_text   =  $matches[2];
	$link_id     =& $matches[3];

	if ($link_id == "") {
		# for shortcut links like [this][] or [this].
		$link_id = $link_text;
	}

	# lower-case and turn embedded newlines into spaces
	$link_id = strtolower($link_id);
	$link_id = preg_replace('{[ ]?\n}', ' ', $link_id);

	if (isset($this->urls[$link_id])) {
		$url = $this->urls[$link_id];
		$url = $this->encodeAttribute($url);

		$result = "<a href=\"$url\"";
		if ( isset( $this->titles[$link_id] ) ) {
			$title = $this->titles[$link_id];
			$title = $this->encodeAttribute($title);
			$result .=  " title=\"$title\"";
		}

		$link_text = $this->runSpanGamut($link_text);
		$result .= ">$link_text</a>";
		$result = $this->hashPart($result);
	}
	else {
		# Unknown id: leave the source text intact.
		$result = $whole_match;
	}
	return $result;
}
function _doAnchors_inline_callback($matches) {
#
# Build the <a> element for one inline-style link.
#
# $matches[2] is the link text, $matches[3] the URL when written between
# angle brackets, $matches[4] the URL otherwise and $matches[7] the
# optional title. Returns the hash token of the generated anchor.
#
	# The span gamut is applied to the link text once; its output is
	# already hashed, so it is not run a second time.
	$link_text   =  $this->runSpanGamut($matches[2]);
	$url         =  $matches[3] == '' ? $matches[4] : $matches[3];
	# Bound by reference so a missing title group yields null.
	$title       =& $matches[7];

	$url = $this->encodeAttribute($url);

	$result = "<a href=\"$url\"";
	if (isset($title)) {
		$title = $this->encodeAttribute($title);
		$result .=  " title=\"$title\"";
	}

	$result .= ">$link_text</a>";

	return $this->hashPart($result);
}
function doImages($text) {
#
# Turn Markdown image shortcuts into <img> tags.
#
# Reference-style images are handled first, then inline images. Returns
# the text with each recognised image replaced by a hash token.
#
	#
	# First, handle reference-style labeled images: ![alt text][id]
	#
	$text = preg_replace_callback('{
		(				# wrap whole match in $1
		  !\[
			('.$this->nested_brackets_re.')		# alt text = $2
		  \]

		  [ ]?				# one optional space
		  (?:\n[ ]*)?		# one optional newline followed by spaces

		  \[
			(.*?)		# id = $3
		  \]

		)
		}xs',
		array(&$this, '_doImages_reference_callback'), $text);

	#
	# Next, handle inline images:  ![alt text](url "optional title")
	# Don't forget: encode * and _
	#
	$text = preg_replace_callback('{
		(				# wrap whole match in $1
		  !\[
			('.$this->nested_brackets_re.')		# alt text = $2
		  \]
		  \s?			# One optional whitespace character
		  \(			# literal paren
			[ ]*
			(?:
				<(\S*)>	# src url = $3
			|
				('.$this->nested_url_parenthesis_re.')	# src url = $4
			)
			[ ]*
			(			# $5
			  ([\'"])	# quote char = $6
			  (.*?)		# title = $7
			  \6		# matching quote
			  [ ]*
			)?			# title is optional
		  \)
		)
		}xs',
		array(&$this, '_doImages_inline_callback'), $text);

	return $text;
}
function _doImages_reference_callback($matches) {
#
# Build the <img> element for one reference-style image.
#
# $matches[1] is the whole match, $matches[2] the alt text and
# $matches[3] the link id. Returns the hash token of the generated
# element, or the whole match unchanged when the id is not defined in
# $this->urls.
#
	$whole_match = $matches[1];
	$alt_text    = $matches[2];
	$link_id     = strtolower($matches[3]);

	if ($link_id == "") {
		$link_id = strtolower($alt_text); # for shortcut links like ![this][].
	}

	$alt_text = $this->encodeAttribute($alt_text);
	if (isset($this->urls[$link_id])) {
		$url = $this->encodeAttribute($this->urls[$link_id]);
		$result = "<img src=\"$url\" alt=\"$alt_text\"";
		if (isset($this->titles[$link_id])) {
			$title = $this->titles[$link_id];
			$title = $this->encodeAttribute($title);
			$result .=  " title=\"$title\"";
		}
		$result .= $this->empty_element_suffix;
		$result = $this->hashPart($result);
	}
	else {
		# If there's no such link ID, leave intact:
		$result = $whole_match;
	}

	return $result;
}
function _doImages_inline_callback($matches) {
#
# Build the <img> element for one inline image.
#
# $matches[2] is the alt text, $matches[3] the URL when written between
# angle brackets, $matches[4] the URL otherwise and $matches[7] the
# optional title. Returns the hash token of the generated element.
#
	$alt_text    =  $matches[2];
	$url         =  $matches[3] == '' ? $matches[4] : $matches[3];
	# Bound by reference so a missing title group yields null.
	$title       =& $matches[7];

	$alt_text = $this->encodeAttribute($alt_text);
	$url = $this->encodeAttribute($url);
	$result = "<img src=\"$url\" alt=\"$alt_text\"";
	if (isset($title)) {
		$title = $this->encodeAttribute($title);
		$result .=  " title=\"$title\""; # $title already quoted
	}
	$result .= $this->empty_element_suffix;

	return $this->hashPart($result);
}
function doHeaders($text) {
#
# Convert setext-style and atx-style headers in $text to hashed
# <h1>..<h6> blocks and return the result.
#
	# Setext-style headers:
	#	  Header 1
	#	  ========
	#
	#	  Header 2
	#	  --------
	#
	$text = preg_replace_callback('{ ^(.+?)[ ]*\n(=+|-+)[ ]*\n+ }mx',
		array(&$this, '_doHeaders_callback_setext'), $text);

	# atx-style headers:
	#	# Header 1
	#	## Header 2
	#	## Header 2 with closing hashes ##
	#	...
	#	###### Header 6
	#
	# Every literal "#" in the pattern is backslash-escaped because an
	# unescaped one starts a comment under the x modifier.
	$text = preg_replace_callback('{
			^(\#{1,6})	# $1 = string of #\'s
			[ ]*
			(.+?)		# $2 = Header text
			[ ]*
			\#*			# optional closing #\'s (not counted)
			\n+
		}xm',
		array(&$this, '_doHeaders_callback_atx'), $text);

	return $text;
}
function _doHeaders_callback_setext($matches) {
#
# Build the header block for one setext-style header.
#
# $matches[1] is the header text and $matches[2] the underline: a run of
# "=" gives level 1, a run of "-" gives level 2. Returns the hashed
# block surrounded by newlines, or the match unchanged for what is really
# an empty list item.
#
	# Terrible hack to check we haven't found an empty list item.
	if ($matches[2] == '-' && preg_match('{^-(?: |$)}', $matches[1]))
		return $matches[0];

	# Square-bracket offset: the curly-brace form is gone in PHP 8.
	$level = $matches[2][0] == '=' ? 1 : 2;
	$block = "<h$level>".$this->runSpanGamut($matches[1])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function _doHeaders_callback_atx($matches) {
#
# Build the header block for one atx-style header. The level is the
# number of "#" characters in $matches[1]; $matches[2] is the header
# text. Returns the hashed block surrounded by newlines.
#
	$level = strlen($matches[1]);
	$block = "<h$level>".$this->runSpanGamut($matches[2])."</h$level>";
	return "\n" . $this->hashBlock($block) . "\n\n";
}
function doLists($text) {
#
# Form HTML ordered (numbered) and unordered (bulleted) lists.
#
# The text is scanned twice, once per marker type (bullets first, then
# numbers); each whole list found is handed to _doLists_callback.
#
	# A list may be indented by fewer spaces than one tab.
	$less_than_tab = $this->tab_width - 1;

	# Re-usable patterns to match list item bullets and number markers:
	$marker_ul_re  = '[*+-]';
	$marker_ol_re  = '\d+[.]';

	$markers_relist = array($marker_ul_re, $marker_ol_re);

	foreach ($markers_relist as $marker_re) {
		# Re-usable pattern to match any entire ul or ol list:
		$whole_list_re = '
			(								# $1 = whole list
			  (								# $2
				[ ]{0,'.$less_than_tab.'}
				('.$marker_re.')			# $3 = first list item marker
				[ ]+
			  )
			  (?s:.+?)
			  (								# $4
				  \z
				|
				  \n{2,}
				  (?=\S)
				  (?!						# Negative lookahead for another list item marker
					[ ]*
					'.$marker_re.'[ ]+
				  )
			  )
			)
		'; // mx

		# We use a different prefix before nested lists than top-level lists.
		# See extended comment in processListItems().

		if ($this->list_level) {
			# Inside a list: a sub-list may start on any line.
			$text = preg_replace_callback('{
					^
					'.$whole_list_re.'
				}mx',
				array(&$this, '_doLists_callback'), $text);
		}
		else {
			# Top level: a list must follow a blank line or start the text.
			$text = preg_replace_callback('{
					(?:(?<=\n)\n|\A\n?) # Must eat the newline
					'.$whole_list_re.'
				}mx',
				array(&$this, '_doLists_callback'), $text);
		}
	}

	return $text;
}
function _doLists_callback($matches) {
#
# Build the <ul> or <ol> block for one whole list.
#
# $matches[1] is the list source and $matches[3] its first item marker,
# which decides the list type. Returns the hashed block surrounded by
# newlines.
#
	# Re-usable patterns to match list item bullets and number markers:
	$marker_ul_re  = '[*+-]';
	$marker_ol_re  = '\d+[.]';

	$list = $matches[1];
	$list_type = preg_match("/$marker_ul_re/", $matches[3]) ? "ul" : "ol";

	# Only markers of the list's own type separate its items.
	$marker_any_re = ( $list_type == "ul" ? $marker_ul_re : $marker_ol_re );

	$list .= "\n";
	$result = $this->processListItems($list, $marker_any_re);

	$result = $this->hashBlock("<$list_type>\n" . $result . "</$list_type>");
	return "\n". $result ."\n\n";
}
# Current list nesting depth: incremented on entry to processListItems()
# and decremented on exit; doLists() treats any non-zero value as
# "inside a list".
var $list_level = 0 ;
function processListItems($list_str, $marker_any_re) {
#
#	Process the contents of a single ordered or unordered list, splitting it
#	into individual list items.
#
#	$list_str is the list source and $marker_any_re the regex fragment
#	matching an item marker. Returns the concatenated <li> elements.
#
	# The $this->list_level global keeps track of when we're inside a list.
	# Each time we enter a list, we increment it; when we leave a list,
	# we decrement. If it's zero, we're not in a list anymore.
	#
	# We do this because when we're not inside a list, we want to treat
	# something like this:
	#
	#		I recommend upgrading to version
	#		8. Oops, now this line is treated
	#		as a sub-list.
	#
	# As a single paragraph, despite the fact that the second line starts
	# with a digit-period-space sequence.
	#
	# Whereas when we're inside a list (or sub-list), that line will be
	# treated as the start of a sub-list. What a kludge, huh? This is
	# an aspect of Markdown's syntax that's hard to parse perfectly
	# without resorting to mind-reading. Perhaps the solution is to
	# change the syntax rules such that sub-lists must start with a
	# starting cardinal number; e.g. "1." or "a.".

	$this->list_level++;

	# trim trailing blank lines:
	$list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

	$list_str = preg_replace_callback('{
		(\n)?							# leading line = $1
		(^[ ]*)							# leading whitespace = $2
		('.$marker_any_re.'				# list marker and space = $3
			(?:[ ]+|(?=\n))	# space only required if item is not empty
		)
		((?s:.*?))						# list item text   = $4
		(?:(\n+(?=\n))|\n)				# tailing blank line = $5
		(?= \n* (\z | \2 ('.$marker_any_re.') (?:[ ]+|(?=\n))))
		}xm',
		array(&$this, '_processListItems_callback'), $list_str);

	$this->list_level--;
	return $list_str;
}
function _processListItems_callback($matches) {
#
# Build the <li> element for one list item.
#
# An item is treated as block content (run through the block gamut) when
# it is preceded or followed by a blank line or contains one; otherwise
# it is treated as span content, after looking for nested lists.
#
	$item = $matches[4];
	# Optional groups are bound by reference so missing ones yield null.
	$leading_line =& $matches[1];
	$leading_space =& $matches[2];
	$marker_space = $matches[3];
	$tailing_blank_line =& $matches[5];

	if ($leading_line || $tailing_blank_line ||
		preg_match('/\n{2,}/', $item))
	{
		# Replace marker with the appropriate whitespace indentation
		$item = $leading_space . str_repeat(' ', strlen($marker_space)) . $item;
		$item = $this->runBlockGamut($this->outdent($item)."\n");
	}
	else {
		# Recursion for sub-lists:
		$item = $this->doLists($this->outdent($item));
		$item = preg_replace('/\n+$/', '', $item);
		$item = $this->runSpanGamut($item);
	}

	return "<li>" . $item . "</li>\n";
}
function doCodeBlocks($text) {
#
#	Process Markdown `<pre><code>` blocks.
#
#	A code block is a run of lines each indented by $this->tab_width
#	spaces, starting after a blank line or at the start of the text.
#	Each block is replaced through _doCodeBlocks_callback.
#
	$text = preg_replace_callback('{
			(?:\n\n|\A\n?)
			(	            # $1 = the code block -- one or more lines, starting with a space/tab
			  (?>
				[ ]{'.$this->tab_width.'}  # Lines must start with a tab or a tab-width of spaces
				.*\n+
			  )+
			)
			((?=^[ ]{0,'.$this->tab_width.'}\S)|\Z)	# Lookahead for non-space at line-start, or end of doc
		}xm',
		array(&$this, '_doCodeBlocks_callback'), $text);

	return $text;
}
function _doCodeBlocks_callback($matches) {
#
# Build the <pre><code> block for one code block ($matches[1]): the
# indentation is removed, HTML special characters are escaped (quotes are
# left alone) and surrounding blank lines are trimmed. Returns the hashed
# block with a blank line on each side.
#
	$codeblock = $matches[1];

	$codeblock = $this->outdent($codeblock);
	$codeblock = htmlspecialchars($codeblock, ENT_NOQUOTES);

	# trim leading newlines and trailing newlines
	$codeblock = preg_replace('/\A\n+|\n+\z/', '', $codeblock);

	$codeblock = "<pre><code>$codeblock\n</code></pre>";
	return "\n\n".$this->hashBlock($codeblock)."\n\n";
}
function makeCodeSpan($code) {
#
# Create a code span markup for $code. Called from handleSpanToken.
#
# $code is trimmed and its HTML special characters are escaped (quotes
# are left alone). Returns the hash token of the <code> element.
#
	$code = htmlspecialchars(trim($code), ENT_NOQUOTES);
	return $this->hashPart("<code>$code</code>");
}
# Emphasis token expressions, keyed by the emphasis marker currently open.
# The '' entry matches an opening marker (used when none is open); the
# other entries match the closing marker for the open one.
var $em_relist = array (
'' => '(?:(?<!\*)\*(?!\*)|(?<!_)_(?!_))(?=\S)(?![.,:;]\s)' ,
'*' => '(?<=\S)(?<!\*)\*(?!\*)' ,
'_' => '(?<=\S)(?<!_)_(?!_)' ,
);
# Same table for the two-character strong markers.
var $strong_relist = array (
'' => '(?:(?<!\*)\*\*(?!\*)|(?<!_)__(?!_))(?=\S)(?![.,:;]\s)' ,
'**' => '(?<=\S)(?<!\*)\*\*(?!\*)' ,
'__' => '(?<=\S)(?<!_)__(?!_)' ,
);
# Same table for the three-character markers that open or close em and
# strong together.
var $em_strong_relist = array (
'' => '(?:(?<!\*)\*\*\*(?!\*)|(?<!_)___(?!_))(?=\S)(?![.,:;]\s)' ,
'***' => '(?<=\S)(?<!\*)\*\*\*(?!\*)' ,
'___' => '(?<=\S)(?<!_)___(?!_)' ,
);
# Combined expressions, filled in by prepareItalicsAndBold().
var $em_strong_prepared_relist ;
function prepareItalicsAndBold() {
#
# Prepare regular expressions for searching emphasis tokens in any
# context.
#
# For every combination of open em marker and open strong marker, one
# master expression is built and stored in
# $this->em_strong_prepared_relist.
#
	foreach ($this->em_relist as $em => $em_re) {
		foreach ($this->strong_relist as $strong => $strong_re) {
			# Construct list of allowed token expressions.
			$token_relist = array();

			# The three-character expression applies when the two open
			# markers concatenate to a key of $em_strong_relist
			# ('', '***' or '___'), so the key is built without padding.
			if (isset($this->em_strong_relist["$em$strong"])) {
				$token_relist[] = $this->em_strong_relist["$em$strong"];
			}
			$token_relist[] = $em_re;
			$token_relist[] = $strong_re;

			# Construct master expression from list.
			$token_re = '{('. implode('|', $token_relist) .')}';

			# NOTE(review): the storage key keeps its space padding because
			# doItalicsAndBold() looks the expression up with the same
			# padded key; change both together if the padding is removed.
			$this->em_strong_prepared_relist[" $em $strong "] = $token_re;
		}
	}
}
function doItalicsAndBold ( $text ) {
$token_stack = array ( '' );
$text_stack = array ( '' );
$em = '' ;
$strong = '' ;
$tree_char_em = false ;
while ( 1 ) {
#
# Get prepared regular expression for seraching emphasis tokens
# in current context.
#
$token_re = $this -> em_strong_prepared_relist [ " $em $strong " ];
#
# Each loop iteration seach for the next emphasis token.
# Each token is then passed to handleSpanToken.
#
$parts = preg_split ( $token_re , $text , 2 , PREG_SPLIT_DELIM_CAPTURE );
$text_stack [ 0 ] .= $parts [ 0 ];
$token =& $parts [ 1 ];
$text =& $parts [ 2 ];
if ( empty ( $token )) {
# Reached end of text span: empty stack without emitting.
# any more emphasis.
while ( $token_stack [ 0 ]) {
$text_stack [ 1 ] .= array_shift ( $token_stack );
$text_stack [ 0 ] .= array_shift ( $text_stack );
}
break ;
}
$token_len = strlen ( $token );
if ( $tree_char_em ) {
# Reached closing marker while inside a three-char emphasis.
if ( $token_len == 3 ) {
# Three-char closing marker, close em and strong.
array_shift ( $token_stack );
$span = array_shift ( $text_stack );
$span = $this -> runSpanGamut ( $span );
$span = " <strong><em> $span </em></strong> " ;
$text_stack [ 0 ] .= $this -> hashPart ( $span );
$em = '' ;
$strong = '' ;
} else {
# Other closing marker: close one em or strong and
# change current token state to match the other
$token_stack [ 0 ] = str_repeat ( $token { 0 }, 3 - $token_len );
$tag = $token_len == 2 ? " strong " : " em " ;
$span = $text_stack [ 0 ];
$span = $this -> runSpanGamut ( $span );
$span = " < $tag > $span </ $tag > " ;
$text_stack [ 0 ] = $this -> hashPart ( $span );
$$tag = '' ; # $$tag stands for $em or $strong
}
$tree_char_em = false ;
} else if ( $token_len == 3 ) {
if ( $em ) {
# Reached closing marker for both em and strong.
# Closing strong marker:
for ( $i = 0 ; $i < 2 ; ++ $i ) {
$shifted_token = array_shift ( $token_stack );
$tag = strlen ( $shifted_token ) == 2 ? " strong " : " em " ;
$span = array_shift ( $text_stack );
$span = $this -> runSpanGamut ( $span );
$span = " < $tag > $span </ $tag > " ;
$text_stack [ 0 ] .= $this -> hashPart ( $span );
$$tag = '' ; # $$tag stands for $em or $strong
}
} else {
# Reached opening three-char emphasis marker. Push on token
# stack; will be handled by the special condition above.
$em = $token { 0 };
$strong = " $em $em " ;
array_unshift ( $token_stack , $token );
array_unshift ( $text_stack , '' );
$tree_char_em = true ;
}
} else if ( $token_len == 2 ) {
if ( $strong ) {
# Unwind any dangling emphasis marker:
if ( strlen ( $token_stack [ 0 ]) == 1 ) {
$text_stack [ 1 ] .= array_shift ( $token_stack );
$text_stack [ 0 ] .= array_shift ( $text_stack );
}
# Closing strong marker:
array_shift ( $token_stack );
$span = array_shift ( $text_stack );
$span = $this -> runSpanGamut ( $span );
$span = " <strong> $span </strong> " ;
$text_stack [ 0 ] .= $this -> hashPart ( $span );
$strong = '' ;
} else {
array_unshift ( $token_stack , $token );
array_unshift ( $text_stack , '' );
$strong = $token ;
}
} else {
# Here $token_len == 1
if ( $em ) {
if ( strlen ( $token_stack [ 0 ]) == 1 ) {
# Closing emphasis marker:
array_shift ( $token_stack );
$span = array_shift ( $text_stack );
$span = $this -> runSpanGamut ( $span );
$span = " <em> $span </em> " ;
$text_stack [ 0 ] .= $this -> hashPart ( $span );
$em = '' ;
} else {
$text_stack [ 0 ] .= $token ;
}
} else {
array_unshift ( $token_stack , $token );
array_unshift ( $text_stack , '' );
$em = $token ;
}
}
}
return $text_stack [ 0 ];
}
function doBlockQuotes($text) {
	#
	# Find blockquotes (runs of lines introduced by ">") in $text and hand
	# each one to _doBlockQuotes_callback.
	#
	# Params:
	#   $text - block-level text to process.
	#
	# Returns the text with every blockquote replaced by the callback's
	# result.
	#
	# Note: "(?>" must be written without inner whitespace; the x modifier
	# only ignores whitespace between pattern items, not inside a group
	# opener.
	#
	$text = preg_replace_callback('/
		  (                 # Wrap whole match in $1
			(?>
			  ^[ ]*>[ ]?    # ">" at the start of a line
				.+\n        # rest of the first line
			  (.+\n)*       # subsequent consecutive lines
			  \n*           # blanks
			)+
		  )
		/xm',
		array(&$this, '_doBlockQuotes_callback'), $text);

	return $text;
}
function _doBlockQuotes_callback($matches) {
	#
	# preg_replace_callback handler for blockquotes.
	#
	# Params:
	#   $matches - regex match array; $matches[1] holds the quoted lines,
	#              still carrying their ">" prefixes.
	#
	# Returns the hashed <blockquote> element, preceded by one newline and
	# followed by a blank line.
	#
	$bq = $matches[1];
	# trim one level of quoting - trim whitespace-only lines
	$bq = preg_replace('/^[ ]*>[ ]?|^[ ]+$/m', '', $bq);
	$bq = $this->runBlockGamut($bq); # recurse

	# Indent every line of the rendered content.
	$bq = preg_replace('/^/m', " ", $bq);
	# These leading spaces cause problem with <pre> content,
	# so we need to fix that:
	$bq = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
		array(&$this, '_doBlockQuotes_callback2'), $bq);

	return "\n" . $this->hashBlock("<blockquote>\n$bq\n</blockquote>") . "\n\n";
}
function _doBlockQuotes_callback2($matches) {
	#
	# preg_replace_callback handler: remove one leading space from every
	# line of the captured <pre> section ($matches[1]) and return it.
	#
	return preg_replace('/^ /m', '', $matches[1]);
}
function formParagraphs($text) {
	#
	# Params:
	#   $text - string to process with html <p> tags
	#
	# Returns the text with every plain paragraph run through the span
	# gamut and wrapped in <p>...</p>, and every hashed block placeholder
	# replaced by its stored HTML. Paragraphs are joined by blank lines.
	#
	# Strip leading and trailing lines:
	$text = preg_replace('/\A\n+|\n+\z/', '', $text);

	$grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

	#
	# Wrap <p> tags and unhashify HTML blocks
	#
	foreach ($grafs as $key => $value) {
		if (preg_match('/^B\x1A[0-9]+B$/', $value)) {
			# Is a block: the whole paragraph is a single block hash,
			# so swap the stored HTML back in as-is.
			$grafs[$key] = $this->html_hashes[$value];
		}
		else {
			# Is a paragraph. Leading spaces are dropped when the
			# opening tag is inserted.
			$value = $this->runSpanGamut($value);
			$value = preg_replace('/^([ ]*)/', "<p>", $value);
			$value .= "</p>";
			$grafs[$key] = $this->unhash($value);
		}
	}

	return implode("\n\n", $grafs);
}
function encodeAttribute($text) {
	#
	# Encode text for a double-quoted HTML attribute. This function
	# is *not* suitable for attributes enclosed in single quotes.
	#
	# Params:
	#   $text - raw attribute value.
	#
	# Returns the value with ampersands and "<" encoded by
	# encodeAmpsAndAngles, and every double quote replaced by &quot; so it
	# cannot terminate the attribute.
	#
	$text = $this->encodeAmpsAndAngles($text);
	$text = str_replace('"', '&quot;', $text);
	return $text;
}
function encodeAmpsAndAngles($text) {
	#
	# Smart processing for ampersands and angle brackets that need to
	# be encoded. Valid character entities are left alone unless the
	# no-entities mode is set.
	#
	# Params:
	#   $text - text to encode.
	#
	# Returns the text with "&" replaced by &amp; (all of them when
	# no_entities is set, otherwise only those that do not start an
	# entity) and every "<" replaced by &lt;.
	#
	if ($this->no_entities) {
		$text = str_replace('&', '&amp;', $text);
	} else {
		# Ampersand-encoding based entirely on Nat Irons's Amputator
		# MT plugin: <http://bumppo.net/projects/amputator/>
		$text = preg_replace('/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/',
			'&amp;', $text);
	}
	# Encode remaining <'s
	$text = str_replace('<', '&lt;', $text);

	return $text;
}
function doAutoLinks($text) {
	#
	# Turn <http://...>-style URLs and <address@domain> email addresses
	# written between angle brackets into links.
	#
	# Params:
	#   $text - span-level text to process.
	#
	# Returns the text with each match replaced by the result of the
	# corresponding callback.
	#
	# URLs: <scheme:...> for http, https, ftp and dict.
	$text = preg_replace_callback('{<((https?|ftp|dict):[^\'">\s]+)>}i',
		array(&$this, '_doAutoLinks_url_callback'), $text);

	# Email addresses: <address@domain.foo>
	# Character classes must not contain stray spaces: whitespace inside
	# a class is literal even under the x modifier.
	$text = preg_replace_callback('{
		<
		(?:mailto:)?
		(
			[-.\w\x80-\xFF]+
			\@
			[-a-z0-9\x80-\xFF]+(\.[-a-z0-9\x80-\xFF]+)*\.[a-z]+
		)
		>
		}xi',
		array(&$this, '_doAutoLinks_email_callback'), $text);

	return $text;
}
function _doAutoLinks_url_callback($matches) {
	#
	# preg_replace_callback handler for URL autolinks.
	#
	# Params:
	#   $matches - regex match array; $matches[1] holds the URL.
	#
	# Returns the value of hashPart() for an <a> element whose href and
	# link text are both the attribute-encoded URL.
	#
	$url = $this->encodeAttribute($matches[1]);
	$link = "<a href=\"$url\">$url</a>";
	return $this->hashPart($link);
}
function _doAutoLinks_email_callback($matches) {
	#
	# preg_replace_callback handler for email autolinks: pass the captured
	# address ($matches[1]) through encodeEmailAddress and return the
	# hashed result.
	#
	$encoded_link = $this->encodeEmailAddress($matches[1]);
	return $this->hashPart($encoded_link);
}
function encodeEmailAddress($addr) {
	#
	# Input: an email address, e.g. "foo@example.com"
	#
	# Output: the email address as a mailto link, with each character
	# of the address encoded as either a decimal or hex entity, in
	# the hopes of foiling most address harvesting spam bots. E.g.:
	#
	#   <a href="&#x6d;&#97;&#105;lto:&#x66;...">&#x66;&#111;o&#64;...</a>
	#
	# The choice of encoding per character is derived from a checksum of
	# the address, so the same address always yields the same output.
	#
	# Based on a filter by Matthew Wickline, posted to BBEdit-Talk.
	# With some optimizations by Milian Wolff.
	#
	$addr = "mailto:" . $addr;
	$chars = preg_split('/(?<!^)(?!$)/', $addr);
	$seed = (int)abs(crc32($addr) / strlen($addr)); # Deterministic seed.

	foreach ($chars as $key => $char) {
		$ord = ord($char);
		# Ignore non-ascii chars.
		if ($ord >= 128) {
			continue;
		}
		$r = ($seed * (1 + $key)) % 100; # Pseudo-random function.
		# roughly 10% raw, 45% hex, 45% dec
		# '@' *must* be encoded. I insist.
		if ($r > 90 && $char != '@') {
			continue; # leave this character raw
		}
		if ($r < 45) {
			$chars[$key] = '&#x' . dechex($ord) . ';';
		} else {
			$chars[$key] = '&#' . $ord . ';';
		}
	}

	$addr = implode('', $chars);
	# The first 7 entries are the characters of "mailto:"; the link text
	# is everything after them.
	$text = implode('', array_slice($chars, 7));
	$addr = "<a href=\"$addr\">$text</a>";

	return $addr;
}
function parseSpan($str) {
	#
	# Take the string $str and parse it into tokens, hashing embedded HTML,
	# escaped characters and handling code spans.
	#
	# Params:
	#   $str - span-level text to process.
	#
	# Returns the text with every recognized token replaced by the value
	# handleSpanToken returned for it. HTML-like tokens (comments,
	# processing instructions, tags) are only recognized when no_markup
	# is not set.
	#
	$output = '';

	$span_re = '{
			(
				\\\\'.$this->escape_chars_re.'
			|
				(?<![`\\\\])
				`+						# code span marker
		'.( $this->no_markup ? '' : '
			|
				<!--    .*?     -->		# comment
			|
				<\?.*?\?> | <%.*?%>		# processing instruction
			|
				<[/!$]?[-a-zA-Z0-9:]+	# regular tags
				(?>
					\s
					(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*
				)?
				>
		').'
			)
			}xs';

	while (true) {
		#
		# Each loop iteration searches for either the next tag, the next
		# opening code span marker, or the next escaped character.
		# Each token is then passed to handleSpanToken.
		#
		$parts = preg_split($span_re, $str, 2, PREG_SPLIT_DELIM_CAPTURE);

		# Copy through the text preceding the token. Only a truly empty
		# segment is skipped; whitespace-only text must be kept.
		if ($parts[0] !== '') {
			$output .= $parts[0];
		}

		# Check if we reach the end.
		if (!isset($parts[1])) {
			break;
		}

		# handleSpanToken receives the remaining text by reference and
		# may consume part of it (e.g. the body of a code span).
		$output .= $this->handleSpanToken($parts[1], $parts[2]);
		$str = $parts[2];
	}

	return $output;
}
function handleSpanToken($token, &$str) {
	#
	# Handle $token provided by parseSpan by determining its nature and
	# returning the corresponding value that should replace it.
	#
	# Params:
	#   $token - the matched token: a backslash escape, a run of
	#            backticks, or anything else matched by parseSpan.
	#   $str   - text following the token, passed by reference; when a
	#            code span is found it is advanced past the closing marker.
	#
	# Returns the replacement for the token, or the token itself when a
	# backtick run has no matching end marker.
	#
	switch ($token[0]) {
		case "\\":
			# Escaped character: emit it as a numeric entity.
			return $this->hashPart("&#" . ord($token[1]) . ";");
		case "`":
			# Search for end marker in remaining text.
			if (preg_match('/^(.*?[^`])' . preg_quote($token) . '(?!`)(.*)$/sm',
				$str, $matches))
			{
				$str = $matches[2];
				$codespan = $this->makeCodeSpan($matches[1]);
				return $this->hashPart($codespan);
			}
			return $token; // return as text since no ending marker found.
		default:
			return $this->hashPart($token);
	}
}
function outdent($text) {
	#
	# Strip one indentation level from the start of every line of $text:
	# either a single tab or a run of one to tab_width spaces.
	#
	$indent_re = '/^(\t|[ ]{1,' . $this->tab_width . '})/m';
	$outdented = preg_replace($indent_re, '', $text);
	return $outdented;
}
# String length function for detab. `_initDetab` will create a function to
# handle UTF-8 if the default function does not exist.
var $utf8_strlen = 'mb_strlen' ;
function detab($text) {
	#
	# Expand tab characters in $text to spaces.
	#
	# Every line that contains at least one tab is handed to
	# _detab_callback, which rebuilds the line; lines without tabs are
	# left untouched.
	#
	$expander = array(&$this, '_detab_callback');
	return preg_replace_callback('/^.*\t.*$/m', $expander, $text);
}
function _detab_callback($matches) {
	#
	# preg_replace_callback handler for detab.
	#
	# Params:
	#   $matches - regex match array; $matches[0] is one line containing
	#              at least one tab.
	#
	# Returns the line with each tab replaced by enough spaces to reach
	# the next multiple of tab_width, measured with the utf8_strlen
	# function.
	#
	$strlen = $this->utf8_strlen; # strlen function for UTF-8.

	# Split in blocks on the tab character itself.
	$blocks = explode("\t", $matches[0]);

	# Start from the first block, then append the others one by one.
	$line = array_shift($blocks);
	foreach ($blocks as $block) {
		# Calculate amount of space, insert spaces, insert block.
		$amount = $this->tab_width -
			$strlen($line, 'UTF-8') % $this->tab_width;
		$line .= str_repeat(" ", $amount) . $block;
	}
	return $line;
}
function _initDetab() {
	#
	# Check for the availability of the function in the `utf8_strlen` property
	# (initially `mb_strlen`). If the function is not available, create a
	# function that will loosely count the number of UTF-8 characters with a
	# regular expression.
	#
	# The fallback is an anonymous function (PHP 5.3+) rather than
	# create_function(), which no longer exists in PHP 8. It is called
	# with an extra encoding argument by _detab_callback; that argument
	# is simply ignored.
	#
	# is_callable() also makes the check safe to repeat once the property
	# holds the fallback.
	if (is_callable($this->utf8_strlen)) return;

	$this->utf8_strlen = function ($text) {
		# Count every byte that is not a UTF-8 continuation byte following
		# a lead byte, i.e. roughly one per character.
		return preg_match_all(
			'/[\x00-\xBF]|[\xC0-\xFF][\x80-\xBF]*/',
			$text, $m);
	};
}
function unhash($text) {
	#
	# Replace every hash placeholder in $text (a character, \x1A, a run of
	# digits, then the same character again) with the value
	# _unhash_callback returns for it.
	#
	$restore = array(&$this, '_unhash_callback');
	return preg_replace_callback('/(.)\x1A[0-9]+\1/', $restore, $text);
}
function _unhash_callback($matches) {
	#
	# preg_replace_callback handler for unhash: the whole match is the
	# key under which the original text was stored in html_hashes.
	#
	$hash_key = $matches[0];
	return $this->html_hashes[$hash_key];
}
}
/*
PHP Markdown
============
Description
-----------
This is a PHP translation of the original Markdown formatter written in
Perl by John Gruber .
Markdown is a text-to-HTML filter; it translates an easy-to-read /
easy-to-write structured text format into HTML. Markdown's text format
is most similar to that of plain text email, and supports features such
as headers, *emphasis*, code blocks, blockquotes, and links.
Markdown's syntax is designed not as a generic markup language, but
specifically to serve as a front-end to (X)HTML. You can use span-level
HTML tags anywhere in a Markdown document, and you can use block level
HTML tags (like <div> and <table>) as well.
For more information about Markdown's syntax, see:
<http://daringfireball.net/projects/markdown/>
Bugs
----
To file bug reports please send email to:
<michel.fortin@michelf.com>
Please include with your report : ( 1 ) the example input ; ( 2 ) the output you
expected ; ( 3 ) the output Markdown actually produced .
Version History
---------------
See the readme file for detailed release notes for this version .
Copyright and License
---------------------
PHP Markdown
Copyright ( c ) 2004 - 2008 Michel Fortin
< http :// www . michelf . com />
All rights reserved .
Based on Markdown
Copyright ( c ) 2003 - 2006 John Gruber
< http :// daringfireball . net />
All rights reserved .
Redistribution and use in source and binary forms , with or without
modification , are permitted provided that the following conditions are
met :
* Redistributions of source code must retain the above copyright notice ,
this list of conditions and the following disclaimer .
* Redistributions in binary form must reproduce the above copyright
notice , this list of conditions and the following disclaimer in the
documentation and / or other materials provided with the distribution .
* Neither the name " Markdown " nor the names of its contributors may
be used to endorse or promote products derived from this software
without specific prior written permission .
This software is provided by the copyright holders and contributors " as
is " and any express or implied warranties, including, but not limited
to , the implied warranties of merchantability and fitness for a
particular purpose are disclaimed . In no event shall the copyright owner
or contributors be liable for any direct , indirect , incidental , special ,
exemplary , or consequential damages ( including , but not limited to ,
procurement of substitute goods or services ; loss of use , data , or
profits ; or business interruption ) however caused and on any theory of
liability , whether in contract , strict liability , or tort ( including
negligence or otherwise ) arising in any way out of the use of this
software , even if advised of the possibility of such damage .
*/
?>