root/trunk/lib/markdown.php

Revision 2, 37.8 kB (checked in by fabien, 6 years ago)

initial import

  • Property svn:mime-type set to text/x-php
  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
Line 
1 <?php
2
3 #
4 # Markdown  -  A text-to-HTML conversion tool for web writers
5 #
6 # Copyright (c) 2004-2005 John Gruber 
7 # <http://daringfireball.net/projects/markdown/>
8 #
9 # Copyright (c) 2004-2005 Michel Fortin - PHP Port 
10 # <http://www.michelf.com/projects/php-markdown/>
11 #
12
13
# All configuration and lookup-table variables are explicitly declared
# global here.
# NOTE(review): presumably so they remain global even when this file is
# included from inside a function -- confirm against callers.
global  $MarkdownPHPVersion, $MarkdownSyntaxVersion,
        $md_empty_element_suffix, $md_tab_width,
        $md_nested_brackets_depth, $md_nested_brackets,
        $md_escape_table, $md_backslash_escape_table,
        $md_list_level;

$MarkdownPHPVersion    = '1.0.1c'; # Fri 9 Dec 2005
$MarkdownSyntaxVersion = '1.0.1';  # Sun 12 Dec 2004


#
# Global default settings:
#
$md_empty_element_suffix = " />";     # Change to ">" for HTML output
$md_tab_width = 4;

#
# WordPress settings:
#
$md_wp_posts    = true;  # Set to false to remove Markdown from posts.
$md_wp_comments = true;  # Set to false to remove Markdown from comments.
35
36
37 # -- WordPress Plugin Interface -----------------------------------------------
38 /*
39 Plugin Name: Markdown
40 Plugin URI: http://www.michelf.com/projects/php-markdown/
41 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
42 Version: 1.0.1c
43 Author: Michel Fortin
44 Author URI: http://www.michelf.com/
45 */
# Only hook into WordPress when its version global is present.
if (isset($wp_version)) {
    # More details about how it works here:
    # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>

    # Post content and excerpts
    if ($md_wp_posts) {
        # Replace WordPress' automatic paragraph filter with Markdown.
        remove_filter('the_content', 'wpautop');
        remove_filter('the_excerpt', 'wpautop');
        add_filter('the_content',     'Markdown', 6);
        add_filter('get_the_excerpt', 'Markdown', 6);
        add_filter('get_the_excerpt', 'trim', 7);
        add_filter('the_excerpt',     'md_add_p');
        add_filter('the_excerpt_rss', 'md_strip_p');

        # Move tag balancing from save time to display time, so it runs
        # after Markdown (priority 6) rather than on the raw source.
        remove_filter('content_save_pre', 'balanceTags', 50);
        remove_filter('excerpt_save_pre', 'balanceTags', 50);
        add_filter('the_content',        'balanceTags', 50);
        add_filter('get_the_excerpt', 'balanceTags', 9);

        # Wrap $text in a paragraph unless it is empty or already starts
        # with a <p> tag. Always returns a string.
        function md_add_p($text) {
            if (strlen($text) == 0) return '';
            # Look at the *start* of the text: an already-wrapped excerpt
            # begins with "<p>" (it can never end with an opening tag).
            if (strcasecmp(substr($text, 0, 3), '<p>') == 0) return $text;
            return '<p>'.$text.'</p>';
        }
        # Remove every opening and closing <p> tag from $t.
        function md_strip_p($t) { return preg_replace('{</?[pP]>}', '', $t); }
    }

    # Comments
    if ($md_wp_comments) {
        remove_filter('comment_text', 'wpautop');
        remove_filter('comment_text', 'make_clickable');
        add_filter('pre_comment_content', 'Markdown', 6);
        # Tags are hidden at priority 8 and restored at priority 12, so
        # filters running in between do not see them.
        add_filter('pre_comment_content', 'md_hide_tags', 8);
        add_filter('pre_comment_content', 'md_show_tags', 12);
        add_filter('get_comment_text',    'Markdown', 6);
        add_filter('get_comment_excerpt', 'Markdown', 6);
        add_filter('get_comment_excerpt', 'md_strip_p', 7);

        # Map of block tags to the MD5 placeholder that stands in for them.
        global $md_hidden_tags;
        $md_hidden_tags = array(
            '<p>'    => md5('<p>'),        '</p>'    => md5('</p>'),
            '<pre>'  => md5('<pre>'),      '</pre>'  => md5('</pre>'),
            '<ol>'   => md5('<ol>'),       '</ol>'   => md5('</ol>'),
            '<ul>'   => md5('<ul>'),       '</ul>'   => md5('</ul>'),
            '<li>'   => md5('<li>'),       '</li>'   => md5('</li>'),
            );

        # Replace each listed tag with its MD5 placeholder.
        function md_hide_tags($text) {
            global $md_hidden_tags;
            return str_replace(array_keys($md_hidden_tags),
                                array_values($md_hidden_tags), $text);
        }
        # Inverse of md_hide_tags(): turn placeholders back into tags.
        function md_show_tags($text) {
            global $md_hidden_tags;
            return str_replace(array_values($md_hidden_tags),
                                array_keys($md_hidden_tags), $text);
        }
    }
}
105
106
107 # -- bBlog Plugin Info --------------------------------------------------------
function identify_modifier_markdown() {
    # Returns the descriptor array for the bBlog plugin interface. The
    # 'version' entry is read from the global $MarkdownPHPVersion.
    global $MarkdownPHPVersion;

    $help_html = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'Markdown';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = $MarkdownPHPVersion;
    $info['help']        = $help_html;

    return $info;
}
121
122 # -- Smarty Modifier Interface ------------------------------------------------
function smarty_modifier_markdown($text) {
    # Smarty modifier entry point: delegates straight to Markdown().
    $html = Markdown($text);
    return $html;
}
126
127 # -- Textile Compatibility Mode -----------------------------------------------
128 # Rename this file to "classTextile.php" and it can replace Textile anywhere.
if (!strcasecmp(substr(__FILE__, -16), "classTextile.php")) {
    # This file has been renamed to stand in for Textile.
    # Try to pull in PHP SmartyPants, expected in the same directory;
    # a missing file is silently ignored.
    @include_once 'smartypants.php';

    # Stand-in for the Textile class: same method name, Markdown inside.
    class Textile {
        # Converts $text with Markdown when both $lite and $encode are
        # empty, then applies SmartyPants if that function is available.
        # $noimage and $strict are accepted but not used.
        function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
            $full_conversion = ($lite == '' && $encode == '');
            if ($full_conversion) {
                $text = Markdown($text);
            }
            if (function_exists('SmartyPants')) {
                $text = SmartyPants($text);
            }
            return $text;
        }
    }
}
141
142
143
144 #
145 # Globals:
146 #
147
# Regex to match balanced [brackets].
# PHP needs an explicit maximum bracket nesting depth, so the pattern is
# built by repeating the opening and closing halves that many times.
$md_nested_brackets_depth = 6;
$md_nested_brackets =
    str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
    str_repeat('\])*', $md_nested_brackets_depth);

# Table of hash values for escaped characters:
# each special character maps to the MD5 hash that stands in for it.
$md_escape_table = array(
    "\\" => md5("\\"),
    "`" => md5("`"),
    "*" => md5("*"),
    "_" => md5("_"),
    "{" => md5("{"),
    "}" => md5("}"),
    "[" => md5("["),
    "]" => md5("]"),
    "(" => md5("("),
    ")" => md5(")"),
    ">" => md5(">"),
    "#" => md5("#"),
    "+" => md5("+"),
    "-" => md5("-"),
    "." => md5("."),
    "!" => md5("!")
);
# Create an identical table but keyed by the backslash-escaped form of
# each character (e.g. "\*"). Initialised explicitly so the variable is
# always an array.
$md_backslash_escape_table = array();
foreach ($md_escape_table as $key => $char) {
    $md_backslash_escape_table["\\$key"] = $char;
}
178
179
function Markdown($text) {
#
# Main entry point: takes Markdown source and returns the converted text
# followed by a newline. The order of the steps below is significant.
#
    global $md_urls, $md_titles, $md_html_blocks;

    # Reset the per-document hashes so that several articles converted on
    # the same page (e.g. an index of recent articles) do not leak link
    # definitions or HTML blocks into one another.
    $md_html_blocks = array();
    $md_titles      = array();
    $md_urls        = array();

    # Normalise DOS and Mac line endings to Unix, and make sure the text
    # ends with a couple of newlines.
    $text = str_replace(array("\r\n", "\r"), "\n", $text) . "\n\n";

    # Tabs become spaces.
    $text = _Detab($text);

    # Blank out lines holding only spaces/tabs, so later patterns can
    # match consecutive blank lines with a plain /\n+/.
    $text = preg_replace('/^[ \t]+$/m', '', $text);

    # Hash block-level HTML, strip link definitions into the hashes, run
    # the block-level transformations, then restore escaped characters.
    $text = _HashHTMLBlocks($text);
    $text = _StripLinkDefinitions($text);
    $text = _UnescapeSpecialChars(_RunBlockGamut($text));

    return $text . "\n";
}
224
225
function _StripLinkDefinitions($text) {
#
# Removes link definitions of the form
#     ^[id]: url "optional title"
# from $text. Each match is handed to _StripLinkDefinitions_callback(),
# whose return value replaces the definition.
#
    global $md_tab_width;
    $max_indent = $md_tab_width - 1;

    $definition_re = '{
                        ^[ ]{0,'.$max_indent.'}\[(.+)\]:    # id = $1
                          [ \t]*
                          \n?                # maybe *one* newline
                          [ \t]*
                        <?(\S+?)>?            # url = $2
                          [ \t]*
                          \n?                # maybe one newline
                          [ \t]*
                        (?:
                            (?<=\s)            # lookbehind for whitespace
                            ["(]
                            (.+?)            # title = $3
                            [")]
                            [ \t]*
                        )?    # title is optional
                        (?:\n+|\Z)
        }xm';

    return preg_replace_callback($definition_re,
        '_StripLinkDefinitions_callback', $text);
}
function _StripLinkDefinitions_callback($matches) {
    # Records one link definition: $matches[1] is the id, $matches[2] the
    # URL and $matches[3] (when present) the title. Ids are stored
    # lower-cased. Returns the empty string, which replaces the match.
    global $md_urls, $md_titles;

    $id = strtolower($matches[1]);
    $md_urls[$id] = _EncodeAmpsAndAngles($matches[2]);

    if (isset($matches[3])) {
        $md_titles[$id] = str_replace('"', '&quot;', $matches[3]);
    }

    return '';
}
265
266
function _HashHTMLBlocks($text) {
#
# Replaces block-level HTML in $text by running four patterns in turn,
# each through _HashHTMLBlocks_callback(). Only block-level tags are
# handled, because "paragraphs" wrapped in non-block-level tags (anchors,
# phrase emphasis, spans) must still get <p>s around them.
#
    global $md_tab_width;
    $max_indent = $md_tab_width - 1;

    # Hard-coded tag lists; the second one omits ins and del.
    $block_tags_a = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
                    'script|noscript|form|fieldset|iframe|math|ins|del';
    $block_tags_b = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
                    'script|noscript|form|fieldset|iframe|math';

    # 1. Nested blocks, e.g.:
    #     <div>
    #         <div>
    #         tags for inner block must be indented.
    #         </div>
    #     </div>
    # The outermost tags must sit at the left margin and the inner ones
    # must be indented. This has to run before the more liberal pattern,
    # which would start at the first `<div>` and stop at the first
    # `</div>`.
    $nested_re = "{
                (                        # save in $1
                    ^                    # start of line  (with /m)
                    <($block_tags_a)    # start tag = $2
                    \\b                    # word break
                    (.*\\n)*?            # any number of lines, minimally matching
                    </\\2>                # the matching end tag
                    [ \\t]*                # trailing spaces/tabs
                    (?=\\n+|\\Z)    # followed by a newline or end of document
                )
        }xm";

    # 2. More liberal: simply from `\n<tag>` to `</tag>\n`.
    $liberal_re = "{
                (                        # save in $1
                    ^                    # start of line  (with /m)
                    <($block_tags_b)    # start tag = $2
                    \\b                    # word break
                    (.*\\n)*?            # any number of lines, minimally matching
                    .*</\\2>                # the matching end tag
                    [ \\t]*                # trailing spaces/tabs
                    (?=\\n+|\\Z)    # followed by a newline or end of document
                )
        }xm";

    # 3. Special case just for <hr />; easier than complicating the
    #    patterns above.
    $hr_re = '{
                (?:
                    (?<=\n\n)        # Starting after a blank line
                    |                # or
                    \A\n?            # the beginning of the doc
                )
                (                        # save in $1
                    [ ]{0,'.$max_indent.'}
                    <(hr)                # start tag = $2
                    \b                    # word break
                    ([^<>])*?            #
                    /?>                    # the matching end tag
                    [ \t]*
                    (?=\n{2,}|\Z)        # followed by a blank line or end of document
                )
        }x';

    # 4. Special case for standalone HTML comments.
    $comment_re = '{
                (?:
                    (?<=\n\n)        # Starting after a blank line
                    |                # or
                    \A\n?            # the beginning of the doc
                )
                (                        # save in $1
                    [ ]{0,'.$max_indent.'}
                    (?s:
                        <!
                        (--.*?--\s*)+
                        >
                    )
                    [ \t]*
                    (?=\n{2,}|\Z)        # followed by a blank line or end of document
                )
            }x';

    # Apply the patterns strictly in the order given above.
    foreach (array($nested_re, $liberal_re, $hr_re, $comment_re) as $re) {
        $text = preg_replace_callback($re, '_HashHTMLBlocks_callback', $text);
    }

    return $text;
}
function _HashHTMLBlocks_callback($matches) {
    # Stores the matched HTML block ($matches[1]) in the global
    # $md_html_blocks under its MD5 hash, and returns that hash surrounded
    # by blank lines as the replacement text.
    global $md_html_blocks;

    $block = $matches[1];
    $hash  = md5($block);
    $md_html_blocks[$hash] = $block;

    return "\n\n" . $hash . "\n\n";
}
375
376
function _RunBlockGamut($text) {
#
# Applies every block-level transformation (headers, rules, lists, code
# blocks, block quotes, paragraphs) to $text and returns the result.
#
    global $md_empty_element_suffix;

    $text = _DoHeaders($text);

    # Horizontal rules: three or more *, - or _ on a line of their own.
    $rule_patterns = array(
        '{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx',
    );
    $rule_html = "\n<hr" . $md_empty_element_suffix . "\n";
    $text = preg_replace($rule_patterns, $rule_html, $text);

    $text = _DoBlockQuotes(_DoCodeBlocks(_DoLists($text)));

    # Markdown() already hashed the raw HTML of the source. Hash again
    # here to protect the markup generated above, so that block-level
    # tags do not get wrapped in <p> tags.
    return _FormParagraphs(_HashHTMLBlocks($text));
}
407
408
function _RunSpanGamut($text) {
#
# Applies every span-level transformation, i.e. those that occur *within*
# block-level tags like paragraphs, headers, and list items.
#
    global $md_empty_element_suffix;

    $text = _EscapeSpecialChars(_DoCodeSpans($text));

    # Images go before anchors, because ![foo][f] looks like an anchor.
    $text = _DoAnchors(_DoImages($text));

    # Auto-links such as `<http://example.com/>` come after _DoAnchors(),
    # since inline links may use < and > delimiters: [this](<url>).
    $text = _DoAutoLinks($text);
    $text = _DoItalicsAndBold(_EncodeAmpsAndAngles($text));

    # Hard breaks: two or more spaces before a newline.
    $break_html = "<br" . $md_empty_element_suffix . "\n";
    return preg_replace('/ {2,}\n/', $break_html, $text);
}
437
438
function _EscapeSpecialChars($text) {
    # Rebuilds $text from the tokens produced by _TokenizeHTML().
    # Inside 'tag' tokens, * and _ are swapped for their entries in
    # $md_escape_table so they cannot be taken for italics/strong markers;
    # using MD5 values is likely overkill, but it avoids accidental
    # collisions. Every other token goes through
    # _EncodeBackslashEscapes().
    global $md_escape_table;

    $rebuilt = '';
    foreach (_TokenizeHTML($text) as $token) {
        if ($token[0] == 'tag') {
            $rebuilt .= str_replace(
                array('*', '_'),
                array($md_escape_table['*'], $md_escape_table['_']),
                $token[1]);
            continue;
        }
        $rebuilt .= _EncodeBackslashEscapes($token[1]);
    }
    return $rebuilt;
}
467
468
function _DoAnchors($text) {
#
# Turns Markdown link shortcuts into XHTML <a> tags. Reference-style
# links are processed first, inline-style links second.
#
    global $md_nested_brackets;

    # Reference-style links: [link text] [id]
    $reference_re = "{
        (                    # wrap whole match in $1
          \\[
            ($md_nested_brackets)    # link text = $2
          \\]

          [ ]?                # one optional space
          (?:\\n[ ]*)?        # one optional newline followed by spaces

          \\[
            (.*?)        # id = $3
          \\]
        )
        }xs";

    # Inline-style links: [link text](url "optional title")
    $inline_re = "{
        (                # wrap whole match in $1
          \\[
            ($md_nested_brackets)    # link text = $2
          \\]
          \\(            # literal paren
            [ \\t]*
            <?(.*?)>?    # href = $3
            [ \\t]*
            (            # $4
              (['\"])    # quote char = $5
              (.*?)        # Title = $6
              \\5        # matching quote
            )?            # title is optional
          \\)
        )
        }xs";

    $text = preg_replace_callback($reference_re,
        '_DoAnchors_reference_callback', $text);
    return preg_replace_callback($inline_re,
        '_DoAnchors_inline_callback', $text);
}
function _DoAnchors_reference_callback($matches) {
    # Builds the <a> tag for a reference-style link.
    #   $matches[1] = whole match, [2] = link text, [3] = link id.
    # An empty id means a shortcut link like [this][], so the link text
    # itself is used as the id. Unknown ids leave the match untouched.
    global $md_urls, $md_titles, $md_escape_table;

    $label = $matches[2];
    $id    = strtolower($matches[3]);
    if ($id == "") {
        $id = strtolower($label);
    }

    if (!isset($md_urls[$id])) {
        return $matches[1];
    }

    # * and _ are encoded to avoid conflicting with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $tag = '<a href="' . str_replace($plain, $hashed, $md_urls[$id]) . '"';
    if (isset($md_titles[$id])) {
        $tag .= ' title="' . str_replace($plain, $hashed, $md_titles[$id]) . '"';
    }

    return $tag . '>' . $label . '</a>';
}
function _DoAnchors_inline_callback($matches) {
    # Builds the <a> tag for an inline-style link.
    #   $matches[2] = link text, [3] = URL, [6] = title (may be absent).
    global $md_escape_table;

    # * and _ are encoded to avoid conflicting with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $tag = '<a href="' . str_replace($plain, $hashed, $matches[3]) . '"';

    if (isset($matches[6])) {
        $title = str_replace('"', '&quot;', $matches[6]);
        $tag  .= ' title="' . str_replace($plain, $hashed, $title) . '"';
    }

    return $tag . '>' . $matches[2] . '</a>';
}
572
573
function _DoImages($text) {
#
# Turns Markdown image shortcuts into <img> tags. Reference-style images
# are processed first, inline images second.
#
    global $md_nested_brackets;

    # Reference-style labeled images: ![alt text][id]
    $reference_re = '{
        (                # wrap whole match in $1
          !\[
            ('.$md_nested_brackets.')        # alt text = $2
          \]

          [ ]?                # one optional space
          (?:\n[ ]*)?        # one optional newline followed by spaces

          \[
            (.*?)        # id = $3
          \]

        )
        }xs';

    # Inline images: ![alt text](url "optional title")
    $inline_re = '{
        (                # wrap whole match in $1
          !\[
            ('.$md_nested_brackets.')        # alt text = $2
          \]
          \(            # literal paren
            [ \t]*
            <?(\S+?)>?    # src url = $3
            [ \t]*
            (            # $4
              ([\'"])    # quote char = $5
              (.*?)        # title = $6
              \5        # matching quote
              [ \t]*
            )?            # title is optional
          \)
        )
        }xs';

    $text = preg_replace_callback($reference_re,
        '_DoImages_reference_callback', $text);
    return preg_replace_callback($inline_re,
        '_DoImages_inline_callback', $text);
}
function _DoImages_reference_callback($matches) {
    # Builds the <img> tag for a reference-style image.
    #   $matches[1] = whole match, [2] = alt text, [3] = link id.
    # An empty id means a shortcut like ![this][], so the (unescaped) alt
    # text is used as the id. Unknown ids leave the match untouched.
    global $md_urls, $md_titles, $md_empty_element_suffix, $md_escape_table;

    $id = strtolower($matches[3]);
    if ($id == "") {
        $id = strtolower($matches[2]);
    }

    if (!isset($md_urls[$id])) {
        # If there's no such link ID, leave intact:
        return $matches[1];
    }

    # * and _ are encoded to avoid conflicting with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $alt = str_replace('"', '&quot;', $matches[2]);
    $src = str_replace($plain, $hashed, $md_urls[$id]);

    $tag = '<img src="' . $src . '" alt="' . $alt . '"';
    if (isset($md_titles[$id])) {
        $tag .= ' title="' . str_replace($plain, $hashed, $md_titles[$id]) . '"';
    }

    return $tag . $md_empty_element_suffix;
}
function _DoImages_inline_callback($matches) {
    # Builds the <img> tag for an inline image.
    #   $matches[2] = alt text, [3] = src URL, [6] = title (may be absent).
    # Note that a title attribute is always emitted; it is empty when the
    # source gave no title.
    global $md_empty_element_suffix, $md_escape_table;

    # * and _ are encoded to avoid conflicting with italics/bold.
    $plain  = array('*', '_');
    $hashed = array($md_escape_table['*'], $md_escape_table['_']);

    $alt = str_replace('"', '&quot;', $matches[2]);
    $src = str_replace($plain, $hashed, $matches[3]);

    $title = isset($matches[6]) ? $matches[6] : '';
    $title = str_replace('"', '&quot;', $title);
    $title = str_replace($plain, $hashed, $title);

    return '<img src="' . $src . '" alt="' . $alt . '"'
         . ' title="' . $title . '"'
         . $md_empty_element_suffix;
}
687
688
function _DoHeaders($text) {
#
# Converts Setext-style and atx-style headers in $text to <h1>..<h6>.
# Header text is passed through _RunSpanGamut().
#
# Uses preg_replace_callback() rather than the /e (eval) pattern modifier,
# which no longer exists in PHP 7+. Because the captured text is handed
# to the callbacks unmodified, no quote un-slashing is needed.
#
    # Setext-style headers:
    #      Header 1
    #      ========
    #
    #      Header 2
    #      --------
    #
    # The "=" pattern runs over the whole text before the "-" pattern.
    $text = preg_replace_callback('{ ^(.+)[ \t]*\n=+[ \t]*\n+ }mx',
        '_DoHeaders_callback_setext_h1', $text);
    $text = preg_replace_callback('{ ^(.+)[ \t]*\n-+[ \t]*\n+ }mx',
        '_DoHeaders_callback_setext_h2', $text);

    # atx-style headers:
    #    # Header 1
    #    ## Header 2
    #    ## Header 2 with closing hashes ##
    #    ...
    #    ###### Header 6
    #
    $text = preg_replace_callback("{
            ^(\\#{1,6})    # $1 = string of #'s
            [ \\t]*
            (.+?)        # $2 = Header text
            [ \\t]*
            \\#*            # optional closing #'s (not counted)
            \\n+
        }xm",
        '_DoHeaders_callback_atx', $text);

    return $text;
}
# Replacement for a Setext header underlined with "=": $matches[1] is the
# header text.
function _DoHeaders_callback_setext_h1($matches) {
    return '<h1>' . _RunSpanGamut($matches[1]) . "</h1>\n\n";
}
# Replacement for a Setext header underlined with "-": $matches[1] is the
# header text.
function _DoHeaders_callback_setext_h2($matches) {
    return '<h2>' . _RunSpanGamut($matches[1]) . "</h2>\n\n";
}
# Replacement for an atx header: the number of leading #'s in $matches[1]
# is the header level, $matches[2] is the header text.
function _DoHeaders_callback_atx($matches) {
    $level = strlen($matches[1]);
    return "<h$level>" . _RunSpanGamut($matches[2]) . "</h$level>\n\n";
}
724
725
function _DoLists($text) {
#
# Forms HTML ordered (numbered) and unordered (bulleted) lists. The text
# is scanned once for bullet markers and then once for number markers.
#
    global $md_tab_width, $md_list_level;
    $max_indent = $md_tab_width - 1;

    # Marker patterns: bullets first, then numbers.
    foreach (array('[*+-]', '\d+[.]') as $marker) {
        # Re-usable pattern to match any entire ul or ol list:
        $whole_list = '
            (                                # $1 = whole list
              (                                # $2
                [ ]{0,'.$max_indent.'}
                ('.$marker.')                # $3 = first list item marker
                [ \t]+
              )
              (?s:.+?)
              (                                # $4
                  \z
                |
                  \n{2,}
                  (?=\S)
                  (?!                        # Negative lookahead for another list item marker
                    [ \t]*
                    '.$marker.'[ \t]+
                  )
              )
            )
        '; // mx

        # A different prefix is required depending on whether we are
        # already inside a list; see the extended comment in
        # _ProcessListItems().
        # NOTE: despite the names, '_DoLists_callback_top' is the handler
        # used while $md_list_level is non-zero, and
        # '_DoLists_callback_nested' the one used when it is zero.
        if ($md_list_level) {
            $prefix  = '^';
            $handler = '_DoLists_callback_top';
        }
        else {
            $prefix  = '(?:(?<=\n\n)|\A\n?)';
            $handler = '_DoLists_callback_nested';
        }

        $list_re = '{
                    '.$prefix.'
                    '.$whole_list.'
                }mx';
        $text = preg_replace_callback($list_re, $handler, $text);
    }

    return $text;
}
function _DoLists_callback_top($matches) {
    # Converts one matched list ($matches[1]) to HTML; $matches[3] is the
    # first item marker and decides between <ul> and <ol>.
    $bullet_re = '[*+-]';
    $number_re = '\d+[.]';

    if (preg_match("/$bullet_re/", $matches[3])) {
        $tag         = "ul";
        $item_marker = $bullet_re;
    }
    else {
        $tag         = "ol";
        $item_marker = $number_re;
    }

    # Turn double returns into triple returns, so that a paragraph can be
    # made for the last item in a list, if necessary.
    $list  = preg_replace("/\n{2,}/", "\n\n\n", $matches[1]);
    $items = _ProcessListItems($list, $item_marker);

    # Trailing whitespace is trimmed so the closing tag lands on the
    # preceding line; this is a workaround to get it past the HTML block
    # parser.
    return "<$tag>" . rtrim($items) . "</$tag>\n";
}
function _DoLists_callback_nested($matches) {
    # Converts one matched list ($matches[1]) to HTML; $matches[3] is the
    # first item marker and decides between <ul> and <ol>. The opening
    # tag is followed by a newline and the items are not trimmed.
    $bullet_re = '[*+-]';
    $number_re = '\d+[.]';

    if (preg_match("/$bullet_re/", $matches[3])) {
        $tag         = "ul";
        $item_marker = $bullet_re;
    }
    else {
        $tag         = "ol";
        $item_marker = $number_re;
    }

    # Turn double returns into triple returns, so that a paragraph can be
    # made for the last item in a list, if necessary.
    $list  = preg_replace("/\n{2,}/", "\n\n\n", $matches[1]);
    $items = _ProcessListItems($list, $item_marker);

    return "<$tag>\n" . $items . "</$tag>\n";
}
826
827
function _ProcessListItems($list_str, $marker_any) {
#
#    Splits the contents of a single ordered or unordered list into its
#    individual items; each item is rewritten by
#    _ProcessListItems_callback().
#
    global $md_list_level;

    # $md_list_level records how deep inside lists we are: incremented on
    # entry, decremented on exit, zero when outside any list.
    #
    # The distinction matters because, outside a list, text such as
    #
    #        I recommend upgrading to version
    #        8. Oops, now this line is treated
    #        as a sub-list.
    #
    # should stay a single paragraph even though its second line starts
    # with a digit-period-space sequence, whereas inside a list (or
    # sub-list) that same line starts a sub-list. It is a kludge: this
    # part of Markdown's syntax is hard to parse perfectly without
    # mind-reading. A possible cure would be to require sub-lists to
    # begin with a starting cardinal number, e.g. "1." or "a.".
    $md_list_level++;

    # Trim trailing blank lines.
    $list_str = preg_replace("/\n{2,}\\z/", "\n", $list_str);

    $item_re = '{
        (\n)?                            # leading line = $1
        (^[ \t]*)                        # leading whitespace = $2
        ('.$marker_any.') [ \t]+        # list marker = $3
        ((?s:.+?)                        # list item text   = $4
        (\n{1,2}))
        (?= \n* (\z | \2 ('.$marker_any.') [ \t]+))
        }xm';
    $list_str = preg_replace_callback($item_re,
        '_ProcessListItems_callback', $list_str);

    $md_list_level--;
    return $list_str;
}
function _ProcessListItems_callback($matches) {
#
#    Turn one matched list item into an <li> element.
#    $matches[1] is the optional leading newline, $matches[4] the item text.
#
    $raw_item     = $matches[4];
    $leading_line = $matches[1];

    # An item preceded by a blank line, or containing one, gets full
    # block-level processing. The check runs on the text as matched,
    # before it is outdented.
    $needs_blocks = $leading_line || preg_match('/\n{2,}/', $raw_item);

    if ($needs_blocks) {
        $html = _RunBlockGamut(_Outdent($raw_item));
    }
    else {
        # Look for sub-lists, drop trailing newlines, then apply the
        # span-level transformations only.
        $html = _DoLists(_Outdent($raw_item));
        $html = preg_replace('/\n+$/', '', $html);
        $html = _RunSpanGamut($html);
    }

    return "<li>" . $html . "</li>\n";
}
891
892
function _DoCodeBlocks($text) {
#
#    Find indented code blocks and hand each one to
#    _DoCodeBlocks_callback, which produces the `<pre><code>` markup.
#
    global $md_tab_width;

    # A block begins after a blank line (or at the start of the text) and
    # consists of lines indented by a tab or $md_tab_width spaces.
    $block_re = '{
            (?:\n\n|\A)
            (                # $1 = the code block -- one or more lines, starting with a space/tab
              (?:
                (?:[ ]{'.$md_tab_width.'} | \t)  # Lines must start with a tab or a tab-width of spaces
                .*\n+
              )+
            )
            ((?=^[ ]{0,'.$md_tab_width.'}\S)|\Z)    # Lookahead for non-space at line-start, or end of doc
        }xm';

    return preg_replace_callback($block_re, '_DoCodeBlocks_callback', $text);
}
function _DoCodeBlocks_callback($matches) {
#
#    Build the `<pre><code>` markup for one matched code block
#    ($matches[1]): outdent it, encode it, trim it, wrap it.
#
    $code = _Outdent($matches[1]);
    $code = _EncodeCode($code);

    # Remove newlines at the start and any whitespace at the end:
    $code = preg_replace('/\A\n+/', '', $code);
    $code = preg_replace('/\s+\z/', '', $code);

    return "\n\n<pre><code>" . $code . "\n</code></pre>\n\n";
}
924
925
function _DoCodeSpans($text) {
#
#    Convert backtick-delimited runs into <code></code> spans.
#
#     *    The delimiter may be any number of backticks, which lets a span
#         contain literal backticks. For example:
#
#          Just type ``foo `bar` baz`` at the prompt.
#
#          becomes:
#
#          <p>Just type <code>foo `bar` baz</code> at the prompt.</p>
#
#        There is no upper limit on the delimiter length: to show three
#        consecutive backticks, delimit with four, and so on.
#
#    *    Pad with spaces to put a literal backtick at either edge:
#
#          ... type `` `bar` `` ...
#
#          becomes:
#
#          ... type <code>`bar`</code> ...
#
    $span_re = '@
            (?<!\\\)    # Character before opening ` can\'t be a backslash
            (`+)        # $1 = Opening run of `
            (.+?)        # $2 = The code block
            (?<!`)
            \1            # Matching closer
            (?!`)
        @xs';

    return preg_replace_callback($span_re, '_DoCodeSpans_callback', $text);
}
function _DoCodeSpans_callback($matches) {
#
#    Build the <code> element for one matched span; $matches[2] is the
#    text found between the backtick delimiters.
#
    # Strip spaces and tabs from the start, then from the end:
    $edge_patterns = array('/^[ \t]*/', '/[ \t]*$/');
    $code = preg_replace($edge_patterns, '', $matches[2]);

    return "<code>" . _EncodeCode($code) . "</code>";
}
970
971
function _EncodeCode($_) {
#
# Make text safe to appear inside a Markdown code run: inside code the
# HTML and Markdown special characters are plain literals.
#
    global $md_escape_table;

    # HTML first. The ampersand must be handled before the angle brackets,
    # otherwise the "&" of "&lt;" / "&gt;" would be encoded a second time.
    # Existing entities are deliberately encoded too: they are not
    # entities inside a code span.
    $html_chars    = array('&',     '<',    '>');
    $html_entities = array('&amp;', '&lt;', '&gt;');
    $encoded = str_replace($html_chars, $html_entities, $_);

    # Then swap each key of $md_escape_table (the characters that are
    # magic in Markdown) for its table value:
    $magic_chars  = array_keys($md_escape_table);
    $placeholders = array_values($md_escape_table);

    return str_replace($magic_chars, $placeholders, $encoded);
}
994
995
function _DoItalicsAndBold($text) {
#
#    Convert **text** / __text__ to <strong> and *text* / _text_ to <em>.
#    The strong pass has to run before the em pass.
#
    $strong_re = '{
            (                        # $1: Marker
                (?<!\*\*) \*\* |    #     (not preceded by two chars of
                (?<!__)   __        #      the same marker)
            )                       
            (?=\S)                     # Not followed by whitespace
            (?!\1)                    #   or two others marker chars.
            (                        # $2: Content
                (?:
                    [^*_]+?            # Anthing not em markers.
                |
                                    # Balence any regular emphasis inside.
                    ([*_]) (?=\S) .+? (?<=\S) \3    # $3: em char (* or _)
                |
                    (?! \1 ) .        # Allow unbalenced * and _.
                )+?
            )
            (?<=\S) \1                # End mark not preceded by whitespace.
        }sx';
    $em_re = '{ ( (?<!\*)\* | (?<!_)_ ) (?=\S) (?! \1) (.+?) (?<=\S) \1 }sx';

    $with_strong = preg_replace($strong_re, '<strong>\2</strong>', $text);

    return preg_replace($em_re, '<em>\2</em>', $with_strong);
}
1025
1026
function _DoBlockQuotes($text) {
#
#    Find runs of ">"-quoted lines and pass each run to
#    _DoBlockQuotes_callback for conversion to <blockquote> markup.
#
    $quote_re = '/
          (                                # Wrap whole match in $1
            (
              ^[ \t]*>[ \t]?            # ">" at the start of a line
                .+\n                    # rest of the first line
              (.+\n)*                    # subsequent consecutive lines
              \n*                        # blanks
            )+
          )
        /xm';

    return preg_replace_callback($quote_re, '_DoBlockQuotes_callback', $text);
}
function _DoBlockQuotes_callback($matches) {
#
#    Build the <blockquote> element for one matched run of quoted lines
#    ($matches[1]).
#
    $quote = $matches[1];

    # Peel off one level of ">" quoting, then blank out lines that hold
    # nothing but spaces or tabs:
    $quote = preg_replace('/^[ \t]*>[ \t]?/m', '', $quote);
    $quote = preg_replace('/^[ \t]+$/m', '', $quote);

    # Recurse: the quoted text is itself block-level Markdown.
    $quote = _RunBlockGamut($quote);

    # Indent every line of the result by two spaces ...
    $quote = preg_replace('/^/m', "  ", $quote);
    # ... and undo that inside <pre>, where leading spaces are significant:
    $quote = preg_replace_callback('{(\s*<pre>.+?</pre>)}sx',
                                   '_DoBlockQuotes_callback2', $quote);

    return "<blockquote>\n" . $quote . "\n</blockquote>\n\n";
}
function _DoBlockQuotes_callback2($matches) {
#
#    Strip two leading spaces from every line of a matched <pre> block
#    ($matches[1]) and return the result.
#
    return preg_replace('/^  /m', '', $matches[1]);
}
1060
1061
function _FormParagraphs($text) {
#
#    Split text into paragraphs on blank lines and wrap each in <p> tags.
#    Paragraphs that are keys of $md_html_blocks are not wrapped; they are
#    replaced by the corresponding table value instead.
#
#    Params:
#        $text - string to process with html <p> tags
#
    global $md_html_blocks;

    # Remove newlines from both ends of the text:
    $text = preg_replace('/\A\n+/', '', $text);
    $text = preg_replace('/\n+\z/', '', $text);

    $grafs = preg_split('/\n{2,}/', $text, -1, PREG_SPLIT_NO_EMPTY);

    #
    # First pass: wrap everything that is not a stored HTML block.
    #
    foreach ($grafs as $i => $chunk) {
        if (isset( $md_html_blocks[$chunk] )) {
            continue;
        }
        $chunk = _RunSpanGamut($chunk);
        # The opening tag replaces any leading spaces or tabs.
        $grafs[$i] = preg_replace('/^([ \t]*)/', '<p>', $chunk) . "</p>";
    }

    #
    # Second pass: put the stored HTML blocks back.
    #
    foreach ($grafs as $i => $chunk) {
        if (isset( $md_html_blocks[$chunk] )) {
            $grafs[$i] = $md_html_blocks[$chunk];
        }
    }

    return implode("\n\n", $grafs);
}
1097
1098
function _EncodeAmpsAndAngles($text) {
#
# Encode only those ampersands and left angle brackets that need it.
#
    # An ampersand is left alone when it already starts an entity.
    # Approach taken from Nat Irons's Amputator MT plugin:
    #   http://bumppo.net/projects/amputator/
    $bare_amp_re = '/&(?!#?[xX]?(?:[0-9a-fA-F]+|\w+);)/';

    # A "<" is left alone when a letter or one of / ? $ ! follows it.
    $bare_lt_re = '{<(?![a-z/?\$!])}i';

    $text = preg_replace($bare_amp_re, '&amp;', $text);
    $text = preg_replace($bare_lt_re, '&lt;', $text);

    return $text;
}
1112
1113
function _EncodeBackslashEscapes($text) {
#
#    Parameter:  String.
#    Returns:    The string with every key of $md_backslash_escape_table
#                replaced by its table value.
#
    global $md_escape_table, $md_backslash_escape_table;

    # Replacements are applied in table order, so the escaped backslash
    # has to be handled first.
    # NOTE(review): presumably the table lists it first -- confirm where
    # the table is built.
    $sequences    = array_keys($md_backslash_escape_table);
    $replacements = array_values($md_backslash_escape_table);

    return str_replace($sequences, $replacements, $text);
}
1125
1126
function _DoAutoLinks($text) {
#
#    Turn <http://...>, <https://...> and <ftp://...> into plain links and
#    <address@domain.foo> (optionally prefixed with "mailto:") into
#    obfuscated mailto links.
#
#    Parameter:  String.
#    Returns:    The string with the automatic links converted.
#
    $text = preg_replace("!<((https?|ftp):[^'\">\\s]+)>!",
                         '<a href="\1">\1</a>', $text);

    # Email addresses: <address@domain.foo>
    #
    # This goes through preg_replace_callback rather than the "e" (eval)
    # pattern modifier: that modifier was removed in PHP 7.0, where using
    # it makes preg_replace fail and return NULL.
    $text = preg_replace_callback('{
        <
        (?:mailto:)?
        (
            [-.\w]+
            \@
            [-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+
        )
        >
        }xi',
        '_DoAutoLinks_callback', $text);

    return $text;
}
function _DoAutoLinks_callback($matches) {
#
#    Build the mailto link for one matched address ($matches[1]).
#    No quote unslashing is needed here: a callback receives the match
#    verbatim, unlike the eval-style replacement it replaces.
#
    return _EncodeEmailAddress(_UnescapeSpecialChars($matches[1]));
}
1147
1148
function _EncodeEmailAddress($addr) {
#
#    Input: an email address, e.g. "foo@example.com"
#
#    Output: a mailto link for that address in which the characters are
#        written as a random mix of decimal entities, hex entities and
#        raw characters, in the hope of defeating address-harvesting
#        spam bots. E.g.:
#
#      <a href="&#x6D;&#97;&#105;&#108;&#x74;&#111;:&#102;&#111;&#111;&#64;&#101;
#        x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;">&#102;&#111;&#111;
#        &#64;&#101;x&#x61;&#109;&#x70;&#108;&#x65;&#x2E;&#99;&#111;&#109;</a>
#
#    Based on a filter by Matthew Wickline, posted to the BBEdit-Talk
#    mailing list: <http://tinyurl.com/yu7ue>
#
    # Encode every character except ':', which is kept as-is so that the
    # "mailto:" prefix can be located again below.
    $encoded = preg_replace_callback('/([^\:])/',
                                     '_EncodeEmailAddress_callback',
                                     "mailto:" . $addr);

    $link = "<a href=\"" . $encoded . "\">" . $encoded . "</a>";

    # Remove the (encoded) "mailto:" from the visible link text:
    return preg_replace('/">.+?:/', '">', $link);
}
function _EncodeEmailAddress_callback($matches) {
#
#    Randomly encode one character ($matches[1]): roughly 45% of the time
#    as a hex entity, 45% as a decimal entity and 10% left raw.
#    '@' is never left raw.
#
    $char = $matches[1];
    $code = ord($char);
    $roll = rand(0, 100);

    if ($roll < 45) {
        return '&#x' . dechex($code) . ';';
    }
    if ($roll > 90 && $char != '@') {
        return $char;
    }
    return '&#' . $code . ';';
}
1186
1187
function _UnescapeSpecialChars($text) {
#
# Restore the special characters that were hidden earlier: every value of
# $md_escape_table found in the text is replaced by its key.
#
    global $md_escape_table;

    $placeholders = array_values($md_escape_table);
    $originals    = array_keys($md_escape_table);

    return str_replace($placeholders, $originals, $text);
}
1196
1197
1198 # _TokenizeHTML is shared between PHP Markdown and PHP SmartyPants.
1199 # We only define it if it is not already defined.
if (!function_exists('_TokenizeHTML')) :
function _TokenizeHTML($str) {
#
#   Parameter:  String containing HTML markup.
#   Returns:    An array of the tokens comprising the input
#               string. Each token is either a tag (possibly with nested
#               tags contained therein, such as <a href="<MTFoo>">), or a
#               run of text between tags. Each element of the array is a
#               two-element array; the first is either 'tag' or 'text';
#               the second is the actual value.
#
#               Empty runs of text (at the start or end of the string, or
#               between two adjacent tags) produce no token. An empty
#               string therefore yields an empty array.
#
#   Regular expression derived from the _tokenize() subroutine in
#   Brad Choate's MTRegex plugin.
#   <http://www.bradchoate.com/past/mtregex.php>
#
    $tokens = array();

    $match = '(?s:<!(?:--.*?--\s*)+>)|'.    # comment
             '(?s:<\?.*?\?>)|'.                # processing instruction
                                             # regular tags
             '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';

    # With PREG_SPLIT_DELIM_CAPTURE and a single capturing group the
    # result strictly alternates: text, tag, text, tag, ..., text.
    $parts = preg_split('{(' . $match . ')}', $str, -1, PREG_SPLIT_DELIM_CAPTURE);

    foreach ($parts as $index => $part) {
        if ($index % 2) {
            # Odd positions hold the captured delimiters, i.e. the tags.
            $tokens[] = array('tag', $part);
        }
        elseif ($part !== '') {
            # Even positions hold the text in between. Empty runs are
            # dropped rather than being reported as empty 'tag' tokens.
            $tokens[] = array('text', $part);
        }
    }

    return $tokens;
}
endif;
1236
1237
function _Outdent($text) {
#
# Strip one level of indentation from the start of every line: either a
# single tab or between one and $md_tab_width spaces.
#
    global $md_tab_width;

    $indent_re = '/^(\t|[ ]{1,' . $md_tab_width . '})/m';

    return preg_replace($indent_re, "", $text);
}
1245
1246
function _Detab($text) {
#
# Expand tabs to spaces, aligning to tab stops every $md_tab_width
# columns. Every line of the result, including the last one, ends with a
# newline.
#
    global $md_tab_width;

    $result = "";

    foreach (explode("\n", $text) as $line) {
        # The tab characters cut the line into cells. The first cell is
        # taken as-is; each later cell is preceded by enough spaces to
        # reach the next tab stop.
        $cells    = explode("\t", $line);
        $expanded = array_shift($cells);

        foreach ($cells as $cell) {
            $padding   = $md_tab_width - strlen($expanded) % $md_tab_width;
            $expanded .= str_repeat(" ", $padding) . $cell;
        }

        $result .= $expanded . "\n";
    }

    return $result;
}
1275
1276
function _UnslashQuotes($text) {
#
#    Undo the slashes that preg_replace adds automatically in front of
#    double quotes when the replacement is evaluated as an expression.
#    Parameter:  String.
#    Returns:    The string with every slash-double-quote (\") sequence
#                replaced by a single double quote.
#
    $slashed_quote = '\"';
    $plain_quote   = '"';

    return str_replace($slashed_quote, $plain_quote, $text);
}
1287
1288
1289 /*
1290
1291 PHP Markdown
1292 ============
1293
1294 Description
1295 -----------
1296
1297 This is a PHP translation of the original Markdown formatter written in
1298 Perl by John Gruber.
1299
1300 Markdown is a text-to-HTML filter; it translates an easy-to-read /
1301 easy-to-write structured text format into HTML. Markdown's text format
1302 is most similar to that of plain text email, and supports features such
1303 as headers, *emphasis*, code blocks, blockquotes, and links.
1304
1305 Markdown's syntax is designed not as a generic markup language, but
1306 specifically to serve as a front-end to (X)HTML. You can use span-level
1307 HTML tags anywhere in a Markdown document, and you can use block level
1308 HTML tags (like <div> and <table>) as well.
1309
1310 For more information about Markdown's syntax, see:
1311
1312 <http://daringfireball.net/projects/markdown/>
1313
1314
1315 Bugs
1316 ----
1317
1318 To file bug reports please send email to:
1319
1320 <michel.fortin@michelf.com>
1321
1322 Please include with your report: (1) the example input; (2) the output you
1323 expected; (3) the output Markdown actually produced.
1324
1325
1326 Version History
1327 ---------------
1328
1329 See the readme file for detailed release notes for this version.
1330
1331 1.0.1c - 9 Dec 2005
1332
1333 1.0.1b - 6 Jun 2005
1334
1335 1.0.1a - 15 Apr 2005
1336
1337 1.0.1 - 16 Dec 2004
1338
1339 1.0 - 21 Aug 2004
1340
1341
1342 Author & Contributors
1343 ---------------------
1344
1345 Original Perl version by John Gruber 
1346 <http://daringfireball.net/>
1347
1348 PHP port and other contributions by Michel Fortin 
1349 <http://www.michelf.com/>
1350
1351
1352 Copyright and License
1353 ---------------------
1354
1355 Copyright (c) 2004-2005 Michel Fortin 
1356 <http://www.michelf.com/> 
1357 All rights reserved.
1358
1359 Copyright (c) 2003-2004 John Gruber   
1360 <http://daringfireball.net/>   
1361 All rights reserved.
1362
1363 Redistribution and use in source and binary forms, with or without
1364 modification, are permitted provided that the following conditions are
1365 met:
1366
1367 *    Redistributions of source code must retain the above copyright notice,
1368     this list of conditions and the following disclaimer.
1369
1370 *    Redistributions in binary form must reproduce the above copyright
1371     notice, this list of conditions and the following disclaimer in the
1372     documentation and/or other materials provided with the distribution.
1373
1374 *    Neither the name "Markdown" nor the names of its contributors may
1375     be used to endorse or promote products derived from this software
1376     without specific prior written permission.
1377
1378 This software is provided by the copyright holders and contributors "as
1379 is" and any express or implied warranties, including, but not limited
1380 to, the implied warranties of merchantability and fitness for a
1381 particular purpose are disclaimed. In no event shall the copyright owner
1382 or contributors be liable for any direct, indirect, incidental, special,
1383 exemplary, or consequential damages (including, but not limited to,
1384 procurement of substitute goods or services; loss of use, data, or
1385 profits; or business interruption) however caused and on any theory of
1386 liability, whether in contract, strict liability, or tort (including
1387 negligence or otherwise) arising in any way out of the use of this
1388 software, even if advised of the possibility of such damage.
1389
1390 */
1391 ?>
1392
Note: See TracBrowser for help on using the browser.