root/trunk/lib/markdown.php

Revision 2, 37.8 kB (checked in by fabien, 3 years ago)

initial import

  • Property svn:mime-type set to text/x-php
  • Property svn:eol-style set to native
  • Property svn:keywords set to Id
Line 
1 <?php
2
3 #
4 # Markdown  -  A text-to-HTML conversion tool for web writers
5 #
6 # Copyright (c) 2004-2005 John Gruber 
7 # <http://daringfireball.net/projects/markdown/>
8 #
9 # Copyright (c) 2004-2005 Michel Fortin - PHP Port 
10 # <http://www.michelf.com/projects/php-markdown/>
11 #
12
13
# Bind the configuration and state variables to the global scope. At file
# level this is a no-op, but it keeps them global when this file is
# included from inside a function.
global    $MarkdownPHPVersion, $MarkdownSyntaxVersion,
        $md_empty_element_suffix, $md_tab_width,
        $md_nested_brackets_depth, $md_nested_brackets,
        $md_escape_table, $md_backslash_escape_table,
        $md_list_level;

$MarkdownPHPVersion    = '1.0.1c'; # Fri 9 Dec 2005
$MarkdownSyntaxVersion = '1.0.1';  # Sun 12 Dec 2004


#
# Global default settings:
#
$md_empty_element_suffix = " />";     # Change to ">" for HTML output
$md_tab_width = 4;

#
# WordPress settings:
#
$md_wp_posts    = true;  # Set to false to remove Markdown from posts.
$md_wp_comments = true;  # Set to false to remove Markdown from comments.
35
36
37 # -- WordPress Plugin Interface -----------------------------------------------
38 /*
39 Plugin Name: Markdown
40 Plugin URI: http://www.michelf.com/projects/php-markdown/
41 Description: <a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>
42 Version: 1.0.1c
43 Author: Michel Fortin
44 Author URI: http://www.michelf.com/
45 */
46 if (isset($wp_version)) {
47     # More details about how it works here:
48     # <http://www.michelf.com/weblog/2005/wordpress-text-flow-vs-markdown/>
49     
50     # Post content and excerpts
51     if ($md_wp_posts) {
52         remove_filter('the_content''wpautop');
53         remove_filter('the_excerpt''wpautop');
54         add_filter('the_content',     'Markdown', 6);
55         add_filter('get_the_excerpt', 'Markdown', 6);
56         add_filter('get_the_excerpt', 'trim', 7);
57         add_filter('the_excerpt',     'md_add_p');
58         add_filter('the_excerpt_rss', 'md_strip_p');
59         
60         remove_filter('content_save_pre''balanceTags', 50);
61         remove_filter('excerpt_save_pre''balanceTags', 50);
62         add_filter('the_content',        'balanceTags', 50);
63         add_filter('get_the_excerpt', 'balanceTags', 9);
64         
65         function md_add_p($text) {
66             if (strlen($text) == 0) return;
67             if (strcasecmp(substr($text, -3), '<p>') == 0) return $text;
68             return '<p>'.$text.'</p>';
69         }
70         function md_strip_p($t) { return preg_replace('{</?[pP]>}', '', $t); }
71     }
72     
73     # Comments
74     if ($md_wp_comments) {
75         remove_filter('comment_text', 'wpautop');
76         remove_filter('comment_text', 'make_clickable');
77         add_filter('pre_comment_content', 'Markdown', 6);
78         add_filter('pre_comment_content', 'md_hide_tags', 8);
79         add_filter('pre_comment_content', 'md_show_tags', 12);
80         add_filter('get_comment_text',    'Markdown', 6);
81         add_filter('get_comment_excerpt', 'Markdown', 6);
82         add_filter('get_comment_excerpt', 'md_strip_p', 7);
83     
84         global $md_hidden_tags;
85         $md_hidden_tags = array(
86             '<p>'    => md5('<p>'),        '</p>'    => md5('</p>'),
87             '<pre>'    => md5('<pre>'),    '</pre>'=> md5('</pre>'),
88             '<ol>'    => md5('<ol>'),        '</ol>'    => md5('</ol>'),
89             '<ul>'    => md5('<ul>'),        '</ul>'    => md5('</ul>'),
90             '<li>'    => md5('<li>'),        '</li>'    => md5('</li>'),
91             );
92         
93         function md_hide_tags($text) {
94             global $md_hidden_tags;
95             return str_replace(array_keys($md_hidden_tags),
96                                 array_values($md_hidden_tags), $text);
97         }
98         function md_show_tags($text) {
99             global $md_hidden_tags;
100             return str_replace(array_values($md_hidden_tags),
101                                 array_keys($md_hidden_tags), $text);
102         }
103     }
104 }
105
106
107 # -- bBlog Plugin Info --------------------------------------------------------
function identify_modifier_markdown() {
#
# Returns the plugin description record: an associative array giving
# the modifier's name, type, display name, description, authors,
# licence, version and help text. The version is read from the global
# $MarkdownPHPVersion.
#
    global $MarkdownPHPVersion;

    $info = array();
    $info['name']        = 'markdown';
    $info['type']        = 'modifier';
    $info['nicename']    = 'Markdown';
    $info['description'] = 'A text-to-HTML conversion tool for web writers';
    $info['authors']     = 'Michel Fortin and John Gruber';
    $info['licence']     = 'GPL';
    $info['version']     = $MarkdownPHPVersion;
    $info['help']        = '<a href="http://daringfireball.net/projects/markdown/syntax">Markdown syntax</a> allows you to write using an easy-to-read, easy-to-write plain text format. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>. <a href="http://www.michelf.com/projects/php-markdown/">More...</a>';

    return $info;
}
121
122 # -- Smarty Modifier Interface ------------------------------------------------
function smarty_modifier_markdown($text) {
    # Thin adapter: hand the text to Markdown() and return its result.
    $html = Markdown($text);
    return $html;
}
126
127 # -- Textile Compatibility Mode -----------------------------------------------
128 # Rename this file to "classTextile.php" and it can replace Textile anywhere.
if (0 == strcasecmp(substr(__FILE__, -16), "classTextile.php")) {
    # This file has been named "classTextile.php" (compared without regard
    # to case), so act as a stand-in for Textile.

    # Optionally load PHP SmartyPants; errors from a missing file are
    # suppressed.
    @include_once 'smartypants.php';

    # Stand-in Textile class whose only method delegates to Markdown.
    class Textile {
        # Runs Markdown on $text when both $lite and $encode are empty,
        # then SmartyPants if that function is available. $noimage and
        # $strict are accepted but not used.
        function TextileThis($text, $lite='', $encode='', $noimage='', $strict='') {
            $use_markdown = ($lite == '' && $encode == '');
            if ($use_markdown) {
                $text = Markdown($text);
            }
            if (function_exists('SmartyPants')) {
                $text = SmartyPants($text);
            }
            return $text;
        }
    }
}
141
142
143
144 #
145 # Globals:
146 #
147
# Regex to match balanced [brackets].
# The pattern is unrolled to a fixed maximum nesting depth
# ($md_nested_brackets_depth) by repeating its opening half and its
# closing half that many times.
$md_nested_brackets_depth = 6;
$md_nested_brackets =
    str_repeat('(?>[^\[\]]+|\[', $md_nested_brackets_depth).
    str_repeat('\])*', $md_nested_brackets_depth);

# Table of hash values for escaped characters:
$md_escape_table = array(
    "\\" => md5("\\"),
    "`" => md5("`"),
    "*" => md5("*"),
    "_" => md5("_"),
    "{" => md5("{"),
    "}" => md5("}"),
    "[" => md5("["),
    "]" => md5("]"),
    "(" => md5("("),
    ")" => md5(")"),
    ">" => md5(">"),
    "#" => md5("#"),
    "+" => md5("+"),
    "-" => md5("-"),
    "." => md5("."),
    "!" => md5("!")
);
# Create an identical table keyed by the backslash-escaped form of each
# character (e.g. "\*"). Start from an explicit empty array so the table
# is well defined and holds no stale entries.
$md_backslash_escape_table = array();
foreach ($md_escape_table as $key => $char)
    $md_backslash_escape_table["\\$key"] = $char;
178
179
function Markdown($text) {
#
# Entry point: takes Markdown source text and returns the converted
# output with a trailing newline. The stages below must run in exactly
# this order.
#
    global $md_urls, $md_titles, $md_html_blocks;

    # Reset the per-document tables so that one article's link
    # definitions and hashed HTML blocks never leak into the next when
    # several articles are rendered on the same page.
    $md_urls        = array();
    $md_titles      = array();
    $md_html_blocks = array();

    # Normalize DOS and Mac line endings to Unix ones and make sure the
    # text ends with a couple of newlines.
    $text = str_replace(array("\r\n", "\r"), "\n", $text) . "\n\n";

    # Convert all tabs to spaces.
    $text = _Detab($text);

    # Empty out lines consisting only of spaces and tabs, so later
    # patterns can match consecutive blank lines with a plain \n+.
    $text = preg_replace('/^[ \t]+$/m', '', $text);

    # Remaining stages, each taking and returning the whole text:
    # hash block-level HTML, strip and store link definitions, run the
    # block-level transformations, then unescape special characters.
    $stages = array(
        '_HashHTMLBlocks',
        '_StripLinkDefinitions',
        '_RunBlockGamut',
        '_UnescapeSpecialChars',
    );
    foreach ($stages as $stage) {
        $text = $stage($text);
    }

    return $text . "\n";
}
224
225
function _StripLinkDefinitions($text) {
#
# Removes link definitions of the form
#     [id]: url "optional title"
# from $text and returns what is left. Each match is handed to
# _StripLinkDefinitions_callback().
#
    global $md_tab_width;

    # A definition may be indented by at most one column less than a tab.
    $max_indent = $md_tab_width - 1;

    $definition_re = '{
                        ^[ ]{0,'.$max_indent.'}\[(.+)\]:    # id = $1
                          [ \t]*
                          \n?                # maybe *one* newline
                          [ \t]*
                        <?(\S+?)>?            # url = $2
                          [ \t]*
                          \n?                # maybe one newline
                          [ \t]*
                        (?:
                            (?<=\s)            # lookbehind for whitespace
                            ["(]
                            (.+?)            # title = $3
                            [")]
                            [ \t]*
                        )?    # title is optional
                        (?:\n+|\Z)
        }xm';

    return preg_replace_callback(
        $definition_re,
        '_StripLinkDefinitions_callback',
        $text);
}
function _StripLinkDefinitions_callback($matches) {
#
# Records one link definition in the global $md_urls / $md_titles
# tables, keyed by the lower-cased id, and returns the empty string
# that replaces the definition in the text.
#   $matches[1] = id, $matches[2] = url, $matches[3] = optional title
#
    global $md_urls, $md_titles;

    $id = strtolower($matches[1]);
    $md_urls[$id] = _EncodeAmpsAndAngles($matches[2]);

    if (isset($matches[3])) {
        # Double quotes are stored as &quot; entities.
        $md_titles[$id] = str_replace('"', '&quot;', $matches[3]);
    }

    return '';
}
265
266
function _HashHTMLBlocks($text) {
#
# Replaces block-level HTML in $text with hash keys and returns the
# result. Four patterns are applied one after another; every match is
# handed to _HashHTMLBlocks_callback().
#
# Only block-level tags are hashed, because "paragraphs" wrapped in
# non-block-level tags (anchors, phrase emphasis, spans) must still get
# <p> tags around them. The tag lists are hard-coded.
#
    global $md_tab_width;
    $max_indent = $md_tab_width - 1;

    # The nested-block pass also accepts ins and del; the liberal pass
    # does not.
    $nested_tags  = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
                    'script|noscript|form|fieldset|iframe|math|ins|del';
    $liberal_tags = 'p|div|h[1-6]|blockquote|pre|table|dl|ol|ul|'.
                    'script|noscript|form|fieldset|iframe|math';

    $passes = array();

    # Pass 1: nested blocks, e.g.:
    #     <div>
    #         <div>
    #         tags for inner block must be indented.
    #         </div>
    #     </div>
    # The outermost tags must start at the left margin and the inner
    # ones must be indented. This has to run before the liberal pass,
    # which would start at the first `<div>` and stop at the first
    # `</div>`.
    $passes[] = "{
                (                        # save in $1
                    ^                    # start of line  (with /m)
                    <($nested_tags)    # start tag = $2
                    \\b                    # word break
                    (.*\\n)*?            # any number of lines, minimally matching
                    </\\2>                # the matching end tag
                    [ \\t]*                # trailing spaces/tabs
                    (?=\\n+|\\Z)    # followed by a newline or end of document
                )
        }xm";

    # Pass 2: match more liberally, simply from `\n<tag>` to `</tag>\n`.
    $passes[] = "{
                (                        # save in $1
                    ^                    # start of line  (with /m)
                    <($liberal_tags)    # start tag = $2
                    \\b                    # word break
                    (.*\\n)*?            # any number of lines, minimally matching
                    .*</\\2>                # the matching end tag
                    [ \\t]*                # trailing spaces/tabs
                    (?=\\n+|\\Z)    # followed by a newline or end of document
                )
        }xm";

    # Pass 3: special case just for <hr />, kept separate rather than
    # complicating the patterns above.
    $passes[] = '{
                (?:
                    (?<=\n\n)        # Starting after a blank line
                    |                # or
                    \A\n?            # the beginning of the doc
                )
                (                        # save in $1
                    [ ]{0,'.$max_indent.'}
                    <(hr)                # start tag = $2
                    \b                    # word break
                    ([^<>])*?            #
                    /?>                    # the matching end tag
                    [ \t]*
                    (?=\n{2,}|\Z)        # followed by a blank line or end of document
                )
        }x';

    # Pass 4: special case for standalone HTML comments.
    $passes[] = '{
                (?:
                    (?<=\n\n)        # Starting after a blank line
                    |                # or
                    \A\n?            # the beginning of the doc
                )
                (                        # save in $1
                    [ ]{0,'.$max_indent.'}
                    (?s:
                        <!
                        (--.*?--\s*)+
                        >
                    )
                    [ \t]*
                    (?=\n{2,}|\Z)        # followed by a blank line or end of document
                )
            }x';

    foreach ($passes as $pattern) {
        $text = preg_replace_callback(
            $pattern,
            '_HashHTMLBlocks_callback',
            $text);
    }

    return $text;
}
function _HashHTMLBlocks_callback($matches) {
#
# Stores the matched block ($matches[1]) in the global $md_html_blocks
# table under its MD5 hash and returns that hash, surrounded by blank
# lines, as the text that replaces the block.
#
    global $md_html_blocks;

    $block = $matches[1];
    $hash  = md5($block);
    $md_html_blocks[$hash] = $block;

    return "\n\n" . $hash . "\n\n";
}
375
376
function _RunBlockGamut($text) {
#
# Applies, in order, the transformations that produce block-level
# markup: headers, horizontal rules, lists, code blocks, block quotes
# and finally paragraphs. Returns the transformed text.
#
    global $md_empty_element_suffix;

    $text = _DoHeaders($text);

    # Horizontal rules: a line made of three or more *, - or _
    # characters, optionally separated by single spaces.
    $rule_patterns = array(
        '{^[ ]{0,2}([ ]?\*[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? -[ ]?){3,}[ \t]*$}mx',
        '{^[ ]{0,2}([ ]? _[ ]?){3,}[ \t]*$}mx',
    );
    $rule_tag = "\n<hr$md_empty_element_suffix\n";
    $text = preg_replace($rule_patterns, $rule_tag, $text);

    foreach (array('_DoLists', '_DoCodeBlocks', '_DoBlockQuotes') as $pass) {
        $text = $pass($text);
    }

    # _HashHTMLBlocks() is called again here so that the block-level
    # markup generated above is hashed before paragraphs are formed;
    # otherwise <p> tags would be wrapped around block-level tags.
    return _FormParagraphs(_HashHTMLBlocks($text));
}
407
408
function _RunSpanGamut($text) {
#
# Applies, in order, the transformations that happen *inside*
# block-level elements such as paragraphs, headers and list items.
# Returns the transformed text.
#
    global $md_empty_element_suffix;

    # Order matters:
    #  - images come before anchors, because ![foo][f] looks like an
    #    anchor;
    #  - auto-links such as <http://example.com/> come after anchors,
    #    because inline links may use < and > delimiters: [this](<url>).
    $passes = array(
        '_DoCodeSpans',
        '_EscapeSpecialChars',
        '_DoImages',
        '_DoAnchors',
        '_DoAutoLinks',
        '_EncodeAmpsAndAngles',
        '_DoItalicsAndBold',
    );
    foreach ($passes as $pass) {
        $text = $pass($text);
    }

    # Hard breaks: two or more spaces at the end of a line.
    $break_tag = "<br$md_empty_element_suffix\n";
    return preg_replace('/ {2,}\n/', $break_tag, $text);
}
437
438
function _EscapeSpecialChars($text) {
#
# Splits $text into tokens with _TokenizeHTML() and rebuilds it:
#  - inside tag tokens, * and _ are replaced by their entries from
#    $md_escape_table, so they cannot be mistaken for Markdown
#    emphasis markers;
#  - every other token goes through _EncodeBackslashEscapes().
#
    global $md_escape_table;

    $rebuilt = '';
    foreach (_TokenizeHTML($text) as $token) {
        $content = $token[1];
        if ($token[0] == 'tag') {
            # MD5 values are used as placeholders; likely overkill, but
            # it should prevent accidental collisions with the escape
            # values.
            $rebuilt .= str_replace(
                array('*', '_'),
                array($md_escape_table['*'], $md_escape_table['_']),
                $content);
        } else {
            $rebuilt .= _EncodeBackslashEscapes($content);
        }
    }
    return $rebuilt;
}
467
468
function _DoAnchors($text) {
#
# Turns Markdown link shortcuts into XHTML <a> tags and returns the
# result. Reference-style links are handled first, then inline-style
# links; each match is passed to the corresponding callback.
#
    global $md_nested_brackets;

    # Reference-style links: [link text] [id]
    $reference_re = "{
        (                    # wrap whole match in $1
          \\[
            ($md_nested_brackets)    # link text = $2
          \\]

          [ ]?                # one optional space
          (?:\\n[ ]*)?        # one optional newline followed by spaces

          \\[
            (.*?)        # id = $3
          \\]
        )
        }xs";

    # Inline-style links: [link text](url "optional title")
    $inline_re = "{
        (                # wrap whole match in $1
          \\[
            ($md_nested_brackets)    # link text = $2
          \\]
          \\(            # literal paren
            [ \\t]*
            <?(.*?)>?    # href = $3
            [ \\t]*
            (            # $4
              (['\"])    # quote char = $5
              (.*?)        # Title = $6
              \\5        # matching quote
            )?            # title is optional
          \\)
        )
        }xs";

    $text = preg_replace_callback(
        $reference_re, '_DoAnchors_reference_callback', $text);

    return preg_replace_callback(
        $inline_re, '_DoAnchors_inline_callback', $text);
}
function _DoAnchors_reference_callback($matches) {
#
# Builds the <a> tag for one reference-style link.
#   $matches[1] = whole match, $matches[2] = link text, $matches[3] = id
# The id is lower-cased; an empty id (shortcut form [this][]) falls
# back to the lower-cased link text. If no URL is stored for the id,
# the whole match is returned untouched.
#
    global $md_urls, $md_titles, $md_escape_table;

    $label = $matches[2];
    $id    = strtolower($matches[3]);
    if ($id == "") {
        $id = strtolower($label);
    }

    if (!isset($md_urls[$id])) {
        return $matches[1];
    }

    # * and _ in the URL and title are swapped for their escape-table
    # values so they do not conflict with italics/bold.
    $markers = array('*', '_');
    $hashes  = array($md_escape_table['*'], $md_escape_table['_']);

    $anchor = '<a href="' . str_replace($markers, $hashes, $md_urls[$id]) . '"';
    if (isset($md_titles[$id])) {
        $anchor .= ' title="' . str_replace($markers, $hashes, $md_titles[$id]) . '"';
    }

    return $anchor . '>' . $label . '</a>';
}
548 function _DoAnchors_inline_callback($matches) {
549     global $md_escape_table;
550     $whole_match    = $matches[1];
551     $link_text        = $matches[2];
552     $url            = $matches[3];
553     $title            =& $matches[6];
554
555     # We've got to encode these to avoid conflicting with italics/bold.
556     $url = str_replace(array('*', '_'),
557                        array($md_escape_table['*'], $md_escape_table['_']),
558                        $url);
559     $result = "<a href=\"$url\"";
560     if (isset(