4 # SmartyPants - Smart punctuation for web sites
7 # <http://daringfireball.net>
9 # PHP port by Michel Fortin
10 # <http://www.michelf.com/>
12 # Copyright (c) 2003-2004 John Gruber
13 # Copyright (c) 2004-2005 Michel Fortin
17 global $SmartyPantsPHPVersion, $SmartyPantsSyntaxVersion,
18 $smartypants_attr, $sp_tags_to_skip;
20 $SmartyPantsPHPVersion = '1.5.1e'; # Fri 9 Dec 2005
21 $SmartyPantsSyntaxVersion = '1.5.1'; # Fri 12 Mar 2004
24 # Configurable variables:
25 $smartypants_attr = "1"; # Change this to configure.
26 # 1 => "--" for em-dashes; no en-dash support
27 # 2 => "---" for em-dashes; "--" for en-dashes
28 # 3 => "--" for em-dashes; "---" for en-dashes
29 # See docs for more configuration options.
32 $sp_tags_to_skip = '<(/?)(?:pre|code|kbd|script|math)[\s>]';
35 # -- WordPress plugin interface -----------------------------------------------
37 Plugin Name: SmartyPants
38 Plugin URI: http://www.michelf.com/projects/php-smartypants/
39 Description: SmartyPants is a web publishing utility that translates plain ASCII punctuation characters into “smart” typographic punctuation HTML entities. This plugin <strong>replaces the default WordPress Texturize algorithm</strong> for the content and the title of your posts, the comments body and author name, and everywhere else Texturize normally applies. Based on the original Perl version by <a href="http://daringfireball.net/">John Gruber</a>.
42 Author URI: http://www.michelf.com/
44 if (isset($wp_version)) {
45 # Remove default Texturize filter that would conflict with SmartyPants.
46 remove_filter('category_description', 'wptexturize');
47 remove_filter('list_cats', 'wptexturize');
48 remove_filter('comment_author', 'wptexturize');
49 remove_filter('comment_text', 'wptexturize');
50 remove_filter('single_post_title', 'wptexturize');
51 remove_filter('the_title', 'wptexturize');
52 remove_filter('the_content', 'wptexturize');
53 remove_filter('the_excerpt', 'wptexturize');
54 # Add SmartyPants filter with priority 10 (same as Texturize).
55 add_filter('category_description', 'SmartyPants', 10);
56 add_filter('list_cats', 'SmartyPants', 10);
57 add_filter('comment_author', 'SmartyPants', 10);
58 add_filter('comment_text', 'SmartyPants', 10);
59 add_filter('single_post_title', 'SmartyPants', 10);
60 add_filter('the_title', 'SmartyPants', 10);
61 add_filter('the_content', 'SmartyPants', 10);
62 add_filter('the_excerpt', 'SmartyPants', 10);
65 # -- Smarty Modifier Interface ------------------------------------------------
66 function smarty_modifier_smartypants($text, $attr = NULL) {
67 return SmartyPants($text, $attr);
72 function SmartyPants($text, $attr = NULL, $ctx = NULL) {
73 global $smartypants_attr, $sp_tags_to_skip;
75 $text; # text to be parsed
76 $attr; # value of the smart_quotes="" attribute
77 $ctx; # MT context object (unused)
78 if ($attr == NULL) $attr = $smartypants_attr;
80 # Options to specify which transformations to make:
82 $convert_quot = 0; # should we translate &quot; entities into normal quotes?
87 # 2 : set all, using old school en- and em- dash shortcuts
88 # 3 : set all, using inverted old school en and em- dash shortcuts
91 # b : backtick quotes (``double'' only)
92 # B : backtick quotes (``double'' and `single')
94 # D : old school dashes
95 # i : inverted old school dashes
97 # w : convert &quot; entities to " for Dreamweaver users
103 else if ($attr == "1") {
104 # Do everything, turn all options on.
110 else if ($attr == "2") {
111 # Do everything, turn all options on, use old school dash shorthand.
117 else if ($attr == "3") {
118 # Do everything, turn all options on, use inverted old school dash shorthand.
124 else if ($attr == "-1") {
125 # Special "stupefy" mode.
129 $chars = preg_split('//', $attr);
130 foreach ($chars as $c){
131 if ($c == "q") { $do_quotes = 1; }
132 else if ($c == "b") { $do_backticks = 1; }
133 else if ($c == "B") { $do_backticks = 2; }
134 else if ($c == "d") { $do_dashes = 1; }
135 else if ($c == "D") { $do_dashes = 2; }
136 else if ($c == "i") { $do_dashes = 3; }
137 else if ($c == "e") { $do_ellipses = 1; }
138 else if ($c == "w") { $convert_quot = 1; }
140 # Unknown attribute option, ignore.
145 $tokens = _TokenizeHTML($text);
147 $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags.
149 $prev_token_last_char = ""; # This is a cheat, used to get some context
150 # for one-character tokens that consist of
151 # just a quote char. What we do is remember
152 # the last character of the previous text
153 # token, to use as context to curl single-
154 # character quote tokens correctly.
156 foreach ($tokens as $cur_token) {
157 if ($cur_token[0] == "tag") {
158 # Don't mess with quotes inside tags.
159 $result .= $cur_token[1];
160 if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
161 $in_pre = isset($matches[1]) && $matches[1] == '/' ?
0 : 1;
165 $last_char = substr($t, -1); # Remember last char of this token before processing.
167 $t = ProcessEscapes($t);
170 $t = preg_replace('/&quot;/', '"', $t); # Turn &quot; entities back into plain double quotes so they can be educated below.
174 if ($do_dashes == 1) $t = EducateDashes($t);
175 if ($do_dashes == 2) $t = EducateDashesOldSchool($t);
176 if ($do_dashes == 3) $t = EducateDashesOldSchoolInverted($t);
179 if ($do_ellipses) $t = EducateEllipses($t);
181 # Note: backticks need to be processed before quotes.
183 $t = EducateBackticks($t);
184 if ($do_backticks == 2) $t = EducateSingleBackticks($t);
189 # Special case: single-character ' token
190 if (preg_match('/\S/', $prev_token_last_char)) {
197 else if ($t == '"') {
198 # Special case: single-character " token
199 if (preg_match('/\S/', $prev_token_last_char)) {
208 $t = EducateQuotes($t);
212 if ($do_stupefy) $t = StupefyEntities($t);
214 $prev_token_last_char = $last_char;
223 function SmartQuotes($text, $attr = NULL, $ctx = NULL) {
224 global $smartypants_attr, $sp_tags_to_skip;
226 $text; # text to be parsed
227 $attr; # value of the smart_quotes="" attribute
228 $ctx; # MT context object (unused)
229 if ($attr == NULL) $attr = $smartypants_attr;
231 $do_backticks; # should we educate ``backticks'' -style quotes?
237 else if ($attr == 2) {
238 # smarten ``backticks'' -style quotes
245 # Special case to handle quotes at the very end of $text when preceded by
246 # an HTML tag. Add a space to give the quote education algorithm a bit of
247 # context, so that it can guess correctly that it's a closing quote:
248 $add_extra_space = 0;
249 if (preg_match("/>['\"]\\z/", $text)) {
250 $add_extra_space = 1; # Remember, so we can trim the extra space later.
254 $tokens = _TokenizeHTML($text);
256 $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags
258 $prev_token_last_char = ""; # This is a cheat, used to get some context
259 # for one-character tokens that consist of
260 # just a quote char. What we do is remember
261 # the last character of the previous text
262 # token, to use as context to curl single-
263 # character quote tokens correctly.
265 foreach ($tokens as $cur_token) {
266 if ($cur_token[0] == "tag") {
267 # Don't mess with quotes inside tags
268 $result .= $cur_token[1];
269 if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
270 $in_pre = isset($matches[1]) && $matches[1] == '/' ?
0 : 1;
274 $last_char = substr($t, -1); # Remember last char of this token before processing.
276 $t = ProcessEscapes($t);
278 $t = EducateBackticks($t);
282 # Special case: single-character ' token
283 if (preg_match('/\S/', $prev_token_last_char)) {
290 else if ($t == '"') {
291 # Special case: single-character " token
292 if (preg_match('/\S/', $prev_token_last_char)) {
301 $t = EducateQuotes($t);
305 $prev_token_last_char = $last_char;
310 if ($add_extra_space) {
311 $result = preg_replace('/ \z/', '', $result); # Trim trailing space if we added one earlier. preg_replace() returns the new string, so it must be assigned back.
317 function SmartDashes($text, $attr = NULL, $ctx = NULL) {
318 global $smartypants_attr, $sp_tags_to_skip;
320 $text; # text to be parsed
321 $attr; # value of the smart_dashes="" attribute
322 $ctx; # MT context object (unused)
323 if ($attr == NULL) $attr = $smartypants_attr;
325 # reference to the subroutine to use for dash education, default to EducateDashes:
326 $dash_sub_ref = 'EducateDashes';
332 else if ($attr == 2) {
333 # use old smart dash shortcuts, "--" for en, "---" for em
334 $dash_sub_ref = 'EducateDashesOldSchool';
336 else if ($attr == 3) {
337 # inverse of 2, "--" for em, "---" for en
338 $dash_sub_ref = 'EducateDashesOldSchoolInverted';
342 $tokens = _TokenizeHTML($text);
345 $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags
346 foreach ($tokens as $cur_token) {
347 if ($cur_token[0] == "tag") {
348 # Don't mess with quotes inside tags
349 $result .= $cur_token[1];
350 if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
351 $in_pre = isset($matches[1]) && $matches[1] == '/' ?
0 : 1;
356 $t = ProcessEscapes($t);
357 $t = $dash_sub_ref($t);
366 function SmartEllipses($text, $attr = NULL, $ctx = NULL) {
368 $text; # text to be parsed
369 $attr; # value of the smart_ellipses="" attribute
370 $ctx; # MT context object (unused)
371 if ($attr == NULL) $attr = $smartypants_attr;
379 $tokens = _TokenizeHTML($text);
382 $in_pre = 0; # Keep track of when we're inside <pre> or <code> tags
383 foreach ($tokens as $cur_token) {
384 if ($cur_token[0] == "tag") {
385 # Don't mess with quotes inside tags
386 $result .= $cur_token[1];
387 if (preg_match("@$sp_tags_to_skip@", $cur_token[1], $matches)) {
388 $in_pre = isset($matches[1]) && $matches[1] == '/' ?
0 : 1;
393 $t = ProcessEscapes($t);
394 $t = EducateEllipses($t);
403 function EducateQuotes($_) {
407 # Returns: The string, with "educated" curly quote HTML entities.
409 # Example input: "Isn't this fun?"
410 # Example output: “Isn’t this fun?”
412 # Make our own "punctuation" character class, because the POSIX-style
413 # [:PUNCT:] is only available in Perl 5.6 or later:
414 $punct_class = "[!\"#\\$\\%'()*+,-.\\/:;<=>?\\@\\[\\\\\]\\^_`{|}~]";
416 # Special case if the very first character is a quote
417 # followed by punctuation at a non-word-break. Close the quotes by brute force:
419 array("/^'(?=$punct_class\\B)/", "/^\"(?=$punct_class\\B)/"),
420 array('’', '”'), $_);
423 # Special case for double sets of quotes, e.g.:
424 # <p>He said, "'Quoted' words in a larger quote."</p>
426 array("/\"'(?=\w)/", "/'\"(?=\w)/"),
427 array('“‘', '‘“'), $_);
429 # Special case for decade abbreviations (the '80s):
430 $_ = preg_replace("/'(?=\\d{2}s)/", '’', $_);
432 $close_class = '[^\ \t\r\n\[\{\(\-]';
433 $dec_dashes = '&\#8211;|&\#8212;';
435 # Get most opening single quotes:
438 \\s | # a whitespace char, or
439 | # a non-breaking space entity, or
441 &[mn]dash; | # named dash entities
442 $dec_dashes | # or decimal entities
443 &\\#x201[34]; # or hex
446 (?=\\w) # followed by a word character
447 }x", '\1‘', $_);
448 # Single closing quotes:
452 (?(1)| # If $1 captured, then do nothing;
453 (?=\\s | s\\b) # otherwise, positive lookahead for a whitespace
454 ) # char or an 's' at a word ending position. This
455 # is a special case to handle something like:
456 # \"<i>Custer</i>'s Last Stand.\"
457 }xi", '\1’', $_);
459 # Any remaining single quotes should be opening ones:
460 $_ = str_replace("'", '‘', $_);
463 # Get most opening double quotes:
466 \\s | # a whitespace char, or
467 | # a non-breaking space entity, or
469 &[mn]dash; | # named dash entities
470 $dec_dashes | # or decimal entities
471 &\\#x201[34]; # or hex
474 (?=\\w) # followed by a word character
475 }x", '\1“', $_);
477 # Double closing quotes:
481 (?(1)|(?=\\s)) # If $1 captured, then do nothing;
482 # if not, then make sure the next char is whitespace.
483 }x", '\1”', $_);
485 # Any remaining quotes should be opening ones.
486 $_ = str_replace('"', '“', $_);
492 function EducateBackticks($_) {
495 # Returns: The string, with ``backticks'' -style double quotes
496 # translated into HTML curly quote entities.
498 # Example input: ``Isn't this fun?''
499 # Example output: “Isn't this fun?”
502 $_ = str_replace(array("``", "''",),
503 array('“', '”'), $_);
508 function EducateSingleBackticks($_) {
511 # Returns: The string, with `backticks' -style single quotes
512 # translated into HTML curly quote entities.
514 # Example input: `Isn't this fun?'
515 # Example output: ‘Isn’t this fun?’
518 $_ = str_replace(array("`", "'",),
519 array('‘', '’'), $_);
524 function EducateDashes($_) {
528 # Returns: The string, with each instance of "--" translated to
529 # an em-dash HTML entity.
532 $_ = str_replace('--', '—', $_);
537 function EducateDashesOldSchool($_) {
541 # Returns: The string, with each instance of "--" translated to
542 # an en-dash HTML entity, and each "---" translated to
543 # an em-dash HTML entity.
547 $_ = str_replace(array("---", "--",),
548 array('—', '–'), $_);
553 function EducateDashesOldSchoolInverted($_) {
557 # Returns: The string, with each instance of "--" translated to
558 # an em-dash HTML entity, and each "---" translated to
559 # an en-dash HTML entity. Two reasons why: First, unlike the
560 # en- and em-dash syntax supported by
561 # EducateDashesOldSchool(), it's compatible with existing
562 # entries written before SmartyPants 1.1, back when "--" was
563 # only used for em-dashes. Second, em-dashes are more
564 # common than en-dashes, and so it sort of makes sense that
565 # the shortcut should be shorter to type. (Thanks to Aaron
566 # Swartz for the idea.)
570 $_ = str_replace(array("---", "--",),
571 array('–', '—'), $_);
576 function EducateEllipses($_) {
579 # Returns: The string, with each instance of "..." translated to
580 # an ellipsis HTML entity. Also converts the case where
581 # there are spaces between the dots.
583 # Example input: Huh...?
584 # Example output: Huh…?
587 $_ = str_replace(array("...", ". . .",), '…', $_);
592 function StupefyEntities($_) {
595 # Returns: The string, with each SmartyPants HTML entity translated to
596 # its ASCII counterpart.
598 # Example input: “Hello — world.”
599 # Example output: "Hello -- world."
603 $_ = str_replace(array('–', '—'),
604 array('-', '--'), $_);
606 # single quote open close
607 $_ = str_replace(array('‘', '’'), "'", $_);
609 # double quote open close
610 $_ = str_replace(array('“', '”'), '"', $_);
612 $_ = str_replace('…', '...', $_); # ellipsis
618 function ProcessEscapes($_) {
621 # Returns: The string, after processing the following backslash
622 # escape sequences. This is useful if you want to force a "dumb"
623 # quote or other character to appear.
635 array('\\\\', '\"', "\'", '\.', '\-', '\`'),
636 array('&#92;', '&#34;', '&#39;', '&#46;', '&#45;', '&#96;'), $_); # Decimal entities for backslash, double quote, apostrophe, period, hyphen and backtick, in the same order as the escapes.
642 # _TokenizeHTML is shared between PHP SmartyPants and PHP Markdown.
643 # We only define it if it is not already defined.
644 if (!function_exists('_TokenizeHTML')) :
645 function _TokenizeHTML($str) {
647 # Parameter: String containing HTML markup.
648 # Returns: An array of the tokens comprising the input
649 # string. Each token is either a tag (possibly with nested
650 # tags contained therein, such as <a href="<MTFoo>">), or a
651 # run of text between tags. Each element of the array is a
652 # two-element array; the first is either 'tag' or 'text';
653 # the second is the actual value.
656 # Regular expression derived from the _tokenize() subroutine in
657 # Brad Choate's MTRegex plugin.
658 # <http://www.bradchoate.com/past/mtregex.php>
663 $match = '(?s:<!(?:--.*?--\s*)+>)|'. # comment
664 '(?s:<\?.*?\?>)|'. # processing instruction
666 '(?:<[/!$]?[-a-zA-Z0-9:]+\b(?>[^"\'>]+|"[^"]*"|\'[^\']*\')*>)';
668 $parts = preg_split("{($match)}", $str, -1, PREG_SPLIT_DELIM_CAPTURE
);
670 foreach ($parts as $part) {
671 if (++
$index %
2 && $part != '')
672 $tokens[] = array('text', $part);
674 $tokens[] = array('tag', $part);
689 This is a PHP translation of the original SmartyPants quote educator written in
692 SmartyPants is a web publishing utility that translates plain ASCII
693 punctuation characters into "smart" typographic punctuation HTML
694 entities. SmartyPants can perform the following transformations:
696 * Straight quotes (`"` and `'`) into "curly" quote HTML entities
697 * Backticks-style quotes (` ``like this'' `) into "curly" quote HTML
699 * Dashes (`--` and `---`) into en- and em-dash entities
700 * Three consecutive dots (`...`) into an ellipsis entity
702 SmartyPants does not modify characters within `<pre>`, `<code>`, `<kbd>`,
703 `<script>`, or `<math>` tag blocks. Typically, these tags are used to
704 display text where smart quotes and other "smart punctuation" would not
705 be appropriate, such as source code or example markup.
708 ### Backslash Escapes ###
710 If you need to use literal straight quotes (or plain hyphens and
711 periods), SmartyPants accepts the following backslash escape sequences
712 to force non-smart punctuation. It does so by transforming the escape
713 sequence into a decimal-encoded HTML entity:
715 Escape Value Character
716 ------ ----- ---------
724 This is useful, for example, when you want to use straight quotes as
725 foot and inch marks: 6'2" tall; a 17" iMac.
731 To file bug reports or feature requests (other than topics listed in the
732 Caveats section above) please send email to:
734 <michel.fortin@michelf.com>
736 If the bug involves quotes being curled the wrong way, please send example
740 ### Algorithmic Shortcomings ###
742 One situation in which quotes will get curled the wrong way is when
743 apostrophes are used at the start of leading contractions. For example:
745 'Twas the night before Christmas.
747 In the case above, SmartyPants will turn the apostrophe into an opening
748 single-quote, when in fact it should be a closing one. I don't think
749 this problem can be solved in the general case -- every word processor
750 I've tried gets this wrong as well. In such cases, it's best to use the
751 proper HTML entity for closing single-quotes (`&#8217;`) by hand.
759 * Corrected a bug that prevented special characters from being
765 * Corrected a small bug in `_TokenizeHTML` where a Doctype declaration
766 was not seen as HTML (smart quotes were applied inside).
771 * Changed a regular expression in `_TokenizeHTML` that could lead to
772 a segmentation fault with PHP 4.3.8 on Linux.
777 * Corrected a problem with quotes immediately following a dash
778 with no space between: `Text--"quoted text"--text.`
780 * PHP SmartyPants can now be used as a modifier by the Smarty
781 template engine. Rename the file to "modifier.smartypants.php"
782 and put it in your smarty plugins folder.
784 * Replaced a lot of space characters by tabs, saving about 4 KB.
789 * PHP Markdown and PHP Smartypants now share the same `_TokenizeHTML`
790 function when loaded simultaneously.
792 * Changed the internals of `_TokenizeHTML` to lower the PHP version
793 requirement to PHP 4.0.5.
798 * Initial release of PHP SmartyPants, based on version 1.5.1 of the
799 original SmartyPants written in Perl.
806 <http://daringfireball.net/>
808 Ported to PHP by Michel Fortin
809 <http://www.michelf.com/>
815 Portions of this plug-in are based on Brad Choate's nifty MTRegex plug-in.
816 Brad Choate also contributed a few bits of source code to this plug-in.
817 Brad Choate is a fine hacker indeed. (<http://bradchoate.com/>)
819 Jeremy Hedley (<http://antipixel.com/>) and Charles Wiltgen
820 (<http://playbacktime.com/>) deserve mention for exemplary beta testing.
823 Copyright and License
824 ---------------------
826 Copyright (c) 2003 John Gruber
827 <http://daringfireball.net/>
830 Copyright (c) 2004-2005 Michel Fortin
831 <http://www.michelf.com>
833 Redistribution and use in source and binary forms, with or without
834 modification, are permitted provided that the following conditions are met:
836 * Redistributions of source code must retain the above copyright
837 notice, this list of conditions and the following disclaimer.
839 * Redistributions in binary form must reproduce the above copyright
840 notice, this list of conditions and the following disclaimer in the
841 documentation and/or other materials provided with the distribution.
843 * Neither the name "SmartyPants" nor the names of its contributors may
844 be used to endorse or promote products derived from this software
845 without specific prior written permission.
847 This software is provided by the copyright holders and contributors "as is"
848 and any express or implied warranties, including, but not limited to, the
849 implied warranties of merchantability and fitness for a particular purpose
850 are disclaimed. In no event shall the copyright owner or contributors be
851 liable for any direct, indirect, incidental, special, exemplary, or
852 consequential damages (including, but not limited to, procurement of
853 substitute goods or services; loss of use, data, or profits; or business
854 interruption) however caused and on any theory of liability, whether in
855 contract, strict liability, or tort (including negligence or otherwise)
856 arising in any way out of the use of this software, even if advised of the
857 possibility of such damage.