#
# SmartyPants  -  A Movable Type Plug-In
# by John Gruber
# http://daringfireball.net
#
# Version 1.0
# Wed Nov 13, 2002
#
# See the readme or POD for details, installation instructions, and
# license information.
#
# Copyright (c) 2002 John Gruber
#

use strict;
use MT::Template::Context;

# Register each transformation as a global filter attribute.
MT::Template::Context->add_global_filter( smarty_pants => \&SmartyPants);
MT::Template::Context->add_global_filter( smart_quotes => \&SmartQuotes);
MT::Template::Context->add_global_filter( smart_dashes => \&SmartDashes);
MT::Template::Context->add_global_filter( smart_ellipses => \&SmartEllipses);


sub SmartyPants {
#
#   Global filter behind the smarty_pants="" attribute: applies any
#   combination of the quote, backtick, dash and ellipsis transformations
#   to the text between HTML tags.
#
#   Parameters:
#       $text   text to be parsed
#       $attr   value of the smarty_pants="" attribute
#       $ctx    MT context object (unused)
#
#   Attribute values:
#       0 : do nothing
#       1 : turn every transformation on
#       otherwise a string of single-character flags:
#           q : quotes
#           b : backticks
#           d : dashes
#           e : ellipses
#       Unknown flag characters are ignored.
#
#   Returns:    The text, with the selected transformations applied
#               everywhere except inside tags and inside <pre> / <code>
#               blocks.
#
    my $text = shift;
    my $attr = shift;
    my $ctx  = shift;

    # Treat a missing attribute value as "no flags set".
    $attr = '' unless defined $attr;

    # Options to specify which transformations to make:
    my ($do_quotes, $do_backticks, $do_dashes, $do_ellipses);

    if ($attr eq "0") {
        # Do nothing.
        return $text;
    }
    elsif ($attr eq "1") {
        # Do everything, turn all options on.
        $do_quotes    = 1;
        $do_backticks = 1;
        $do_dashes    = 1;
        $do_ellipses  = 1;
    }
    else {
        foreach my $flag (split(//, $attr)) {
            if    ($flag eq "q") { $do_quotes    = 1; }
            elsif ($flag eq "b") { $do_backticks = 1; }
            elsif ($flag eq "d") { $do_dashes    = 1; }
            elsif ($flag eq "e") { $do_ellipses  = 1; }
            else {
                # Unknown attribute option, ignore.
            }
        }
    }

    my $tokens = _tokenize($text);
    my $result = '';

    # Nesting depth of open <pre> / <code> elements. A depth (rather than
    # a flag) keeps the text after </code> in "<pre><code>..</code>..</pre>"
    # protected until the outer <pre> is closed as well.
    my $skip_depth = 0;

    foreach my $cur_token (@$tokens) {
        if ($cur_token->[0] eq "tag") {
            # Don't mess with quotes inside tags.
            $result .= $cur_token->[1];

            # Anchored, so that only the tag's own name is examined (not
            # markup nested inside an attribute or comment); HTML tag
            # names are case-insensitive.
            if ($cur_token->[1] =~ m!\A<(/?)(?:pre|code)[\s>]!i) {
                if ($1 eq '/') {
                    $skip_depth-- if $skip_depth > 0;
                }
                else {
                    $skip_depth++;
                }
            }
        }
        else {
            my $t = $cur_token->[1];
            # Note: backticks need to be processed before quotes.
            unless ($skip_depth) {
                $t = EducateBackticks($t) if $do_backticks;
                $t = EducateQuotes($t)    if $do_quotes;
                $t = EducateDashes($t)    if $do_dashes;
                $t = EducateEllipses($t)  if $do_ellipses;
            }
            $result .= $t;
        }
    }
    return $result;
}


sub SmartQuotes {
#
#   Global filter behind the smart_quotes="" attribute: educates straight
#   quotes (and optionally ``backticks'' -style quotes) in the text
#   between HTML tags.
#
#   Parameters:
#       $text   text to be parsed
#       $attr   value of the smart_quotes="" attribute
#       $ctx    MT context object (unused)
#
#   Attribute values (compared numerically, so a non-numeric value
#   behaves like 0):
#       0 : do nothing
#       2 : educate ``backticks'' -style quotes as well as normal quotes
#       anything else : educate normal quotes only
#
#   Returns:    The text, with quotes educated everywhere except inside
#               tags and inside <pre> / <code> blocks.
#
    my $text = shift;
    my $attr = shift;
    my $ctx  = shift;

    if ($attr == 0) {
        # do nothing;
        return $text;
    }

    # Should we educate ``backticks'' -style quotes?
    my $do_backticks = ($attr == 2) ? 1 : 0;

    my $tokens = _tokenize($text);
    my $result = '';

    # Nesting depth of open <pre> / <code> elements. A depth (rather than
    # a flag) keeps the text after </code> in "<pre><code>..</code>..</pre>"
    # protected until the outer <pre> is closed as well.
    my $skip_depth = 0;

    foreach my $cur_token (@$tokens) {
        if ($cur_token->[0] eq "tag") {
            # Don't mess with quotes inside tags
            $result .= $cur_token->[1];

            # Anchored, so that only the tag's own name is examined (not
            # markup nested inside an attribute or comment); HTML tag
            # names are case-insensitive.
            if ($cur_token->[1] =~ m!\A<(/?)(?:pre|code)[\s>]!i) {
                if ($1 eq '/') {
                    $skip_depth-- if $skip_depth > 0;
                }
                else {
                    $skip_depth++;
                }
            }
        }
        else {
            my $t = $cur_token->[1];
            unless ($skip_depth) {
                # Backticks need to be processed before quotes.
                $t = EducateBackticks($t) if $do_backticks;
                $t = EducateQuotes($t);
            }
            $result .= $t;
        }
    }
    return $result;
}


sub SmartDashes {
#
#   Global filter behind the smart_dashes="" attribute: educates
#   double-hyphen dashes in the text between HTML tags.
#
#   Parameters:
#       $text   text to be parsed
#       $attr   value of the smart_dashes="" attribute
#       $ctx    MT context object (unused)
#
#   Attribute values (compared numerically, so a non-numeric value
#   behaves like 0):
#       0 : do nothing
#       anything else : educate dashes
#
#   Returns:    The text, with dashes educated everywhere except inside
#               tags and inside <pre> / <code> blocks.
#
    my $text = shift;
    my $attr = shift;
    my $ctx  = shift;

    if ($attr == 0) {
        # do nothing;
        return $text;
    }

    my $tokens = _tokenize($text);
    my $result = '';

    # Nesting depth of open <pre> / <code> elements. A depth (rather than
    # a flag) keeps the text after </code> in "<pre><code>..</code>..</pre>"
    # protected until the outer <pre> is closed as well.
    my $skip_depth = 0;

    foreach my $cur_token (@$tokens) {
        if ($cur_token->[0] eq "tag") {
            # Tags are passed through untouched.
            $result .= $cur_token->[1];

            # Anchored, so that only the tag's own name is examined (not
            # markup nested inside an attribute or comment); HTML tag
            # names are case-insensitive.
            if ($cur_token->[1] =~ m!\A<(/?)(?:pre|code)[\s>]!i) {
                if ($1 eq '/') {
                    $skip_depth-- if $skip_depth > 0;
                }
                else {
                    $skip_depth++;
                }
            }
        }
        else {
            my $t = $cur_token->[1];
            $t = EducateDashes($t) unless $skip_depth;
            $result .= $t;
        }
    }
    return $result;
}


sub SmartEllipses {
#
#   Global filter behind the smart_ellipses="" attribute: educates
#   dot-dot-dot ellipses in the text between HTML tags.
#
#   Parameters:
#       $text   text to be parsed
#       $attr   value of the smart_ellipses="" attribute
#       $ctx    MT context object (unused)
#
#   Attribute values (compared numerically, so a non-numeric value
#   behaves like 0):
#       0 : do nothing
#       anything else : educate ellipses
#
#   Returns:    The text, with ellipses educated everywhere except inside
#               tags and inside <pre> / <code> blocks.
#
    my $text = shift;
    my $attr = shift;
    my $ctx  = shift;

    if ($attr == 0) {
        # do nothing;
        return $text;
    }

    my $tokens = _tokenize($text);
    my $result = '';

    # Nesting depth of open <pre> / <code> elements. A depth (rather than
    # a flag) keeps the text after </code> in "<pre><code>..</code>..</pre>"
    # protected until the outer <pre> is closed as well.
    my $skip_depth = 0;

    foreach my $cur_token (@$tokens) {
        if ($cur_token->[0] eq "tag") {
            # Tags are passed through untouched.
            $result .= $cur_token->[1];

            # Anchored, so that only the tag's own name is examined (not
            # markup nested inside an attribute or comment); HTML tag
            # names are case-insensitive.
            if ($cur_token->[1] =~ m!\A<(/?)(?:pre|code)[\s>]!i) {
                if ($1 eq '/') {
                    $skip_depth-- if $skip_depth > 0;
                }
                else {
                    $skip_depth++;
                }
            }
        }
        else {
            my $t = $cur_token->[1];
            $t = EducateEllipses($t) unless $skip_depth;
            $result .= $t;
        }
    }
    return $result;
}


sub EducateQuotes {
#
#   Parameter:  String of text.
#
#   Returns:    The string, with "educated" curly quote HTML entities:
#                   &#8216; / &#8217;   opening / closing single quote
#                   &#8220; / &#8221;   opening / closing double quote
#               An undefined or empty argument is returned unchanged.
#
#   Example input:  "Isn't this fun?"
#   Example output: &#8220;Isn&#8217;t this fun?&#8221;
#
#   The text is held in a lexical, so the caller's $_ is left alone.
#

    my $text = shift;
    return $text unless defined $text && length $text;

    # Single closing quotes. The replacement is evaluated as code (/e) so
    # that an unset capture group can be turned into an empty string
    # without triggering an "uninitialized" warning.
    $text =~ s{
        ([^\ \t\r\n\[\{\(])?    # optional preceding char that is neither
                                # whitespace nor an opening bracket
        '
        (?(1)|          # If group 1 captured, then do nothing;
          (?=\s | s\b)  # otherwise, positive lookahead for a whitespace
        )               # char or an 's' at a word ending position. This
                        # is a special case to handle something like:
                        # "Custer's Last Stand."
    }{ (defined $1 ? $1 : '') . '&#8217;' }xge;

    # Single opening quotes: whatever apostrophes are left.
    $text =~ s/'/&#8216;/g;

    # Double closing quotes:
    $text =~ s{
        ([^\ \t\r\n\[\{\(])?
        "
        (?(1)|(?=\s))   # If group 1 captured, then do nothing;
                        # if not, then make sure the next char is whitespace
    }{ (defined $1 ? $1 : '') . '&#8221;' }xge;

    # Double opening quotes: whatever double quotes are left.
    $text =~ s/"/&#8220;/g;

    return $text;
}


sub EducateBackticks {
#
#   Parameter:  String of text.
#   Returns:    The string, with ``backticks'' -style double quotes
#               translated into HTML curly quote entities: two backticks
#               become &#8220; and two apostrophes become &#8221;.
#               An undefined argument is returned unchanged.
#
#   Example input:  ``Isn't this fun?''
#   Example output: &#8220;Isn't this fun?&#8221;
#
#   The text is held in a lexical, so the caller's $_ is left alone.
#

    my $text = shift;
    return $text unless defined $text;

    $text =~ s/``/&#8220;/g;
    $text =~ s/''/&#8221;/g;
    return $text;
}


sub EducateDashes {
#
#   Parameter:  String of text.
#
#   Returns:    The string, with each instance of " -- " translated to
#               an em-dash HTML entity (&#8212;). The surrounding spaces
#               are kept. An undefined argument is returned unchanged.
#
#   Example input:  Dashes -- like this
#   Example output: Dashes &#8212; like this
#
#   The text is held in a lexical, so the caller's $_ is left alone.
#

    my $text = shift;
    return $text unless defined $text;

    # Pattern looks for any of:
    #   space-dash-dash-space
    #   start-dash-dash-space
    #   space-dash-dash-end
    #   start-dash-dash-end
    $text =~ s/((?: |^))--((?: |$))/$1&#8212;$2/g;
    return $text;
}


sub EducateEllipses {
#
#   Parameter:  String of text.
#   Returns:    The string, with each instance of "..." translated to
#               an ellipsis HTML entity (&#8230;). An undefined argument
#               is returned unchanged.
#
#   Example input:  Huh...?
#   Example output: Huh&#8230;?
#
#   The text is held in a lexical, so the caller's $_ is left alone.
#

    my $text = shift;
    return $text unless defined $text;

    # The pattern below uses an optional leading dot, so that if a sentence
    # ends with a period and then three extra dots, the ellipsis entity will
    # come after the period.
    $text =~ s/(\.?)\.{3}/$1&#8230;/g;
    return $text;
}


sub _tokenize {
#
#   Splits a string of HTML markup into alternating tag / text pieces.
#
#   Parameter:  String of text containing HTML markup.
#   Returns:    Reference to an array of tokens, in document order. Every
#               token is itself a two-element array:
#                   [0]  the kind, either 'tag' or 'text'
#                   [1]  the matched markup or the run of text
#               A tag may contain one level of nested angle brackets,
#               such as <a href="<MTFoo>">. Empty text runs are never
#               emitted.
#
#   Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin.
#

    my ($str) = @_;
    my @tokens;
    my $cursor = 0;     # offset just past the last tag consumed so far

    # Balanced <> pairs, nested up to two levels deep:
    my $tag_re = qr/<(?:[^<>]|<[^<>]*>)*>/;

    while ($str =~ /$tag_re/g) {
        # Offsets of the match just made.
        my ($tag_begin, $tag_end) = ($-[0], $+[0]);

        # Any text sitting between the previous tag and this one.
        if ($tag_begin > $cursor) {
            push @tokens,
                ['text', substr($str, $cursor, $tag_begin - $cursor)];
        }

        push @tokens, ['tag', substr($str, $tag_begin, $tag_end - $tag_begin)];
        $cursor = $tag_end;
    }

    # Trailing text after the final tag (or the whole string if tag-free).
    if ($cursor < length $str) {
        push @tokens, ['text', substr($str, $cursor)];
    }

    return \@tokens;
}


1;
__END__


=pod

=head1 NAME

B<SmartyPants> - A smart quotes plug-in for Movable Type.

=head1 SYNOPSIS

    <$MTEntryBody smart_quotes="1" smart_dashes="1" smart_ellipses="1"$>

    <$MTEntryBody smarty_pants="1"$>


=head1 DESCRIPTION

This plug-in adds four new filter attributes to Movable Type template
tags: "smart_quotes", "smart_dashes", "smart_ellipses", and "smarty_pants":

=over 4

=item *

The smart_quotes attribute transforms the content of the tag, turning
straight quotes -- (') and (") -- into HTML entity-encoded smart quotes
(a.k.a. curly quotes, a.k.a. typographer's quotes). The smart_quotes
attribute can optionally process ``backticks'' -style quotes as well (see
Options, below).

=item *

The smart_dashes attribute transforms each instance of " -- " (space dash
dash space) into an entity-encoded em-dash.

=item *

The smart_ellipses attribute transforms each instance of "..." (dot dot
dot) into an entity-encoded ellipsis.

=item *

The smarty_pants attribute provides access to all of the above
transformations, in a single attribute.


=back

This means you can write, edit, and save your posts using plain old ASCII
straight quotes, plain dashes, and plain dots, but your published posts
will appear with smart quotes, em-dashes, and proper ellipses.

For example, to display an entry body with smart quotes, but with no
transformation for dashes or ellipses:

    <$MTEntryBody smart_quotes="1"$>

To display an entry title with all transformations (including ``backticks''
-style quotes):

    <$MTEntryTitle smart_quotes="2" smart_dashes="1" smart_ellipses="1"$>

The smarty_pants attribute can be used to produce the same results as the previous example, using just one attribute:

    <$MTEntryTitle smarty_pants="1"$>


SmartyPants should work within any MT content tag.

SmartyPants does not modify characters within C<< <pre> >> or C<< <code> >>
tag blocks. Typically, these tags are used to display text where smart
quotes are not appropriate, such as source code or example markup.

SmartyPants requires version 2.5 or later of Movable Type. Global tag
filters were not available to the plug-in interface in earlier versions of
Movable Type.


=head1 INSTALLATION

=over 4

=item 1.

Copy the "SmartyPants.pl" file into your Movable Type "plugins" directory.
The "plugins" directory should be in the same directory as "mt.cgi"; if it
doesn't already exist, use your FTP program to create it. Your
installation should look like this:

    (mt home)/plugins/SmartyPants.pl

=item 2.

If you're currently logged in to Movable Type, log out and log back in. 

=item 3.

That's it. You can now call this plug-in from your templates by including
the SmartyPants attributes in any MT content tag.

=back


=head1 OPTIONS

=head2 smart_quotes

The smart_quotes attribute accepts the following values:

=over 4

=item B<"0">

Suppress all quote education. (Do nothing.)

=item B<"1"> 

Default behavior. Educates normal quote characters: (C<">) and (C<'>).

=item B<"2">

Educate ``backticks'' -style double quotes (in addition to educating
regular quotes). Transforms each instance of two consecutive backtick
characters (C<``>) into an opening double-quote, and each instance of two
consecutive apostrophes (C<''>) into a closing double-quote.

=back


=head2 smart_dashes

The smart_dashes attribute accepts the following values:

=over 4

=item B<"0">

Suppress dash education. (Do nothing.)

=item B<"1"> 

Default behavior. Transforms each instance of " -- " (space dash dash
space) into an HTML entity-encoded em-dash. It will also transform
em-dashes at the beginning and end of a line (e.g.: start-of-line dash
dash space).

=back


=head2 smart_ellipses

The smart_ellipses attribute accepts the following values:

=over 4

=item B<"0">

Suppress ellipsis education. (Do nothing.)

=item B<"1"> 

Default behavior. Transforms each instance of C<...> (dot dot dot) into
an HTML entity-encoded ellipsis. If there are four consecutive dots,
SmartyPants assumes this means "full stop" followed by "ellipsis".

=back

=head2 smarty_pants

The smarty_pants attribute provides access to all transformations in a single attribute. 

=over 4

=item B<"0">

Suppress all transformations. (Do nothing.)

=item B<"1"> 

Default behavior. Performs all SmartyPants transformations: quotes (including ``backticks'' -style), dashes, and ellipses.

=back

The following single-character attribute values can be combined to toggle
individual transformations from within the smarty_pants attribute. For
example, to educate normal quotes and em-dashes, but not ellipses or
``backticks'' -style quotes:

    <$MTFoo smarty_pants="qd"$>

=over 4

=item B<"q">

Educates normal quote characters: (C<">) and (C<'>).

=item B<"b">

Educates ``backticks'' -style double quotes.

=item B<"d">

Educates em-dashes.

=item B<"e">

Educates ellipses.

=back


=head1 CAVEATS

=head2 Why You Might Not Want to Use Smart Quotes in Your Weblog

For one thing, you might not care.

Most normal, mentally stable individuals do not take notice of proper
typographic punctuation. Many design and typography nerds, however, break
out in a nasty rash when they encounter, say, a restaurant sign that uses
a straight apostrophe to spell "Joe's".

If you're the sort of person who just doesn't care, you might well want to
continue not caring. Using straight quotes -- and sticking to the 7-bit
ASCII character set in general -- is certainly a simpler way to live.

Even if you I<do> care about accurate typography, you still might want to
think twice before educating the quote characters in your weblog. One side
effect of publishing curly quote HTML entities is that it makes your
weblog a bit harder for others to quote from using copy-and-paste. What
happens is that when someone copies text from your blog, the copied text
contains the 8-bit curly quote characters (as well as the 8-bit characters
for em-dashes and ellipses, if you use these options). These characters
are not standard across different text encoding methods, which is why they
need to be encoded as HTML entities.

People copying text from your weblog, however, may not notice that you're
using curly quotes, and they'll go ahead and paste the unencoded 8-bit
characters copied from their browser into an email message or their own
weblog. When pasted as raw "smart quotes", these characters are likely to
get mangled beyond recognition.

That said, my own opinion is that any decent text editor or email client
makes it easy to stupefy smart quote characters into their 7-bit
equivalents, and I don't consider it my problem if you're using an
indecent text editor or email client.


=head2 Algorithmic Shortcomings

One situation in which quotes will get curled the wrong way is when
apostrophes are used at the start of leading contractions. For example:

    the '80s
    'Twas the night before Christmas.

In both cases above, SmartyPants will turn the apostrophes into opening
single-quotes, when in fact they should be closing ones. I don't think
this problem can be solved in the general case -- every word processor
I've tried gets this wrong as well. In such cases, it's best to use the
proper HTML entity for closing single-quotes (C<&#8217;>) by hand.

(I should also note that my personal style is to abbreviate decades like
this:

    the 80's

so admittedly, I'm not all that interested in solving this problem.)


=head1 BUGS

To file bug reports or feature requests (other than topics listed in the
Caveats section above) please send email to:

    smartypants@daringfireball.net

If the bug involves quotes being curled the wrong way, please send example
text to illustrate.


=head1 SEE ALSO

This plug-in effectively obsoletes the technique documented here:

    http://daringfireball.net/2002/08/movable_type_smart_quote_devilry.html

However, the above instructions may still be of interest if for some
reason you are still running an older version of Movable Type.


=head1 VERSION HISTORY

    1.0: Wed Nov 13, 2002
    Initial release.


=head1 AUTHOR

    John Gruber
    http://daringfireball.net


=head1 ADDITIONAL CREDITS

Portions of this plug-in are based on Brad Choate's nifty MTRegex plug-in. Brad Choate also contributed a few bits of source code to this plug-in. Brad Choate is a fine hacker indeed. (http://bradchoate.com/)

Jeremy Hedley (http://antipixel.com/) and Charles Wiltgen (http://playbacktime.com/) deserve mention for exemplary beta testing.


=head1 COPYRIGHT and LICENSE

Copyright (c) 2002 John Gruber  

Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

The Software is provided "as is", without warranty of any kind, express or
implied, including but not limited to the warranties of merchantability,
fitness for a particular purpose and noninfringement. In no event shall
the authors or copyright holders be liable for any claim, damages or other
liability, whether in an action of contract, tort or otherwise, arising
from, out of or in connection with the Software or the use or other
dealings in the Software.

=cut