local-lib5/lib/perl5/i486-linux-gnu-thread-multi/Template/Parser.pm

   1 #============================================================= -*-Perl-*-
   2 #
   3 # Template::Parser
   4 #
   5 # DESCRIPTION
   6 #   This module implements a LALR(1) parser and assocated support
   7 #   methods to parse template documents into the appropriate "compiled"
   8 #   format.  Much of the parser DFA code (see _parse() method) is based
   9 #   on Francois Desarmenien's Parse::Yapp module.  Kudos to him.
  10 #
  11 # AUTHOR
  12 #   Andy Wardley <abw@wardley.org>
  13 #
  14 # COPYRIGHT
  15 #   Copyright (C) 1996-2007 Andy Wardley.  All Rights Reserved.
  16 #
  17 #   This module is free software; you can redistribute it and/or
  18 #   modify it under the same terms as Perl itself.
  19 #
  20 #   The following copyright notice appears in the Parse::Yapp
  21 #   documentation.
  22 #
  23 #      The Parse::Yapp module and its related modules and shell
  24 #      scripts are copyright (c) 1998 Francois Desarmenien,
  25 #      France. All rights reserved.
  26 #
  27 #      You may use and distribute them under the terms of either
  28 #      the GNU General Public License or the Artistic License, as
  29 #      specified in the Perl README file.
  30 #
  31 #============================================================================
  32
  33 package Template::Parser;
  34
  35 use strict;
  36 use warnings;
  37 use base 'Template::Base';
  38
  39 use Template::Constants qw( :status :chomp );
  40 use Template::Directive;
  41 use Template::Grammar;
  42
  43 # parser state constants
  44 use constant CONTINUE => 0;
  45 use constant ACCEPT   => 1;
  46 use constant ERROR    => 2;
  47 use constant ABORT    => 3;
  48
  49 our $VERSION = 2.89;
  50 our $DEBUG   = 0 unless defined $DEBUG;
  51 our $ERROR   = '';
  52
  53
  54 #========================================================================
  55 #                        -- COMMON TAG STYLES --
  56 #========================================================================
  57
  58 our $TAG_STYLE   = {
  59     'default'   => [ '\[%',    '%\]'    ],
  60     'template1' => [ '[\[%]%', '%[\]%]' ],
  61     'metatext'  => [ '%%',     '%%'     ],
  62     'html'      => [ '<!--',   '-->'    ],
  63     'mason'     => [ '<%',     '>'      ],
  64     'asp'       => [ '<%',     '%>'     ],
  65     'php'       => [ '<\?',    '\?>'    ],
  66     'star'      => [ '\[\*',   '\*\]'   ],
  67 };
  68 $TAG_STYLE->{ template } = $TAG_STYLE->{ tt2 } = $TAG_STYLE->{ default };
  69
  70
  71 our $DEFAULT_STYLE = {
  72     START_TAG   => $TAG_STYLE->{ default }->[0],
  73     END_TAG     => $TAG_STYLE->{ default }->[1],
  74 #    TAG_STYLE   => 'default',
  75     ANYCASE     => 0,
  76     INTERPOLATE => 0,
  77     PRE_CHOMP   => 0,
  78     POST_CHOMP  => 0,
  79     V1DOLLAR    => 0,
  80     EVAL_PERL   => 0,
  81 };
  82
  83 our $QUOTED_ESCAPES = {
  84         n => "\n",
  85         r => "\r",
  86         t => "\t",
  87 };
  88
  89 # note that '-' must come first so Perl doesn't think it denotes a range
  90 our $CHOMP_FLAGS  = qr/[-=~+]/;
  91
  92
  93
  94 #========================================================================
  95 #                      -----  PUBLIC METHODS -----
  96 #========================================================================
  97
  98 #------------------------------------------------------------------------
  99 # new(\%config)
 100 #
 101 # Constructor method.
 102 #------------------------------------------------------------------------
 103
 104 sub new {
 105     my $class  = shift;
 106     my $config = $_[0] && ref($_[0]) eq 'HASH' ? shift(@_) : { @_ };
 107     my ($tagstyle, $debug, $start, $end, $defaults, $grammar, $hash, $key, $udef);
 108
 109     my $self = bless {
 110         START_TAG   => undef,
 111         END_TAG     => undef,
 112         TAG_STYLE   => 'default',
 113         ANYCASE     => 0,
 114         INTERPOLATE => 0,
 115         PRE_CHOMP   => 0,
 116         POST_CHOMP  => 0,
 117         V1DOLLAR    => 0,
 118         EVAL_PERL   => 0,
 119         FILE_INFO   => 1,
 120         GRAMMAR     => undef,
 121         _ERROR      => '',
 122         IN_BLOCK    => [ ],
 123         FACTORY     => $config->{ FACTORY } || 'Template::Directive',
 124     }, $class;
 125
 126     # update self with any relevant keys in config
 127     foreach $key (keys %$self) {
 128         $self->{ $key } = $config->{ $key } if defined $config->{ $key };
 129     }
 130     $self->{ FILEINFO } = [ ];
 131
 132     # DEBUG config item can be a bitmask
 133     if (defined ($debug = $config->{ DEBUG })) {
 134         $self->{ DEBUG } = $debug & ( Template::Constants::DEBUG_PARSER
 135                                     | Template::Constants::DEBUG_FLAGS );
 136         $self->{ DEBUG_DIRS } = $debug & Template::Constants::DEBUG_DIRS;
 137     }
 138     # package variable can be set to 1 to support previous behaviour
 139     elsif ($DEBUG == 1) {
 140         $self->{ DEBUG } = Template::Constants::DEBUG_PARSER;
 141         $self->{ DEBUG_DIRS } = 0;
 142     }
 143     # otherwise let $DEBUG be a bitmask
 144     else {
 145         $self->{ DEBUG } = $DEBUG & ( Template::Constants::DEBUG_PARSER
 146                                     | Template::Constants::DEBUG_FLAGS );
 147         $self->{ DEBUG_DIRS } = $DEBUG & Template::Constants::DEBUG_DIRS;
 148     }
 149
 150     $grammar = $self->{ GRAMMAR } ||= do {
 151         require Template::Grammar;
 152         Template::Grammar->new();
 153     };
 154
 155     # build a FACTORY object to include any NAMESPACE definitions,
 156     # but only if FACTORY isn't already an object
 157     if ($config->{ NAMESPACE } && ! ref $self->{ FACTORY }) {
 158         my $fclass = $self->{ FACTORY };
 159         $self->{ FACTORY } = $fclass->new( NAMESPACE => $config->{ NAMESPACE } )
 160             || return $class->error($fclass->error());
 161     }
 162
 163     # load grammar rules, states and lex table
 164     @$self{ qw( LEXTABLE STATES RULES ) }
 165         = @$grammar{ qw( LEXTABLE STATES RULES ) };
 166
 167     $self->new_style($config)
 168         || return $class->error($self->error());
 169
 170     return $self;
 171 }
 172
 173 #-----------------------------------------------------------------------
 174 # These methods are used to track nested IF and WHILE blocks.  Each
 175 # generated if/while block is given a label indicating the directive
 176 # type and nesting depth, e.g. FOR0, WHILE1, FOR2, WHILE3, etc.  The
 177 # NEXT and LAST directives use the innermost label, e.g. last WHILE3;
 178 #-----------------------------------------------------------------------
 179
 180 sub enter_block {
 181     my ($self, $name) = @_;
 182     my $blocks = $self->{ IN_BLOCK };
 183     push(@{ $self->{ IN_BLOCK } }, $name);
 184 }
 185
 186 sub leave_block {
 187     my $self = shift;
 188     my $label = $self->block_label;
 189     pop(@{ $self->{ IN_BLOCK } });
 190     return $label;
 191 }
 192
 193 sub in_block {
 194     my ($self, $name) = @_;
 195     my $blocks = $self->{ IN_BLOCK };
 196     return @$blocks && $blocks->[-1] eq $name;
 197 }
 198
 199 sub block_label {
 200     my ($self, $prefix, $suffix) = @_;
 201     my $blocks = $self->{ IN_BLOCK };
 202     my $name   = @$blocks
 203         ? $blocks->[-1] . scalar @$blocks
 204         : undef;
 205     return join('', grep { defined $_ } $prefix, $name, $suffix);
 206 }
 207
 208
 209
 210 #------------------------------------------------------------------------
 211 # new_style(\%config)
 212 #
 213 # Install a new (stacked) parser style.  This feature is currently
 214 # experimental but should mimic the previous behaviour with regard to
 215 # TAG_STYLE, START_TAG, END_TAG, etc.
 216 #------------------------------------------------------------------------
 217
 218 sub new_style {
 219     my ($self, $config) = @_;
 220     my $styles = $self->{ STYLE } ||= [ ];
 221     my ($tagstyle, $tags, $start, $end, $key);
 222
 223     # clone new style from previous or default style
 224     my $style  = { %{ $styles->[-1] || $DEFAULT_STYLE } };
 225
 226     # expand START_TAG and END_TAG from specified TAG_STYLE
 227     if ($tagstyle = $config->{ TAG_STYLE }) {
 228         return $self->error("Invalid tag style: $tagstyle")
 229             unless defined ($tags = $TAG_STYLE->{ $tagstyle });
 230         ($start, $end) = @$tags;
 231         $config->{ START_TAG } ||= $start;
 232         $config->{   END_TAG } ||= $end;
 233     }
 234
 235     foreach $key (keys %$DEFAULT_STYLE) {
 236         $style->{ $key } = $config->{ $key } if defined $config->{ $key };
 237     }
 238     push(@$styles, $style);
 239     return $style;
 240 }
 241
 242
 243 #------------------------------------------------------------------------
 244 # old_style()
 245 #
 246 # Pop the current parser style and revert to the previous one.  See
 247 # new_style().   ** experimental **
 248 #------------------------------------------------------------------------
 249
 250 sub old_style {
 251     my $self = shift;
 252     my $styles = $self->{ STYLE };
 253     return $self->error('only 1 parser style remaining')
 254         unless (@$styles > 1);
 255     pop @$styles;
 256     return $styles->[-1];
 257 }
 258
 259
 260 #------------------------------------------------------------------------
 261 # parse($text, $data)
 262 #
 263 # Parses the text string, $text and returns a hash array representing
 264 # the compiled template block(s) as Perl code, in the format expected
 265 # by Template::Document.
 266 #------------------------------------------------------------------------
 267
 268 sub parse {
 269     my ($self, $text, $info) = @_;
 270     my ($tokens, $block);
 271
 272     $info->{ DEBUG } = $self->{ DEBUG_DIRS }
 273         unless defined $info->{ DEBUG };
 274
 275 #    print "info: { ", join(', ', map { "$_ => $info->{ $_ }" } keys %$info), " }\n";
 276
 277     # store for blocks defined in the template (see define_block())
 278     my $defblock = $self->{ DEFBLOCK } = { };
 279     my $metadata = $self->{ METADATA } = [ ];
 280     $self->{ DEFBLOCKS } = [ ];
 281
 282     $self->{ _ERROR } = '';
 283
 284     # split file into TEXT/DIRECTIVE chunks
 285     $tokens = $self->split_text($text)
 286         || return undef;                                    ## RETURN ##
 287
 288     push(@{ $self->{ FILEINFO } }, $info);
 289
 290     # parse chunks
 291     $block = $self->_parse($tokens, $info);
 292
 293     pop(@{ $self->{ FILEINFO } });
 294
 295     return undef unless $block;                             ## RETURN ##
 296
 297     $self->debug("compiled main template document block:\n$block")
 298         if $self->{ DEBUG } & Template::Constants::DEBUG_PARSER;
 299
 300     return {
 301         BLOCK     => $block,
 302         DEFBLOCKS => $defblock,
 303         METADATA  => { @$metadata },
 304     };
 305 }
 306
 307
 308
 309 #------------------------------------------------------------------------
 310 # split_text($text)
 311 #
 312 # Split input template text into directives and raw text chunks.
 313 #------------------------------------------------------------------------
 314
 315 sub split_text {
 316     my ($self, $text) = @_;
 317     my ($pre, $dir, $prelines, $dirlines, $postlines, $chomp, $tags, @tags);
 318     my $style = $self->{ STYLE }->[-1];
 319     my ($start, $end, $prechomp, $postchomp, $interp ) =
 320         @$style{ qw( START_TAG END_TAG PRE_CHOMP POST_CHOMP INTERPOLATE ) };
 321     my $tags_dir = $self->{ANYCASE} ? qr<TAGS>i : qr<TAGS>;
 322
 323     my @tokens = ();
 324     my $line = 1;
 325
 326     return \@tokens                                         ## RETURN ##
 327         unless defined $text && length $text;
 328
 329     # extract all directives from the text
 330     while ($text =~ s/
 331            ^(.*?)               # $1 - start of line up to directive
 332            (?:
 333             $start          # start of tag
 334             (.*?)           # $2 - tag contents
 335             $end            # end of tag
 336             )
 337            //sx) {
 338
 339         ($pre, $dir) = ($1, $2);
 340         $pre = '' unless defined $pre;
 341         $dir = '' unless defined $dir;
 342
 343         $prelines  = ($pre =~ tr/\n//);  # newlines in preceeding text
 344         $dirlines  = ($dir =~ tr/\n//);  # newlines in directive tag
 345         $postlines = 0;                  # newlines chomped after tag
 346
 347         for ($dir) {
 348             if (/^\#/) {
 349                 # comment out entire directive except for any end chomp flag
 350                 $dir = ($dir =~ /($CHOMP_FLAGS)$/o) ? $1 : '';
 351             }
 352             else {
 353                 s/^($CHOMP_FLAGS)?\s*//so;
 354                 # PRE_CHOMP: process whitespace before tag
 355                 $chomp = $1 ? $1 : $prechomp;
 356                 $chomp =~ tr/-=~+/1230/;
 357                 if ($chomp && $pre) {
 358                     # chomp off whitespace and newline preceding directive
 359                     if ($chomp == CHOMP_ALL) {
 360                         $pre =~ s{ (\r?\n|^) [^\S\n]* \z }{}mx;
 361                     }
 362                     elsif ($chomp == CHOMP_COLLAPSE) {
 363                         $pre =~ s{ (\s+) \z }{ }x;
 364                     }
 365                     elsif ($chomp == CHOMP_GREEDY) {
 366                         $pre =~ s{ (\s+) \z }{}x;
 367                     }
 368                 }
 369             }
 370
 371             # POST_CHOMP: process whitespace after tag
 372             s/\s*($CHOMP_FLAGS)?\s*$//so;
 373             $chomp = $1 ? $1 : $postchomp;
 374             $chomp =~ tr/-=~+/1230/;
 375             if ($chomp) {
 376                 if ($chomp == CHOMP_ALL) {
 377                     $text =~ s{ ^ ([^\S\n]* \n) }{}x
 378                         && $postlines++;
 379                 }
 380                 elsif ($chomp == CHOMP_COLLAPSE) {
 381                     $text =~ s{ ^ (\s+) }{ }x
 382                         && ($postlines += $1=~y/\n//);
 383                 }
 384                 # any trailing whitespace
 385                 elsif ($chomp == CHOMP_GREEDY) {
 386                     $text =~ s{ ^ (\s+) }{}x
 387                         && ($postlines += $1=~y/\n//);
 388                 }
 389             }
 390         }
 391
 392         # any text preceding the directive can now be added
 393         if (length $pre) {
 394             push(@tokens, $interp
 395                  ? [ $pre, $line, 'ITEXT' ]
 396                  : ('TEXT', $pre) );
 397         }
 398         $line += $prelines;
 399
 400         # and now the directive, along with line number information
 401         if (length $dir) {
 402             # the TAGS directive is a compile-time switch
 403             if ($dir =~ /^$tags_dir\s+(.*)/) {
 404                 my @tags = split(/\s+/, $1);
 405                 if (scalar @tags > 1) {
 406                     ($start, $end) = map { quotemeta($_) } @tags;
 407                 }
 408                 elsif ($tags = $TAG_STYLE->{ $tags[0] }) {
 409                     ($start, $end) = @$tags;
 410                 }
 411                 else {
 412                     warn "invalid TAGS style: $tags[0]\n";
 413                 }
 414             }
 415             else {
 416                 # DIRECTIVE is pushed as:
 417                 #   [ $dirtext, $line_no(s), \@tokens ]
 418                 push(@tokens,
 419                      [ $dir,
 420                        ($dirlines
 421                         ? sprintf("%d-%d", $line, $line + $dirlines)
 422                         : $line),
 423                        $self->tokenise_directive($dir) ]);
 424             }
 425         }
 426
 427         # update line counter to include directive lines and any extra
 428         # newline chomped off the start of the following text
 429         $line += $dirlines + $postlines;
 430     }
 431
 432     # anything remaining in the string is plain text
 433     push(@tokens, $interp
 434          ? [ $text, $line, 'ITEXT' ]
 435          : ( 'TEXT', $text) )
 436         if length $text;
 437
 438     return \@tokens;                                        ## RETURN ##
 439 }
 440
 441
 442
 443 #------------------------------------------------------------------------
 444 # interpolate_text($text, $line)
 445 #
 446 # Examines $text looking for any variable references embedded like
 447 # $this or like ${ this }.
 448 #------------------------------------------------------------------------
 449
 450 sub interpolate_text {
 451     my ($self, $text, $line) = @_;
 452     my @tokens  = ();
 453     my ($pre, $var, $dir);
 454
 455
 456    while ($text =~
 457            /
 458            ( (?: \\. | [^\$] ){1,3000} ) # escaped or non-'$' character [$1]
 459            |
 460            ( \$ (?:                 # embedded variable            [$2]
 461              (?: \{ ([^\}]*) \} )   # ${ ... }                     [$3]
 462              |
 463              ([\w\.]+)              # $word                        [$4]
 464              )
 465            )
 466         /gx) {
 467
 468         ($pre, $var, $dir) = ($1, $3 || $4, $2);
 469
 470         # preceding text
 471         if (defined($pre) && length($pre)) {
 472             $line += $pre =~ tr/\n//;
 473             $pre =~ s/\\\$/\$/g;
 474             push(@tokens, 'TEXT', $pre);
 475         }
 476         # $variable reference
 477         if ($var) {
 478             $line += $dir =~ tr/\n/ /;
 479             push(@tokens, [ $dir, $line, $self->tokenise_directive($var) ]);
 480         }
 481         # other '$' reference - treated as text
 482         elsif ($dir) {
 483             $line += $dir =~ tr/\n//;
 484             push(@tokens, 'TEXT', $dir);
 485         }
 486     }
 487
 488     return \@tokens;
 489 }
 490
 491
 492
 493 #------------------------------------------------------------------------
 494 # tokenise_directive($text)
 495 #
 496 # Called by the private _parse() method when it encounters a DIRECTIVE
 497 # token in the list provided by the split_text() or interpolate_text()
 498 # methods.  The directive text is passed by parameter.
 499 #
 500 # The method splits the directive into individual tokens as recognised
 501 # by the parser grammar (see Template::Grammar for details).  It
 502 # constructs a list of tokens each represented by 2 elements, as per
 503 # split_text() et al.  The first element contains the token type, the
 504 # second the token itself.
 505 #
 506 # The method tokenises the string using a complex (but fast) regex.
 507 # For a deeper understanding of the regex magic at work here, see
 508 # Jeffrey Friedl's excellent book "Mastering Regular Expressions",
 509 # from O'Reilly, ISBN 1-56592-257-3
 510 #
 511 # Returns a reference to the list of chunks (each one being 2 elements)
 512 # identified in the directive text.  On error, the internal _ERROR string
 513 # is set and undef is returned.
 514 #------------------------------------------------------------------------
 515
 516 sub tokenise_directive {
 517     my ($self, $text, $line) = @_;
 518     my ($token, $uctoken, $type, $lookup);
 519     my $lextable = $self->{ LEXTABLE };
 520     my $style    = $self->{ STYLE }->[-1];
 521     my ($anycase, $start, $end) = @$style{ qw( ANYCASE START_TAG END_TAG ) };
 522     my @tokens = ( );
 523
 524     while ($text =~
 525             /
 526                 # strip out any comments
 527                 (\#[^\n]*)
 528            |
 529                 # a quoted phrase matches in $3
 530                 (["'])                   # $2 - opening quote, ' or "
 531                 (                        # $3 - quoted text buffer
 532                     (?:                  # repeat group (no backreference)
 533                         \\\\             # an escaped backslash \\
 534                     |                    # ...or...
 535                         \\\2             # an escaped quote \" or \' (match $1)
 536                     |                    # ...or...
 537                         .                # any other character
 538                     |   \n
 539                     )*?                  # non-greedy repeat
 540                 )                        # end of $3
 541                 \2                       # match opening quote
 542             |
 543                 # an unquoted number matches in $4
 544                 (-?\d+(?:\.\d+)?)       # numbers
 545             |
 546                 # filename matches in $5
 547                 ( \/?\w+(?:(?:\/|::?)\w*)+ | \/\w+)
 548             |
 549                 # an identifier matches in $6
 550                 (\w+)                    # variable identifier
 551             |
 552                 # an unquoted word or symbol matches in $7
 553                 (   [(){}\[\]:;,\/\\]    # misc parenthesis and symbols
 554 #               |   \->                  # arrow operator (for future?)
 555                 |   [+\-*]               # math operations
 556                 |   \$\{?                # dollar with option left brace
 557                 |   =>                   # like '='
 558                 |   [=!<>]?= | [!<>]     # eqality tests
 559                 |   &&? | \|\|?          # boolean ops
 560                 |   \.\.?                # n..n sequence
 561                 |   \S+                  # something unquoted
 562                 )                        # end of $7
 563             /gmxo) {
 564
 565         # ignore comments to EOL
 566         next if $1;
 567
 568         # quoted string
 569         if (defined ($token = $3)) {
 570             # double-quoted string may include $variable references
 571             if ($2 eq '"') {
 572                 if ($token =~ /[\$\\]/) {
 573                     $type = 'QUOTED';
 574                     # unescape " and \ but leave \$ escaped so that
 575                         # interpolate_text() doesn't incorrectly treat it
 576                     # as a variable reference
 577 #                   $token =~ s/\\([\\"])/$1/g;
 578                         for ($token) {
 579                                 s/\\([^\$nrt])/$1/g;
 580                                 s/\\([nrt])/$QUOTED_ESCAPES->{ $1 }/ge;
 581                         }
 582                     push(@tokens, ('"') x 2,
 583                                   @{ $self->interpolate_text($token) },
 584                                   ('"') x 2);
 585                     next;
 586                 }
 587                 else {
 588                     $type = 'LITERAL';
 589                     $token =~ s['][\\']g;
 590                     $token = "'$token'";
 591                 }
 592             }
 593             else {
 594                 $type = 'LITERAL';
 595                 $token = "'$token'";
 596             }
 597         }
 598         # number
 599         elsif (defined ($token = $4)) {
 600             $type = 'NUMBER';
 601         }
 602         elsif (defined($token = $5)) {
 603             $type = 'FILENAME';
 604         }
 605         elsif (defined($token = $6)) {
 606             # Fold potential keywords to UPPER CASE if the ANYCASE option is
 607             # set, unless (we've got some preceeding tokens and) the previous
 608             # token is a DOT op.  This prevents the 'last' in 'data.last'
 609             # from being interpreted as the LAST keyword.
 610             $uctoken =
 611                 ($anycase && (! @tokens || $tokens[-2] ne 'DOT'))
 612                     ? uc $token
 613                     :    $token;
 614             if (defined ($type = $lextable->{ $uctoken })) {
 615                 $token = $uctoken;
 616             }
 617             else {
 618                 $type = 'IDENT';
 619             }
 620         }
 621         elsif (defined ($token = $7)) {
 622             # reserved words may be in lower case unless case sensitive
 623             $uctoken = $anycase ? uc $token : $token;
 624             unless (defined ($type = $lextable->{ $uctoken })) {
 625                 $type = 'UNQUOTED';
 626             }
 627         }
 628
 629         push(@tokens, $type, $token);
 630
 631 #       print(STDERR " +[ $type, $token ]\n")
 632 #           if $DEBUG;
 633     }
 634
 635 #    print STDERR "tokenise directive() returning:\n  [ @tokens ]\n"
 636 #       if $DEBUG;
 637
 638     return \@tokens;                                        ## RETURN ##
 639 }
 640
 641
 642 #------------------------------------------------------------------------
 643 # define_block($name, $block)
 644 #
 645 # Called by the parser 'defblock' rule when a BLOCK definition is
 646 # encountered in the template.  The name of the block is passed in the
 647 # first parameter and a reference to the compiled block is passed in
 648 # the second.  This method stores the block in the $self->{ DEFBLOCK }
 649 # hash which has been initialised by parse() and will later be used
 650 # by the same method to call the store() method on the calling cache
 651 # to define the block "externally".
 652 #------------------------------------------------------------------------
 653
 654 sub define_block {
 655     my ($self, $name, $block) = @_;
 656     my $defblock = $self->{ DEFBLOCK }
 657         || return undef;
 658
 659     $self->debug("compiled block '$name':\n$block")
 660         if $self->{ DEBUG } & Template::Constants::DEBUG_PARSER;
 661
 662     $defblock->{ $name } = $block;
 663
 664     return undef;
 665 }
 666
 667 sub push_defblock {
 668     my $self = shift;
 669     my $stack = $self->{ DEFBLOCK_STACK } ||= [];
 670     push(@$stack, $self->{ DEFBLOCK } );
 671     $self->{ DEFBLOCK } = { };
 672 }
 673
 674 sub pop_defblock {
 675     my $self  = shift;
 676     my $defs  = $self->{ DEFBLOCK };
 677     my $stack = $self->{ DEFBLOCK_STACK } || return $defs;
 678     return $defs unless @$stack;
 679     $self->{ DEFBLOCK } = pop @$stack;
 680     return $defs;
 681 }
 682
 683
 684 #------------------------------------------------------------------------
 685 # add_metadata(\@setlist)
 686 #------------------------------------------------------------------------
 687
 688 sub add_metadata {
 689     my ($self, $setlist) = @_;
 690     my $metadata = $self->{ METADATA }
 691         || return undef;
 692
 693     push(@$metadata, @$setlist);
 694
 695     return undef;
 696 }
 697
 698
 699 #------------------------------------------------------------------------
 700 # location()
 701 #
 702 # Return Perl comment indicating current parser file and line
 703 #------------------------------------------------------------------------
 704
 705 sub location {
 706     my $self = shift;
 707     return "\n" unless $self->{ FILE_INFO };
 708     my $line = ${ $self->{ LINE } };
 709     my $info = $self->{ FILEINFO }->[-1];
 710     my $file = $info->{ path } || $info->{ name }
 711         || '(unknown template)';
 712     $line =~ s/\-.*$//; # might be 'n-n'
 713     $line ||= 1;
 714     return "#line $line \"$file\"\n";
 715 }
 716
 717
 718 #========================================================================
 719 #                     -----  PRIVATE METHODS -----
 720 #========================================================================
 721
 722 #------------------------------------------------------------------------
 723 # _parse(\@tokens, \@info)
 724 #
 725 # Parses the list of input tokens passed by reference and returns a
 726 # Template::Directive::Block object which contains the compiled
 727 # representation of the template.
 728 #
 729 # This is the main parser DFA loop.  See embedded comments for
 730 # further details.
 731 #
 732 # On error, undef is returned and the internal _ERROR field is set to
 733 # indicate the error.  This can be retrieved by calling the error()
 734 # method.
 735 #------------------------------------------------------------------------
 736
 737 sub _parse {
 738     my ($self, $tokens, $info) = @_;
 739     my ($token, $value, $text, $line, $inperl);
 740     my ($state, $stateno, $status, $action, $lookup, $coderet, @codevars);
 741     my ($lhs, $len, $code);         # rule contents
 742     my $stack = [ [ 0, undef ] ];   # DFA stack
 743
 744 # DEBUG
 745 #   local $" = ', ';
 746
 747     # retrieve internal rule and state tables
 748     my ($states, $rules) = @$self{ qw( STATES RULES ) };
 749
 750     # call the grammar set_factory method to install emitter factory
 751     $self->{ GRAMMAR }->install_factory($self->{ FACTORY });
 752
 753     $line = $inperl = 0;
 754     $self->{ LINE   } = \$line;
 755     $self->{ FILE   } = $info->{ name };
 756     $self->{ INPERL } = \$inperl;
 757
 758     $status = CONTINUE;
 759     my $in_string = 0;
 760
 761     while(1) {
 762         # get state number and state
 763         $stateno =  $stack->[-1]->[0];
 764         $state   = $states->[$stateno];
 765
 766         # see if any lookaheads exist for the current state
 767         if (exists $state->{'ACTIONS'}) {
 768
 769             # get next token and expand any directives (i.e. token is an
 770             # array ref) onto the front of the token list
 771             while (! defined $token && @$tokens) {
 772                 $token = shift(@$tokens);
 773                 if (ref $token) {
 774                     ($text, $line, $token) = @$token;
 775                     if (ref $token) {
 776                         if ($info->{ DEBUG } && ! $in_string) {
 777                             # - - - - - - - - - - - - - - - - - - - - - - - - -
 778                             # This is gnarly.  Look away now if you're easily
 779                             # frightened.  We're pushing parse tokens onto the
 780                             # pending list to simulate a DEBUG directive like so:
 781                             # [% DEBUG msg line='20' text='INCLUDE foo' %]
 782                             # - - - - - - - - - - - - - - - - - - - - - - - - -
 783                             my $dtext = $text;
 784                             $dtext =~ s[(['\\])][\\$1]g;
 785                             unshift(@$tokens,
 786                                     DEBUG   => 'DEBUG',
 787                                     IDENT   => 'msg',
 788                                     IDENT   => 'line',
 789                                     ASSIGN  => '=',
 790                                     LITERAL => "'$line'",
 791                                     IDENT   => 'text',
 792                                     ASSIGN  => '=',
 793                                     LITERAL => "'$dtext'",
 794                                     IDENT   => 'file',
 795                                     ASSIGN  => '=',
 796                                     LITERAL => "'$info->{ name }'",
 797                                     (';') x 2,
 798                                     @$token,
 799                                     (';') x 2);
 800                         }
 801                         else {
 802                             unshift(@$tokens, @$token, (';') x 2);
 803                         }
 804                         $token = undef;  # force redo
 805                     }
 806                     elsif ($token eq 'ITEXT') {
 807                         if ($inperl) {
 808                             # don't perform interpolation in PERL blocks
 809                             $token = 'TEXT';
 810                             $value = $text;
 811                         }
 812                         else {
 813                             unshift(@$tokens,
 814                                     @{ $self->interpolate_text($text, $line) });
 815                             $token = undef; # force redo
 816                         }
 817                     }
 818                 }
 819                 else {
 820                     # toggle string flag to indicate if we're crossing
 821                     # a string boundary
 822                     $in_string = ! $in_string if $token eq '"';
 823                     $value = shift(@$tokens);
 824                 }
 825             };
 826             # clear undefined token to avoid 'undefined variable blah blah'
 827             # warnings and let the parser logic pick it up in a minute
 828             $token = '' unless defined $token;
 829
 830             # get the next state for the current lookahead token
 831             $action = defined ($lookup = $state->{'ACTIONS'}->{ $token })
 832                       ? $lookup
 833                       : defined ($lookup = $state->{'DEFAULT'})
 834                         ? $lookup
 835                         : undef;
 836         }
 837         else {
 838             # no lookahead actions
 839             $action = $state->{'DEFAULT'};
 840         }
 841
 842         # ERROR: no ACTION
 843         last unless defined $action;
 844
 845         # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 846         # shift (+ive ACTION)
 847         # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 848         if ($action > 0) {
 849             push(@$stack, [ $action, $value ]);
 850             $token = $value = undef;
 851             redo;
 852         };
 853
 854         # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 855         # reduce (-ive ACTION)
 856         # - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 857         ($lhs, $len, $code) = @{ $rules->[ -$action ] };
 858
 859         # no action imples ACCEPTance
 860         $action
 861             or $status = ACCEPT;
 862
 863         # use dummy sub if code ref doesn't exist
 864         $code = sub { $_[1] }
 865             unless $code;
 866
 867         @codevars = $len
 868                 ?   map { $_->[1] } @$stack[ -$len .. -1 ]
 869                 :   ();
 870
 871         eval {
 872             $coderet = &$code( $self, @codevars );
 873         };
 874         if ($@) {
 875             my $err = $@;
 876             chomp $err;
 877             return $self->_parse_error($err);
 878         }
 879
 880         # reduce stack by $len
 881         splice(@$stack, -$len, $len);
 882
 883         # ACCEPT
 884         return $coderet                                     ## RETURN ##
 885             if $status == ACCEPT;
 886
 887         # ABORT
 888         return undef                                        ## RETURN ##
 889             if $status == ABORT;
 890
 891         # ERROR
 892         last
 893             if $status == ERROR;
 894     }
 895     continue {
 896         push(@$stack, [ $states->[ $stack->[-1][0] ]->{'GOTOS'}->{ $lhs },
 897               $coderet ]),
 898     }
 899
 900     # ERROR                                                 ## RETURN ##
 901     return $self->_parse_error('unexpected end of input')
 902         unless defined $value;
 903
 904     # munge text of last directive to make it readable
 905 #    $text =~ s/\n/\\n/g;
 906
 907     return $self->_parse_error("unexpected end of directive", $text)
 908         if $value eq ';';   # end of directive SEPARATOR
 909
 910     return $self->_parse_error("unexpected token ($value)", $text);
 911 }
 912
 913
 914
 915 #------------------------------------------------------------------------
 916 # _parse_error($msg, $dirtext)
 917 #
 918 # Method used to handle errors encountered during the parse process
 919 # in the _parse() method.
 920 #------------------------------------------------------------------------
 921
 922 sub _parse_error {
 923     my ($self, $msg, $text) = @_;
 924     my $line = $self->{ LINE };
 925     $line = ref($line) ? $$line : $line;
 926     $line = 'unknown' unless $line;
 927
 928     $msg .= "\n  [% $text %]"
 929         if defined $text;
 930
 931     return $self->error("line $line: $msg");
 932 }
 933
 934
 935 #------------------------------------------------------------------------
 936 # _dump()
 937 #
 938 # Debug method returns a string representing the internal state of the
 939 # object.
 940 #------------------------------------------------------------------------
 941
 942 sub _dump {
 943     my $self = shift;
 944     my $output = "[Template::Parser] {\n";
 945     my $format = "    %-16s => %s\n";
 946     my $key;
 947
 948     foreach $key (qw( START_TAG END_TAG TAG_STYLE ANYCASE INTERPOLATE
 949                       PRE_CHOMP POST_CHOMP V1DOLLAR )) {
 950         my $val = $self->{ $key };
 951         $val = '<undef>' unless defined $val;
 952         $output .= sprintf($format, $key, $val);
 953     }
 954
 955     $output .= '}';
 956     return $output;
 957 }
 958
 959
 960 1;
 961
 962 __END__
 963
 964 =head1 NAME
 965
 966 Template::Parser - LALR(1) parser for compiling template documents
 967
 968 =head1 SYNOPSIS
 969
 970     use Template::Parser;
 971
 972     $parser   = Template::Parser->new(\%config);
 973     $template = $parser->parse($text)
 974         || die $parser->error(), "\n";
 975
 976 =head1 DESCRIPTION
 977
 978 The C<Template::Parser> module implements a LALR(1) parser and associated
 979 methods for parsing template documents into Perl code.
 980
 981 =head1 PUBLIC METHODS
 982
 983 =head2 new(\%params)
 984
 985 The C<new()> constructor creates and returns a reference to a new
 986 C<Template::Parser> object.
 987
 988 A reference to a hash may be supplied as a parameter to provide configuration values.
 989 See L<CONFIGURATION OPTIONS> below for a summary of these options and
 990 L<Template::Manual::Config> for full details.
 991
 992     my $parser = Template::Parser->new({
 993         START_TAG => quotemeta('<+'),
 994         END_TAG   => quotemeta('+>'),
 995     });
 996
 997 =head2 parse($text)
 998
 999 The C<parse()> method parses the text passed in the first parameter and
1000 returns a reference to a hash array of data defining the compiled
1001 representation of the template text, suitable for passing to the
1002 L<Template::Document> L<new()|Template::Document#new()> constructor method. On
1003 error, undef is returned.
1004
1005     $data = $parser->parse($text)
1006         || die $parser->error();
1007
1008 The C<$data> hash reference returned contains a C<BLOCK> item containing the
1009 compiled Perl code for the template, a C<DEFBLOCKS> item containing a
1010 reference to a hash array of sub-template C<BLOCK>s defined within in the
1011 template, and a C<METADATA> item containing a reference to a hash array
1012 of metadata values defined in C<META> tags.
1013
1014 =head1 CONFIGURATION OPTIONS
1015
1016 The C<Template::Parser> module accepts the following configuration
1017 options.  Please see L<Template::Manual::Config> for futher details
1018 on each option.
1019
1020 =head2 START_TAG, END_TAG
1021
1022 The L<START_TAG|Template::Manual::Config#START_TAG_END_TAG> and
1023 L<END_TAG|Template::Manual::Config#START_TAG_END_TAG> options are used to
1024 specify character sequences or regular expressions that mark the start and end
1025 of a template directive.
1026
1027     my $parser = Template::Parser->new({
1028         START_TAG => quotemeta('<+'),
1029         END_TAG   => quotemeta('+>'),
1030     });
1031
1032 =head2 TAG_STYLE
1033
1034 The L<TAG_STYLE|Template::Manual::Config#TAG_STYLE> option can be used to set
1035 both L<START_TAG> and L<END_TAG> according to pre-defined tag styles.
1036
1037     my $parser = Template::Parser->new({
1038         TAG_STYLE => 'star',     # [* ... *]
1039     });
1040
1041 =head2 PRE_CHOMP, POST_CHOMP
1042
1043 The L<PRE_CHOMP|Template::Manual::Config#PRE_CHOMP_POST_CHOMP> and
1044 L<POST_CHOMP|Template::Manual::Config#PRE_CHOMP_POST_CHOMP> can be set to remove
1045 any whitespace before or after a directive tag, respectively.
1046
1047     my $parser = Template::Parser-E<gt>new({
1048         PRE_CHOMP  => 1,
1049         POST_CHOMP => 1,
1050     });
1051
1052 =head2 INTERPOLATE
1053
1054 The L<INTERPOLATE|Template::Manual::Config#INTERPOLATE> flag can be set
1055 to allow variables to be embedded in plain text blocks.
1056
1057     my $parser = Template::Parser->new({
1058         INTERPOLATE => 1,
1059     });
1060
1061 Variables should be prefixed by a C<$> to identify them, using curly braces
1062 to explicitly scope the variable name where necessary.
1063
1064     Hello ${name},
1065
1066     The day today is ${day.today}.
1067
1068 =head2 ANYCASE
1069
1070 The L<ANYCASE|Template::Manual::Config#ANYCASE> option can be set
1071 to allow directive keywords to be specified in any case.
1072
1073     # with ANYCASE set to 1
1074     [% INCLUDE foobar %]    # OK
1075     [% include foobar %]    # OK
1076     [% include = 10   %]    # ERROR, 'include' is a reserved word
1077
1078 =head2 GRAMMAR
1079
1080 The L<GRAMMAR|Template::Manual::Config#GRAMMAR> configuration item can be used
1081 to specify an alternate grammar for the parser. This allows a modified or
1082 entirely new template language to be constructed and used by the Template
1083 Toolkit.
1084
1085     use MyOrg::Template::Grammar;
1086
1087     my $parser = Template::Parser->new({
1088         GRAMMAR = MyOrg::Template::Grammar->new();
1089     });
1090
1091 By default, an instance of the default L<Template::Grammar> will be
1092 created and used automatically if a C<GRAMMAR> item isn't specified.
1093
1094 =head2 DEBUG
1095
1096 The L<DEBUG|Template::Manual::Config#DEBUG> option can be used to enable
1097 various debugging features of the C<Template::Parser> module.
1098
1099     use Template::Constants qw( :debug );
1100
1101     my $template = Template->new({
1102         DEBUG => DEBUG_PARSER | DEBUG_DIRS,
1103     });
1104
1105 =head1 AUTHOR
1106
1107 Andy Wardley E<lt>abw@wardley.orgE<gt> L<http://wardley.org/>
1108
1109 =head1 COPYRIGHT
1110
1111 Copyright (C) 1996-2007 Andy Wardley.  All Rights Reserved.
1112
1113 This module is free software; you can redistribute it and/or
1114 modify it under the same terms as Perl itself.
1115
1116 The main parsing loop of the C<Template::Parser> module was derived from a
1117 standalone parser generated by version 0.16 of the C<Parse::Yapp> module. The
1118 following copyright notice appears in the C<Parse::Yapp> documentation.
1119
1120     The Parse::Yapp module and its related modules and shell
1121     scripts are copyright (c) 1998 Francois Desarmenien,
1122     France. All rights reserved.
1123
1124     You may use and distribute them under the terms of either
1125     the GNU General Public License or the Artistic License, as
1126     specified in the Perl README file.
1127
1128 =head1 SEE ALSO
1129
1130 L<Template>, L<Template::Grammar>, L<Template::Directive>
1131