7 PPI::Lexer - The PPI Lexer
14 my $Lexer = PPI::Lexer->new;
16 # Build a PPI::Document object from a Token stream
17 my $Tokenizer = PPI::Tokenizer->load('My/Module.pm');
18 my $Document = $Lexer->lex_tokenizer($Tokenizer);
20 # Build a PPI::Document object for some raw source
21 my $source = "print 'Hello World!'; kill(Humans->all);";
22 $Document = $Lexer->lex_source($source);
24 # Build a PPI::Document object for a particular file name
25 $Document = $Lexer->lex_file('My/Module.pm');
29 The is the L<PPI> Lexer. In the larger scheme of things, its job is to take
30 token streams, in a variety of forms, and "lex" them into nested structures.
32 Pretty much everything in this module happens behind the scenes at this
33 point. In fact, at the moment you don't really need to instantiate the lexer
34 at all, the three main methods will auto-instantiate themselves a
35 C<PPI::Lexer> object as needed.
37 All methods do a one-shot "lex this and give me a L<PPI::Document> object".
39 In fact, if you are reading this, what you B<probably> want to do is to
40 just "load a document", in which case you can do this in a much more
41 direct and concise manner with one of the following.
45 $Document = PPI::Document->load( $filename );
46 $Document = PPI::Document->new( $string );
48 See L<PPI::Document> for more details.
50 For more unusual tasks, by all means forge onwards.
58 use Params::Util qw{_STRING _INSTANCE};
59 use List::MoreUtils ();
61 use PPI::Exception ();
63 use vars qw{$VERSION $errstr *_PARENT %ROUND %RESOLVE};
68 # Faster than having another method call just
69 # to set the structure finish token.
70 *_PARENT = *PPI::Element::_PARENT;
72 # Keyword -> Structure class maps
75 'if' => 'PPI::Structure::Condition',
76 'elsif' => 'PPI::Structure::Condition',
77 'unless' => 'PPI::Structure::Condition',
78 'while' => 'PPI::Structure::Condition',
79 'until' => 'PPI::Structure::Condition',
82 'for' => 'PPI::Structure::For',
83 'foreach' => 'PPI::Structure::For',
86 # Opening brace to refining method
98 #####################################################################
105 The C<new> constructor creates a new C<PPI::Lexer> object. The object itself
106 is merely used to hold various buffers and state data during the lexing
107 process, and holds no significant data between -E<gt>lex_xxxxx calls.
109 Returns a new C<PPI::Lexer> object
114 my $class = shift->_clear;
116 Tokenizer => undef, # Where we store the tokenizer for a run
117 buffer => [], # The input token buffer
118 delayed => [], # The "delayed insignificant tokens" buffer
126 #####################################################################
127 # Main Lexing Methods
131 =head2 lex_file $filename
133 The C<lex_file> method takes a filename as argument. It then loads the file,
134 creates a L<PPI::Tokenizer> for the content and lexes the token stream
135 produced by the tokenizer. Basically, a sort of all-in-one method for
136 getting a L<PPI::Document> object from a file name.
138 Returns a L<PPI::Document> object, or C<undef> on error.
143 my $self = ref $_[0] ? shift : shift->new;
144 my $file = _STRING(shift);
145 unless ( defined $file ) {
146 return $self->_error("Did not pass a filename to PPI::Lexer::lex_file");
149 # Create the Tokenizer
150 my $Tokenizer = eval {
151 PPI::Tokenizer->new( $file );
153 if ( _INSTANCE($@, 'PPI::Exception') ) {
154 return $self->_error( $@->message );
156 return $self->_error( $errstr );
159 $self->lex_tokenizer( $Tokenizer );
164 =head2 lex_source $string
166 The C<lex_source> method takes a normal scalar string as argument. It
167 creates a L<PPI::Tokenizer> object for the string, and then lexes the
168 resulting token stream.
170 Returns a L<PPI::Document> object, or C<undef> on error.
175 my $self = ref $_[0] ? shift : shift->new;
177 unless ( defined $source and not ref $source ) {
178 return $self->_error("Did not pass a string to PPI::Lexer::lex_source");
181 # Create the Tokenizer and hand off to the next method
182 my $Tokenizer = eval {
183 PPI::Tokenizer->new( \$source );
185 if ( _INSTANCE($@, 'PPI::Exception') ) {
186 return $self->_error( $@->message );
188 return $self->_error( $errstr );
191 $self->lex_tokenizer( $Tokenizer );
196 =head2 lex_tokenizer $Tokenizer
198 The C<lex_tokenizer> takes as argument a L<PPI::Tokenizer> object. It
199 lexes the token stream from the tokenizer into a L<PPI::Document> object.
201 Returns a L<PPI::Document> object, or C<undef> on error.
206 my $self = ref $_[0] ? shift : shift->new;
207 my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer');
208 return $self->_error(
209 "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
212 # Create the empty document
213 my $Document = PPI::Document->new;
215 # Lex the token stream into the document
216 $self->{Tokenizer} = $Tokenizer;
218 $self->_lex_document($Document);
221 # If an error occurs DESTROY the partially built document.
223 if ( _INSTANCE($@, 'PPI::Exception') ) {
224 return $self->_error( $@->message );
226 return $self->_error( $errstr );
237 #####################################################################
238 # Lex Methods - Document Object
242 =begin testing _lex_document 3
244 # Validate the creation of a null statement
246 my $token = new_ok( 'PPI::Token::Structure' => [ ')' ] );
247 my $brace = new_ok( 'PPI::Statement::UnmatchedBrace' => [ $token ] );
248 is( $brace->content, ')', '->content ok' );
256 my ($self, $Document) = @_;
258 # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef;
260 # Start the processing loop
262 while ( ref($Token = $self->_get_token) ) {
263 # Add insignificant tokens directly beneath us
264 unless ( $Token->significant ) {
265 $self->_add_element( $Document, $Token );
269 if ( $Token->content eq ';' ) {
270 # It's a semi-colon on it's own.
271 # We call this a null statement.
274 PPI::Statement::Null->new($Token),
279 # Handle anything other than a structural element
280 unless ( ref $Token eq 'PPI::Token::Structure' ) {
281 # Determine the class for the Statement, and create it
282 my $Statement = $self->_statement($Document, $Token)->new($Token);
284 # Move the lexing down into the statement
285 $self->_add_delayed( $Document );
286 $self->_add_element( $Document, $Statement );
287 $self->_lex_statement( $Statement );
292 # Is this the opening of a structure?
293 if ( $Token->__LEXER__opens ) {
294 # This should actually have a Statement instead
295 $self->_rollback( $Token );
296 my $Statement = PPI::Statement->new;
297 $self->_add_element( $Document, $Statement );
298 $self->_lex_statement( $Statement );
302 # Is this the close of a structure.
303 if ( $Token->__LEXER__closes ) {
304 # Because we are at the top of the tree, this is an error.
305 # This means either a mis-parsing, or an mistake in the code.
306 # To handle this, we create a "Naked Close" statement
307 $self->_add_element( $Document,
308 PPI::Statement::UnmatchedBrace->new($Token)
313 # Shouldn't be able to get here
314 PPI::Exception->throw('Lexer reached an illegal state');
317 # Did we leave the main loop because of a Tokenizer error?
318 unless ( defined $Token ) {
319 my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : '';
320 $errstr ||= 'Unknown Tokenizer Error';
321 PPI::Exception->throw($errstr);
324 # No error, it's just the end of file.
325 # Add any insignificant trailing tokens.
326 $self->_add_delayed( $Document );
328 # If the Tokenizer has any v6 blocks to attach, do so now.
329 # Checking once at the end is faster than adding a special
330 # case check for every statement parsed.
331 my $perl6 = $self->{Tokenizer}->{'perl6'};
333 my $includes = $Document->find( 'PPI::Statement::Include::Perl6' );
334 foreach my $include ( @$includes ) {
336 PPI::Exception->throw('Failed to find a perl6 section');
338 $include->{perl6} = shift @$perl6;
349 #####################################################################
350 # Lex Methods - Statement Object
352 use vars qw{%STATEMENT_CLASSES};
354 # Keyword -> Statement Subclass
355 %STATEMENT_CLASSES = (
356 # Things that affect the timing of execution
357 'BEGIN' => 'PPI::Statement::Scheduled',
358 'CHECK' => 'PPI::Statement::Scheduled',
359 'UNITCHECK' => 'PPI::Statement::Scheduled',
360 'INIT' => 'PPI::Statement::Scheduled',
361 'END' => 'PPI::Statement::Scheduled',
363 # Loading and context statement
364 'package' => 'PPI::Statement::Package',
365 # 'use' => 'PPI::Statement::Include',
366 'no' => 'PPI::Statement::Include',
367 'require' => 'PPI::Statement::Include',
369 # Various declarations
370 'my' => 'PPI::Statement::Variable',
371 'local' => 'PPI::Statement::Variable',
372 'our' => 'PPI::Statement::Variable',
373 'state' => 'PPI::Statement::Variable',
374 # Statements starting with 'sub' could be any one of...
375 # 'sub' => 'PPI::Statement::Sub',
376 # 'sub' => 'PPI::Statement::Scheduled',
377 # 'sub' => 'PPI::Statement',
380 'if' => 'PPI::Statement::Compound',
381 'unless' => 'PPI::Statement::Compound',
382 'for' => 'PPI::Statement::Compound',
383 'foreach' => 'PPI::Statement::Compound',
384 'while' => 'PPI::Statement::Compound',
385 'until' => 'PPI::Statement::Compound',
388 'given' => 'PPI::Statement::Given',
389 'when' => 'PPI::Statement::When',
390 'default' => 'PPI::Statement::When',
392 # Various ways of breaking out of scope
393 'redo' => 'PPI::Statement::Break',
394 'next' => 'PPI::Statement::Break',
395 'last' => 'PPI::Statement::Break',
396 'return' => 'PPI::Statement::Break',
397 'goto' => 'PPI::Statement::Break',
399 # Special sections of the file
400 '__DATA__' => 'PPI::Statement::Data',
401 '__END__' => 'PPI::Statement::End',
406 my ($self, $Parent, $Token) = @_;
408 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
409 # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
411 # Check for things like ( parent => ... )
412 if ( $Parent->isa('PPI::Structure::List') ) {
413 if ( $Token->isa('PPI::Token::Word') ) {
414 # Is the next significant token a =>
415 # Read ahead to the next significant token
417 while ( $Next = $self->_get_token ) {
418 unless ( $Next->significant ) {
419 push @{$self->{delayed}}, $Next;
420 # $self->_delay_element( $Next );
426 $Next->isa('PPI::Token::Operator')
428 $Next->content eq '=>'
430 # Is an ordinary expression
431 $self->_rollback( $Next );
432 return 'PPI::Statement::Expression';
438 # Rollback and continue
439 $self->_rollback( $Next );
443 # Is it a token in our known classes list
444 my $class = $STATEMENT_CLASSES{$Token->content};
446 # Handle potential barewords for subscripts
447 if ( $Parent->isa('PPI::Structure::Subscript') ) {
448 # Fast obvious case, just an expression
449 unless ( $class and $class->isa('PPI::Statement::Expression') ) {
450 return 'PPI::Statement::Expression';
453 # This is something like "my" or "our" etc... more subtle.
454 # Check if the next token is a closing curly brace.
455 # This means we are something like $h{my}
457 while ( $Next = $self->_get_token ) {
458 unless ( $Next->significant ) {
459 push @{$self->{delayed}}, $Next;
460 # $self->_delay_element( $Next );
464 # Found the next significant token.
465 # Is it a closing curly brace?
466 if ( $Next->content eq '}' ) {
467 $self->_rollback( $Next );
468 return 'PPI::Statement::Expression';
470 $self->_rollback( $Next );
475 # End of file... this means it is something like $h{our
476 # which is probably going to be $h{our} ... I think
477 $self->_rollback( $Next );
478 return 'PPI::Statement::Expression';
481 # If it's a token in our list, use that class
482 return $class if $class;
484 # Handle the more in-depth sub detection
485 if ( $Token->content eq 'sub' ) {
486 # Read ahead to the next significant token
488 while ( $Next = $self->_get_token ) {
489 unless ( $Next->significant ) {
490 push @{$self->{delayed}}, $Next;
491 # $self->_delay_element( $Next );
495 # Got the next significant token
496 my $sclass = $STATEMENT_CLASSES{$Next->content};
497 if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) {
498 $self->_rollback( $Next );
499 return 'PPI::Statement::Scheduled';
501 if ( $Next->isa('PPI::Token::Word') ) {
502 $self->_rollback( $Next );
503 return 'PPI::Statement::Sub';
506 ### Comment out these two, as they would return PPI::Statement anyway
507 # if ( $content eq '{' ) {
508 # Anonymous sub at start of statement
509 # return 'PPI::Statement';
512 # if ( $Next->isa('PPI::Token::Prototype') ) {
513 # Anonymous sub at start of statement
514 # return 'PPI::Statement';
517 # PPI::Statement is the safest fall-through
518 $self->_rollback( $Next );
519 return 'PPI::Statement';
522 # End of file... PPI::Statement::Sub is the most likely
523 $self->_rollback( $Next );
524 return 'PPI::Statement::Sub';
527 if ( $Token->content eq 'use' ) {
528 # Add a special case for "use v6" lines.
530 while ( $Next = $self->_get_token ) {
531 unless ( $Next->significant ) {
532 push @{$self->{delayed}}, $Next;
533 # $self->_delay_element( $Next );
537 # Found the next significant token.
539 if ( $Next->content eq 'v6' ) {
540 $self->_rollback( $Next );
541 return 'PPI::Statement::Include::Perl6';
543 $self->_rollback( $Next );
544 return 'PPI::Statement::Include';
548 # End of file... this means it is an incomplete use
549 # line, just treat it as a normal include.
550 $self->_rollback( $Next );
551 return 'PPI::Statement::Include';
554 # If our parent is a Condition, we are an Expression
555 if ( $Parent->isa('PPI::Structure::Condition') ) {
556 return 'PPI::Statement::Expression';
559 # If our parent is a List, we are also an expression
560 if ( $Parent->isa('PPI::Structure::List') ) {
561 return 'PPI::Statement::Expression';
564 # Switch statements use expressions, as well.
566 $Parent->isa('PPI::Structure::Given')
568 $Parent->isa('PPI::Structure::When')
570 return 'PPI::Statement::Expression';
573 if ( _INSTANCE($Token, 'PPI::Token::Label') ) {
574 return 'PPI::Statement::Compound';
577 # Beyond that, I have no idea for the moment.
578 # Just keep adding more conditions above this.
579 return 'PPI::Statement';
583 my ($self, $Statement) = @_;
585 # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
587 # Handle some special statements
588 if ( $Statement->isa('PPI::Statement::End') ) {
589 return $self->_lex_end( $Statement );
592 # Begin processing tokens
594 while ( ref( $Token = $self->_get_token ) ) {
595 # Delay whitespace and comment tokens
596 unless ( $Token->significant ) {
597 push @{$self->{delayed}}, $Token;
598 # $self->_delay_element( $Token );
602 # Structual closes, and __DATA__ and __END__ tags implicitly
603 # end every type of statement
605 $Token->__LEXER__closes
607 $Token->isa('PPI::Token::Separator')
609 # Rollback and end the statement
610 return $self->_rollback( $Token );
613 # Normal statements never implicitly end
614 unless ( $Statement->__LEXER__normal ) {
615 # Have we hit an implicit end to the statement
616 unless ( $self->_continues( $Statement, $Token ) ) {
617 # Rollback and finish the statement
618 return $self->_rollback( $Token );
622 # Any normal character just gets added
623 unless ( $Token->isa('PPI::Token::Structure') ) {
624 $self->_add_element( $Statement, $Token );
628 # Handle normal statement terminators
629 if ( $Token->content eq ';' ) {
630 $self->_add_element( $Statement, $Token );
634 # Which leaves us with a new structure
636 # Determine the class for the structure and create it
637 my $method = $RESOLVE{$Token->content};
638 my $Structure = $self->$method($Statement)->new($Token);
640 # Move the lexing down into the Structure
641 $self->_add_delayed( $Statement );
642 $self->_add_element( $Statement, $Structure );
643 $self->_lex_structure( $Structure );
646 # Was it an error in the tokenizer?
647 unless ( defined $Token ) {
648 PPI::Exception->throw;
651 # No, it's just the end of the file...
652 # Roll back any insignificant tokens, they'll get added at the Document level
657 my ($self, $Statement) = @_;
659 # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1";
661 # End of the file, EVERYTHING is ours
663 while ( $Token = $self->_get_token ) {
664 # Inlined $Statement->__add_element($Token);
665 Scalar::Util::weaken(
666 $_PARENT{Scalar::Util::refaddr $Token} = $Statement
668 push @{$Statement->{children}}, $Token;
671 # Was it an error in the tokenizer?
672 unless ( defined $Token ) {
673 PPI::Exception->throw;
676 # No, it's just the end of the file...
677 # Roll back any insignificant tokens, they get added at the Document level
681 # For many statements, it can be dificult to determine the end-point.
682 # This method takes a statement and the next significant token, and attempts
683 # to determine if the there is a statement boundary between the two, or if
684 # the statement can continue with the token.
686 my ($self, $Statement, $Token) = @_;
688 # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
689 # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
691 # Handle the simple block case
694 $Statement->schildren == 1
696 $Statement->schild(0)->isa('PPI::Structure::Block')
701 # Alrighty then, there are only five implied end statement types,
702 # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, and ::When
704 unless ( ref($Statement) =~ /\b(?:Scheduled|Sub|Compound|Given|When)$/ ) {
708 # Of these five, ::Scheduled, ::Sub, ::Given, and ::When follow the same
709 # simple rule and can be handled first.
710 my @part = $Statement->schildren;
711 my $LastChild = $part[-1];
712 unless ( $Statement->isa('PPI::Statement::Compound') ) {
713 # If the last significant element of the statement is a block,
714 # then a scheduled statement is done, no questions asked.
715 return ! $LastChild->isa('PPI::Structure::Block');
718 # Now we get to compound statements, which kind of suck (to lex).
719 # However, of them all, the 'if' type, which includes unless, are
720 # relatively easy to handle compared to the others.
721 my $type = $Statement->type;
722 if ( $type eq 'if' ) {
723 # This should be one of the following
725 # if (EXPR) BLOCK else BLOCK
726 # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK
728 # We only implicitly end on a block
729 unless ( $LastChild->isa('PPI::Structure::Block') ) {
731 # if (EXPR) BLOCK else ...
732 # if (EXPR) BLOCK elsif (EXPR) BLOCK ...
736 # If the token before the block is an 'else',
737 # it's over, no matter what.
738 my $NextLast = $Statement->schild(-2);
742 $NextLast->isa('PPI::Token')
744 $NextLast->isa('PPI::Token::Word')
746 $NextLast->content eq 'else'
751 # Otherwise, we continue for 'elsif' or 'else' only.
753 $Token->isa('PPI::Token::Word')
755 $Token->content eq 'else'
757 $Token->content eq 'elsif'
766 if ( $type eq 'label' ) {
767 # We only have the label so far, could be any of
768 # LABEL while (EXPR) BLOCK
769 # LABEL while (EXPR) BLOCK continue BLOCK
770 # LABEL for (EXPR; EXPR; EXPR) BLOCK
771 # LABEL foreach VAR (LIST) BLOCK
772 # LABEL foreach VAR (LIST) BLOCK continue BLOCK
773 # LABEL BLOCK continue BLOCK
775 # Handle cases with a word after the label
777 $Token->isa('PPI::Token::Word')
779 $Token->content =~ /^(?:while|until|for|foreach)$/
784 # Handle labelled blocks
785 if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) {
792 # Handle the common "after round braces" case
793 if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) {
794 # LABEL while (EXPR) ...
795 # LABEL while (EXPR) ...
796 # LABEL for (EXPR; EXPR; EXPR) ...
797 # LABEL for VAR (LIST) ...
798 # LABEL foreach VAR (LIST) ...
799 # Only a block will do
800 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
803 if ( $type eq 'for' ) {
804 # LABEL for (EXPR; EXPR; EXPR) BLOCK
806 $LastChild->isa('PPI::Token::Word')
808 $LastChild->content =~ /^for(?:each)?\z/
813 $Token->isa('PPI::Token::Structure')
815 $Token->content eq '('
818 $Token->isa('PPI::Token::QuoteLike::Words')
823 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
824 # LABEL for VAR QW{} ...
825 # LABEL foreach VAR QW{} ...
826 # Only a block will do
827 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
830 # In this case, we can also behave like a foreach
833 } elsif ( $LastChild->isa('PPI::Structure::Block') ) {
834 # LABEL for (EXPR; EXPR; EXPR) BLOCK
835 # That's it, nothing can continue
838 } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
839 # LABEL for VAR QW{} ...
840 # LABEL foreach VAR QW{} ...
841 # Only a block will do
842 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
846 # Handle the common continue case
847 if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) {
848 # LABEL while (EXPR) BLOCK continue ...
849 # LABEL foreach VAR (LIST) BLOCK continue ...
850 # LABEL BLOCK continue ...
851 # Only a block will do
852 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
855 # Handle the common continuable block case
856 if ( $LastChild->isa('PPI::Structure::Block') ) {
857 # LABEL while (EXPR) BLOCK
858 # LABEL while (EXPR) BLOCK ...
859 # LABEL for (EXPR; EXPR; EXPR) BLOCK
860 # LABEL foreach VAR (LIST) BLOCK
861 # LABEL foreach VAR (LIST) BLOCK ...
863 # Is this the block for a continue?
864 if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) {
865 # LABEL while (EXPR) BLOCK continue BLOCK
866 # LABEL foreach VAR (LIST) BLOCK continue BLOCK
867 # LABEL BLOCK continue BLOCK
868 # That's it, nothing can continue this
872 # Only a continue will do
873 return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue';
876 if ( $type eq 'block' ) {
877 # LABEL BLOCK continue BLOCK
878 # Every possible case is covered in the common cases above
881 if ( $type eq 'while' ) {
882 # LABEL while (EXPR) BLOCK
883 # LABEL while (EXPR) BLOCK continue BLOCK
884 # LABEL until (EXPR) BLOCK
885 # LABEL until (EXPR) BLOCK continue BLOCK
886 # The only case not covered is the while ...
888 $LastChild->isa('PPI::Token::Word')
890 $LastChild->content eq 'while'
892 $LastChild->content eq 'until'
897 # Only a condition structure will do
898 return $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
902 if ( $type eq 'foreach' ) {
903 # LABEL foreach VAR (LIST) BLOCK
904 # LABEL foreach VAR (LIST) BLOCK continue BLOCK
905 # The only two cases that have not been covered already are
906 # 'foreach ...' and 'foreach VAR ...'
908 if ( $LastChild->isa('PPI::Token::Symbol') ) {
909 # LABEL foreach my $scalar ...
910 # Open round brace, or a quotewords
911 return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
912 return 1 if $Token->isa('PPI::Token::QuoteLike::Words');
916 if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) {
917 # There are three possibilities here
919 $Token->isa('PPI::Token::Word')
921 ($STATEMENT_CLASSES{ $Token->content } || '')
923 'PPI::Statement::Variable'
928 } elsif ( $Token->content =~ /^\$/ ) {
931 } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) {
933 } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) {
941 ($STATEMENT_CLASSES{ $LastChild->content } || '')
943 'PPI::Statement::Variable'
945 # LABEL foreach my ...
946 # Only a scalar will do
947 return $Token->content =~ /^\$/;
950 # Handle the rare for my $foo qw{bar} ... case
951 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
952 # LABEL for VAR QW ...
953 # LABEL foreach VAR QW ...
954 # Only a block will do
955 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
959 # Something we don't know about... what could it be
960 PPI::Exception->throw("Illegal state in '$type' compound statement");
967 #####################################################################
968 # Lex Methods - Structure Object
970 # Given a parent element, and a ( token to open a structure, determine
971 # the class that the structure should be.
973 my ($self, $Parent) = @_;
975 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
977 # Get the last significant element in the parent
978 my $Element = $Parent->schild(-1);
979 if ( _INSTANCE($Element, 'PPI::Token::Word') ) {
980 # Can it be determined because it is a keyword?
981 my $rclass = $ROUND{$Element->content};
982 return $rclass if $rclass;
985 # If we are part of a for or foreach statement, we are a ForLoop
986 if ( $Parent->isa('PPI::Statement::Compound') ) {
987 if ( $Parent->type =~ /^for(?:each)?$/ ) {
988 return 'PPI::Structure::For';
990 } elsif ( $Parent->isa('PPI::Statement::Given') ) {
991 return 'PPI::Structure::Given';
992 } elsif ( $Parent->isa('PPI::Statement::When') ) {
993 return 'PPI::Structure::When';
996 # Otherwise, it must be a list
998 # If the previous element is -> then we mark it as a dereference
999 if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) {
1000 $Element->{_dereference} = 1;
1003 'PPI::Structure::List'
1006 # Given a parent element, and a [ token to open a structure, determine
1007 # the class that the structure should be.
1009 my ($self, $Parent) = @_;
1011 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1013 # Get the last significant element in the parent
1014 my $Element = $Parent->schild(-1);
1016 # Is this a subscript, like $foo[1] or $foo{expr}
1019 if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) {
1021 $Element->{_dereference} = 1;
1022 return 'PPI::Structure::Subscript';
1024 if ( $Element->isa('PPI::Structure::Subscript') ) {
1026 return 'PPI::Structure::Subscript';
1028 if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) {
1030 return 'PPI::Structure::Subscript';
1032 # FIXME - More cases to catch
1035 # Otherwise, we assume that it's an anonymous arrayref constructor
1036 'PPI::Structure::Constructor';
1039 use vars qw{%CURLY_CLASSES};
1041 # Keyword -> Structure class maps
1044 'sub' => 'PPI::Structure::Block',
1045 'grep' => 'PPI::Structure::Block',
1046 'map' => 'PPI::Structure::Block',
1047 'sort' => 'PPI::Structure::Block',
1048 'do' => 'PPI::Structure::Block',
1051 '=' => 'PPI::Structure::Constructor',
1052 '||=' => 'PPI::Structure::Constructor',
1053 ',' => 'PPI::Structure::Constructor',
1054 '=>' => 'PPI::Structure::Constructor',
1058 # Given a parent element, and a { token to open a structure, determine
1059 # the class that the structure should be.
1061 my ($self, $Parent) = @_;
1063 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1065 # Get the last significant element in the parent
1066 my $Element = $Parent->schild(-1);
1067 my $content = $Element ? $Element->content : '';
1069 # Is this a subscript, like $foo[1] or $foo{expr}
1071 if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) {
1073 $Element->{_dereference} = 1;
1074 return 'PPI::Structure::Subscript';
1076 if ( $Element->isa('PPI::Structure::Subscript') ) {
1078 return 'PPI::Structure::Subscript';
1080 if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) {
1082 return 'PPI::Structure::Subscript';
1084 if ( $CURLY_CLASSES{$content} ) {
1086 return $CURLY_CLASSES{$content};
1090 # Are we in a compound statement
1091 if ( $Parent->isa('PPI::Statement::Compound') ) {
1092 # We will only encounter blocks in compound statements
1093 return 'PPI::Structure::Block';
1096 # Are we the second argument of use
1097 if ( $Parent->isa('PPI::Statement::Include') ) {
1098 if ( $Parent->schildren == 2 ) {
1099 # This is something like use constant { ... };
1100 return 'PPI::Structure::Constructor';
1104 # Unless we are at the start of the statement, everything else should be a block
1105 ### FIXME This is possibly a bad choice, but will have to do for now.
1106 return 'PPI::Structure::Block' if $Element;
1108 # Special case: Are we the param of a core function
1109 # i.e. map({ $_ => 1 } @foo)
1111 $Parent->isa('PPI::Statement')
1113 _INSTANCE($Parent->parent, 'PPI::Structure::List')
1115 my $function = $Parent->parent->parent->schild(-2);
1116 if ( $function and $function->content =~ /^(?:map|grep|sort)$/ ) {
1117 return 'PPI::Structure::Block';
1121 # We need to scan ahead.
1125 while ( $Next = $self->_get_token ) {
1126 unless ( $Next->significant ) {
1127 push @delayed, $Next;
1131 # If a closing curly, this is an anonymous hash-ref
1132 if ( ++$position == 1 and $Next->content eq '}' ) {
1133 $self->_buffer( splice(@delayed), $Next );
1134 return 'PPI::Structure::Constructor';
1137 # If it contains a => as the second thing,
1138 # this is also an anonymous hash-ref.
1139 if ( $position == 2 and $Next->content eq '=>' ) {
1140 $self->_buffer( splice(@delayed), $Next );
1141 return 'PPI::Structure::Constructor';
1144 # We only check the first two, then default to block
1145 if ( $position >= 3 ) {
1146 $self->_buffer( splice(@delayed), $Next );
1150 # Delay and continue
1151 push @delayed, $Next;
1154 # Hit the end of the document, or bailed out, go with block
1155 $self->_buffer( splice(@delayed) );
1156 if ( ref $Parent eq 'PPI::Statement' ) {
1157 bless $Parent, 'PPI::Statement::Compound';
1159 return 'PPI::Structure::Block';
1164 =begin testing _lex_structure 4
1166 # Validate the creation of a null statement
1168 my $token = new_ok( 'PPI::Token::Structure' => [ ';' ] );
1169 my $null = new_ok( 'PPI::Statement::Null' => [ $token ] );
1170 is( $null->content, ';', '->content ok' );
1173 # Validate the creation of an empty statement
1174 new_ok( 'PPI::Statement' => [ ] );
1180 sub _lex_structure {
1181 my ($self, $Structure) = @_;
1183 # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
1185 # Start the processing loop
1187 while ( ref($Token = $self->_get_token) ) {
1188 # Is this a direct type token
1189 unless ( $Token->significant ) {
1190 push @{$self->{delayed}}, $Token;
1191 # $self->_delay_element( $Token );
1195 # Anything other than a Structure starts a Statement
1196 unless ( $Token->isa('PPI::Token::Structure') ) {
1197 # Because _statement may well delay and rollback itself,
1198 # we need to add the delayed tokens early
1199 $self->_add_delayed( $Structure );
1201 # Determine the class for the Statement and create it
1202 my $Statement = $self->_statement($Structure, $Token)->new($Token);
1204 # Move the lexing down into the Statement
1205 $self->_add_element( $Structure, $Statement );
1206 $self->_lex_statement( $Statement );
1211 # Is this the opening of another structure directly inside us?
1212 if ( $Token->__LEXER__opens ) {
1213 # Rollback the Token, and recurse into the statement
1214 $self->_rollback( $Token );
1215 my $Statement = PPI::Statement->new;
1216 $self->_add_element( $Structure, $Statement );
1217 $self->_lex_statement( $Statement );
1221 # Is this the close of a structure ( which would be an error )
1222 if ( $Token->__LEXER__closes ) {
1223 # Is this OUR closing structure
1224 if ( $Token->content eq $Structure->start->__LEXER__opposite ) {
1225 # Add any delayed tokens, and the finishing token (the ugly way)
1226 $self->_add_delayed( $Structure );
1227 $Structure->{finish} = $Token;
1228 Scalar::Util::weaken(
1229 $_PARENT{Scalar::Util::refaddr $Token} = $Structure
1232 # Confirm that ForLoop structures are actually so, and
1233 # aren't really a list.
1234 if ( $Structure->isa('PPI::Structure::For') ) {
1235 if ( 2 > scalar grep {
1236 $_->isa('PPI::Statement')
1237 } $Structure->children ) {
1238 bless($Structure, 'PPI::Structure::List');
1244 # Unmatched closing brace.
1245 # Either they typed the wrong thing, or haven't put
1246 # one at all. Either way it's an error we need to
1247 # somehow handle gracefully. For now, we'll treat it
1248 # as implicitly ending the structure. This causes the
1249 # least damage across the various reasons why this
1250 # might have happened.
1251 return $self->_rollback( $Token );
1254 # It's a semi-colon on it's own, just inside the block.
1255 # This is a null statement.
1256 $self->_add_element(
1258 PPI::Statement::Null->new($Token),
1263 unless ( defined $Token ) {
1264 PPI::Exception->throw;
1267 # No, it's just the end of file.
1268 # Add any insignificant trailing tokens.
1269 $self->_add_delayed( $Structure );
1276 #####################################################################
1279 # Get the next token for processing, handling buffering
1281 shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
1284 # Old long version of the above
1286 # # First from the buffer
1287 # if ( @{$self->{buffer}} ) {
1288 # return shift @{$self->{buffer}};
1291 # # Then from the Tokenizer
1292 # $self->{Tokenizer}->get_token;
1295 # Delay the addition of a insignificant elements.
1296 # This ended up being inlined.
1297 # sub _delay_element {
1299 # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1";
1300 # push @{ $_[0]->{delayed} }, $_[1];
1303 # Add an Element to a Node, including any delayed Elements
1305 my ($self, $Parent, $Element) = @_;
1307 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1308 # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2";
1310 # Handle a special case, where a statement is not fully resolved
1311 if ( ref $Parent eq 'PPI::Statement' ) {
1312 my $first = $Parent->schild(0);
1313 my $second = $Parent->schild(1);
1314 if ( $first and $first->isa('PPI::Token::Label') and ! $second ) {
1315 # It's a labelled statement
1316 if ( $STATEMENT_CLASSES{$second->content} ) {
1317 bless $Parent, $STATEMENT_CLASSES{$second->content};
1322 # Add first the delayed, from the front, then the passed element
1323 foreach my $el ( @{$self->{delayed}} ) {
1324 Scalar::Util::weaken(
1325 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1327 # Inlined $Parent->__add_element($el);
1329 Scalar::Util::weaken(
1330 $_PARENT{Scalar::Util::refaddr $Element} = $Parent
1332 push @{$Parent->{children}}, @{$self->{delayed}}, $Element;
1334 # Clear the delayed elements
1335 $self->{delayed} = [];
1338 # Specifically just add any delayed tokens, if any.
1340 my ($self, $Parent) = @_;
1342 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1345 foreach my $el ( @{$self->{delayed}} ) {
1346 Scalar::Util::weaken(
1347 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1349 # Inlined $Parent->__add_element($el);
1351 push @{$Parent->{children}}, @{$self->{delayed}};
1353 # Clear the delayed elements
1354 $self->{delayed} = [];
1357 # Rollback the delayed tokens, plus any passed. Once all the tokens
1358 # have been moved back on to the buffer, the order should be.
1359 # <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <----
1363 # First, put any passed objects back
1365 unshift @{$self->{buffer}}, splice @_;
1368 # Then, put back anything delayed
1369 if ( @{$self->{delayed}} ) {
1370 unshift @{$self->{buffer}}, splice @{$self->{delayed}};
1376 # Partial rollback, just return a single list to the buffer
1380 # Put any passed objects back
1382 unshift @{$self->{buffer}}, splice @_;
1392 #####################################################################
1395 # Set the error message
1401 # Clear the error message.
1402 # Returns the object as a convenience.
1412 For any error that occurs, you can use the C<errstr>, as either
1413 a static or object method, to access the error message.
1415 If no error occurs for any particular action, C<errstr> will return false.
1427 #####################################################################
1430 # This is something of a future expansion... ignore it for now :)
1432 # use PPI::Statement::Sub ();
1434 # sub PPI::Statement::Sub::__LEXER__normal { '' }
1442 - Add optional support for some of the more common source filters
1444 - Some additional checks for blessing things into various Statement
1445 and Structure subclasses.
1449 See the L<support section|PPI/SUPPORT> in the main module.
1453 Adam Kennedy E<lt>adamk@cpan.orgE<gt>
1457 Copyright 2001 - 2009 Adam Kennedy.
1459 This program is free software; you can redistribute
1460 it and/or modify it under the same terms as Perl itself.
1462 The full text of the license can be found in the
1463 LICENSE file included with this module.