Add built local::lib
[catagits/Gitalist.git] / local-lib5 / lib / perl5 / PPI / Lexer.pm
CommitLineData
3fea05b9 1package PPI::Lexer;
2
3=pod
4
5=head1 NAME
6
7PPI::Lexer - The PPI Lexer
8
9=head1 SYNOPSIS
10
11 use PPI;
12
13 # Create a new Lexer
14 my $Lexer = PPI::Lexer->new;
15
16 # Build a PPI::Document object from a Token stream
17 my $Tokenizer = PPI::Tokenizer->load('My/Module.pm');
18 my $Document = $Lexer->lex_tokenizer($Tokenizer);
19
20 # Build a PPI::Document object for some raw source
21 my $source = "print 'Hello World!'; kill(Humans->all);";
22 $Document = $Lexer->lex_source($source);
23
24 # Build a PPI::Document object for a particular file name
25 $Document = $Lexer->lex_file('My/Module.pm');
26
27=head1 DESCRIPTION
28
29The is the L<PPI> Lexer. In the larger scheme of things, its job is to take
30token streams, in a variety of forms, and "lex" them into nested structures.
31
32Pretty much everything in this module happens behind the scenes at this
33point. In fact, at the moment you don't really need to instantiate the lexer
34at all, the three main methods will auto-instantiate themselves a
35C<PPI::Lexer> object as needed.
36
37All methods do a one-shot "lex this and give me a L<PPI::Document> object".
38
39In fact, if you are reading this, what you B<probably> want to do is to
40just "load a document", in which case you can do this in a much more
41direct and concise manner with one of the following.
42
43 use PPI;
44
45 $Document = PPI::Document->load( $filename );
46 $Document = PPI::Document->new( $string );
47
48See L<PPI::Document> for more details.
49
50For more unusual tasks, by all means forge onwards.
51
52=head1 METHODS
53
54=cut
55
56use strict;
57use Scalar::Util ();
58use Params::Util qw{_STRING _INSTANCE};
59use List::MoreUtils ();
60use PPI ();
61use PPI::Exception ();
62
63use vars qw{$VERSION $errstr *_PARENT %ROUND %RESOLVE};
64BEGIN {
65 $VERSION = '1.206';
66 $errstr = '';
67
68 # Faster than having another method call just
69 # to set the structure finish token.
70 *_PARENT = *PPI::Element::_PARENT;
71
72 # Keyword -> Structure class maps
73 %ROUND = (
74 # Conditions
75 'if' => 'PPI::Structure::Condition',
76 'elsif' => 'PPI::Structure::Condition',
77 'unless' => 'PPI::Structure::Condition',
78 'while' => 'PPI::Structure::Condition',
79 'until' => 'PPI::Structure::Condition',
80
81 # For(each)
82 'for' => 'PPI::Structure::For',
83 'foreach' => 'PPI::Structure::For',
84 );
85
86 # Opening brace to refining method
87 %RESOLVE = (
88 '(' => '_round',
89 '[' => '_square',
90 '{' => '_curly',
91 );
92}
93
94
95
96
97
98#####################################################################
99# Constructor
100
101=pod
102
103=head2 new
104
105The C<new> constructor creates a new C<PPI::Lexer> object. The object itself
106is merely used to hold various buffers and state data during the lexing
107process, and holds no significant data between -E<gt>lex_xxxxx calls.
108
109Returns a new C<PPI::Lexer> object
110
111=cut
112
113sub new {
114 my $class = shift->_clear;
115 bless {
116 Tokenizer => undef, # Where we store the tokenizer for a run
117 buffer => [], # The input token buffer
118 delayed => [], # The "delayed insignificant tokens" buffer
119 }, $class;
120}
121
122
123
124
125
126#####################################################################
127# Main Lexing Methods
128
129=pod
130
131=head2 lex_file $filename
132
133The C<lex_file> method takes a filename as argument. It then loads the file,
134creates a L<PPI::Tokenizer> for the content and lexes the token stream
135produced by the tokenizer. Basically, a sort of all-in-one method for
136getting a L<PPI::Document> object from a file name.
137
138Returns a L<PPI::Document> object, or C<undef> on error.
139
140=cut
141
142sub lex_file {
143 my $self = ref $_[0] ? shift : shift->new;
144 my $file = _STRING(shift);
145 unless ( defined $file ) {
146 return $self->_error("Did not pass a filename to PPI::Lexer::lex_file");
147 }
148
149 # Create the Tokenizer
150 my $Tokenizer = eval {
151 PPI::Tokenizer->new( $file );
152 };
153 if ( _INSTANCE($@, 'PPI::Exception') ) {
154 return $self->_error( $@->message );
155 } elsif ( $@ ) {
156 return $self->_error( $errstr );
157 }
158
159 $self->lex_tokenizer( $Tokenizer );
160}
161
162=pod
163
164=head2 lex_source $string
165
166The C<lex_source> method takes a normal scalar string as argument. It
167creates a L<PPI::Tokenizer> object for the string, and then lexes the
168resulting token stream.
169
170Returns a L<PPI::Document> object, or C<undef> on error.
171
172=cut
173
174sub lex_source {
175 my $self = ref $_[0] ? shift : shift->new;
176 my $source = shift;
177 unless ( defined $source and not ref $source ) {
178 return $self->_error("Did not pass a string to PPI::Lexer::lex_source");
179 }
180
181 # Create the Tokenizer and hand off to the next method
182 my $Tokenizer = eval {
183 PPI::Tokenizer->new( \$source );
184 };
185 if ( _INSTANCE($@, 'PPI::Exception') ) {
186 return $self->_error( $@->message );
187 } elsif ( $@ ) {
188 return $self->_error( $errstr );
189 }
190
191 $self->lex_tokenizer( $Tokenizer );
192}
193
194=pod
195
196=head2 lex_tokenizer $Tokenizer
197
198The C<lex_tokenizer> takes as argument a L<PPI::Tokenizer> object. It
199lexes the token stream from the tokenizer into a L<PPI::Document> object.
200
201Returns a L<PPI::Document> object, or C<undef> on error.
202
203=cut
204
205sub lex_tokenizer {
206 my $self = ref $_[0] ? shift : shift->new;
207 my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer');
208 return $self->_error(
209 "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer"
210 ) unless $Tokenizer;
211
212 # Create the empty document
213 my $Document = PPI::Document->new;
214
215 # Lex the token stream into the document
216 $self->{Tokenizer} = $Tokenizer;
217 eval {
218 $self->_lex_document($Document);
219 };
220 if ( $@ ) {
221 # If an error occurs DESTROY the partially built document.
222 undef $Document;
223 if ( _INSTANCE($@, 'PPI::Exception') ) {
224 return $self->_error( $@->message );
225 } else {
226 return $self->_error( $errstr );
227 }
228 }
229
230 return $Document;
231}
232
233
234
235
236
237#####################################################################
238# Lex Methods - Document Object
239
240=pod
241
242=begin testing _lex_document 3
243
244# Validate the creation of a null statement
245SCOPE: {
246 my $token = new_ok( 'PPI::Token::Structure' => [ ')' ] );
247 my $brace = new_ok( 'PPI::Statement::UnmatchedBrace' => [ $token ] );
248 is( $brace->content, ')', '->content ok' );
249}
250
251=end testing
252
253=cut
254
255sub _lex_document {
256 my ($self, $Document) = @_;
257 # my $self = shift;
258 # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef;
259
260 # Start the processing loop
261 my $Token;
262 while ( ref($Token = $self->_get_token) ) {
263 # Add insignificant tokens directly beneath us
264 unless ( $Token->significant ) {
265 $self->_add_element( $Document, $Token );
266 next;
267 }
268
269 if ( $Token->content eq ';' ) {
270 # It's a semi-colon on it's own.
271 # We call this a null statement.
272 $self->_add_element(
273 $Document,
274 PPI::Statement::Null->new($Token),
275 );
276 next;
277 }
278
279 # Handle anything other than a structural element
280 unless ( ref $Token eq 'PPI::Token::Structure' ) {
281 # Determine the class for the Statement, and create it
282 my $Statement = $self->_statement($Document, $Token)->new($Token);
283
284 # Move the lexing down into the statement
285 $self->_add_delayed( $Document );
286 $self->_add_element( $Document, $Statement );
287 $self->_lex_statement( $Statement );
288
289 next;
290 }
291
292 # Is this the opening of a structure?
293 if ( $Token->__LEXER__opens ) {
294 # This should actually have a Statement instead
295 $self->_rollback( $Token );
296 my $Statement = PPI::Statement->new;
297 $self->_add_element( $Document, $Statement );
298 $self->_lex_statement( $Statement );
299 next;
300 }
301
302 # Is this the close of a structure.
303 if ( $Token->__LEXER__closes ) {
304 # Because we are at the top of the tree, this is an error.
305 # This means either a mis-parsing, or an mistake in the code.
306 # To handle this, we create a "Naked Close" statement
307 $self->_add_element( $Document,
308 PPI::Statement::UnmatchedBrace->new($Token)
309 );
310 next;
311 }
312
313 # Shouldn't be able to get here
314 PPI::Exception->throw('Lexer reached an illegal state');
315 }
316
317 # Did we leave the main loop because of a Tokenizer error?
318 unless ( defined $Token ) {
319 my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : '';
320 $errstr ||= 'Unknown Tokenizer Error';
321 PPI::Exception->throw($errstr);
322 }
323
324 # No error, it's just the end of file.
325 # Add any insignificant trailing tokens.
326 $self->_add_delayed( $Document );
327
328 # If the Tokenizer has any v6 blocks to attach, do so now.
329 # Checking once at the end is faster than adding a special
330 # case check for every statement parsed.
331 my $perl6 = $self->{Tokenizer}->{'perl6'};
332 if ( @$perl6 ) {
333 my $includes = $Document->find( 'PPI::Statement::Include::Perl6' );
334 foreach my $include ( @$includes ) {
335 unless ( @$perl6 ) {
336 PPI::Exception->throw('Failed to find a perl6 section');
337 }
338 $include->{perl6} = shift @$perl6;
339 }
340 }
341
342 return 1;
343}
344
345
346
347
348
349#####################################################################
350# Lex Methods - Statement Object
351
352use vars qw{%STATEMENT_CLASSES};
353BEGIN {
354 # Keyword -> Statement Subclass
355 %STATEMENT_CLASSES = (
356 # Things that affect the timing of execution
357 'BEGIN' => 'PPI::Statement::Scheduled',
358 'CHECK' => 'PPI::Statement::Scheduled',
359 'UNITCHECK' => 'PPI::Statement::Scheduled',
360 'INIT' => 'PPI::Statement::Scheduled',
361 'END' => 'PPI::Statement::Scheduled',
362
363 # Loading and context statement
364 'package' => 'PPI::Statement::Package',
365 # 'use' => 'PPI::Statement::Include',
366 'no' => 'PPI::Statement::Include',
367 'require' => 'PPI::Statement::Include',
368
369 # Various declarations
370 'my' => 'PPI::Statement::Variable',
371 'local' => 'PPI::Statement::Variable',
372 'our' => 'PPI::Statement::Variable',
373 'state' => 'PPI::Statement::Variable',
374 # Statements starting with 'sub' could be any one of...
375 # 'sub' => 'PPI::Statement::Sub',
376 # 'sub' => 'PPI::Statement::Scheduled',
377 # 'sub' => 'PPI::Statement',
378
379 # Compound statement
380 'if' => 'PPI::Statement::Compound',
381 'unless' => 'PPI::Statement::Compound',
382 'for' => 'PPI::Statement::Compound',
383 'foreach' => 'PPI::Statement::Compound',
384 'while' => 'PPI::Statement::Compound',
385 'until' => 'PPI::Statement::Compound',
386
387 # Switch statement
388 'given' => 'PPI::Statement::Given',
389 'when' => 'PPI::Statement::When',
390 'default' => 'PPI::Statement::When',
391
392 # Various ways of breaking out of scope
393 'redo' => 'PPI::Statement::Break',
394 'next' => 'PPI::Statement::Break',
395 'last' => 'PPI::Statement::Break',
396 'return' => 'PPI::Statement::Break',
397 'goto' => 'PPI::Statement::Break',
398
399 # Special sections of the file
400 '__DATA__' => 'PPI::Statement::Data',
401 '__END__' => 'PPI::Statement::End',
402 );
403}
404
405sub _statement {
406 my ($self, $Parent, $Token) = @_;
407 # my $self = shift;
408 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
409 # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
410
411 # Check for things like ( parent => ... )
412 if ( $Parent->isa('PPI::Structure::List') ) {
413 if ( $Token->isa('PPI::Token::Word') ) {
414 # Is the next significant token a =>
415 # Read ahead to the next significant token
416 my $Next;
417 while ( $Next = $self->_get_token ) {
418 unless ( $Next->significant ) {
419 push @{$self->{delayed}}, $Next;
420 # $self->_delay_element( $Next );
421 next;
422 }
423
424 # Got the next token
425 if (
426 $Next->isa('PPI::Token::Operator')
427 and
428 $Next->content eq '=>'
429 ) {
430 # Is an ordinary expression
431 $self->_rollback( $Next );
432 return 'PPI::Statement::Expression';
433 } else {
434 last;
435 }
436 }
437
438 # Rollback and continue
439 $self->_rollback( $Next );
440 }
441 }
442
443 # Is it a token in our known classes list
444 my $class = $STATEMENT_CLASSES{$Token->content};
445
446 # Handle potential barewords for subscripts
447 if ( $Parent->isa('PPI::Structure::Subscript') ) {
448 # Fast obvious case, just an expression
449 unless ( $class and $class->isa('PPI::Statement::Expression') ) {
450 return 'PPI::Statement::Expression';
451 }
452
453 # This is something like "my" or "our" etc... more subtle.
454 # Check if the next token is a closing curly brace.
455 # This means we are something like $h{my}
456 my $Next;
457 while ( $Next = $self->_get_token ) {
458 unless ( $Next->significant ) {
459 push @{$self->{delayed}}, $Next;
460 # $self->_delay_element( $Next );
461 next;
462 }
463
464 # Found the next significant token.
465 # Is it a closing curly brace?
466 if ( $Next->content eq '}' ) {
467 $self->_rollback( $Next );
468 return 'PPI::Statement::Expression';
469 } else {
470 $self->_rollback( $Next );
471 return $class;
472 }
473 }
474
475 # End of file... this means it is something like $h{our
476 # which is probably going to be $h{our} ... I think
477 $self->_rollback( $Next );
478 return 'PPI::Statement::Expression';
479 }
480
481 # If it's a token in our list, use that class
482 return $class if $class;
483
484 # Handle the more in-depth sub detection
485 if ( $Token->content eq 'sub' ) {
486 # Read ahead to the next significant token
487 my $Next;
488 while ( $Next = $self->_get_token ) {
489 unless ( $Next->significant ) {
490 push @{$self->{delayed}}, $Next;
491 # $self->_delay_element( $Next );
492 next;
493 }
494
495 # Got the next significant token
496 my $sclass = $STATEMENT_CLASSES{$Next->content};
497 if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) {
498 $self->_rollback( $Next );
499 return 'PPI::Statement::Scheduled';
500 }
501 if ( $Next->isa('PPI::Token::Word') ) {
502 $self->_rollback( $Next );
503 return 'PPI::Statement::Sub';
504 }
505
506 ### Comment out these two, as they would return PPI::Statement anyway
507 # if ( $content eq '{' ) {
508 # Anonymous sub at start of statement
509 # return 'PPI::Statement';
510 # }
511 #
512 # if ( $Next->isa('PPI::Token::Prototype') ) {
513 # Anonymous sub at start of statement
514 # return 'PPI::Statement';
515 # }
516
517 # PPI::Statement is the safest fall-through
518 $self->_rollback( $Next );
519 return 'PPI::Statement';
520 }
521
522 # End of file... PPI::Statement::Sub is the most likely
523 $self->_rollback( $Next );
524 return 'PPI::Statement::Sub';
525 }
526
527 if ( $Token->content eq 'use' ) {
528 # Add a special case for "use v6" lines.
529 my $Next;
530 while ( $Next = $self->_get_token ) {
531 unless ( $Next->significant ) {
532 push @{$self->{delayed}}, $Next;
533 # $self->_delay_element( $Next );
534 next;
535 }
536
537 # Found the next significant token.
538 # Is it a v6 use?
539 if ( $Next->content eq 'v6' ) {
540 $self->_rollback( $Next );
541 return 'PPI::Statement::Include::Perl6';
542 } else {
543 $self->_rollback( $Next );
544 return 'PPI::Statement::Include';
545 }
546 }
547
548 # End of file... this means it is an incomplete use
549 # line, just treat it as a normal include.
550 $self->_rollback( $Next );
551 return 'PPI::Statement::Include';
552 }
553
554 # If our parent is a Condition, we are an Expression
555 if ( $Parent->isa('PPI::Structure::Condition') ) {
556 return 'PPI::Statement::Expression';
557 }
558
559 # If our parent is a List, we are also an expression
560 if ( $Parent->isa('PPI::Structure::List') ) {
561 return 'PPI::Statement::Expression';
562 }
563
564 # Switch statements use expressions, as well.
565 if (
566 $Parent->isa('PPI::Structure::Given')
567 or
568 $Parent->isa('PPI::Structure::When')
569 ) {
570 return 'PPI::Statement::Expression';
571 }
572
573 if ( _INSTANCE($Token, 'PPI::Token::Label') ) {
574 return 'PPI::Statement::Compound';
575 }
576
577 # Beyond that, I have no idea for the moment.
578 # Just keep adding more conditions above this.
579 return 'PPI::Statement';
580}
581
582sub _lex_statement {
583 my ($self, $Statement) = @_;
584 # my $self = shift;
585 # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
586
587 # Handle some special statements
588 if ( $Statement->isa('PPI::Statement::End') ) {
589 return $self->_lex_end( $Statement );
590 }
591
592 # Begin processing tokens
593 my $Token;
594 while ( ref( $Token = $self->_get_token ) ) {
595 # Delay whitespace and comment tokens
596 unless ( $Token->significant ) {
597 push @{$self->{delayed}}, $Token;
598 # $self->_delay_element( $Token );
599 next;
600 }
601
602 # Structual closes, and __DATA__ and __END__ tags implicitly
603 # end every type of statement
604 if (
605 $Token->__LEXER__closes
606 or
607 $Token->isa('PPI::Token::Separator')
608 ) {
609 # Rollback and end the statement
610 return $self->_rollback( $Token );
611 }
612
613 # Normal statements never implicitly end
614 unless ( $Statement->__LEXER__normal ) {
615 # Have we hit an implicit end to the statement
616 unless ( $self->_continues( $Statement, $Token ) ) {
617 # Rollback and finish the statement
618 return $self->_rollback( $Token );
619 }
620 }
621
622 # Any normal character just gets added
623 unless ( $Token->isa('PPI::Token::Structure') ) {
624 $self->_add_element( $Statement, $Token );
625 next;
626 }
627
628 # Handle normal statement terminators
629 if ( $Token->content eq ';' ) {
630 $self->_add_element( $Statement, $Token );
631 return 1;
632 }
633
634 # Which leaves us with a new structure
635
636 # Determine the class for the structure and create it
637 my $method = $RESOLVE{$Token->content};
638 my $Structure = $self->$method($Statement)->new($Token);
639
640 # Move the lexing down into the Structure
641 $self->_add_delayed( $Statement );
642 $self->_add_element( $Statement, $Structure );
643 $self->_lex_structure( $Structure );
644 }
645
646 # Was it an error in the tokenizer?
647 unless ( defined $Token ) {
648 PPI::Exception->throw;
649 }
650
651 # No, it's just the end of the file...
652 # Roll back any insignificant tokens, they'll get added at the Document level
653 $self->_rollback;
654}
655
656sub _lex_end {
657 my ($self, $Statement) = @_;
658 # my $self = shift;
659 # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1";
660
661 # End of the file, EVERYTHING is ours
662 my $Token;
663 while ( $Token = $self->_get_token ) {
664 # Inlined $Statement->__add_element($Token);
665 Scalar::Util::weaken(
666 $_PARENT{Scalar::Util::refaddr $Token} = $Statement
667 );
668 push @{$Statement->{children}}, $Token;
669 }
670
671 # Was it an error in the tokenizer?
672 unless ( defined $Token ) {
673 PPI::Exception->throw;
674 }
675
676 # No, it's just the end of the file...
677 # Roll back any insignificant tokens, they get added at the Document level
678 $self->_rollback;
679}
680
681# For many statements, it can be dificult to determine the end-point.
682# This method takes a statement and the next significant token, and attempts
683# to determine if the there is a statement boundary between the two, or if
684# the statement can continue with the token.
685sub _continues {
686 my ($self, $Statement, $Token) = @_;
687 # my $self = shift;
688 # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1";
689 # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2";
690
691 # Handle the simple block case
692 # { print 1; }
693 if (
694 $Statement->schildren == 1
695 and
696 $Statement->schild(0)->isa('PPI::Structure::Block')
697 ) {
698 return '';
699 }
700
701 # Alrighty then, there are only five implied end statement types,
702 # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, and ::When
703 # statements.
704 unless ( ref($Statement) =~ /\b(?:Scheduled|Sub|Compound|Given|When)$/ ) {
705 return 1;
706 }
707
708 # Of these five, ::Scheduled, ::Sub, ::Given, and ::When follow the same
709 # simple rule and can be handled first.
710 my @part = $Statement->schildren;
711 my $LastChild = $part[-1];
712 unless ( $Statement->isa('PPI::Statement::Compound') ) {
713 # If the last significant element of the statement is a block,
714 # then a scheduled statement is done, no questions asked.
715 return ! $LastChild->isa('PPI::Structure::Block');
716 }
717
718 # Now we get to compound statements, which kind of suck (to lex).
719 # However, of them all, the 'if' type, which includes unless, are
720 # relatively easy to handle compared to the others.
721 my $type = $Statement->type;
722 if ( $type eq 'if' ) {
723 # This should be one of the following
724 # if (EXPR) BLOCK
725 # if (EXPR) BLOCK else BLOCK
726 # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK
727
728 # We only implicitly end on a block
729 unless ( $LastChild->isa('PPI::Structure::Block') ) {
730 # if (EXPR) ...
731 # if (EXPR) BLOCK else ...
732 # if (EXPR) BLOCK elsif (EXPR) BLOCK ...
733 return 1;
734 }
735
736 # If the token before the block is an 'else',
737 # it's over, no matter what.
738 my $NextLast = $Statement->schild(-2);
739 if (
740 $NextLast
741 and
742 $NextLast->isa('PPI::Token')
743 and
744 $NextLast->isa('PPI::Token::Word')
745 and
746 $NextLast->content eq 'else'
747 ) {
748 return '';
749 }
750
751 # Otherwise, we continue for 'elsif' or 'else' only.
752 if (
753 $Token->isa('PPI::Token::Word')
754 and (
755 $Token->content eq 'else'
756 or
757 $Token->content eq 'elsif'
758 )
759 ) {
760 return 1;
761 }
762
763 return '';
764 }
765
766 if ( $type eq 'label' ) {
767 # We only have the label so far, could be any of
768 # LABEL while (EXPR) BLOCK
769 # LABEL while (EXPR) BLOCK continue BLOCK
770 # LABEL for (EXPR; EXPR; EXPR) BLOCK
771 # LABEL foreach VAR (LIST) BLOCK
772 # LABEL foreach VAR (LIST) BLOCK continue BLOCK
773 # LABEL BLOCK continue BLOCK
774
775 # Handle cases with a word after the label
776 if (
777 $Token->isa('PPI::Token::Word')
778 and
779 $Token->content =~ /^(?:while|until|for|foreach)$/
780 ) {
781 return 1;
782 }
783
784 # Handle labelled blocks
785 if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) {
786 return 1;
787 }
788
789 return '';
790 }
791
792 # Handle the common "after round braces" case
793 if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) {
794 # LABEL while (EXPR) ...
795 # LABEL while (EXPR) ...
796 # LABEL for (EXPR; EXPR; EXPR) ...
797 # LABEL for VAR (LIST) ...
798 # LABEL foreach VAR (LIST) ...
799 # Only a block will do
800 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
801 }
802
803 if ( $type eq 'for' ) {
804 # LABEL for (EXPR; EXPR; EXPR) BLOCK
805 if (
806 $LastChild->isa('PPI::Token::Word')
807 and
808 $LastChild->content =~ /^for(?:each)?\z/
809 ) {
810 # LABEL for ...
811 if (
812 (
813 $Token->isa('PPI::Token::Structure')
814 and
815 $Token->content eq '('
816 )
817 or
818 $Token->isa('PPI::Token::QuoteLike::Words')
819 ) {
820 return 1;
821 }
822
823 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
824 # LABEL for VAR QW{} ...
825 # LABEL foreach VAR QW{} ...
826 # Only a block will do
827 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
828 }
829
830 # In this case, we can also behave like a foreach
831 $type = 'foreach';
832
833 } elsif ( $LastChild->isa('PPI::Structure::Block') ) {
834 # LABEL for (EXPR; EXPR; EXPR) BLOCK
835 # That's it, nothing can continue
836 return '';
837
838 } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
839 # LABEL for VAR QW{} ...
840 # LABEL foreach VAR QW{} ...
841 # Only a block will do
842 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
843 }
844 }
845
846 # Handle the common continue case
847 if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) {
848 # LABEL while (EXPR) BLOCK continue ...
849 # LABEL foreach VAR (LIST) BLOCK continue ...
850 # LABEL BLOCK continue ...
851 # Only a block will do
852 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
853 }
854
855 # Handle the common continuable block case
856 if ( $LastChild->isa('PPI::Structure::Block') ) {
857 # LABEL while (EXPR) BLOCK
858 # LABEL while (EXPR) BLOCK ...
859 # LABEL for (EXPR; EXPR; EXPR) BLOCK
860 # LABEL foreach VAR (LIST) BLOCK
861 # LABEL foreach VAR (LIST) BLOCK ...
862 # LABEL BLOCK ...
863 # Is this the block for a continue?
864 if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) {
865 # LABEL while (EXPR) BLOCK continue BLOCK
866 # LABEL foreach VAR (LIST) BLOCK continue BLOCK
867 # LABEL BLOCK continue BLOCK
868 # That's it, nothing can continue this
869 return '';
870 }
871
872 # Only a continue will do
873 return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue';
874 }
875
876 if ( $type eq 'block' ) {
877 # LABEL BLOCK continue BLOCK
878 # Every possible case is covered in the common cases above
879 }
880
881 if ( $type eq 'while' ) {
882 # LABEL while (EXPR) BLOCK
883 # LABEL while (EXPR) BLOCK continue BLOCK
884 # LABEL until (EXPR) BLOCK
885 # LABEL until (EXPR) BLOCK continue BLOCK
886 # The only case not covered is the while ...
887 if (
888 $LastChild->isa('PPI::Token::Word')
889 and (
890 $LastChild->content eq 'while'
891 or
892 $LastChild->content eq 'until'
893 )
894 ) {
895 # LABEL while ...
896 # LABEL until ...
897 # Only a condition structure will do
898 return $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
899 }
900 }
901
902 if ( $type eq 'foreach' ) {
903 # LABEL foreach VAR (LIST) BLOCK
904 # LABEL foreach VAR (LIST) BLOCK continue BLOCK
905 # The only two cases that have not been covered already are
906 # 'foreach ...' and 'foreach VAR ...'
907
908 if ( $LastChild->isa('PPI::Token::Symbol') ) {
909 # LABEL foreach my $scalar ...
910 # Open round brace, or a quotewords
911 return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '(';
912 return 1 if $Token->isa('PPI::Token::QuoteLike::Words');
913 return '';
914 }
915
916 if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) {
917 # There are three possibilities here
918 if (
919 $Token->isa('PPI::Token::Word')
920 and (
921 ($STATEMENT_CLASSES{ $Token->content } || '')
922 eq
923 'PPI::Statement::Variable'
924 )
925 ) {
926 # VAR == 'my ...'
927 return 1;
928 } elsif ( $Token->content =~ /^\$/ ) {
929 # VAR == '$scalar'
930 return 1;
931 } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) {
932 return 1;
933 } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) {
934 return 1;
935 } else {
936 return '';
937 }
938 }
939
940 if (
941 ($STATEMENT_CLASSES{ $LastChild->content } || '')
942 eq
943 'PPI::Statement::Variable'
944 ) {
945 # LABEL foreach my ...
946 # Only a scalar will do
947 return $Token->content =~ /^\$/;
948 }
949
950 # Handle the rare for my $foo qw{bar} ... case
951 if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) {
952 # LABEL for VAR QW ...
953 # LABEL foreach VAR QW ...
954 # Only a block will do
955 return $Token->isa('PPI::Token::Structure') && $Token->content eq '{';
956 }
957 }
958
959 # Something we don't know about... what could it be
960 PPI::Exception->throw("Illegal state in '$type' compound statement");
961}
962
963
964
965
966
967#####################################################################
968# Lex Methods - Structure Object
969
970# Given a parent element, and a ( token to open a structure, determine
971# the class that the structure should be.
972sub _round {
973 my ($self, $Parent) = @_;
974 # my $self = shift;
975 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
976
977 # Get the last significant element in the parent
978 my $Element = $Parent->schild(-1);
979 if ( _INSTANCE($Element, 'PPI::Token::Word') ) {
980 # Can it be determined because it is a keyword?
981 my $rclass = $ROUND{$Element->content};
982 return $rclass if $rclass;
983 }
984
985 # If we are part of a for or foreach statement, we are a ForLoop
986 if ( $Parent->isa('PPI::Statement::Compound') ) {
987 if ( $Parent->type =~ /^for(?:each)?$/ ) {
988 return 'PPI::Structure::For';
989 }
990 } elsif ( $Parent->isa('PPI::Statement::Given') ) {
991 return 'PPI::Structure::Given';
992 } elsif ( $Parent->isa('PPI::Statement::When') ) {
993 return 'PPI::Structure::When';
994 }
995
996 # Otherwise, it must be a list
997
998 # If the previous element is -> then we mark it as a dereference
999 if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) {
1000 $Element->{_dereference} = 1;
1001 }
1002
1003 'PPI::Structure::List'
1004}
1005
1006# Given a parent element, and a [ token to open a structure, determine
1007# the class that the structure should be.
1008sub _square {
1009 my ($self, $Parent) = @_;
1010 # my $self = shift;
1011 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1012
1013 # Get the last significant element in the parent
1014 my $Element = $Parent->schild(-1);
1015
1016 # Is this a subscript, like $foo[1] or $foo{expr}
1017
1018 if ( $Element ) {
1019 if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) {
1020 # $foo->[]
1021 $Element->{_dereference} = 1;
1022 return 'PPI::Structure::Subscript';
1023 }
1024 if ( $Element->isa('PPI::Structure::Subscript') ) {
1025 # $foo{}[]
1026 return 'PPI::Structure::Subscript';
1027 }
1028 if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) {
1029 # $foo[], @foo[]
1030 return 'PPI::Structure::Subscript';
1031 }
1032 # FIXME - More cases to catch
1033 }
1034
1035 # Otherwise, we assume that it's an anonymous arrayref constructor
1036 'PPI::Structure::Constructor';
1037}
1038
1039use vars qw{%CURLY_CLASSES};
1040BEGIN {
1041 # Keyword -> Structure class maps
1042 %CURLY_CLASSES = (
1043 # Blocks
1044 'sub' => 'PPI::Structure::Block',
1045 'grep' => 'PPI::Structure::Block',
1046 'map' => 'PPI::Structure::Block',
1047 'sort' => 'PPI::Structure::Block',
1048 'do' => 'PPI::Structure::Block',
1049
1050 # Hash constructors
1051 '=' => 'PPI::Structure::Constructor',
1052 '||=' => 'PPI::Structure::Constructor',
1053 ',' => 'PPI::Structure::Constructor',
1054 '=>' => 'PPI::Structure::Constructor',
1055 );
1056}
1057
1058# Given a parent element, and a { token to open a structure, determine
1059# the class that the structure should be.
1060sub _curly {
1061 my ($self, $Parent) = @_;
1062 # my $self = shift;
1063 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1064
1065 # Get the last significant element in the parent
1066 my $Element = $Parent->schild(-1);
1067 my $content = $Element ? $Element->content : '';
1068
1069 # Is this a subscript, like $foo[1] or $foo{expr}
1070 if ( $Element ) {
1071 if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) {
1072 # $foo->{}
1073 $Element->{_dereference} = 1;
1074 return 'PPI::Structure::Subscript';
1075 }
1076 if ( $Element->isa('PPI::Structure::Subscript') ) {
1077 # $foo[]{}
1078 return 'PPI::Structure::Subscript';
1079 }
1080 if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) {
1081 # $foo{}, @foo{}
1082 return 'PPI::Structure::Subscript';
1083 }
1084 if ( $CURLY_CLASSES{$content} ) {
1085 # Known type
1086 return $CURLY_CLASSES{$content};
1087 }
1088 }
1089
1090 # Are we in a compound statement
1091 if ( $Parent->isa('PPI::Statement::Compound') ) {
1092 # We will only encounter blocks in compound statements
1093 return 'PPI::Structure::Block';
1094 }
1095
1096 # Are we the second argument of use
1097 if ( $Parent->isa('PPI::Statement::Include') ) {
1098 if ( $Parent->schildren == 2 ) {
1099 # This is something like use constant { ... };
1100 return 'PPI::Structure::Constructor';
1101 }
1102 }
1103
1104 # Unless we are at the start of the statement, everything else should be a block
1105 ### FIXME This is possibly a bad choice, but will have to do for now.
1106 return 'PPI::Structure::Block' if $Element;
1107
1108 # Special case: Are we the param of a core function
1109 # i.e. map({ $_ => 1 } @foo)
1110 if (
1111 $Parent->isa('PPI::Statement')
1112 and
1113 _INSTANCE($Parent->parent, 'PPI::Structure::List')
1114 ) {
1115 my $function = $Parent->parent->parent->schild(-2);
1116 if ( $function and $function->content =~ /^(?:map|grep|sort)$/ ) {
1117 return 'PPI::Structure::Block';
1118 }
1119 }
1120
1121 # We need to scan ahead.
1122 my $Next;
1123 my $position = 0;
1124 my @delayed = ();
1125 while ( $Next = $self->_get_token ) {
1126 unless ( $Next->significant ) {
1127 push @delayed, $Next;
1128 next;
1129 }
1130
1131 # If a closing curly, this is an anonymous hash-ref
1132 if ( ++$position == 1 and $Next->content eq '}' ) {
1133 $self->_buffer( splice(@delayed), $Next );
1134 return 'PPI::Structure::Constructor';
1135 }
1136
1137 # If it contains a => as the second thing,
1138 # this is also an anonymous hash-ref.
1139 if ( $position == 2 and $Next->content eq '=>' ) {
1140 $self->_buffer( splice(@delayed), $Next );
1141 return 'PPI::Structure::Constructor';
1142 }
1143
1144 # We only check the first two, then default to block
1145 if ( $position >= 3 ) {
1146 $self->_buffer( splice(@delayed), $Next );
1147 last;
1148 }
1149
1150 # Delay and continue
1151 push @delayed, $Next;
1152 }
1153
1154 # Hit the end of the document, or bailed out, go with block
1155 $self->_buffer( splice(@delayed) );
1156 if ( ref $Parent eq 'PPI::Statement' ) {
1157 bless $Parent, 'PPI::Statement::Compound';
1158 }
1159 return 'PPI::Structure::Block';
1160}
1161
1162=pod
1163
1164=begin testing _lex_structure 4
1165
1166# Validate the creation of a null statement
1167SCOPE: {
1168 my $token = new_ok( 'PPI::Token::Structure' => [ ';' ] );
1169 my $null = new_ok( 'PPI::Statement::Null' => [ $token ] );
1170 is( $null->content, ';', '->content ok' );
1171}
1172
1173# Validate the creation of an empty statement
1174new_ok( 'PPI::Statement' => [ ] );
1175
1176=end testing
1177
1178=cut
1179
1180sub _lex_structure {
1181 my ($self, $Structure) = @_;
1182 # my $self = shift;
1183 # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1";
1184
1185 # Start the processing loop
1186 my $Token;
1187 while ( ref($Token = $self->_get_token) ) {
1188 # Is this a direct type token
1189 unless ( $Token->significant ) {
1190 push @{$self->{delayed}}, $Token;
1191 # $self->_delay_element( $Token );
1192 next;
1193 }
1194
1195 # Anything other than a Structure starts a Statement
1196 unless ( $Token->isa('PPI::Token::Structure') ) {
1197 # Because _statement may well delay and rollback itself,
1198 # we need to add the delayed tokens early
1199 $self->_add_delayed( $Structure );
1200
1201 # Determine the class for the Statement and create it
1202 my $Statement = $self->_statement($Structure, $Token)->new($Token);
1203
1204 # Move the lexing down into the Statement
1205 $self->_add_element( $Structure, $Statement );
1206 $self->_lex_statement( $Statement );
1207
1208 next;
1209 }
1210
1211 # Is this the opening of another structure directly inside us?
1212 if ( $Token->__LEXER__opens ) {
1213 # Rollback the Token, and recurse into the statement
1214 $self->_rollback( $Token );
1215 my $Statement = PPI::Statement->new;
1216 $self->_add_element( $Structure, $Statement );
1217 $self->_lex_statement( $Statement );
1218 next;
1219 }
1220
1221 # Is this the close of a structure ( which would be an error )
1222 if ( $Token->__LEXER__closes ) {
1223 # Is this OUR closing structure
1224 if ( $Token->content eq $Structure->start->__LEXER__opposite ) {
1225 # Add any delayed tokens, and the finishing token (the ugly way)
1226 $self->_add_delayed( $Structure );
1227 $Structure->{finish} = $Token;
1228 Scalar::Util::weaken(
1229 $_PARENT{Scalar::Util::refaddr $Token} = $Structure
1230 );
1231
1232 # Confirm that ForLoop structures are actually so, and
1233 # aren't really a list.
1234 if ( $Structure->isa('PPI::Structure::For') ) {
1235 if ( 2 > scalar grep {
1236 $_->isa('PPI::Statement')
1237 } $Structure->children ) {
1238 bless($Structure, 'PPI::Structure::List');
1239 }
1240 }
1241 return 1;
1242 }
1243
1244 # Unmatched closing brace.
1245 # Either they typed the wrong thing, or haven't put
1246 # one at all. Either way it's an error we need to
1247 # somehow handle gracefully. For now, we'll treat it
1248 # as implicitly ending the structure. This causes the
1249 # least damage across the various reasons why this
1250 # might have happened.
1251 return $self->_rollback( $Token );
1252 }
1253
1254 # It's a semi-colon on it's own, just inside the block.
1255 # This is a null statement.
1256 $self->_add_element(
1257 $Structure,
1258 PPI::Statement::Null->new($Token),
1259 );
1260 }
1261
1262 # Is this an error
1263 unless ( defined $Token ) {
1264 PPI::Exception->throw;
1265 }
1266
1267 # No, it's just the end of file.
1268 # Add any insignificant trailing tokens.
1269 $self->_add_delayed( $Structure );
1270}
1271
1272
1273
1274
1275
1276#####################################################################
1277# Support Methods
1278
1279# Get the next token for processing, handling buffering
1280sub _get_token {
1281 shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token;
1282}
1283
1284# Old long version of the above
1285# my $self = shift;
1286# # First from the buffer
1287# if ( @{$self->{buffer}} ) {
1288# return shift @{$self->{buffer}};
1289# }
1290#
1291# # Then from the Tokenizer
1292# $self->{Tokenizer}->get_token;
1293# }
1294
1295# Delay the addition of a insignificant elements.
1296# This ended up being inlined.
1297# sub _delay_element {
1298# my $self = shift;
1299# my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1";
1300# push @{ $_[0]->{delayed} }, $_[1];
1301# }
1302
1303# Add an Element to a Node, including any delayed Elements
1304sub _add_element {
1305 my ($self, $Parent, $Element) = @_;
1306 # my $self = shift;
1307 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1308 # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2";
1309
1310 # Handle a special case, where a statement is not fully resolved
1311 if ( ref $Parent eq 'PPI::Statement' ) {
1312 my $first = $Parent->schild(0);
1313 my $second = $Parent->schild(1);
1314 if ( $first and $first->isa('PPI::Token::Label') and ! $second ) {
1315 # It's a labelled statement
1316 if ( $STATEMENT_CLASSES{$second->content} ) {
1317 bless $Parent, $STATEMENT_CLASSES{$second->content};
1318 }
1319 }
1320 }
1321
1322 # Add first the delayed, from the front, then the passed element
1323 foreach my $el ( @{$self->{delayed}} ) {
1324 Scalar::Util::weaken(
1325 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1326 );
1327 # Inlined $Parent->__add_element($el);
1328 }
1329 Scalar::Util::weaken(
1330 $_PARENT{Scalar::Util::refaddr $Element} = $Parent
1331 );
1332 push @{$Parent->{children}}, @{$self->{delayed}}, $Element;
1333
1334 # Clear the delayed elements
1335 $self->{delayed} = [];
1336}
1337
1338# Specifically just add any delayed tokens, if any.
1339sub _add_delayed {
1340 my ($self, $Parent) = @_;
1341 # my $self = shift;
1342 # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1";
1343
1344 # Add any delayed
1345 foreach my $el ( @{$self->{delayed}} ) {
1346 Scalar::Util::weaken(
1347 $_PARENT{Scalar::Util::refaddr $el} = $Parent
1348 );
1349 # Inlined $Parent->__add_element($el);
1350 }
1351 push @{$Parent->{children}}, @{$self->{delayed}};
1352
1353 # Clear the delayed elements
1354 $self->{delayed} = [];
1355}
1356
1357# Rollback the delayed tokens, plus any passed. Once all the tokens
1358# have been moved back on to the buffer, the order should be.
1359# <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <----
1360sub _rollback {
1361 my $self = shift;
1362
1363 # First, put any passed objects back
1364 if ( @_ ) {
1365 unshift @{$self->{buffer}}, splice @_;
1366 }
1367
1368 # Then, put back anything delayed
1369 if ( @{$self->{delayed}} ) {
1370 unshift @{$self->{buffer}}, splice @{$self->{delayed}};
1371 }
1372
1373 1;
1374}
1375
1376# Partial rollback, just return a single list to the buffer
1377sub _buffer {
1378 my $self = shift;
1379
1380 # Put any passed objects back
1381 if ( @_ ) {
1382 unshift @{$self->{buffer}}, splice @_;
1383 }
1384
1385 1;
1386}
1387
1388
1389
1390
1391
1392#####################################################################
1393# Error Handling
1394
1395# Set the error message
1396sub _error {
1397 $errstr = $_[1];
1398 undef;
1399}
1400
1401# Clear the error message.
1402# Returns the object as a convenience.
1403sub _clear {
1404 $errstr = '';
1405 $_[0];
1406}
1407
1408=pod
1409
1410=head2 errstr
1411
1412For any error that occurs, you can use the C<errstr>, as either
1413a static or object method, to access the error message.
1414
1415If no error occurs for any particular action, C<errstr> will return false.
1416
1417=cut
1418
1419sub errstr {
1420 $errstr;
1421}
1422
1423
1424
1425
1426
1427#####################################################################
1428# PDOM Extensions
1429#
1430# This is something of a future expansion... ignore it for now :)
1431#
1432# use PPI::Statement::Sub ();
1433#
1434# sub PPI::Statement::Sub::__LEXER__normal { '' }
1435
14361;
1437
1438=pod
1439
1440=head1 TO DO
1441
1442- Add optional support for some of the more common source filters
1443
1444- Some additional checks for blessing things into various Statement
1445and Structure subclasses.
1446
1447=head1 SUPPORT
1448
1449See the L<support section|PPI/SUPPORT> in the main module.
1450
1451=head1 AUTHOR
1452
1453Adam Kennedy E<lt>adamk@cpan.orgE<gt>
1454
1455=head1 COPYRIGHT
1456
1457Copyright 2001 - 2009 Adam Kennedy.
1458
1459This program is free software; you can redistribute
1460it and/or modify it under the same terms as Perl itself.
1461
1462The full text of the license can be found in the
1463LICENSE file included with this module.
1464
1465=cut