Commit | Line | Data |
3fea05b9 |
1 | package PPI::Lexer; |
2 | |
3 | =pod |
4 | |
5 | =head1 NAME |
6 | |
7 | PPI::Lexer - The PPI Lexer |
8 | |
9 | =head1 SYNOPSIS |
10 | |
11 | use PPI; |
12 | |
13 | # Create a new Lexer |
14 | my $Lexer = PPI::Lexer->new; |
15 | |
16 | # Build a PPI::Document object from a Token stream |
17 | my $Tokenizer = PPI::Tokenizer->load('My/Module.pm'); |
18 | my $Document = $Lexer->lex_tokenizer($Tokenizer); |
19 | |
20 | # Build a PPI::Document object for some raw source |
21 | my $source = "print 'Hello World!'; kill(Humans->all);"; |
22 | $Document = $Lexer->lex_source($source); |
23 | |
24 | # Build a PPI::Document object for a particular file name |
25 | $Document = $Lexer->lex_file('My/Module.pm'); |
26 | |
27 | =head1 DESCRIPTION |
28 | |
29 | The is the L<PPI> Lexer. In the larger scheme of things, its job is to take |
30 | token streams, in a variety of forms, and "lex" them into nested structures. |
31 | |
32 | Pretty much everything in this module happens behind the scenes at this |
33 | point. In fact, at the moment you don't really need to instantiate the lexer |
34 | at all, the three main methods will auto-instantiate themselves a |
35 | C<PPI::Lexer> object as needed. |
36 | |
37 | All methods do a one-shot "lex this and give me a L<PPI::Document> object". |
38 | |
39 | In fact, if you are reading this, what you B<probably> want to do is to |
40 | just "load a document", in which case you can do this in a much more |
41 | direct and concise manner with one of the following. |
42 | |
43 | use PPI; |
44 | |
45 | $Document = PPI::Document->load( $filename ); |
46 | $Document = PPI::Document->new( $string ); |
47 | |
48 | See L<PPI::Document> for more details. |
49 | |
50 | For more unusual tasks, by all means forge onwards. |
51 | |
52 | =head1 METHODS |
53 | |
54 | =cut |
55 | |
56 | use strict; |
57 | use Scalar::Util (); |
58 | use Params::Util qw{_STRING _INSTANCE}; |
59 | use List::MoreUtils (); |
60 | use PPI (); |
61 | use PPI::Exception (); |
62 | |
63 | use vars qw{$VERSION $errstr *_PARENT %ROUND %RESOLVE}; |
64 | BEGIN { |
65 | $VERSION = '1.206'; |
66 | $errstr = ''; |
67 | |
68 | # Faster than having another method call just |
69 | # to set the structure finish token. |
70 | *_PARENT = *PPI::Element::_PARENT; |
71 | |
72 | # Keyword -> Structure class maps |
73 | %ROUND = ( |
74 | # Conditions |
75 | 'if' => 'PPI::Structure::Condition', |
76 | 'elsif' => 'PPI::Structure::Condition', |
77 | 'unless' => 'PPI::Structure::Condition', |
78 | 'while' => 'PPI::Structure::Condition', |
79 | 'until' => 'PPI::Structure::Condition', |
80 | |
81 | # For(each) |
82 | 'for' => 'PPI::Structure::For', |
83 | 'foreach' => 'PPI::Structure::For', |
84 | ); |
85 | |
86 | # Opening brace to refining method |
87 | %RESOLVE = ( |
88 | '(' => '_round', |
89 | '[' => '_square', |
90 | '{' => '_curly', |
91 | ); |
92 | } |
93 | |
94 | |
95 | |
96 | |
97 | |
98 | ##################################################################### |
99 | # Constructor |
100 | |
101 | =pod |
102 | |
103 | =head2 new |
104 | |
105 | The C<new> constructor creates a new C<PPI::Lexer> object. The object itself |
106 | is merely used to hold various buffers and state data during the lexing |
107 | process, and holds no significant data between -E<gt>lex_xxxxx calls. |
108 | |
109 | Returns a new C<PPI::Lexer> object |
110 | |
111 | =cut |
112 | |
113 | sub new { |
114 | my $class = shift->_clear; |
115 | bless { |
116 | Tokenizer => undef, # Where we store the tokenizer for a run |
117 | buffer => [], # The input token buffer |
118 | delayed => [], # The "delayed insignificant tokens" buffer |
119 | }, $class; |
120 | } |
121 | |
122 | |
123 | |
124 | |
125 | |
126 | ##################################################################### |
127 | # Main Lexing Methods |
128 | |
129 | =pod |
130 | |
131 | =head2 lex_file $filename |
132 | |
133 | The C<lex_file> method takes a filename as argument. It then loads the file, |
134 | creates a L<PPI::Tokenizer> for the content and lexes the token stream |
135 | produced by the tokenizer. Basically, a sort of all-in-one method for |
136 | getting a L<PPI::Document> object from a file name. |
137 | |
138 | Returns a L<PPI::Document> object, or C<undef> on error. |
139 | |
140 | =cut |
141 | |
142 | sub lex_file { |
143 | my $self = ref $_[0] ? shift : shift->new; |
144 | my $file = _STRING(shift); |
145 | unless ( defined $file ) { |
146 | return $self->_error("Did not pass a filename to PPI::Lexer::lex_file"); |
147 | } |
148 | |
149 | # Create the Tokenizer |
150 | my $Tokenizer = eval { |
151 | PPI::Tokenizer->new( $file ); |
152 | }; |
153 | if ( _INSTANCE($@, 'PPI::Exception') ) { |
154 | return $self->_error( $@->message ); |
155 | } elsif ( $@ ) { |
156 | return $self->_error( $errstr ); |
157 | } |
158 | |
159 | $self->lex_tokenizer( $Tokenizer ); |
160 | } |
161 | |
162 | =pod |
163 | |
164 | =head2 lex_source $string |
165 | |
166 | The C<lex_source> method takes a normal scalar string as argument. It |
167 | creates a L<PPI::Tokenizer> object for the string, and then lexes the |
168 | resulting token stream. |
169 | |
170 | Returns a L<PPI::Document> object, or C<undef> on error. |
171 | |
172 | =cut |
173 | |
174 | sub lex_source { |
175 | my $self = ref $_[0] ? shift : shift->new; |
176 | my $source = shift; |
177 | unless ( defined $source and not ref $source ) { |
178 | return $self->_error("Did not pass a string to PPI::Lexer::lex_source"); |
179 | } |
180 | |
181 | # Create the Tokenizer and hand off to the next method |
182 | my $Tokenizer = eval { |
183 | PPI::Tokenizer->new( \$source ); |
184 | }; |
185 | if ( _INSTANCE($@, 'PPI::Exception') ) { |
186 | return $self->_error( $@->message ); |
187 | } elsif ( $@ ) { |
188 | return $self->_error( $errstr ); |
189 | } |
190 | |
191 | $self->lex_tokenizer( $Tokenizer ); |
192 | } |
193 | |
194 | =pod |
195 | |
196 | =head2 lex_tokenizer $Tokenizer |
197 | |
198 | The C<lex_tokenizer> takes as argument a L<PPI::Tokenizer> object. It |
199 | lexes the token stream from the tokenizer into a L<PPI::Document> object. |
200 | |
201 | Returns a L<PPI::Document> object, or C<undef> on error. |
202 | |
203 | =cut |
204 | |
205 | sub lex_tokenizer { |
206 | my $self = ref $_[0] ? shift : shift->new; |
207 | my $Tokenizer = _INSTANCE(shift, 'PPI::Tokenizer'); |
208 | return $self->_error( |
209 | "Did not pass a PPI::Tokenizer object to PPI::Lexer::lex_tokenizer" |
210 | ) unless $Tokenizer; |
211 | |
212 | # Create the empty document |
213 | my $Document = PPI::Document->new; |
214 | |
215 | # Lex the token stream into the document |
216 | $self->{Tokenizer} = $Tokenizer; |
217 | eval { |
218 | $self->_lex_document($Document); |
219 | }; |
220 | if ( $@ ) { |
221 | # If an error occurs DESTROY the partially built document. |
222 | undef $Document; |
223 | if ( _INSTANCE($@, 'PPI::Exception') ) { |
224 | return $self->_error( $@->message ); |
225 | } else { |
226 | return $self->_error( $errstr ); |
227 | } |
228 | } |
229 | |
230 | return $Document; |
231 | } |
232 | |
233 | |
234 | |
235 | |
236 | |
237 | ##################################################################### |
238 | # Lex Methods - Document Object |
239 | |
240 | =pod |
241 | |
242 | =begin testing _lex_document 3 |
243 | |
244 | # Validate the creation of a null statement |
245 | SCOPE: { |
246 | my $token = new_ok( 'PPI::Token::Structure' => [ ')' ] ); |
247 | my $brace = new_ok( 'PPI::Statement::UnmatchedBrace' => [ $token ] ); |
248 | is( $brace->content, ')', '->content ok' ); |
249 | } |
250 | |
251 | =end testing |
252 | |
253 | =cut |
254 | |
255 | sub _lex_document { |
256 | my ($self, $Document) = @_; |
257 | # my $self = shift; |
258 | # my $Document = _INSTANCE(shift, 'PPI::Document') or return undef; |
259 | |
260 | # Start the processing loop |
261 | my $Token; |
262 | while ( ref($Token = $self->_get_token) ) { |
263 | # Add insignificant tokens directly beneath us |
264 | unless ( $Token->significant ) { |
265 | $self->_add_element( $Document, $Token ); |
266 | next; |
267 | } |
268 | |
269 | if ( $Token->content eq ';' ) { |
270 | # It's a semi-colon on it's own. |
271 | # We call this a null statement. |
272 | $self->_add_element( |
273 | $Document, |
274 | PPI::Statement::Null->new($Token), |
275 | ); |
276 | next; |
277 | } |
278 | |
279 | # Handle anything other than a structural element |
280 | unless ( ref $Token eq 'PPI::Token::Structure' ) { |
281 | # Determine the class for the Statement, and create it |
282 | my $Statement = $self->_statement($Document, $Token)->new($Token); |
283 | |
284 | # Move the lexing down into the statement |
285 | $self->_add_delayed( $Document ); |
286 | $self->_add_element( $Document, $Statement ); |
287 | $self->_lex_statement( $Statement ); |
288 | |
289 | next; |
290 | } |
291 | |
292 | # Is this the opening of a structure? |
293 | if ( $Token->__LEXER__opens ) { |
294 | # This should actually have a Statement instead |
295 | $self->_rollback( $Token ); |
296 | my $Statement = PPI::Statement->new; |
297 | $self->_add_element( $Document, $Statement ); |
298 | $self->_lex_statement( $Statement ); |
299 | next; |
300 | } |
301 | |
302 | # Is this the close of a structure. |
303 | if ( $Token->__LEXER__closes ) { |
304 | # Because we are at the top of the tree, this is an error. |
305 | # This means either a mis-parsing, or an mistake in the code. |
306 | # To handle this, we create a "Naked Close" statement |
307 | $self->_add_element( $Document, |
308 | PPI::Statement::UnmatchedBrace->new($Token) |
309 | ); |
310 | next; |
311 | } |
312 | |
313 | # Shouldn't be able to get here |
314 | PPI::Exception->throw('Lexer reached an illegal state'); |
315 | } |
316 | |
317 | # Did we leave the main loop because of a Tokenizer error? |
318 | unless ( defined $Token ) { |
319 | my $errstr = $self->{Tokenizer} ? $self->{Tokenizer}->errstr : ''; |
320 | $errstr ||= 'Unknown Tokenizer Error'; |
321 | PPI::Exception->throw($errstr); |
322 | } |
323 | |
324 | # No error, it's just the end of file. |
325 | # Add any insignificant trailing tokens. |
326 | $self->_add_delayed( $Document ); |
327 | |
328 | # If the Tokenizer has any v6 blocks to attach, do so now. |
329 | # Checking once at the end is faster than adding a special |
330 | # case check for every statement parsed. |
331 | my $perl6 = $self->{Tokenizer}->{'perl6'}; |
332 | if ( @$perl6 ) { |
333 | my $includes = $Document->find( 'PPI::Statement::Include::Perl6' ); |
334 | foreach my $include ( @$includes ) { |
335 | unless ( @$perl6 ) { |
336 | PPI::Exception->throw('Failed to find a perl6 section'); |
337 | } |
338 | $include->{perl6} = shift @$perl6; |
339 | } |
340 | } |
341 | |
342 | return 1; |
343 | } |
344 | |
345 | |
346 | |
347 | |
348 | |
349 | ##################################################################### |
350 | # Lex Methods - Statement Object |
351 | |
352 | use vars qw{%STATEMENT_CLASSES}; |
353 | BEGIN { |
354 | # Keyword -> Statement Subclass |
355 | %STATEMENT_CLASSES = ( |
356 | # Things that affect the timing of execution |
357 | 'BEGIN' => 'PPI::Statement::Scheduled', |
358 | 'CHECK' => 'PPI::Statement::Scheduled', |
359 | 'UNITCHECK' => 'PPI::Statement::Scheduled', |
360 | 'INIT' => 'PPI::Statement::Scheduled', |
361 | 'END' => 'PPI::Statement::Scheduled', |
362 | |
363 | # Loading and context statement |
364 | 'package' => 'PPI::Statement::Package', |
365 | # 'use' => 'PPI::Statement::Include', |
366 | 'no' => 'PPI::Statement::Include', |
367 | 'require' => 'PPI::Statement::Include', |
368 | |
369 | # Various declarations |
370 | 'my' => 'PPI::Statement::Variable', |
371 | 'local' => 'PPI::Statement::Variable', |
372 | 'our' => 'PPI::Statement::Variable', |
373 | 'state' => 'PPI::Statement::Variable', |
374 | # Statements starting with 'sub' could be any one of... |
375 | # 'sub' => 'PPI::Statement::Sub', |
376 | # 'sub' => 'PPI::Statement::Scheduled', |
377 | # 'sub' => 'PPI::Statement', |
378 | |
379 | # Compound statement |
380 | 'if' => 'PPI::Statement::Compound', |
381 | 'unless' => 'PPI::Statement::Compound', |
382 | 'for' => 'PPI::Statement::Compound', |
383 | 'foreach' => 'PPI::Statement::Compound', |
384 | 'while' => 'PPI::Statement::Compound', |
385 | 'until' => 'PPI::Statement::Compound', |
386 | |
387 | # Switch statement |
388 | 'given' => 'PPI::Statement::Given', |
389 | 'when' => 'PPI::Statement::When', |
390 | 'default' => 'PPI::Statement::When', |
391 | |
392 | # Various ways of breaking out of scope |
393 | 'redo' => 'PPI::Statement::Break', |
394 | 'next' => 'PPI::Statement::Break', |
395 | 'last' => 'PPI::Statement::Break', |
396 | 'return' => 'PPI::Statement::Break', |
397 | 'goto' => 'PPI::Statement::Break', |
398 | |
399 | # Special sections of the file |
400 | '__DATA__' => 'PPI::Statement::Data', |
401 | '__END__' => 'PPI::Statement::End', |
402 | ); |
403 | } |
404 | |
405 | sub _statement { |
406 | my ($self, $Parent, $Token) = @_; |
407 | # my $self = shift; |
408 | # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1"; |
409 | # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2"; |
410 | |
411 | # Check for things like ( parent => ... ) |
412 | if ( $Parent->isa('PPI::Structure::List') ) { |
413 | if ( $Token->isa('PPI::Token::Word') ) { |
414 | # Is the next significant token a => |
415 | # Read ahead to the next significant token |
416 | my $Next; |
417 | while ( $Next = $self->_get_token ) { |
418 | unless ( $Next->significant ) { |
419 | push @{$self->{delayed}}, $Next; |
420 | # $self->_delay_element( $Next ); |
421 | next; |
422 | } |
423 | |
424 | # Got the next token |
425 | if ( |
426 | $Next->isa('PPI::Token::Operator') |
427 | and |
428 | $Next->content eq '=>' |
429 | ) { |
430 | # Is an ordinary expression |
431 | $self->_rollback( $Next ); |
432 | return 'PPI::Statement::Expression'; |
433 | } else { |
434 | last; |
435 | } |
436 | } |
437 | |
438 | # Rollback and continue |
439 | $self->_rollback( $Next ); |
440 | } |
441 | } |
442 | |
443 | # Is it a token in our known classes list |
444 | my $class = $STATEMENT_CLASSES{$Token->content}; |
445 | |
446 | # Handle potential barewords for subscripts |
447 | if ( $Parent->isa('PPI::Structure::Subscript') ) { |
448 | # Fast obvious case, just an expression |
449 | unless ( $class and $class->isa('PPI::Statement::Expression') ) { |
450 | return 'PPI::Statement::Expression'; |
451 | } |
452 | |
453 | # This is something like "my" or "our" etc... more subtle. |
454 | # Check if the next token is a closing curly brace. |
455 | # This means we are something like $h{my} |
456 | my $Next; |
457 | while ( $Next = $self->_get_token ) { |
458 | unless ( $Next->significant ) { |
459 | push @{$self->{delayed}}, $Next; |
460 | # $self->_delay_element( $Next ); |
461 | next; |
462 | } |
463 | |
464 | # Found the next significant token. |
465 | # Is it a closing curly brace? |
466 | if ( $Next->content eq '}' ) { |
467 | $self->_rollback( $Next ); |
468 | return 'PPI::Statement::Expression'; |
469 | } else { |
470 | $self->_rollback( $Next ); |
471 | return $class; |
472 | } |
473 | } |
474 | |
475 | # End of file... this means it is something like $h{our |
476 | # which is probably going to be $h{our} ... I think |
477 | $self->_rollback( $Next ); |
478 | return 'PPI::Statement::Expression'; |
479 | } |
480 | |
481 | # If it's a token in our list, use that class |
482 | return $class if $class; |
483 | |
484 | # Handle the more in-depth sub detection |
485 | if ( $Token->content eq 'sub' ) { |
486 | # Read ahead to the next significant token |
487 | my $Next; |
488 | while ( $Next = $self->_get_token ) { |
489 | unless ( $Next->significant ) { |
490 | push @{$self->{delayed}}, $Next; |
491 | # $self->_delay_element( $Next ); |
492 | next; |
493 | } |
494 | |
495 | # Got the next significant token |
496 | my $sclass = $STATEMENT_CLASSES{$Next->content}; |
497 | if ( $sclass and $sclass eq 'PPI::Statement::Scheduled' ) { |
498 | $self->_rollback( $Next ); |
499 | return 'PPI::Statement::Scheduled'; |
500 | } |
501 | if ( $Next->isa('PPI::Token::Word') ) { |
502 | $self->_rollback( $Next ); |
503 | return 'PPI::Statement::Sub'; |
504 | } |
505 | |
506 | ### Comment out these two, as they would return PPI::Statement anyway |
507 | # if ( $content eq '{' ) { |
508 | # Anonymous sub at start of statement |
509 | # return 'PPI::Statement'; |
510 | # } |
511 | # |
512 | # if ( $Next->isa('PPI::Token::Prototype') ) { |
513 | # Anonymous sub at start of statement |
514 | # return 'PPI::Statement'; |
515 | # } |
516 | |
517 | # PPI::Statement is the safest fall-through |
518 | $self->_rollback( $Next ); |
519 | return 'PPI::Statement'; |
520 | } |
521 | |
522 | # End of file... PPI::Statement::Sub is the most likely |
523 | $self->_rollback( $Next ); |
524 | return 'PPI::Statement::Sub'; |
525 | } |
526 | |
527 | if ( $Token->content eq 'use' ) { |
528 | # Add a special case for "use v6" lines. |
529 | my $Next; |
530 | while ( $Next = $self->_get_token ) { |
531 | unless ( $Next->significant ) { |
532 | push @{$self->{delayed}}, $Next; |
533 | # $self->_delay_element( $Next ); |
534 | next; |
535 | } |
536 | |
537 | # Found the next significant token. |
538 | # Is it a v6 use? |
539 | if ( $Next->content eq 'v6' ) { |
540 | $self->_rollback( $Next ); |
541 | return 'PPI::Statement::Include::Perl6'; |
542 | } else { |
543 | $self->_rollback( $Next ); |
544 | return 'PPI::Statement::Include'; |
545 | } |
546 | } |
547 | |
548 | # End of file... this means it is an incomplete use |
549 | # line, just treat it as a normal include. |
550 | $self->_rollback( $Next ); |
551 | return 'PPI::Statement::Include'; |
552 | } |
553 | |
554 | # If our parent is a Condition, we are an Expression |
555 | if ( $Parent->isa('PPI::Structure::Condition') ) { |
556 | return 'PPI::Statement::Expression'; |
557 | } |
558 | |
559 | # If our parent is a List, we are also an expression |
560 | if ( $Parent->isa('PPI::Structure::List') ) { |
561 | return 'PPI::Statement::Expression'; |
562 | } |
563 | |
564 | # Switch statements use expressions, as well. |
565 | if ( |
566 | $Parent->isa('PPI::Structure::Given') |
567 | or |
568 | $Parent->isa('PPI::Structure::When') |
569 | ) { |
570 | return 'PPI::Statement::Expression'; |
571 | } |
572 | |
573 | if ( _INSTANCE($Token, 'PPI::Token::Label') ) { |
574 | return 'PPI::Statement::Compound'; |
575 | } |
576 | |
577 | # Beyond that, I have no idea for the moment. |
578 | # Just keep adding more conditions above this. |
579 | return 'PPI::Statement'; |
580 | } |
581 | |
582 | sub _lex_statement { |
583 | my ($self, $Statement) = @_; |
584 | # my $self = shift; |
585 | # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1"; |
586 | |
587 | # Handle some special statements |
588 | if ( $Statement->isa('PPI::Statement::End') ) { |
589 | return $self->_lex_end( $Statement ); |
590 | } |
591 | |
592 | # Begin processing tokens |
593 | my $Token; |
594 | while ( ref( $Token = $self->_get_token ) ) { |
595 | # Delay whitespace and comment tokens |
596 | unless ( $Token->significant ) { |
597 | push @{$self->{delayed}}, $Token; |
598 | # $self->_delay_element( $Token ); |
599 | next; |
600 | } |
601 | |
602 | # Structual closes, and __DATA__ and __END__ tags implicitly |
603 | # end every type of statement |
604 | if ( |
605 | $Token->__LEXER__closes |
606 | or |
607 | $Token->isa('PPI::Token::Separator') |
608 | ) { |
609 | # Rollback and end the statement |
610 | return $self->_rollback( $Token ); |
611 | } |
612 | |
613 | # Normal statements never implicitly end |
614 | unless ( $Statement->__LEXER__normal ) { |
615 | # Have we hit an implicit end to the statement |
616 | unless ( $self->_continues( $Statement, $Token ) ) { |
617 | # Rollback and finish the statement |
618 | return $self->_rollback( $Token ); |
619 | } |
620 | } |
621 | |
622 | # Any normal character just gets added |
623 | unless ( $Token->isa('PPI::Token::Structure') ) { |
624 | $self->_add_element( $Statement, $Token ); |
625 | next; |
626 | } |
627 | |
628 | # Handle normal statement terminators |
629 | if ( $Token->content eq ';' ) { |
630 | $self->_add_element( $Statement, $Token ); |
631 | return 1; |
632 | } |
633 | |
634 | # Which leaves us with a new structure |
635 | |
636 | # Determine the class for the structure and create it |
637 | my $method = $RESOLVE{$Token->content}; |
638 | my $Structure = $self->$method($Statement)->new($Token); |
639 | |
640 | # Move the lexing down into the Structure |
641 | $self->_add_delayed( $Statement ); |
642 | $self->_add_element( $Statement, $Structure ); |
643 | $self->_lex_structure( $Structure ); |
644 | } |
645 | |
646 | # Was it an error in the tokenizer? |
647 | unless ( defined $Token ) { |
648 | PPI::Exception->throw; |
649 | } |
650 | |
651 | # No, it's just the end of the file... |
652 | # Roll back any insignificant tokens, they'll get added at the Document level |
653 | $self->_rollback; |
654 | } |
655 | |
656 | sub _lex_end { |
657 | my ($self, $Statement) = @_; |
658 | # my $self = shift; |
659 | # my $Statement = _INSTANCE(shift, 'PPI::Statement::End') or die "Bad param 1"; |
660 | |
661 | # End of the file, EVERYTHING is ours |
662 | my $Token; |
663 | while ( $Token = $self->_get_token ) { |
664 | # Inlined $Statement->__add_element($Token); |
665 | Scalar::Util::weaken( |
666 | $_PARENT{Scalar::Util::refaddr $Token} = $Statement |
667 | ); |
668 | push @{$Statement->{children}}, $Token; |
669 | } |
670 | |
671 | # Was it an error in the tokenizer? |
672 | unless ( defined $Token ) { |
673 | PPI::Exception->throw; |
674 | } |
675 | |
676 | # No, it's just the end of the file... |
677 | # Roll back any insignificant tokens, they get added at the Document level |
678 | $self->_rollback; |
679 | } |
680 | |
681 | # For many statements, it can be dificult to determine the end-point. |
682 | # This method takes a statement and the next significant token, and attempts |
683 | # to determine if the there is a statement boundary between the two, or if |
684 | # the statement can continue with the token. |
685 | sub _continues { |
686 | my ($self, $Statement, $Token) = @_; |
687 | # my $self = shift; |
688 | # my $Statement = _INSTANCE(shift, 'PPI::Statement') or die "Bad param 1"; |
689 | # my $Token = _INSTANCE(shift, 'PPI::Token') or die "Bad param 2"; |
690 | |
691 | # Handle the simple block case |
692 | # { print 1; } |
693 | if ( |
694 | $Statement->schildren == 1 |
695 | and |
696 | $Statement->schild(0)->isa('PPI::Structure::Block') |
697 | ) { |
698 | return ''; |
699 | } |
700 | |
701 | # Alrighty then, there are only five implied end statement types, |
702 | # ::Scheduled blocks, ::Sub declarations, ::Compound, ::Given, and ::When |
703 | # statements. |
704 | unless ( ref($Statement) =~ /\b(?:Scheduled|Sub|Compound|Given|When)$/ ) { |
705 | return 1; |
706 | } |
707 | |
708 | # Of these five, ::Scheduled, ::Sub, ::Given, and ::When follow the same |
709 | # simple rule and can be handled first. |
710 | my @part = $Statement->schildren; |
711 | my $LastChild = $part[-1]; |
712 | unless ( $Statement->isa('PPI::Statement::Compound') ) { |
713 | # If the last significant element of the statement is a block, |
714 | # then a scheduled statement is done, no questions asked. |
715 | return ! $LastChild->isa('PPI::Structure::Block'); |
716 | } |
717 | |
718 | # Now we get to compound statements, which kind of suck (to lex). |
719 | # However, of them all, the 'if' type, which includes unless, are |
720 | # relatively easy to handle compared to the others. |
721 | my $type = $Statement->type; |
722 | if ( $type eq 'if' ) { |
723 | # This should be one of the following |
724 | # if (EXPR) BLOCK |
725 | # if (EXPR) BLOCK else BLOCK |
726 | # if (EXPR) BLOCK elsif (EXPR) BLOCK ... else BLOCK |
727 | |
728 | # We only implicitly end on a block |
729 | unless ( $LastChild->isa('PPI::Structure::Block') ) { |
730 | # if (EXPR) ... |
731 | # if (EXPR) BLOCK else ... |
732 | # if (EXPR) BLOCK elsif (EXPR) BLOCK ... |
733 | return 1; |
734 | } |
735 | |
736 | # If the token before the block is an 'else', |
737 | # it's over, no matter what. |
738 | my $NextLast = $Statement->schild(-2); |
739 | if ( |
740 | $NextLast |
741 | and |
742 | $NextLast->isa('PPI::Token') |
743 | and |
744 | $NextLast->isa('PPI::Token::Word') |
745 | and |
746 | $NextLast->content eq 'else' |
747 | ) { |
748 | return ''; |
749 | } |
750 | |
751 | # Otherwise, we continue for 'elsif' or 'else' only. |
752 | if ( |
753 | $Token->isa('PPI::Token::Word') |
754 | and ( |
755 | $Token->content eq 'else' |
756 | or |
757 | $Token->content eq 'elsif' |
758 | ) |
759 | ) { |
760 | return 1; |
761 | } |
762 | |
763 | return ''; |
764 | } |
765 | |
766 | if ( $type eq 'label' ) { |
767 | # We only have the label so far, could be any of |
768 | # LABEL while (EXPR) BLOCK |
769 | # LABEL while (EXPR) BLOCK continue BLOCK |
770 | # LABEL for (EXPR; EXPR; EXPR) BLOCK |
771 | # LABEL foreach VAR (LIST) BLOCK |
772 | # LABEL foreach VAR (LIST) BLOCK continue BLOCK |
773 | # LABEL BLOCK continue BLOCK |
774 | |
775 | # Handle cases with a word after the label |
776 | if ( |
777 | $Token->isa('PPI::Token::Word') |
778 | and |
779 | $Token->content =~ /^(?:while|until|for|foreach)$/ |
780 | ) { |
781 | return 1; |
782 | } |
783 | |
784 | # Handle labelled blocks |
785 | if ( $Token->isa('PPI::Token::Structure') && $Token->content eq '{' ) { |
786 | return 1; |
787 | } |
788 | |
789 | return ''; |
790 | } |
791 | |
792 | # Handle the common "after round braces" case |
793 | if ( $LastChild->isa('PPI::Structure') and $LastChild->braces eq '()' ) { |
794 | # LABEL while (EXPR) ... |
795 | # LABEL while (EXPR) ... |
796 | # LABEL for (EXPR; EXPR; EXPR) ... |
797 | # LABEL for VAR (LIST) ... |
798 | # LABEL foreach VAR (LIST) ... |
799 | # Only a block will do |
800 | return $Token->isa('PPI::Token::Structure') && $Token->content eq '{'; |
801 | } |
802 | |
803 | if ( $type eq 'for' ) { |
804 | # LABEL for (EXPR; EXPR; EXPR) BLOCK |
805 | if ( |
806 | $LastChild->isa('PPI::Token::Word') |
807 | and |
808 | $LastChild->content =~ /^for(?:each)?\z/ |
809 | ) { |
810 | # LABEL for ... |
811 | if ( |
812 | ( |
813 | $Token->isa('PPI::Token::Structure') |
814 | and |
815 | $Token->content eq '(' |
816 | ) |
817 | or |
818 | $Token->isa('PPI::Token::QuoteLike::Words') |
819 | ) { |
820 | return 1; |
821 | } |
822 | |
823 | if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) { |
824 | # LABEL for VAR QW{} ... |
825 | # LABEL foreach VAR QW{} ... |
826 | # Only a block will do |
827 | return $Token->isa('PPI::Token::Structure') && $Token->content eq '{'; |
828 | } |
829 | |
830 | # In this case, we can also behave like a foreach |
831 | $type = 'foreach'; |
832 | |
833 | } elsif ( $LastChild->isa('PPI::Structure::Block') ) { |
834 | # LABEL for (EXPR; EXPR; EXPR) BLOCK |
835 | # That's it, nothing can continue |
836 | return ''; |
837 | |
838 | } elsif ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) { |
839 | # LABEL for VAR QW{} ... |
840 | # LABEL foreach VAR QW{} ... |
841 | # Only a block will do |
842 | return $Token->isa('PPI::Token::Structure') && $Token->content eq '{'; |
843 | } |
844 | } |
845 | |
846 | # Handle the common continue case |
847 | if ( $LastChild->isa('PPI::Token::Word') and $LastChild->content eq 'continue' ) { |
848 | # LABEL while (EXPR) BLOCK continue ... |
849 | # LABEL foreach VAR (LIST) BLOCK continue ... |
850 | # LABEL BLOCK continue ... |
851 | # Only a block will do |
852 | return $Token->isa('PPI::Token::Structure') && $Token->content eq '{'; |
853 | } |
854 | |
855 | # Handle the common continuable block case |
856 | if ( $LastChild->isa('PPI::Structure::Block') ) { |
857 | # LABEL while (EXPR) BLOCK |
858 | # LABEL while (EXPR) BLOCK ... |
859 | # LABEL for (EXPR; EXPR; EXPR) BLOCK |
860 | # LABEL foreach VAR (LIST) BLOCK |
861 | # LABEL foreach VAR (LIST) BLOCK ... |
862 | # LABEL BLOCK ... |
863 | # Is this the block for a continue? |
864 | if ( _INSTANCE($part[-2], 'PPI::Token::Word') and $part[-2]->content eq 'continue' ) { |
865 | # LABEL while (EXPR) BLOCK continue BLOCK |
866 | # LABEL foreach VAR (LIST) BLOCK continue BLOCK |
867 | # LABEL BLOCK continue BLOCK |
868 | # That's it, nothing can continue this |
869 | return ''; |
870 | } |
871 | |
872 | # Only a continue will do |
873 | return $Token->isa('PPI::Token::Word') && $Token->content eq 'continue'; |
874 | } |
875 | |
876 | if ( $type eq 'block' ) { |
877 | # LABEL BLOCK continue BLOCK |
878 | # Every possible case is covered in the common cases above |
879 | } |
880 | |
881 | if ( $type eq 'while' ) { |
882 | # LABEL while (EXPR) BLOCK |
883 | # LABEL while (EXPR) BLOCK continue BLOCK |
884 | # LABEL until (EXPR) BLOCK |
885 | # LABEL until (EXPR) BLOCK continue BLOCK |
886 | # The only case not covered is the while ... |
887 | if ( |
888 | $LastChild->isa('PPI::Token::Word') |
889 | and ( |
890 | $LastChild->content eq 'while' |
891 | or |
892 | $LastChild->content eq 'until' |
893 | ) |
894 | ) { |
895 | # LABEL while ... |
896 | # LABEL until ... |
897 | # Only a condition structure will do |
898 | return $Token->isa('PPI::Token::Structure') && $Token->content eq '('; |
899 | } |
900 | } |
901 | |
902 | if ( $type eq 'foreach' ) { |
903 | # LABEL foreach VAR (LIST) BLOCK |
904 | # LABEL foreach VAR (LIST) BLOCK continue BLOCK |
905 | # The only two cases that have not been covered already are |
906 | # 'foreach ...' and 'foreach VAR ...' |
907 | |
908 | if ( $LastChild->isa('PPI::Token::Symbol') ) { |
909 | # LABEL foreach my $scalar ... |
910 | # Open round brace, or a quotewords |
911 | return 1 if $Token->isa('PPI::Token::Structure') && $Token->content eq '('; |
912 | return 1 if $Token->isa('PPI::Token::QuoteLike::Words'); |
913 | return ''; |
914 | } |
915 | |
916 | if ( $LastChild->content eq 'foreach' or $LastChild->content eq 'for' ) { |
917 | # There are three possibilities here |
918 | if ( |
919 | $Token->isa('PPI::Token::Word') |
920 | and ( |
921 | ($STATEMENT_CLASSES{ $Token->content } || '') |
922 | eq |
923 | 'PPI::Statement::Variable' |
924 | ) |
925 | ) { |
926 | # VAR == 'my ...' |
927 | return 1; |
928 | } elsif ( $Token->content =~ /^\$/ ) { |
929 | # VAR == '$scalar' |
930 | return 1; |
931 | } elsif ( $Token->isa('PPI::Token::Structure') and $Token->content eq '(' ) { |
932 | return 1; |
933 | } elsif ( $Token->isa('PPI::Token::QuoteLike::Words') ) { |
934 | return 1; |
935 | } else { |
936 | return ''; |
937 | } |
938 | } |
939 | |
940 | if ( |
941 | ($STATEMENT_CLASSES{ $LastChild->content } || '') |
942 | eq |
943 | 'PPI::Statement::Variable' |
944 | ) { |
945 | # LABEL foreach my ... |
946 | # Only a scalar will do |
947 | return $Token->content =~ /^\$/; |
948 | } |
949 | |
950 | # Handle the rare for my $foo qw{bar} ... case |
951 | if ( $LastChild->isa('PPI::Token::QuoteLike::Words') ) { |
952 | # LABEL for VAR QW ... |
953 | # LABEL foreach VAR QW ... |
954 | # Only a block will do |
955 | return $Token->isa('PPI::Token::Structure') && $Token->content eq '{'; |
956 | } |
957 | } |
958 | |
959 | # Something we don't know about... what could it be |
960 | PPI::Exception->throw("Illegal state in '$type' compound statement"); |
961 | } |
962 | |
963 | |
964 | |
965 | |
966 | |
967 | ##################################################################### |
968 | # Lex Methods - Structure Object |
969 | |
970 | # Given a parent element, and a ( token to open a structure, determine |
971 | # the class that the structure should be. |
972 | sub _round { |
973 | my ($self, $Parent) = @_; |
974 | # my $self = shift; |
975 | # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1"; |
976 | |
977 | # Get the last significant element in the parent |
978 | my $Element = $Parent->schild(-1); |
979 | if ( _INSTANCE($Element, 'PPI::Token::Word') ) { |
980 | # Can it be determined because it is a keyword? |
981 | my $rclass = $ROUND{$Element->content}; |
982 | return $rclass if $rclass; |
983 | } |
984 | |
985 | # If we are part of a for or foreach statement, we are a ForLoop |
986 | if ( $Parent->isa('PPI::Statement::Compound') ) { |
987 | if ( $Parent->type =~ /^for(?:each)?$/ ) { |
988 | return 'PPI::Structure::For'; |
989 | } |
990 | } elsif ( $Parent->isa('PPI::Statement::Given') ) { |
991 | return 'PPI::Structure::Given'; |
992 | } elsif ( $Parent->isa('PPI::Statement::When') ) { |
993 | return 'PPI::Structure::When'; |
994 | } |
995 | |
996 | # Otherwise, it must be a list |
997 | |
998 | # If the previous element is -> then we mark it as a dereference |
999 | if ( _INSTANCE($Element, 'PPI::Token::Operator') and $Element->content eq '->' ) { |
1000 | $Element->{_dereference} = 1; |
1001 | } |
1002 | |
1003 | 'PPI::Structure::List' |
1004 | } |
1005 | |
1006 | # Given a parent element, and a [ token to open a structure, determine |
1007 | # the class that the structure should be. |
1008 | sub _square { |
1009 | my ($self, $Parent) = @_; |
1010 | # my $self = shift; |
1011 | # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1"; |
1012 | |
1013 | # Get the last significant element in the parent |
1014 | my $Element = $Parent->schild(-1); |
1015 | |
1016 | # Is this a subscript, like $foo[1] or $foo{expr} |
1017 | |
1018 | if ( $Element ) { |
1019 | if ( $Element->isa('PPI::Token::Operator') and $Element->content eq '->' ) { |
1020 | # $foo->[] |
1021 | $Element->{_dereference} = 1; |
1022 | return 'PPI::Structure::Subscript'; |
1023 | } |
1024 | if ( $Element->isa('PPI::Structure::Subscript') ) { |
1025 | # $foo{}[] |
1026 | return 'PPI::Structure::Subscript'; |
1027 | } |
1028 | if ( $Element->isa('PPI::Token::Symbol') and $Element->content =~ /^(?:\$|\@)/ ) { |
1029 | # $foo[], @foo[] |
1030 | return 'PPI::Structure::Subscript'; |
1031 | } |
1032 | # FIXME - More cases to catch |
1033 | } |
1034 | |
1035 | # Otherwise, we assume that it's an anonymous arrayref constructor |
1036 | 'PPI::Structure::Constructor'; |
1037 | } |
1038 | |
1039 | use vars qw{%CURLY_CLASSES}; |
1040 | BEGIN { |
1041 | # Keyword -> Structure class maps |
1042 | %CURLY_CLASSES = ( |
1043 | # Blocks |
1044 | 'sub' => 'PPI::Structure::Block', |
1045 | 'grep' => 'PPI::Structure::Block', |
1046 | 'map' => 'PPI::Structure::Block', |
1047 | 'sort' => 'PPI::Structure::Block', |
1048 | 'do' => 'PPI::Structure::Block', |
1049 | |
1050 | # Hash constructors |
1051 | '=' => 'PPI::Structure::Constructor', |
1052 | '||=' => 'PPI::Structure::Constructor', |
1053 | ',' => 'PPI::Structure::Constructor', |
1054 | '=>' => 'PPI::Structure::Constructor', |
1055 | ); |
1056 | } |
1057 | |
1058 | # Given a parent element, and a { token to open a structure, determine |
1059 | # the class that the structure should be. |
1060 | sub _curly { |
1061 | my ($self, $Parent) = @_; |
1062 | # my $self = shift; |
1063 | # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1"; |
1064 | |
1065 | # Get the last significant element in the parent |
1066 | my $Element = $Parent->schild(-1); |
1067 | my $content = $Element ? $Element->content : ''; |
1068 | |
1069 | # Is this a subscript, like $foo[1] or $foo{expr} |
1070 | if ( $Element ) { |
1071 | if ( $content eq '->' and $Element->isa('PPI::Token::Operator') ) { |
1072 | # $foo->{} |
1073 | $Element->{_dereference} = 1; |
1074 | return 'PPI::Structure::Subscript'; |
1075 | } |
1076 | if ( $Element->isa('PPI::Structure::Subscript') ) { |
1077 | # $foo[]{} |
1078 | return 'PPI::Structure::Subscript'; |
1079 | } |
1080 | if ( $content =~ /^(?:\$|\@)/ and $Element->isa('PPI::Token::Symbol') ) { |
1081 | # $foo{}, @foo{} |
1082 | return 'PPI::Structure::Subscript'; |
1083 | } |
1084 | if ( $CURLY_CLASSES{$content} ) { |
1085 | # Known type |
1086 | return $CURLY_CLASSES{$content}; |
1087 | } |
1088 | } |
1089 | |
1090 | # Are we in a compound statement |
1091 | if ( $Parent->isa('PPI::Statement::Compound') ) { |
1092 | # We will only encounter blocks in compound statements |
1093 | return 'PPI::Structure::Block'; |
1094 | } |
1095 | |
1096 | # Are we the second argument of use |
1097 | if ( $Parent->isa('PPI::Statement::Include') ) { |
1098 | if ( $Parent->schildren == 2 ) { |
1099 | # This is something like use constant { ... }; |
1100 | return 'PPI::Structure::Constructor'; |
1101 | } |
1102 | } |
1103 | |
1104 | # Unless we are at the start of the statement, everything else should be a block |
1105 | ### FIXME This is possibly a bad choice, but will have to do for now. |
1106 | return 'PPI::Structure::Block' if $Element; |
1107 | |
1108 | # Special case: Are we the param of a core function |
1109 | # i.e. map({ $_ => 1 } @foo) |
1110 | if ( |
1111 | $Parent->isa('PPI::Statement') |
1112 | and |
1113 | _INSTANCE($Parent->parent, 'PPI::Structure::List') |
1114 | ) { |
1115 | my $function = $Parent->parent->parent->schild(-2); |
1116 | if ( $function and $function->content =~ /^(?:map|grep|sort)$/ ) { |
1117 | return 'PPI::Structure::Block'; |
1118 | } |
1119 | } |
1120 | |
1121 | # We need to scan ahead. |
1122 | my $Next; |
1123 | my $position = 0; |
1124 | my @delayed = (); |
1125 | while ( $Next = $self->_get_token ) { |
1126 | unless ( $Next->significant ) { |
1127 | push @delayed, $Next; |
1128 | next; |
1129 | } |
1130 | |
1131 | # If a closing curly, this is an anonymous hash-ref |
1132 | if ( ++$position == 1 and $Next->content eq '}' ) { |
1133 | $self->_buffer( splice(@delayed), $Next ); |
1134 | return 'PPI::Structure::Constructor'; |
1135 | } |
1136 | |
1137 | # If it contains a => as the second thing, |
1138 | # this is also an anonymous hash-ref. |
1139 | if ( $position == 2 and $Next->content eq '=>' ) { |
1140 | $self->_buffer( splice(@delayed), $Next ); |
1141 | return 'PPI::Structure::Constructor'; |
1142 | } |
1143 | |
1144 | # We only check the first two, then default to block |
1145 | if ( $position >= 3 ) { |
1146 | $self->_buffer( splice(@delayed), $Next ); |
1147 | last; |
1148 | } |
1149 | |
1150 | # Delay and continue |
1151 | push @delayed, $Next; |
1152 | } |
1153 | |
1154 | # Hit the end of the document, or bailed out, go with block |
1155 | $self->_buffer( splice(@delayed) ); |
1156 | if ( ref $Parent eq 'PPI::Statement' ) { |
1157 | bless $Parent, 'PPI::Statement::Compound'; |
1158 | } |
1159 | return 'PPI::Structure::Block'; |
1160 | } |
1161 | |
1162 | =pod |
1163 | |
1164 | =begin testing _lex_structure 4 |
1165 | |
1166 | # Validate the creation of a null statement |
1167 | SCOPE: { |
1168 | my $token = new_ok( 'PPI::Token::Structure' => [ ';' ] ); |
1169 | my $null = new_ok( 'PPI::Statement::Null' => [ $token ] ); |
1170 | is( $null->content, ';', '->content ok' ); |
1171 | } |
1172 | |
1173 | # Validate the creation of an empty statement |
1174 | new_ok( 'PPI::Statement' => [ ] ); |
1175 | |
1176 | =end testing |
1177 | |
1178 | =cut |
1179 | |
1180 | sub _lex_structure { |
1181 | my ($self, $Structure) = @_; |
1182 | # my $self = shift; |
1183 | # my $Structure = _INSTANCE(shift, 'PPI::Structure') or die "Bad param 1"; |
1184 | |
1185 | # Start the processing loop |
1186 | my $Token; |
1187 | while ( ref($Token = $self->_get_token) ) { |
1188 | # Is this a direct type token |
1189 | unless ( $Token->significant ) { |
1190 | push @{$self->{delayed}}, $Token; |
1191 | # $self->_delay_element( $Token ); |
1192 | next; |
1193 | } |
1194 | |
1195 | # Anything other than a Structure starts a Statement |
1196 | unless ( $Token->isa('PPI::Token::Structure') ) { |
1197 | # Because _statement may well delay and rollback itself, |
1198 | # we need to add the delayed tokens early |
1199 | $self->_add_delayed( $Structure ); |
1200 | |
1201 | # Determine the class for the Statement and create it |
1202 | my $Statement = $self->_statement($Structure, $Token)->new($Token); |
1203 | |
1204 | # Move the lexing down into the Statement |
1205 | $self->_add_element( $Structure, $Statement ); |
1206 | $self->_lex_statement( $Statement ); |
1207 | |
1208 | next; |
1209 | } |
1210 | |
1211 | # Is this the opening of another structure directly inside us? |
1212 | if ( $Token->__LEXER__opens ) { |
1213 | # Rollback the Token, and recurse into the statement |
1214 | $self->_rollback( $Token ); |
1215 | my $Statement = PPI::Statement->new; |
1216 | $self->_add_element( $Structure, $Statement ); |
1217 | $self->_lex_statement( $Statement ); |
1218 | next; |
1219 | } |
1220 | |
1221 | # Is this the close of a structure ( which would be an error ) |
1222 | if ( $Token->__LEXER__closes ) { |
1223 | # Is this OUR closing structure |
1224 | if ( $Token->content eq $Structure->start->__LEXER__opposite ) { |
1225 | # Add any delayed tokens, and the finishing token (the ugly way) |
1226 | $self->_add_delayed( $Structure ); |
1227 | $Structure->{finish} = $Token; |
1228 | Scalar::Util::weaken( |
1229 | $_PARENT{Scalar::Util::refaddr $Token} = $Structure |
1230 | ); |
1231 | |
1232 | # Confirm that ForLoop structures are actually so, and |
1233 | # aren't really a list. |
1234 | if ( $Structure->isa('PPI::Structure::For') ) { |
1235 | if ( 2 > scalar grep { |
1236 | $_->isa('PPI::Statement') |
1237 | } $Structure->children ) { |
1238 | bless($Structure, 'PPI::Structure::List'); |
1239 | } |
1240 | } |
1241 | return 1; |
1242 | } |
1243 | |
1244 | # Unmatched closing brace. |
1245 | # Either they typed the wrong thing, or haven't put |
1246 | # one at all. Either way it's an error we need to |
1247 | # somehow handle gracefully. For now, we'll treat it |
1248 | # as implicitly ending the structure. This causes the |
1249 | # least damage across the various reasons why this |
1250 | # might have happened. |
1251 | return $self->_rollback( $Token ); |
1252 | } |
1253 | |
1254 | # It's a semi-colon on it's own, just inside the block. |
1255 | # This is a null statement. |
1256 | $self->_add_element( |
1257 | $Structure, |
1258 | PPI::Statement::Null->new($Token), |
1259 | ); |
1260 | } |
1261 | |
1262 | # Is this an error |
1263 | unless ( defined $Token ) { |
1264 | PPI::Exception->throw; |
1265 | } |
1266 | |
1267 | # No, it's just the end of file. |
1268 | # Add any insignificant trailing tokens. |
1269 | $self->_add_delayed( $Structure ); |
1270 | } |
1271 | |
1272 | |
1273 | |
1274 | |
1275 | |
1276 | ##################################################################### |
1277 | # Support Methods |
1278 | |
1279 | # Get the next token for processing, handling buffering |
1280 | sub _get_token { |
1281 | shift(@{$_[0]->{buffer}}) or $_[0]->{Tokenizer}->get_token; |
1282 | } |
1283 | |
1284 | # Old long version of the above |
1285 | # my $self = shift; |
1286 | # # First from the buffer |
1287 | # if ( @{$self->{buffer}} ) { |
1288 | # return shift @{$self->{buffer}}; |
1289 | # } |
1290 | # |
1291 | # # Then from the Tokenizer |
1292 | # $self->{Tokenizer}->get_token; |
1293 | # } |
1294 | |
1295 | # Delay the addition of a insignificant elements. |
1296 | # This ended up being inlined. |
1297 | # sub _delay_element { |
1298 | # my $self = shift; |
1299 | # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 1"; |
1300 | # push @{ $_[0]->{delayed} }, $_[1]; |
1301 | # } |
1302 | |
1303 | # Add an Element to a Node, including any delayed Elements |
1304 | sub _add_element { |
1305 | my ($self, $Parent, $Element) = @_; |
1306 | # my $self = shift; |
1307 | # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1"; |
1308 | # my $Element = _INSTANCE(shift, 'PPI::Element') or die "Bad param 2"; |
1309 | |
1310 | # Handle a special case, where a statement is not fully resolved |
1311 | if ( ref $Parent eq 'PPI::Statement' ) { |
1312 | my $first = $Parent->schild(0); |
1313 | my $second = $Parent->schild(1); |
1314 | if ( $first and $first->isa('PPI::Token::Label') and ! $second ) { |
1315 | # It's a labelled statement |
1316 | if ( $STATEMENT_CLASSES{$second->content} ) { |
1317 | bless $Parent, $STATEMENT_CLASSES{$second->content}; |
1318 | } |
1319 | } |
1320 | } |
1321 | |
1322 | # Add first the delayed, from the front, then the passed element |
1323 | foreach my $el ( @{$self->{delayed}} ) { |
1324 | Scalar::Util::weaken( |
1325 | $_PARENT{Scalar::Util::refaddr $el} = $Parent |
1326 | ); |
1327 | # Inlined $Parent->__add_element($el); |
1328 | } |
1329 | Scalar::Util::weaken( |
1330 | $_PARENT{Scalar::Util::refaddr $Element} = $Parent |
1331 | ); |
1332 | push @{$Parent->{children}}, @{$self->{delayed}}, $Element; |
1333 | |
1334 | # Clear the delayed elements |
1335 | $self->{delayed} = []; |
1336 | } |
1337 | |
1338 | # Specifically just add any delayed tokens, if any. |
1339 | sub _add_delayed { |
1340 | my ($self, $Parent) = @_; |
1341 | # my $self = shift; |
1342 | # my $Parent = _INSTANCE(shift, 'PPI::Node') or die "Bad param 1"; |
1343 | |
1344 | # Add any delayed |
1345 | foreach my $el ( @{$self->{delayed}} ) { |
1346 | Scalar::Util::weaken( |
1347 | $_PARENT{Scalar::Util::refaddr $el} = $Parent |
1348 | ); |
1349 | # Inlined $Parent->__add_element($el); |
1350 | } |
1351 | push @{$Parent->{children}}, @{$self->{delayed}}; |
1352 | |
1353 | # Clear the delayed elements |
1354 | $self->{delayed} = []; |
1355 | } |
1356 | |
1357 | # Rollback the delayed tokens, plus any passed. Once all the tokens |
1358 | # have been moved back on to the buffer, the order should be. |
1359 | # <--- @{$self->{delayed}}, @_, @{$self->{buffer}} <---- |
1360 | sub _rollback { |
1361 | my $self = shift; |
1362 | |
1363 | # First, put any passed objects back |
1364 | if ( @_ ) { |
1365 | unshift @{$self->{buffer}}, splice @_; |
1366 | } |
1367 | |
1368 | # Then, put back anything delayed |
1369 | if ( @{$self->{delayed}} ) { |
1370 | unshift @{$self->{buffer}}, splice @{$self->{delayed}}; |
1371 | } |
1372 | |
1373 | 1; |
1374 | } |
1375 | |
1376 | # Partial rollback, just return a single list to the buffer |
1377 | sub _buffer { |
1378 | my $self = shift; |
1379 | |
1380 | # Put any passed objects back |
1381 | if ( @_ ) { |
1382 | unshift @{$self->{buffer}}, splice @_; |
1383 | } |
1384 | |
1385 | 1; |
1386 | } |
1387 | |
1388 | |
1389 | |
1390 | |
1391 | |
1392 | ##################################################################### |
1393 | # Error Handling |
1394 | |
1395 | # Set the error message |
1396 | sub _error { |
1397 | $errstr = $_[1]; |
1398 | undef; |
1399 | } |
1400 | |
1401 | # Clear the error message. |
1402 | # Returns the object as a convenience. |
1403 | sub _clear { |
1404 | $errstr = ''; |
1405 | $_[0]; |
1406 | } |
1407 | |
1408 | =pod |
1409 | |
1410 | =head2 errstr |
1411 | |
1412 | For any error that occurs, you can use the C<errstr>, as either |
1413 | a static or object method, to access the error message. |
1414 | |
1415 | If no error occurs for any particular action, C<errstr> will return false. |
1416 | |
1417 | =cut |
1418 | |
1419 | sub errstr { |
1420 | $errstr; |
1421 | } |
1422 | |
1423 | |
1424 | |
1425 | |
1426 | |
1427 | ##################################################################### |
1428 | # PDOM Extensions |
1429 | # |
1430 | # This is something of a future expansion... ignore it for now :) |
1431 | # |
1432 | # use PPI::Statement::Sub (); |
1433 | # |
1434 | # sub PPI::Statement::Sub::__LEXER__normal { '' } |
1435 | |
1436 | 1; |
1437 | |
1438 | =pod |
1439 | |
1440 | =head1 TO DO |
1441 | |
1442 | - Add optional support for some of the more common source filters |
1443 | |
1444 | - Some additional checks for blessing things into various Statement |
1445 | and Structure subclasses. |
1446 | |
1447 | =head1 SUPPORT |
1448 | |
1449 | See the L<support section|PPI/SUPPORT> in the main module. |
1450 | |
1451 | =head1 AUTHOR |
1452 | |
1453 | Adam Kennedy E<lt>adamk@cpan.orgE<gt> |
1454 | |
1455 | =head1 COPYRIGHT |
1456 | |
1457 | Copyright 2001 - 2009 Adam Kennedy. |
1458 | |
1459 | This program is free software; you can redistribute |
1460 | it and/or modify it under the same terms as Perl itself. |
1461 | |
1462 | The full text of the license can be found in the |
1463 | LICENSE file included with this module. |
1464 | |
1465 | =cut |