1 package PPI::Token::Word;
7 PPI::Token::Word - The generic "word" Token
17 A C<PPI::Token::Word> object is a PPI-specific representation of several
18 different types of word-like things, and is one of the most common Token
19 classes found in typical documents.
21 Specifically, it includes not only barewords, but also any other valid
22 Perl identifier including non-operator keywords and core functions, and
23 any include C<::> separators inside it, as long as it fits the
24 format of a class, function, etc.
28 There are no methods available for C<PPI::Token::Word> beyond those
29 provided by its L<PPI::Token> and L<PPI::Element> parent
32 We expect to add additional methods to help further resolve a Word as
33 a function, method, etc over time. If you need such a thing right
34 now, look at L<Perl::Critic::Utils>.
41 use vars qw{$VERSION @ISA %OPERATOR %QUOTELIKE};
46 # Copy in OPERATOR from PPI::Token::Operator
47 *OPERATOR = *PPI::Token::Operator::OPERATOR;
50 'q' => 'Quote::Literal',
51 'qq' => 'Quote::Interpolate',
52 'qx' => 'QuoteLike::Command',
53 'qw' => 'QuoteLike::Words',
54 'qr' => 'QuoteLike::Regexp',
55 'm' => 'Regexp::Match',
56 's' => 'Regexp::Substitute',
57 'tr' => 'Regexp::Transliterate',
58 'y' => 'Regexp::Transliterate',
66 Returns the value of the Word as a string. This assumes (often
67 incorrectly) that the Word is a bareword and not a function, method,
68 keyword, etc. This differs from C<content> because C<Foo'Bar> expands
71 =begin testing literal 9
75 "Foo::Bar", 'Foo::Bar',
76 "Foo'Bar", 'Foo::Bar',
79 my $from = shift @pairs;
80 my $to = shift @pairs;
81 my $doc = PPI::Document->new( \"$from;" );
82 isa_ok( $doc, 'PPI::Document' );
83 my $word = $doc->find_first('Token::Word');
84 isa_ok( $word, 'PPI::Token::Word' );
85 is( $word->literal, $to, "The source $from becomes $to ok" );
94 my $word = $self->content;
96 # Expand Foo'Bar to Foo::Bar
106 Answers whether this is the name of a method in a method call. Returns true if
107 yes, false if no, and nothing if unknown.
109 =begin testing method_call 24
111 my $Document = PPI::Document->new(\<<'END_PERL');
113 indirect_class_with_colon Foo::;
114 $bar->method_with_parentheses;
115 print SomeClass->method_without_parentheses + 1;
117 $baz->chained_from->chained_to;
118 a_first_thing a_middle_thing a_last_thing;
119 (first_list_element, second_list_element, third_list_element);
120 first_comma_separated_word, second_comma_separated_word, third_comma_separated_word;
121 single_bareword_statement;
122 { bareword_no_semicolon_end_of_block }
124 fat_comma_left_side => $thingy;
127 isa_ok( $Document, 'PPI::Document' );
128 my $words = $Document->find('Token::Word');
129 is( scalar @{$words}, 23, 'Found the 23 test words' );
130 my %words = map { $_ => $_ } @{$words};
132 scalar $words{indirect}->method_call,
134 'Indirect notation is unknown.',
137 scalar $words{indirect_class_with_colon}->method_call,
139 'Indirect notation with following word ending with colons is true.',
142 scalar $words{method_with_parentheses}->method_call,
144 'Method with parentheses is true.',
147 scalar $words{method_without_parentheses}->method_call,
149 'Method without parentheses is true.',
152 scalar $words{print}->method_call,
154 'Plain print is unknown.',
157 scalar $words{SomeClass}->method_call,
159 'Class in class method call is unknown.',
162 scalar $words{sub_call}->method_call,
164 'Subroutine call is false.',
167 scalar $words{chained_from}->method_call,
169 'Method that is chained from is true.',
172 scalar $words{chained_to}->method_call,
174 'Method that is chained to is true.',
177 scalar $words{a_first_thing}->method_call,
179 'First bareword is unknown.',
182 scalar $words{a_middle_thing}->method_call,
184 'Bareword in the middle is unknown.',
187 scalar $words{a_last_thing}->method_call,
189 'Bareword at the end is false.',
191 foreach my $false_word (
193 first_list_element second_list_element third_list_element
194 first_comma_separated_word second_comma_separated_word third_comma_separated_word
195 single_bareword_statement
196 bareword_no_semicolon_end_of_block
202 scalar $words{$false_word}->method_call,
204 "$false_word is false.",
215 my $previous = $self->sprevious_sibling;
219 $previous->isa('PPI::Token::Operator')
221 $previous->content eq '->'
226 my $snext = $self->snext_sibling;
227 return 0 unless $snext;
230 $snext->isa('PPI::Structure::List')
232 $snext->isa('PPI::Token::Structure')
234 $snext->isa('PPI::Token::Operator')
236 $snext->content eq ','
238 $snext->content eq '=>'
245 $snext->isa('PPI::Token::Word')
247 $snext->content =~ m< \w :: \z >xms
255 sub __TOKENIZER__on_char {
259 # Suck in till the end of the bareword
260 my $rest = substr( $t->{line}, $t->{line_cursor} );
261 if ( $rest =~ /^(\w+(?:(?:\'|::)(?!\d)\w+)*(?:::)?)/ ) {
262 $t->{token}->{content} .= $1;
263 $t->{line_cursor} += length $1;
265 # Special Case: If we accidentally treat eq'foo' like the word "eq'foo",
266 # then unwind it and just make it 'eq' (or the other stringy comparitors)
267 if ( $t->{token}->{content} =~ /^(?:eq|ne|q|qq|qx|qw|qr|m|s|tr|y)\'/ ) {
268 if ( substr($t->{token}->{content}, 1, 1) eq "'" ) {
269 $t->{line_cursor} -= (length($t->{token}->{content}) - 1);
270 $t->{token}->{content} = substr($t->{token}->{content}, 0, 1);
272 $t->{line_cursor} -= (length($t->{token}->{content}) - 2);
273 $t->{token}->{content} = substr($t->{token}->{content}, 0, 2);
278 # We might be a subroutine attribute.
279 my $tokens = $t->_previous_significant_tokens(1);
280 if ( $tokens and $tokens->[0]->{_attribute} ) {
281 $t->{class} = $t->{token}->set_class( 'Attribute' );
282 return $t->{class}->__TOKENIZER__commit( $t );
285 # Check for a quote like operator
286 my $word = $t->{token}->{content};
287 if ( $QUOTELIKE{$word} and ! $class->__TOKENIZER__literal($t, $word, $tokens) ) {
288 $t->{class} = $t->{token}->set_class( $QUOTELIKE{$word} );
289 return $t->{class}->__TOKENIZER__on_char( $t );
292 # Or one of the word operators
293 if ( $OPERATOR{$word} and ! $class->__TOKENIZER__literal($t, $word, $tokens) ) {
294 $t->{class} = $t->{token}->set_class( 'Operator' );
295 return $t->_finalize_token->__TOKENIZER__on_char( $t );
298 # Unless this is a simple identifier, at this point
299 # it has to be a normal bareword
300 if ( $word =~ /\:/ ) {
301 return $t->_finalize_token->__TOKENIZER__on_char( $t );
304 # If the NEXT character in the line is a colon, this
306 my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
307 if ( $char eq ':' ) {
308 $t->{token}->{content} .= ':';
310 $t->{class} = $t->{token}->set_class( 'Label' );
312 # If not a label, '_' on its own is the magic filehandle
313 } elsif ( $word eq '_' ) {
314 $t->{class} = $t->{token}->set_class( 'Magic' );
318 # Finalise and process the character again
319 $t->_finalize_token->__TOKENIZER__on_char( $t );
324 # We are committed to being a bareword.
325 # Or so we would like to believe.
326 sub __TOKENIZER__commit {
327 my ($class, $t) = @_;
329 # Our current position is the first character of the bareword.
330 # Capture the bareword.
331 my $rest = substr( $t->{line}, $t->{line_cursor} );
332 unless ( $rest =~ /^((?!\d)\w+(?:(?:\'|::)(?!\d)\w+)*(?:::)?)/ ) {
334 die "Fatal error... regex failed to match in '$rest' when expected";
337 # Special Case: If we accidentally treat eq'foo' like the word "eq'foo",
338 # then unwind it and just make it 'eq' (or the other stringy comparitors)
340 if ( $word =~ /^(?:eq|ne|q|qq|qx|qw|qr|m|s|tr|y)\'/ ) {
341 if ( substr($word, 1, 1) eq "'" ) {
342 $word = substr($word, 0, 1);
344 $word = substr($word, 0, 2);
348 # Advance the position one after the end of the bareword
349 $t->{line_cursor} += length $word;
351 # We might be a subroutine attribute.
352 my $tokens = $t->_previous_significant_tokens(1);
353 if ( $tokens and $tokens->[0]->{_attribute} ) {
354 $t->_new_token( 'Attribute', $word );
355 return ($t->{line_cursor} >= $t->{line_length}) ? 0
356 : $t->{class}->__TOKENIZER__on_char($t);
359 # Check for the end of the file
360 if ( $word eq '__END__' ) {
361 # Create the token for the __END__ itself
362 $t->_new_token( 'Separator', $1 );
365 # Move into the End zone (heh)
366 $t->{zone} = 'PPI::Token::End';
368 # Add the rest of the line as a comment, and a whitespace newline
369 # Anything after the __END__ on the line is "ignored". So we must
370 # also ignore it, by turning it into a comment.
371 $rest = substr( $t->{line}, $t->{line_cursor} );
372 $t->{line_cursor} = length $t->{line};
373 if ( $rest =~ /\n$/ ) {
375 $t->_new_token( 'Comment', $rest ) if length $rest;
376 $t->_new_token( 'Whitespace', "\n" );
378 $t->_new_token( 'Comment', $rest ) if length $rest;
385 # Check for the data section
386 if ( $word eq '__DATA__' ) {
387 # Create the token for the __DATA__ itself
388 $t->_new_token( 'Separator', "$1" );
391 # Move into the Data zone
392 $t->{zone} = 'PPI::Token::Data';
394 # Add the rest of the line as the Data token
395 $rest = substr( $t->{line}, $t->{line_cursor} );
396 $t->{line_cursor} = length $t->{line};
397 if ( $rest =~ /\n$/ ) {
399 $t->_new_token( 'Comment', $rest ) if length $rest;
400 $t->_new_token( 'Whitespace', "\n" );
402 $t->_new_token( 'Comment', $rest ) if length $rest;
410 if ( $word =~ /\:/ ) {
411 # Since its not a simple identifier...
412 $token_class = 'Word';
414 } elsif ( $class->__TOKENIZER__literal($t, $word, $tokens) ) {
415 $token_class = 'Word';
417 } elsif ( $QUOTELIKE{$word} ) {
418 # Special Case: A Quote-like operator
419 $t->_new_token( $QUOTELIKE{$word}, $word );
420 return ($t->{line_cursor} >= $t->{line_length}) ? 0
421 : $t->{class}->__TOKENIZER__on_char( $t );
423 } elsif ( $OPERATOR{$word} ) {
425 $token_class = 'Operator';
428 # If the next character is a ':' then its a label...
429 my $string = substr( $t->{line}, $t->{line_cursor} );
430 if ( $string =~ /^(\s*:)(?!:)/ ) {
431 if ( $tokens and $tokens->[0]->{content} eq 'sub' ) {
432 # ... UNLESS its after 'sub' in which
433 # case it is a sub name and an attribute
435 # We COULD have checked this at the top
436 # level of checks, but this would impose
437 # an additional performance per-word
438 # penalty, and every other case where the
439 # attribute operator doesn't directly
440 # touch the object name already works.
441 $token_class = 'Word';
444 $t->{line_cursor} += length($1);
445 $token_class = 'Label';
447 } elsif ( $word eq '_' ) {
448 $token_class = 'Magic';
450 $token_class = 'Word';
454 # Create the new token and finalise
455 $t->_new_token( $token_class, $word );
456 if ( $t->{line_cursor} >= $t->{line_length} ) {
461 $t->_finalize_token->__TOKENIZER__on_char($t);
464 # Is the word in a "forced" context, and thus cannot be either an
465 # operator or a quote-like thing. This version is only useful
466 # during tokenization.
467 sub __TOKENIZER__literal {
468 my ($class, $t, $word, $tokens) = @_;
470 # Is this a forced-word context?
471 # i.e. Would normally be seen as an operator.
472 unless ( $QUOTELIKE{$word} or $PPI::Token::Operator::OPERATOR{$word} ) {
476 # Check the cases when we have previous tokens
477 my $rest = substr( $t->{line}, $t->{line_cursor} );
479 my $token = $tokens->[0] or return '';
481 # We are forced if we are a method name
482 return 1 if $token->{content} eq '->';
484 # We are forced if we are a sub name
485 return 1 if $token->isa('PPI::Token::Word') && $token->{content} eq 'sub';
487 # If we are contained in a pair of curly braces,
488 # we are probably a bareword hash key
489 if ( $token->{content} eq '{' and $rest =~ /^\s*\}/ ) {
494 # In addition, if the word is followed by => it is probably
495 # also actually a word and not a regex.
496 if ( $rest =~ /^\s*=>/ ) {
500 # Otherwise we probably arn't forced
510 - Add C<function>, C<method> etc detector methods
514 See the L<support section|PPI/SUPPORT> in the main module.
518 Adam Kennedy E<lt>adamk@cpan.orgE<gt>
522 Copyright 2001 - 2009 Adam Kennedy.
524 This program is free software; you can redistribute
525 it and/or modify it under the same terms as Perl itself.
527 The full text of the license can be found in the
528 LICENSE file included with this module.