local-lib5/lib/perl5/PPI/Token/Word.pm

   1 package PPI::Token::Word;
   2
   3 =pod
   4
   5 =head1 NAME
   6
   7 PPI::Token::Word - The generic "word" Token
   8
   9 =head1 INHERITANCE
  10
  11   PPI::Token::Word
  12   isa PPI::Token
  13       isa PPI::Element
  14
  15 =head1 DESCRIPTION
  16
  17 A C<PPI::Token::Word> object is a PPI-specific representation of several
  18 different types of word-like things, and is one of the most common Token
  19 classes found in typical documents.
  20
  21 Specifically, it includes not only barewords, but also any other valid
  22 Perl identifier including non-operator keywords and core functions, and
  23 any include C<::> separators inside it, as long as it fits the
  24 format of a class, function, etc.
  25
  26 =head1 METHODS
  27
  28 There are no methods available for C<PPI::Token::Word> beyond those
  29 provided by its L<PPI::Token> and L<PPI::Element> parent
  30 classes.
  31
  32 We expect to add additional methods to help further resolve a Word as
  33 a function, method, etc over time.  If you need such a thing right
  34 now, look at L<Perl::Critic::Utils>.
  35
  36 =cut
  37
  38 use strict;
  39 use PPI::Token ();
  40
  41 use vars qw{$VERSION @ISA %OPERATOR %QUOTELIKE};
  42 BEGIN {
  43         $VERSION = '1.206';
  44         @ISA     = 'PPI::Token';
  45
  46         # Copy in OPERATOR from PPI::Token::Operator
  47         *OPERATOR  = *PPI::Token::Operator::OPERATOR;
  48
  49         %QUOTELIKE = (
  50                 'q'  => 'Quote::Literal',
  51                 'qq' => 'Quote::Interpolate',
  52                 'qx' => 'QuoteLike::Command',
  53                 'qw' => 'QuoteLike::Words',
  54                 'qr' => 'QuoteLike::Regexp',
  55                 'm'  => 'Regexp::Match',
  56                 's'  => 'Regexp::Substitute',
  57                 'tr' => 'Regexp::Transliterate',
  58                 'y'  => 'Regexp::Transliterate',
  59         );
  60 }
  61
  62 =pod
  63
  64 =head2 literal
  65
  66 Returns the value of the Word as a string.  This assumes (often
  67 incorrectly) that the Word is a bareword and not a function, method,
  68 keyword, etc.  This differs from C<content> because C<Foo'Bar> expands
  69 to C<Foo::Bar>.
  70
  71 =begin testing literal 9
  72
  73 my @pairs = (
  74         "F",          'F',
  75         "Foo::Bar",   'Foo::Bar',
  76         "Foo'Bar",    'Foo::Bar',
  77 );
  78 while ( @pairs ) {
  79         my $from  = shift @pairs;
  80         my $to    = shift @pairs;
  81         my $doc   = PPI::Document->new( \"$from;" );
  82         isa_ok( $doc, 'PPI::Document' );
  83         my $word = $doc->find_first('Token::Word');
  84         isa_ok( $word, 'PPI::Token::Word' );
  85         is( $word->literal, $to, "The source $from becomes $to ok" );
  86 }
  87
  88 =end testing
  89
  90 =cut
  91
  92 sub literal {
  93         my $self = shift;
  94         my $word = $self->content;
  95
  96         # Expand Foo'Bar to Foo::Bar
  97         $word =~ s/\'/::/g;
  98
  99         return $word;
 100 }
 101
 102 =pod
 103
 104 =head2 method_call
 105
 106 Answers whether this is the name of a method in a method call. Returns true if
 107 yes, false if no, and nothing if unknown.
 108
 109 =begin testing method_call 24
 110
 111 my $Document = PPI::Document->new(\<<'END_PERL');
 112 indirect $foo;
 113 indirect_class_with_colon Foo::;
 114 $bar->method_with_parentheses;
 115 print SomeClass->method_without_parentheses + 1;
 116 sub_call();
 117 $baz->chained_from->chained_to;
 118 a_first_thing a_middle_thing a_last_thing;
 119 (first_list_element, second_list_element, third_list_element);
 120 first_comma_separated_word, second_comma_separated_word, third_comma_separated_word;
 121 single_bareword_statement;
 122 { bareword_no_semicolon_end_of_block }
 123 $buz{hash_key};
 124 fat_comma_left_side => $thingy;
 125 END_PERL
 126
 127 isa_ok( $Document, 'PPI::Document' );
 128 my $words = $Document->find('Token::Word');
 129 is( scalar @{$words}, 23, 'Found the 23 test words' );
 130 my %words = map { $_ => $_ } @{$words};
 131 is(
 132         scalar $words{indirect}->method_call,
 133         undef,
 134         'Indirect notation is unknown.',
 135 );
 136 is(
 137         scalar $words{indirect_class_with_colon}->method_call,
 138         1,
 139         'Indirect notation with following word ending with colons is true.',
 140 );
 141 is(
 142         scalar $words{method_with_parentheses}->method_call,
 143         1,
 144         'Method with parentheses is true.',
 145 );
 146 is(
 147         scalar $words{method_without_parentheses}->method_call,
 148         1,
 149         'Method without parentheses is true.',
 150 );
 151 is(
 152         scalar $words{print}->method_call,
 153         undef,
 154         'Plain print is unknown.',
 155 );
 156 is(
 157         scalar $words{SomeClass}->method_call,
 158         undef,
 159         'Class in class method call is unknown.',
 160 );
 161 is(
 162         scalar $words{sub_call}->method_call,
 163         0,
 164         'Subroutine call is false.',
 165 );
 166 is(
 167         scalar $words{chained_from}->method_call,
 168         1,
 169         'Method that is chained from is true.',
 170 );
 171 is(
 172         scalar $words{chained_to}->method_call,
 173         1,
 174         'Method that is chained to is true.',
 175 );
 176 is(
 177         scalar $words{a_first_thing}->method_call,
 178         undef,
 179         'First bareword is unknown.',
 180 );
 181 is(
 182         scalar $words{a_middle_thing}->method_call,
 183         undef,
 184         'Bareword in the middle is unknown.',
 185 );
 186 is(
 187         scalar $words{a_last_thing}->method_call,
 188         0,
 189         'Bareword at the end is false.',
 190 );
 191 foreach my $false_word (
 192         qw<
 193                 first_list_element second_list_element third_list_element
 194                 first_comma_separated_word second_comma_separated_word third_comma_separated_word
 195                 single_bareword_statement
 196                 bareword_no_semicolon_end_of_block
 197                 hash_key
 198                 fat_comma_left_side
 199         >
 200 ) {
 201         is(
 202                 scalar $words{$false_word}->method_call,
 203                 0,
 204                 "$false_word is false.",
 205         );
 206 }
 207
 208 =end testing
 209
 210 =cut
 211
 212 sub method_call {
 213         my $self = shift;
 214
 215         my $previous = $self->sprevious_sibling;
 216         if (
 217                 $previous
 218                 and
 219                 $previous->isa('PPI::Token::Operator')
 220                 and
 221                 $previous->content eq '->'
 222         ) {
 223                 return 1;
 224         }
 225
 226         my $snext = $self->snext_sibling;
 227         return 0 unless $snext;
 228
 229         if (
 230                 $snext->isa('PPI::Structure::List')
 231                 or
 232                 $snext->isa('PPI::Token::Structure')
 233                 or
 234                 $snext->isa('PPI::Token::Operator')
 235                 and (
 236                         $snext->content eq ','
 237                         or
 238                         $snext->content eq '=>'
 239                 )
 240         ) {
 241                 return 0;
 242         }
 243
 244         if (
 245                 $snext->isa('PPI::Token::Word')
 246                 and
 247                 $snext->content =~ m< \w :: \z >xms
 248         ) {
 249                 return 1;
 250         }
 251
 252         return;
 253 }
 254
 255 sub __TOKENIZER__on_char {
 256         my $class = shift;
 257         my $t     = shift;
 258
 259         # Suck in till the end of the bareword
 260         my $rest = substr( $t->{line}, $t->{line_cursor} );
 261         if ( $rest =~ /^(\w+(?:(?:\'|::)(?!\d)\w+)*(?:::)?)/ ) {
 262                 $t->{token}->{content} .= $1;
 263                 $t->{line_cursor} += length $1;
 264
 265                 # Special Case: If we accidentally treat eq'foo' like the word "eq'foo",
 266                 # then unwind it and just make it 'eq' (or the other stringy comparitors)
 267                 if ( $t->{token}->{content} =~ /^(?:eq|ne|q|qq|qx|qw|qr|m|s|tr|y)\'/ ) {
 268                         if ( substr($t->{token}->{content}, 1, 1) eq "'" ) {
 269                                 $t->{line_cursor} -= (length($t->{token}->{content}) - 1);
 270                                 $t->{token}->{content} = substr($t->{token}->{content}, 0, 1);
 271                         } else {
 272                                 $t->{line_cursor} -= (length($t->{token}->{content}) - 2);
 273                                 $t->{token}->{content} = substr($t->{token}->{content}, 0, 2);
 274                         }
 275                 }
 276         }
 277
 278         # We might be a subroutine attribute.
 279         my $tokens = $t->_previous_significant_tokens(1);
 280         if ( $tokens and $tokens->[0]->{_attribute} ) {
 281                 $t->{class} = $t->{token}->set_class( 'Attribute' );
 282                 return $t->{class}->__TOKENIZER__commit( $t );
 283         }
 284
 285         # Check for a quote like operator
 286         my $word = $t->{token}->{content};
 287         if ( $QUOTELIKE{$word} and ! $class->__TOKENIZER__literal($t, $word, $tokens) ) {
 288                 $t->{class} = $t->{token}->set_class( $QUOTELIKE{$word} );
 289                 return $t->{class}->__TOKENIZER__on_char( $t );
 290         }
 291
 292         # Or one of the word operators
 293         if ( $OPERATOR{$word} and ! $class->__TOKENIZER__literal($t, $word, $tokens) ) {
 294                 $t->{class} = $t->{token}->set_class( 'Operator' );
 295                 return $t->_finalize_token->__TOKENIZER__on_char( $t );
 296         }
 297
 298         # Unless this is a simple identifier, at this point
 299         # it has to be a normal bareword
 300         if ( $word =~ /\:/ ) {
 301                 return $t->_finalize_token->__TOKENIZER__on_char( $t );
 302         }
 303
 304         # If the NEXT character in the line is a colon, this
 305         # is a label.
 306         my $char = substr( $t->{line}, $t->{line_cursor}, 1 );
 307         if ( $char eq ':' ) {
 308                 $t->{token}->{content} .= ':';
 309                 $t->{line_cursor}++;
 310                 $t->{class} = $t->{token}->set_class( 'Label' );
 311
 312         # If not a label, '_' on its own is the magic filehandle
 313         } elsif ( $word eq '_' ) {
 314                 $t->{class} = $t->{token}->set_class( 'Magic' );
 315
 316         }
 317
 318         # Finalise and process the character again
 319         $t->_finalize_token->__TOKENIZER__on_char( $t );
 320 }
 321
 322
 323
 324 # We are committed to being a bareword.
 325 # Or so we would like to believe.
 326 sub __TOKENIZER__commit {
 327         my ($class, $t) = @_;
 328
 329         # Our current position is the first character of the bareword.
 330         # Capture the bareword.
 331         my $rest = substr( $t->{line}, $t->{line_cursor} );
 332         unless ( $rest =~ /^((?!\d)\w+(?:(?:\'|::)(?!\d)\w+)*(?:::)?)/ ) {
 333                 # Programmer error
 334                 die "Fatal error... regex failed to match in '$rest' when expected";
 335         }
 336
 337         # Special Case: If we accidentally treat eq'foo' like the word "eq'foo",
 338         # then unwind it and just make it 'eq' (or the other stringy comparitors)
 339         my $word = $1;
 340         if ( $word =~ /^(?:eq|ne|q|qq|qx|qw|qr|m|s|tr|y)\'/ ) {
 341                 if ( substr($word, 1, 1) eq "'" ) {
 342                         $word = substr($word, 0, 1);
 343                 } else {
 344                         $word = substr($word, 0, 2);
 345                 }
 346         }
 347
 348         # Advance the position one after the end of the bareword
 349         $t->{line_cursor} += length $word;
 350
 351         # We might be a subroutine attribute.
 352         my $tokens = $t->_previous_significant_tokens(1);
 353         if ( $tokens and $tokens->[0]->{_attribute} ) {
 354                 $t->_new_token( 'Attribute', $word );
 355                 return ($t->{line_cursor} >= $t->{line_length}) ? 0
 356                         : $t->{class}->__TOKENIZER__on_char($t);
 357         }
 358
 359         # Check for the end of the file
 360         if ( $word eq '__END__' ) {
 361                 # Create the token for the __END__ itself
 362                 $t->_new_token( 'Separator', $1 );
 363                 $t->_finalize_token;
 364
 365                 # Move into the End zone (heh)
 366                 $t->{zone} = 'PPI::Token::End';
 367
 368                 # Add the rest of the line as a comment, and a whitespace newline
 369                 # Anything after the __END__ on the line is "ignored". So we must
 370                 # also ignore it, by turning it into a comment.
 371                 $rest = substr( $t->{line}, $t->{line_cursor} );
 372                 $t->{line_cursor} = length $t->{line};
 373                 if ( $rest =~ /\n$/ ) {
 374                         chomp $rest;
 375                         $t->_new_token( 'Comment', $rest ) if length $rest;
 376                         $t->_new_token( 'Whitespace', "\n" );
 377                 } else {
 378                         $t->_new_token( 'Comment', $rest ) if length $rest;
 379                 }
 380                 $t->_finalize_token;
 381
 382                 return 0;
 383         }
 384
 385         # Check for the data section
 386         if ( $word eq '__DATA__' ) {
 387                 # Create the token for the __DATA__ itself
 388                 $t->_new_token( 'Separator', "$1" );
 389                 $t->_finalize_token;
 390
 391                 # Move into the Data zone
 392                 $t->{zone} = 'PPI::Token::Data';
 393
 394                 # Add the rest of the line as the Data token
 395                 $rest = substr( $t->{line}, $t->{line_cursor} );
 396                 $t->{line_cursor} = length $t->{line};
 397                 if ( $rest =~ /\n$/ ) {
 398                         chomp $rest;
 399                         $t->_new_token( 'Comment', $rest ) if length $rest;
 400                         $t->_new_token( 'Whitespace', "\n" );
 401                 } else {
 402                         $t->_new_token( 'Comment', $rest ) if length $rest;
 403                 }
 404                 $t->_finalize_token;
 405
 406                 return 0;
 407         }
 408
 409         my $token_class;
 410         if ( $word =~ /\:/ ) {
 411                 # Since its not a simple identifier...
 412                 $token_class = 'Word';
 413
 414         } elsif ( $class->__TOKENIZER__literal($t, $word, $tokens) ) {
 415                 $token_class = 'Word';
 416
 417         } elsif ( $QUOTELIKE{$word} ) {
 418                 # Special Case: A Quote-like operator
 419                 $t->_new_token( $QUOTELIKE{$word}, $word );
 420                 return ($t->{line_cursor} >= $t->{line_length}) ? 0
 421                         : $t->{class}->__TOKENIZER__on_char( $t );
 422
 423         } elsif ( $OPERATOR{$word} ) {
 424                 # Word operator
 425                 $token_class = 'Operator';
 426
 427         } else {
 428                 # If the next character is a ':' then its a label...
 429                 my $string = substr( $t->{line}, $t->{line_cursor} );
 430                 if ( $string =~ /^(\s*:)(?!:)/ ) {
 431                         if ( $tokens and $tokens->[0]->{content} eq 'sub' ) {
 432                                 # ... UNLESS its after 'sub' in which
 433                                 # case it is a sub name and an attribute
 434                                 # operator.
 435                                 # We COULD have checked this at the top
 436                                 # level of checks, but this would impose
 437                                 # an additional performance per-word
 438                                 # penalty, and every other case where the
 439                                 # attribute operator doesn't directly
 440                                 # touch the object name already works.
 441                                 $token_class = 'Word';
 442                         } else {
 443                                 $word .= $1;
 444                                 $t->{line_cursor} += length($1);
 445                                 $token_class = 'Label';
 446                         }
 447                 } elsif ( $word eq '_' ) {
 448                         $token_class = 'Magic';
 449                 } else {
 450                         $token_class = 'Word';
 451                 }
 452         }
 453
 454         # Create the new token and finalise
 455         $t->_new_token( $token_class, $word );
 456         if ( $t->{line_cursor} >= $t->{line_length} ) {
 457                 # End of the line
 458                 $t->_finalize_token;
 459                 return 0;
 460         }
 461         $t->_finalize_token->__TOKENIZER__on_char($t);
 462 }
 463
 464 # Is the word in a "forced" context, and thus cannot be either an
 465 # operator or a quote-like thing. This version is only useful
 466 # during tokenization.
 467 sub __TOKENIZER__literal {
 468         my ($class, $t, $word, $tokens) = @_;
 469
 470         # Is this a forced-word context?
 471         # i.e. Would normally be seen as an operator.
 472         unless ( $QUOTELIKE{$word} or $PPI::Token::Operator::OPERATOR{$word} ) {
 473                 return '';
 474         }
 475
 476         # Check the cases when we have previous tokens
 477         my $rest = substr( $t->{line}, $t->{line_cursor} );
 478         if ( $tokens ) {
 479                 my $token = $tokens->[0] or return '';
 480
 481                 # We are forced if we are a method name
 482                 return 1 if $token->{content} eq '->';
 483
 484                 # We are forced if we are a sub name
 485                 return 1 if $token->isa('PPI::Token::Word') && $token->{content} eq 'sub';
 486
 487                 # If we are contained in a pair of curly braces,
 488                 # we are probably a bareword hash key
 489                 if ( $token->{content} eq '{' and $rest =~ /^\s*\}/ ) {
 490                         return 1;
 491                 }
 492         }
 493
 494         # In addition, if the word is followed by => it is probably
 495         # also actually a word and not a regex.
 496         if ( $rest =~ /^\s*=>/ ) {
 497                 return 1;
 498         }
 499
 500         # Otherwise we probably arn't forced
 501         '';
 502 }
 503
 504 1;
 505
 506 =pod
 507
 508 =head1 TO DO
 509
 510 - Add C<function>, C<method> etc detector methods
 511
 512 =head1 SUPPORT
 513
 514 See the L<support section|PPI/SUPPORT> in the main module.
 515
 516 =head1 AUTHOR
 517
 518 Adam Kennedy E<lt>adamk@cpan.orgE<gt>
 519
 520 =head1 COPYRIGHT
 521
 522 Copyright 2001 - 2009 Adam Kennedy.
 523
 524 This program is free software; you can redistribute
 525 it and/or modify it under the same terms as Perl itself.
 526
 527 The full text of the license can be found in the
 528 LICENSE file included with this module.
 529
 530 =cut