lib/TAP/Parser/Grammar.pm

   1 package TAP::Parser::Grammar;
   2
   3 use strict;
   4 use vars qw($VERSION);
   5
   6 use TAP::Parser::Result          ();
   7 use TAP::Parser::YAMLish::Reader ();
   8
   9 =head1 NAME
  10
  11 TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
  12
  13 =head1 VERSION
  14
  15 Version 3.06
  16
  17 =cut
  18
  19 $VERSION = '3.06';
  20
  21 =head1 DESCRIPTION
  22
  23 C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
  24 L<TAP::Parser::Result> subclasses to represent the tokens.
  25
  26 Do not attempt to use this class directly.  It won't make sense.  It's mainly
  27 here to ensure that we will be able to have pluggable grammars when TAP is
  28 expanded at some future date (plus, this stuff was really cluttering the
  29 parser).
  30
  31 =cut
  32
  33 ##############################################################################
  34
  35 =head2 Class Methods
  36
  37
  38 =head3 C<new>
  39
  40   my $grammar = TAP::Grammar->new($stream);
  41
  42 Returns TAP grammar object that will parse the specified stream.
  43
  44 =cut
  45
  46 sub new {
  47     my ( $class, $stream ) = @_;
  48     my $self = bless { stream => $stream }, $class;
  49     $self->set_version(12);
  50     return $self;
  51 }
  52
  53 my %language_for;
  54
  55 {
  56
  57     # XXX the 'not' and 'ok' might be on separate lines in VMS ...
  58     my $ok  = qr/(?:not )?ok\b/;
  59     my $num = qr/\d+/;
  60
  61     my %v12 = (
  62         version => {
  63             syntax  => qr/^TAP\s+version\s+(\d+)\s*\z/i,
  64             handler => sub {
  65                 my ( $self, $line ) = @_;
  66                 my $version = $1;
  67                 return $self->_make_version_token( $line, $version, );
  68             },
  69         },
  70         plan => {
  71             syntax  => qr/^1\.\.(\d+)\s*(.*)\z/,
  72             handler => sub {
  73                 my ( $self, $line ) = @_;
  74                 my ( $tests_planned, $tail ) = ( $1, $2 );
  75                 my $explanation = undef;
  76                 my $skip        = '';
  77
  78                 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
  79                     my @todo = split /\s+/, _trim($1);
  80                     return $self->_make_plan_token(
  81                         $line, $tests_planned, 'TODO',
  82                         '',    \@todo
  83                     );
  84                 }
  85                 elsif ( 0 == $tests_planned ) {
  86                     $skip        = 'SKIP';
  87                     $explanation = $tail;
  88
  89                     # Trim valid SKIP directive without being strict
  90                     # about its presence.
  91                     $explanation =~ s/^#\s*//;
  92                     $explanation =~ s/^skip\S*\s+//i;
  93                 }
  94                 elsif ( $tail !~ /^\s*$/ ) {
  95                     return $self->_make_unknown_token($line);
  96                 }
  97
  98                 $explanation = '' unless defined $explanation;
  99
 100                 return $self->_make_plan_token(
 101                     $line, $tests_planned, $skip,
 102                     $explanation, []
 103                 );
 104
 105             },
 106         },
 107
 108         # An optimization to handle the most common test lines without
 109         # directives.
 110         simple_test => {
 111             syntax  => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
 112             handler => sub {
 113                 my ( $self, $line ) = @_;
 114                 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
 115
 116                 return $self->_make_test_token(
 117                     $line, $ok, $num,
 118                     $desc
 119                 );
 120             },
 121         },
 122         test => {
 123             syntax  => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
 124             handler => sub {
 125                 my ( $self, $line ) = @_;
 126                 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
 127                 my ( $dir, $explanation ) = ( '', '' );
 128                 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
 129                        \# \s* (SKIP|TODO) \b \s* (.*) $/ix
 130                   )
 131                 {
 132                     ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
 133                 }
 134                 return $self->_make_test_token(
 135                     $line,   $ok, $num, $desc,
 136                     uc $dir, $explanation
 137                 );
 138             },
 139         },
 140         comment => {
 141             syntax  => qr/^#(.*)/,
 142             handler => sub {
 143                 my ( $self, $line ) = @_;
 144                 my $comment = $1;
 145                 return $self->_make_comment_token( $line, $comment );
 146             },
 147         },
 148         bailout => {
 149             syntax  => qr/^Bail out!\s*(.*)/,
 150             handler => sub {
 151                 my ( $self, $line ) = @_;
 152                 my $explanation = $1;
 153                 return $self->_make_bailout_token(
 154                     $line,
 155                     $explanation
 156                 );
 157             },
 158         },
 159     );
 160
 161     my %v13 = (
 162         %v12,
 163         plan => {
 164             syntax  => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
 165             handler => sub {
 166                 my ( $self, $line ) = @_;
 167                 my ( $tests_planned, $explanation ) = ( $1, $2 );
 168                 my $skip
 169                   = ( 0 == $tests_planned || defined $explanation )
 170                   ? 'SKIP'
 171                   : '';
 172                 $explanation = '' unless defined $explanation;
 173                 return $self->_make_plan_token(
 174                     $line, $tests_planned, $skip,
 175                     $explanation, []
 176                 );
 177             },
 178         },
 179         yaml => {
 180             syntax  => qr/^ (\s+) (---.*) $/x,
 181             handler => sub {
 182                 my ( $self, $line ) = @_;
 183                 my ( $pad, $marker ) = ( $1, $2 );
 184                 return $self->_make_yaml_token( $pad, $marker );
 185             },
 186         },
 187     );
 188
 189     %language_for = (
 190         '12' => {
 191             tokens => \%v12,
 192         },
 193         '13' => {
 194             tokens => \%v13,
 195             setup  => sub {
 196                 shift->{stream}->handle_unicode;
 197             },
 198         },
 199     );
 200 }
 201
 202 ##############################################################################
 203
 204 =head2 Instance Methods
 205
 206 =head3 C<set_version>
 207
 208   $grammar->set_version(13);
 209
 210 Tell the grammar which TAP syntax version to support. The lowest
 211 supported version is 12. Although 'TAP version' isn't valid version 12
 212 syntax it is accepted so that higher version numbers may be parsed.
 213
 214 =cut
 215
 216 sub set_version {
 217     my $self    = shift;
 218     my $version = shift;
 219
 220     if ( my $language = $language_for{$version} ) {
 221         $self->{tokens} = $language->{tokens};
 222
 223         if ( my $setup = $language->{setup} ) {
 224             $self->$setup();
 225         }
 226
 227         $self->_order_tokens;
 228     }
 229     else {
 230         require Carp;
 231         Carp::croak("Unsupported syntax version: $version");
 232     }
 233 }
 234
 235 # Optimization to put the most frequent tokens first.
 236 sub _order_tokens {
 237     my $self = shift;
 238
 239     my %copy = %{ $self->{tokens} };
 240     my @ordered_tokens = grep {defined}
 241       map { delete $copy{$_} } qw( simple_test test comment plan );
 242     push @ordered_tokens, values %copy;
 243
 244     $self->{ordered_tokens} = \@ordered_tokens;
 245 }
 246
 247 ##############################################################################
 248
 249 =head3 C<tokenize>
 250
 251   my $token = $grammar->tokenize;
 252
 253 This method will return a L<TAP::Parser::Result> object representing the
 254 current line of TAP.
 255
 256 =cut
 257
 258 sub tokenize {
 259     my $self = shift;
 260
 261     my $line = $self->{stream}->next;
 262     return unless defined $line;
 263
 264     my $token;
 265
 266     foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
 267         if ( $line =~ $token_data->{syntax} ) {
 268             my $handler = $token_data->{handler};
 269             $token = $self->$handler($line);
 270             last;
 271         }
 272     }
 273
 274     $token = $self->_make_unknown_token($line) unless $token;
 275
 276     return TAP::Parser::Result->new($token);
 277 }
 278
 279 ##############################################################################
 280
 281 =head3 C<token_types>
 282
 283   my @types = $grammar->token_types;
 284
 285 Returns the different types of tokens which this grammar can parse.
 286
 287 =cut
 288
 289 sub token_types {
 290     my $self = shift;
 291     return keys %{ $self->{tokens} };
 292 }
 293
 294 ##############################################################################
 295
 296 =head3 C<syntax_for>
 297
 298   my $syntax = $grammar->syntax_for($token_type);
 299
 300 Returns a pre-compiled regular expression which will match a chunk of TAP
 301 corresponding to the token type.  For example (not that you should really pay
 302 attention to this, C<< $grammar->syntax_for('comment') >> will return
 303 C<< qr/^#(.*)/ >>.
 304
 305 =cut
 306
 307 sub syntax_for {
 308     my ( $self, $type ) = @_;
 309     return $self->{tokens}->{$type}->{syntax};
 310 }
 311
 312 ##############################################################################
 313
 314 =head3 C<handler_for>
 315
 316   my $handler = $grammar->handler_for($token_type);
 317
 318 Returns a code reference which, when passed an appropriate line of TAP,
 319 returns the lexed token corresponding to that line.  As a result, the basic
 320 TAP parsing loop looks similar to the following:
 321
 322  my @tokens;
 323  my $grammar = TAP::Grammar->new;
 324  LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
 325      foreach my $type ( $grammar->token_types ) {
 326          my $syntax  = $grammar->syntax_for($type);
 327          if ( $line =~ $syntax ) {
 328              my $handler = $grammar->handler_for($type);
 329              push @tokens => $grammar->$handler($line);
 330              next LINE;
 331          }
 332      }
 333      push @tokens => $grammar->_make_unknown_token($line);
 334  }
 335
 336 =cut
 337
 338 sub handler_for {
 339     my ( $self, $type ) = @_;
 340     return $self->{tokens}->{$type}->{handler};
 341 }
 342
 343 sub _make_version_token {
 344     my ( $self, $line, $version ) = @_;
 345     return {
 346         type    => 'version',
 347         raw     => $line,
 348         version => $version,
 349     };
 350 }
 351
 352 sub _make_plan_token {
 353     my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
 354
 355     if ( $directive eq 'SKIP' && 0 != $tests_planned ) {
 356         warn
 357           "Specified SKIP directive in plan but more than 0 tests ($line)\n";
 358     }
 359     return {
 360         type          => 'plan',
 361         raw           => $line,
 362         tests_planned => $tests_planned,
 363         directive     => $directive,
 364         explanation   => _trim($explanation),
 365         todo_list     => $todo,
 366     };
 367 }
 368
 369 sub _make_test_token {
 370     my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
 371     my %test = (
 372         ok          => $ok,
 373         test_num    => $num,
 374         description => _trim($desc),
 375         directive   => uc($dir),
 376         explanation => _trim($explanation),
 377         raw         => $line,
 378         type        => 'test',
 379     );
 380     return \%test;
 381 }
 382
 383 sub _make_unknown_token {
 384     my ( $self, $line ) = @_;
 385     return {
 386         raw  => $line,
 387         type => 'unknown',
 388     };
 389 }
 390
 391 sub _make_comment_token {
 392     my ( $self, $line, $comment ) = @_;
 393     return {
 394         type    => 'comment',
 395         raw     => $line,
 396         comment => _trim($comment)
 397     };
 398 }
 399
 400 sub _make_bailout_token {
 401     my ( $self, $line, $explanation ) = @_;
 402     return {
 403         type    => 'bailout',
 404         raw     => $line,
 405         bailout => _trim($explanation)
 406     };
 407 }
 408
 409 sub _make_yaml_token {
 410     my ( $self, $pad, $marker ) = @_;
 411
 412     my $yaml = TAP::Parser::YAMLish::Reader->new;
 413
 414     my $stream = $self->{stream};
 415
 416     # Construct a reader that reads from our input stripping leading
 417     # spaces from each line.
 418     my $leader = length($pad);
 419     my $strip  = qr{ ^ (\s{$leader}) (.*) $ }x;
 420     my @extra  = ($marker);
 421     my $reader = sub {
 422         return shift @extra if @extra;
 423         my $line = $stream->next;
 424         return $2 if $line =~ $strip;
 425         return;
 426     };
 427
 428     my $data = $yaml->read($reader);
 429
 430     # Reconstitute input. This is convoluted. Maybe we should just
 431     # record it on the way in...
 432     chomp( my $raw = $yaml->get_raw );
 433     $raw =~ s/^/$pad/mg;
 434
 435     return {
 436         type => 'yaml',
 437         raw  => $raw,
 438         data => $data
 439     };
 440 }
 441
 442 sub _trim {
 443     my $data = shift;
 444
 445     return '' unless defined $data;
 446
 447     $data =~ s/^\s+//;
 448     $data =~ s/\s+$//;
 449     return $data;
 450 }
 451
 452 =head1 TAP GRAMMAR
 453
 454 B<NOTE:>  This grammar is slightly out of date.  There's still some discussion
 455 about it and a new one will be provided when we have things better defined.
 456
 457 The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
 458 stream-based protocol.  In fact, it's quite legal to have an infinite stream.
 459 For the same reason that we don't apply regexes to streams, we're not using a
 460 formal grammar here.  Instead, we parse the TAP in lines.
 461
 462 For purposes for forward compatability, any result which does not match the
 463 following grammar is currently referred to as
 464 L<TAP::Parser::Result::Unknown>.  It is I<not> a parse error.
 465
 466 A formal grammar would look similar to the following:
 467
 468  (*
 469      For the time being, I'm cheating on the EBNF by allowing
 470      certain terms to be defined by POSIX character classes by
 471      using the following syntax:
 472
 473        digit ::= [:digit:]
 474
 475      As far as I am aware, that's not valid EBNF.  Sue me.  I
 476      didn't know how to write "char" otherwise (Unicode issues).
 477      Suggestions welcome.
 478  *)
 479
 480  tap            ::= version? { comment | unknown } leading_plan lines
 481                     |
 482                     lines trailing_plan {comment}
 483
 484  version        ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
 485
 486  leading_plan   ::= plan skip_directive? "\n"
 487
 488  trailing_plan  ::= plan "\n"
 489
 490  plan           ::= '1..' nonNegativeInteger
 491
 492  lines          ::= line {line}
 493
 494  line           ::= (comment | test | unknown | bailout ) "\n"
 495
 496  test           ::= status positiveInteger? description? directive?
 497
 498  status         ::= 'not '? 'ok '
 499
 500  description    ::= (character - (digit | '#')) {character - '#'}
 501
 502  directive      ::= todo_directive | skip_directive
 503
 504  todo_directive ::= hash_mark 'TODO' ' ' {character}
 505
 506  skip_directive ::= hash_mark 'SKIP' ' ' {character}
 507
 508  comment        ::= hash_mark {character}
 509
 510  hash_mark      ::= '#' {' '}
 511
 512  bailout        ::= 'Bail out!' {character}
 513
 514  unknown        ::= { (character - "\n") }
 515
 516  (* POSIX character classes and other terminals *)
 517
 518  digit              ::= [:digit:]
 519  character          ::= ([:print:] - "\n")
 520  positiveInteger    ::= ( digit - '0' ) {digit}
 521  nonNegativeInteger ::= digit {digit}
 522
 523
 524 =cut
 525
 526 1;