1 package TAP::Parser::Grammar;
4 use vars qw($VERSION @ISA);
7 use TAP::Parser::ResultFactory ();
8 use TAP::Parser::YAMLish::Reader ();
10 @ISA = qw(TAP::Object);
14 TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
26 use TAP::Parser::Grammar;
27 my $grammar = $self->make_grammar({
28 stream => $tap_parser_stream,
29 parser => $tap_parser,
33 my $result = $grammar->tokenize;
37 C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
38 L<TAP::Parser::Result> subclasses to represent the tokens.
40 Do not attempt to use this class directly. It won't make sense. It's mainly
41 here to ensure that we will be able to have pluggable grammars when TAP is
42 expanded at some future date (plus, this stuff was really cluttering the
51 my $grammar = TAP::Parser::Grammar->new({
57 Returns L<TAP::Parser> grammar object that will parse the specified stream.
58 Both C<stream> and C<parser> are required arguments. If C<version> is not set
59 it defaults to C<12> (see L</set_version> for more details).
63 # new() implementation supplied by TAP::Object
65 my ( $self, $args ) = @_;
66 $self->{stream} = $args->{stream}; # TODO: accessor
67 $self->{parser} = $args->{parser}; # TODO: accessor
68 $self->set_version( $args->{version} || 12 );
76 # XXX the 'not' and 'ok' might be on separate lines in VMS ...
77 my $ok = qr/(?:not )?ok\b/;
82 syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
84 my ( $self, $line ) = @_;
86 return $self->_make_version_token( $line, $version, );
90 syntax => qr/^1\.\.(\d+)\s*(.*)\z/,
92 my ( $self, $line ) = @_;
93 my ( $tests_planned, $tail ) = ( $1, $2 );
94 my $explanation = undef;
97 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
98 my @todo = split /\s+/, _trim($1);
99 return $self->_make_plan_token(
100 $line, $tests_planned, 'TODO',
104 elsif ( 0 == $tests_planned ) {
107 # If we can't match # SKIP the directive should be undef.
108 ($explanation) = $tail =~ /^#\s*SKIP\s+(.*)/i;
110 elsif ( $tail !~ /^\s*$/ ) {
111 return $self->_make_unknown_token($line);
114 $explanation = '' unless defined $explanation;
116 return $self->_make_plan_token(
117 $line, $tests_planned, $skip,
124 # An optimization to handle the most common test lines without
127 syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
129 my ( $self, $line ) = @_;
130 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
132 return $self->_make_test_token(
139 syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
141 my ( $self, $line ) = @_;
142 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
143 my ( $dir, $explanation ) = ( '', '' );
144 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
145 \# \s* (SKIP|TODO) \b \s* (.*) $/ix
148 ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
150 return $self->_make_test_token(
151 $line, $ok, $num, $desc,
157 syntax => qr/^#(.*)/,
159 my ( $self, $line ) = @_;
161 return $self->_make_comment_token( $line, $comment );
165 syntax => qr/^Bail out!\s*(.*)/,
167 my ( $self, $line ) = @_;
168 my $explanation = $1;
169 return $self->_make_bailout_token(
180 syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
182 my ( $self, $line ) = @_;
183 my ( $tests_planned, $explanation ) = ( $1, $2 );
185 = ( 0 == $tests_planned || defined $explanation )
188 $explanation = '' unless defined $explanation;
189 return $self->_make_plan_token(
190 $line, $tests_planned, $skip,
196 syntax => qr/^ (\s+) (---.*) $/x,
198 my ( $self, $line ) = @_;
199 my ( $pad, $marker ) = ( $1, $2 );
200 return $self->_make_yaml_token( $pad, $marker );
205 qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
207 my ( $self, $line ) = @_;
209 return $self->_make_pragma_token( $line, $pragmas );
221 shift->{stream}->handle_unicode;
227 ##############################################################################
229 =head2 Instance Methods
231 =head3 C<set_version>
233 $grammar->set_version(13);
235 Tell the grammar which TAP syntax version to support. The lowest
236 supported version is 12. Although 'TAP version' isn't valid version 12
237 syntax it is accepted so that higher version numbers may be parsed.
245 if ( my $language = $language_for{$version} ) {
246 $self->{version} = $version;
247 $self->{tokens} = $language->{tokens};
249 if ( my $setup = $language->{setup} ) {
253 $self->_order_tokens;
257 Carp::croak("Unsupported syntax version: $version");
261 # Optimization to put the most frequent tokens first.
265 my %copy = %{ $self->{tokens} };
266 my @ordered_tokens = grep {defined}
267 map { delete $copy{$_} } qw( simple_test test comment plan );
268 push @ordered_tokens, values %copy;
270 $self->{ordered_tokens} = \@ordered_tokens;
273 ##############################################################################
277 my $token = $grammar->tokenize;
279 This method will return a L<TAP::Parser::Result> object representing the
287 my $line = $self->{stream}->next;
288 unless ( defined $line ) {
289 delete $self->{parser}; # break circular ref
295 foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
296 if ( $line =~ $token_data->{syntax} ) {
297 my $handler = $token_data->{handler};
298 $token = $self->$handler($line);
303 $token = $self->_make_unknown_token($line) unless $token;
305 return $self->{parser}->make_result($token);
308 ##############################################################################
310 =head3 C<token_types>
312 my @types = $grammar->token_types;
314 Returns the different types of tokens which this grammar can parse.
320 return keys %{ $self->{tokens} };
323 ##############################################################################
327 my $syntax = $grammar->syntax_for($token_type);
329 Returns a pre-compiled regular expression which will match a chunk of TAP
330 corresponding to the token type. For example (not that you should really pay
331 attention to this, C<< $grammar->syntax_for('comment') >> will return
337 my ( $self, $type ) = @_;
338 return $self->{tokens}->{$type}->{syntax};
341 ##############################################################################
343 =head3 C<handler_for>
345 my $handler = $grammar->handler_for($token_type);
347 Returns a code reference which, when passed an appropriate line of TAP,
348 returns the lexed token corresponding to that line. As a result, the basic
349 TAP parsing loop looks similar to the following:
352 my $grammar = TAP::Grammar->new;
353 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
354 foreach my $type ( $grammar->token_types ) {
355 my $syntax = $grammar->syntax_for($type);
356 if ( $line =~ $syntax ) {
357 my $handler = $grammar->handler_for($type);
358 push @tokens => $grammar->$handler($line);
362 push @tokens => $grammar->_make_unknown_token($line);
368 my ( $self, $type ) = @_;
369 return $self->{tokens}->{$type}->{handler};
372 sub _make_version_token {
373 my ( $self, $line, $version ) = @_;
381 sub _make_plan_token {
382 my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
384 if ( $directive eq 'SKIP'
385 && 0 != $tests_planned
386 && $self->{version} < 13 )
389 "Specified SKIP directive in plan but more than 0 tests ($line)\n";
395 tests_planned => $tests_planned,
396 directive => $directive,
397 explanation => _trim($explanation),
402 sub _make_test_token {
403 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
407 description => _trim($desc),
408 directive => ( defined $dir ? uc $dir : '' ),
409 explanation => _trim($explanation),
415 sub _make_unknown_token {
416 my ( $self, $line ) = @_;
423 sub _make_comment_token {
424 my ( $self, $line, $comment ) = @_;
428 comment => _trim($comment)
432 sub _make_bailout_token {
433 my ( $self, $line, $explanation ) = @_;
437 bailout => _trim($explanation)
441 sub _make_yaml_token {
442 my ( $self, $pad, $marker ) = @_;
444 my $yaml = TAP::Parser::YAMLish::Reader->new;
446 my $stream = $self->{stream};
448 # Construct a reader that reads from our input stripping leading
449 # spaces from each line.
450 my $leader = length($pad);
451 my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
452 my @extra = ($marker);
454 return shift @extra if @extra;
455 my $line = $stream->next;
456 return $2 if $line =~ $strip;
460 my $data = $yaml->read($reader);
462 # Reconstitute input. This is convoluted. Maybe we should just
463 # record it on the way in...
464 chomp( my $raw = $yaml->get_raw );
474 sub _make_pragma_token {
475 my ( $self, $line, $pragmas ) = @_;
479 pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
486 return '' unless defined $data;
497 B<NOTE:> This grammar is slightly out of date. There's still some discussion
498 about it and a new one will be provided when we have things better defined.
500 The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
501 stream-based protocol. In fact, it's quite legal to have an infinite stream.
502 For the same reason that we don't apply regexes to streams, we're not using a
503 formal grammar here. Instead, we parse the TAP in lines.
505 For purposes for forward compatability, any result which does not match the
506 following grammar is currently referred to as
507 L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
509 A formal grammar would look similar to the following:
512 For the time being, I'm cheating on the EBNF by allowing
513 certain terms to be defined by POSIX character classes by
514 using the following syntax:
518 As far as I am aware, that's not valid EBNF. Sue me. I
519 didn't know how to write "char" otherwise (Unicode issues).
523 tap ::= version? { comment | unknown } leading_plan lines
525 lines trailing_plan {comment}
527 version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
529 leading_plan ::= plan skip_directive? "\n"
531 trailing_plan ::= plan "\n"
533 plan ::= '1..' nonNegativeInteger
535 lines ::= line {line}
537 line ::= (comment | test | unknown | bailout ) "\n"
539 test ::= status positiveInteger? description? directive?
541 status ::= 'not '? 'ok '
543 description ::= (character - (digit | '#')) {character - '#'}
545 directive ::= todo_directive | skip_directive
547 todo_directive ::= hash_mark 'TODO' ' ' {character}
549 skip_directive ::= hash_mark 'SKIP' ' ' {character}
551 comment ::= hash_mark {character}
553 hash_mark ::= '#' {' '}
555 bailout ::= 'Bail out!' {character}
557 unknown ::= { (character - "\n") }
559 (* POSIX character classes and other terminals *)
562 character ::= ([:print:] - "\n")
563 positiveInteger ::= ( digit - '0' ) {digit}
564 nonNegativeInteger ::= digit {digit}
568 Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
570 If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
571 do is read through the code. There's no easy way of summarizing it here.
577 L<TAP::Parser::Iterator>,
578 L<TAP::Parser::Result>,