1 package TAP::Parser::Grammar;
6 use TAP::Parser::Result ();
7 use TAP::Parser::YAMLish::Reader ();
11 TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
23 C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
24 L<TAP::Parser::Result> subclasses to represent the tokens.
26 Do not attempt to use this class directly. It won't make sense. It's mainly
27 here to ensure that we will be able to have pluggable grammars when TAP is
28 expanded at some future date (plus, this stuff was really cluttering the
33 ##############################################################################
40 my $grammar = TAP::Grammar->new($stream);
42 Returns TAP grammar object that will parse the specified stream.
47 my ( $class, $stream ) = @_;
48 my $self = bless { stream => $stream }, $class;
49 $self->set_version(12);
57 # XXX the 'not' and 'ok' might be on separate lines in VMS ...
58 my $ok = qr/(?:not )?ok\b/;
63 syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
65 my ( $self, $line ) = @_;
67 return $self->_make_version_token( $line, $version, );
71 syntax => qr/^1\.\.(\d+)\s*(.*)\z/,
73 my ( $self, $line ) = @_;
74 my ( $tests_planned, $tail ) = ( $1, $2 );
75 my $explanation = undef;
78 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
79 my @todo = split /\s+/, _trim($1);
80 return $self->_make_plan_token(
81 $line, $tests_planned, 'TODO',
85 elsif ( 0 == $tests_planned ) {
89 # Trim valid SKIP directive without being strict
91 $explanation =~ s/^#\s*//;
92 $explanation =~ s/^skip\S*\s+//i;
94 elsif ( $tail !~ /^\s*$/ ) {
95 return $self->_make_unknown_token($line);
98 $explanation = '' unless defined $explanation;
100 return $self->_make_plan_token(
101 $line, $tests_planned, $skip,
108 # An optimization to handle the most common test lines without
111 syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
113 my ( $self, $line ) = @_;
114 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
116 return $self->_make_test_token(
123 syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
125 my ( $self, $line ) = @_;
126 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
127 my ( $dir, $explanation ) = ( '', '' );
128 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
129 \# \s* (SKIP|TODO) \b \s* (.*) $/ix
132 ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
134 return $self->_make_test_token(
135 $line, $ok, $num, $desc,
136 uc $dir, $explanation
141 syntax => qr/^#(.*)/,
143 my ( $self, $line ) = @_;
145 return $self->_make_comment_token( $line, $comment );
149 syntax => qr/^Bail out!\s*(.*)/,
151 my ( $self, $line ) = @_;
152 my $explanation = $1;
153 return $self->_make_bailout_token(
164 syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
166 my ( $self, $line ) = @_;
167 my ( $tests_planned, $explanation ) = ( $1, $2 );
169 = ( 0 == $tests_planned || defined $explanation )
172 $explanation = '' unless defined $explanation;
173 return $self->_make_plan_token(
174 $line, $tests_planned, $skip,
180 syntax => qr/^ (\s+) (---.*) $/x,
182 my ( $self, $line ) = @_;
183 my ( $pad, $marker ) = ( $1, $2 );
184 return $self->_make_yaml_token( $pad, $marker );
196 shift->{stream}->handle_unicode;
202 ##############################################################################
204 =head2 Instance Methods
206 =head3 C<set_version>
208 $grammar->set_version(13);
210 Tell the grammar which TAP syntax version to support. The lowest
211 supported version is 12. Although 'TAP version' isn't valid version 12
212 syntax it is accepted so that higher version numbers may be parsed.
220 if ( my $language = $language_for{$version} ) {
221 $self->{tokens} = $language->{tokens};
223 if ( my $setup = $language->{setup} ) {
227 $self->_order_tokens;
231 Carp::croak("Unsupported syntax version: $version");
235 # Optimization to put the most frequent tokens first.
239 my %copy = %{ $self->{tokens} };
240 my @ordered_tokens = grep {defined}
241 map { delete $copy{$_} } qw( simple_test test comment plan );
242 push @ordered_tokens, values %copy;
244 $self->{ordered_tokens} = \@ordered_tokens;
247 ##############################################################################
251 my $token = $grammar->tokenize;
253 This method will return a L<TAP::Parser::Result> object representing the
261 my $line = $self->{stream}->next;
262 return unless defined $line;
266 foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
267 if ( $line =~ $token_data->{syntax} ) {
268 my $handler = $token_data->{handler};
269 $token = $self->$handler($line);
274 $token = $self->_make_unknown_token($line) unless $token;
276 return TAP::Parser::Result->new($token);
279 ##############################################################################
281 =head3 C<token_types>
283 my @types = $grammar->token_types;
285 Returns the different types of tokens which this grammar can parse.
291 return keys %{ $self->{tokens} };
294 ##############################################################################
298 my $syntax = $grammar->syntax_for($token_type);
300 Returns a pre-compiled regular expression which will match a chunk of TAP
301 corresponding to the token type. For example (not that you should really pay
302 attention to this, C<< $grammar->syntax_for('comment') >> will return
308 my ( $self, $type ) = @_;
309 return $self->{tokens}->{$type}->{syntax};
312 ##############################################################################
314 =head3 C<handler_for>
316 my $handler = $grammar->handler_for($token_type);
318 Returns a code reference which, when passed an appropriate line of TAP,
319 returns the lexed token corresponding to that line. As a result, the basic
320 TAP parsing loop looks similar to the following:
323 my $grammar = TAP::Grammar->new;
324 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
325 foreach my $type ( $grammar->token_types ) {
326 my $syntax = $grammar->syntax_for($type);
327 if ( $line =~ $syntax ) {
328 my $handler = $grammar->handler_for($type);
329 push @tokens => $grammar->$handler($line);
333 push @tokens => $grammar->_make_unknown_token($line);
339 my ( $self, $type ) = @_;
340 return $self->{tokens}->{$type}->{handler};
343 sub _make_version_token {
344 my ( $self, $line, $version ) = @_;
352 sub _make_plan_token {
353 my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
355 if ( $directive eq 'SKIP' && 0 != $tests_planned ) {
357 "Specified SKIP directive in plan but more than 0 tests ($line)\n";
362 tests_planned => $tests_planned,
363 directive => $directive,
364 explanation => _trim($explanation),
369 sub _make_test_token {
370 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
374 description => _trim($desc),
375 directive => uc($dir),
376 explanation => _trim($explanation),
383 sub _make_unknown_token {
384 my ( $self, $line ) = @_;
391 sub _make_comment_token {
392 my ( $self, $line, $comment ) = @_;
396 comment => _trim($comment)
400 sub _make_bailout_token {
401 my ( $self, $line, $explanation ) = @_;
405 bailout => _trim($explanation)
409 sub _make_yaml_token {
410 my ( $self, $pad, $marker ) = @_;
412 my $yaml = TAP::Parser::YAMLish::Reader->new;
414 my $stream = $self->{stream};
416 # Construct a reader that reads from our input stripping leading
417 # spaces from each line.
418 my $leader = length($pad);
419 my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
420 my @extra = ($marker);
422 return shift @extra if @extra;
423 my $line = $stream->next;
424 return $2 if $line =~ $strip;
428 my $data = $yaml->read($reader);
430 # Reconstitute input. This is convoluted. Maybe we should just
431 # record it on the way in...
432 chomp( my $raw = $yaml->get_raw );
445 return '' unless defined $data;
454 B<NOTE:> This grammar is slightly out of date. There's still some discussion
455 about it and a new one will be provided when we have things better defined.
457 The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
458 stream-based protocol. In fact, it's quite legal to have an infinite stream.
459 For the same reason that we don't apply regexes to streams, we're not using a
460 formal grammar here. Instead, we parse the TAP in lines.
462 For purposes for forward compatability, any result which does not match the
463 following grammar is currently referred to as
464 L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
466 A formal grammar would look similar to the following:
469 For the time being, I'm cheating on the EBNF by allowing
470 certain terms to be defined by POSIX character classes by
471 using the following syntax:
475 As far as I am aware, that's not valid EBNF. Sue me. I
476 didn't know how to write "char" otherwise (Unicode issues).
480 tap ::= version? { comment | unknown } leading_plan lines
482 lines trailing_plan {comment}
484 version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
486 leading_plan ::= plan skip_directive? "\n"
488 trailing_plan ::= plan "\n"
490 plan ::= '1..' nonNegativeInteger
492 lines ::= line {line}
494 line ::= (comment | test | unknown | bailout ) "\n"
496 test ::= status positiveInteger? description? directive?
498 status ::= 'not '? 'ok '
500 description ::= (character - (digit | '#')) {character - '#'}
502 directive ::= todo_directive | skip_directive
504 todo_directive ::= hash_mark 'TODO' ' ' {character}
506 skip_directive ::= hash_mark 'SKIP' ' ' {character}
508 comment ::= hash_mark {character}
510 hash_mark ::= '#' {' '}
512 bailout ::= 'Bail out!' {character}
514 unknown ::= { (character - "\n") }
516 (* POSIX character classes and other terminals *)
519 character ::= ([:print:] - "\n")
520 positiveInteger ::= ( digit - '0' ) {digit}
521 nonNegativeInteger ::= digit {digit}