1 package TAP::Parser::Grammar;
4 use vars qw($VERSION @ISA);
7 use TAP::Parser::ResultFactory ();
8 use TAP::Parser::YAMLish::Reader ();
10 @ISA = qw(TAP::Object);
14 TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
26 use TAP::Parser::Grammar;
27 my $grammar = $self->make_grammar({
28 stream => $tap_parser_stream,
29 parser => $tap_parser,
33 my $result = $grammar->tokenize;
37 C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
38 L<TAP::Parser::Result> subclasses to represent the tokens.
40 Do not attempt to use this class directly. It won't make sense. It's mainly
41 here to ensure that we will be able to have pluggable grammars when TAP is
42 expanded at some future date (plus, this stuff was really cluttering the
51 my $grammar = TAP::Parser::Grammar->new({
57 Returns L<TAP::Parser> grammar object that will parse the specified stream.
58 Both C<stream> and C<parser> are required arguments. If C<version> is not set
59 it defaults to C<12> (see L</set_version> for more details).
63 # new() implementation supplied by TAP::Object
65 my ( $self, $args ) = @_;
66 $self->{stream} = $args->{stream}; # TODO: accessor
67 $self->{parser} = $args->{parser}; # TODO: accessor
68 $self->set_version( $args->{version} || 12 );
76 # XXX the 'not' and 'ok' might be on separate lines in VMS ...
77 my $ok = qr/(?:not )?ok\b/;
82 syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
84 my ( $self, $line ) = @_;
86 return $self->_make_version_token( $line, $version, );
90 syntax => qr/^1\.\.(\d+)\s*(.*)\z/,
92 my ( $self, $line ) = @_;
93 my ( $tests_planned, $tail ) = ( $1, $2 );
94 my $explanation = undef;
97 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
98 my @todo = split /\s+/, _trim($1);
99 return $self->_make_plan_token(
100 $line, $tests_planned, 'TODO',
104 elsif ( 0 == $tests_planned ) {
107 # If we can't match # SKIP the directive should be undef.
108 ($explanation) = $tail =~ /^#\s*SKIP\s+(.*)/i;
110 elsif ( $tail !~ /^\s*$/ ) {
111 return $self->_make_unknown_token($line);
114 $explanation = '' unless defined $explanation;
116 return $self->_make_plan_token(
117 $line, $tests_planned, $skip,
124 # An optimization to handle the most common test lines without
127 syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
129 my ( $self, $line ) = @_;
130 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
132 return $self->_make_test_token(
139 syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
141 my ( $self, $line ) = @_;
142 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
143 my ( $dir, $explanation ) = ( '', '' );
144 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
145 \# \s* (SKIP|TODO) \b \s* (.*) $/ix
148 ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
150 return $self->_make_test_token(
151 $line, $ok, $num, $desc,
157 syntax => qr/^#(.*)/,
159 my ( $self, $line ) = @_;
161 return $self->_make_comment_token( $line, $comment );
165 syntax => qr/^Bail out!\s*(.*)/,
167 my ( $self, $line ) = @_;
168 my $explanation = $1;
169 return $self->_make_bailout_token(
180 syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
182 my ( $self, $line ) = @_;
183 my ( $tests_planned, $explanation ) = ( $1, $2 );
185 = ( 0 == $tests_planned || defined $explanation )
188 $explanation = '' unless defined $explanation;
189 return $self->_make_plan_token(
190 $line, $tests_planned, $skip,
196 syntax => qr/^ (\s+) (---.*) $/x,
198 my ( $self, $line ) = @_;
199 my ( $pad, $marker ) = ( $1, $2 );
200 return $self->_make_yaml_token( $pad, $marker );
205 qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
207 my ( $self, $line ) = @_;
209 return $self->_make_pragma_token( $line, $pragmas );
221 shift->{stream}->handle_unicode;
227 ##############################################################################
229 =head2 Instance Methods
231 =head3 C<set_version>
233 $grammar->set_version(13);
235 Tell the grammar which TAP syntax version to support. The lowest
236 supported version is 12. Although 'TAP version' isn't valid version 12
237 syntax it is accepted so that higher version numbers may be parsed.
245 if ( my $language = $language_for{$version} ) {
246 $self->{version} = $version;
247 $self->{tokens} = $language->{tokens};
249 if ( my $setup = $language->{setup} ) {
253 $self->_order_tokens;
257 Carp::croak("Unsupported syntax version: $version");
261 # Optimization to put the most frequent tokens first.
265 my %copy = %{ $self->{tokens} };
266 my @ordered_tokens = grep {defined}
267 map { delete $copy{$_} } qw( simple_test test comment plan );
268 push @ordered_tokens, values %copy;
270 $self->{ordered_tokens} = \@ordered_tokens;
273 ##############################################################################
277 my $token = $grammar->tokenize;
279 This method will return a L<TAP::Parser::Result> object representing the
287 my $line = $self->{stream}->next;
288 unless ( defined $line ) {
289 delete $self->{parser}; # break circular ref
295 foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
296 if ( $line =~ $token_data->{syntax} ) {
297 my $handler = $token_data->{handler};
298 $token = $self->$handler($line);
303 $token = $self->_make_unknown_token($line) unless $token;
305 return $self->{parser}->make_result($token);
308 ##############################################################################
310 =head3 C<token_types>
312 my @types = $grammar->token_types;
314 Returns the different types of tokens which this grammar can parse.
320 return keys %{ $self->{tokens} };
323 ##############################################################################
327 my $syntax = $grammar->syntax_for($token_type);
329 Returns a pre-compiled regular expression which will match a chunk of TAP
330 corresponding to the token type. For example (not that you should really pay
331 attention to this, C<< $grammar->syntax_for('comment') >> will return
337 my ( $self, $type ) = @_;
338 return $self->{tokens}->{$type}->{syntax};
341 ##############################################################################
343 =head3 C<handler_for>
345 my $handler = $grammar->handler_for($token_type);
347 Returns a code reference which, when passed an appropriate line of TAP,
348 returns the lexed token corresponding to that line. As a result, the basic
349 TAP parsing loop looks similar to the following:
352 my $grammar = TAP::Grammar->new;
353 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
354 foreach my $type ( $grammar->token_types ) {
355 my $syntax = $grammar->syntax_for($type);
356 if ( $line =~ $syntax ) {
357 my $handler = $grammar->handler_for($type);
358 push @tokens => $grammar->$handler($line);
362 push @tokens => $grammar->_make_unknown_token($line);
368 my ( $self, $type ) = @_;
369 return $self->{tokens}->{$type}->{handler};
372 sub _make_version_token {
373 my ( $self, $line, $version ) = @_;
381 sub _make_plan_token {
382 my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
384 if ( $directive eq 'SKIP'
385 && 0 != $tests_planned
386 && $self->{version} < 13 )
389 "Specified SKIP directive in plan but more than 0 tests ($line)\n";
395 tests_planned => $tests_planned,
396 directive => $directive,
397 explanation => _trim($explanation),
402 sub _make_test_token {
403 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
407 description => _trim($desc),
408 directive => uc( defined $dir ? $dir : '' ),
409 explanation => _trim($explanation),
416 sub _make_unknown_token {
417 my ( $self, $line ) = @_;
424 sub _make_comment_token {
425 my ( $self, $line, $comment ) = @_;
429 comment => _trim($comment)
433 sub _make_bailout_token {
434 my ( $self, $line, $explanation ) = @_;
438 bailout => _trim($explanation)
442 sub _make_yaml_token {
443 my ( $self, $pad, $marker ) = @_;
445 my $yaml = TAP::Parser::YAMLish::Reader->new;
447 my $stream = $self->{stream};
449 # Construct a reader that reads from our input stripping leading
450 # spaces from each line.
451 my $leader = length($pad);
452 my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
453 my @extra = ($marker);
455 return shift @extra if @extra;
456 my $line = $stream->next;
457 return $2 if $line =~ $strip;
461 my $data = $yaml->read($reader);
463 # Reconstitute input. This is convoluted. Maybe we should just
464 # record it on the way in...
465 chomp( my $raw = $yaml->get_raw );
475 sub _make_pragma_token {
476 my ( $self, $line, $pragmas ) = @_;
480 pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
487 return '' unless defined $data;
498 B<NOTE:> This grammar is slightly out of date. There's still some discussion
499 about it and a new one will be provided when we have things better defined.
501 The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
502 stream-based protocol. In fact, it's quite legal to have an infinite stream.
503 For the same reason that we don't apply regexes to streams, we're not using a
504 formal grammar here. Instead, we parse the TAP in lines.
506 For purposes for forward compatability, any result which does not match the
507 following grammar is currently referred to as
508 L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
510 A formal grammar would look similar to the following:
513 For the time being, I'm cheating on the EBNF by allowing
514 certain terms to be defined by POSIX character classes by
515 using the following syntax:
519 As far as I am aware, that's not valid EBNF. Sue me. I
520 didn't know how to write "char" otherwise (Unicode issues).
524 tap ::= version? { comment | unknown } leading_plan lines
526 lines trailing_plan {comment}
528 version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
530 leading_plan ::= plan skip_directive? "\n"
532 trailing_plan ::= plan "\n"
534 plan ::= '1..' nonNegativeInteger
536 lines ::= line {line}
538 line ::= (comment | test | unknown | bailout ) "\n"
540 test ::= status positiveInteger? description? directive?
542 status ::= 'not '? 'ok '
544 description ::= (character - (digit | '#')) {character - '#'}
546 directive ::= todo_directive | skip_directive
548 todo_directive ::= hash_mark 'TODO' ' ' {character}
550 skip_directive ::= hash_mark 'SKIP' ' ' {character}
552 comment ::= hash_mark {character}
554 hash_mark ::= '#' {' '}
556 bailout ::= 'Bail out!' {character}
558 unknown ::= { (character - "\n") }
560 (* POSIX character classes and other terminals *)
563 character ::= ([:print:] - "\n")
564 positiveInteger ::= ( digit - '0' ) {digit}
565 nonNegativeInteger ::= digit {digit}
569 Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
571 If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
572 do is read through the code. There's no easy way of summarizing it here.
578 L<TAP::Parser::Iterator>,
579 L<TAP::Parser::Result>,