Upgrade to Test::Harness 3.13
[p5sagit/p5-mst-13.2.git] / lib / TAP / Parser / Grammar.pm
CommitLineData
b965d173 1package TAP::Parser::Grammar;
2
3use strict;
f7c69158 4use vars qw($VERSION @ISA);
b965d173 5
f7c69158 6use TAP::Object ();
7use TAP::Parser::ResultFactory ();
b965d173 8use TAP::Parser::YAMLish::Reader ();
9
f7c69158 10@ISA = qw(TAP::Object);
11
b965d173 12=head1 NAME
13
14TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
15
16=head1 VERSION
17
f7c69158 18Version 3.13
b965d173 19
20=cut
21
f7c69158 22$VERSION = '3.13';
23
24=head1 SYNOPSIS
25
26 use TAP::Parser::Grammar;
27 my $grammar = $self->make_grammar({
28 stream => $tap_parser_stream,
29 parser => $tap_parser,
30 version => 12,
31 });
32
33 my $result = $grammar->tokenize;
b965d173 34
35=head1 DESCRIPTION
36
37C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
38L<TAP::Parser::Result> subclasses to represent the tokens.
39
40Do not attempt to use this class directly. It won't make sense. It's mainly
41here to ensure that we will be able to have pluggable grammars when TAP is
42expanded at some future date (plus, this stuff was really cluttering the
43parser).
44
f7c69158 45=head1 METHODS
b965d173 46
47=head2 Class Methods
48
b965d173 49=head3 C<new>
50
f7c69158 51 my $grammar = TAP::Parser::Grammar->new({
52 stream => $stream,
53 parser => $parser,
54 version => $version,
55 });
b965d173 56
f7c69158 57Returns L<TAP::Parser> grammar object that will parse the specified stream.
58Both C<stream> and C<parser> are required arguments. If C<version> is not set
59it defaults to C<12> (see L</set_version> for more details).
b965d173 60
61=cut
62
f7c69158 63# new() implementation supplied by TAP::Object
64sub _initialize {
65 my ( $self, $args ) = @_;
66 $self->{stream} = $args->{stream}; # TODO: accessor
67 $self->{parser} = $args->{parser}; # TODO: accessor
68 $self->set_version( $args->{version} || 12 );
b965d173 69 return $self;
70}
71
72my %language_for;
73
74{
75
76 # XXX the 'not' and 'ok' might be on separate lines in VMS ...
77 my $ok = qr/(?:not )?ok\b/;
78 my $num = qr/\d+/;
79
80 my %v12 = (
81 version => {
82 syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
83 handler => sub {
84 my ( $self, $line ) = @_;
85 my $version = $1;
86 return $self->_make_version_token( $line, $version, );
87 },
88 },
89 plan => {
90 syntax => qr/^1\.\.(\d+)\s*(.*)\z/,
91 handler => sub {
92 my ( $self, $line ) = @_;
93 my ( $tests_planned, $tail ) = ( $1, $2 );
94 my $explanation = undef;
95 my $skip = '';
96
97 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
98 my @todo = split /\s+/, _trim($1);
99 return $self->_make_plan_token(
100 $line, $tests_planned, 'TODO',
101 '', \@todo
102 );
103 }
104 elsif ( 0 == $tests_planned ) {
f7c69158 105 $skip = 'SKIP';
b965d173 106
f7c69158 107 # If we can't match # SKIP the directive should be undef.
108 ($explanation) = $tail =~ /^#\s*SKIP\s+(.*)/i;
b965d173 109 }
110 elsif ( $tail !~ /^\s*$/ ) {
111 return $self->_make_unknown_token($line);
112 }
113
114 $explanation = '' unless defined $explanation;
115
116 return $self->_make_plan_token(
117 $line, $tests_planned, $skip,
118 $explanation, []
119 );
120
121 },
122 },
123
124 # An optimization to handle the most common test lines without
125 # directives.
126 simple_test => {
127 syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
128 handler => sub {
129 my ( $self, $line ) = @_;
130 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
131
132 return $self->_make_test_token(
133 $line, $ok, $num,
134 $desc
135 );
136 },
137 },
138 test => {
139 syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
140 handler => sub {
141 my ( $self, $line ) = @_;
142 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
143 my ( $dir, $explanation ) = ( '', '' );
144 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
145 \# \s* (SKIP|TODO) \b \s* (.*) $/ix
146 )
147 {
148 ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
149 }
150 return $self->_make_test_token(
151 $line, $ok, $num, $desc,
a62d3a1b 152 $dir, $explanation
b965d173 153 );
154 },
155 },
156 comment => {
157 syntax => qr/^#(.*)/,
158 handler => sub {
159 my ( $self, $line ) = @_;
160 my $comment = $1;
161 return $self->_make_comment_token( $line, $comment );
162 },
163 },
164 bailout => {
165 syntax => qr/^Bail out!\s*(.*)/,
166 handler => sub {
167 my ( $self, $line ) = @_;
168 my $explanation = $1;
169 return $self->_make_bailout_token(
170 $line,
171 $explanation
172 );
173 },
174 },
175 );
176
177 my %v13 = (
178 %v12,
179 plan => {
180 syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
181 handler => sub {
182 my ( $self, $line ) = @_;
183 my ( $tests_planned, $explanation ) = ( $1, $2 );
184 my $skip
185 = ( 0 == $tests_planned || defined $explanation )
186 ? 'SKIP'
187 : '';
188 $explanation = '' unless defined $explanation;
189 return $self->_make_plan_token(
190 $line, $tests_planned, $skip,
191 $explanation, []
192 );
193 },
194 },
195 yaml => {
196 syntax => qr/^ (\s+) (---.*) $/x,
197 handler => sub {
198 my ( $self, $line ) = @_;
199 my ( $pad, $marker ) = ( $1, $2 );
200 return $self->_make_yaml_token( $pad, $marker );
201 },
202 },
2a7f4b9b 203 pragma => {
204 syntax =>
205 qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
206 handler => sub {
207 my ( $self, $line ) = @_;
208 my $pragmas = $1;
209 return $self->_make_pragma_token( $line, $pragmas );
210 },
211 },
b965d173 212 );
213
214 %language_for = (
215 '12' => {
216 tokens => \%v12,
217 },
218 '13' => {
219 tokens => \%v13,
220 setup => sub {
221 shift->{stream}->handle_unicode;
222 },
223 },
224 );
225}
226
227##############################################################################
228
229=head2 Instance Methods
230
231=head3 C<set_version>
232
233 $grammar->set_version(13);
234
235Tell the grammar which TAP syntax version to support. The lowest
236supported version is 12. Although 'TAP version' isn't valid version 12
237syntax it is accepted so that higher version numbers may be parsed.
238
239=cut
240
241sub set_version {
242 my $self = shift;
243 my $version = shift;
244
245 if ( my $language = $language_for{$version} ) {
f7c69158 246 $self->{version} = $version;
247 $self->{tokens} = $language->{tokens};
b965d173 248
249 if ( my $setup = $language->{setup} ) {
250 $self->$setup();
251 }
252
253 $self->_order_tokens;
254 }
255 else {
256 require Carp;
257 Carp::croak("Unsupported syntax version: $version");
258 }
259}
260
261# Optimization to put the most frequent tokens first.
262sub _order_tokens {
263 my $self = shift;
264
265 my %copy = %{ $self->{tokens} };
266 my @ordered_tokens = grep {defined}
267 map { delete $copy{$_} } qw( simple_test test comment plan );
268 push @ordered_tokens, values %copy;
269
270 $self->{ordered_tokens} = \@ordered_tokens;
271}
272
273##############################################################################
274
275=head3 C<tokenize>
276
277 my $token = $grammar->tokenize;
278
279This method will return a L<TAP::Parser::Result> object representing the
280current line of TAP.
281
282=cut
283
284sub tokenize {
285 my $self = shift;
286
287 my $line = $self->{stream}->next;
f7c69158 288 unless ( defined $line ) {
289 delete $self->{parser}; # break circular ref
290 return;
291 }
b965d173 292
293 my $token;
294
295 foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
296 if ( $line =~ $token_data->{syntax} ) {
297 my $handler = $token_data->{handler};
298 $token = $self->$handler($line);
299 last;
300 }
301 }
302
303 $token = $self->_make_unknown_token($line) unless $token;
304
f7c69158 305 return $self->{parser}->make_result($token);
b965d173 306}
307
308##############################################################################
309
310=head3 C<token_types>
311
312 my @types = $grammar->token_types;
313
314Returns the different types of tokens which this grammar can parse.
315
316=cut
317
318sub token_types {
319 my $self = shift;
320 return keys %{ $self->{tokens} };
321}
322
323##############################################################################
324
325=head3 C<syntax_for>
326
327 my $syntax = $grammar->syntax_for($token_type);
328
329Returns a pre-compiled regular expression which will match a chunk of TAP
330corresponding to the token type. For example (not that you should really pay
331attention to this, C<< $grammar->syntax_for('comment') >> will return
332C<< qr/^#(.*)/ >>.
333
334=cut
335
336sub syntax_for {
337 my ( $self, $type ) = @_;
338 return $self->{tokens}->{$type}->{syntax};
339}
340
341##############################################################################
342
343=head3 C<handler_for>
344
345 my $handler = $grammar->handler_for($token_type);
346
347Returns a code reference which, when passed an appropriate line of TAP,
348returns the lexed token corresponding to that line. As a result, the basic
349TAP parsing loop looks similar to the following:
350
351 my @tokens;
352 my $grammar = TAP::Grammar->new;
353 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
354 foreach my $type ( $grammar->token_types ) {
355 my $syntax = $grammar->syntax_for($type);
356 if ( $line =~ $syntax ) {
357 my $handler = $grammar->handler_for($type);
358 push @tokens => $grammar->$handler($line);
359 next LINE;
360 }
361 }
362 push @tokens => $grammar->_make_unknown_token($line);
363 }
364
365=cut
366
367sub handler_for {
368 my ( $self, $type ) = @_;
369 return $self->{tokens}->{$type}->{handler};
370}
371
372sub _make_version_token {
373 my ( $self, $line, $version ) = @_;
374 return {
375 type => 'version',
376 raw => $line,
377 version => $version,
378 };
379}
380
381sub _make_plan_token {
382 my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
383
f7c69158 384 if ( $directive eq 'SKIP'
385 && 0 != $tests_planned
386 && $self->{version} < 13 )
387 {
b965d173 388 warn
389 "Specified SKIP directive in plan but more than 0 tests ($line)\n";
390 }
f7c69158 391
b965d173 392 return {
393 type => 'plan',
394 raw => $line,
395 tests_planned => $tests_planned,
396 directive => $directive,
397 explanation => _trim($explanation),
398 todo_list => $todo,
399 };
400}
401
402sub _make_test_token {
403 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
404 my %test = (
405 ok => $ok,
406 test_num => $num,
407 description => _trim($desc),
2a7f4b9b 408 directive => uc( defined $dir ? $dir : '' ),
b965d173 409 explanation => _trim($explanation),
410 raw => $line,
411 type => 'test',
412 );
413 return \%test;
414}
415
416sub _make_unknown_token {
417 my ( $self, $line ) = @_;
418 return {
419 raw => $line,
420 type => 'unknown',
421 };
422}
423
424sub _make_comment_token {
425 my ( $self, $line, $comment ) = @_;
426 return {
427 type => 'comment',
428 raw => $line,
429 comment => _trim($comment)
430 };
431}
432
433sub _make_bailout_token {
434 my ( $self, $line, $explanation ) = @_;
435 return {
436 type => 'bailout',
437 raw => $line,
438 bailout => _trim($explanation)
439 };
440}
441
442sub _make_yaml_token {
443 my ( $self, $pad, $marker ) = @_;
444
445 my $yaml = TAP::Parser::YAMLish::Reader->new;
446
447 my $stream = $self->{stream};
448
449 # Construct a reader that reads from our input stripping leading
450 # spaces from each line.
451 my $leader = length($pad);
452 my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
453 my @extra = ($marker);
454 my $reader = sub {
455 return shift @extra if @extra;
456 my $line = $stream->next;
457 return $2 if $line =~ $strip;
458 return;
459 };
460
461 my $data = $yaml->read($reader);
462
463 # Reconstitute input. This is convoluted. Maybe we should just
464 # record it on the way in...
465 chomp( my $raw = $yaml->get_raw );
466 $raw =~ s/^/$pad/mg;
467
468 return {
469 type => 'yaml',
470 raw => $raw,
471 data => $data
472 };
473}
474
2a7f4b9b 475sub _make_pragma_token {
476 my ( $self, $line, $pragmas ) = @_;
477 return {
478 type => 'pragma',
479 raw => $line,
480 pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
481 };
482}
483
b965d173 484sub _trim {
485 my $data = shift;
486
487 return '' unless defined $data;
488
489 $data =~ s/^\s+//;
490 $data =~ s/\s+$//;
491 return $data;
492}
493
f7c69158 4941;
495
b965d173 496=head1 TAP GRAMMAR
497
498B<NOTE:> This grammar is slightly out of date. There's still some discussion
499about it and a new one will be provided when we have things better defined.
500
501The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
502stream-based protocol. In fact, it's quite legal to have an infinite stream.
503For the same reason that we don't apply regexes to streams, we're not using a
504formal grammar here. Instead, we parse the TAP in lines.
505
506For purposes for forward compatability, any result which does not match the
507following grammar is currently referred to as
508L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
509
510A formal grammar would look similar to the following:
511
512 (*
513 For the time being, I'm cheating on the EBNF by allowing
514 certain terms to be defined by POSIX character classes by
515 using the following syntax:
516
517 digit ::= [:digit:]
518
519 As far as I am aware, that's not valid EBNF. Sue me. I
520 didn't know how to write "char" otherwise (Unicode issues).
521 Suggestions welcome.
522 *)
523
524 tap ::= version? { comment | unknown } leading_plan lines
525 |
526 lines trailing_plan {comment}
527
528 version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
529
530 leading_plan ::= plan skip_directive? "\n"
531
532 trailing_plan ::= plan "\n"
533
534 plan ::= '1..' nonNegativeInteger
535
536 lines ::= line {line}
537
538 line ::= (comment | test | unknown | bailout ) "\n"
539
540 test ::= status positiveInteger? description? directive?
541
542 status ::= 'not '? 'ok '
543
544 description ::= (character - (digit | '#')) {character - '#'}
545
546 directive ::= todo_directive | skip_directive
547
548 todo_directive ::= hash_mark 'TODO' ' ' {character}
549
550 skip_directive ::= hash_mark 'SKIP' ' ' {character}
551
552 comment ::= hash_mark {character}
553
554 hash_mark ::= '#' {' '}
555
556 bailout ::= 'Bail out!' {character}
557
558 unknown ::= { (character - "\n") }
559
560 (* POSIX character classes and other terminals *)
561
562 digit ::= [:digit:]
563 character ::= ([:print:] - "\n")
564 positiveInteger ::= ( digit - '0' ) {digit}
565 nonNegativeInteger ::= digit {digit}
566
f7c69158 567=head1 SUBCLASSING
b965d173 568
f7c69158 569Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
b965d173 570
f7c69158 571If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
572do is read through the code. There's no easy way of summarizing it here.
573
574=head1 SEE ALSO
575
576L<TAP::Object>,
577L<TAP::Parser>,
578L<TAP::Parser::Iterator>,
579L<TAP::Parser::Result>,
580
581=cut