bring Test::Harness up to 3.06
[p5sagit/p5-mst-13.2.git] / lib / TAP / Parser / Grammar.pm
CommitLineData
b965d173 1package TAP::Parser::Grammar;
2
3use strict;
4use vars qw($VERSION);
5
6use TAP::Parser::Result ();
7use TAP::Parser::YAMLish::Reader ();
8
9=head1 NAME
10
11TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
12
13=head1 VERSION
14
69f36734 15Version 3.06
b965d173 16
17=cut
18
69f36734 19$VERSION = '3.06';
b965d173 20
21=head1 DESCRIPTION
22
23C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
24L<TAP::Parser::Result> subclasses to represent the tokens.
25
26Do not attempt to use this class directly. It won't make sense. It's mainly
27here to ensure that we will be able to have pluggable grammars when TAP is
28expanded at some future date (plus, this stuff was really cluttering the
29parser).
30
31=cut
32
33##############################################################################
34
35=head2 Class Methods
36
37
38=head3 C<new>
39
40 my $grammar = TAP::Grammar->new($stream);
41
42Returns TAP grammar object that will parse the specified stream.
43
44=cut
45
46sub new {
47 my ( $class, $stream ) = @_;
48 my $self = bless { stream => $stream }, $class;
49 $self->set_version(12);
50 return $self;
51}
52
53my %language_for;
54
55{
56
57 # XXX the 'not' and 'ok' might be on separate lines in VMS ...
58 my $ok = qr/(?:not )?ok\b/;
59 my $num = qr/\d+/;
60
61 my %v12 = (
62 version => {
63 syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
64 handler => sub {
65 my ( $self, $line ) = @_;
66 my $version = $1;
67 return $self->_make_version_token( $line, $version, );
68 },
69 },
70 plan => {
71 syntax => qr/^1\.\.(\d+)\s*(.*)\z/,
72 handler => sub {
73 my ( $self, $line ) = @_;
74 my ( $tests_planned, $tail ) = ( $1, $2 );
75 my $explanation = undef;
76 my $skip = '';
77
78 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
79 my @todo = split /\s+/, _trim($1);
80 return $self->_make_plan_token(
81 $line, $tests_planned, 'TODO',
82 '', \@todo
83 );
84 }
85 elsif ( 0 == $tests_planned ) {
86 $skip = 'SKIP';
87 $explanation = $tail;
88
89 # Trim valid SKIP directive without being strict
90 # about its presence.
91 $explanation =~ s/^#\s*//;
92 $explanation =~ s/^skip\S*\s+//i;
93 }
94 elsif ( $tail !~ /^\s*$/ ) {
95 return $self->_make_unknown_token($line);
96 }
97
98 $explanation = '' unless defined $explanation;
99
100 return $self->_make_plan_token(
101 $line, $tests_planned, $skip,
102 $explanation, []
103 );
104
105 },
106 },
107
108 # An optimization to handle the most common test lines without
109 # directives.
110 simple_test => {
111 syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
112 handler => sub {
113 my ( $self, $line ) = @_;
114 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
115
116 return $self->_make_test_token(
117 $line, $ok, $num,
118 $desc
119 );
120 },
121 },
122 test => {
123 syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
124 handler => sub {
125 my ( $self, $line ) = @_;
126 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
127 my ( $dir, $explanation ) = ( '', '' );
128 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
129 \# \s* (SKIP|TODO) \b \s* (.*) $/ix
130 )
131 {
132 ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
133 }
134 return $self->_make_test_token(
135 $line, $ok, $num, $desc,
136 uc $dir, $explanation
137 );
138 },
139 },
140 comment => {
141 syntax => qr/^#(.*)/,
142 handler => sub {
143 my ( $self, $line ) = @_;
144 my $comment = $1;
145 return $self->_make_comment_token( $line, $comment );
146 },
147 },
148 bailout => {
149 syntax => qr/^Bail out!\s*(.*)/,
150 handler => sub {
151 my ( $self, $line ) = @_;
152 my $explanation = $1;
153 return $self->_make_bailout_token(
154 $line,
155 $explanation
156 );
157 },
158 },
159 );
160
161 my %v13 = (
162 %v12,
163 plan => {
164 syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
165 handler => sub {
166 my ( $self, $line ) = @_;
167 my ( $tests_planned, $explanation ) = ( $1, $2 );
168 my $skip
169 = ( 0 == $tests_planned || defined $explanation )
170 ? 'SKIP'
171 : '';
172 $explanation = '' unless defined $explanation;
173 return $self->_make_plan_token(
174 $line, $tests_planned, $skip,
175 $explanation, []
176 );
177 },
178 },
179 yaml => {
180 syntax => qr/^ (\s+) (---.*) $/x,
181 handler => sub {
182 my ( $self, $line ) = @_;
183 my ( $pad, $marker ) = ( $1, $2 );
184 return $self->_make_yaml_token( $pad, $marker );
185 },
186 },
187 );
188
189 %language_for = (
190 '12' => {
191 tokens => \%v12,
192 },
193 '13' => {
194 tokens => \%v13,
195 setup => sub {
196 shift->{stream}->handle_unicode;
197 },
198 },
199 );
200}
201
202##############################################################################
203
204=head2 Instance Methods
205
206=head3 C<set_version>
207
208 $grammar->set_version(13);
209
210Tell the grammar which TAP syntax version to support. The lowest
211supported version is 12. Although 'TAP version' isn't valid version 12
212syntax it is accepted so that higher version numbers may be parsed.
213
214=cut
215
216sub set_version {
217 my $self = shift;
218 my $version = shift;
219
220 if ( my $language = $language_for{$version} ) {
221 $self->{tokens} = $language->{tokens};
222
223 if ( my $setup = $language->{setup} ) {
224 $self->$setup();
225 }
226
227 $self->_order_tokens;
228 }
229 else {
230 require Carp;
231 Carp::croak("Unsupported syntax version: $version");
232 }
233}
234
235# Optimization to put the most frequent tokens first.
236sub _order_tokens {
237 my $self = shift;
238
239 my %copy = %{ $self->{tokens} };
240 my @ordered_tokens = grep {defined}
241 map { delete $copy{$_} } qw( simple_test test comment plan );
242 push @ordered_tokens, values %copy;
243
244 $self->{ordered_tokens} = \@ordered_tokens;
245}
246
247##############################################################################
248
249=head3 C<tokenize>
250
251 my $token = $grammar->tokenize;
252
253This method will return a L<TAP::Parser::Result> object representing the
254current line of TAP.
255
256=cut
257
258sub tokenize {
259 my $self = shift;
260
261 my $line = $self->{stream}->next;
262 return unless defined $line;
263
264 my $token;
265
266 foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
267 if ( $line =~ $token_data->{syntax} ) {
268 my $handler = $token_data->{handler};
269 $token = $self->$handler($line);
270 last;
271 }
272 }
273
274 $token = $self->_make_unknown_token($line) unless $token;
275
276 return TAP::Parser::Result->new($token);
277}
278
279##############################################################################
280
281=head3 C<token_types>
282
283 my @types = $grammar->token_types;
284
285Returns the different types of tokens which this grammar can parse.
286
287=cut
288
289sub token_types {
290 my $self = shift;
291 return keys %{ $self->{tokens} };
292}
293
294##############################################################################
295
296=head3 C<syntax_for>
297
298 my $syntax = $grammar->syntax_for($token_type);
299
300Returns a pre-compiled regular expression which will match a chunk of TAP
301corresponding to the token type. For example (not that you should really pay
302attention to this, C<< $grammar->syntax_for('comment') >> will return
303C<< qr/^#(.*)/ >>.
304
305=cut
306
307sub syntax_for {
308 my ( $self, $type ) = @_;
309 return $self->{tokens}->{$type}->{syntax};
310}
311
312##############################################################################
313
314=head3 C<handler_for>
315
316 my $handler = $grammar->handler_for($token_type);
317
318Returns a code reference which, when passed an appropriate line of TAP,
319returns the lexed token corresponding to that line. As a result, the basic
320TAP parsing loop looks similar to the following:
321
322 my @tokens;
323 my $grammar = TAP::Grammar->new;
324 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
325 foreach my $type ( $grammar->token_types ) {
326 my $syntax = $grammar->syntax_for($type);
327 if ( $line =~ $syntax ) {
328 my $handler = $grammar->handler_for($type);
329 push @tokens => $grammar->$handler($line);
330 next LINE;
331 }
332 }
333 push @tokens => $grammar->_make_unknown_token($line);
334 }
335
336=cut
337
338sub handler_for {
339 my ( $self, $type ) = @_;
340 return $self->{tokens}->{$type}->{handler};
341}
342
343sub _make_version_token {
344 my ( $self, $line, $version ) = @_;
345 return {
346 type => 'version',
347 raw => $line,
348 version => $version,
349 };
350}
351
352sub _make_plan_token {
353 my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
354
355 if ( $directive eq 'SKIP' && 0 != $tests_planned ) {
356 warn
357 "Specified SKIP directive in plan but more than 0 tests ($line)\n";
358 }
359 return {
360 type => 'plan',
361 raw => $line,
362 tests_planned => $tests_planned,
363 directive => $directive,
364 explanation => _trim($explanation),
365 todo_list => $todo,
366 };
367}
368
369sub _make_test_token {
370 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
371 my %test = (
372 ok => $ok,
373 test_num => $num,
374 description => _trim($desc),
375 directive => uc($dir),
376 explanation => _trim($explanation),
377 raw => $line,
378 type => 'test',
379 );
380 return \%test;
381}
382
383sub _make_unknown_token {
384 my ( $self, $line ) = @_;
385 return {
386 raw => $line,
387 type => 'unknown',
388 };
389}
390
391sub _make_comment_token {
392 my ( $self, $line, $comment ) = @_;
393 return {
394 type => 'comment',
395 raw => $line,
396 comment => _trim($comment)
397 };
398}
399
400sub _make_bailout_token {
401 my ( $self, $line, $explanation ) = @_;
402 return {
403 type => 'bailout',
404 raw => $line,
405 bailout => _trim($explanation)
406 };
407}
408
409sub _make_yaml_token {
410 my ( $self, $pad, $marker ) = @_;
411
412 my $yaml = TAP::Parser::YAMLish::Reader->new;
413
414 my $stream = $self->{stream};
415
416 # Construct a reader that reads from our input stripping leading
417 # spaces from each line.
418 my $leader = length($pad);
419 my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
420 my @extra = ($marker);
421 my $reader = sub {
422 return shift @extra if @extra;
423 my $line = $stream->next;
424 return $2 if $line =~ $strip;
425 return;
426 };
427
428 my $data = $yaml->read($reader);
429
430 # Reconstitute input. This is convoluted. Maybe we should just
431 # record it on the way in...
432 chomp( my $raw = $yaml->get_raw );
433 $raw =~ s/^/$pad/mg;
434
435 return {
436 type => 'yaml',
437 raw => $raw,
438 data => $data
439 };
440}
441
442sub _trim {
443 my $data = shift;
444
445 return '' unless defined $data;
446
447 $data =~ s/^\s+//;
448 $data =~ s/\s+$//;
449 return $data;
450}
451
452=head1 TAP GRAMMAR
453
454B<NOTE:> This grammar is slightly out of date. There's still some discussion
455about it and a new one will be provided when we have things better defined.
456
457The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
458stream-based protocol. In fact, it's quite legal to have an infinite stream.
459For the same reason that we don't apply regexes to streams, we're not using a
460formal grammar here. Instead, we parse the TAP in lines.
461
462For purposes for forward compatability, any result which does not match the
463following grammar is currently referred to as
464L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
465
466A formal grammar would look similar to the following:
467
468 (*
469 For the time being, I'm cheating on the EBNF by allowing
470 certain terms to be defined by POSIX character classes by
471 using the following syntax:
472
473 digit ::= [:digit:]
474
475 As far as I am aware, that's not valid EBNF. Sue me. I
476 didn't know how to write "char" otherwise (Unicode issues).
477 Suggestions welcome.
478 *)
479
480 tap ::= version? { comment | unknown } leading_plan lines
481 |
482 lines trailing_plan {comment}
483
484 version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
485
486 leading_plan ::= plan skip_directive? "\n"
487
488 trailing_plan ::= plan "\n"
489
490 plan ::= '1..' nonNegativeInteger
491
492 lines ::= line {line}
493
494 line ::= (comment | test | unknown | bailout ) "\n"
495
496 test ::= status positiveInteger? description? directive?
497
498 status ::= 'not '? 'ok '
499
500 description ::= (character - (digit | '#')) {character - '#'}
501
502 directive ::= todo_directive | skip_directive
503
504 todo_directive ::= hash_mark 'TODO' ' ' {character}
505
506 skip_directive ::= hash_mark 'SKIP' ' ' {character}
507
508 comment ::= hash_mark {character}
509
510 hash_mark ::= '#' {' '}
511
512 bailout ::= 'Bail out!' {character}
513
514 unknown ::= { (character - "\n") }
515
516 (* POSIX character classes and other terminals *)
517
518 digit ::= [:digit:]
519 character ::= ([:print:] - "\n")
520 positiveInteger ::= ( digit - '0' ) {digit}
521 nonNegativeInteger ::= digit {digit}
522
523
524=cut
525
5261;