Add d_timegm to uconfig.sh to fix the generated uconfig.h.
[p5sagit/p5-mst-13.2.git] / lib / TAP / Parser / Grammar.pm
CommitLineData
b965d173 1package TAP::Parser::Grammar;
2
3use strict;
4use vars qw($VERSION);
5
6use TAP::Parser::Result ();
7use TAP::Parser::YAMLish::Reader ();
8
9=head1 NAME
10
11TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
12
13=head1 VERSION
14
2a7f4b9b 15Version 3.10
b965d173 16
17=cut
18
2a7f4b9b 19$VERSION = '3.10';
b965d173 20
21=head1 DESCRIPTION
22
23C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
24L<TAP::Parser::Result> subclasses to represent the tokens.
25
26Do not attempt to use this class directly. It won't make sense. It's mainly
27here to ensure that we will be able to have pluggable grammars when TAP is
28expanded at some future date (plus, this stuff was really cluttering the
29parser).
30
31=cut
32
33##############################################################################
34
35=head2 Class Methods
36
37
38=head3 C<new>
39
40 my $grammar = TAP::Grammar->new($stream);
41
42Returns TAP grammar object that will parse the specified stream.
43
44=cut
45
46sub new {
47 my ( $class, $stream ) = @_;
48 my $self = bless { stream => $stream }, $class;
49 $self->set_version(12);
50 return $self;
51}
52
53my %language_for;
54
55{
56
57 # XXX the 'not' and 'ok' might be on separate lines in VMS ...
58 my $ok = qr/(?:not )?ok\b/;
59 my $num = qr/\d+/;
60
61 my %v12 = (
62 version => {
63 syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
64 handler => sub {
65 my ( $self, $line ) = @_;
66 my $version = $1;
67 return $self->_make_version_token( $line, $version, );
68 },
69 },
70 plan => {
71 syntax => qr/^1\.\.(\d+)\s*(.*)\z/,
72 handler => sub {
73 my ( $self, $line ) = @_;
74 my ( $tests_planned, $tail ) = ( $1, $2 );
75 my $explanation = undef;
76 my $skip = '';
77
78 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
79 my @todo = split /\s+/, _trim($1);
80 return $self->_make_plan_token(
81 $line, $tests_planned, 'TODO',
82 '', \@todo
83 );
84 }
85 elsif ( 0 == $tests_planned ) {
86 $skip = 'SKIP';
87 $explanation = $tail;
88
89 # Trim valid SKIP directive without being strict
90 # about its presence.
91 $explanation =~ s/^#\s*//;
92 $explanation =~ s/^skip\S*\s+//i;
93 }
94 elsif ( $tail !~ /^\s*$/ ) {
95 return $self->_make_unknown_token($line);
96 }
97
98 $explanation = '' unless defined $explanation;
99
100 return $self->_make_plan_token(
101 $line, $tests_planned, $skip,
102 $explanation, []
103 );
104
105 },
106 },
107
108 # An optimization to handle the most common test lines without
109 # directives.
110 simple_test => {
111 syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
112 handler => sub {
113 my ( $self, $line ) = @_;
114 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
115
116 return $self->_make_test_token(
117 $line, $ok, $num,
118 $desc
119 );
120 },
121 },
122 test => {
123 syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
124 handler => sub {
125 my ( $self, $line ) = @_;
126 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
127 my ( $dir, $explanation ) = ( '', '' );
128 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
129 \# \s* (SKIP|TODO) \b \s* (.*) $/ix
130 )
131 {
132 ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
133 }
134 return $self->_make_test_token(
135 $line, $ok, $num, $desc,
a62d3a1b 136 $dir, $explanation
b965d173 137 );
138 },
139 },
140 comment => {
141 syntax => qr/^#(.*)/,
142 handler => sub {
143 my ( $self, $line ) = @_;
144 my $comment = $1;
145 return $self->_make_comment_token( $line, $comment );
146 },
147 },
148 bailout => {
149 syntax => qr/^Bail out!\s*(.*)/,
150 handler => sub {
151 my ( $self, $line ) = @_;
152 my $explanation = $1;
153 return $self->_make_bailout_token(
154 $line,
155 $explanation
156 );
157 },
158 },
159 );
160
161 my %v13 = (
162 %v12,
163 plan => {
164 syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
165 handler => sub {
166 my ( $self, $line ) = @_;
167 my ( $tests_planned, $explanation ) = ( $1, $2 );
168 my $skip
169 = ( 0 == $tests_planned || defined $explanation )
170 ? 'SKIP'
171 : '';
172 $explanation = '' unless defined $explanation;
173 return $self->_make_plan_token(
174 $line, $tests_planned, $skip,
175 $explanation, []
176 );
177 },
178 },
179 yaml => {
180 syntax => qr/^ (\s+) (---.*) $/x,
181 handler => sub {
182 my ( $self, $line ) = @_;
183 my ( $pad, $marker ) = ( $1, $2 );
184 return $self->_make_yaml_token( $pad, $marker );
185 },
186 },
2a7f4b9b 187 pragma => {
188 syntax =>
189 qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
190 handler => sub {
191 my ( $self, $line ) = @_;
192 my $pragmas = $1;
193 return $self->_make_pragma_token( $line, $pragmas );
194 },
195 },
b965d173 196 );
197
198 %language_for = (
199 '12' => {
200 tokens => \%v12,
201 },
202 '13' => {
203 tokens => \%v13,
204 setup => sub {
205 shift->{stream}->handle_unicode;
206 },
207 },
208 );
209}
210
211##############################################################################
212
213=head2 Instance Methods
214
215=head3 C<set_version>
216
217 $grammar->set_version(13);
218
219Tell the grammar which TAP syntax version to support. The lowest
220supported version is 12. Although 'TAP version' isn't valid version 12
221syntax it is accepted so that higher version numbers may be parsed.
222
223=cut
224
225sub set_version {
226 my $self = shift;
227 my $version = shift;
228
229 if ( my $language = $language_for{$version} ) {
230 $self->{tokens} = $language->{tokens};
231
232 if ( my $setup = $language->{setup} ) {
233 $self->$setup();
234 }
235
236 $self->_order_tokens;
237 }
238 else {
239 require Carp;
240 Carp::croak("Unsupported syntax version: $version");
241 }
242}
243
244# Optimization to put the most frequent tokens first.
245sub _order_tokens {
246 my $self = shift;
247
248 my %copy = %{ $self->{tokens} };
249 my @ordered_tokens = grep {defined}
250 map { delete $copy{$_} } qw( simple_test test comment plan );
251 push @ordered_tokens, values %copy;
252
253 $self->{ordered_tokens} = \@ordered_tokens;
254}
255
256##############################################################################
257
258=head3 C<tokenize>
259
260 my $token = $grammar->tokenize;
261
262This method will return a L<TAP::Parser::Result> object representing the
263current line of TAP.
264
265=cut
266
267sub tokenize {
268 my $self = shift;
269
270 my $line = $self->{stream}->next;
271 return unless defined $line;
272
273 my $token;
274
275 foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
276 if ( $line =~ $token_data->{syntax} ) {
277 my $handler = $token_data->{handler};
278 $token = $self->$handler($line);
279 last;
280 }
281 }
282
283 $token = $self->_make_unknown_token($line) unless $token;
284
285 return TAP::Parser::Result->new($token);
286}
287
288##############################################################################
289
290=head3 C<token_types>
291
292 my @types = $grammar->token_types;
293
294Returns the different types of tokens which this grammar can parse.
295
296=cut
297
298sub token_types {
299 my $self = shift;
300 return keys %{ $self->{tokens} };
301}
302
303##############################################################################
304
305=head3 C<syntax_for>
306
307 my $syntax = $grammar->syntax_for($token_type);
308
309Returns a pre-compiled regular expression which will match a chunk of TAP
310corresponding to the token type. For example (not that you should really pay
311attention to this, C<< $grammar->syntax_for('comment') >> will return
312C<< qr/^#(.*)/ >>.
313
314=cut
315
316sub syntax_for {
317 my ( $self, $type ) = @_;
318 return $self->{tokens}->{$type}->{syntax};
319}
320
321##############################################################################
322
323=head3 C<handler_for>
324
325 my $handler = $grammar->handler_for($token_type);
326
327Returns a code reference which, when passed an appropriate line of TAP,
328returns the lexed token corresponding to that line. As a result, the basic
329TAP parsing loop looks similar to the following:
330
331 my @tokens;
332 my $grammar = TAP::Grammar->new;
333 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
334 foreach my $type ( $grammar->token_types ) {
335 my $syntax = $grammar->syntax_for($type);
336 if ( $line =~ $syntax ) {
337 my $handler = $grammar->handler_for($type);
338 push @tokens => $grammar->$handler($line);
339 next LINE;
340 }
341 }
342 push @tokens => $grammar->_make_unknown_token($line);
343 }
344
345=cut
346
347sub handler_for {
348 my ( $self, $type ) = @_;
349 return $self->{tokens}->{$type}->{handler};
350}
351
352sub _make_version_token {
353 my ( $self, $line, $version ) = @_;
354 return {
355 type => 'version',
356 raw => $line,
357 version => $version,
358 };
359}
360
361sub _make_plan_token {
362 my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
363
364 if ( $directive eq 'SKIP' && 0 != $tests_planned ) {
365 warn
366 "Specified SKIP directive in plan but more than 0 tests ($line)\n";
367 }
368 return {
369 type => 'plan',
370 raw => $line,
371 tests_planned => $tests_planned,
372 directive => $directive,
373 explanation => _trim($explanation),
374 todo_list => $todo,
375 };
376}
377
378sub _make_test_token {
379 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
380 my %test = (
381 ok => $ok,
382 test_num => $num,
383 description => _trim($desc),
2a7f4b9b 384 directive => uc( defined $dir ? $dir : '' ),
b965d173 385 explanation => _trim($explanation),
386 raw => $line,
387 type => 'test',
388 );
389 return \%test;
390}
391
392sub _make_unknown_token {
393 my ( $self, $line ) = @_;
394 return {
395 raw => $line,
396 type => 'unknown',
397 };
398}
399
400sub _make_comment_token {
401 my ( $self, $line, $comment ) = @_;
402 return {
403 type => 'comment',
404 raw => $line,
405 comment => _trim($comment)
406 };
407}
408
409sub _make_bailout_token {
410 my ( $self, $line, $explanation ) = @_;
411 return {
412 type => 'bailout',
413 raw => $line,
414 bailout => _trim($explanation)
415 };
416}
417
418sub _make_yaml_token {
419 my ( $self, $pad, $marker ) = @_;
420
421 my $yaml = TAP::Parser::YAMLish::Reader->new;
422
423 my $stream = $self->{stream};
424
425 # Construct a reader that reads from our input stripping leading
426 # spaces from each line.
427 my $leader = length($pad);
428 my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
429 my @extra = ($marker);
430 my $reader = sub {
431 return shift @extra if @extra;
432 my $line = $stream->next;
433 return $2 if $line =~ $strip;
434 return;
435 };
436
437 my $data = $yaml->read($reader);
438
439 # Reconstitute input. This is convoluted. Maybe we should just
440 # record it on the way in...
441 chomp( my $raw = $yaml->get_raw );
442 $raw =~ s/^/$pad/mg;
443
444 return {
445 type => 'yaml',
446 raw => $raw,
447 data => $data
448 };
449}
450
2a7f4b9b 451sub _make_pragma_token {
452 my ( $self, $line, $pragmas ) = @_;
453 return {
454 type => 'pragma',
455 raw => $line,
456 pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
457 };
458}
459
b965d173 460sub _trim {
461 my $data = shift;
462
463 return '' unless defined $data;
464
465 $data =~ s/^\s+//;
466 $data =~ s/\s+$//;
467 return $data;
468}
469
470=head1 TAP GRAMMAR
471
472B<NOTE:> This grammar is slightly out of date. There's still some discussion
473about it and a new one will be provided when we have things better defined.
474
475The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
476stream-based protocol. In fact, it's quite legal to have an infinite stream.
477For the same reason that we don't apply regexes to streams, we're not using a
478formal grammar here. Instead, we parse the TAP in lines.
479
480For purposes for forward compatability, any result which does not match the
481following grammar is currently referred to as
482L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
483
484A formal grammar would look similar to the following:
485
486 (*
487 For the time being, I'm cheating on the EBNF by allowing
488 certain terms to be defined by POSIX character classes by
489 using the following syntax:
490
491 digit ::= [:digit:]
492
493 As far as I am aware, that's not valid EBNF. Sue me. I
494 didn't know how to write "char" otherwise (Unicode issues).
495 Suggestions welcome.
496 *)
497
498 tap ::= version? { comment | unknown } leading_plan lines
499 |
500 lines trailing_plan {comment}
501
502 version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
503
504 leading_plan ::= plan skip_directive? "\n"
505
506 trailing_plan ::= plan "\n"
507
508 plan ::= '1..' nonNegativeInteger
509
510 lines ::= line {line}
511
512 line ::= (comment | test | unknown | bailout ) "\n"
513
514 test ::= status positiveInteger? description? directive?
515
516 status ::= 'not '? 'ok '
517
518 description ::= (character - (digit | '#')) {character - '#'}
519
520 directive ::= todo_directive | skip_directive
521
522 todo_directive ::= hash_mark 'TODO' ' ' {character}
523
524 skip_directive ::= hash_mark 'SKIP' ' ' {character}
525
526 comment ::= hash_mark {character}
527
528 hash_mark ::= '#' {' '}
529
530 bailout ::= 'Bail out!' {character}
531
532 unknown ::= { (character - "\n") }
533
534 (* POSIX character classes and other terminals *)
535
536 digit ::= [:digit:]
537 character ::= ([:print:] - "\n")
538 positiveInteger ::= ( digit - '0' ) {digit}
539 nonNegativeInteger ::= digit {digit}
540
541
542=cut
543
5441;