Commit | Line | Data |
b965d173 |
1 | package TAP::Parser::Grammar; |
2 | |
3 | use strict; |
4 | use vars qw($VERSION); |
5 | |
6 | use TAP::Parser::Result (); |
7 | use TAP::Parser::YAMLish::Reader (); |
8 | |
9 | =head1 NAME |
10 | |
11 | TAP::Parser::Grammar - A grammar for the Test Anything Protocol. |
12 | |
13 | =head1 VERSION |
14 | |
69f36734 |
15 | Version 3.06 |
b965d173 |
16 | |
17 | =cut |
18 | |
69f36734 |
19 | $VERSION = '3.06'; |
b965d173 |
20 | |
21 | =head1 DESCRIPTION |
22 | |
23 | C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs |
24 | L<TAP::Parser::Result> subclasses to represent the tokens. |
25 | |
26 | Do not attempt to use this class directly. It won't make sense. It's mainly |
27 | here to ensure that we will be able to have pluggable grammars when TAP is |
28 | expanded at some future date (plus, this stuff was really cluttering the |
29 | parser). |
30 | |
31 | =cut |
32 | |
33 | ############################################################################## |
34 | |
35 | =head2 Class Methods |
36 | |
37 | |
38 | =head3 C<new> |
39 | |
40 | my $grammar = TAP::Grammar->new($stream); |
41 | |
42 | Returns TAP grammar object that will parse the specified stream. |
43 | |
44 | =cut |
45 | |
46 | sub new { |
47 | my ( $class, $stream ) = @_; |
48 | my $self = bless { stream => $stream }, $class; |
49 | $self->set_version(12); |
50 | return $self; |
51 | } |
52 | |
53 | my %language_for; |
54 | |
55 | { |
56 | |
57 | # XXX the 'not' and 'ok' might be on separate lines in VMS ... |
58 | my $ok = qr/(?:not )?ok\b/; |
59 | my $num = qr/\d+/; |
60 | |
61 | my %v12 = ( |
62 | version => { |
63 | syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i, |
64 | handler => sub { |
65 | my ( $self, $line ) = @_; |
66 | my $version = $1; |
67 | return $self->_make_version_token( $line, $version, ); |
68 | }, |
69 | }, |
70 | plan => { |
71 | syntax => qr/^1\.\.(\d+)\s*(.*)\z/, |
72 | handler => sub { |
73 | my ( $self, $line ) = @_; |
74 | my ( $tests_planned, $tail ) = ( $1, $2 ); |
75 | my $explanation = undef; |
76 | my $skip = ''; |
77 | |
78 | if ( $tail =~ /^todo((?:\s+\d+)+)/ ) { |
79 | my @todo = split /\s+/, _trim($1); |
80 | return $self->_make_plan_token( |
81 | $line, $tests_planned, 'TODO', |
82 | '', \@todo |
83 | ); |
84 | } |
85 | elsif ( 0 == $tests_planned ) { |
86 | $skip = 'SKIP'; |
87 | $explanation = $tail; |
88 | |
89 | # Trim valid SKIP directive without being strict |
90 | # about its presence. |
91 | $explanation =~ s/^#\s*//; |
92 | $explanation =~ s/^skip\S*\s+//i; |
93 | } |
94 | elsif ( $tail !~ /^\s*$/ ) { |
95 | return $self->_make_unknown_token($line); |
96 | } |
97 | |
98 | $explanation = '' unless defined $explanation; |
99 | |
100 | return $self->_make_plan_token( |
101 | $line, $tests_planned, $skip, |
102 | $explanation, [] |
103 | ); |
104 | |
105 | }, |
106 | }, |
107 | |
108 | # An optimization to handle the most common test lines without |
109 | # directives. |
110 | simple_test => { |
111 | syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x, |
112 | handler => sub { |
113 | my ( $self, $line ) = @_; |
114 | my ( $ok, $num, $desc ) = ( $1, $2, $3 ); |
115 | |
116 | return $self->_make_test_token( |
117 | $line, $ok, $num, |
118 | $desc |
119 | ); |
120 | }, |
121 | }, |
122 | test => { |
123 | syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x, |
124 | handler => sub { |
125 | my ( $self, $line ) = @_; |
126 | my ( $ok, $num, $desc ) = ( $1, $2, $3 ); |
127 | my ( $dir, $explanation ) = ( '', '' ); |
128 | if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* ) |
129 | \# \s* (SKIP|TODO) \b \s* (.*) $/ix |
130 | ) |
131 | { |
132 | ( $desc, $dir, $explanation ) = ( $1, $2, $3 ); |
133 | } |
134 | return $self->_make_test_token( |
135 | $line, $ok, $num, $desc, |
136 | uc $dir, $explanation |
137 | ); |
138 | }, |
139 | }, |
140 | comment => { |
141 | syntax => qr/^#(.*)/, |
142 | handler => sub { |
143 | my ( $self, $line ) = @_; |
144 | my $comment = $1; |
145 | return $self->_make_comment_token( $line, $comment ); |
146 | }, |
147 | }, |
148 | bailout => { |
149 | syntax => qr/^Bail out!\s*(.*)/, |
150 | handler => sub { |
151 | my ( $self, $line ) = @_; |
152 | my $explanation = $1; |
153 | return $self->_make_bailout_token( |
154 | $line, |
155 | $explanation |
156 | ); |
157 | }, |
158 | }, |
159 | ); |
160 | |
161 | my %v13 = ( |
162 | %v12, |
163 | plan => { |
164 | syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i, |
165 | handler => sub { |
166 | my ( $self, $line ) = @_; |
167 | my ( $tests_planned, $explanation ) = ( $1, $2 ); |
168 | my $skip |
169 | = ( 0 == $tests_planned || defined $explanation ) |
170 | ? 'SKIP' |
171 | : ''; |
172 | $explanation = '' unless defined $explanation; |
173 | return $self->_make_plan_token( |
174 | $line, $tests_planned, $skip, |
175 | $explanation, [] |
176 | ); |
177 | }, |
178 | }, |
179 | yaml => { |
180 | syntax => qr/^ (\s+) (---.*) $/x, |
181 | handler => sub { |
182 | my ( $self, $line ) = @_; |
183 | my ( $pad, $marker ) = ( $1, $2 ); |
184 | return $self->_make_yaml_token( $pad, $marker ); |
185 | }, |
186 | }, |
187 | ); |
188 | |
189 | %language_for = ( |
190 | '12' => { |
191 | tokens => \%v12, |
192 | }, |
193 | '13' => { |
194 | tokens => \%v13, |
195 | setup => sub { |
196 | shift->{stream}->handle_unicode; |
197 | }, |
198 | }, |
199 | ); |
200 | } |
201 | |
202 | ############################################################################## |
203 | |
204 | =head2 Instance Methods |
205 | |
206 | =head3 C<set_version> |
207 | |
208 | $grammar->set_version(13); |
209 | |
210 | Tell the grammar which TAP syntax version to support. The lowest |
211 | supported version is 12. Although 'TAP version' isn't valid version 12 |
212 | syntax it is accepted so that higher version numbers may be parsed. |
213 | |
214 | =cut |
215 | |
216 | sub set_version { |
217 | my $self = shift; |
218 | my $version = shift; |
219 | |
220 | if ( my $language = $language_for{$version} ) { |
221 | $self->{tokens} = $language->{tokens}; |
222 | |
223 | if ( my $setup = $language->{setup} ) { |
224 | $self->$setup(); |
225 | } |
226 | |
227 | $self->_order_tokens; |
228 | } |
229 | else { |
230 | require Carp; |
231 | Carp::croak("Unsupported syntax version: $version"); |
232 | } |
233 | } |
234 | |
235 | # Optimization to put the most frequent tokens first. |
236 | sub _order_tokens { |
237 | my $self = shift; |
238 | |
239 | my %copy = %{ $self->{tokens} }; |
240 | my @ordered_tokens = grep {defined} |
241 | map { delete $copy{$_} } qw( simple_test test comment plan ); |
242 | push @ordered_tokens, values %copy; |
243 | |
244 | $self->{ordered_tokens} = \@ordered_tokens; |
245 | } |
246 | |
247 | ############################################################################## |
248 | |
249 | =head3 C<tokenize> |
250 | |
251 | my $token = $grammar->tokenize; |
252 | |
253 | This method will return a L<TAP::Parser::Result> object representing the |
254 | current line of TAP. |
255 | |
256 | =cut |
257 | |
258 | sub tokenize { |
259 | my $self = shift; |
260 | |
261 | my $line = $self->{stream}->next; |
262 | return unless defined $line; |
263 | |
264 | my $token; |
265 | |
266 | foreach my $token_data ( @{ $self->{ordered_tokens} } ) { |
267 | if ( $line =~ $token_data->{syntax} ) { |
268 | my $handler = $token_data->{handler}; |
269 | $token = $self->$handler($line); |
270 | last; |
271 | } |
272 | } |
273 | |
274 | $token = $self->_make_unknown_token($line) unless $token; |
275 | |
276 | return TAP::Parser::Result->new($token); |
277 | } |
278 | |
279 | ############################################################################## |
280 | |
281 | =head3 C<token_types> |
282 | |
283 | my @types = $grammar->token_types; |
284 | |
285 | Returns the different types of tokens which this grammar can parse. |
286 | |
287 | =cut |
288 | |
289 | sub token_types { |
290 | my $self = shift; |
291 | return keys %{ $self->{tokens} }; |
292 | } |
293 | |
294 | ############################################################################## |
295 | |
296 | =head3 C<syntax_for> |
297 | |
298 | my $syntax = $grammar->syntax_for($token_type); |
299 | |
300 | Returns a pre-compiled regular expression which will match a chunk of TAP |
301 | corresponding to the token type. For example (not that you should really pay |
302 | attention to this, C<< $grammar->syntax_for('comment') >> will return |
303 | C<< qr/^#(.*)/ >>. |
304 | |
305 | =cut |
306 | |
307 | sub syntax_for { |
308 | my ( $self, $type ) = @_; |
309 | return $self->{tokens}->{$type}->{syntax}; |
310 | } |
311 | |
312 | ############################################################################## |
313 | |
314 | =head3 C<handler_for> |
315 | |
316 | my $handler = $grammar->handler_for($token_type); |
317 | |
318 | Returns a code reference which, when passed an appropriate line of TAP, |
319 | returns the lexed token corresponding to that line. As a result, the basic |
320 | TAP parsing loop looks similar to the following: |
321 | |
322 | my @tokens; |
323 | my $grammar = TAP::Grammar->new; |
324 | LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) { |
325 | foreach my $type ( $grammar->token_types ) { |
326 | my $syntax = $grammar->syntax_for($type); |
327 | if ( $line =~ $syntax ) { |
328 | my $handler = $grammar->handler_for($type); |
329 | push @tokens => $grammar->$handler($line); |
330 | next LINE; |
331 | } |
332 | } |
333 | push @tokens => $grammar->_make_unknown_token($line); |
334 | } |
335 | |
336 | =cut |
337 | |
338 | sub handler_for { |
339 | my ( $self, $type ) = @_; |
340 | return $self->{tokens}->{$type}->{handler}; |
341 | } |
342 | |
343 | sub _make_version_token { |
344 | my ( $self, $line, $version ) = @_; |
345 | return { |
346 | type => 'version', |
347 | raw => $line, |
348 | version => $version, |
349 | }; |
350 | } |
351 | |
352 | sub _make_plan_token { |
353 | my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_; |
354 | |
355 | if ( $directive eq 'SKIP' && 0 != $tests_planned ) { |
356 | warn |
357 | "Specified SKIP directive in plan but more than 0 tests ($line)\n"; |
358 | } |
359 | return { |
360 | type => 'plan', |
361 | raw => $line, |
362 | tests_planned => $tests_planned, |
363 | directive => $directive, |
364 | explanation => _trim($explanation), |
365 | todo_list => $todo, |
366 | }; |
367 | } |
368 | |
369 | sub _make_test_token { |
370 | my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_; |
371 | my %test = ( |
372 | ok => $ok, |
373 | test_num => $num, |
374 | description => _trim($desc), |
375 | directive => uc($dir), |
376 | explanation => _trim($explanation), |
377 | raw => $line, |
378 | type => 'test', |
379 | ); |
380 | return \%test; |
381 | } |
382 | |
383 | sub _make_unknown_token { |
384 | my ( $self, $line ) = @_; |
385 | return { |
386 | raw => $line, |
387 | type => 'unknown', |
388 | }; |
389 | } |
390 | |
391 | sub _make_comment_token { |
392 | my ( $self, $line, $comment ) = @_; |
393 | return { |
394 | type => 'comment', |
395 | raw => $line, |
396 | comment => _trim($comment) |
397 | }; |
398 | } |
399 | |
400 | sub _make_bailout_token { |
401 | my ( $self, $line, $explanation ) = @_; |
402 | return { |
403 | type => 'bailout', |
404 | raw => $line, |
405 | bailout => _trim($explanation) |
406 | }; |
407 | } |
408 | |
409 | sub _make_yaml_token { |
410 | my ( $self, $pad, $marker ) = @_; |
411 | |
412 | my $yaml = TAP::Parser::YAMLish::Reader->new; |
413 | |
414 | my $stream = $self->{stream}; |
415 | |
416 | # Construct a reader that reads from our input stripping leading |
417 | # spaces from each line. |
418 | my $leader = length($pad); |
419 | my $strip = qr{ ^ (\s{$leader}) (.*) $ }x; |
420 | my @extra = ($marker); |
421 | my $reader = sub { |
422 | return shift @extra if @extra; |
423 | my $line = $stream->next; |
424 | return $2 if $line =~ $strip; |
425 | return; |
426 | }; |
427 | |
428 | my $data = $yaml->read($reader); |
429 | |
430 | # Reconstitute input. This is convoluted. Maybe we should just |
431 | # record it on the way in... |
432 | chomp( my $raw = $yaml->get_raw ); |
433 | $raw =~ s/^/$pad/mg; |
434 | |
435 | return { |
436 | type => 'yaml', |
437 | raw => $raw, |
438 | data => $data |
439 | }; |
440 | } |
441 | |
442 | sub _trim { |
443 | my $data = shift; |
444 | |
445 | return '' unless defined $data; |
446 | |
447 | $data =~ s/^\s+//; |
448 | $data =~ s/\s+$//; |
449 | return $data; |
450 | } |
451 | |
452 | =head1 TAP GRAMMAR |
453 | |
454 | B<NOTE:> This grammar is slightly out of date. There's still some discussion |
455 | about it and a new one will be provided when we have things better defined. |
456 | |
457 | The L<TAP::Parser> does not use a formal grammar because TAP is essentially a |
458 | stream-based protocol. In fact, it's quite legal to have an infinite stream. |
459 | For the same reason that we don't apply regexes to streams, we're not using a |
460 | formal grammar here. Instead, we parse the TAP in lines. |
461 | |
462 | For purposes for forward compatability, any result which does not match the |
463 | following grammar is currently referred to as |
464 | L<TAP::Parser::Result::Unknown>. It is I<not> a parse error. |
465 | |
466 | A formal grammar would look similar to the following: |
467 | |
468 | (* |
469 | For the time being, I'm cheating on the EBNF by allowing |
470 | certain terms to be defined by POSIX character classes by |
471 | using the following syntax: |
472 | |
473 | digit ::= [:digit:] |
474 | |
475 | As far as I am aware, that's not valid EBNF. Sue me. I |
476 | didn't know how to write "char" otherwise (Unicode issues). |
477 | Suggestions welcome. |
478 | *) |
479 | |
480 | tap ::= version? { comment | unknown } leading_plan lines |
481 | | |
482 | lines trailing_plan {comment} |
483 | |
484 | version ::= 'TAP version ' positiveInteger {positiveInteger} "\n" |
485 | |
486 | leading_plan ::= plan skip_directive? "\n" |
487 | |
488 | trailing_plan ::= plan "\n" |
489 | |
490 | plan ::= '1..' nonNegativeInteger |
491 | |
492 | lines ::= line {line} |
493 | |
494 | line ::= (comment | test | unknown | bailout ) "\n" |
495 | |
496 | test ::= status positiveInteger? description? directive? |
497 | |
498 | status ::= 'not '? 'ok ' |
499 | |
500 | description ::= (character - (digit | '#')) {character - '#'} |
501 | |
502 | directive ::= todo_directive | skip_directive |
503 | |
504 | todo_directive ::= hash_mark 'TODO' ' ' {character} |
505 | |
506 | skip_directive ::= hash_mark 'SKIP' ' ' {character} |
507 | |
508 | comment ::= hash_mark {character} |
509 | |
510 | hash_mark ::= '#' {' '} |
511 | |
512 | bailout ::= 'Bail out!' {character} |
513 | |
514 | unknown ::= { (character - "\n") } |
515 | |
516 | (* POSIX character classes and other terminals *) |
517 | |
518 | digit ::= [:digit:] |
519 | character ::= ([:print:] - "\n") |
520 | positiveInteger ::= ( digit - '0' ) {digit} |
521 | nonNegativeInteger ::= digit {digit} |
522 | |
523 | |
524 | =cut |
525 | |
526 | 1; |