Commit | Line | Data |
b965d173 |
1 | package TAP::Parser::Grammar; |
2 | |
3 | use strict; |
4 | use vars qw($VERSION); |
5 | |
6 | use TAP::Parser::Result (); |
7 | use TAP::Parser::YAMLish::Reader (); |
8 | |
9 | =head1 NAME |
10 | |
11 | TAP::Parser::Grammar - A grammar for the Test Anything Protocol. |
12 | |
13 | =head1 VERSION |
14 | |
2a7f4b9b |
15 | Version 3.10 |
b965d173 |
16 | |
17 | =cut |
18 | |
2a7f4b9b |
19 | $VERSION = '3.10'; |
b965d173 |
20 | |
21 | =head1 DESCRIPTION |
22 | |
23 | C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs |
24 | L<TAP::Parser::Result> subclasses to represent the tokens. |
25 | |
26 | Do not attempt to use this class directly. It won't make sense. It's mainly |
27 | here to ensure that we will be able to have pluggable grammars when TAP is |
28 | expanded at some future date (plus, this stuff was really cluttering the |
29 | parser). |
30 | |
31 | =cut |
32 | |
33 | ############################################################################## |
34 | |
35 | =head2 Class Methods |
36 | |
37 | |
38 | =head3 C<new> |
39 | |
40 | my $grammar = TAP::Grammar->new($stream); |
41 | |
42 | Returns TAP grammar object that will parse the specified stream. |
43 | |
44 | =cut |
45 | |
46 | sub new { |
47 | my ( $class, $stream ) = @_; |
48 | my $self = bless { stream => $stream }, $class; |
49 | $self->set_version(12); |
50 | return $self; |
51 | } |
52 | |
53 | my %language_for; |
54 | |
55 | { |
56 | |
57 | # XXX the 'not' and 'ok' might be on separate lines in VMS ... |
58 | my $ok = qr/(?:not )?ok\b/; |
59 | my $num = qr/\d+/; |
60 | |
61 | my %v12 = ( |
62 | version => { |
63 | syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i, |
64 | handler => sub { |
65 | my ( $self, $line ) = @_; |
66 | my $version = $1; |
67 | return $self->_make_version_token( $line, $version, ); |
68 | }, |
69 | }, |
70 | plan => { |
71 | syntax => qr/^1\.\.(\d+)\s*(.*)\z/, |
72 | handler => sub { |
73 | my ( $self, $line ) = @_; |
74 | my ( $tests_planned, $tail ) = ( $1, $2 ); |
75 | my $explanation = undef; |
76 | my $skip = ''; |
77 | |
78 | if ( $tail =~ /^todo((?:\s+\d+)+)/ ) { |
79 | my @todo = split /\s+/, _trim($1); |
80 | return $self->_make_plan_token( |
81 | $line, $tests_planned, 'TODO', |
82 | '', \@todo |
83 | ); |
84 | } |
85 | elsif ( 0 == $tests_planned ) { |
86 | $skip = 'SKIP'; |
87 | $explanation = $tail; |
88 | |
89 | # Trim valid SKIP directive without being strict |
90 | # about its presence. |
91 | $explanation =~ s/^#\s*//; |
92 | $explanation =~ s/^skip\S*\s+//i; |
93 | } |
94 | elsif ( $tail !~ /^\s*$/ ) { |
95 | return $self->_make_unknown_token($line); |
96 | } |
97 | |
98 | $explanation = '' unless defined $explanation; |
99 | |
100 | return $self->_make_plan_token( |
101 | $line, $tests_planned, $skip, |
102 | $explanation, [] |
103 | ); |
104 | |
105 | }, |
106 | }, |
107 | |
108 | # An optimization to handle the most common test lines without |
109 | # directives. |
110 | simple_test => { |
111 | syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x, |
112 | handler => sub { |
113 | my ( $self, $line ) = @_; |
114 | my ( $ok, $num, $desc ) = ( $1, $2, $3 ); |
115 | |
116 | return $self->_make_test_token( |
117 | $line, $ok, $num, |
118 | $desc |
119 | ); |
120 | }, |
121 | }, |
122 | test => { |
123 | syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x, |
124 | handler => sub { |
125 | my ( $self, $line ) = @_; |
126 | my ( $ok, $num, $desc ) = ( $1, $2, $3 ); |
127 | my ( $dir, $explanation ) = ( '', '' ); |
128 | if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* ) |
129 | \# \s* (SKIP|TODO) \b \s* (.*) $/ix |
130 | ) |
131 | { |
132 | ( $desc, $dir, $explanation ) = ( $1, $2, $3 ); |
133 | } |
134 | return $self->_make_test_token( |
135 | $line, $ok, $num, $desc, |
a62d3a1b |
136 | $dir, $explanation |
b965d173 |
137 | ); |
138 | }, |
139 | }, |
140 | comment => { |
141 | syntax => qr/^#(.*)/, |
142 | handler => sub { |
143 | my ( $self, $line ) = @_; |
144 | my $comment = $1; |
145 | return $self->_make_comment_token( $line, $comment ); |
146 | }, |
147 | }, |
148 | bailout => { |
149 | syntax => qr/^Bail out!\s*(.*)/, |
150 | handler => sub { |
151 | my ( $self, $line ) = @_; |
152 | my $explanation = $1; |
153 | return $self->_make_bailout_token( |
154 | $line, |
155 | $explanation |
156 | ); |
157 | }, |
158 | }, |
159 | ); |
160 | |
161 | my %v13 = ( |
162 | %v12, |
163 | plan => { |
164 | syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i, |
165 | handler => sub { |
166 | my ( $self, $line ) = @_; |
167 | my ( $tests_planned, $explanation ) = ( $1, $2 ); |
168 | my $skip |
169 | = ( 0 == $tests_planned || defined $explanation ) |
170 | ? 'SKIP' |
171 | : ''; |
172 | $explanation = '' unless defined $explanation; |
173 | return $self->_make_plan_token( |
174 | $line, $tests_planned, $skip, |
175 | $explanation, [] |
176 | ); |
177 | }, |
178 | }, |
179 | yaml => { |
180 | syntax => qr/^ (\s+) (---.*) $/x, |
181 | handler => sub { |
182 | my ( $self, $line ) = @_; |
183 | my ( $pad, $marker ) = ( $1, $2 ); |
184 | return $self->_make_yaml_token( $pad, $marker ); |
185 | }, |
186 | }, |
2a7f4b9b |
187 | pragma => { |
188 | syntax => |
189 | qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x, |
190 | handler => sub { |
191 | my ( $self, $line ) = @_; |
192 | my $pragmas = $1; |
193 | return $self->_make_pragma_token( $line, $pragmas ); |
194 | }, |
195 | }, |
b965d173 |
196 | ); |
197 | |
198 | %language_for = ( |
199 | '12' => { |
200 | tokens => \%v12, |
201 | }, |
202 | '13' => { |
203 | tokens => \%v13, |
204 | setup => sub { |
205 | shift->{stream}->handle_unicode; |
206 | }, |
207 | }, |
208 | ); |
209 | } |
210 | |
211 | ############################################################################## |
212 | |
213 | =head2 Instance Methods |
214 | |
215 | =head3 C<set_version> |
216 | |
217 | $grammar->set_version(13); |
218 | |
219 | Tell the grammar which TAP syntax version to support. The lowest |
220 | supported version is 12. Although 'TAP version' isn't valid version 12 |
221 | syntax it is accepted so that higher version numbers may be parsed. |
222 | |
223 | =cut |
224 | |
225 | sub set_version { |
226 | my $self = shift; |
227 | my $version = shift; |
228 | |
229 | if ( my $language = $language_for{$version} ) { |
230 | $self->{tokens} = $language->{tokens}; |
231 | |
232 | if ( my $setup = $language->{setup} ) { |
233 | $self->$setup(); |
234 | } |
235 | |
236 | $self->_order_tokens; |
237 | } |
238 | else { |
239 | require Carp; |
240 | Carp::croak("Unsupported syntax version: $version"); |
241 | } |
242 | } |
243 | |
244 | # Optimization to put the most frequent tokens first. |
245 | sub _order_tokens { |
246 | my $self = shift; |
247 | |
248 | my %copy = %{ $self->{tokens} }; |
249 | my @ordered_tokens = grep {defined} |
250 | map { delete $copy{$_} } qw( simple_test test comment plan ); |
251 | push @ordered_tokens, values %copy; |
252 | |
253 | $self->{ordered_tokens} = \@ordered_tokens; |
254 | } |
255 | |
256 | ############################################################################## |
257 | |
258 | =head3 C<tokenize> |
259 | |
260 | my $token = $grammar->tokenize; |
261 | |
262 | This method will return a L<TAP::Parser::Result> object representing the |
263 | current line of TAP. |
264 | |
265 | =cut |
266 | |
267 | sub tokenize { |
268 | my $self = shift; |
269 | |
270 | my $line = $self->{stream}->next; |
271 | return unless defined $line; |
272 | |
273 | my $token; |
274 | |
275 | foreach my $token_data ( @{ $self->{ordered_tokens} } ) { |
276 | if ( $line =~ $token_data->{syntax} ) { |
277 | my $handler = $token_data->{handler}; |
278 | $token = $self->$handler($line); |
279 | last; |
280 | } |
281 | } |
282 | |
283 | $token = $self->_make_unknown_token($line) unless $token; |
284 | |
285 | return TAP::Parser::Result->new($token); |
286 | } |
287 | |
288 | ############################################################################## |
289 | |
290 | =head3 C<token_types> |
291 | |
292 | my @types = $grammar->token_types; |
293 | |
294 | Returns the different types of tokens which this grammar can parse. |
295 | |
296 | =cut |
297 | |
298 | sub token_types { |
299 | my $self = shift; |
300 | return keys %{ $self->{tokens} }; |
301 | } |
302 | |
303 | ############################################################################## |
304 | |
305 | =head3 C<syntax_for> |
306 | |
307 | my $syntax = $grammar->syntax_for($token_type); |
308 | |
309 | Returns a pre-compiled regular expression which will match a chunk of TAP |
310 | corresponding to the token type. For example (not that you should really pay |
311 | attention to this, C<< $grammar->syntax_for('comment') >> will return |
312 | C<< qr/^#(.*)/ >>. |
313 | |
314 | =cut |
315 | |
316 | sub syntax_for { |
317 | my ( $self, $type ) = @_; |
318 | return $self->{tokens}->{$type}->{syntax}; |
319 | } |
320 | |
321 | ############################################################################## |
322 | |
323 | =head3 C<handler_for> |
324 | |
325 | my $handler = $grammar->handler_for($token_type); |
326 | |
327 | Returns a code reference which, when passed an appropriate line of TAP, |
328 | returns the lexed token corresponding to that line. As a result, the basic |
329 | TAP parsing loop looks similar to the following: |
330 | |
331 | my @tokens; |
332 | my $grammar = TAP::Grammar->new; |
333 | LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) { |
334 | foreach my $type ( $grammar->token_types ) { |
335 | my $syntax = $grammar->syntax_for($type); |
336 | if ( $line =~ $syntax ) { |
337 | my $handler = $grammar->handler_for($type); |
338 | push @tokens => $grammar->$handler($line); |
339 | next LINE; |
340 | } |
341 | } |
342 | push @tokens => $grammar->_make_unknown_token($line); |
343 | } |
344 | |
345 | =cut |
346 | |
347 | sub handler_for { |
348 | my ( $self, $type ) = @_; |
349 | return $self->{tokens}->{$type}->{handler}; |
350 | } |
351 | |
352 | sub _make_version_token { |
353 | my ( $self, $line, $version ) = @_; |
354 | return { |
355 | type => 'version', |
356 | raw => $line, |
357 | version => $version, |
358 | }; |
359 | } |
360 | |
361 | sub _make_plan_token { |
362 | my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_; |
363 | |
364 | if ( $directive eq 'SKIP' && 0 != $tests_planned ) { |
365 | warn |
366 | "Specified SKIP directive in plan but more than 0 tests ($line)\n"; |
367 | } |
368 | return { |
369 | type => 'plan', |
370 | raw => $line, |
371 | tests_planned => $tests_planned, |
372 | directive => $directive, |
373 | explanation => _trim($explanation), |
374 | todo_list => $todo, |
375 | }; |
376 | } |
377 | |
378 | sub _make_test_token { |
379 | my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_; |
380 | my %test = ( |
381 | ok => $ok, |
382 | test_num => $num, |
383 | description => _trim($desc), |
2a7f4b9b |
384 | directive => uc( defined $dir ? $dir : '' ), |
b965d173 |
385 | explanation => _trim($explanation), |
386 | raw => $line, |
387 | type => 'test', |
388 | ); |
389 | return \%test; |
390 | } |
391 | |
392 | sub _make_unknown_token { |
393 | my ( $self, $line ) = @_; |
394 | return { |
395 | raw => $line, |
396 | type => 'unknown', |
397 | }; |
398 | } |
399 | |
400 | sub _make_comment_token { |
401 | my ( $self, $line, $comment ) = @_; |
402 | return { |
403 | type => 'comment', |
404 | raw => $line, |
405 | comment => _trim($comment) |
406 | }; |
407 | } |
408 | |
409 | sub _make_bailout_token { |
410 | my ( $self, $line, $explanation ) = @_; |
411 | return { |
412 | type => 'bailout', |
413 | raw => $line, |
414 | bailout => _trim($explanation) |
415 | }; |
416 | } |
417 | |
418 | sub _make_yaml_token { |
419 | my ( $self, $pad, $marker ) = @_; |
420 | |
421 | my $yaml = TAP::Parser::YAMLish::Reader->new; |
422 | |
423 | my $stream = $self->{stream}; |
424 | |
425 | # Construct a reader that reads from our input stripping leading |
426 | # spaces from each line. |
427 | my $leader = length($pad); |
428 | my $strip = qr{ ^ (\s{$leader}) (.*) $ }x; |
429 | my @extra = ($marker); |
430 | my $reader = sub { |
431 | return shift @extra if @extra; |
432 | my $line = $stream->next; |
433 | return $2 if $line =~ $strip; |
434 | return; |
435 | }; |
436 | |
437 | my $data = $yaml->read($reader); |
438 | |
439 | # Reconstitute input. This is convoluted. Maybe we should just |
440 | # record it on the way in... |
441 | chomp( my $raw = $yaml->get_raw ); |
442 | $raw =~ s/^/$pad/mg; |
443 | |
444 | return { |
445 | type => 'yaml', |
446 | raw => $raw, |
447 | data => $data |
448 | }; |
449 | } |
450 | |
2a7f4b9b |
451 | sub _make_pragma_token { |
452 | my ( $self, $line, $pragmas ) = @_; |
453 | return { |
454 | type => 'pragma', |
455 | raw => $line, |
456 | pragmas => [ split /\s*,\s*/, _trim($pragmas) ], |
457 | }; |
458 | } |
459 | |
b965d173 |
460 | sub _trim { |
461 | my $data = shift; |
462 | |
463 | return '' unless defined $data; |
464 | |
465 | $data =~ s/^\s+//; |
466 | $data =~ s/\s+$//; |
467 | return $data; |
468 | } |
469 | |
470 | =head1 TAP GRAMMAR |
471 | |
472 | B<NOTE:> This grammar is slightly out of date. There's still some discussion |
473 | about it and a new one will be provided when we have things better defined. |
474 | |
475 | The L<TAP::Parser> does not use a formal grammar because TAP is essentially a |
476 | stream-based protocol. In fact, it's quite legal to have an infinite stream. |
477 | For the same reason that we don't apply regexes to streams, we're not using a |
478 | formal grammar here. Instead, we parse the TAP in lines. |
479 | |
480 | For purposes for forward compatability, any result which does not match the |
481 | following grammar is currently referred to as |
482 | L<TAP::Parser::Result::Unknown>. It is I<not> a parse error. |
483 | |
484 | A formal grammar would look similar to the following: |
485 | |
486 | (* |
487 | For the time being, I'm cheating on the EBNF by allowing |
488 | certain terms to be defined by POSIX character classes by |
489 | using the following syntax: |
490 | |
491 | digit ::= [:digit:] |
492 | |
493 | As far as I am aware, that's not valid EBNF. Sue me. I |
494 | didn't know how to write "char" otherwise (Unicode issues). |
495 | Suggestions welcome. |
496 | *) |
497 | |
498 | tap ::= version? { comment | unknown } leading_plan lines |
499 | | |
500 | lines trailing_plan {comment} |
501 | |
502 | version ::= 'TAP version ' positiveInteger {positiveInteger} "\n" |
503 | |
504 | leading_plan ::= plan skip_directive? "\n" |
505 | |
506 | trailing_plan ::= plan "\n" |
507 | |
508 | plan ::= '1..' nonNegativeInteger |
509 | |
510 | lines ::= line {line} |
511 | |
512 | line ::= (comment | test | unknown | bailout ) "\n" |
513 | |
514 | test ::= status positiveInteger? description? directive? |
515 | |
516 | status ::= 'not '? 'ok ' |
517 | |
518 | description ::= (character - (digit | '#')) {character - '#'} |
519 | |
520 | directive ::= todo_directive | skip_directive |
521 | |
522 | todo_directive ::= hash_mark 'TODO' ' ' {character} |
523 | |
524 | skip_directive ::= hash_mark 'SKIP' ' ' {character} |
525 | |
526 | comment ::= hash_mark {character} |
527 | |
528 | hash_mark ::= '#' {' '} |
529 | |
530 | bailout ::= 'Bail out!' {character} |
531 | |
532 | unknown ::= { (character - "\n") } |
533 | |
534 | (* POSIX character classes and other terminals *) |
535 | |
536 | digit ::= [:digit:] |
537 | character ::= ([:print:] - "\n") |
538 | positiveInteger ::= ( digit - '0' ) {digit} |
539 | nonNegativeInteger ::= digit {digit} |
540 | |
541 | |
542 | =cut |
543 | |
544 | 1; |