Add built local::lib
[catagits/Gitalist.git] / local-lib5 / lib / perl5 / TAP / Parser / Grammar.pm
CommitLineData
3fea05b9 1package TAP::Parser::Grammar;
2
3use strict;
4use vars qw($VERSION @ISA);
5
6use TAP::Object ();
7use TAP::Parser::ResultFactory ();
8use TAP::Parser::YAMLish::Reader ();
9
10@ISA = qw(TAP::Object);
11
12=head1 NAME
13
14TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
15
16=head1 VERSION
17
18Version 3.17
19
20=cut
21
22$VERSION = '3.17';
23
24=head1 SYNOPSIS
25
26 use TAP::Parser::Grammar;
27 my $grammar = $self->make_grammar({
28 stream => $tap_parser_stream,
29 parser => $tap_parser,
30 version => 12,
31 });
32
33 my $result = $grammar->tokenize;
34
35=head1 DESCRIPTION
36
37C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
38L<TAP::Parser::Result> subclasses to represent the tokens.
39
40Do not attempt to use this class directly. It won't make sense. It's mainly
41here to ensure that we will be able to have pluggable grammars when TAP is
42expanded at some future date (plus, this stuff was really cluttering the
43parser).
44
45=head1 METHODS
46
47=head2 Class Methods
48
49=head3 C<new>
50
51 my $grammar = TAP::Parser::Grammar->new({
52 stream => $stream,
53 parser => $parser,
54 version => $version,
55 });
56
57Returns L<TAP::Parser> grammar object that will parse the specified stream.
58Both C<stream> and C<parser> are required arguments. If C<version> is not set
59it defaults to C<12> (see L</set_version> for more details).
60
61=cut
62
63# new() implementation supplied by TAP::Object
64sub _initialize {
65 my ( $self, $args ) = @_;
66 $self->{stream} = $args->{stream}; # TODO: accessor
67 $self->{parser} = $args->{parser}; # TODO: accessor
68 $self->set_version( $args->{version} || 12 );
69 return $self;
70}
71
72my %language_for;
73
74{
75
76 # XXX the 'not' and 'ok' might be on separate lines in VMS ...
77 my $ok = qr/(?:not )?ok\b/;
78 my $num = qr/\d+/;
79
80 my %v12 = (
81 version => {
82 syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
83 handler => sub {
84 my ( $self, $line ) = @_;
85 my $version = $1;
86 return $self->_make_version_token( $line, $version, );
87 },
88 },
89 plan => {
90 syntax => qr/^1\.\.(\d+)\s*(.*)\z/,
91 handler => sub {
92 my ( $self, $line ) = @_;
93 my ( $tests_planned, $tail ) = ( $1, $2 );
94 my $explanation = undef;
95 my $skip = '';
96
97 if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
98 my @todo = split /\s+/, _trim($1);
99 return $self->_make_plan_token(
100 $line, $tests_planned, 'TODO',
101 '', \@todo
102 );
103 }
104 elsif ( 0 == $tests_planned ) {
105 $skip = 'SKIP';
106
107 # If we can't match # SKIP the directive should be undef.
108 ($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i;
109 }
110 elsif ( $tail !~ /^\s*$/ ) {
111 return $self->_make_unknown_token($line);
112 }
113
114 $explanation = '' unless defined $explanation;
115
116 return $self->_make_plan_token(
117 $line, $tests_planned, $skip,
118 $explanation, []
119 );
120
121 },
122 },
123
124 # An optimization to handle the most common test lines without
125 # directives.
126 simple_test => {
127 syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
128 handler => sub {
129 my ( $self, $line ) = @_;
130 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
131
132 return $self->_make_test_token(
133 $line, $ok, $num,
134 $desc
135 );
136 },
137 },
138 test => {
139 syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
140 handler => sub {
141 my ( $self, $line ) = @_;
142 my ( $ok, $num, $desc ) = ( $1, $2, $3 );
143 my ( $dir, $explanation ) = ( '', '' );
144 if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
145 \# \s* (SKIP|TODO) \b \s* (.*) $/ix
146 )
147 {
148 ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
149 }
150 return $self->_make_test_token(
151 $line, $ok, $num, $desc,
152 $dir, $explanation
153 );
154 },
155 },
156 comment => {
157 syntax => qr/^#(.*)/,
158 handler => sub {
159 my ( $self, $line ) = @_;
160 my $comment = $1;
161 return $self->_make_comment_token( $line, $comment );
162 },
163 },
164 bailout => {
165 syntax => qr/^Bail out!\s*(.*)/,
166 handler => sub {
167 my ( $self, $line ) = @_;
168 my $explanation = $1;
169 return $self->_make_bailout_token(
170 $line,
171 $explanation
172 );
173 },
174 },
175 );
176
177 my %v13 = (
178 %v12,
179 plan => {
180 syntax => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
181 handler => sub {
182 my ( $self, $line ) = @_;
183 my ( $tests_planned, $explanation ) = ( $1, $2 );
184 my $skip
185 = ( 0 == $tests_planned || defined $explanation )
186 ? 'SKIP'
187 : '';
188 $explanation = '' unless defined $explanation;
189 return $self->_make_plan_token(
190 $line, $tests_planned, $skip,
191 $explanation, []
192 );
193 },
194 },
195 yaml => {
196 syntax => qr/^ (\s+) (---.*) $/x,
197 handler => sub {
198 my ( $self, $line ) = @_;
199 my ( $pad, $marker ) = ( $1, $2 );
200 return $self->_make_yaml_token( $pad, $marker );
201 },
202 },
203 pragma => {
204 syntax =>
205 qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
206 handler => sub {
207 my ( $self, $line ) = @_;
208 my $pragmas = $1;
209 return $self->_make_pragma_token( $line, $pragmas );
210 },
211 },
212 );
213
214 %language_for = (
215 '12' => {
216 tokens => \%v12,
217 },
218 '13' => {
219 tokens => \%v13,
220 setup => sub {
221 shift->{stream}->handle_unicode;
222 },
223 },
224 );
225}
226
227##############################################################################
228
229=head2 Instance Methods
230
231=head3 C<set_version>
232
233 $grammar->set_version(13);
234
235Tell the grammar which TAP syntax version to support. The lowest
236supported version is 12. Although 'TAP version' isn't valid version 12
237syntax it is accepted so that higher version numbers may be parsed.
238
239=cut
240
241sub set_version {
242 my $self = shift;
243 my $version = shift;
244
245 if ( my $language = $language_for{$version} ) {
246 $self->{version} = $version;
247 $self->{tokens} = $language->{tokens};
248
249 if ( my $setup = $language->{setup} ) {
250 $self->$setup();
251 }
252
253 $self->_order_tokens;
254 }
255 else {
256 require Carp;
257 Carp::croak("Unsupported syntax version: $version");
258 }
259}
260
261# Optimization to put the most frequent tokens first.
262sub _order_tokens {
263 my $self = shift;
264
265 my %copy = %{ $self->{tokens} };
266 my @ordered_tokens = grep {defined}
267 map { delete $copy{$_} } qw( simple_test test comment plan );
268 push @ordered_tokens, values %copy;
269
270 $self->{ordered_tokens} = \@ordered_tokens;
271}
272
273##############################################################################
274
275=head3 C<tokenize>
276
277 my $token = $grammar->tokenize;
278
279This method will return a L<TAP::Parser::Result> object representing the
280current line of TAP.
281
282=cut
283
284sub tokenize {
285 my $self = shift;
286
287 my $line = $self->{stream}->next;
288 unless ( defined $line ) {
289 delete $self->{parser}; # break circular ref
290 return;
291 }
292
293 my $token;
294
295 foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
296 if ( $line =~ $token_data->{syntax} ) {
297 my $handler = $token_data->{handler};
298 $token = $self->$handler($line);
299 last;
300 }
301 }
302
303 $token = $self->_make_unknown_token($line) unless $token;
304
305 return $self->{parser}->make_result($token);
306}
307
308##############################################################################
309
310=head3 C<token_types>
311
312 my @types = $grammar->token_types;
313
314Returns the different types of tokens which this grammar can parse.
315
316=cut
317
318sub token_types {
319 my $self = shift;
320 return keys %{ $self->{tokens} };
321}
322
323##############################################################################
324
325=head3 C<syntax_for>
326
327 my $syntax = $grammar->syntax_for($token_type);
328
329Returns a pre-compiled regular expression which will match a chunk of TAP
330corresponding to the token type. For example (not that you should really pay
331attention to this, C<< $grammar->syntax_for('comment') >> will return
332C<< qr/^#(.*)/ >>.
333
334=cut
335
336sub syntax_for {
337 my ( $self, $type ) = @_;
338 return $self->{tokens}->{$type}->{syntax};
339}
340
341##############################################################################
342
343=head3 C<handler_for>
344
345 my $handler = $grammar->handler_for($token_type);
346
347Returns a code reference which, when passed an appropriate line of TAP,
348returns the lexed token corresponding to that line. As a result, the basic
349TAP parsing loop looks similar to the following:
350
351 my @tokens;
352 my $grammar = TAP::Grammar->new;
353 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
354 foreach my $type ( $grammar->token_types ) {
355 my $syntax = $grammar->syntax_for($type);
356 if ( $line =~ $syntax ) {
357 my $handler = $grammar->handler_for($type);
358 push @tokens => $grammar->$handler($line);
359 next LINE;
360 }
361 }
362 push @tokens => $grammar->_make_unknown_token($line);
363 }
364
365=cut
366
367sub handler_for {
368 my ( $self, $type ) = @_;
369 return $self->{tokens}->{$type}->{handler};
370}
371
372sub _make_version_token {
373 my ( $self, $line, $version ) = @_;
374 return {
375 type => 'version',
376 raw => $line,
377 version => $version,
378 };
379}
380
381sub _make_plan_token {
382 my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
383
384 if ( $directive eq 'SKIP'
385 && 0 != $tests_planned
386 && $self->{version} < 13 )
387 {
388 warn
389 "Specified SKIP directive in plan but more than 0 tests ($line)\n";
390 }
391
392 return {
393 type => 'plan',
394 raw => $line,
395 tests_planned => $tests_planned,
396 directive => $directive,
397 explanation => _trim($explanation),
398 todo_list => $todo,
399 };
400}
401
402sub _make_test_token {
403 my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
404 return {
405 ok => $ok,
406 test_num => $num,
407 description => _trim($desc),
408 directive => ( defined $dir ? uc $dir : '' ),
409 explanation => _trim($explanation),
410 raw => $line,
411 type => 'test',
412 };
413}
414
415sub _make_unknown_token {
416 my ( $self, $line ) = @_;
417 return {
418 raw => $line,
419 type => 'unknown',
420 };
421}
422
423sub _make_comment_token {
424 my ( $self, $line, $comment ) = @_;
425 return {
426 type => 'comment',
427 raw => $line,
428 comment => _trim($comment)
429 };
430}
431
432sub _make_bailout_token {
433 my ( $self, $line, $explanation ) = @_;
434 return {
435 type => 'bailout',
436 raw => $line,
437 bailout => _trim($explanation)
438 };
439}
440
441sub _make_yaml_token {
442 my ( $self, $pad, $marker ) = @_;
443
444 my $yaml = TAP::Parser::YAMLish::Reader->new;
445
446 my $stream = $self->{stream};
447
448 # Construct a reader that reads from our input stripping leading
449 # spaces from each line.
450 my $leader = length($pad);
451 my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
452 my @extra = ($marker);
453 my $reader = sub {
454 return shift @extra if @extra;
455 my $line = $stream->next;
456 return $2 if $line =~ $strip;
457 return;
458 };
459
460 my $data = $yaml->read($reader);
461
462 # Reconstitute input. This is convoluted. Maybe we should just
463 # record it on the way in...
464 chomp( my $raw = $yaml->get_raw );
465 $raw =~ s/^/$pad/mg;
466
467 return {
468 type => 'yaml',
469 raw => $raw,
470 data => $data
471 };
472}
473
474sub _make_pragma_token {
475 my ( $self, $line, $pragmas ) = @_;
476 return {
477 type => 'pragma',
478 raw => $line,
479 pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
480 };
481}
482
483sub _trim {
484 my $data = shift;
485
486 return '' unless defined $data;
487
488 $data =~ s/^\s+//;
489 $data =~ s/\s+$//;
490 return $data;
491}
492
4931;
494
495=head1 TAP GRAMMAR
496
497B<NOTE:> This grammar is slightly out of date. There's still some discussion
498about it and a new one will be provided when we have things better defined.
499
500The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
501stream-based protocol. In fact, it's quite legal to have an infinite stream.
502For the same reason that we don't apply regexes to streams, we're not using a
503formal grammar here. Instead, we parse the TAP in lines.
504
505For purposes for forward compatability, any result which does not match the
506following grammar is currently referred to as
507L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
508
509A formal grammar would look similar to the following:
510
511 (*
512 For the time being, I'm cheating on the EBNF by allowing
513 certain terms to be defined by POSIX character classes by
514 using the following syntax:
515
516 digit ::= [:digit:]
517
518 As far as I am aware, that's not valid EBNF. Sue me. I
519 didn't know how to write "char" otherwise (Unicode issues).
520 Suggestions welcome.
521 *)
522
523 tap ::= version? { comment | unknown } leading_plan lines
524 |
525 lines trailing_plan {comment}
526
527 version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
528
529 leading_plan ::= plan skip_directive? "\n"
530
531 trailing_plan ::= plan "\n"
532
533 plan ::= '1..' nonNegativeInteger
534
535 lines ::= line {line}
536
537 line ::= (comment | test | unknown | bailout ) "\n"
538
539 test ::= status positiveInteger? description? directive?
540
541 status ::= 'not '? 'ok '
542
543 description ::= (character - (digit | '#')) {character - '#'}
544
545 directive ::= todo_directive | skip_directive
546
547 todo_directive ::= hash_mark 'TODO' ' ' {character}
548
549 skip_directive ::= hash_mark 'SKIP' ' ' {character}
550
551 comment ::= hash_mark {character}
552
553 hash_mark ::= '#' {' '}
554
555 bailout ::= 'Bail out!' {character}
556
557 unknown ::= { (character - "\n") }
558
559 (* POSIX character classes and other terminals *)
560
561 digit ::= [:digit:]
562 character ::= ([:print:] - "\n")
563 positiveInteger ::= ( digit - '0' ) {digit}
564 nonNegativeInteger ::= digit {digit}
565
566=head1 SUBCLASSING
567
568Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
569
570If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
571do is read through the code. There's no easy way of summarizing it here.
572
573=head1 SEE ALSO
574
575L<TAP::Object>,
576L<TAP::Parser>,
577L<TAP::Parser::Iterator>,
578L<TAP::Parser::Result>,
579
580=cut