[catagits/Gitalist.git] / local-lib5 / lib / perl5 / TAP / Parser / Grammar.pm

package TAP::Parser::Grammar;

use strict;
use vars qw($VERSION @ISA);

use TAP::Object                  ();
use TAP::Parser::ResultFactory   ();
use TAP::Parser::YAMLish::Reader ();

@ISA = qw(TAP::Object);

=head1 NAME

TAP::Parser::Grammar - A grammar for the Test Anything Protocol.

=head1 VERSION

Version 3.17

=cut

$VERSION = '3.17';

=head1 SYNOPSIS

  use TAP::Parser::Grammar;
  my $grammar = $self->make_grammar({
    stream  => $tap_parser_stream,
    parser  => $tap_parser,
    version => 12,
  });

  my $result = $grammar->tokenize;

=head1 DESCRIPTION

C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
L<TAP::Parser::Result> subclasses to represent the tokens.

Do not attempt to use this class directly.  It won't make sense.  It's mainly
here to ensure that we will be able to have pluggable grammars when TAP is
expanded at some future date (plus, this stuff was really cluttering the
parser).

=head1 METHODS

=head2 Class Methods

=head3 C<new>

  my $grammar = TAP::Parser::Grammar->new({
      stream  => $stream,
      parser  => $parser,
      version => $version,
  });

Returns L<TAP::Parser> grammar object that will parse the specified stream.
Both C<stream> and C<parser> are required arguments.  If C<version> is not set
it defaults to C<12> (see L</set_version> for more details).

=cut

# new() implementation supplied by TAP::Object
sub _initialize {
    my ( $self, $args ) = @_;
    $self->{stream} = $args->{stream};    # TODO: accessor
    $self->{parser} = $args->{parser};    # TODO: accessor
    $self->set_version( $args->{version} || 12 );
    return $self;
}

my %language_for;

{

    # XXX the 'not' and 'ok' might be on separate lines in VMS ...
    my $ok  = qr/(?:not )?ok\b/;
    my $num = qr/\d+/;

    my %v12 = (
        version => {
            syntax  => qr/^TAP\s+version\s+(\d+)\s*\z/i,
            handler => sub {
                my ( $self, $line ) = @_;
                my $version = $1;
                return $self->_make_version_token( $line, $version, );
            },
        },
        plan => {
            syntax  => qr/^1\.\.(\d+)\s*(.*)\z/,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $tests_planned, $tail ) = ( $1, $2 );
                my $explanation = undef;
                my $skip        = '';

                if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
                    my @todo = split /\s+/, _trim($1);
                    return $self->_make_plan_token(
                        $line, $tests_planned, 'TODO',
                        '',    \@todo
                    );
                }
                elsif ( 0 == $tests_planned ) {
                    $skip = 'SKIP';

                    # If we can't match # SKIP the directive should be undef.
                    ($explanation) = $tail =~ /^#\s*SKIP\S*\s+(.*)/i;
                }
                elsif ( $tail !~ /^\s*$/ ) {
                    return $self->_make_unknown_token($line);
                }

                $explanation = '' unless defined $explanation;

                return $self->_make_plan_token(
                    $line, $tests_planned, $skip,
                    $explanation, []
                );

            },
        },

        # An optimization to handle the most common test lines without
        # directives.
        simple_test => {
            syntax  => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $ok, $num, $desc ) = ( $1, $2, $3 );

                return $self->_make_test_token(
                    $line, $ok, $num,
                    $desc
                );
            },
        },
        test => {
            syntax  => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $ok, $num, $desc ) = ( $1, $2, $3 );
                my ( $dir, $explanation ) = ( '', '' );
                if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
                       \# \s* (SKIP|TODO) \b \s* (.*) $/ix
                  )
                {
                    ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
                }
                return $self->_make_test_token(
                    $line, $ok, $num, $desc,
                    $dir,  $explanation
                );
            },
        },
        comment => {
            syntax  => qr/^#(.*)/,
            handler => sub {
                my ( $self, $line ) = @_;
                my $comment = $1;
                return $self->_make_comment_token( $line, $comment );
            },
        },
        bailout => {
            syntax  => qr/^Bail out!\s*(.*)/,
            handler => sub {
                my ( $self, $line ) = @_;
                my $explanation = $1;
                return $self->_make_bailout_token(
                    $line,
                    $explanation
                );
            },
        },
    );

    my %v13 = (
        %v12,
        plan => {
            syntax  => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $tests_planned, $explanation ) = ( $1, $2 );
                my $skip
                  = ( 0 == $tests_planned || defined $explanation )
                  ? 'SKIP'
                  : '';
                $explanation = '' unless defined $explanation;
                return $self->_make_plan_token(
                    $line, $tests_planned, $skip,
                    $explanation, []
                );
            },
        },
        yaml => {
            syntax  => qr/^ (\s+) (---.*) $/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $pad, $marker ) = ( $1, $2 );
                return $self->_make_yaml_token( $pad, $marker );
            },
        },
        pragma => {
            syntax =>
              qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my $pragmas = $1;
                return $self->_make_pragma_token( $line, $pragmas );
            },
        },
    );

    %language_for = (
        '12' => {
            tokens => \%v12,
        },
        '13' => {
            tokens => \%v13,
            setup  => sub {
                shift->{stream}->handle_unicode;
            },
        },
    );
}

##############################################################################

=head2 Instance Methods

=head3 C<set_version>

  $grammar->set_version(13);

Tell the grammar which TAP syntax version to support. The lowest
supported version is 12. Although 'TAP version' isn't valid version 12
syntax it is accepted so that higher version numbers may be parsed.

=cut

sub set_version {
    my $self    = shift;
    my $version = shift;

    if ( my $language = $language_for{$version} ) {
        $self->{version} = $version;
        $self->{tokens}  = $language->{tokens};

        if ( my $setup = $language->{setup} ) {
            $self->$setup();
        }

        $self->_order_tokens;
    }
    else {
        require Carp;
        Carp::croak("Unsupported syntax version: $version");
    }
}

# Optimization to put the most frequent tokens first.
sub _order_tokens {
    my $self = shift;

    my %copy = %{ $self->{tokens} };
    my @ordered_tokens = grep {defined}
      map { delete $copy{$_} } qw( simple_test test comment plan );
    push @ordered_tokens, values %copy;

    $self->{ordered_tokens} = \@ordered_tokens;
}

##############################################################################

=head3 C<tokenize>

  my $token = $grammar->tokenize;

This method will return a L<TAP::Parser::Result> object representing the
current line of TAP.

=cut

sub tokenize {
    my $self = shift;

    my $line = $self->{stream}->next;
    unless ( defined $line ) {
        delete $self->{parser};    # break circular ref
        return;
    }

    my $token;

    foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
        if ( $line =~ $token_data->{syntax} ) {
            my $handler = $token_data->{handler};
            $token = $self->$handler($line);
            last;
        }
    }

    $token = $self->_make_unknown_token($line) unless $token;

    return $self->{parser}->make_result($token);
}

##############################################################################

=head3 C<token_types>

  my @types = $grammar->token_types;

Returns the different types of tokens which this grammar can parse.

=cut

sub token_types {
    my $self = shift;
    return keys %{ $self->{tokens} };
}

##############################################################################

=head3 C<syntax_for>

  my $syntax = $grammar->syntax_for($token_type);

Returns a pre-compiled regular expression which will match a chunk of TAP
corresponding to the token type.  For example (not that you should really pay
attention to this, C<< $grammar->syntax_for('comment') >> will return
C<< qr/^#(.*)/ >>.

=cut

sub syntax_for {
    my ( $self, $type ) = @_;
    return $self->{tokens}->{$type}->{syntax};
}

##############################################################################

=head3 C<handler_for>

  my $handler = $grammar->handler_for($token_type);

Returns a code reference which, when passed an appropriate line of TAP,
returns the lexed token corresponding to that line.  As a result, the basic
TAP parsing loop looks similar to the following:

 my @tokens;
 my $grammar = TAP::Grammar->new;
 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
     foreach my $type ( $grammar->token_types ) {
         my $syntax  = $grammar->syntax_for($type);
         if ( $line =~ $syntax ) {
             my $handler = $grammar->handler_for($type);
             push @tokens => $grammar->$handler($line);
             next LINE;
         }
     }
     push @tokens => $grammar->_make_unknown_token($line);
 }

=cut

sub handler_for {
    my ( $self, $type ) = @_;
    return $self->{tokens}->{$type}->{handler};
}

sub _make_version_token {
    my ( $self, $line, $version ) = @_;
    return {
        type    => 'version',
        raw     => $line,
        version => $version,
    };
}

sub _make_plan_token {
    my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;

    if (   $directive eq 'SKIP'
        && 0 != $tests_planned
        && $self->{version} < 13 )
    {
        warn
          "Specified SKIP directive in plan but more than 0 tests ($line)\n";
    }

    return {
        type          => 'plan',
        raw           => $line,
        tests_planned => $tests_planned,
        directive     => $directive,
        explanation   => _trim($explanation),
        todo_list     => $todo,
    };
}

sub _make_test_token {
    my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
    return {
        ok          => $ok,
        test_num    => $num,
        description => _trim($desc),
        directive   => ( defined $dir ? uc $dir : '' ),
        explanation => _trim($explanation),
        raw         => $line,
        type        => 'test',
    };
}

sub _make_unknown_token {
    my ( $self, $line ) = @_;
    return {
        raw  => $line,
        type => 'unknown',
    };
}

sub _make_comment_token {
    my ( $self, $line, $comment ) = @_;
    return {
        type    => 'comment',
        raw     => $line,
        comment => _trim($comment)
    };
}

sub _make_bailout_token {
    my ( $self, $line, $explanation ) = @_;
    return {
        type    => 'bailout',
        raw     => $line,
        bailout => _trim($explanation)
    };
}

sub _make_yaml_token {
    my ( $self, $pad, $marker ) = @_;

    my $yaml = TAP::Parser::YAMLish::Reader->new;

    my $stream = $self->{stream};

    # Construct a reader that reads from our input stripping leading
    # spaces from each line.
    my $leader = length($pad);
    my $strip  = qr{ ^ (\s{$leader}) (.*) $ }x;
    my @extra  = ($marker);
    my $reader = sub {
        return shift @extra if @extra;
        my $line = $stream->next;
        return $2 if $line =~ $strip;
        return;
    };

    my $data = $yaml->read($reader);

    # Reconstitute input. This is convoluted. Maybe we should just
    # record it on the way in...
    chomp( my $raw = $yaml->get_raw );
    $raw =~ s/^/$pad/mg;

    return {
        type => 'yaml',
        raw  => $raw,
        data => $data
    };
}

sub _make_pragma_token {
    my ( $self, $line, $pragmas ) = @_;
    return {
        type    => 'pragma',
        raw     => $line,
        pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
    };
}

sub _trim {
    my $data = shift;

    return '' unless defined $data;

    $data =~ s/^\s+//;
    $data =~ s/\s+$//;
    return $data;
}

1;

=head1 TAP GRAMMAR

B<NOTE:>  This grammar is slightly out of date.  There's still some discussion
about it and a new one will be provided when we have things better defined.

The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
stream-based protocol.  In fact, it's quite legal to have an infinite stream.
For the same reason that we don't apply regexes to streams, we're not using a
formal grammar here.  Instead, we parse the TAP in lines.

For purposes for forward compatability, any result which does not match the
following grammar is currently referred to as
L<TAP::Parser::Result::Unknown>.  It is I<not> a parse error.

A formal grammar would look similar to the following:

 (*
     For the time being, I'm cheating on the EBNF by allowing
     certain terms to be defined by POSIX character classes by
     using the following syntax:

       digit ::= [:digit:]

     As far as I am aware, that's not valid EBNF.  Sue me.  I
     didn't know how to write "char" otherwise (Unicode issues).
     Suggestions welcome.
 *)

 tap            ::= version? { comment | unknown } leading_plan lines
                    |
                    lines trailing_plan {comment}

 version        ::= 'TAP version ' positiveInteger {positiveInteger} "\n"

 leading_plan   ::= plan skip_directive? "\n"

 trailing_plan  ::= plan "\n"

 plan           ::= '1..' nonNegativeInteger

 lines          ::= line {line}

 line           ::= (comment | test | unknown | bailout ) "\n"

 test           ::= status positiveInteger? description? directive?

 status         ::= 'not '? 'ok '

 description    ::= (character - (digit | '#')) {character - '#'}

 directive      ::= todo_directive | skip_directive

 todo_directive ::= hash_mark 'TODO' ' ' {character}

 skip_directive ::= hash_mark 'SKIP' ' ' {character}

 comment        ::= hash_mark {character}

 hash_mark      ::= '#' {' '}

 bailout        ::= 'Bail out!' {character}

 unknown        ::= { (character - "\n") }

 (* POSIX character classes and other terminals *)

 digit              ::= [:digit:]
 character          ::= ([:print:] - "\n")
 positiveInteger    ::= ( digit - '0' ) {digit}
 nonNegativeInteger ::= digit {digit}

=head1 SUBCLASSING

Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.

If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
do is read through the code.  There's no easy way of summarizing it here.

=head1 SEE ALSO

L<TAP::Object>,
L<TAP::Parser>,
L<TAP::Parser::Iterator>,
L<TAP::Parser::Result>,

=cut
Commit	Line	Data
3fea05b9	1	package TAP::Parser::Grammar;
	2
	3	use strict;
	4	use vars qw($VERSION @ISA);
	5
	6	use TAP::Object ();
	7	use TAP::Parser::ResultFactory ();
	8	use TAP::Parser::YAMLish::Reader ();
	9
	10	@ISA = qw(TAP::Object);
	11
	12	=head1 NAME
	13
	14	TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
	15
	16	=head1 VERSION
	17
	18	Version 3.17
	19
	20	=cut
	21
	22	$VERSION = '3.17';
	23
	24	=head1 SYNOPSIS
	25
	26	use TAP::Parser::Grammar;
	27	my $grammar = $self->make_grammar({
	28	stream => $tap_parser_stream,
	29	parser => $tap_parser,
	30	version => 12,
	31	});
	32
	33	my $result = $grammar->tokenize;
	34
	35	=head1 DESCRIPTION
	36
	37	C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
	38	L<TAP::Parser::Result> subclasses to represent the tokens.
	39
	40	Do not attempt to use this class directly. It won't make sense. It's mainly
	41	here to ensure that we will be able to have pluggable grammars when TAP is
	42	expanded at some future date (plus, this stuff was really cluttering the
	43	parser).
	44
	45	=head1 METHODS
	46
	47	=head2 Class Methods
	48
	49	=head3 C<new>
	50
	51	my $grammar = TAP::Parser::Grammar->new({
	52	stream => $stream,
	53	parser => $parser,
	54	version => $version,
	55	});
	56
	57	Returns L<TAP::Parser> grammar object that will parse the specified stream.
	58	Both C<stream> and C<parser> are required arguments. If C<version> is not set
	59	it defaults to C<12> (see L</set_version> for more details).
	60
	61	=cut
	62
	63	# new() implementation supplied by TAP::Object
	64	sub _initialize {
65	my ( $self, $args ) = @_;
66	$self->{stream} = $args->{stream}; # TODO: accessor
67	$self->{parser} = $args->{parser}; # TODO: accessor
68	$self->set_version( $args->{version} \|\| 12 );
69	return $self;
70	}
71
72	my %language_for;
73
74	{
75
76	# XXX the 'not' and 'ok' might be on separate lines in VMS ...
77	my $ok = qr/(?:not )?ok\b/;
78	my $num = qr/\d+/;
79
80	my %v12 = (
81	version => {
82	syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
83	handler => sub {
84	my ( $self, $line ) = @_;
85	my $version = $1;
86	return $self->_make_version_token( $line, $version, );
87	},
88	},
89	plan => {
90	syntax => qr/^1\.\.(\d+)\s(.)\z/,
91	handler => sub {
92	my ( $self, $line ) = @_;
93	my ( $tests_planned, $tail ) = ( $1, $2 );
94	my $explanation = undef;
95	my $skip = '';
96
97	if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
98	my @todo = split /\s+/, _trim($1);
99	return $self->_make_plan_token(
100	$line, $tests_planned, 'TODO',
101	'', \@todo
102	);
103	}
104	elsif ( 0 == $tests_planned ) {
105	$skip = 'SKIP';
106
107	# If we can't match # SKIP the directive should be undef.
108	($explanation) = $tail =~ /^#\sSKIP\S\s+(.*)/i;
109	}
110	elsif ( $tail !~ /^\s*$/ ) {
111	return $self->_make_unknown_token($line);
112	}
113
114	$explanation = '' unless defined $explanation;
115
116	return $self->_make_plan_token(
117	$line, $tests_planned, $skip,
118	$explanation, []
119	);
120
121	},
122	},
123
124	# An optimization to handle the most common test lines without
125	# directives.
126	simple_test => {
127	syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
128	handler => sub {
129	my ( $self, $line ) = @_;
130	my ( $ok, $num, $desc ) = ( $1, $2, $3 );
131
132	return $self->_make_test_token(
133	$line, $ok, $num,
134	$desc
135	);
136	},
137	},
138	test => {
139	syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
140	handler => sub {
141	my ( $self, $line ) = @_;
142	my ( $ok, $num, $desc ) = ( $1, $2, $3 );
143	my ( $dir, $explanation ) = ( '', '' );
144	if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
145	\# \s* (SKIP\|TODO) \b \s* (.*) $/ix
146	)
147	{
148	( $desc, $dir, $explanation ) = ( $1, $2, $3 );
149	}
150	return $self->_make_test_token(
151	$line, $ok, $num, $desc,
152	$dir, $explanation
153	);
154	},
155	},
156	comment => {
157	syntax => qr/^#(.*)/,
158	handler => sub {
159	my ( $self, $line ) = @_;
160	my $comment = $1;
161	return $self->_make_comment_token( $line, $comment );
162	},
163	},
164	bailout => {
165	syntax => qr/^Bail out!\s(.)/,
166	handler => sub {
167	my ( $self, $line ) = @_;
168	my $explanation = $1;
169	return $self->_make_bailout_token(
170	$line,
171	$explanation
172	);
173	},
174	},
175	);
176
177	my %v13 = (
178	%v12,
179	plan => {
180	syntax => qr/^1\.\.(\d+)(?:\s#\sSKIP\b(.*))?\z/i,
181	handler => sub {
182	my ( $self, $line ) = @_;
183	my ( $tests_planned, $explanation ) = ( $1, $2 );
184	my $skip
185	= ( 0 == $tests_planned \|\| defined $explanation )
186	? 'SKIP'
187	: '';
188	$explanation = '' unless defined $explanation;
189	return $self->_make_plan_token(
190	$line, $tests_planned, $skip,
191	$explanation, []
192	);
193	},
194	},
195	yaml => {
196	syntax => qr/^ (\s+) (---.*) $/x,
197	handler => sub {
198	my ( $self, $line ) = @_;
199	my ( $pad, $marker ) = ( $1, $2 );
200	return $self->_make_yaml_token( $pad, $marker );
201	},
202	},
203	pragma => {
204	syntax =>
205	qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
206	handler => sub {
207	my ( $self, $line ) = @_;
208	my $pragmas = $1;
209	return $self->_make_pragma_token( $line, $pragmas );
210	},
211	},
212	);
213
214	%language_for = (
215	'12' => {
216	tokens => \%v12,
217	},
218	'13' => {
219	tokens => \%v13,
220	setup => sub {
221	shift->{stream}->handle_unicode;
222	},
223	},
224	);
225	}
226
227	##############################################################################
228
229	=head2 Instance Methods
230
231	=head3 C<set_version>
232
233	$grammar->set_version(13);
234
235	Tell the grammar which TAP syntax version to support. The lowest
236	supported version is 12. Although 'TAP version' isn't valid version 12
237	syntax it is accepted so that higher version numbers may be parsed.
238
239	=cut
240
241	sub set_version {
242	my $self = shift;
243	my $version = shift;
244
245	if ( my $language = $language_for{$version} ) {
246	$self->{version} = $version;
247	$self->{tokens} = $language->{tokens};
248
249	if ( my $setup = $language->{setup} ) {
250	$self->$setup();
251	}
252
253	$self->_order_tokens;
254	}
255	else {
256	require Carp;
257	Carp::croak("Unsupported syntax version: $version");
258	}
259	}
260
261	# Optimization to put the most frequent tokens first.
262	sub _order_tokens {
263	my $self = shift;
264
265	my %copy = %{ $self->{tokens} };
266	my @ordered_tokens = grep {defined}
267	map { delete $copy{$_} } qw( simple_test test comment plan );
268	push @ordered_tokens, values %copy;
269
270	$self->{ordered_tokens} = \@ordered_tokens;
271	}
272
273	##############################################################################
274
275	=head3 C<tokenize>
276
277	my $token = $grammar->tokenize;
278
279	This method will return a L<TAP::Parser::Result> object representing the
280	current line of TAP.
281
282	=cut
283
284	sub tokenize {
285	my $self = shift;
286
287	my $line = $self->{stream}->next;
288	unless ( defined $line ) {
289	delete $self->{parser}; # break circular ref
290	return;
291	}
292
293	my $token;
294
295	foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
296	if ( $line =~ $token_data->{syntax} ) {
297	my $handler = $token_data->{handler};
298	$token = $self->$handler($line);
299	last;
300	}
301	}
302
303	$token = $self->_make_unknown_token($line) unless $token;
304
305	return $self->{parser}->make_result($token);
306	}
307
308	##############################################################################
309
310	=head3 C<token_types>
311
312	my @types = $grammar->token_types;
313
314	Returns the different types of tokens which this grammar can parse.
315
316	=cut
317
318	sub token_types {
319	my $self = shift;
320	return keys %{ $self->{tokens} };
321	}
322
323	##############################################################################
324
325	=head3 C<syntax_for>
326
327	my $syntax = $grammar->syntax_for($token_type);
328
329	Returns a pre-compiled regular expression which will match a chunk of TAP
330	corresponding to the token type. For example (not that you should really pay
331	attention to this, C<< $grammar->syntax_for('comment') >> will return
332	C<< qr/^#(.*)/ >>.
333
334	=cut
335
336	sub syntax_for {
337	my ( $self, $type ) = @_;
338	return $self->{tokens}->{$type}->{syntax};
339	}
340
341	##############################################################################
342
343	=head3 C<handler_for>
344
345	my $handler = $grammar->handler_for($token_type);
346
347	Returns a code reference which, when passed an appropriate line of TAP,
348	returns the lexed token corresponding to that line. As a result, the basic
349	TAP parsing loop looks similar to the following:
350
351	my @tokens;
352	my $grammar = TAP::Grammar->new;
353	LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
354	foreach my $type ( $grammar->token_types ) {
355	my $syntax = $grammar->syntax_for($type);
356	if ( $line =~ $syntax ) {
357	my $handler = $grammar->handler_for($type);
358	push @tokens => $grammar->$handler($line);
359	next LINE;
360	}
361	}
362	push @tokens => $grammar->_make_unknown_token($line);
363	}
364
365	=cut
366
367	sub handler_for {
368	my ( $self, $type ) = @_;
369	return $self->{tokens}->{$type}->{handler};
370	}
371
372	sub _make_version_token {
373	my ( $self, $line, $version ) = @_;
374	return {
375	type => 'version',
376	raw => $line,
377	version => $version,
378	};
379	}
380
381	sub _make_plan_token {
382	my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
383
384	if ( $directive eq 'SKIP'
385	&& 0 != $tests_planned
386	&& $self->{version} < 13 )
387	{
388	warn
389	"Specified SKIP directive in plan but more than 0 tests ($line)\n";
390	}
391
392	return {
393	type => 'plan',
394	raw => $line,
395	tests_planned => $tests_planned,
396	directive => $directive,
397	explanation => _trim($explanation),
398	todo_list => $todo,
399	};
400	}
401
402	sub _make_test_token {
403	my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
404	return {
405	ok => $ok,
406	test_num => $num,
407	description => _trim($desc),
408	directive => ( defined $dir ? uc $dir : '' ),
409	explanation => _trim($explanation),
410	raw => $line,
411	type => 'test',
412	};
413	}
414
415	sub _make_unknown_token {
416	my ( $self, $line ) = @_;
417	return {
418	raw => $line,
419	type => 'unknown',
420	};
421	}
422
423	sub _make_comment_token {
424	my ( $self, $line, $comment ) = @_;
425	return {
426	type => 'comment',
427	raw => $line,
428	comment => _trim($comment)
429	};
430	}
431
432	sub _make_bailout_token {
433	my ( $self, $line, $explanation ) = @_;
434	return {
435	type => 'bailout',
436	raw => $line,
437	bailout => _trim($explanation)
438	};
439	}
440
441	sub _make_yaml_token {
442	my ( $self, $pad, $marker ) = @_;
443
444	my $yaml = TAP::Parser::YAMLish::Reader->new;
445
446	my $stream = $self->{stream};
447
448	# Construct a reader that reads from our input stripping leading
449	# spaces from each line.
450	my $leader = length($pad);
451	my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
452	my @extra = ($marker);
453	my $reader = sub {
454	return shift @extra if @extra;
455	my $line = $stream->next;
456	return $2 if $line =~ $strip;
457	return;
458	};
459
460	my $data = $yaml->read($reader);
461
462	# Reconstitute input. This is convoluted. Maybe we should just
463	# record it on the way in...
464	chomp( my $raw = $yaml->get_raw );
465	$raw =~ s/^/$pad/mg;
466
467	return {
468	type => 'yaml',
469	raw => $raw,
470	data => $data
471	};
472	}
473
474	sub _make_pragma_token {
475	my ( $self, $line, $pragmas ) = @_;
476	return {
477	type => 'pragma',
478	raw => $line,
479	pragmas => [ split /\s,\s/, _trim($pragmas) ],
480	};
481	}
482
483	sub _trim {
484	my $data = shift;
485
486	return '' unless defined $data;
487
488	$data =~ s/^\s+//;
489	$data =~ s/\s+$//;
490	return $data;
491	}
492
493	1;
494
495	=head1 TAP GRAMMAR
496
497	B<NOTE:> This grammar is slightly out of date. There's still some discussion
498	about it and a new one will be provided when we have things better defined.
499
500	The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
501	stream-based protocol. In fact, it's quite legal to have an infinite stream.
502	For the same reason that we don't apply regexes to streams, we're not using a
503	formal grammar here. Instead, we parse the TAP in lines.
504
505	For purposes for forward compatability, any result which does not match the
506	following grammar is currently referred to as
507	L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
508
509	A formal grammar would look similar to the following:
510
511	(*
512	For the time being, I'm cheating on the EBNF by allowing
513	certain terms to be defined by POSIX character classes by
514	using the following syntax:
515
516	digit ::= [:digit:]
517
518	As far as I am aware, that's not valid EBNF. Sue me. I
519	didn't know how to write "char" otherwise (Unicode issues).
520	Suggestions welcome.
521	*)
522
523	tap ::= version? { comment \| unknown } leading_plan lines
524	\|
525	lines trailing_plan {comment}
526
527	version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
528
529	leading_plan ::= plan skip_directive? "\n"
530
531	trailing_plan ::= plan "\n"
532
533	plan ::= '1..' nonNegativeInteger
534
535	lines ::= line {line}
536
537	line ::= (comment \| test \| unknown \| bailout ) "\n"
538
539	test ::= status positiveInteger? description? directive?
540
541	status ::= 'not '? 'ok '
542
543	description ::= (character - (digit \| '#')) {character - '#'}
544
545	directive ::= todo_directive \| skip_directive
546
547	todo_directive ::= hash_mark 'TODO' ' ' {character}
548
549	skip_directive ::= hash_mark 'SKIP' ' ' {character}
550
551	comment ::= hash_mark {character}
552
553	hash_mark ::= '#' {' '}
554
555	bailout ::= 'Bail out!' {character}
556
557	unknown ::= { (character - "\n") }
558
559	(* POSIX character classes and other terminals *)
560
561	digit ::= [:digit:]
562	character ::= ([:print:] - "\n")
563	positiveInteger ::= ( digit - '0' ) {digit}
564	nonNegativeInteger ::= digit {digit}
565
566	=head1 SUBCLASSING
567
568	Please see L<TAP::Parser/SUBCLASSING> for a subclassing overview.
569
570	If you I<really> want to subclass L<TAP::Parser>'s grammar the best thing to
571	do is read through the code. There's no easy way of summarizing it here.
572
573	=head1 SEE ALSO
574
575	L<TAP::Object>,
576	L<TAP::Parser>,
577	L<TAP::Parser::Iterator>,
578	L<TAP::Parser::Result>,
579
580	=cut