[p5sagit/p5-mst-13.2.git] / lib / TAP / Parser / Grammar.pm

package TAP::Parser::Grammar;

use strict;
use vars qw($VERSION);

use TAP::Parser::Result          ();
use TAP::Parser::YAMLish::Reader ();

=head1 NAME

TAP::Parser::Grammar - A grammar for the Test Anything Protocol.

=head1 VERSION

Version 3.10

=cut

$VERSION = '3.10';

=head1 DESCRIPTION

C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
L<TAP::Parser::Result> subclasses to represent the tokens.

Do not attempt to use this class directly.  It won't make sense.  It's mainly
here to ensure that we will be able to have pluggable grammars when TAP is
expanded at some future date (plus, this stuff was really cluttering the
parser).

=cut

##############################################################################

=head2 Class Methods


=head3 C<new>

  my $grammar = TAP::Grammar->new($stream);

Returns TAP grammar object that will parse the specified stream.

=cut

sub new {
    my ( $class, $stream ) = @_;
    my $self = bless { stream => $stream }, $class;
    $self->set_version(12);
    return $self;
}

my %language_for;

{

    # XXX the 'not' and 'ok' might be on separate lines in VMS ...
    my $ok  = qr/(?:not )?ok\b/;
    my $num = qr/\d+/;

    my %v12 = (
        version => {
            syntax  => qr/^TAP\s+version\s+(\d+)\s*\z/i,
            handler => sub {
                my ( $self, $line ) = @_;
                my $version = $1;
                return $self->_make_version_token( $line, $version, );
            },
        },
        plan => {
            syntax  => qr/^1\.\.(\d+)\s*(.*)\z/,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $tests_planned, $tail ) = ( $1, $2 );
                my $explanation = undef;
                my $skip        = '';

                if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
                    my @todo = split /\s+/, _trim($1);
                    return $self->_make_plan_token(
                        $line, $tests_planned, 'TODO',
                        '',    \@todo
                    );
                }
                elsif ( 0 == $tests_planned ) {
                    $skip        = 'SKIP';
                    $explanation = $tail;

                    # Trim valid SKIP directive without being strict
                    # about its presence.
                    $explanation =~ s/^#\s*//;
                    $explanation =~ s/^skip\S*\s+//i;
                }
                elsif ( $tail !~ /^\s*$/ ) {
                    return $self->_make_unknown_token($line);
                }

                $explanation = '' unless defined $explanation;

                return $self->_make_plan_token(
                    $line, $tests_planned, $skip,
                    $explanation, []
                );

            },
        },

        # An optimization to handle the most common test lines without
        # directives.
        simple_test => {
            syntax  => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $ok, $num, $desc ) = ( $1, $2, $3 );

                return $self->_make_test_token(
                    $line, $ok, $num,
                    $desc
                );
            },
        },
        test => {
            syntax  => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $ok, $num, $desc ) = ( $1, $2, $3 );
                my ( $dir, $explanation ) = ( '', '' );
                if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
                       \# \s* (SKIP|TODO) \b \s* (.*) $/ix
                  )
                {
                    ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
                }
                return $self->_make_test_token(
                    $line,   $ok, $num, $desc,
                    $dir, $explanation
                );
            },
        },
        comment => {
            syntax  => qr/^#(.*)/,
            handler => sub {
                my ( $self, $line ) = @_;
                my $comment = $1;
                return $self->_make_comment_token( $line, $comment );
            },
        },
        bailout => {
            syntax  => qr/^Bail out!\s*(.*)/,
            handler => sub {
                my ( $self, $line ) = @_;
                my $explanation = $1;
                return $self->_make_bailout_token(
                    $line,
                    $explanation
                );
            },
        },
    );

    my %v13 = (
        %v12,
        plan => {
            syntax  => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $tests_planned, $explanation ) = ( $1, $2 );
                my $skip
                  = ( 0 == $tests_planned || defined $explanation )
                  ? 'SKIP'
                  : '';
                $explanation = '' unless defined $explanation;
                return $self->_make_plan_token(
                    $line, $tests_planned, $skip,
                    $explanation, []
                );
            },
        },
        yaml => {
            syntax  => qr/^ (\s+) (---.*) $/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $pad, $marker ) = ( $1, $2 );
                return $self->_make_yaml_token( $pad, $marker );
            },
        },
        pragma => {
            syntax =>
              qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my $pragmas = $1;
                return $self->_make_pragma_token( $line, $pragmas );
            },
        },
    );

    %language_for = (
        '12' => {
            tokens => \%v12,
        },
        '13' => {
            tokens => \%v13,
            setup  => sub {
                shift->{stream}->handle_unicode;
            },
        },
    );
}

##############################################################################

=head2 Instance Methods

=head3 C<set_version>

  $grammar->set_version(13);

Tell the grammar which TAP syntax version to support. The lowest
supported version is 12. Although 'TAP version' isn't valid version 12
syntax it is accepted so that higher version numbers may be parsed.

=cut

sub set_version {
    my $self    = shift;
    my $version = shift;

    if ( my $language = $language_for{$version} ) {
        $self->{tokens} = $language->{tokens};

        if ( my $setup = $language->{setup} ) {
            $self->$setup();
        }

        $self->_order_tokens;
    }
    else {
        require Carp;
        Carp::croak("Unsupported syntax version: $version");
    }
}

# Optimization to put the most frequent tokens first.
sub _order_tokens {
    my $self = shift;

    my %copy = %{ $self->{tokens} };
    my @ordered_tokens = grep {defined}
      map { delete $copy{$_} } qw( simple_test test comment plan );
    push @ordered_tokens, values %copy;

    $self->{ordered_tokens} = \@ordered_tokens;
}

##############################################################################

=head3 C<tokenize>

  my $token = $grammar->tokenize;

This method will return a L<TAP::Parser::Result> object representing the
current line of TAP.

=cut

sub tokenize {
    my $self = shift;

    my $line = $self->{stream}->next;
    return unless defined $line;

    my $token;

    foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
        if ( $line =~ $token_data->{syntax} ) {
            my $handler = $token_data->{handler};
            $token = $self->$handler($line);
            last;
        }
    }

    $token = $self->_make_unknown_token($line) unless $token;

    return TAP::Parser::Result->new($token);
}

##############################################################################

=head3 C<token_types>

  my @types = $grammar->token_types;

Returns the different types of tokens which this grammar can parse.

=cut

sub token_types {
    my $self = shift;
    return keys %{ $self->{tokens} };
}

##############################################################################

=head3 C<syntax_for>

  my $syntax = $grammar->syntax_for($token_type);

Returns a pre-compiled regular expression which will match a chunk of TAP
corresponding to the token type.  For example (not that you should really pay
attention to this, C<< $grammar->syntax_for('comment') >> will return
C<< qr/^#(.*)/ >>.

=cut

sub syntax_for {
    my ( $self, $type ) = @_;
    return $self->{tokens}->{$type}->{syntax};
}

##############################################################################

=head3 C<handler_for>

  my $handler = $grammar->handler_for($token_type);

Returns a code reference which, when passed an appropriate line of TAP,
returns the lexed token corresponding to that line.  As a result, the basic
TAP parsing loop looks similar to the following:

 my @tokens;
 my $grammar = TAP::Grammar->new;
 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
     foreach my $type ( $grammar->token_types ) {
         my $syntax  = $grammar->syntax_for($type);
         if ( $line =~ $syntax ) {
             my $handler = $grammar->handler_for($type);
             push @tokens => $grammar->$handler($line);
             next LINE;
         }
     }
     push @tokens => $grammar->_make_unknown_token($line);
 }

=cut

sub handler_for {
    my ( $self, $type ) = @_;
    return $self->{tokens}->{$type}->{handler};
}

sub _make_version_token {
    my ( $self, $line, $version ) = @_;
    return {
        type    => 'version',
        raw     => $line,
        version => $version,
    };
}

sub _make_plan_token {
    my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;

    if ( $directive eq 'SKIP' && 0 != $tests_planned ) {
        warn
          "Specified SKIP directive in plan but more than 0 tests ($line)\n";
    }
    return {
        type          => 'plan',
        raw           => $line,
        tests_planned => $tests_planned,
        directive     => $directive,
        explanation   => _trim($explanation),
        todo_list     => $todo,
    };
}

sub _make_test_token {
    my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
    my %test = (
        ok          => $ok,
        test_num    => $num,
        description => _trim($desc),
        directive   => uc( defined $dir ? $dir : '' ),
        explanation => _trim($explanation),
        raw         => $line,
        type        => 'test',
    );
    return \%test;
}

sub _make_unknown_token {
    my ( $self, $line ) = @_;
    return {
        raw  => $line,
        type => 'unknown',
    };
}

sub _make_comment_token {
    my ( $self, $line, $comment ) = @_;
    return {
        type    => 'comment',
        raw     => $line,
        comment => _trim($comment)
    };
}

sub _make_bailout_token {
    my ( $self, $line, $explanation ) = @_;
    return {
        type    => 'bailout',
        raw     => $line,
        bailout => _trim($explanation)
    };
}

sub _make_yaml_token {
    my ( $self, $pad, $marker ) = @_;

    my $yaml = TAP::Parser::YAMLish::Reader->new;

    my $stream = $self->{stream};

    # Construct a reader that reads from our input stripping leading
    # spaces from each line.
    my $leader = length($pad);
    my $strip  = qr{ ^ (\s{$leader}) (.*) $ }x;
    my @extra  = ($marker);
    my $reader = sub {
        return shift @extra if @extra;
        my $line = $stream->next;
        return $2 if $line =~ $strip;
        return;
    };

    my $data = $yaml->read($reader);

    # Reconstitute input. This is convoluted. Maybe we should just
    # record it on the way in...
    chomp( my $raw = $yaml->get_raw );
    $raw =~ s/^/$pad/mg;

    return {
        type => 'yaml',
        raw  => $raw,
        data => $data
    };
}

sub _make_pragma_token {
    my ( $self, $line, $pragmas ) = @_;
    return {
        type    => 'pragma',
        raw     => $line,
        pragmas => [ split /\s*,\s*/, _trim($pragmas) ],
    };
}

sub _trim {
    my $data = shift;

    return '' unless defined $data;

    $data =~ s/^\s+//;
    $data =~ s/\s+$//;
    return $data;
}

=head1 TAP GRAMMAR

B<NOTE:>  This grammar is slightly out of date.  There's still some discussion
about it and a new one will be provided when we have things better defined.

The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
stream-based protocol.  In fact, it's quite legal to have an infinite stream.
For the same reason that we don't apply regexes to streams, we're not using a
formal grammar here.  Instead, we parse the TAP in lines.

For purposes for forward compatability, any result which does not match the
following grammar is currently referred to as
L<TAP::Parser::Result::Unknown>.  It is I<not> a parse error.

A formal grammar would look similar to the following:

 (*
     For the time being, I'm cheating on the EBNF by allowing
     certain terms to be defined by POSIX character classes by
     using the following syntax:

       digit ::= [:digit:]

     As far as I am aware, that's not valid EBNF.  Sue me.  I
     didn't know how to write "char" otherwise (Unicode issues).
     Suggestions welcome.
 *)

 tap            ::= version? { comment | unknown } leading_plan lines
                    |
                    lines trailing_plan {comment}

 version        ::= 'TAP version ' positiveInteger {positiveInteger} "\n"

 leading_plan   ::= plan skip_directive? "\n"

 trailing_plan  ::= plan "\n"

 plan           ::= '1..' nonNegativeInteger

 lines          ::= line {line}

 line           ::= (comment | test | unknown | bailout ) "\n"

 test           ::= status positiveInteger? description? directive?

 status         ::= 'not '? 'ok '

 description    ::= (character - (digit | '#')) {character - '#'}

 directive      ::= todo_directive | skip_directive

 todo_directive ::= hash_mark 'TODO' ' ' {character}

 skip_directive ::= hash_mark 'SKIP' ' ' {character}

 comment        ::= hash_mark {character}

 hash_mark      ::= '#' {' '}

 bailout        ::= 'Bail out!' {character}

 unknown        ::= { (character - "\n") }

 (* POSIX character classes and other terminals *)

 digit              ::= [:digit:]
 character          ::= ([:print:] - "\n")
 positiveInteger    ::= ( digit - '0' ) {digit}
 nonNegativeInteger ::= digit {digit}


=cut

1;
Commit	Line	Data
b965d173	1	package TAP::Parser::Grammar;
	2
	3	use strict;
	4	use vars qw($VERSION);
	5
	6	use TAP::Parser::Result ();
	7	use TAP::Parser::YAMLish::Reader ();
	8
	9	=head1 NAME
	10
	11	TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
	12
	13	=head1 VERSION
	14
2a7f4b9b	15	Version 3.10
b965d173	16
	17	=cut
	18
2a7f4b9b	19	$VERSION = '3.10';
b965d173	20
	21	=head1 DESCRIPTION
	22
	23	C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
	24	L<TAP::Parser::Result> subclasses to represent the tokens.
	25
	26	Do not attempt to use this class directly. It won't make sense. It's mainly
	27	here to ensure that we will be able to have pluggable grammars when TAP is
	28	expanded at some future date (plus, this stuff was really cluttering the
	29	parser).
	30
	31	=cut
	32
	33	##############################################################################
	34
	35	=head2 Class Methods
	36
	37
	38	=head3 C<new>
	39
	40	my $grammar = TAP::Grammar->new($stream);
	41
	42	Returns TAP grammar object that will parse the specified stream.
	43
	44	=cut
	45
	46	sub new {
	47	my ( $class, $stream ) = @_;
	48	my $self = bless { stream => $stream }, $class;
	49	$self->set_version(12);
	50	return $self;
	51	}
	52
	53	my %language_for;
	54
	55	{
	56
	57	# XXX the 'not' and 'ok' might be on separate lines in VMS ...
	58	my $ok = qr/(?:not )?ok\b/;
	59	my $num = qr/\d+/;
	60
	61	my %v12 = (
	62	version => {
	63	syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
	64	handler => sub {
	65	my ( $self, $line ) = @_;
	66	my $version = $1;
	67	return $self->_make_version_token( $line, $version, );
	68	},
	69	},
	70	plan => {
	71	syntax => qr/^1\.\.(\d+)\s(.)\z/,
	72	handler => sub {
	73	my ( $self, $line ) = @_;
	74	my ( $tests_planned, $tail ) = ( $1, $2 );
	75	my $explanation = undef;
	76	my $skip = '';
	77
	78	if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
	79	my @todo = split /\s+/, _trim($1);
	80	return $self->_make_plan_token(
	81	$line, $tests_planned, 'TODO',
	82	'', \@todo
	83	);
84	}
85	elsif ( 0 == $tests_planned ) {
86	$skip = 'SKIP';
87	$explanation = $tail;
88
89	# Trim valid SKIP directive without being strict
90	# about its presence.
91	$explanation =~ s/^#\s*//;
92	$explanation =~ s/^skip\S*\s+//i;
93	}
94	elsif ( $tail !~ /^\s*$/ ) {
95	return $self->_make_unknown_token($line);
96	}
97
98	$explanation = '' unless defined $explanation;
99
100	return $self->_make_plan_token(
101	$line, $tests_planned, $skip,
102	$explanation, []
103	);
104
105	},
106	},
107
108	# An optimization to handle the most common test lines without
109	# directives.
110	simple_test => {
111	syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
112	handler => sub {
113	my ( $self, $line ) = @_;
114	my ( $ok, $num, $desc ) = ( $1, $2, $3 );
115
116	return $self->_make_test_token(
117	$line, $ok, $num,
118	$desc
119	);
120	},
121	},
122	test => {
123	syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
124	handler => sub {
125	my ( $self, $line ) = @_;
126	my ( $ok, $num, $desc ) = ( $1, $2, $3 );
127	my ( $dir, $explanation ) = ( '', '' );
128	if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
129	\# \s* (SKIP\|TODO) \b \s* (.*) $/ix
130	)
131	{
132	( $desc, $dir, $explanation ) = ( $1, $2, $3 );
133	}
134	return $self->_make_test_token(
135	$line, $ok, $num, $desc,
a62d3a1b	136	$dir, $explanation
b965d173	137	);
	138	},
	139	},
	140	comment => {
	141	syntax => qr/^#(.*)/,
	142	handler => sub {
	143	my ( $self, $line ) = @_;
	144	my $comment = $1;
	145	return $self->_make_comment_token( $line, $comment );
	146	},
	147	},
	148	bailout => {
	149	syntax => qr/^Bail out!\s(.)/,
	150	handler => sub {
	151	my ( $self, $line ) = @_;
	152	my $explanation = $1;
	153	return $self->_make_bailout_token(
	154	$line,
	155	$explanation
	156	);
	157	},
	158	},
	159	);
	160
	161	my %v13 = (
	162	%v12,
	163	plan => {
	164	syntax => qr/^1\.\.(\d+)(?:\s#\sSKIP\b(.*))?\z/i,
	165	handler => sub {
	166	my ( $self, $line ) = @_;
	167	my ( $tests_planned, $explanation ) = ( $1, $2 );
	168	my $skip
	169	= ( 0 == $tests_planned \|\| defined $explanation )
	170	? 'SKIP'
	171	: '';
	172	$explanation = '' unless defined $explanation;
	173	return $self->_make_plan_token(
	174	$line, $tests_planned, $skip,
	175	$explanation, []
	176	);
	177	},
	178	},
	179	yaml => {
	180	syntax => qr/^ (\s+) (---.*) $/x,
	181	handler => sub {
	182	my ( $self, $line ) = @_;
	183	my ( $pad, $marker ) = ( $1, $2 );
	184	return $self->_make_yaml_token( $pad, $marker );
	185	},
	186	},
2a7f4b9b	187	pragma => {
	188	syntax =>
	189	qr/^ pragma \s+ ( [-+] \w+ \s* (?: , \s* [-+] \w+ \s* )* ) $/x,
	190	handler => sub {
	191	my ( $self, $line ) = @_;
	192	my $pragmas = $1;
	193	return $self->_make_pragma_token( $line, $pragmas );
	194	},
	195	},
b965d173	196	);
	197
	198	%language_for = (
	199	'12' => {
	200	tokens => \%v12,
	201	},
	202	'13' => {
	203	tokens => \%v13,
	204	setup => sub {
	205	shift->{stream}->handle_unicode;
	206	},
	207	},
	208	);
	209	}
	210
	211	##############################################################################
	212
	213	=head2 Instance Methods
	214
	215	=head3 C<set_version>
	216
	217	$grammar->set_version(13);
	218
	219	Tell the grammar which TAP syntax version to support. The lowest
	220	supported version is 12. Although 'TAP version' isn't valid version 12
	221	syntax it is accepted so that higher version numbers may be parsed.
	222
	223	=cut
	224
	225	sub set_version {
	226	my $self = shift;
	227	my $version = shift;
	228
	229	if ( my $language = $language_for{$version} ) {
	230	$self->{tokens} = $language->{tokens};
	231
	232	if ( my $setup = $language->{setup} ) {
	233	$self->$setup();
	234	}
	235
	236	$self->_order_tokens;
	237	}
	238	else {
	239	require Carp;
	240	Carp::croak("Unsupported syntax version: $version");
	241	}
	242	}
	243
	244	# Optimization to put the most frequent tokens first.
	245	sub _order_tokens {
	246	my $self = shift;
	247
	248	my %copy = %{ $self->{tokens} };
	249	my @ordered_tokens = grep {defined}
	250	map { delete $copy{$_} } qw( simple_test test comment plan );
	251	push @ordered_tokens, values %copy;
	252
	253	$self->{ordered_tokens} = \@ordered_tokens;
	254	}
	255
	256	##############################################################################
	257
	258	=head3 C<tokenize>
	259
260	my $token = $grammar->tokenize;
261
262	This method will return a L<TAP::Parser::Result> object representing the
263	current line of TAP.
264
265	=cut
266
267	sub tokenize {
268	my $self = shift;
269
270	my $line = $self->{stream}->next;
271	return unless defined $line;
272
273	my $token;
274
275	foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
276	if ( $line =~ $token_data->{syntax} ) {
277	my $handler = $token_data->{handler};
278	$token = $self->$handler($line);
279	last;
280	}
281	}
282
283	$token = $self->_make_unknown_token($line) unless $token;
284
285	return TAP::Parser::Result->new($token);
286	}
287
288	##############################################################################
289
290	=head3 C<token_types>
291
292	my @types = $grammar->token_types;
293
294	Returns the different types of tokens which this grammar can parse.
295
296	=cut
297
298	sub token_types {
299	my $self = shift;
300	return keys %{ $self->{tokens} };
301	}
302
303	##############################################################################
304
305	=head3 C<syntax_for>
306
307	my $syntax = $grammar->syntax_for($token_type);
308
309	Returns a pre-compiled regular expression which will match a chunk of TAP
310	corresponding to the token type. For example (not that you should really pay
311	attention to this, C<< $grammar->syntax_for('comment') >> will return
312	C<< qr/^#(.*)/ >>.
313
314	=cut
315
316	sub syntax_for {
317	my ( $self, $type ) = @_;
318	return $self->{tokens}->{$type}->{syntax};
319	}
320
321	##############################################################################
322
323	=head3 C<handler_for>
324
325	my $handler = $grammar->handler_for($token_type);
326
327	Returns a code reference which, when passed an appropriate line of TAP,
328	returns the lexed token corresponding to that line. As a result, the basic
329	TAP parsing loop looks similar to the following:
330
331	my @tokens;
332	my $grammar = TAP::Grammar->new;
333	LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
334	foreach my $type ( $grammar->token_types ) {
335	my $syntax = $grammar->syntax_for($type);
336	if ( $line =~ $syntax ) {
337	my $handler = $grammar->handler_for($type);
338	push @tokens => $grammar->$handler($line);
339	next LINE;
340	}
341	}
342	push @tokens => $grammar->_make_unknown_token($line);
343	}
344
345	=cut
346
347	sub handler_for {
348	my ( $self, $type ) = @_;
349	return $self->{tokens}->{$type}->{handler};
350	}
351
352	sub _make_version_token {
353	my ( $self, $line, $version ) = @_;
354	return {
355	type => 'version',
356	raw => $line,
357	version => $version,
358	};
359	}
360
361	sub _make_plan_token {
362	my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
363
364	if ( $directive eq 'SKIP' && 0 != $tests_planned ) {
365	warn
366	"Specified SKIP directive in plan but more than 0 tests ($line)\n";
367	}
368	return {
369	type => 'plan',
370	raw => $line,
371	tests_planned => $tests_planned,
372	directive => $directive,
373	explanation => _trim($explanation),
374	todo_list => $todo,
375	};
376	}
377
378	sub _make_test_token {
379	my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
380	my %test = (
381	ok => $ok,
382	test_num => $num,
383	description => _trim($desc),
2a7f4b9b	384	directive => uc( defined $dir ? $dir : '' ),
b965d173	385	explanation => _trim($explanation),
	386	raw => $line,
	387	type => 'test',
	388	);
	389	return \%test;
	390	}
	391
	392	sub _make_unknown_token {
	393	my ( $self, $line ) = @_;
	394	return {
	395	raw => $line,
	396	type => 'unknown',
	397	};
	398	}
	399
	400	sub _make_comment_token {
	401	my ( $self, $line, $comment ) = @_;
	402	return {
	403	type => 'comment',
	404	raw => $line,
	405	comment => _trim($comment)
	406	};
	407	}
	408
	409	sub _make_bailout_token {
	410	my ( $self, $line, $explanation ) = @_;
	411	return {
	412	type => 'bailout',
	413	raw => $line,
	414	bailout => _trim($explanation)
	415	};
	416	}
	417
	418	sub _make_yaml_token {
	419	my ( $self, $pad, $marker ) = @_;
	420
	421	my $yaml = TAP::Parser::YAMLish::Reader->new;
	422
	423	my $stream = $self->{stream};
	424
	425	# Construct a reader that reads from our input stripping leading
	426	# spaces from each line.
	427	my $leader = length($pad);
	428	my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
	429	my @extra = ($marker);
	430	my $reader = sub {
	431	return shift @extra if @extra;
	432	my $line = $stream->next;
	433	return $2 if $line =~ $strip;
	434	return;
	435	};
	436
	437	my $data = $yaml->read($reader);
	438
	439	# Reconstitute input. This is convoluted. Maybe we should just
	440	# record it on the way in...
	441	chomp( my $raw = $yaml->get_raw );
	442	$raw =~ s/^/$pad/mg;
	443
	444	return {
	445	type => 'yaml',
	446	raw => $raw,
	447	data => $data
	448	};
449	}
450
2a7f4b9b	451	sub _make_pragma_token {
	452	my ( $self, $line, $pragmas ) = @_;
	453	return {
	454	type => 'pragma',
	455	raw => $line,
	456	pragmas => [ split /\s,\s/, _trim($pragmas) ],
	457	};
	458	}
	459
b965d173	460	sub _trim {
	461	my $data = shift;
	462
	463	return '' unless defined $data;
	464
	465	$data =~ s/^\s+//;
	466	$data =~ s/\s+$//;
	467	return $data;
	468	}
	469
	470	=head1 TAP GRAMMAR
	471
	472	B<NOTE:> This grammar is slightly out of date. There's still some discussion
	473	about it and a new one will be provided when we have things better defined.
	474
	475	The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
	476	stream-based protocol. In fact, it's quite legal to have an infinite stream.
	477	For the same reason that we don't apply regexes to streams, we're not using a
	478	formal grammar here. Instead, we parse the TAP in lines.
	479
	480	For purposes for forward compatability, any result which does not match the
	481	following grammar is currently referred to as
	482	L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
	483
	484	A formal grammar would look similar to the following:
	485
	486	(*
	487	For the time being, I'm cheating on the EBNF by allowing
	488	certain terms to be defined by POSIX character classes by
	489	using the following syntax:
	490
	491	digit ::= [:digit:]
	492
	493	As far as I am aware, that's not valid EBNF. Sue me. I
	494	didn't know how to write "char" otherwise (Unicode issues).
	495	Suggestions welcome.
	496	*)
	497
	498	tap ::= version? { comment \| unknown } leading_plan lines
	499	\|
	500	lines trailing_plan {comment}
	501
	502	version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
	503
	504	leading_plan ::= plan skip_directive? "\n"
	505
	506	trailing_plan ::= plan "\n"
	507
	508	plan ::= '1..' nonNegativeInteger
	509
	510	lines ::= line {line}
	511
	512	line ::= (comment \| test \| unknown \| bailout ) "\n"
	513
	514	test ::= status positiveInteger? description? directive?
	515
	516	status ::= 'not '? 'ok '
	517
	518	description ::= (character - (digit \| '#')) {character - '#'}
	519
	520	directive ::= todo_directive \| skip_directive
	521
	522	todo_directive ::= hash_mark 'TODO' ' ' {character}
	523
524	skip_directive ::= hash_mark 'SKIP' ' ' {character}
525
526	comment ::= hash_mark {character}
527
528	hash_mark ::= '#' {' '}
529
530	bailout ::= 'Bail out!' {character}
531
532	unknown ::= { (character - "\n") }
533
534	(* POSIX character classes and other terminals *)
535
536	digit ::= [:digit:]
537	character ::= ([:print:] - "\n")
538	positiveInteger ::= ( digit - '0' ) {digit}
539	nonNegativeInteger ::= digit {digit}
540
541
542	=cut
543
544	1;