[p5sagit/p5-mst-13.2.git] / lib / TAP / Parser / Grammar.pm

package TAP::Parser::Grammar;

use strict;
use vars qw($VERSION);

use TAP::Parser::Result          ();
use TAP::Parser::YAMLish::Reader ();

=head1 NAME

TAP::Parser::Grammar - A grammar for the Test Anything Protocol.

=head1 VERSION

Version 3.06

=cut

$VERSION = '3.06';

=head1 DESCRIPTION

C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
L<TAP::Parser::Result> subclasses to represent the tokens.

Do not attempt to use this class directly.  It won't make sense.  It's mainly
here to ensure that we will be able to have pluggable grammars when TAP is
expanded at some future date (plus, this stuff was really cluttering the
parser).

=cut

##############################################################################

=head2 Class Methods


=head3 C<new>

  my $grammar = TAP::Grammar->new($stream);

Returns TAP grammar object that will parse the specified stream.

=cut

sub new {
    my ( $class, $stream ) = @_;
    my $self = bless { stream => $stream }, $class;
    $self->set_version(12);
    return $self;
}

my %language_for;

{

    # XXX the 'not' and 'ok' might be on separate lines in VMS ...
    my $ok  = qr/(?:not )?ok\b/;
    my $num = qr/\d+/;

    my %v12 = (
        version => {
            syntax  => qr/^TAP\s+version\s+(\d+)\s*\z/i,
            handler => sub {
                my ( $self, $line ) = @_;
                my $version = $1;
                return $self->_make_version_token( $line, $version, );
            },
        },
        plan => {
            syntax  => qr/^1\.\.(\d+)\s*(.*)\z/,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $tests_planned, $tail ) = ( $1, $2 );
                my $explanation = undef;
                my $skip        = '';

                if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
                    my @todo = split /\s+/, _trim($1);
                    return $self->_make_plan_token(
                        $line, $tests_planned, 'TODO',
                        '',    \@todo
                    );
                }
                elsif ( 0 == $tests_planned ) {
                    $skip        = 'SKIP';
                    $explanation = $tail;

                    # Trim valid SKIP directive without being strict
                    # about its presence.
                    $explanation =~ s/^#\s*//;
                    $explanation =~ s/^skip\S*\s+//i;
                }
                elsif ( $tail !~ /^\s*$/ ) {
                    return $self->_make_unknown_token($line);
                }

                $explanation = '' unless defined $explanation;

                return $self->_make_plan_token(
                    $line, $tests_planned, $skip,
                    $explanation, []
                );

            },
        },

        # An optimization to handle the most common test lines without
        # directives.
        simple_test => {
            syntax  => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $ok, $num, $desc ) = ( $1, $2, $3 );

                return $self->_make_test_token(
                    $line, $ok, $num,
                    $desc
                );
            },
        },
        test => {
            syntax  => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $ok, $num, $desc ) = ( $1, $2, $3 );
                my ( $dir, $explanation ) = ( '', '' );
                if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
                       \# \s* (SKIP|TODO) \b \s* (.*) $/ix
                  )
                {
                    ( $desc, $dir, $explanation ) = ( $1, $2, $3 );
                }
                return $self->_make_test_token(
                    $line,   $ok, $num, $desc,
                    uc $dir, $explanation
                );
            },
        },
        comment => {
            syntax  => qr/^#(.*)/,
            handler => sub {
                my ( $self, $line ) = @_;
                my $comment = $1;
                return $self->_make_comment_token( $line, $comment );
            },
        },
        bailout => {
            syntax  => qr/^Bail out!\s*(.*)/,
            handler => sub {
                my ( $self, $line ) = @_;
                my $explanation = $1;
                return $self->_make_bailout_token(
                    $line,
                    $explanation
                );
            },
        },
    );

    my %v13 = (
        %v12,
        plan => {
            syntax  => qr/^1\.\.(\d+)(?:\s*#\s*SKIP\b(.*))?\z/i,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $tests_planned, $explanation ) = ( $1, $2 );
                my $skip
                  = ( 0 == $tests_planned || defined $explanation )
                  ? 'SKIP'
                  : '';
                $explanation = '' unless defined $explanation;
                return $self->_make_plan_token(
                    $line, $tests_planned, $skip,
                    $explanation, []
                );
            },
        },
        yaml => {
            syntax  => qr/^ (\s+) (---.*) $/x,
            handler => sub {
                my ( $self, $line ) = @_;
                my ( $pad, $marker ) = ( $1, $2 );
                return $self->_make_yaml_token( $pad, $marker );
            },
        },
    );

    %language_for = (
        '12' => {
            tokens => \%v12,
        },
        '13' => {
            tokens => \%v13,
            setup  => sub {
                shift->{stream}->handle_unicode;
            },
        },
    );
}

##############################################################################

=head2 Instance Methods

=head3 C<set_version>

  $grammar->set_version(13);

Tell the grammar which TAP syntax version to support. The lowest
supported version is 12. Although 'TAP version' isn't valid version 12
syntax it is accepted so that higher version numbers may be parsed.

=cut

sub set_version {
    my $self    = shift;
    my $version = shift;

    if ( my $language = $language_for{$version} ) {
        $self->{tokens} = $language->{tokens};

        if ( my $setup = $language->{setup} ) {
            $self->$setup();
        }

        $self->_order_tokens;
    }
    else {
        require Carp;
        Carp::croak("Unsupported syntax version: $version");
    }
}

# Optimization to put the most frequent tokens first.
sub _order_tokens {
    my $self = shift;

    my %copy = %{ $self->{tokens} };
    my @ordered_tokens = grep {defined}
      map { delete $copy{$_} } qw( simple_test test comment plan );
    push @ordered_tokens, values %copy;

    $self->{ordered_tokens} = \@ordered_tokens;
}

##############################################################################

=head3 C<tokenize>

  my $token = $grammar->tokenize;

This method will return a L<TAP::Parser::Result> object representing the
current line of TAP.

=cut

sub tokenize {
    my $self = shift;

    my $line = $self->{stream}->next;
    return unless defined $line;

    my $token;

    foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
        if ( $line =~ $token_data->{syntax} ) {
            my $handler = $token_data->{handler};
            $token = $self->$handler($line);
            last;
        }
    }

    $token = $self->_make_unknown_token($line) unless $token;

    return TAP::Parser::Result->new($token);
}

##############################################################################

=head3 C<token_types>

  my @types = $grammar->token_types;

Returns the different types of tokens which this grammar can parse.

=cut

sub token_types {
    my $self = shift;
    return keys %{ $self->{tokens} };
}

##############################################################################

=head3 C<syntax_for>

  my $syntax = $grammar->syntax_for($token_type);

Returns a pre-compiled regular expression which will match a chunk of TAP
corresponding to the token type.  For example (not that you should really pay
attention to this, C<< $grammar->syntax_for('comment') >> will return
C<< qr/^#(.*)/ >>.

=cut

sub syntax_for {
    my ( $self, $type ) = @_;
    return $self->{tokens}->{$type}->{syntax};
}

##############################################################################

=head3 C<handler_for>

  my $handler = $grammar->handler_for($token_type);

Returns a code reference which, when passed an appropriate line of TAP,
returns the lexed token corresponding to that line.  As a result, the basic
TAP parsing loop looks similar to the following:

 my @tokens;
 my $grammar = TAP::Grammar->new;
 LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
     foreach my $type ( $grammar->token_types ) {
         my $syntax  = $grammar->syntax_for($type);
         if ( $line =~ $syntax ) {
             my $handler = $grammar->handler_for($type);
             push @tokens => $grammar->$handler($line);
             next LINE;
         }
     }
     push @tokens => $grammar->_make_unknown_token($line);
 }

=cut

sub handler_for {
    my ( $self, $type ) = @_;
    return $self->{tokens}->{$type}->{handler};
}

sub _make_version_token {
    my ( $self, $line, $version ) = @_;
    return {
        type    => 'version',
        raw     => $line,
        version => $version,
    };
}

sub _make_plan_token {
    my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;

    if ( $directive eq 'SKIP' && 0 != $tests_planned ) {
        warn
          "Specified SKIP directive in plan but more than 0 tests ($line)\n";
    }
    return {
        type          => 'plan',
        raw           => $line,
        tests_planned => $tests_planned,
        directive     => $directive,
        explanation   => _trim($explanation),
        todo_list     => $todo,
    };
}

sub _make_test_token {
    my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
    my %test = (
        ok          => $ok,
        test_num    => $num,
        description => _trim($desc),
        directive   => uc($dir),
        explanation => _trim($explanation),
        raw         => $line,
        type        => 'test',
    );
    return \%test;
}

sub _make_unknown_token {
    my ( $self, $line ) = @_;
    return {
        raw  => $line,
        type => 'unknown',
    };
}

sub _make_comment_token {
    my ( $self, $line, $comment ) = @_;
    return {
        type    => 'comment',
        raw     => $line,
        comment => _trim($comment)
    };
}

sub _make_bailout_token {
    my ( $self, $line, $explanation ) = @_;
    return {
        type    => 'bailout',
        raw     => $line,
        bailout => _trim($explanation)
    };
}

sub _make_yaml_token {
    my ( $self, $pad, $marker ) = @_;

    my $yaml = TAP::Parser::YAMLish::Reader->new;

    my $stream = $self->{stream};

    # Construct a reader that reads from our input stripping leading
    # spaces from each line.
    my $leader = length($pad);
    my $strip  = qr{ ^ (\s{$leader}) (.*) $ }x;
    my @extra  = ($marker);
    my $reader = sub {
        return shift @extra if @extra;
        my $line = $stream->next;
        return $2 if $line =~ $strip;
        return;
    };

    my $data = $yaml->read($reader);

    # Reconstitute input. This is convoluted. Maybe we should just
    # record it on the way in...
    chomp( my $raw = $yaml->get_raw );
    $raw =~ s/^/$pad/mg;

    return {
        type => 'yaml',
        raw  => $raw,
        data => $data
    };
}

sub _trim {
    my $data = shift;

    return '' unless defined $data;

    $data =~ s/^\s+//;
    $data =~ s/\s+$//;
    return $data;
}

=head1 TAP GRAMMAR

B<NOTE:>  This grammar is slightly out of date.  There's still some discussion
about it and a new one will be provided when we have things better defined.

The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
stream-based protocol.  In fact, it's quite legal to have an infinite stream.
For the same reason that we don't apply regexes to streams, we're not using a
formal grammar here.  Instead, we parse the TAP in lines.

For purposes for forward compatability, any result which does not match the
following grammar is currently referred to as
L<TAP::Parser::Result::Unknown>.  It is I<not> a parse error.

A formal grammar would look similar to the following:

 (*
     For the time being, I'm cheating on the EBNF by allowing
     certain terms to be defined by POSIX character classes by
     using the following syntax:

       digit ::= [:digit:]

     As far as I am aware, that's not valid EBNF.  Sue me.  I
     didn't know how to write "char" otherwise (Unicode issues).
     Suggestions welcome.
 *)

 tap            ::= version? { comment | unknown } leading_plan lines
                    |
                    lines trailing_plan {comment}

 version        ::= 'TAP version ' positiveInteger {positiveInteger} "\n"

 leading_plan   ::= plan skip_directive? "\n"

 trailing_plan  ::= plan "\n"

 plan           ::= '1..' nonNegativeInteger

 lines          ::= line {line}

 line           ::= (comment | test | unknown | bailout ) "\n"

 test           ::= status positiveInteger? description? directive?

 status         ::= 'not '? 'ok '

 description    ::= (character - (digit | '#')) {character - '#'}

 directive      ::= todo_directive | skip_directive

 todo_directive ::= hash_mark 'TODO' ' ' {character}

 skip_directive ::= hash_mark 'SKIP' ' ' {character}

 comment        ::= hash_mark {character}

 hash_mark      ::= '#' {' '}

 bailout        ::= 'Bail out!' {character}

 unknown        ::= { (character - "\n") }

 (* POSIX character classes and other terminals *)

 digit              ::= [:digit:]
 character          ::= ([:print:] - "\n")
 positiveInteger    ::= ( digit - '0' ) {digit}
 nonNegativeInteger ::= digit {digit}


=cut

1;
Commit	Line	Data
b965d173	1	package TAP::Parser::Grammar;
	2
	3	use strict;
	4	use vars qw($VERSION);
	5
	6	use TAP::Parser::Result ();
	7	use TAP::Parser::YAMLish::Reader ();
	8
	9	=head1 NAME
	10
	11	TAP::Parser::Grammar - A grammar for the Test Anything Protocol.
	12
	13	=head1 VERSION
	14
69f36734	15	Version 3.06
b965d173	16
	17	=cut
	18
69f36734	19	$VERSION = '3.06';
b965d173	20
	21	=head1 DESCRIPTION
	22
	23	C<TAP::Parser::Grammar> tokenizes lines from a TAP stream and constructs
	24	L<TAP::Parser::Result> subclasses to represent the tokens.
	25
	26	Do not attempt to use this class directly. It won't make sense. It's mainly
	27	here to ensure that we will be able to have pluggable grammars when TAP is
	28	expanded at some future date (plus, this stuff was really cluttering the
	29	parser).
	30
	31	=cut
	32
	33	##############################################################################
	34
	35	=head2 Class Methods
	36
	37
	38	=head3 C<new>
	39
	40	my $grammar = TAP::Grammar->new($stream);
	41
	42	Returns TAP grammar object that will parse the specified stream.
	43
	44	=cut
	45
	46	sub new {
	47	my ( $class, $stream ) = @_;
	48	my $self = bless { stream => $stream }, $class;
	49	$self->set_version(12);
	50	return $self;
	51	}
	52
	53	my %language_for;
	54
	55	{
	56
	57	# XXX the 'not' and 'ok' might be on separate lines in VMS ...
	58	my $ok = qr/(?:not )?ok\b/;
	59	my $num = qr/\d+/;
	60
	61	my %v12 = (
	62	version => {
	63	syntax => qr/^TAP\s+version\s+(\d+)\s*\z/i,
	64	handler => sub {
	65	my ( $self, $line ) = @_;
	66	my $version = $1;
	67	return $self->_make_version_token( $line, $version, );
	68	},
	69	},
	70	plan => {
	71	syntax => qr/^1\.\.(\d+)\s(.)\z/,
	72	handler => sub {
	73	my ( $self, $line ) = @_;
	74	my ( $tests_planned, $tail ) = ( $1, $2 );
	75	my $explanation = undef;
	76	my $skip = '';
	77
	78	if ( $tail =~ /^todo((?:\s+\d+)+)/ ) {
	79	my @todo = split /\s+/, _trim($1);
	80	return $self->_make_plan_token(
	81	$line, $tests_planned, 'TODO',
	82	'', \@todo
	83	);
84	}
85	elsif ( 0 == $tests_planned ) {
86	$skip = 'SKIP';
87	$explanation = $tail;
88
89	# Trim valid SKIP directive without being strict
90	# about its presence.
91	$explanation =~ s/^#\s*//;
92	$explanation =~ s/^skip\S*\s+//i;
93	}
94	elsif ( $tail !~ /^\s*$/ ) {
95	return $self->_make_unknown_token($line);
96	}
97
98	$explanation = '' unless defined $explanation;
99
100	return $self->_make_plan_token(
101	$line, $tests_planned, $skip,
102	$explanation, []
103	);
104
105	},
106	},
107
108	# An optimization to handle the most common test lines without
109	# directives.
110	simple_test => {
111	syntax => qr/^($ok) \ ($num) (?:\ ([^#]+))? \z/x,
112	handler => sub {
113	my ( $self, $line ) = @_;
114	my ( $ok, $num, $desc ) = ( $1, $2, $3 );
115
116	return $self->_make_test_token(
117	$line, $ok, $num,
118	$desc
119	);
120	},
121	},
122	test => {
123	syntax => qr/^($ok) \s* ($num)? \s* (.*) \z/x,
124	handler => sub {
125	my ( $self, $line ) = @_;
126	my ( $ok, $num, $desc ) = ( $1, $2, $3 );
127	my ( $dir, $explanation ) = ( '', '' );
128	if ($desc =~ m/^ ( [^\\\#]* (?: \\. [^\\\#]* )* )
129	\# \s* (SKIP\|TODO) \b \s* (.*) $/ix
130	)
131	{
132	( $desc, $dir, $explanation ) = ( $1, $2, $3 );
133	}
134	return $self->_make_test_token(
135	$line, $ok, $num, $desc,
136	uc $dir, $explanation
137	);
138	},
139	},
140	comment => {
141	syntax => qr/^#(.*)/,
142	handler => sub {
143	my ( $self, $line ) = @_;
144	my $comment = $1;
145	return $self->_make_comment_token( $line, $comment );
146	},
147	},
148	bailout => {
149	syntax => qr/^Bail out!\s(.)/,
150	handler => sub {
151	my ( $self, $line ) = @_;
152	my $explanation = $1;
153	return $self->_make_bailout_token(
154	$line,
155	$explanation
156	);
157	},
158	},
159	);
160
161	my %v13 = (
162	%v12,
163	plan => {
164	syntax => qr/^1\.\.(\d+)(?:\s#\sSKIP\b(.*))?\z/i,
165	handler => sub {
166	my ( $self, $line ) = @_;
167	my ( $tests_planned, $explanation ) = ( $1, $2 );
168	my $skip
169	= ( 0 == $tests_planned \|\| defined $explanation )
170	? 'SKIP'
171	: '';
172	$explanation = '' unless defined $explanation;
173	return $self->_make_plan_token(
174	$line, $tests_planned, $skip,
175	$explanation, []
176	);
177	},
178	},
179	yaml => {
180	syntax => qr/^ (\s+) (---.*) $/x,
181	handler => sub {
182	my ( $self, $line ) = @_;
183	my ( $pad, $marker ) = ( $1, $2 );
184	return $self->_make_yaml_token( $pad, $marker );
185	},
186	},
187	);
188
189	%language_for = (
190	'12' => {
191	tokens => \%v12,
192	},
193	'13' => {
194	tokens => \%v13,
195	setup => sub {
196	shift->{stream}->handle_unicode;
197	},
198	},
199	);
200	}
201
202	##############################################################################
203
204	=head2 Instance Methods
205
206	=head3 C<set_version>
207
208	$grammar->set_version(13);
209
210	Tell the grammar which TAP syntax version to support. The lowest
211	supported version is 12. Although 'TAP version' isn't valid version 12
212	syntax it is accepted so that higher version numbers may be parsed.
213
214	=cut
215
216	sub set_version {
217	my $self = shift;
218	my $version = shift;
219
220	if ( my $language = $language_for{$version} ) {
221	$self->{tokens} = $language->{tokens};
222
223	if ( my $setup = $language->{setup} ) {
224	$self->$setup();
225	}
226
227	$self->_order_tokens;
228	}
229	else {
230	require Carp;
231	Carp::croak("Unsupported syntax version: $version");
232	}
233	}
234
235	# Optimization to put the most frequent tokens first.
236	sub _order_tokens {
237	my $self = shift;
238
239	my %copy = %{ $self->{tokens} };
240	my @ordered_tokens = grep {defined}
241	map { delete $copy{$_} } qw( simple_test test comment plan );
242	push @ordered_tokens, values %copy;
243
244	$self->{ordered_tokens} = \@ordered_tokens;
245	}
246
247	##############################################################################
248
249	=head3 C<tokenize>
250
251	my $token = $grammar->tokenize;
252
253	This method will return a L<TAP::Parser::Result> object representing the
254	current line of TAP.
255
256	=cut
257
258	sub tokenize {
259	my $self = shift;
260
261	my $line = $self->{stream}->next;
262	return unless defined $line;
263
264	my $token;
265
266	foreach my $token_data ( @{ $self->{ordered_tokens} } ) {
267	if ( $line =~ $token_data->{syntax} ) {
268	my $handler = $token_data->{handler};
269	$token = $self->$handler($line);
270	last;
271	}
272	}
273
274	$token = $self->_make_unknown_token($line) unless $token;
275
276	return TAP::Parser::Result->new($token);
277	}
278
279	##############################################################################
280
281	=head3 C<token_types>
282
283	my @types = $grammar->token_types;
284
285	Returns the different types of tokens which this grammar can parse.
286
287	=cut
288
289	sub token_types {
290	my $self = shift;
291	return keys %{ $self->{tokens} };
292	}
293
294	##############################################################################
295
296	=head3 C<syntax_for>
297
298	my $syntax = $grammar->syntax_for($token_type);
299
300	Returns a pre-compiled regular expression which will match a chunk of TAP
301	corresponding to the token type. For example (not that you should really pay
302	attention to this, C<< $grammar->syntax_for('comment') >> will return
303	C<< qr/^#(.*)/ >>.
304
305	=cut
306
307	sub syntax_for {
308	my ( $self, $type ) = @_;
309	return $self->{tokens}->{$type}->{syntax};
310	}
311
312	##############################################################################
313
314	=head3 C<handler_for>
315
316	my $handler = $grammar->handler_for($token_type);
317
318	Returns a code reference which, when passed an appropriate line of TAP,
319	returns the lexed token corresponding to that line. As a result, the basic
320	TAP parsing loop looks similar to the following:
321
322	my @tokens;
323	my $grammar = TAP::Grammar->new;
324	LINE: while ( defined( my $line = $parser->_next_chunk_of_tap ) ) {
325	foreach my $type ( $grammar->token_types ) {
326	my $syntax = $grammar->syntax_for($type);
327	if ( $line =~ $syntax ) {
328	my $handler = $grammar->handler_for($type);
329	push @tokens => $grammar->$handler($line);
330	next LINE;
331	}
332	}
333	push @tokens => $grammar->_make_unknown_token($line);
334	}
335
336	=cut
337
338	sub handler_for {
339	my ( $self, $type ) = @_;
340	return $self->{tokens}->{$type}->{handler};
341	}
342
343	sub _make_version_token {
344	my ( $self, $line, $version ) = @_;
345	return {
346	type => 'version',
347	raw => $line,
348	version => $version,
349	};
350	}
351
352	sub _make_plan_token {
353	my ( $self, $line, $tests_planned, $directive, $explanation, $todo ) = @_;
354
355	if ( $directive eq 'SKIP' && 0 != $tests_planned ) {
356	warn
357	"Specified SKIP directive in plan but more than 0 tests ($line)\n";
358	}
359	return {
360	type => 'plan',
361	raw => $line,
362	tests_planned => $tests_planned,
363	directive => $directive,
364	explanation => _trim($explanation),
365	todo_list => $todo,
366	};
367	}
368
369	sub _make_test_token {
370	my ( $self, $line, $ok, $num, $desc, $dir, $explanation ) = @_;
371	my %test = (
372	ok => $ok,
373	test_num => $num,
374	description => _trim($desc),
375	directive => uc($dir),
376	explanation => _trim($explanation),
377	raw => $line,
378	type => 'test',
379	);
380	return \%test;
381	}
382
383	sub _make_unknown_token {
384	my ( $self, $line ) = @_;
385	return {
386	raw => $line,
387	type => 'unknown',
388	};
389	}
390
391	sub _make_comment_token {
392	my ( $self, $line, $comment ) = @_;
393	return {
394	type => 'comment',
395	raw => $line,
396	comment => _trim($comment)
397	};
398	}
399
400	sub _make_bailout_token {
401	my ( $self, $line, $explanation ) = @_;
402	return {
403	type => 'bailout',
404	raw => $line,
405	bailout => _trim($explanation)
406	};
407	}
408
409	sub _make_yaml_token {
410	my ( $self, $pad, $marker ) = @_;
411
412	my $yaml = TAP::Parser::YAMLish::Reader->new;
413
414	my $stream = $self->{stream};
415
416	# Construct a reader that reads from our input stripping leading
417	# spaces from each line.
418	my $leader = length($pad);
419	my $strip = qr{ ^ (\s{$leader}) (.*) $ }x;
420	my @extra = ($marker);
421	my $reader = sub {
422	return shift @extra if @extra;
423	my $line = $stream->next;
424	return $2 if $line =~ $strip;
425	return;
426	};
427
428	my $data = $yaml->read($reader);
429
430	# Reconstitute input. This is convoluted. Maybe we should just
431	# record it on the way in...
432	chomp( my $raw = $yaml->get_raw );
433	$raw =~ s/^/$pad/mg;
434
435	return {
436	type => 'yaml',
437	raw => $raw,
438	data => $data
439	};
440	}
441
442	sub _trim {
443	my $data = shift;
444
445	return '' unless defined $data;
446
447	$data =~ s/^\s+//;
448	$data =~ s/\s+$//;
449	return $data;
450	}
451
452	=head1 TAP GRAMMAR
453
454	B<NOTE:> This grammar is slightly out of date. There's still some discussion
455	about it and a new one will be provided when we have things better defined.
456
457	The L<TAP::Parser> does not use a formal grammar because TAP is essentially a
458	stream-based protocol. In fact, it's quite legal to have an infinite stream.
459	For the same reason that we don't apply regexes to streams, we're not using a
460	formal grammar here. Instead, we parse the TAP in lines.
461
462	For purposes for forward compatability, any result which does not match the
463	following grammar is currently referred to as
464	L<TAP::Parser::Result::Unknown>. It is I<not> a parse error.
465
466	A formal grammar would look similar to the following:
467
468	(*
469	For the time being, I'm cheating on the EBNF by allowing
470	certain terms to be defined by POSIX character classes by
471	using the following syntax:
472
473	digit ::= [:digit:]
474
475	As far as I am aware, that's not valid EBNF. Sue me. I
476	didn't know how to write "char" otherwise (Unicode issues).
477	Suggestions welcome.
478	*)
479
480	tap ::= version? { comment \| unknown } leading_plan lines
481	\|
482	lines trailing_plan {comment}
483
484	version ::= 'TAP version ' positiveInteger {positiveInteger} "\n"
485
486	leading_plan ::= plan skip_directive? "\n"
487
488	trailing_plan ::= plan "\n"
489
490	plan ::= '1..' nonNegativeInteger
491
492	lines ::= line {line}
493
494	line ::= (comment \| test \| unknown \| bailout ) "\n"
495
496	test ::= status positiveInteger? description? directive?
497
498	status ::= 'not '? 'ok '
499
500	description ::= (character - (digit \| '#')) {character - '#'}
501
502	directive ::= todo_directive \| skip_directive
503
504	todo_directive ::= hash_mark 'TODO' ' ' {character}
505
506	skip_directive ::= hash_mark 'SKIP' ' ' {character}
507
508	comment ::= hash_mark {character}
509
510	hash_mark ::= '#' {' '}
511
512	bailout ::= 'Bail out!' {character}
513
514	unknown ::= { (character - "\n") }
515
516	(* POSIX character classes and other terminals *)
517
518	digit ::= [:digit:]
519	character ::= ([:print:] - "\n")
520	positiveInteger ::= ( digit - '0' ) {digit}
521	nonNegativeInteger ::= digit {digit}
522
523
524	=cut
525
526	1;