Commit | Line | Data |
b965d173 |
1 | package TAP::Parser::YAMLish::Reader; |
2 | |
3 | use strict; |
f7c69158 |
4 | use vars qw($VERSION @ISA); |
b965d173 |
5 | |
f7c69158 |
6 | use TAP::Object (); |
b965d173 |
7 | |
f7c69158 |
8 | @ISA = 'TAP::Object'; |
9 | $VERSION = '3.13'; |
b965d173 |
10 | |
11 | # TODO: |
12 | # Handle blessed object syntax |
13 | |
14 | # Printable characters for escapes |
15 | my %UNESCAPES = ( |
16 | z => "\x00", a => "\x07", t => "\x09", |
17 | n => "\x0a", v => "\x0b", f => "\x0c", |
18 | r => "\x0d", e => "\x1b", '\\' => '\\', |
19 | ); |
20 | |
21 | my $QQ_STRING = qr{ " (?:\\. | [^"])* " }x; |
f7c69158 |
22 | my $HASH_LINE = qr{ ^ ($QQ_STRING|\S+) \s* : \s* (?: (.+?) \s* )? $ }x; |
b965d173 |
23 | my $IS_HASH_KEY = qr{ ^ [\w\'\"] }x; |
24 | my $IS_END_YAML = qr{ ^ \.\.\. \s* $ }x; |
25 | my $IS_QQ_STRING = qr{ ^ $QQ_STRING $ }x; |
26 | |
f7c69158 |
27 | # new() implementation supplied by TAP::Object |
b965d173 |
28 | |
29 | sub read { |
30 | my $self = shift; |
31 | my $obj = shift; |
32 | |
33 | die "Must have a code reference to read input from" |
34 | unless ref $obj eq 'CODE'; |
35 | |
36 | $self->{reader} = $obj; |
37 | $self->{capture} = []; |
38 | |
39 | # Prime the reader |
40 | $self->_next; |
f7c69158 |
41 | return unless $self->{next}; |
b965d173 |
42 | |
43 | my $doc = $self->_read; |
44 | |
45 | # The terminator is mandatory otherwise we'd consume a line from the |
46 | # iterator that doesn't belong to us. If we want to remove this |
47 | # restriction we'll have to implement look-ahead in the iterators. |
48 | # Which might not be a bad idea. |
49 | my $dots = $self->_peek; |
50 | die "Missing '...' at end of YAMLish" |
51 | unless defined $dots |
52 | and $dots =~ $IS_END_YAML; |
53 | |
54 | delete $self->{reader}; |
55 | delete $self->{next}; |
56 | |
57 | return $doc; |
58 | } |
59 | |
f7c69158 |
60 | sub get_raw { join( "\n", grep defined, @{ shift->{capture} || [] } ) . "\n" } |
b965d173 |
61 | |
62 | sub _peek { |
63 | my $self = shift; |
64 | return $self->{next} unless wantarray; |
65 | my $line = $self->{next}; |
66 | $line =~ /^ (\s*) (.*) $ /x; |
67 | return ( $2, length $1 ); |
68 | } |
69 | |
70 | sub _next { |
71 | my $self = shift; |
72 | die "_next called with no reader" |
73 | unless $self->{reader}; |
74 | my $line = $self->{reader}->(); |
75 | $self->{next} = $line; |
76 | push @{ $self->{capture} }, $line; |
77 | } |
78 | |
79 | sub _read { |
80 | my $self = shift; |
81 | |
82 | my $line = $self->_peek; |
83 | |
84 | # Do we have a document header? |
85 | if ( $line =~ /^ --- (?: \s* (.+?) \s* )? $/x ) { |
86 | $self->_next; |
87 | |
88 | return $self->_read_scalar($1) if defined $1; # Inline? |
89 | |
90 | my ( $next, $indent ) = $self->_peek; |
91 | |
92 | if ( $next =~ /^ - /x ) { |
93 | return $self->_read_array($indent); |
94 | } |
95 | elsif ( $next =~ $IS_HASH_KEY ) { |
96 | return $self->_read_hash( $next, $indent ); |
97 | } |
98 | elsif ( $next =~ $IS_END_YAML ) { |
99 | die "Premature end of YAMLish"; |
100 | } |
101 | else { |
102 | die "Unsupported YAMLish syntax: '$next'"; |
103 | } |
104 | } |
105 | else { |
106 | die "YAMLish document header not found"; |
107 | } |
108 | } |
109 | |
110 | # Parse a double quoted string |
111 | sub _read_qq { |
112 | my $self = shift; |
113 | my $str = shift; |
114 | |
115 | unless ( $str =~ s/^ " (.*?) " $/$1/x ) { |
116 | die "Internal: not a quoted string"; |
117 | } |
118 | |
119 | $str =~ s/\\"/"/gx; |
120 | $str =~ s/ \\ ( [tartan\\favez] | x([0-9a-fA-F]{2}) ) |
121 | / (length($1) > 1) ? pack("H2", $2) : $UNESCAPES{$1} /gex; |
122 | return $str; |
123 | } |
124 | |
125 | # Parse a scalar string to the actual scalar |
126 | sub _read_scalar { |
127 | my $self = shift; |
128 | my $string = shift; |
129 | |
130 | return undef if $string eq '~'; |
131 | return {} if $string eq '{}'; |
132 | return [] if $string eq '[]'; |
133 | |
134 | if ( $string eq '>' || $string eq '|' ) { |
135 | |
136 | my ( $line, $indent ) = $self->_peek; |
137 | die "Multi-line scalar content missing" unless defined $line; |
138 | |
139 | my @multiline = ($line); |
140 | |
141 | while (1) { |
142 | $self->_next; |
143 | my ( $next, $ind ) = $self->_peek; |
144 | last if $ind < $indent; |
f7c69158 |
145 | |
146 | my $pad = $string eq '|' ? ( ' ' x ( $ind - $indent ) ) : ''; |
147 | push @multiline, $pad . $next; |
b965d173 |
148 | } |
149 | |
150 | return join( ( $string eq '>' ? ' ' : "\n" ), @multiline ) . "\n"; |
151 | } |
152 | |
153 | if ( $string =~ /^ ' (.*) ' $/x ) { |
154 | ( my $rv = $1 ) =~ s/''/'/g; |
155 | return $rv; |
156 | } |
157 | |
158 | if ( $string =~ $IS_QQ_STRING ) { |
159 | return $self->_read_qq($string); |
160 | } |
161 | |
162 | if ( $string =~ /^['"]/ ) { |
163 | |
164 | # A quote with folding... we don't support that |
165 | die __PACKAGE__ . " does not support multi-line quoted scalars"; |
166 | } |
167 | |
168 | # Regular unquoted string |
169 | return $string; |
170 | } |
171 | |
172 | sub _read_nested { |
173 | my $self = shift; |
174 | |
175 | my ( $line, $indent ) = $self->_peek; |
176 | |
177 | if ( $line =~ /^ -/x ) { |
178 | return $self->_read_array($indent); |
179 | } |
180 | elsif ( $line =~ $IS_HASH_KEY ) { |
181 | return $self->_read_hash( $line, $indent ); |
182 | } |
183 | else { |
184 | die "Unsupported YAMLish syntax: '$line'"; |
185 | } |
186 | } |
187 | |
188 | # Parse an array |
189 | sub _read_array { |
190 | my ( $self, $limit ) = @_; |
191 | |
192 | my $ar = []; |
193 | |
194 | while (1) { |
195 | my ( $line, $indent ) = $self->_peek; |
196 | last |
197 | if $indent < $limit |
198 | || !defined $line |
199 | || $line =~ $IS_END_YAML; |
200 | |
201 | if ( $indent > $limit ) { |
202 | die "Array line over-indented"; |
203 | } |
204 | |
205 | if ( $line =~ /^ (- \s+) \S+ \s* : (?: \s+ | $ ) /x ) { |
206 | $indent += length $1; |
207 | $line =~ s/-\s+//; |
208 | push @$ar, $self->_read_hash( $line, $indent ); |
209 | } |
210 | elsif ( $line =~ /^ - \s* (.+?) \s* $/x ) { |
211 | die "Unexpected start of YAMLish" if $line =~ /^---/; |
212 | $self->_next; |
213 | push @$ar, $self->_read_scalar($1); |
214 | } |
215 | elsif ( $line =~ /^ - \s* $/x ) { |
216 | $self->_next; |
217 | push @$ar, $self->_read_nested; |
218 | } |
219 | elsif ( $line =~ $IS_HASH_KEY ) { |
220 | $self->_next; |
221 | push @$ar, $self->_read_hash( $line, $indent, ); |
222 | } |
223 | else { |
224 | die "Unsupported YAMLish syntax: '$line'"; |
225 | } |
226 | } |
227 | |
228 | return $ar; |
229 | } |
230 | |
231 | sub _read_hash { |
232 | my ( $self, $line, $limit ) = @_; |
233 | |
234 | my $indent; |
235 | my $hash = {}; |
236 | |
237 | while (1) { |
238 | die "Badly formed hash line: '$line'" |
239 | unless $line =~ $HASH_LINE; |
240 | |
241 | my ( $key, $value ) = ( $self->_read_scalar($1), $2 ); |
242 | $self->_next; |
243 | |
244 | if ( defined $value ) { |
245 | $hash->{$key} = $self->_read_scalar($value); |
246 | } |
247 | else { |
248 | $hash->{$key} = $self->_read_nested; |
249 | } |
250 | |
251 | ( $line, $indent ) = $self->_peek; |
252 | last |
253 | if $indent < $limit |
254 | || !defined $line |
255 | || $line =~ $IS_END_YAML; |
256 | } |
257 | |
258 | return $hash; |
259 | } |
260 | |
261 | 1; |
262 | |
263 | __END__ |
264 | |
265 | =pod |
266 | |
267 | =head1 NAME |
268 | |
269 | TAP::Parser::YAMLish::Reader - Read YAMLish data from iterator |
270 | |
271 | =head1 VERSION |
272 | |
f7c69158 |
273 | Version 3.13 |
b965d173 |
274 | |
275 | =head1 SYNOPSIS |
276 | |
277 | =head1 DESCRIPTION |
278 | |
279 | Note that parts of this code were derived from L<YAML::Tiny> with the |
280 | permission of Adam Kennedy. |
281 | |
282 | =head1 METHODS |
283 | |
284 | =head2 Class Methods |
285 | |
286 | =head3 C<new> |
287 | |
288 | The constructor C<new> creates and returns an empty |
289 | C<TAP::Parser::YAMLish::Reader> object. |
290 | |
291 | my $reader = TAP::Parser::YAMLish::Reader->new; |
292 | |
293 | =head2 Instance Methods |
294 | |
295 | =head3 C<read> |
296 | |
297 | my $got = $reader->read($stream); |
298 | |
299 | Read YAMLish from a L<TAP::Parser::Iterator> and return the data structure it |
300 | represents. |
301 | |
302 | =head3 C<get_raw> |
303 | |
304 | my $source = $reader->get_source; |
305 | |
306 | Return the raw YAMLish source from the most recent C<read>. |
307 | |
308 | =head1 AUTHOR |
309 | |
310 | Andy Armstrong, <andy@hexten.net> |
311 | |
312 | Adam Kennedy wrote L<YAML::Tiny> which provided the template and many of |
313 | the YAML matching regular expressions for this module. |
314 | |
315 | =head1 SEE ALSO |
316 | |
317 | L<YAML::Tiny>, L<YAML>, L<YAML::Syck>, L<Config::Tiny>, L<CSS::Tiny>, |
318 | L<http://use.perl.org/~Alias/journal/29427> |
319 | |
320 | =head1 COPYRIGHT |
321 | |
53bc175b |
322 | Copyright 2007-2008 Andy Armstrong. |
b965d173 |
323 | |
53bc175b |
324 | Portions copyright 2006-2008 Adam Kennedy. |
b965d173 |
325 | |
326 | This program is free software; you can redistribute |
327 | it and/or modify it under the same terms as Perl itself. |
328 | |
329 | The full text of the license can be found in the |
330 | LICENSE file included with this module. |
331 | |
332 | =cut |
333 | |