Commit | Line | Data |
b965d173 |
1 | package TAP::Parser::YAMLish::Reader; |
2 | |
3 | use strict; |
4 | |
5 | use vars qw{$VERSION}; |
6 | |
69f36734 |
7 | $VERSION = '3.06'; |
b965d173 |
8 | |
9 | # TODO: |
10 | # Handle blessed object syntax |
11 | |
12 | # Printable characters for escapes |
13 | my %UNESCAPES = ( |
14 | z => "\x00", a => "\x07", t => "\x09", |
15 | n => "\x0a", v => "\x0b", f => "\x0c", |
16 | r => "\x0d", e => "\x1b", '\\' => '\\', |
17 | ); |
18 | |
19 | my $QQ_STRING = qr{ " (?:\\. | [^"])* " }x; |
20 | my $HASH_LINE = qr{ ^ ($QQ_STRING|\S+) \s* : (?: \s+ (.+?) \s* )? $ }x; |
21 | my $IS_HASH_KEY = qr{ ^ [\w\'\"] }x; |
22 | my $IS_END_YAML = qr{ ^ \.\.\. \s* $ }x; |
23 | my $IS_QQ_STRING = qr{ ^ $QQ_STRING $ }x; |
24 | |
25 | # Create an empty TAP::Parser::YAMLish::Reader object |
26 | sub new { |
27 | my $class = shift; |
28 | bless {}, $class; |
29 | } |
30 | |
31 | sub read { |
32 | my $self = shift; |
33 | my $obj = shift; |
34 | |
35 | die "Must have a code reference to read input from" |
36 | unless ref $obj eq 'CODE'; |
37 | |
38 | $self->{reader} = $obj; |
39 | $self->{capture} = []; |
40 | |
41 | # Prime the reader |
42 | $self->_next; |
43 | |
44 | my $doc = $self->_read; |
45 | |
46 | # The terminator is mandatory otherwise we'd consume a line from the |
47 | # iterator that doesn't belong to us. If we want to remove this |
48 | # restriction we'll have to implement look-ahead in the iterators. |
49 | # Which might not be a bad idea. |
50 | my $dots = $self->_peek; |
51 | die "Missing '...' at end of YAMLish" |
52 | unless defined $dots |
53 | and $dots =~ $IS_END_YAML; |
54 | |
55 | delete $self->{reader}; |
56 | delete $self->{next}; |
57 | |
58 | return $doc; |
59 | } |
60 | |
61 | sub get_raw { |
62 | my $self = shift; |
63 | |
64 | if ( defined( my $capture = $self->{capture} ) ) { |
65 | return join( "\n", @$capture ) . "\n"; |
66 | } |
67 | |
68 | return ''; |
69 | } |
70 | |
71 | sub _peek { |
72 | my $self = shift; |
73 | return $self->{next} unless wantarray; |
74 | my $line = $self->{next}; |
75 | $line =~ /^ (\s*) (.*) $ /x; |
76 | return ( $2, length $1 ); |
77 | } |
78 | |
79 | sub _next { |
80 | my $self = shift; |
81 | die "_next called with no reader" |
82 | unless $self->{reader}; |
83 | my $line = $self->{reader}->(); |
84 | $self->{next} = $line; |
85 | push @{ $self->{capture} }, $line; |
86 | } |
87 | |
88 | sub _read { |
89 | my $self = shift; |
90 | |
91 | my $line = $self->_peek; |
92 | |
93 | # Do we have a document header? |
94 | if ( $line =~ /^ --- (?: \s* (.+?) \s* )? $/x ) { |
95 | $self->_next; |
96 | |
97 | return $self->_read_scalar($1) if defined $1; # Inline? |
98 | |
99 | my ( $next, $indent ) = $self->_peek; |
100 | |
101 | if ( $next =~ /^ - /x ) { |
102 | return $self->_read_array($indent); |
103 | } |
104 | elsif ( $next =~ $IS_HASH_KEY ) { |
105 | return $self->_read_hash( $next, $indent ); |
106 | } |
107 | elsif ( $next =~ $IS_END_YAML ) { |
108 | die "Premature end of YAMLish"; |
109 | } |
110 | else { |
111 | die "Unsupported YAMLish syntax: '$next'"; |
112 | } |
113 | } |
114 | else { |
115 | die "YAMLish document header not found"; |
116 | } |
117 | } |
118 | |
119 | # Parse a double quoted string |
120 | sub _read_qq { |
121 | my $self = shift; |
122 | my $str = shift; |
123 | |
124 | unless ( $str =~ s/^ " (.*?) " $/$1/x ) { |
125 | die "Internal: not a quoted string"; |
126 | } |
127 | |
128 | $str =~ s/\\"/"/gx; |
129 | $str =~ s/ \\ ( [tartan\\favez] | x([0-9a-fA-F]{2}) ) |
130 | / (length($1) > 1) ? pack("H2", $2) : $UNESCAPES{$1} /gex; |
131 | return $str; |
132 | } |
133 | |
134 | # Parse a scalar string to the actual scalar |
135 | sub _read_scalar { |
136 | my $self = shift; |
137 | my $string = shift; |
138 | |
139 | return undef if $string eq '~'; |
140 | return {} if $string eq '{}'; |
141 | return [] if $string eq '[]'; |
142 | |
143 | if ( $string eq '>' || $string eq '|' ) { |
144 | |
145 | my ( $line, $indent ) = $self->_peek; |
146 | die "Multi-line scalar content missing" unless defined $line; |
147 | |
148 | my @multiline = ($line); |
149 | |
150 | while (1) { |
151 | $self->_next; |
152 | my ( $next, $ind ) = $self->_peek; |
153 | last if $ind < $indent; |
154 | push @multiline, $next; |
155 | } |
156 | |
157 | return join( ( $string eq '>' ? ' ' : "\n" ), @multiline ) . "\n"; |
158 | } |
159 | |
160 | if ( $string =~ /^ ' (.*) ' $/x ) { |
161 | ( my $rv = $1 ) =~ s/''/'/g; |
162 | return $rv; |
163 | } |
164 | |
165 | if ( $string =~ $IS_QQ_STRING ) { |
166 | return $self->_read_qq($string); |
167 | } |
168 | |
169 | if ( $string =~ /^['"]/ ) { |
170 | |
171 | # A quote with folding... we don't support that |
172 | die __PACKAGE__ . " does not support multi-line quoted scalars"; |
173 | } |
174 | |
175 | # Regular unquoted string |
176 | return $string; |
177 | } |
178 | |
179 | sub _read_nested { |
180 | my $self = shift; |
181 | |
182 | my ( $line, $indent ) = $self->_peek; |
183 | |
184 | if ( $line =~ /^ -/x ) { |
185 | return $self->_read_array($indent); |
186 | } |
187 | elsif ( $line =~ $IS_HASH_KEY ) { |
188 | return $self->_read_hash( $line, $indent ); |
189 | } |
190 | else { |
191 | die "Unsupported YAMLish syntax: '$line'"; |
192 | } |
193 | } |
194 | |
195 | # Parse an array |
196 | sub _read_array { |
197 | my ( $self, $limit ) = @_; |
198 | |
199 | my $ar = []; |
200 | |
201 | while (1) { |
202 | my ( $line, $indent ) = $self->_peek; |
203 | last |
204 | if $indent < $limit |
205 | || !defined $line |
206 | || $line =~ $IS_END_YAML; |
207 | |
208 | if ( $indent > $limit ) { |
209 | die "Array line over-indented"; |
210 | } |
211 | |
212 | if ( $line =~ /^ (- \s+) \S+ \s* : (?: \s+ | $ ) /x ) { |
213 | $indent += length $1; |
214 | $line =~ s/-\s+//; |
215 | push @$ar, $self->_read_hash( $line, $indent ); |
216 | } |
217 | elsif ( $line =~ /^ - \s* (.+?) \s* $/x ) { |
218 | die "Unexpected start of YAMLish" if $line =~ /^---/; |
219 | $self->_next; |
220 | push @$ar, $self->_read_scalar($1); |
221 | } |
222 | elsif ( $line =~ /^ - \s* $/x ) { |
223 | $self->_next; |
224 | push @$ar, $self->_read_nested; |
225 | } |
226 | elsif ( $line =~ $IS_HASH_KEY ) { |
227 | $self->_next; |
228 | push @$ar, $self->_read_hash( $line, $indent, ); |
229 | } |
230 | else { |
231 | die "Unsupported YAMLish syntax: '$line'"; |
232 | } |
233 | } |
234 | |
235 | return $ar; |
236 | } |
237 | |
238 | sub _read_hash { |
239 | my ( $self, $line, $limit ) = @_; |
240 | |
241 | my $indent; |
242 | my $hash = {}; |
243 | |
244 | while (1) { |
245 | die "Badly formed hash line: '$line'" |
246 | unless $line =~ $HASH_LINE; |
247 | |
248 | my ( $key, $value ) = ( $self->_read_scalar($1), $2 ); |
249 | $self->_next; |
250 | |
251 | if ( defined $value ) { |
252 | $hash->{$key} = $self->_read_scalar($value); |
253 | } |
254 | else { |
255 | $hash->{$key} = $self->_read_nested; |
256 | } |
257 | |
258 | ( $line, $indent ) = $self->_peek; |
259 | last |
260 | if $indent < $limit |
261 | || !defined $line |
262 | || $line =~ $IS_END_YAML; |
263 | } |
264 | |
265 | return $hash; |
266 | } |
267 | |
268 | 1; |
269 | |
270 | __END__ |
271 | |
272 | =pod |
273 | |
274 | =head1 NAME |
275 | |
276 | TAP::Parser::YAMLish::Reader - Read YAMLish data from iterator |
277 | |
278 | =head1 VERSION |
279 | |
69f36734 |
280 | Version 3.06 |
b965d173 |
281 | |
282 | =head1 SYNOPSIS |
283 | |
284 | =head1 DESCRIPTION |
285 | |
286 | Note that parts of this code were derived from L<YAML::Tiny> with the |
287 | permission of Adam Kennedy. |
288 | |
289 | =head1 METHODS |
290 | |
291 | =head2 Class Methods |
292 | |
293 | =head3 C<new> |
294 | |
295 | The constructor C<new> creates and returns an empty |
296 | C<TAP::Parser::YAMLish::Reader> object. |
297 | |
298 | my $reader = TAP::Parser::YAMLish::Reader->new; |
299 | |
300 | =head2 Instance Methods |
301 | |
302 | =head3 C<read> |
303 | |
304 | my $got = $reader->read($stream); |
305 | |
306 | Read YAMLish from a L<TAP::Parser::Iterator> and return the data structure it |
307 | represents. |
308 | |
309 | =head3 C<get_raw> |
310 | |
311 | my $source = $reader->get_source; |
312 | |
313 | Return the raw YAMLish source from the most recent C<read>. |
314 | |
315 | =head1 AUTHOR |
316 | |
317 | Andy Armstrong, <andy@hexten.net> |
318 | |
319 | Adam Kennedy wrote L<YAML::Tiny> which provided the template and many of |
320 | the YAML matching regular expressions for this module. |
321 | |
322 | =head1 SEE ALSO |
323 | |
324 | L<YAML::Tiny>, L<YAML>, L<YAML::Syck>, L<Config::Tiny>, L<CSS::Tiny>, |
325 | L<http://use.perl.org/~Alias/journal/29427> |
326 | |
327 | =head1 COPYRIGHT |
328 | |
329 | Copyright 2007 Andy Armstrong. |
330 | |
331 | Portions copyright 2006-2007 Adam Kennedy. |
332 | |
333 | This program is free software; you can redistribute |
334 | it and/or modify it under the same terms as Perl itself. |
335 | |
336 | The full text of the license can be found in the |
337 | LICENSE file included with this module. |
338 | |
339 | =cut |
340 | |