6 use YAML::Loader::Base;
10 our @ISA = 'YAML::Loader::Base';
13 use constant LEAF => 1;
14 use constant COLLECTION => 2;
15 use constant VALUE => "\x07YAML\x07VALUE\x07";
16 use constant COMMENT => "\x07YAML\x07COMMENT\x07";
18 # Common YAML character sets
19 my $ESCAPE_CHAR = '[\\x00-\\x08\\x0b-\\x0d\\x0e-\\x1f]';
22 my $LIT_CHAR_RX = "\\$LIT_CHAR";
26 $self->stream($_[0] || '');
27 return $self->_parse();
30 # Top level function for parsing. Parse each document in order and
31 # handle processing for YAML headers.
34 my (%directives, $preface);
35 $self->{stream} =~ s|\015\012|\012|g;
36 $self->{stream} =~ s|\015|\012|g;
38 $self->die('YAML_PARSE_ERR_BAD_CHARS')
39 if $self->stream =~ /$ESCAPE_CHAR/;
40 $self->die('YAML_PARSE_ERR_NO_FINAL_NEWLINE')
41 if length($self->stream) and
42 $self->{stream} !~ s/(.)\n\Z/$1/s;
43 $self->lines([split /\x0a/, $self->stream, -1]);
45 # Throw away any comments or blanks before the header (or start of
46 # content for headerless streams)
47 $self->_parse_throwaway_comments();
50 # Add an "assumed" header if there is no header and the stream is
51 # not empty (after initial throwaways).
53 if ($self->lines->[0] !~ /^---(\s|$)/) {
54 unshift @{$self->lines}, '---';
59 # Main Loop. Parse out all the top level nodes and return them.
60 while (not $self->eos) {
61 $self->anchor2node({});
65 $self->offset->[0] = -1;
67 if ($self->lines->[0] =~ /^---\s*(.*)$/) {
68 my @words = split /\s+/, $1;
70 while (@words && $words[0] =~ /^#(\w+):(\S.*)$/) {
71 my ($key, $value) = ($1, $2);
73 if (defined $directives{$key}) {
74 $self->warn('YAML_PARSE_WARN_MULTIPLE_DIRECTIVES',
75 $key, $self->document);
78 $directives{$key} = $value;
80 $self->preface(join ' ', @words);
83 $self->die('YAML_PARSE_ERR_NO_SEPARATOR');
86 if (not $self->done) {
87 $self->_parse_next_line(COLLECTION);
94 $directives{YAML} ||= '1.0';
95 $directives{TAB} ||= 'NONE';
96 ($self->{major_version}, $self->{minor_version}) =
97 split /\./, $directives{YAML}, 2;
98 $self->die('YAML_PARSE_ERR_BAD_MAJOR_VERSION', $directives{YAML})
99 if $self->major_version ne '1';
100 $self->warn('YAML_PARSE_WARN_BAD_MINOR_VERSION', $directives{YAML})
101 if $self->minor_version ne '0';
102 $self->die('Unrecognized TAB policy')
103 unless $directives{TAB} =~ /^(NONE|\d+)(:HARD)?$/;
105 push @{$self->documents}, $self->_parse_node();
107 return wantarray ? @{$self->documents} : $self->documents->[-1];
110 # This function is the dispatcher for parsing each node. Every node
111 # recurses back through here. (Inlines are an exception as they have
112 # their own sub-parser.)
115 my $preface = $self->preface;
117 my ($node, $type, $indicator, $escape, $chomp) = ('') x 5;
118 my ($anchor, $alias, $explicit, $implicit, $class) = ('') x 5;
119 ($anchor, $alias, $explicit, $implicit, $preface) =
120 $self->_parse_qualifiers($preface);
122 $self->anchor2node->{$anchor} = CORE::bless [], 'YAML-anchor2node';
125 while (length $preface) {
126 my $line = $self->line - 1;
127 if ($preface =~ s/^($FOLD_CHAR|$LIT_CHAR_RX)(-|\+)?\d*\s*//) {
129 $chomp = $2 if defined($2);
132 $self->die('YAML_PARSE_ERR_TEXT_AFTER_INDICATOR') if $indicator;
133 $self->inline($preface);
138 $self->die('YAML_PARSE_ERR_NO_ANCHOR', $alias)
139 unless defined $self->anchor2node->{$alias};
140 if (ref($self->anchor2node->{$alias}) ne 'YAML-anchor2node') {
141 $node = $self->anchor2node->{$alias};
144 $node = do {my $sv = "*$alias"};
145 push @{$self->anchor2node->{$alias}}, [\$node, $self->line];
148 elsif (length $self->inline) {
149 $node = $self->_parse_inline(1, $implicit, $explicit);
150 if (length $self->inline) {
151 $self->die('YAML_PARSE_ERR_SINGLE_LINE');
154 elsif ($indicator eq $LIT_CHAR) {
156 $node = $self->_parse_block($chomp);
157 $node = $self->_parse_implicit($node) if $implicit;
160 elsif ($indicator eq $FOLD_CHAR) {
162 $node = $self->_parse_unfold($chomp);
163 $node = $self->_parse_implicit($node) if $implicit;
168 $self->offset->[$self->level] ||= 0;
169 if ($self->indent == $self->offset->[$self->level]) {
170 if ($self->content =~ /^-( |$)/) {
171 $node = $self->_parse_seq($anchor);
173 elsif ($self->content =~ /(^\?|\:( |$))/) {
174 $node = $self->_parse_mapping($anchor);
176 elsif ($preface =~ /^\s*$/) {
177 $node = $self->_parse_implicit('');
180 $self->die('YAML_PARSE_ERR_BAD_NODE');
188 $#{$self->offset} = $self->level;
197 CORE::bless $node, $class;
200 $node = $self->_parse_explicit($node, $explicit);
204 if (ref($self->anchor2node->{$anchor}) eq 'YAML-anchor2node') {
205 # XXX Can't remember what this code actually does
206 for my $ref (@{$self->anchor2node->{$anchor}}) {
207 ${$ref->[0]} = $node;
208 $self->warn('YAML_LOAD_WARN_UNRESOLVED_ALIAS',
212 $self->anchor2node->{$anchor} = $node;
217 # Preprocess the qualifiers that may be attached to any node.
218 sub _parse_qualifiers {
221 my ($anchor, $alias, $explicit, $implicit, $token) = ('') x 5;
223 while ($preface =~ /^[&*!]/) {
224 my $line = $self->line - 1;
225 if ($preface =~ s/^\!(\S+)\s*//) {
226 $self->die('YAML_PARSE_ERR_MANY_EXPLICIT') if $explicit;
229 elsif ($preface =~ s/^\!\s*//) {
230 $self->die('YAML_PARSE_ERR_MANY_IMPLICIT') if $implicit;
233 elsif ($preface =~ s/^\&([^ ,:]+)\s*//) {
235 $self->die('YAML_PARSE_ERR_BAD_ANCHOR')
236 unless $token =~ /^[a-zA-Z0-9]+$/;
237 $self->die('YAML_PARSE_ERR_MANY_ANCHOR') if $anchor;
238 $self->die('YAML_PARSE_ERR_ANCHOR_ALIAS') if $alias;
241 elsif ($preface =~ s/^\*([^ ,:]+)\s*//) {
243 $self->die('YAML_PARSE_ERR_BAD_ALIAS')
244 unless $token =~ /^[a-zA-Z0-9]+$/;
245 $self->die('YAML_PARSE_ERR_MANY_ALIAS') if $alias;
246 $self->die('YAML_PARSE_ERR_ANCHOR_ALIAS') if $anchor;
250 return ($anchor, $alias, $explicit, $implicit, $preface);
253 # Morph a node to it's explicit type
254 sub _parse_explicit {
256 my ($node, $explicit) = @_;
258 if ($explicit =~ /^\!?perl\/(hash|array|ref|scalar)(?:\:(\w(\w|\:\:)*)?)?$/) {
259 ($type, $class) = (($1 || ''), ($2 || ''));
261 # FIXME # die unless uc($type) eq ref($node) ?
263 if ( $type eq "ref" ) {
264 $self->die('YAML_LOAD_ERR_NO_DEFAULT_VALUE', 'XXX', $explicit)
265 unless exists $node->{VALUE()} and scalar(keys %$node) == 1;
267 my $value = $node->{VALUE()};
271 if ( $type eq "scalar" and length($class) and !ref($node) ) {
276 if ( length($class) ) {
277 CORE::bless($node, $class);
282 if ($explicit =~ m{^!?perl/(glob|regexp|code)(?:\:(\w(\w|\:\:)*)?)?$}) {
283 ($type, $class) = (($1 || ''), ($2 || ''));
284 my $type_class = "YAML::Type::$type";
286 if ($type_class->can('yaml_load')) {
287 return $type_class->yaml_load($node, $class, $self);
290 $self->die('YAML_LOAD_ERR_NO_CONVERT', 'XXX', $explicit);
293 # This !perl/@Foo and !perl/$Foo are deprecated but still parsed
294 elsif ($YAML::TagClass->{$explicit} ||
295 $explicit =~ m{^perl/(\@|\$)?([a-zA-Z](\w|::)+)$}
297 $class = $YAML::TagClass->{$explicit} || $2;
298 if ($class->can('yaml_load')) {
300 return $class->yaml_load(YAML::Node->new($node, $explicit));
304 return CORE::bless $node, $class;
307 return CORE::bless \$node, $class;
313 return YAML::Node->new($node, $explicit);
316 # XXX This is likely wrong. Failing test:
317 # --- !unknown 'scalar value'
322 # Parse a YAML mapping into a Perl hash
327 $self->anchor2node->{$anchor} = $mapping;
329 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
331 if ($self->{content} =~ s/^\?\s*//) {
332 $self->preface($self->content);
333 $self->_parse_next_line(COLLECTION);
334 $key = $self->_parse_node();
337 # If "default" key (equals sign)
338 elsif ($self->{content} =~ s/^\=\s*//) {
341 # If "comment" key (slash slash)
342 elsif ($self->{content} =~ s/^\=\s*//) {
345 # Regular scalar key:
347 $self->inline($self->content);
348 $key = $self->_parse_inline();
350 $self->content($self->inline);
354 unless ($self->{content} =~ s/^:\s*//) {
355 $self->die('YAML_LOAD_ERR_BAD_MAP_ELEMENT');
357 $self->preface($self->content);
358 my $line = $self->line;
359 $self->_parse_next_line(COLLECTION);
360 my $value = $self->_parse_node();
361 if (exists $mapping->{$key}) {
362 $self->warn('YAML_LOAD_WARN_DUPLICATE_KEY');
365 $mapping->{$key} = $value;
371 # Parse a YAML sequence into a Perl array
376 $self->anchor2node->{$anchor} = $seq;
377 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
378 if ($self->content =~ /^-(?: (.*))?$/) {
379 $self->preface(defined($1) ? $1 : '');
382 $self->die('YAML_LOAD_ERR_BAD_SEQ_ELEMENT');
384 if ($self->preface =~ /^(\s*)(\w.*\:(?: |$).*)$/) {
385 $self->indent($self->offset->[$self->level] + 2 + length($1));
387 $self->level($self->level + 1);
388 $self->offset->[$self->level] = $self->indent;
390 push @$seq, $self->_parse_mapping('');
392 $#{$self->offset} = $self->level;
395 $self->_parse_next_line(COLLECTION);
396 push @$seq, $self->_parse_node();
402 # Parse an inline value. Since YAML supports inline collections, this is
403 # the top level of a sub parsing.
406 my ($top, $top_implicit, $top_explicit) = (@_, '', '', '');
407 $self->{inline} =~ s/^\s*(.*)\s*$/$1/; # OUCH - mugwump
408 my ($node, $anchor, $alias, $explicit, $implicit) = ('') x 5;
409 ($anchor, $alias, $explicit, $implicit, $self->{inline}) =
410 $self->_parse_qualifiers($self->inline);
412 $self->anchor2node->{$anchor} = CORE::bless [], 'YAML-anchor2node';
414 $implicit ||= $top_implicit;
415 $explicit ||= $top_explicit;
416 ($top_implicit, $top_explicit) = ('', '');
418 $self->die('YAML_PARSE_ERR_NO_ANCHOR', $alias)
419 unless defined $self->anchor2node->{$alias};
420 if (ref($self->anchor2node->{$alias}) ne 'YAML-anchor2node') {
421 $node = $self->anchor2node->{$alias};
424 $node = do {my $sv = "*$alias"};
425 push @{$self->anchor2node->{$alias}}, [\$node, $self->line];
428 elsif ($self->inline =~ /^\{/) {
429 $node = $self->_parse_inline_mapping($anchor);
431 elsif ($self->inline =~ /^\[/) {
432 $node = $self->_parse_inline_seq($anchor);
434 elsif ($self->inline =~ /^"/) {
435 $node = $self->_parse_inline_double_quoted();
436 $node = $self->_unescape($node);
437 $node = $self->_parse_implicit($node) if $implicit;
439 elsif ($self->inline =~ /^'/) {
440 $node = $self->_parse_inline_single_quoted();
441 $node = $self->_parse_implicit($node) if $implicit;
445 $node = $self->inline;
449 $node = $self->_parse_inline_simple();
451 $node = $self->_parse_implicit($node) unless $explicit;
454 $node = $self->_parse_explicit($node, $explicit);
457 if (ref($self->anchor2node->{$anchor}) eq 'YAML-anchor2node') {
458 for my $ref (@{$self->anchor2node->{$anchor}}) {
459 ${$ref->[0]} = $node;
460 $self->warn('YAML_LOAD_WARN_UNRESOLVED_ALIAS',
464 $self->anchor2node->{$anchor} = $node;
469 # Parse the inline YAML mapping into a Perl hash
470 sub _parse_inline_mapping {
474 $self->anchor2node->{$anchor} = $node;
476 $self->die('YAML_PARSE_ERR_INLINE_MAP')
477 unless $self->{inline} =~ s/^\{\s*//;
478 while (not $self->{inline} =~ s/^\s*\}//) {
479 my $key = $self->_parse_inline();
480 $self->die('YAML_PARSE_ERR_INLINE_MAP')
481 unless $self->{inline} =~ s/^\: \s*//;
482 my $value = $self->_parse_inline();
483 if (exists $node->{$key}) {
484 $self->warn('YAML_LOAD_WARN_DUPLICATE_KEY');
487 $node->{$key} = $value;
489 next if $self->inline =~ /^\s*\}/;
490 $self->die('YAML_PARSE_ERR_INLINE_MAP')
491 unless $self->{inline} =~ s/^\,\s*//;
496 # Parse the inline YAML sequence into a Perl array
497 sub _parse_inline_seq {
501 $self->anchor2node->{$anchor} = $node;
503 $self->die('YAML_PARSE_ERR_INLINE_SEQUENCE')
504 unless $self->{inline} =~ s/^\[\s*//;
505 while (not $self->{inline} =~ s/^\s*\]//) {
506 my $value = $self->_parse_inline();
508 next if $self->inline =~ /^\s*\]/;
509 $self->die('YAML_PARSE_ERR_INLINE_SEQUENCE')
510 unless $self->{inline} =~ s/^\,\s*//;
515 # Parse the inline double quoted string.
516 sub _parse_inline_double_quoted {
519 if ($self->inline =~ /^"((?:\\"|[^"])*)"\s*(.*)$/) {
525 $self->die('YAML_PARSE_ERR_BAD_DOUBLE');
531 # Parse the inline single quoted string.
532 sub _parse_inline_single_quoted {
535 if ($self->inline =~ /^'((?:''|[^'])*)'\s*(.*)$/) {
541 $self->die('YAML_PARSE_ERR_BAD_SINGLE');
546 # Parse the inline unquoted string and do implicit typing.
547 sub _parse_inline_simple {
550 if ($self->inline =~ /^(|[^!@#%^&*].*?)(?=[\[\]\{\},]|, |: |- |:\s*$|$)/) {
552 substr($self->{inline}, 0, length($1)) = '';
555 $self->die('YAML_PARSE_ERR_BAD_INLINE_IMPLICIT', $value);
560 sub _parse_implicit {
564 return $value if $value eq '';
565 return undef if $value =~ /^~$/;
567 unless $value =~ /^[\@\`\^]/ or
568 $value =~ /^[\-\?]\s/;
569 $self->die('YAML_PARSE_ERR_BAD_IMPLICIT', $value);
572 # Unfold a YAML multiline scalar into a single string.
578 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
579 $node .= $self->content. "\n";
580 $self->_parse_next_line(LEAF);
582 $node =~ s/^(\S.*)\n(?=\S)/$1 /gm;
583 $node =~ s/^(\S.*)\n(\n+\S)/$1$2/gm;
584 $node =~ s/\n*\Z// unless $chomp eq '+';
585 $node .= "\n" unless $chomp;
589 # Parse a YAML block style scalar. This is like a Perl here-document.
594 while (not $self->done and $self->indent == $self->offset->[$self->level]) {
595 $node .= $self->content . "\n";
596 $self->_parse_next_line(LEAF);
598 return $node if '+' eq $chomp;
599 $node =~ s/\n*\Z/\n/;
600 $node =~ s/\n\Z// if $chomp eq '-';
604 # Handle Perl style '#' comments. Comments must be at the same indentation
605 # level as the collection line following them.
606 sub _parse_throwaway_comments {
608 while (@{$self->lines} and
609 $self->lines->[0] =~ m{^\s*(\#|$)}
611 shift @{$self->lines};
614 $self->eos($self->{done} = not @{$self->lines});
617 # This is the routine that controls what line is being parsed. It gets called
618 # once for each line in the YAML stream.
621 # 1) Skip past the current line
622 # 2) Determine the indentation offset for a new level
623 # 3) Find the next _content_ line
624 # A) Skip over any throwaways (Comments/blanks)
625 # B) Set $self->indent, $self->content, $self->line
626 # 4) Expand tabs appropriately
627 sub _parse_next_line {
630 my $level = $self->level;
631 my $offset = $self->offset->[$level];
632 $self->die('YAML_EMIT_ERR_BAD_LEVEL') unless defined $offset;
633 shift @{$self->lines};
634 $self->eos($self->{done} = not @{$self->lines});
635 return if $self->eos;
638 # Determine the offset for a new leaf node
639 if ($self->preface =~
640 qr/(?:^|\s)(?:$FOLD_CHAR|$LIT_CHAR_RX)(?:-|\+)?(\d*)\s*$/
642 $self->die('YAML_PARSE_ERR_ZERO_INDENT')
643 if length($1) and $1 == 0;
646 $self->offset->[$level + 1] = $offset + $1;
649 # First get rid of any comments.
650 while (@{$self->lines} && ($self->lines->[0] =~ /^\s*#/)) {
651 $self->lines->[0] =~ /^( *)/ or die;
652 last unless length($1) <= $offset;
653 shift @{$self->lines};
656 $self->eos($self->{done} = not @{$self->lines});
657 return if $self->eos;
658 if ($self->lines->[0] =~ /^( *)\S/ and length($1) > $offset) {
659 $self->offset->[$level+1] = length($1);
662 $self->offset->[$level+1] = $offset + 1;
665 $offset = $self->offset->[++$level];
667 # Determine the offset for a new collection level
668 elsif ($type == COLLECTION and
669 $self->preface =~ /^(\s*(\!\S*|\&\S+))*\s*$/) {
670 $self->_parse_throwaway_comments();
672 $self->offset->[$level+1] = $offset + 1;
676 $self->lines->[0] =~ /^( *)\S/ or die;
677 if (length($1) > $offset) {
678 $self->offset->[$level+1] = length($1);
681 $self->offset->[$level+1] = $offset + 1;
684 $offset = $self->offset->[++$level];
688 while (@{$self->lines} and
689 $self->lines->[0] =~ m{^( *)(\#)} and
692 shift @{$self->lines};
695 $self->eos($self->{done} = not @{$self->lines});
698 $self->_parse_throwaway_comments();
700 return if $self->eos;
702 if ($self->lines->[0] =~ /^---(\s|$)/) {
706 if ($type == LEAF and
707 $self->lines->[0] =~ /^ {$offset}(.*)$/
709 $self->indent($offset);
712 elsif ($self->lines->[0] =~ /^\s*$/) {
713 $self->indent($offset);
717 $self->lines->[0] =~ /^( *)(\S.*)$/;
718 while ($self->offset->[$level] > length($1)) {
721 $self->die('YAML_PARSE_ERR_INCONSISTENT_INDENTATION')
722 if $self->offset->[$level] != length($1);
723 $self->indent(length($1));
726 $self->die('YAML_PARSE_ERR_INDENTATION')
727 if $self->indent - $offset > 1;
730 #==============================================================================
731 # Utility subroutines.
732 #==============================================================================
734 # Printable characters for escapes
740 'v' => "\x0b", # Potential v-string error on 5.6.2 if not quoted
747 # Transform all the backslash style escape characters to their literal meaning
751 $node =~ s/\\([never\\fart0]|x([0-9a-fA-F]{2}))/
752 (length($1)>1)?pack("H2",$2):$unescapes{$1}/gex;
762 YAML::Loader - YAML class for loading Perl objects to YAML
767 my $loader = YAML::Loader->new;
768 my $hash = $loader->load(<<'...');
774 YAML::Loader is the module that YAML.pm used to deserialize YAML to Perl
775 objects. It is fully object oriented and usable on its own.
779 Ingy döt Net <ingy@cpan.org>
783 Copyright (c) 2006. Ingy döt Net. All rights reserved.
785 This program is free software; you can redistribute it and/or modify it
786 under the same terms as Perl itself.
788 See L<http://www.perl.com/perl/misc/Artistic.html>