7 PPI::Document - Object representation of a Perl document
19 # Load a document from a file
20 my $Document = PPI::Document->new('My/Module.pm');
23 $Document->prune('PPI::Token::Comment');
25 # Find all the named subroutines
26 my $sub_nodes = $Document->find(
27 sub { $_[1]->isa('PPI::Statement::Sub') and $_[1]->name }
29 my @sub_names = map { $_->name } @$sub_nodes;
32 $Document->save('My/Module.pm.stripped');
36 The C<PPI::Document> class represents a single Perl "document". A
37 C<PPI::Document> object acts as a root L<PPI::Node>, with some
38 additional methods for loading and saving, and working with
39 the line/column locations of Elements within a file.
41 The exemption to its L<PPI::Node>-like behavior this is that a
42 C<PPI::Document> object can NEVER have a parent node, and is always
43 the root node in a tree.
45 =head2 Storable Support
47 C<PPI::Document> implements the necessary C<STORABLE_freeze> and
48 C<STORABLE_thaw> hooks to provide native support for L<Storable>,
49 if you have it installed.
51 However if you want to clone clone a Document, you are highly recommended
52 to use the internal C<$Document-E<gt>clone> method rather than Storable's
53 C<dclone> function (although C<dclone> should still work).
57 Most of the things you are likely to want to do with a Document are
58 probably going to involve the methods from L<PPI::Node> class, of which
61 The methods listed here are the remaining few methods that are truly
68 use List::MoreUtils ();
69 use Params::Util qw{_SCALAR0 _ARRAY0 _INSTANCE};
74 use PPI::Exception::ParserTimeout ();
76 use overload 'bool' => \&PPI::Util::TRUE;
77 use overload '""' => 'content';
79 use vars qw{$VERSION @ISA $errstr};
86 use PPI::Document::Fragment ();
91 # Convenience constants related to constants
92 use constant LOCATION_LINE => 0;
93 use constant LOCATION_CHARACTER => 1;
94 use constant LOCATION_COLUMN => 2;
95 use constant LOCATION_LOGICAL_LINE => 3;
96 use constant LOCATION_LOGICAL_FILE => 4;
102 #####################################################################
103 # Constructor and Static Methods
109 # Simple construction
110 $doc = PPI::Document->new( $filename );
111 $doc = PPI::Document->new( \$source );
113 # With the readonly attribute set
114 $doc = PPI::Document->new( $filename,
118 The C<new> constructor takes as argument a variety of different sources of
119 Perl code, and creates a single cohesive Perl C<PPI::Document>
122 If passed a file name as a normal string, it will attempt to load the
123 document from the file.
125 If passed a reference to a C<SCALAR>, this is taken to be source code and
126 parsed directly to create the document.
128 If passed zero arguments, a "blank" document will be created that contains
131 In all cases, the document is considered to be "anonymous" and not tied back
132 to where it was created from. Specifically, if you create a PPI::Document from
133 a filename, the document will B<not> remember where it was created from.
135 The constructor also takes attribute flags.
137 At this time, the only available attribute is the C<readonly> flag.
139 Setting C<readonly> to true will allow various systems to provide
140 additional optimisations and caching. Note that because C<readonly> is an
141 optimisation flag, it is off by default and you will need to explicitly
144 Returns a C<PPI::Document> object, or C<undef> if parsing fails.
149 local $_; # An extra one, just in case
150 my $class = ref $_[0] ? ref shift : shift;
153 my $self = $class->SUPER::new;
154 $self->{readonly} = ! 1;
155 $self->{tab_width} = 1;
159 # Check constructor attributes
162 my $timeout = delete $attr{timeout};
163 if ( $timeout and ! PPI::Util::HAVE_ALARM() ) {
164 Carp::croak("This platform does not support PPI parser timeouts");
167 # Check the data source
168 if ( ! defined $source ) {
169 $class->_error("An undefined value was passed to PPI::Document::new");
171 } elsif ( ! ref $source ) {
172 # Catch people using the old API
173 if ( $source =~ /(?:\012|\015)/ ) {
174 Carp::croak("API CHANGE: Source code should only be passed to PPI::Document->new as a SCALAR reference");
177 # When loading from a filename, use the caching layer if it exists.
180 my $source = PPI::Util::_slurp( $file );
181 unless ( ref $source ) {
182 # Errors returned as plain string
183 return $class->_error($source);
186 # Retrieve the document from the cache
187 my $document = $CACHE->get_document($source);
188 return $class->_setattr( $document, %attr ) if $document;
192 local $SIG{ALRM} = sub { die "alarm\n" };
194 $document = PPI::Lexer->lex_source( $$source );
198 $document = PPI::Lexer->lex_source( $$source );
202 $CACHE->store_document( $document );
203 return $class->_setattr( $document, %attr );
208 local $SIG{ALRM} = sub { die "alarm\n" };
210 my $document = PPI::Lexer->lex_file( $source );
211 return $class->_setattr( $document, %attr ) if $document;
215 my $document = PPI::Lexer->lex_file( $source );
216 return $class->_setattr( $document, %attr ) if $document;
220 } elsif ( _SCALAR0($source) ) {
223 local $SIG{ALRM} = sub { die "alarm\n" };
225 my $document = PPI::Lexer->lex_source( $$source );
226 return $class->_setattr( $document, %attr ) if $document;
230 my $document = PPI::Lexer->lex_source( $$source );
231 return $class->_setattr( $document, %attr ) if $document;
234 } elsif ( _ARRAY0($source) ) {
235 $source = join '', map { "$_\n" } @$source;
238 local $SIG{ALRM} = sub { die "alarm\n" };
240 my $document = PPI::Lexer->lex_source( $source );
241 return $class->_setattr( $document, %attr ) if $document;
245 my $document = PPI::Lexer->lex_source( $source );
246 return $class->_setattr( $document, %attr ) if $document;
250 $class->_error("Unknown object or reference was passed to PPI::Document::new");
253 # Pull and store the error from the lexer
255 if ( _INSTANCE($@, 'PPI::Exception::Timeout') ) {
256 $errstr = 'Timed out while parsing document';
257 } elsif ( _INSTANCE($@, 'PPI::Exception') ) {
258 $errstr = $@->message;
261 $errstr =~ s/\sat line\s.+$//;
262 } elsif ( PPI::Lexer->errstr ) {
263 $errstr = PPI::Lexer->errstr;
265 $errstr = "Unknown error parsing Perl document";
268 $class->_error( $errstr );
272 Carp::croak("API CHANGE: File names should now be passed to PPI::Document->new to load a file");
276 my ($class, $document, %attr) = @_;
277 $document->{readonly} = !! $attr{readonly};
283 =head2 set_cache $cache
285 As of L<PPI> 1.100, C<PPI::Document> supports parser caching.
287 The default cache class L<PPI::Cache> provides a L<Storable>-based
288 caching or the parsed document based on the MD5 hash of the document as
291 The static C<set_cache> method is used to set the cache object for
292 C<PPI::Document> to use when loading documents. It takes as argument
293 a L<PPI::Cache> object (or something that C<isa> the same).
295 If passed C<undef>, this method will stop using the current cache, if any.
297 For more information on caching, see L<PPI::Cache>.
299 Returns true on success, or C<undef> if not passed a valid param.
304 my $class = ref $_[0] ? ref shift : shift;
306 if ( defined $_[0] ) {
308 my $object = _INSTANCE(shift, 'PPI::Cache') or return undef;
322 If a document cache is currently set, the C<get_cache> method will
325 Returns a L<PPI::Cache> object, or C<undef> if there is no cache
326 currently set for C<PPI::Document>.
338 #####################################################################
339 # PPI::Document Instance Methods
345 The C<readonly> attribute indicates if the document is intended to be
346 read-only, and will never be modified. This is an advisory flag, that
347 writers of L<PPI>-related systems may or may not use to enable
348 optimisations and caches for your document.
350 Returns true if the document is read-only or false if not.
360 =head2 tab_width [ $width ]
362 In order to handle support for C<location> correctly, C<Documents>
363 need to understand the concept of tabs and tab width. The C<tab_width>
364 method is used to get and set the size of the tab width.
366 At the present time, PPI only supports "naive" (width 1) tabs, but we do
367 plan on supporting arbitrary, default and auto-sensing tab widths later.
369 Returns the tab width as an integer, or C<die>s if you attempt to set the
376 return $self->{tab_width} unless @_;
377 $self->{tab_width} = shift;
384 $document->save( $file )
386 The C<save> method serializes the C<PPI::Document> object and saves the
387 resulting Perl document to a file. Returns C<undef> on failure to open
388 or write to the file.
395 open( FILE, '>', $_[0] ) or return undef;
396 print FILE $self->serialize or return undef;
397 close FILE or return undef;
405 Unlike the C<content> method, which shows only the immediate content
406 within an element, Document objects also have to be able to be written
409 When doing this we need to take into account some additional factors.
411 Primarily, we need to handle here-docs correctly, so that are written
412 to the file in the expected place.
414 The C<serialize> method generates the actual file content for a given
415 Document object. The resulting string can be written straight to a file.
417 Returns the serialized document as a string.
423 my @tokens = $self->tokens;
425 # The here-doc content buffer
428 # Start the main loop
430 foreach my $i ( 0 .. $#tokens ) {
431 my $Token = $tokens[$i];
433 # Handle normal tokens
434 unless ( $Token->isa('PPI::Token::HereDoc') ) {
435 my $content = $Token->content;
437 # Handle the trivial cases
438 unless ( $heredoc ne '' and $content =~ /\n/ ) {
443 # We have pending here-doc content that needs to be
444 # inserted just after the first newline in the content.
445 if ( $content eq "\n" ) {
446 # Shortcut the most common case for speed
447 $output .= $content . $heredoc;
449 # Slower and more general version
450 $content =~ s/\n/\n$heredoc/;
458 # This token is a HereDoc.
459 # First, add the token content as normal, which in this
460 # case will definately not contain a newline.
461 $output .= $Token->content;
463 # Now add all of the here-doc content to the heredoc buffer.
464 foreach my $line ( $Token->heredoc ) {
468 if ( $Token->{_damaged} ) {
470 # There are a couple of warning/bug situations
471 # that can occur when a HereDoc content was read in
472 # from the end of a file that we silently allow.
474 # When writing back out to the file we have to
475 # auto-repair these problems if we arn't going back
476 # on to the end of the file.
478 # When calculating $last_line, ignore the final token if
479 # and only if it has a single newline at the end.
480 my $last_index = $#tokens;
481 if ( $tokens[$last_index]->{content} =~ /^[^\n]*\n$/ ) {
485 # This is a two part test.
486 # First, are we on the last line of the
487 # content part of the file
488 my $last_line = List::MoreUtils::none {
489 $tokens[$_] and $tokens[$_]->{content} =~ /\n/
490 } (($i + 1) .. $last_index);
491 if ( ! defined $last_line ) {
492 # Handles the null list case
496 # Secondly, are their any more here-docs after us,
497 # (with content or a terminator)
498 my $any_after = List::MoreUtils::any {
499 $tokens[$_]->isa('PPI::Token::HereDoc')
501 scalar(@{$tokens[$_]->{_heredoc}})
503 defined $tokens[$_]->{_terminator_line}
505 } (($i + 1) .. $#tokens);
506 if ( ! defined $any_after ) {
507 # Handles the null list case
511 # We don't need to repair the last here-doc on the
512 # last line. But we do need to repair anything else.
513 unless ( $last_line and ! $any_after ) {
514 # Add a terminating string if it didn't have one
515 unless ( defined $Token->{_terminator_line} ) {
516 $Token->{_terminator_line} = $Token->{_terminator};
519 # Add a trailing newline to the terminating
520 # string if it didn't have one.
521 unless ( $Token->{_terminator_line} =~ /\n$/ ) {
522 $Token->{_terminator_line} .= "\n";
527 # Now add the termination line to the heredoc buffer
528 if ( defined $Token->{_terminator_line} ) {
529 $heredoc .= $Token->{_terminator_line};
535 if ( $heredoc ne '' ) {
536 # If the file doesn't end in a newline, we need to add one
537 # so that the here-doc content starts on the next line.
538 unless ( $output =~ /\n$/ ) {
542 # Now we add the remaining here-doc content
543 # to the end of the file.
554 The C<hex_id> method generates an unique identifier for the Perl document.
556 This identifier is basically just the serialized document, with
557 Unix-specific newlines, passed through MD5 to produce a hexadecimal string.
559 This identifier is used by a variety of systems (such as L<PPI::Cache>
560 and L<Perl::Metrics>) as a unique key against which to store or cache
561 information about a document (or indeed, to cache the document itself).
563 Returns a 32 character hexadecimal string.
568 PPI::Util::md5hex($_[0]->serialize);
573 =head2 index_locations
575 Within a document, all L<PPI::Element> objects can be considered to have a
576 "location", a line/column position within the document when considered as a
577 file. This position is primarily useful for debugging type activities.
579 The method for finding the position of a single Element is a bit laborious,
580 and very slow if you need to do it a lot. So the C<index_locations> method
581 will index and save the locations of every Element within the Document in
582 advance, making future calls to <PPI::Element::location> virtually free.
584 Please note that this index should always be cleared using C<flush_locations>
585 once you are finished with the locations. If content is added to or removed
586 from the file, these indexed locations will be B<wrong>.
590 sub index_locations {
592 my @tokens = $self->tokens;
594 # Whenever we hit a heredoc we will need to increment by
595 # the number of lines in it's content section when when we
596 # encounter the next token with a newline in it.
599 # Find the first Token without a location
600 my ($first, $location) = ();
601 foreach ( 0 .. $#tokens ) {
602 my $Token = $tokens[$_];
603 next if $Token->{_location};
605 # Found the first Token without a location
606 # Calculate the new location if needed.
609 $self->_add_location( $location, $tokens[$_ - 1], \$heredoc );
612 $self->can('filename') ? $self->filename : undef;
613 $location = [ 1, 1, 1, 1, $logical_file ];
619 # Calculate locations for the rest
620 foreach ( $first .. $#tokens ) {
621 my $Token = $tokens[$_];
622 $Token->{_location} = $location;
623 $location = $self->_add_location( $location, $Token, \$heredoc );
625 # Add any here-doc lines to the counter
626 if ( $Token->isa('PPI::Token::HereDoc') ) {
627 $heredoc += $Token->heredoc + 1;
635 my ($self, $start, $Token, $heredoc) = @_;
636 my $content = $Token->{content};
638 # Does the content contain any newlines
639 my $newlines =()= $content =~ /\n/g;
640 my ($logical_line, $logical_file) =
641 $self->_logical_line_and_file($start, $Token, $newlines);
643 unless ( $newlines ) {
644 # Handle the simple case
646 $start->[LOCATION_LINE],
647 $start->[LOCATION_CHARACTER] + length($content),
648 $start->[LOCATION_COLUMN]
649 + $self->_visual_length(
651 $start->[LOCATION_COLUMN]
658 # This is the more complex case where we hit or
659 # span a newline boundary.
660 my $physical_line = $start->[LOCATION_LINE] + $newlines;
661 my $location = [ $physical_line, 1, 1, $logical_line, $logical_file ];
662 if ( $heredoc and $$heredoc ) {
663 $location->[LOCATION_LINE] += $$heredoc;
664 $location->[LOCATION_LOGICAL_LINE] += $$heredoc;
668 # Does the token have additional characters
669 # after their last newline.
670 if ( $content =~ /\n([^\n]+?)\z/ ) {
671 $location->[LOCATION_CHARACTER] += length($1);
672 $location->[LOCATION_COLUMN] +=
673 $self->_visual_length(
674 $1, $location->[LOCATION_COLUMN],
681 sub _logical_line_and_file {
682 my ($self, $start, $Token, $newlines) = @_;
684 # Regex taken from perlsyn, with the correction that there's no space
685 # required between the line number and the file name.
686 if ($start->[LOCATION_CHARACTER] == 1) {
687 if ( $Token->isa('PPI::Token::Comment') ) {
689 $Token->content =~ m<
694 (?: (\"?) ([^\"]* [^\s\"]) \2 )?
699 return $1, ($3 || $start->[LOCATION_LOGICAL_FILE]);
702 elsif ( $Token->isa('PPI::Token::Pod') ) {
703 my $content = $Token->content;
705 my $file = $start->[LOCATION_LOGICAL_FILE];
706 my $end_of_directive;
712 (\d+) (?: (?! \n) \s)*
713 (?: (\"?) ([^\"]*? [^\s\"]) \2 )??
718 ($line, $file) = ($1, ( $3 || $file ) );
719 $end_of_directive = pos $content;
723 pos $content = $end_of_directive;
724 my $post_directive_newlines =()= $content =~ m< \G [^\n]* \n >xmsg;
725 return $line + $post_directive_newlines - 1, $file;
731 $start->[LOCATION_LOGICAL_LINE] + $newlines,
732 $start->[LOCATION_LOGICAL_FILE];
736 my ($self, $content, $pos) = @_;
738 my $tab_width = $self->tab_width;
739 my ($length, $vis_inc);
741 return length $content if $content !~ /\t/;
743 # Split the content in tab and non-tab parts and calculate the
744 # "visual increase" of each part.
745 for my $part ( split(/(\t)/, $content) ) {
747 $vis_inc = $tab_width - ($pos-1) % $tab_width;
750 $vis_inc = length $part;
761 =head2 flush_locations
763 When no longer needed, the C<flush_locations> method clears all location data
768 sub flush_locations {
769 shift->_flush_locations(@_);
776 The C<normalized> method is used to generate a "Layer 1"
777 L<PPI::Document::Normalized> object for the current Document.
779 A "normalized" Perl Document is an arbitrary structure that removes any
780 irrelevant parts of the document and refactors out variations in style,
781 to attempt to approach something that is closer to the "true meaning"
784 See L<PPI::Normal> for more information on document normalization and
785 the tasks for which it is useful.
787 Returns a L<PPI::Document::Normalized> object, or C<undef> on error.
792 # The normalization process will utterly destroy and mangle
793 # anything passed to it, so we are going to only give it a
795 PPI::Normal->process( $_[0]->clone );
802 The C<complete> method is used to determine if a document is cleanly
803 structured, all braces are closed, the final statement is
804 fully terminated and all heredocs are fully entered.
806 Returns true if the document is complete or false if not.
813 # Every structure has to be complete
814 $self->find_any( sub {
815 $_[1]->isa('PPI::Structure')
821 # Strip anything that isn't a statement off the end
822 my @child = $self->children;
823 while ( @child and not $child[-1]->isa('PPI::Statement') ) {
827 # We must have at least one statement
828 return '' unless @child;
830 # Check the completeness of the last statement
831 return $child[-1]->_complete;
838 #####################################################################
841 # We are a scope boundary
842 ### XS -> PPI/XS.xs:_PPI_Document__scope 0.903+
849 #####################################################################
850 # PPI::Element Methods
854 # die "Cannot insert_before a PPI::Document";
859 # die "Cannot insert_after a PPI::Document";
864 # die "Cannot replace a PPI::Document";
871 #####################################################################
874 # Set the error message
880 # Clear the error message.
881 # Returns the object as a convenience.
891 For error that occur when loading and saving documents, you can use
892 C<errstr>, as either a static or object method, to access the error message.
894 If a Document loads or saves without error, C<errstr> will return false.
906 #####################################################################
907 # Native Storable Support
909 sub STORABLE_freeze {
911 my $class = ref $self;
913 return ($class, \%hash);
917 my ($self, undef, $class, $hash) = @_;
919 foreach ( keys %$hash ) {
920 $self->{$_} = delete $hash->{$_};
922 $self->__link_children;
931 - May need to overload some methods to forcefully prevent Document
932 objects becoming children of another Node.
936 See the L<support section|PPI/SUPPORT> in the main module.
940 Adam Kennedy E<lt>adamk@cpan.orgE<gt>
944 L<PPI>, L<http://ali.as/>
948 Copyright 2001 - 2009 Adam Kennedy.
950 This program is free software; you can redistribute
951 it and/or modify it under the same terms as Perl itself.
953 The full text of the license can be found in the
954 LICENSE file included with this module.