#!/usr/bin/perl -w
#
#   stem2pod
#
# scans a file containing the code for a stem cell
# and updates it with pod generated from the cell's
# attribute variables.
#
# also generates pod for detected methods, subs, and
# any missing standard stuff (author, license, etc.)
#

use strict;
use warnings;
use Data::Dumper;
use File::Slurp;
use Carp;


# These globals are initalized in the BEGIN block at the bottom of this file
# Please treat as constants.
our ( %PRE_POD, %POST_POD );

# Another global/"constant"
our %IS_ATTR_PART = map { $_ => 1 } qw(
  name
  type
  help
  default
  required
  class
  class_args
);


for my $file_name (@ARGV) {
    stem2pod($file_name);
}
exit;


sub stem2pod {
    my ($file_name) = @_;

    my $file_text = read_file($file_name);

    my $attr_spec = find_attr_spec( $file_text, $file_name );

    my @meth_names = find_method_names( $file_text );

    # extract_pod _literally_ extracts it - 
    # removing any pod found from the text.
    my $new_text = $file_text;
    my $pod_sections = extract_pod( \$new_text, \@meth_names );

    my ($class_name) = ($file_text =~ /^package\s+([\w:]+)/m);

    my $attr_pod = generate_attr_pod( $attr_spec, $file_name, $class_name );
    
    generate_boilerplate( $file_text, $class_name );

    my $new_pod = generate_pod( $attr_pod, \@meth_names, $pod_sections );

    my $new_source = build_source( $new_pod, $new_text );

    if ( $file_text eq $new_source ) {
        warn "No changes to make to $file_name\n";
        return;
    }

    write_file( "$file_name.new", $new_source );
}


# Finds and returns a list of the names of 
# methods/subroutines found in the file
sub find_method_names {
    my ( $file_text ) = @_;

    my @meth_names = $file_text =~ /^sub\s+([^\W_]\w*)/gm;

    return @meth_names;
}


# Find and _remove_ any POD in the file. 
# Returns a HoH-like data-structure that you should probably inspect
# with DD to see what it looks like...
sub extract_pod {
    my ( $text_ref, $method_names ) = @_;
    my %pod_sections;
    my @pod_names;
    while ( ${$text_ref} =~ 
        s{
	        ^	         # begin of line
	        (	         # grab pod head line as $1
	            =head\w* # begin pod section
		        \s+
		        (.+?)$   # grab section title as $2
            )
            (
		        .+?      # grab pod body as $3
	        )
	        (?:          # match until after =cut, before =head or at EOF
                ^=cut
              | (?=^=head)
              | \z
            )
        }{}mxs 
    ) {
        my $head  = $1;
        my $title = $2;
        my $body  = $3;

#        # strip out leading and trailing blank lines
#        $body =~ s/\A[\n]\s*[\n]|[\n]\s*[\n]\z//msg;

        # Does the section title contain the name of a method?
        my ($method_name) = grep { $title =~ /$_/i and $_ } @$method_names;
        if ( $method_name ) {
        
            warn "Duplicate pod section for method "
               . "$method_name named [$title].\n"
              if exists $pod_sections{methods}{$method_name};
              
            $pod_sections{methods}{$method_name}{head} = $head;  
            $pod_sections{methods}{$method_name}{body} = $body;
            
            next;
        }
        
        warn "Found pod section with duplicate name [$title]\n"
          if exists $pod_sections{other}{ lc $title };
          
        $pod_sections{other}{ lc $title }{head} = $head;
        $pod_sections{other}{ lc $title }{body} = $body;
    }
#    print Dumper \%pod_sections; exit;
    return \%pod_sections;
}


# Generate all the POD text in one big chunk...
# Fill in most sections with boilerplate, but will
# merge in sections that were already present ( found by extract_pod() )
sub generate_pod {
    my ( $attr_pod, $meth_names, $pod_sections ) = @_;

    my $new_pod;

    $new_pod .= generate_pod_sections( $pod_sections, \%PRE_POD );

    # attr_pod is always regenerated...
    # TODO: perhaps it's better to call generate_attr_pod() here?
    if ($attr_pod) {
        $new_pod .= "=head1 ATTRIBUTES\n\n";
        $new_pod .= strip_blank_lines( $attr_pod ) . "\n\n";
    }

    $new_pod .= "=head1 METHODS\n\n";
    foreach my $meth_name ( @$meth_names ) {

        $new_pod .= "=head2 $meth_name\n\n";
        if ( my $meth_pod = $pod_sections->{methods}{$meth_name} ) {
            $new_pod .= strip_blank_lines( $meth_pod->{body} ) . "\n\n";
            next;
        }

        if ( $meth_name eq 'new' ) {
            $new_pod .= strip_blank_lines( <<END_POD ) . "\n\n";

This is a class method that constructs the object. It uses
the standard Stem API of key/value attributes. These are described
in the \$attr_spec in the module, and it's POD is auto-generated.

END_POD
            next;
        }

        if ( $meth_name eq 'msg_in' ) {
            $new_pod .= strip_blank_lines( <<END_POD ) . "\n\n";

This method is the 'wildcard' Stem message handler and it is passed a single 
argument which is a L<Stem::Msg> object. This method is called if no 
specific message handler method is found in this cell. This is missing
specific documentation to be filled in.

END_POD
            next;
        }

        if ( $meth_name =~ /^(.+)_in$/ ) {
            my $msg_type = $1; 
            $new_pod .= strip_blank_lines( <<END_POD ) . "\n\n";

This method is a Stem message handler for messages of type $msg_type.
It is passed a single argument which is a L<Stem::Msg> object and 
its return value is ignored. This is missing specific documentation 
to be filled in.

END_POD
            next;
        }

        if ( $meth_name =~ /^(.+)_cmd$/ ) {
            my $cmd_type = $1;
            $new_pod .= strip_blank_lines( <<END_POD ) . "\n\n";

This method is a Stem message handler for command messages of type $cmd_type.
It is passed a single argument which is a L<Stem::Msg> object. 
Any return value will be sent back as a 'response' type message to the sender.
This is missing specific documentation to be filled in.

END_POD
            next;
        }

        # All other methods just get this...
        $new_pod .= "This method is missing documentation\n\n";
    }

    $new_pod .= generate_pod_sections( $pod_sections, \%POST_POD );
    
    $new_pod .= "=cut\n\n";

    return $new_pod;
}


sub generate_pod_sections {
    my ($existing_pod, $bp_pod) = @_;
    
    my $new_pod;

    foreach my $section ( @{ $bp_pod->{names} } ) {

        (my $printable_section = $section) =~ tr/_/ /;
        $new_pod .= "=head1 $printable_section\n\n";

        my $found_sec = $existing_pod->{other}{ lc $section };
        
        my $next_part = $found_sec ? 
            $found_sec->{body} : $bp_pod->{boilerplate}{$section} ;

        $new_pod .= strip_blank_lines( $next_part ) . "\n\n";
    }

    return $new_pod;
}


sub generate_boilerplate {
    my ($file_text, $class_name) = @_;

    $PRE_POD{boilerplate}{NAME} = <<END_POD;
$class_name
END_POD

    if ($file_text =~ /\$attr_spec/) {
        $POST_POD{boilerplate}{SEE_ALSO} = <<END_POD;
L<Stem::Class>
END_POD
    }

return;
}

# strip out leading and trailing blank lines
sub strip_blank_lines {
    my ($text) = @_;
    $text =~ s/\A[\n]?\s*[\n]|[\n]\s*[\n]?\z//msg;
    return $text;
}


# tries to locate the $attr_spec array ref in the text
# and returns it as a real data structure (eval magick)
sub find_attr_spec {
    my ( $file_text, $file_name ) = @_;

    # The regex below is naive but will be fine for now. I would
    # prefer to use Text::Balanced or Regexp::Common though...
    my $attr_spec;
    if ( $file_text =~ /\$attr_spec\s*=\s*(\[[^\]]*\])\s*;/ ) {
        $attr_spec = eval $1
          or croak "Error parsing \$attr_spec from $file_name:\n$@\n";
    }

    return $attr_spec;
}


# blow away __END__ and everything after (if present) 
# and replace with generated POD
sub build_source {
    my ( $new_pod, $file_text ) = @_;

    ( my $new_text = $file_text ) =~ s/^__END__[\n].*//msg;

    return $new_text . "__END__\n\n" . $new_pod;
}


# Stolen from uri's original script, but really, 
# really needs to be cleaned up...
# TODO: clean up this sub, somehow...
sub generate_attr_pod {
    my ( $attr_list, $file_name, $package ) = @_;

    my $pod .= <<POD ;

=head2 Class Attributes for $package

This pod is generated from the \$attr_spec in the module.
That data is also used by L<Stem::Class> to parse the arguments 
passed to the class constructor new().

=over 4

POD

    #print "POD [$pod]\n" ;

    foreach my $attr_ref ( @{$attr_list} ) {

        my $name = $attr_ref->{name};

        unless ($name) {

            warn <<WARN ;
Missing attribute name in Class $package in file $file_name
WARN
            next;
        }

        $pod .= <<POD ;

=item * B<$name>

=over 4

POD

        my $help = $attr_ref->{help};

        unless ( defined($help) ) {
            warn <<WARN ;
Missing help in attribute $name in Class $package in file $file_name
WARN
            next;
        }

        $pod .= <<POD ;

=item - $help
POD
# suppress line break in POD output after description
        chomp $pod; 

        if ( my $attr_class = $attr_ref->{class} ) {

            my $class_args =
              '<' . join( ', ', @{ $attr_ref->{class_args} || [] } ) . '>';

            $pod .= <<POD ;
=item - Class Attribute: '$name' is an object of class $attr_class and constructed with: $class_args
POD
        }

        exists( $attr_ref->{type} ) and $pod .= <<POD ;

=item - The type of '$name' is: $attr_ref->{type}
POD

        if ( exists( $attr_ref->{default} ) ) {

            my $default = $attr_ref->{default};

            if ( ref($default) eq "ARRAY" ) {

                $default = '(' . join( ', ', @{$default} ) . ')';
            }

            $pod .= <<POD

=item - B<Default> value: $default
POD
        }

        exists( $attr_ref->{required} ) and $pod .= <<POD ;

=item - It is B<required>.
POD

        foreach my $attr ( sort keys %{$attr_ref} ) {
            next if $IS_ATTR_PART{$attr};
            $pod .= "Unknown attribute $attr\n";
        }

        $pod .= <<POD ;
     
=back 
  
POD
    }

    $pod .= <<POD ;

=back

=cut

POD

    #print "[$pod]" ;

    #print "POD2 [", substr($pod, 0, 40), "]\n" ;

    return $pod;
}


# Globals declared at the top of this script are initalized here
# Keeping at the bottom of this file to (try to) reduce clutter.
BEGIN {

    # Pod sections that go before attribute and method sections
    my @PRE_POD_NAMES = qw( NAME SYNOPSIS DESCRIPTION );

    # sections that go after
    my @POST_POD_NAMES = qw( BUGS TODO SEE_ALSO AUTHOR LICENSE COPYRIGHT );


    # Boilerplate for each "pre" section
    my %PRE_POD_BP = (

        NAME => <<'END_POD',
Name of module
END_POD

        SYNOPSIS => <<'END_POD',
Code/config example
END_POD

        DESCRIPTION => <<'END_POD',
What is this module good for?
END_POD

    );


    # Boilerplate for each "post" section
    my %POST_POD_BP = (

        BUGS => <<'END_POD',
None
END_POD

        TODO => <<'END_POD',
END_POD

        SEE_ALSO => <<'END_POD',
END_POD

        AUTHOR => <<'END_POD',
Uri Guttman uri@cpan.org
END_POD

        LICENSE => <<'END_POD',
Same as Perl
END_POD

        COPYRIGHT => <<'END_POD',
(C) Uri Guttman, 2009
END_POD

    );


    # globals used in other parts of the program
    %PRE_POD = (
        names       => \@PRE_POD_NAMES,
        boilerplate => \%PRE_POD_BP,
    );

    %POST_POD = (
        names       => \@POST_POD_NAMES,
        boilerplate => \%POST_POD_BP,
    );

}