X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FB%2FB%2FDeparse.pm;h=37b98a034386607fafb1d11b2f227ce7163a40f1;hb=c27ea44e678f02a1903a4aa0e3110ba824fcd93a;hp=7e14c8e5cc6ec6a7fdce51f90d688a5ec191f242;hpb=1ced0934e21e7f931312ef2a026cf1ae22815ba3;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/B/B/Deparse.pm b/ext/B/B/Deparse.pm index 7e14c8e..37b98a0 100644 --- a/ext/B/B/Deparse.pm +++ b/ext/B/B/Deparse.pm @@ -7,19 +7,21 @@ # but essentially none of his code remains. package B::Deparse; -use Carp 'cluck', 'croak'; -use B qw(class main_root main_start main_cv svref_2object opnumber cstring +use Carp; +use B qw(class main_root main_start main_cv svref_2object opnumber perlstring OPf_WANT OPf_WANT_VOID OPf_WANT_SCALAR OPf_WANT_LIST - OPf_KIDS OPf_REF OPf_STACKED OPf_SPECIAL - OPpLVAL_INTRO OPpENTERSUB_AMPER OPpSLICE OPpCONST_BARE + OPf_KIDS OPf_REF OPf_STACKED OPf_SPECIAL OPf_MOD + OPpLVAL_INTRO OPpOUR_INTRO OPpENTERSUB_AMPER OPpSLICE OPpCONST_BARE OPpTRANS_SQUASH OPpTRANS_DELETE OPpTRANS_COMPLEMENT OPpTARGET_MY - OPpCONST_ARYBASE OPpEXISTS_SUB - SVf_IOK SVf_NOK SVf_ROK SVf_POK + OPpCONST_ARYBASE OPpEXISTS_SUB OPpSORT_NUMERIC OPpSORT_INTEGER + OPpSORT_REVERSE + SVf_IOK SVf_NOK SVf_ROK SVf_POK SVpad_OUR SVf_FAKE CVf_METHOD CVf_LOCKED CVf_LVALUE PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE PMf_SKIPWHITE PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED); -$VERSION = 0.60; +$VERSION = 0.63; use strict; +use vars qw/$AUTOLOAD/; use warnings (); # Changes between 0.50 and 0.51: @@ -34,7 +36,7 @@ use warnings (); # - package declarations using cop_stash # - subs, formats and code sorted by cop_seq # Changes between 0.51 and 0.52: -# - added pp_threadsv (special variables under USE_THREADS) +# - added pp_threadsv (special variables under USE_5005THREADS) # - added documentation # Changes between 0.52 and 0.53: # - many changes adding precedence contexts and associativity @@ -91,8 +93,27 @@ use warnings (); # - separate recognition of constant subs # - rewrote continue block handling, now recoginizing for loops # - added more control of expanding control structures +# Changes between 0.60 and 0.61 (mostly by Robin Houston) +# - many bug-fixes +# - support for pragmas and 'use' +# - support for the little-used $[ variable +# - support for __DATA__ sections +# - UTF8 support +# - BEGIN, CHECK, INIT and END blocks +# - scoping of subroutine declarations fixed +# - compile-time output from the input program can be suppressed, so that the +# output is just the deparsed code. (a change to O.pm in fact) +# - our() declarations +# - *all* the known bugs are now listed in the BUGS section +# - comprehensive test mechanism (TEST -deparse) +# Changes between 0.62 and 0.63 (mostly by Rafael Garcia-Suarez) +# - bug-fixes +# - new switch -P +# - support for command-line switches (-l, -0, etc.) # Todo: +# (See also BUGS section at the end of this file) +# # - finish tr/// changes # - add option for even more parens (generalize \&foo change) # - left/right context @@ -112,7 +133,7 @@ use warnings (); # - here-docs? # Tests that will always fail: -# comp/redef.t -- all (redefinition happens at compile time) +# (see t/TEST for the short list) # Object fields (were globals): # @@ -153,6 +174,9 @@ use warnings (); # keys are names of subs for which we've printed declarations. # That means we can omit parentheses from the arguments. # +# subs_deparsed +# Keeps track of fully qualified names of all deparsed subs. +# # parens: -p # linenums: -l # unquote: -q @@ -205,6 +229,13 @@ use warnings (); # 1 statement modifiers # 0 statement level +# Also, lineseq may pass a fourth parameter to the pp_ routines: +# if present, the fourth parameter is passed on by deparse. +# +# If present and true, it means that the op exists directly as +# part of a lineseq. Currently it's only used by scopeop to +# decide whether its results need to be enclosed in a do {} block. + # Nonprinting characters with special meaning: # \cS - steal parens (see maybe_parens_unop) # \n - newline and indent @@ -229,6 +260,9 @@ sub todo { $seq = 0; } push @{$self->{'subs_todo'}}, [$seq, $cv, $is_form]; + unless ($is_form || class($cv->STASH) eq 'SPECIAL') { + $self->{'subs_deparsed'}{$cv->STASH->NAME."::".$cv->GV->NAME} = 1; + } } sub next_todo { @@ -249,7 +283,13 @@ sub next_todo { return $use_dec; } } - return "sub $name " . $self->deparse_sub($cv); + my $l = ''; + if ($self->{'linenums'}) { + my $line = $gv->LINE; + my $file = $gv->FILE; + $l = "\n\f#line $line \"$file\"\n"; + } + return "${l}sub $name " . $self->deparse_sub($cv); } } @@ -257,6 +297,7 @@ sub next_todo { sub begin_is_use { my ($self, $cv) = @_; my $root = $cv->ROOT; + local @$self{qw'curcv curcvlex'} = ($cv); #require B::Debug; #B::walkoptree($cv->ROOT, "debug"); my $lineseq = $root->first; @@ -285,7 +326,6 @@ sub begin_is_use { return unless $self->const_sv($constop)->PV eq $module; $constop = $constop->sibling; - $version = $self->const_sv($constop)->int_value; $constop = $constop->sibling; return if $constop->name ne "method_named"; @@ -304,18 +344,18 @@ sub begin_is_use { # See if there are import arguments my $args = ''; - my $constop = $entersub->first->sibling; # Skip over pushmark - return unless $self->const_sv($constop)->PV eq $module; + my $svop = $entersub->first->sibling; # Skip over pushmark + return unless $self->const_sv($svop)->PV eq $module; # Pull out the arguments - for ($constop=$constop->sibling; $constop->name eq "const"; - $constop = $constop->sibling) { + for ($svop=$svop->sibling; $svop->name ne "method_named"; + $svop = $svop->sibling) { $args .= ", " if length($args); - $args .= $self->deparse($constop, 6); + $args .= $self->deparse($svop, 6); } my $use = 'use'; - my $method_named = $constop; + my $method_named = $svop; return if $method_named->name ne "method_named"; my $method_name = $self->const_sv($method_named)->PV; @@ -358,10 +398,24 @@ sub stash_subs { next if $key eq 'main::'; # avoid infinite recursion my $class = class($val); if ($class eq "PV") { - # Just a prototype + # Just a prototype. As an ugly but fairly effective way + # to find out if it belongs here is to see if the AUTOLOAD + # (if any) for the stash was defined in one of our files. + my $A = $stash{"AUTOLOAD"}; + if (defined ($A) && class($A) eq "GV" && defined($A->CV) + && class($A->CV) eq "CV") { + my $AF = $A->FILE; + next unless $AF eq $0 || exists $self->{'files'}{$AF}; + } push @{$self->{'protos_todo'}}, [$pack . $key, $val->PV]; } elsif ($class eq "IV") { - # Just a name + # Just a name. As above. + my $A = $stash{"AUTOLOAD"}; + if (defined ($A) && class($A) eq "GV" && defined($A->CV) + && class($A->CV) eq "CV") { + my $AF = $A->FILE; + next unless $AF eq $0 || exists $self->{'files'}{$AF}; + } push @{$self->{'protos_todo'}}, [$pack . $key, undef]; } elsif ($class eq "GV") { if (class(my $cv = $val->CV) ne "SPECIAL") { @@ -440,6 +494,8 @@ sub new { $self->{'files'}{$1} = 1; } elsif ($arg eq "-p") { $self->{'parens'} = 1; + } elsif ($arg eq "-P") { + $self->{'noproto'} = 1; } elsif ($arg eq "-l") { $self->{'linenums'} = 1; } elsif ($arg eq "-q") { @@ -453,9 +509,15 @@ sub new { return $self; } -sub WARN_MASK () { - # Mask out the bits that C uses - $warnings::Bits{all} | $warnings::DeadBits{all}; +{ + # Mask out the bits that L uses + my $WARN_MASK; + BEGIN { + $WARN_MASK = $warnings::Bits{all} | $warnings::DeadBits{all}; + } + sub WARN_MASK () { + return $WARN_MASK; + } } # Initialise the contextual information, either from @@ -478,10 +540,23 @@ sub compile { my(@args) = @_; return sub { my $self = B::Deparse->new(@args); + # First deparse command-line args + if (defined $^I) { # deparse -i + print q(BEGIN { $^I = ).perlstring($^I).qq(; }\n); + } + if ($^W) { # deparse -w + print qq(BEGIN { \$^W = $^W; }\n); + } + if ($/ ne "\n" or defined $O::savebackslash) { # deparse -l and -0 + my $fs = perlstring($/) || 'undef'; + my $bs = perlstring($O::savebackslash) || 'undef'; + print qq(BEGIN { \$/ = $fs; \$\\ = $bs; }\n); + } my @BEGINs = B::begin_av->isa("B::AV") ? B::begin_av->ARRAY : (); + my @CHECKs = B::check_av->isa("B::AV") ? B::check_av->ARRAY : (); my @INITs = B::init_av->isa("B::AV") ? B::init_av->ARRAY : (); my @ENDs = B::end_av->isa("B::AV") ? B::end_av->ARRAY : (); - for my $block (@BEGINs, @INITs, @ENDs) { + for my $block (@BEGINs, @CHECKs, @INITs, @ENDs) { $self->todo($block, 0); } $self->stash_subs(); @@ -500,9 +575,11 @@ sub compile { # Print __DATA__ section, if necessary no strict 'refs'; - if (defined *{$self->{'curstash'}."::DATA"}{IO}) { + my $laststash = defined $self->{'curcop'} + ? $self->{'curcop'}->stash->NAME : $self->{'curstash'}; + if (defined *{$laststash."::DATA"}{IO}) { print "__DATA__\n"; - print readline(*{$self->{'curstash'}."::DATA"}); + print readline(*{$laststash."::DATA"}); } } } @@ -510,7 +587,7 @@ sub compile { sub coderef2text { my $self = shift; my $sub = shift; - croak "Usage: ->coderef2text(CODEREF)" unless ref($sub) eq "CODE"; + croak "Usage: ->coderef2text(CODEREF)" unless UNIVERSAL::isa($sub, "CODE"); $self->init(); return $self->indent($self->deparse_sub(svref_2object($sub))); @@ -564,13 +641,13 @@ sub ambient_pragmas { elsif ($name eq 're') { require re; if ($val eq 'none') { - $hint_bits &= ~re::bits(qw/taint eval asciirange/); + $hint_bits &= ~re::bits(qw/taint eval/); next(); } my @names; if ($val eq 'all') { - @names = qw/taint eval asciirange/; + @names = qw/taint eval/; } elsif (ref $val) { @names = @$val; @@ -583,7 +660,7 @@ sub ambient_pragmas { elsif ($name eq 'warnings') { if ($val eq 'none') { - $warning_bits = "\0"x12; + $warning_bits = $warnings::NONE; next(); } @@ -595,7 +672,7 @@ sub ambient_pragmas { @names = split/\s+/, $val; } - $warning_bits = "\0"x12 if !defined ($warning_bits); + $warning_bits = $warnings::NONE if !defined ($warning_bits); $warning_bits |= warnings::bits(@names); } @@ -622,11 +699,14 @@ sub ambient_pragmas { sub deparse { my $self = shift; - my($op, $cx) = @_; + my($op, $cx, $flags) = @_; Carp::confess("Null op in deparse") if !defined($op) || class($op) eq "NULL"; my $meth = "pp_" . $op->name; + if (is_scope($op)) { + return $self->$meth($op, $cx, $flags); + } return $self->$meth($op, $cx); } @@ -662,6 +742,7 @@ sub deparse_sub { my $self = shift; my $cv = shift; my $proto = ""; +Carp::confess("NULL in deparse_sub") if !defined($cv) || $cv->isa("B::NULL"); Carp::confess("SPECIAL in deparse_sub") if $cv->isa("B::SPECIAL"); local $self->{'curcop'} = $self->{'curcop'}; if ($cv->FLAGS & SVf_POK) { @@ -678,18 +759,35 @@ Carp::confess("SPECIAL in deparse_sub") if $cv->isa("B::SPECIAL"); local($self->{'curcvlex'}); local(@$self{qw'curstash warnings hints'}) = @$self{qw'curstash warnings hints'}; + my $body; if (not null $cv->ROOT) { - # skip leavesub - return $proto . "{\n\t" . - $self->deparse($cv->ROOT->first, 0) . "\n\b}\n"; + my $lineseq = $cv->ROOT->first; + if ($lineseq->name eq "lineseq") { + my @ops; + for(my$o=$lineseq->first; $$o; $o=$o->sibling) { + push @ops, $o; + } + $body = $self->lineseq(undef, @ops).";"; + my $scope_en = $self->find_scope_en($lineseq); + if (defined $scope_en) { + my $subs = join"", $self->seq_subs($scope_en); + $body .= ";\n$subs" if length($subs); + } + } + else { + $body = $self->deparse($cv->ROOT->first, 0); + } } - my $sv = $cv->const_sv; - if ($$sv) { - # uh-oh. inlinable sub... format it differently - return $proto . "{ " . const($sv) . " }\n"; - } else { # XSUB? (or just a declaration) - return "$proto;\n"; + else { + my $sv = $cv->const_sv; + if ($$sv) { + # uh-oh. inlinable sub... format it differently + return $proto . "{ " . const($sv) . " }\n"; + } else { # XSUB? (or just a declaration) + return "$proto;\n"; + } } + return $proto ."{\n\t$body\n\b}" ."\n"; } sub deparse_format { @@ -698,24 +796,27 @@ sub deparse_format { my @text; local($self->{'curcv'}) = $form; local($self->{'curcvlex'}); + local($self->{'in_format'}) = 1; local(@$self{qw'curstash warnings hints'}) - = @$self{'curstash warnings hints'}; + = @$self{qw'curstash warnings hints'}; my $op = $form->ROOT; my $kid; + return "\f." if $op->first->name eq 'stub' + || $op->first->name eq 'nextstate'; $op = $op->first->first; # skip leavewrite, lineseq while (not null $op) { $op = $op->sibling; # skip nextstate my @exprs; $kid = $op->first->sibling; # skip pushmark - push @text, $self->const_sv($kid)->PV; + push @text, "\f".$self->const_sv($kid)->PV; $kid = $kid->sibling; for (; not null $kid; $kid = $kid->sibling) { push @exprs, $self->deparse($kid, 0); } - push @text, join(", ", @exprs)."\n" if @exprs; + push @text, "\f".join(", ", @exprs)."\n" if @exprs; $op = $op->sibling; } - return join("", @text) . "."; + return join("", @text) . "\f."; } sub is_scope { @@ -773,9 +874,16 @@ sub maybe_parens_unop { my $self = shift; my($name, $kid, $cx) = @_; if ($cx > 16 or $self->{'parens'}) { - return "$name(" . $self->deparse($kid, 1) . ")"; + $kid = $self->deparse($kid, 1); + if ($name eq "umask" && $kid =~ /^\d+$/) { + $kid = sprintf("%#o", $kid); + } + return "$name($kid)"; } else { $kid = $self->deparse($kid, 16); + if ($name eq "umask" && $kid =~ /^\d+$/) { + $kid = sprintf("%#o", $kid); + } if (substr($kid, 0, 1) eq "\cS") { # use kid's parens return $name . substr($kid, 1); @@ -802,11 +910,14 @@ sub maybe_parens_func { sub maybe_local { my $self = shift; my($op, $cx, $text) = @_; - if ($op->private & OPpLVAL_INTRO and not $self->{'avoid_local'}{$$op}) { + my $our_intro = ($op->name =~ /^(gv|rv2)[ash]v$/) ? OPpOUR_INTRO : 0; + if ($op->private & (OPpLVAL_INTRO|$our_intro) + and not $self->{'avoid_local'}{$$op}) { + my $our_local = ($op->private & OPpLVAL_INTRO) ? "local" : "our"; if (want_scalar($op)) { - return "local $text"; + return "$our_local $text"; } else { - return $self->maybe_parens_func("local", $text, $cx, 16); + return $self->maybe_parens_func("$our_local", $text, $cx, 16); } } else { return $text; @@ -848,115 +959,75 @@ sub maybe_my { # The following OPs don't have functions: # pp_padany -- does not exist after parsing -# pp_rcatline -- does not exist - -sub pp_enter { # see also leave - cluck "unexpected OP_ENTER"; - return "XXX"; -} - -sub pp_pushmark { # see also list - cluck "unexpected OP_PUSHMARK"; - return "XXX"; -} - -sub pp_leavesub { # see also deparse_sub - cluck "unexpected OP_LEAVESUB"; - return "XXX"; -} - -sub pp_leavewrite { # see also deparse_format - cluck "unexpected OP_LEAVEWRITE"; - return "XXX"; -} - -sub pp_method { # see also entersub - cluck "unexpected OP_METHOD"; - return "XXX"; -} - -sub pp_regcmaybe { # see also regcomp - cluck "unexpected OP_REGCMAYBE"; - return "XXX"; -} - -sub pp_regcreset { # see also regcomp - cluck "unexpected OP_REGCRESET"; - return "XXX"; -} - -sub pp_substcont { # see also subst - cluck "unexpected OP_SUBSTCONT"; - return "XXX"; -} - -sub pp_grepstart { # see also grepwhile - cluck "unexpected OP_GREPSTART"; - return "XXX"; -} - -sub pp_mapstart { # see also mapwhile - cluck "unexpected OP_MAPSTART"; - return "XXX"; -} - -sub pp_flip { # see also flop - cluck "unexpected OP_FLIP"; - return "XXX"; -} - -sub pp_iter { # see also leaveloop - cluck "unexpected OP_ITER"; - return "XXX"; -} - -sub pp_enteriter { # see also leaveloop - cluck "unexpected OP_ENTERITER"; - return "XXX"; -} - -sub pp_enterloop { # see also leaveloop - cluck "unexpected OP_ENTERLOOP"; - return "XXX"; -} -sub pp_leaveeval { # see also entereval - cluck "unexpected OP_LEAVEEVAL"; - return "XXX"; +sub AUTOLOAD { + if ($AUTOLOAD =~ s/^.*::pp_//) { + warn "unexpected OP_".uc $AUTOLOAD; + return "XXX"; + } else { + die "Undefined subroutine $AUTOLOAD called"; + } } -sub pp_entertry { # see also leavetry - cluck "unexpected OP_ENTERTRY"; - return "XXX"; -} +sub DESTROY {} # Do not AUTOLOAD +# $root should be the op which represents the root of whatever +# we're sequencing here. If it's undefined, then we don't append +# any subroutine declarations to the deparsed ops, otherwise we +# append appropriate declarations. sub lineseq { - my $self = shift; - my(@ops) = @_; + my($self, $root, @ops) = @_; my($expr, @exprs); + + my $out_cop = $self->{'curcop'}; + my $out_seq = defined($out_cop) ? $out_cop->cop_seq : undef; + my $limit_seq; + if (defined $root) { + $limit_seq = $out_seq; + my $nseq = $self->find_scope_st($root->sibling) if ${$root->sibling}; + $limit_seq = $nseq if !defined($limit_seq) + or defined($nseq) && $nseq < $limit_seq; + } + $limit_seq = $self->{'limit_seq'} + if defined($self->{'limit_seq'}) + && (!defined($limit_seq) || $self->{'limit_seq'} < $limit_seq); + local $self->{'limit_seq'} = $limit_seq; for (my $i = 0; $i < @ops; $i++) { $expr = ""; if (is_state $ops[$i]) { $expr = $self->deparse($ops[$i], 0); $i++; - last if $i > $#ops; + if ($i > $#ops) { + push @exprs, $expr; + last; + } } - if (!is_state $ops[$i] and $ops[$i+1] and !null($ops[$i+1]) and - $ops[$i+1]->name eq "leaveloop" and $self->{'expand'} < 3) + if (!is_state $ops[$i] and (my $ls = $ops[$i+1]) and + !null($ops[$i+1]) and $ops[$i+1]->name eq "lineseq") { - push @exprs, $expr . $self->for_loop($ops[$i], 0); - $i++; - next; + if ($ls->first && !null($ls->first) && is_state($ls->first) + && (my $sib = $ls->first->sibling)) { + if (!null($sib) && $sib->name eq "leaveloop") { + push @exprs, $expr . $self->for_loop($ops[$i], 0); + $i++; + next; + } + } } - $expr .= $self->deparse($ops[$i], 0); - push @exprs, $expr if length $expr; + $expr .= $self->deparse($ops[$i], 0, (@ops != 1)); + $expr =~ s/;\n?\z//; + push @exprs, $expr; } - for(@exprs[0..@exprs-1]) { s/;\n\z// } - return join(";\n", @exprs); + my $body = join(";\n", grep {length} @exprs); + my $subs = ""; + if (defined $root && defined $limit_seq && !$self->{'in_format'}) { + $subs = join "\n", $self->seq_subs($limit_seq); + } + return join(";\n", grep {length} $body, $subs); } sub scopeop { - my($real_block, $self, $op, $cx) = @_; + my($real_block, $self, $op, $cx, $flags) = @_; my $kid; my @kids; @@ -986,10 +1057,10 @@ sub scopeop { for (; !null($kid); $kid = $kid->sibling) { push @kids, $kid; } - if ($cx > 0) { # inside an expression, (a do {} while for lineseq) - return "do { " . $self->lineseq(@kids) . " }"; + if ($flags || $cx > 0) { # inside an expression, (a do {} while for lineseq) + return "do {\n\t" . $self->lineseq($op, @kids) . "\n\b}"; } else { - my $lineseq = $self->lineseq(@kids); + my $lineseq = $self->lineseq($op, @kids); return (length ($lineseq) ? "$lineseq;" : ""); } } @@ -1010,15 +1081,16 @@ sub gv_name { Carp::confess() if $gv->isa("B::CV"); my $stash = $gv->STASH->NAME; my $name = $gv->SAFENAME; - if ($stash eq $self->{'curstash'} or $globalnames{$name} + if (($stash eq 'main' && $globalnames{$name}) + or ($stash eq $self->{'curstash'} && !$globalnames{$name}) or $name =~ /^[^A-Za-z_]/) { $stash = ""; } else { $stash = $stash . "::"; } - if ($name =~ /^\^../) { - $name = "{$name}"; # ${^WARNING_BITS} etc + if ($name =~ /^(\^..|{)/) { + $name = "{$name}"; # ${^WARNING_BITS}, etc and ${ } return $stash . $name; } @@ -1045,6 +1117,7 @@ sub lex_in_scope { my ($self, $name) = @_; $self->populate_curcvlex() if !defined $self->{'curcvlex'}; + return 0 if !defined($self->{'curcop'}); my $seq = $self->{'curcop'}->cop_seq; return 0 if !exists $self->{'curcvlex'}{$name}; for my $a (@{$self->{'curcvlex'}{$name}}) { @@ -1056,36 +1129,59 @@ sub lex_in_scope { sub populate_curcvlex { my $self = shift; - for (my $cv = $self->{'curcv'}; $$cv; $cv = $cv->OUTSIDE) { - my @padlist = $cv->PADLIST->ARRAY; + for (my $cv = $self->{'curcv'}; class($cv) eq "CV"; $cv = $cv->OUTSIDE) { + my $padlist = $cv->PADLIST; + # an undef CV still in lexical chain + next if class($padlist) eq "SPECIAL"; + my @padlist = $padlist->ARRAY; my @ns = $padlist[0]->ARRAY; for (my $i=0; $i<@ns; ++$i) { next if class($ns[$i]) eq "SPECIAL"; + next if $ns[$i]->FLAGS & SVpad_OUR; # Skip "our" vars if (class($ns[$i]) eq "PV") { # Probably that pesky lexical @_ next; } my $name = $ns[$i]->PVX; - my $seq_st = $ns[$i]->NVX; - my $seq_en = int($ns[$i]->IVX); + my ($seq_st, $seq_en) = + ($ns[$i]->FLAGS & SVf_FAKE) + ? (0, 999999) + : ($ns[$i]->NVX, $ns[$i]->IVX); push @{$self->{'curcvlex'}{$name}}, [$seq_st, $seq_en]; } } } -# Recurses down the tree, looking for a COP -sub find_cop { - my ($self, $op) = @_; - if ($op->flags & OPf_KIDS) { - for (my $o=$op->first; $$o; $o=$o->sibling) { - return $o if is_state($o); - my $r = $self->find_cop($o); - return $r if defined $r; +sub find_scope_st { ((find_scope(@_))[0]); } +sub find_scope_en { ((find_scope(@_))[1]); } + +# Recurses down the tree, looking for pad variable introductions and COPs +sub find_scope { + my ($self, $op, $scope_st, $scope_en) = @_; + carp("Undefined op in find_scope") if !defined $op; + return ($scope_st, $scope_en) unless $op->flags & OPf_KIDS; + + for (my $o=$op->first; $$o; $o=$o->sibling) { + if ($o->name =~ /^pad.v$/ && $o->private & OPpLVAL_INTRO) { + my $s = int($self->padname_sv($o->targ)->NVX); + my $e = $self->padname_sv($o->targ)->IVX; + $scope_st = $s if !defined($scope_st) || $s < $scope_st; + $scope_en = $e if !defined($scope_en) || $e > $scope_en; + } + elsif (is_state($o)) { + my $c = $o->cop_seq; + $scope_st = $c if !defined($scope_st) || $c < $scope_st; + $scope_en = $c if !defined($scope_en) || $c > $scope_en; + } + elsif ($o->flags & OPf_KIDS) { + ($scope_st, $scope_en) = + $self->find_scope($o, $scope_st, $scope_en) } } - return undef; + + return ($scope_st, $scope_en); } # Returns a list of subs which should be inserted before the COP @@ -1095,8 +1191,8 @@ sub cop_subs { # If we have nephews, then our sequence number indicates # the cop_seq of the end of some sort of scope. if (class($op->sibling) ne "NULL" && $op->sibling->flags & OPf_KIDS - and my $ncop = $self->find_cop($op->sibling)) { - $seq = $ncop->cop_seq; + and my $nseq = $self->find_scope_st($op->sibling) ) { + $seq = $nseq; } $seq = $out_seq if defined($out_seq) && $out_seq < $seq; return $self->seq_subs($seq); @@ -1107,6 +1203,7 @@ sub seq_subs { my @text; #push @text, "# ($seq)\n"; + return "" if !defined $seq; while (scalar(@{$self->{'subs_todo'}}) and $seq > $self->{'subs_todo'}[0][0]) { push @text, $self->next_todo; @@ -1121,9 +1218,8 @@ sub pp_nextstate { my($op, $cx) = @_; $self->{'curcop'} = $op; my @text; - @text = $op->label . ": " if $op->label; -#push @text, "# ", $op->cop_seq, "\n"; push @text, $self->cop_subs($op); + push @text, $op->label . ": " if $op->label; my $stash = $op->stashpv; if ($stash ne $self->{'curstash'}) { push @text, "package $stash;\n"; @@ -1142,10 +1238,10 @@ sub pp_nextstate { my $warnings = $op->warnings; my $warning_bits; if ($warnings->isa("B::SPECIAL") && $$warnings == 4) { - $warning_bits = $warnings::Bits{"all"}; + $warning_bits = $warnings::Bits{"all"} & WARN_MASK; } elsif ($warnings->isa("B::SPECIAL") && $$warnings == 5) { - $warning_bits = "\0"x12; + $warning_bits = $warnings::NONE; } elsif ($warnings->isa("B::SPECIAL")) { $warning_bits = undef; @@ -1170,19 +1266,36 @@ sub pp_nextstate { sub declare_warnings { my ($from, $to) = @_; - if ($to eq warnings::bits("all")) { + if (($to & WARN_MASK) eq warnings::bits("all")) { return "use warnings;\n"; } - elsif ($to eq "\0"x12) { + elsif (($to & WARN_MASK) eq "\0"x length($to)) { return "no warnings;\n"; } - return "BEGIN {\${^WARNING_BITS} = ".cstring($to)."}\n"; + return "BEGIN {\${^WARNING_BITS} = ".perlstring($to)."}\n"; } sub declare_hints { my ($from, $to) = @_; - my $bits = $to; - return sprintf "BEGIN {\$^H &= ~0xFF; \$^H |= %x}\n", $bits; + my $use = $to & ~$from; + my $no = $from & ~$to; + my $decls = ""; + for my $pragma (hint_pragmas($use)) { + $decls .= "use $pragma;\n"; + } + for my $pragma (hint_pragmas($no)) { + $decls .= "no $pragma;\n"; + } + return $decls; +} + +sub hint_pragmas { + my ($bits) = @_; + my @pragmas; + push @pragmas, "integer" if $bits & 0x1; + push @pragmas, "strict 'refs'" if $bits & 0x2; + push @pragmas, "bytes" if $bits & 0x8; + return @pragmas; } sub pp_dbstate { pp_nextstate(@_) } @@ -1196,7 +1309,16 @@ sub baseop { return $name; } -sub pp_stub { baseop(@_, "()") } +sub pp_stub { + my $self = shift; + my($op, $cx, $name) = @_; + if ($cx) { + return "()"; + } + else { + return "();"; + } +} sub pp_wantarray { baseop(@_, "wantarray") } sub pp_fork { baseop(@_, "fork") } sub pp_wait { maybe_targmy(@_, \&baseop, "wait") } @@ -1439,7 +1561,6 @@ sub pp_scalar { sub padval { my $self = shift; my $targ = shift; - #cluck "curcv was undef" unless $self->{curcv}; return (($self->{'curcv'}->PADLIST->ARRAY)[1]->ARRAY)[$targ]; } @@ -1494,6 +1615,12 @@ sub pp_readline { return $self->unop($op, $cx, "readline"); } +sub pp_rcatline { + my $self = shift; + my($op) = @_; + return "<" . $self->gv_name($op->gv) . ">"; +} + # Unary operators that can occur as pseudo-listops inside double quotes sub dq_unop { my $self = shift; @@ -1581,6 +1708,7 @@ sub pp_ftbinary { ftst(@_, "-B") } sub SWAP_CHILDREN () { 1 } sub ASSIGN () { 2 } # has OP= variant +sub LIST_CONTEXT () { 4 } # Assignment is in list context my(%left, %right); @@ -1683,6 +1811,8 @@ sub binop { ($left, $right) = ($right, $left); } $left = $self->deparse_binop_left($op, $left, $prec); + $left = "($left)" if $flags & LIST_CONTEXT + && $left !~ /^(my|our|local|)[\@\(]/; $right = $self->deparse_binop_right($op, $right, $prec); return $self->maybe_parens("$left $opname$eq $right", $cx, $prec); } @@ -1729,7 +1859,7 @@ sub pp_sle { binop(@_, "le", 15) } sub pp_scmp { binop(@_, "cmp", 14) } sub pp_sassign { binop(@_, "=", 7, SWAP_CHILDREN) } -sub pp_aassign { binop(@_, "=", 7, SWAP_CHILDREN) } +sub pp_aassign { binop(@_, "=", 7, SWAP_CHILDREN | LIST_CONTEXT) } # `.' is special because concats-of-concats are optimized to save copying # by making all but the first concat stacked. The effect is as if the @@ -1827,6 +1957,7 @@ sub logop { sub pp_and { logop(@_, "and", 3, "&&", 11, "if") } sub pp_or { logop(@_, "or", 2, "||", 10, "unless") } +sub pp_dor { logop(@_, "err", 2, "//", 10, "") } # xor is syntactically a logop, but it's really a binop (contrary to # old versions of opcode.pl). Syntax is what matters here. @@ -1843,7 +1974,8 @@ sub logassignop { } sub pp_andassign { logassignop(@_, "&&=") } -sub pp_orassign { logassignop(@_, "||=") } +sub pp_orassign { logassignop(@_, "||=") } +sub pp_dorassign { logassignop(@_, "//=") } sub listop { my $self = shift; @@ -1853,6 +1985,7 @@ sub listop { my $kid = $op->first->sibling; return $name if null $kid; my $first; + $name = "socketpair" if $name eq "sockpair"; if (defined prototype("CORE::$name") && prototype("CORE::$name") =~ /^;?\*/ && $kid->name eq "rv2gv") { @@ -1861,6 +1994,9 @@ sub listop { else { $first = $self->deparse($kid, 6); } + if ($name eq "chmod" && $first =~ /^\d+$/) { + $first = sprintf("%#o", $first); + } $first = "+$first" if not $parens and substr($first, 0, 1) eq "("; push @exprs, $first; $kid = $kid->sibling; @@ -2006,12 +2142,21 @@ sub indirop { $indir = $indir->first; # skip rv2gv if (is_scope($indir)) { $indir = "{" . $self->deparse($indir, 0) . "}"; + } elsif ($indir->name eq "const" && $indir->private & OPpCONST_BARE) { + $indir = $self->const_sv($indir)->PV; } else { $indir = $self->deparse($indir, 24); } $indir = $indir . " "; $kid = $kid->sibling; } + if ($name eq "sort" && $op->private & (OPpSORT_NUMERIC | OPpSORT_INTEGER)) { + $indir = ($op->private & OPpSORT_REVERSE) ? '{$b <=> $a} ' + : '{$a <=> $b} '; + } + elsif ($name eq "sort" && $op->private & OPpSORT_REVERSE) { + $indir = '{$b cmp $a} '; + } for (; !null($kid); $kid = $kid->sibling) { $expr = $self->deparse($kid, 6); push @exprs, $expr; @@ -2039,7 +2184,7 @@ sub mapop { $kid = $kid->sibling; for (; !null($kid); $kid = $kid->sibling) { $expr = $self->deparse($kid, 6); - push @exprs, $expr if $expr; + push @exprs, $expr if defined $expr; } return $self->maybe_parens_func($name, $code . join(", ", @exprs), $cx, 5); } @@ -2053,21 +2198,35 @@ sub pp_list { my($expr, @exprs); my $kid = $op->first->sibling; # skip pushmark my $lop; - my $local = "either"; # could be local(...) or my(...) + my $local = "either"; # could be local(...), my(...) or our(...) for ($lop = $kid; !null($lop); $lop = $lop->sibling) { # This assumes that no other private flags equal 128, and that # OPs that store things other than flags in their op_private, # like OP_AELEMFAST, won't be immediate children of a list. - unless ($lop->private & OPpLVAL_INTRO or $lop->name eq "undef") + # + # OP_ENTERSUB can break this logic, so check for it. + # I suspect that open and exit can too. + + if (!($lop->private & (OPpLVAL_INTRO|OPpOUR_INTRO) + or $lop->name eq "undef") + or $lop->name eq "entersub" + or $lop->name eq "exit" + or $lop->name eq "open") { $local = ""; # or not last; } if ($lop->name =~ /^pad[ash]v$/) { # my() - ($local = "", last) if $local eq "local"; + ($local = "", last) if $local eq "local" || $local eq "our"; $local = "my"; + } elsif ($lop->name =~ /^(gv|rv2)[ash]v$/ + && $lop->private & OPpOUR_INTRO + or $lop->name eq "null" && $lop->first->name eq "gvsv" + && $lop->first->private & OPpOUR_INTRO) { # our() + ($local = "", last) if $local eq "my" || $local eq "local"; + $local = "our"; } elsif ($lop->name ne "undef") { # local() - ($local = "", last) if $local eq "my"; + ($local = "", last) if $local eq "my" || $local eq "our"; $local = "local"; } } @@ -2151,10 +2310,9 @@ sub loop_common { my $bare = 0; my $body; my $cond = undef; - my $out_seq = $self->{'curcop'}->cop_seq;; if ($kid->name eq "lineseq") { # bare or infinite loop if (is_state $kid->last) { # infinite - $head = "for (;;) "; # shorter than while (1) + $head = "while (1) "; # Can't use for(;;) if there's a continue $cond = ""; } else { $bare = 1; @@ -2208,7 +2366,7 @@ sub loop_common { # block (or the last in a bare loop). my $cont_start = $enter->nextop; my $cont; - if ($$cont_start != $$op and $ {$cont_start->sibling} != $ {$body->last}) { + if ($$cont_start != $$op && ${$cont_start->sibling} != ${$body->last}) { if ($bare) { $cont = $body->last; } else { @@ -2223,7 +2381,7 @@ sub loop_common { for (; $$state != $$cont; $state = $state->sibling) { push @states, $state; } - $body = $self->lineseq(@states); + $body = $self->lineseq(undef, @states); if (defined $cond and not is_scope $cont and $self->{'expand'} < 3) { $head = "for ($init; $cond; " . $self->deparse($cont, 1) .") "; $cont = "\cK"; @@ -2233,13 +2391,13 @@ sub loop_common { } } else { return "" if !defined $body; + if (length $init) { + $head = "for ($init; $cond;) "; + } $cont = "\cK"; $body = $self->deparse($body, 0); } - $body .= "\n"; - # If we have say C<{my $x=2; sub x{$x}}>, the sub must go inside - # the loop. So we insert any subs which are due here. - $body .= join"", $self->seq_subs($out_seq); + $body =~ s/;?$/;\n/; return $head . "{\n\t" . $body . "\b}" . $cont; } @@ -2250,7 +2408,7 @@ sub for_loop { my $self = shift; my($op, $cx) = @_; my $init = $self->deparse($op, 1); - return $self->loop_common($op->sibling, $cx, $init); + return $self->loop_common($op->sibling->first->sibling, $cx, $init); } sub pp_leavetry { @@ -2260,10 +2418,12 @@ sub pp_leavetry { BEGIN { eval "sub OP_CONST () {" . opnumber("const") . "}" } BEGIN { eval "sub OP_STRINGIFY () {" . opnumber("stringify") . "}" } +BEGIN { eval "sub OP_RV2SV () {" . opnumber("rv2sv") . "}" } +BEGIN { eval "sub OP_LIST () {" . opnumber("list") . "}" } sub pp_null { my $self = shift; - my($op, $cx) = @_; + my($op, $cx, $flags) = @_; if (class($op) eq "OP") { # old value is lost return $self->{'ex_const'} if $op->targ == OP_CONST; @@ -2285,6 +2445,21 @@ sub pp_null { return $self->maybe_parens($self->deparse($op->first, 20) . " =~ " . $self->deparse($op->first->sibling, 20), $cx, 20); + } elsif ($op->flags & OPf_SPECIAL && $cx == 0 && !$op->targ) { + if ($flags) { + return $self->deparse($op->first, $cx); + } + else { + return "do {\n\t". $self->deparse($op->first, $cx) ."\n\b};"; + } + } elsif (!null($op->first->sibling) and + $op->first->sibling->name eq "null" and + class($op->first->sibling) eq "UNOP" and + $op->first->sibling->first->flags & OPf_STACKED and + $op->first->sibling->first->name eq "rcatline") { + return $self->maybe_parens($self->deparse($op->first, 18) . " .= " + . $self->deparse($op->first->sibling, 18), + $cx, 18); } else { return $self->deparse($op->first, $cx); } @@ -2355,13 +2530,22 @@ sub pp_aelemfast { my $self = shift; my($op, $cx) = @_; my $gv = $self->gv_or_padgv($op); - return "\$" . $self->gv_name($gv) . "[" . + my $name = $self->gv_name($gv); + $name = $self->{'curstash'}."::$name" + if $name !~ /::/ && $self->lex_in_scope('@'.$name); + + return "\$" . $name . "[" . ($op->private + $self->{'arybase'}) . "]"; } sub rv2x { my $self = shift; my($op, $cx, $type) = @_; + + if (class($op) eq 'NULL' || !$op->can("first")) { + carp("Unexpected op in pp_rv2x"); + return 'XXX'; + } my $kid = $op->first; my $str = $self->deparse($kid, 0); return $self->stash_variable($type, $str) if is_scalar($kid); @@ -2385,7 +2569,17 @@ sub pp_av2arylen { } # skip down to the old, ex-rv2cv -sub pp_rv2cv { $_[0]->rv2x($_[1]->first->first->sibling, $_[2], "&") } +sub pp_rv2cv { + my ($self, $op, $cx) = @_; + if (!null($op->first) && $op->first->name eq 'null' && + $op->first->targ eq OP_LIST) + { + return $self->rv2x($op->first->first->sibling, $cx, "&") + } + else { + return $self->rv2x($op, $cx, "") + } +} sub pp_rv2av { my $self = shift; @@ -2429,6 +2623,13 @@ sub elem { $array = $self->padany($array); } elsif (is_scope($array)) { # ${expr}[0] $array = "{" . $self->deparse($array, 0) . "}"; + } elsif ($array->name eq "gv") { + $array = $self->gv_name($self->gv_or_padgv($array)); + if ($array !~ /::/) { + my $prefix = ($left eq '[' ? '@' : '%'); + $array = $self->{curstash}.'::'.$array + if $self->lex_in_scope($prefix . $array); + } } elsif (is_scalar $array) { # $x[0], $$x[0], ... $array = $self->deparse($array, 24); } else { @@ -2457,6 +2658,16 @@ sub elem { # $idx =~ s/^\((.*)\)$/$1/ if $self->{'parens'}; + # Hash-element braces will autoquote a bareword inside themselves. + # We need to make sure that C<$hash{warn()}> doesn't come out as + # C<$hash{warn}>, which has a quite different meaning. Currently + # B::Deparse will always quote strings, even if the string was a + # bareword in the original (i.e. the OPpCONST_BARE flag is ignored + # for constant strings.) So we can cheat slightly here - if we see + # a bareword, we know that it is supposed to be a function call. + # + $idx =~ s/^([A-Za-z_]\w*)$/$1()/; + return "\$" . $array . $left . $idx . $right; } @@ -2560,7 +2771,8 @@ sub method { } else { $obj = $kid; $kid = $kid->sibling; - for (; not null $kid->sibling; $kid = $kid->sibling) { + for (; !null ($kid->sibling) && $kid->name ne "method_named"; + $kid = $kid->sibling) { push @exprs, $self->deparse($kid, 6); } $meth = $kid; @@ -2580,7 +2792,7 @@ sub method { } my $args = join(", ", @exprs); $kid = $obj . "->" . $meth; - if ($args) { + if (length $args) { return $kid . "(" . $args . ")"; # parens mandatory } else { return $kid; @@ -2591,14 +2803,15 @@ sub method { # or ("", $args_after_prototype_demunging) if it does. sub check_proto { my $self = shift; + return "&" if $self->{'noproto'}; my($proto, @args) = @_; my($arg, $real); my $doneok = 0; my @reals; # An unbackslashed @ or % gobbles up the rest of the args - $proto =~ s/([^\\]|^)([@%])(.*)$/$1$2/; + 1 while $proto =~ s/(?name =~ /^s?refgen$/ and !null($real = $arg->first) and - ($chr eq "\$" && is_scalar($real->first) - or ($chr eq "\@" + ($chr =~ /\$/ && is_scalar($real->first) + or ($chr =~ /@/ + && class($real->first->sibling) ne 'NULL' && $real->first->sibling->name =~ /^(rv2|pad)av$/) - or ($chr eq "%" + or ($chr =~ /%/ + && class($real->first->sibling) ne 'NULL' && $real->first->sibling->name =~ /^(rv2|pad)hv$/) - #or ($chr eq "&" # This doesn't work + #or ($chr =~ /&/ # This doesn't work # && $real->first->name eq "rv2cv") - or ($chr eq "*" + or ($chr =~ /\*/ && $real->first->name eq "rv2gv"))) { push @reals, $self->deparse($real, 6); @@ -2670,7 +2885,7 @@ sub pp_entersub { my $prefix = ""; my $amper = ""; my($kid, @exprs); - if ($op->flags & OPf_SPECIAL) { + if ($op->flags & OPf_SPECIAL && !($op->flags & OPf_MOD)) { $prefix = "do "; } elsif ($op->private & OPpENTERSUB_AMPER) { $amper = "&"; @@ -2692,7 +2907,7 @@ sub pp_entersub { } $simple = 1; # only calls of named functions can be prototyped $kid = $self->deparse($kid, 24); - } elsif (is_scalar $kid->first) { + } elsif (is_scalar ($kid->first) && $kid->first->name ne 'rv2cv') { $amper = "&"; $kid = $self->deparse($kid, 24); } else { @@ -2703,7 +2918,22 @@ sub pp_entersub { # Doesn't matter how many prototypes there are, if # they haven't happened yet! - my $declared = exists $self->{'subs_declared'}{$kid}; + my $declared; + { + no strict 'refs'; + no warnings 'uninitialized'; + $declared = exists $self->{'subs_declared'}{$kid} + || ( + defined &{ %{$self->{'curstash'}."::"}->{$kid} } + && !exists + $self->{'subs_deparsed'}{$self->{'curstash'}."::".$kid} + && defined prototype $self->{'curstash'}."::".$kid + ); + if (!$declared && defined($proto)) { + # Avoid "too early to check prototype" warning + ($amper, $proto) = ('&'); + } + } my $args; if ($declared and defined $proto and not $amper) { @@ -2722,7 +2952,7 @@ sub pp_entersub { } } else { # glob() invocations can be translated into calls of - # CORE::GLOBAL::glob with an second parameter, a number. + # CORE::GLOBAL::glob with a second parameter, a number. # Reverse this. if ($kid eq "CORE::GLOBAL::glob") { $kid = "glob"; @@ -2738,7 +2968,7 @@ sub pp_entersub { return "$kid(" . $args . ")"; } elsif (defined $proto and $proto eq "") { return $kid; - } elsif (defined $proto and $proto eq "\$") { + } elsif (defined $proto and $proto eq "\$" and is_scalar($exprs[0])) { return $self->maybe_parens_func($kid, $args, $cx, 16); } elsif (defined($proto) && $proto or $simple) { return $self->maybe_parens_func($kid, $args, $cx, 5); @@ -2758,26 +2988,134 @@ sub uninterp { return $str; } +{ +my $bal; +BEGIN { + use re "eval"; + # Matches any string which is balanced with respect to {braces} + $bal = qr( + (?: + [^\\{}] + | \\\\ + | \\[{}] + | \{(??{$bal})\} + )* + )x; +} + # the same, but treat $|, $), $( and $ at the end of the string differently sub re_uninterp { my($str) = @_; - $str =~ s/(^|\G|[^\\])((?:\\\\)*)([\$\@](?!\||\)|\$\(|$)|\\[uUlLQE])/$1$2\\$3/g; + + $str =~ s/ + ( ^|\G # $1 + | [^\\] + ) + + ( # $2 + (?:\\\\)* + ) + + ( # $3 + (\(\?\??\{$bal\}\)) # $4 + | [\$\@] + (?!\||\)|\(|$) + | \\[uUlLQE] + ) + + /defined($4) && length($4) ? "$1$2$4" : "$1$2\\$3"/xeg; + + return $str; +} + +# This is for regular expressions with the /x modifier +# We have to leave comments unmangled. +sub re_uninterp_extended { + my($str) = @_; + + $str =~ s/ + ( ^|\G # $1 + | [^\\] + ) + + ( # $2 + (?:\\\\)* + ) + + ( # $3 + ( \(\?\??\{$bal\}\) # $4 (skip over (?{}) and (??{}) blocks) + | \#[^\n]* # (skip over comments) + ) + | [\$\@] + (?!\||\)|\(|$|\s) + | \\[uUlLQE] + ) + + /defined($4) && length($4) ? "$1$2$4" : "$1$2\\$3"/xeg; + return $str; } +} + +my %unctrl = # portable to to EBCDIC + ( + "\c@" => '\c@', # unused + "\cA" => '\cA', + "\cB" => '\cB', + "\cC" => '\cC', + "\cD" => '\cD', + "\cE" => '\cE', + "\cF" => '\cF', + "\cG" => '\cG', + "\cH" => '\cH', + "\cI" => '\cI', + "\cJ" => '\cJ', + "\cK" => '\cK', + "\cL" => '\cL', + "\cM" => '\cM', + "\cN" => '\cN', + "\cO" => '\cO', + "\cP" => '\cP', + "\cQ" => '\cQ', + "\cR" => '\cR', + "\cS" => '\cS', + "\cT" => '\cT', + "\cU" => '\cU', + "\cV" => '\cV', + "\cW" => '\cW', + "\cX" => '\cX', + "\cY" => '\cY', + "\cZ" => '\cZ', + "\c[" => '\c[', # unused + "\c\\" => '\c\\', # unused + "\c]" => '\c]', # unused + "\c_" => '\c_', # unused + ); # character escapes, but not delimiters that might need to be escaped sub escape_str { # ASCII, UTF8 my($str) = @_; - $str =~ s/(.)/ord($1)>255 ? sprintf("\\x{%x}", ord($1)) : $1/eg; + $str =~ s/(.)/ord($1) > 255 ? sprintf("\\x{%x}", ord($1)) : $1/eg; $str =~ s/\a/\\a/g; -# $str =~ s/\cH/\\b/g; # \b means someting different in a regex +# $str =~ s/\cH/\\b/g; # \b means something different in a regex $str =~ s/\t/\\t/g; $str =~ s/\n/\\n/g; $str =~ s/\e/\\e/g; $str =~ s/\f/\\f/g; $str =~ s/\r/\\r/g; - $str =~ s/([\cA-\cZ])/'\\c' . chr(ord('@') + ord($1))/ge; - $str =~ s/([\0\033-\037\177-\377])/'\\' . sprintf("%03o", ord($1))/ge; + $str =~ s/([\cA-\cZ])/$unctrl{$1}/ge; + $str =~ s/([[:^print:]])/sprintf("\\%03o", ord($1))/ge; + return $str; +} + +# For regexes with the /x modifier. +# Leave whitespace unmangled. +sub escape_extended_re { + my($str) = @_; + $str =~ s/(.)/ord($1) > 255 ? sprintf("\\x{%x}", ord($1)) : $1/eg; + $str =~ s/([[:^print:]])/ + ($1 =~ y! \t\n!!) ? $1 : sprintf("\\%03o", ord($1))/ge; + $str =~ s/\n/\n\f/g; return $str; } @@ -2794,7 +3132,7 @@ sub re_unback { my($str) = @_; # the insane complexity here is due to the behaviour of "\c\" - $str =~ s/(^|[^\\]|\\c\\)(?FLAGS & SVf_IOK) { return $sv->int_value; } elsif ($sv->FLAGS & SVf_NOK) { - return $sv->NV; + # try the default stringification + my $r = "".$sv->NV; + if ($r =~ /e/) { + # If it's in scientific notation, we might have lost information + return sprintf("%.20e", $sv->NV); + } + return $r; } elsif ($sv->FLAGS & SVf_ROK && $sv->can("RV")) { return "\\(" . const($sv->RV) . ")"; # constant folded - } else { + } elsif ($sv->FLAGS & SVf_POK) { my $str = $sv->PV; if ($str =~ /[^ -~]/) { # ASCII for non-printing return single_delim("qq", '"', uninterp escape_str unback $str); } else { return single_delim("q", "'", unback $str); } + } else { + return "undef"; } } @@ -2897,13 +3244,13 @@ sub dq { } elsif ($type eq "concat") { my $first = $self->dq($op->first); my $last = $self->dq($op->last); + # Disambiguate "${foo}bar", "${foo}{bar}", "${foo}[1]" - if ($last =~ /^[A-Z\\\^\[\]_?]/) { - $first =~ s/([\$@])\^$/${1}{^}/; # "${^}W" etc - } - elsif ($last =~ /^[{\[\w]/) { - $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/; - } + ($last =~ /^[A-Z\\\^\[\]_?]/ && + $first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc + || ($last =~ /^[{\[\w_]/ && + $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/); + return $first . $last; } elsif ($type eq "uc") { return '\U' . $self->dq($op->first->sibling) . '\E'; @@ -2969,10 +3316,13 @@ sub double_delim { } } +# Only used by tr///, so backslashes hyphens sub pchr { # ASCII my($n) = @_; if ($n == ord '\\') { return '\\\\'; + } elsif ($n == ord "-") { + return "\\-"; } elsif ($n >= ord(' ') and $n <= ord('~')) { return chr($n); } elsif ($n == ord "\a") { @@ -3015,12 +3365,10 @@ sub collapse { return $str; } -# XXX This has trouble with hyphens in the replacement (tr/bac/-AC/), -# and backslashes. - sub tr_decode_byte { my($table, $flags) = @_; - my(@table) = unpack("s256", $table); + my(@table) = unpack("s*", $table); + splice @table, 0x100, 1; # Number of subsequent elements my($c, $tr, @from, @to, @delfrom, $delhyphen); if ($table[ord "-"] != -1 and $table[ord("-") - 1] == -1 || $table[ord("-") + 1] == -1) @@ -3034,7 +3382,7 @@ sub tr_decode_byte { $delhyphen = 1; } } - for ($c = 0; $c < 256; $c++) { + for ($c = 0; $c < @table; $c++) { $tr = $table[$c]; if ($tr >= 0) { push @from, $c; push @to, $tr; @@ -3066,6 +3414,8 @@ sub tr_chr { my $x = shift; if ($x == ord "-") { return "\\-"; + } elsif ($x == ord "\\") { + return "\\\\"; } else { return chr $x; } @@ -3188,32 +3538,36 @@ sub pp_trans { # Like dq(), but different sub re_dq { my $self = shift; - my $op = shift; + my ($op, $extended) = @_; + my $type = $op->name; if ($type eq "const") { return '$[' if $op->private & OPpCONST_ARYBASE; - return re_uninterp(escape_str(re_unback($self->const_sv($op)->as_string))); + my $unbacked = re_unback($self->const_sv($op)->as_string); + return re_uninterp_extended(escape_extended_re($unbacked)) + if $extended; + return re_uninterp(escape_str($unbacked)); } elsif ($type eq "concat") { - my $first = $self->re_dq($op->first); - my $last = $self->re_dq($op->last); + my $first = $self->re_dq($op->first, $extended); + my $last = $self->re_dq($op->last, $extended); + # Disambiguate "${foo}bar", "${foo}{bar}", "${foo}[1]" - if ($last =~ /^[A-Z\\\^\[\]_?]/) { - $first =~ s/([\$@])\^$/${1}{^}/; - } - elsif ($last =~ /^[{\[\w]/) { - $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/; - } + ($last =~ /^[A-Z\\\^\[\]_?]/ && + $first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc + || ($last =~ /^[{\[\w_]/ && + $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/); + return $first . $last; } elsif ($type eq "uc") { - return '\U' . $self->re_dq($op->first->sibling) . '\E'; + return '\U' . $self->re_dq($op->first->sibling, $extended) . '\E'; } elsif ($type eq "lc") { - return '\L' . $self->re_dq($op->first->sibling) . '\E'; + return '\L' . $self->re_dq($op->first->sibling, $extended) . '\E'; } elsif ($type eq "ucfirst") { - return '\u' . $self->re_dq($op->first->sibling); + return '\u' . $self->re_dq($op->first->sibling, $extended); } elsif ($type eq "lcfirst") { - return '\l' . $self->re_dq($op->first->sibling); + return '\l' . $self->re_dq($op->first->sibling, $extended); } elsif ($type eq "quotemeta") { - return '\Q' . $self->re_dq($op->first->sibling) . '\E'; + return '\Q' . $self->re_dq($op->first->sibling, $extended) . '\E'; } elsif ($type eq "join") { return $self->deparse($op->last, 26); # was join($", @ary) } else { @@ -3221,13 +3575,54 @@ sub re_dq { } } -sub pp_regcomp { +sub pure_string { + my ($self, $op) = @_; + my $type = $op->name; + + if ($type eq 'const') { + return 1; + } + elsif ($type =~ /^[ul]c(first)?$/ || $type eq 'quotemeta') { + return $self->pure_string($op->first->sibling); + } + elsif ($type eq 'join') { + my $join_op = $op->first->sibling; # Skip pushmark + return 0 unless $join_op->name eq 'null' && $join_op->targ eq OP_RV2SV; + + my $gvop = $join_op->first; + return 0 unless $gvop->name eq 'gvsv'; + return 0 unless '"' eq $self->gv_name($self->gv_or_padgv($gvop)); + + return 0 unless ${$join_op->sibling} eq ${$op->last}; + return 0 unless $op->last->name =~ /^(rv2|pad)av$/; + } + elsif ($type eq 'concat') { + return $self->pure_string($op->first) + && $self->pure_string($op->last); + } + elsif (is_scalar($op) || $type =~ /^[ah]elem(fast)?$/) { + return 1; + } + else { + return 0; + } + + return 1; +} + +sub regcomp { my $self = shift; - my($op, $cx) = @_; + my($op, $cx, $extended) = @_; my $kid = $op->first; $kid = $kid->first if $kid->name eq "regcmaybe"; $kid = $kid->first if $kid->name eq "regcreset"; - return $self->re_dq($kid); + return ($self->re_dq($kid, $extended), 1) if $self->pure_string($kid); + return ($self->deparse($kid, $cx), 0); +} + +sub pp_regcomp { + my ($self, $op, $cx) = @_; + return (($self->regcomp($op, $cx, 0))[0]); } # osmic acid -- see osmium tetroxide @@ -3247,10 +3642,19 @@ sub matchop { $var = $self->deparse($kid, 20); $kid = $kid->sibling; } + my $quote = 1; + my $extended = ($op->pmflags & PMf_EXTENDED); if (null $kid) { - $re = re_uninterp(escape_str(re_unback($op->precomp))); + my $unbacked = re_unback($op->precomp); + if ($extended) { + $re = re_uninterp_extended(escape_extended_re($unbacked)); + } else { + $re = re_uninterp(escape_str(re_unback($op->precomp))); + } + } elsif ($kid->name ne 'regcomp') { + carp("found ".$kid->name." where regcomp expected"); } else { - $re = $self->deparse($kid, 1); + ($re, $quote) = $self->regcomp($kid, 1, $extended); } my $flags = ""; $flags .= "c" if $op->pmflags & PMf_CONTINUE; @@ -3264,10 +3668,10 @@ sub matchop { if ($op->pmflags & PMf_ONCE) { # only one kind of delimiter works here $re =~ s/\?/\\?/g; $re = "?$re?"; - } else { + } elsif ($quote) { $re = single_delim($name, $delim, $re); } - $re = $re . $flags; + $re = $re . $flags if $quote; if ($binop) { return $self->maybe_parens("$var =~ $re", $cx, 20); } else { @@ -3284,8 +3688,11 @@ sub pp_split { my($op, $cx) = @_; my($kid, @exprs, $ary, $expr); $kid = $op->first; - if ($ {$kid->pmreplroot}) { - $ary = '@' . $self->gv_name($kid->pmreplroot); + # under ithreads pmreplroot is an integer, not an SV + my $replroot = $kid->pmreplroot; + if ( ( ref($replroot) && $$replroot ) || + ( !ref($replroot) && $replroot ) ) { + $ary = $self->stash_variable('@', $self->gv_name($kid->pmreplroot)); } for (; !null($kid); $kid = $kid->sibling) { push @exprs, $self->deparse($kid, 6); @@ -3338,15 +3745,22 @@ sub pp_subst { $flags .= "e"; } if ($op->pmflags & PMf_EVAL) { - $repl = $self->deparse($repl, 0); + $repl = $self->deparse($repl, 0, 1); } else { $repl = $self->dq($repl); } } + my $extended = ($op->pmflags & PMf_EXTENDED); if (null $kid) { - $re = re_uninterp(escape_str(re_unback($op->precomp))); + my $unbacked = re_unback($op->precomp); + if ($extended) { + $re = re_uninterp_extended(escape_extended_re($unbacked)); + } + else { + $re = re_uninterp(escape_str($unbacked)); + } } else { - $re = $self->deparse($kid, 1); + ($re) = $self->regcomp($kid, 1, $extended); } $flags .= "e" if $op->pmflags & PMf_EVAL; $flags .= "g" if $op->pmflags & PMf_GLOBAL; @@ -3354,7 +3768,7 @@ sub pp_subst { $flags .= "m" if $op->pmflags & PMf_MULTILINE; $flags .= "o" if $op->pmflags & PMf_KEEP; $flags .= "s" if $op->pmflags & PMf_SINGLELINE; - $flags .= "x" if $op->pmflags & PMf_EXTENDED; + $flags .= "x" if $extended; $flags = $substwords{$flags} if $substwords{$flags}; if ($binop) { return $self->maybe_parens("$var =~ s" @@ -3428,6 +3842,22 @@ C will print which probably isn't what you intended (the C<'???'> is a sign that perl optimized away a constant value). +=item B<-P> + +Disable prototype checking. With this option, all function calls are +deparsed as if no prototype was defined for them. In other words, + + perl -MO=Deparse,-P -e 'sub foo (\@) { 1 } foo @x' + +will print + + sub foo (\@) { + 1; + } + &foo(\@x); + +making clear how the parameters are actually passed to C. + =item B<-q> Expand double-quoted strings into the corresponding combinations of @@ -3641,6 +4071,9 @@ particular context, where some pragmas are already in scope. In this case, you use the B method to describe the assumptions you wish to make. +Not all of the options currently have any useful effect. See +L for more details. + The parameters it accepts are: =over 4 @@ -3733,13 +4166,81 @@ the main:: package, the code will include a package declaration. =head1 BUGS -See the 'to do' list at the beginning of the module file. +=over 4 + +=item * + +The only pragmas to be completely supported are: C, +C, C, and C. (C<$[>, which +behaves like a pragma, is also supported.) + +Excepting those listed above, we're currently unable to guarantee that +B::Deparse will produce a pragma at the correct point in the program. +Since the effects of pragmas are often lexically scoped, this can mean +that the pragma holds sway over a different portion of the program +than in the input file. + +=item * + +In fact, the above is a specific instance of a more general problem: +we can't guarantee to produce BEGIN blocks or C declarations in +exactly the right place. So if you use a module which affects compilation +(such as by over-riding keywords, overloading constants or whatever) +then the output code might not work as intended. + +This is the most serious outstanding problem, and will be very hard +to fix. + +=item * + +If a keyword is over-ridden, and your program explicitly calls +the built-in version by using CORE::keyword, the output of B::Deparse +will not reflect this. If you run the resulting code, it will call +the over-ridden version rather than the built-in one. (Maybe there +should be an option to B print keyword calls as C.) + +=item * + +C comes out as C, which +causes perl to issue a warning. + +The obvious fix doesn't work, because these are different: + + print (FOO 1, 2, 3), 4, 5, 6; + print FOO (1, 2, 3), 4, 5, 6; + +=item * + +Constants (other than simple strings or numbers) don't work properly. +Pathological examples that fail (and probably always will) include: + + use constant E2BIG => ($!=7); + use constant x=>\$x; print x + +The following could (and should) be made to work: + + use constant regex => qr/blah/; + print regex; + +=item * + +An input file that uses source filtering probably won't be deparsed into +runnable code, because it will still include the B declaration +for the source filtering module, even though the code that is +produced is already ordinary Perl which shouldn't be filtered again. + +=item * + +There are probably many more bugs on non-ASCII platforms (EBCDIC). + +=back =head1 AUTHOR Stephen McCamant , based on an earlier version by Malcolm Beattie , with -contributions from Gisle Aas, James Duncan, Albert Dvornik, Hugo van -der Sanden, Gurusamy Sarathy, and Nick Ing-Simmons. +contributions from Gisle Aas, James Duncan, Albert Dvornik, Robin +Houston, Hugo van der Sanden, Gurusamy Sarathy, Nick Ing-Simmons, +and Rafael Garcia-Suarez. =cut