X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=blobdiff_plain;f=ext%2FB%2FB%2FDeparse.pm;h=37b98a034386607fafb1d11b2f227ce7163a40f1;hb=c27ea44e678f02a1903a4aa0e3110ba824fcd93a;hp=6a51cb330cb03e072f77310c3cc39e950f900e49;hpb=995e581ff6cca225aeb40cfb47ff14993949344d;p=p5sagit%2Fp5-mst-13.2.git diff --git a/ext/B/B/Deparse.pm b/ext/B/B/Deparse.pm index 6a51cb3..37b98a0 100644 --- a/ext/B/B/Deparse.pm +++ b/ext/B/B/Deparse.pm @@ -7,20 +7,21 @@ # but essentially none of his code remains. package B::Deparse; -use Carp 'cluck', 'croak'; -use B qw(class main_root main_start main_cv svref_2object opnumber cstring +use Carp; +use B qw(class main_root main_start main_cv svref_2object opnumber perlstring OPf_WANT OPf_WANT_VOID OPf_WANT_SCALAR OPf_WANT_LIST - OPf_KIDS OPf_REF OPf_STACKED OPf_SPECIAL + OPf_KIDS OPf_REF OPf_STACKED OPf_SPECIAL OPf_MOD OPpLVAL_INTRO OPpOUR_INTRO OPpENTERSUB_AMPER OPpSLICE OPpCONST_BARE OPpTRANS_SQUASH OPpTRANS_DELETE OPpTRANS_COMPLEMENT OPpTARGET_MY OPpCONST_ARYBASE OPpEXISTS_SUB OPpSORT_NUMERIC OPpSORT_INTEGER OPpSORT_REVERSE - SVf_IOK SVf_NOK SVf_ROK SVf_POK SVpad_OUR + SVf_IOK SVf_NOK SVf_ROK SVf_POK SVpad_OUR SVf_FAKE CVf_METHOD CVf_LOCKED CVf_LVALUE PMf_KEEP PMf_GLOBAL PMf_CONTINUE PMf_EVAL PMf_ONCE PMf_SKIPWHITE PMf_MULTILINE PMf_SINGLELINE PMf_FOLD PMf_EXTENDED); -$VERSION = 0.60; +$VERSION = 0.63; use strict; +use vars qw/$AUTOLOAD/; use warnings (); # Changes between 0.50 and 0.51: @@ -35,7 +36,7 @@ use warnings (); # - package declarations using cop_stash # - subs, formats and code sorted by cop_seq # Changes between 0.51 and 0.52: -# - added pp_threadsv (special variables under USE_THREADS) +# - added pp_threadsv (special variables under USE_5005THREADS) # - added documentation # Changes between 0.52 and 0.53: # - many changes adding precedence contexts and associativity @@ -92,8 +93,27 @@ use warnings (); # - separate recognition of constant subs # - rewrote continue block handling, now recoginizing for loops # - added more control of expanding control structures +# Changes between 0.60 and 0.61 (mostly by Robin Houston) +# - many bug-fixes +# - support for pragmas and 'use' +# - support for the little-used $[ variable +# - support for __DATA__ sections +# - UTF8 support +# - BEGIN, CHECK, INIT and END blocks +# - scoping of subroutine declarations fixed +# - compile-time output from the input program can be suppressed, so that the +# output is just the deparsed code. (a change to O.pm in fact) +# - our() declarations +# - *all* the known bugs are now listed in the BUGS section +# - comprehensive test mechanism (TEST -deparse) +# Changes between 0.62 and 0.63 (mostly by Rafael Garcia-Suarez) +# - bug-fixes +# - new switch -P +# - support for command-line switches (-l, -0, etc.) # Todo: +# (See also BUGS section at the end of this file) +# # - finish tr/// changes # - add option for even more parens (generalize \&foo change) # - left/right context @@ -113,7 +133,7 @@ use warnings (); # - here-docs? # Tests that will always fail: -# comp/redef.t -- all (redefinition happens at compile time) +# (see t/TEST for the short list) # Object fields (were globals): # @@ -154,6 +174,9 @@ use warnings (); # keys are names of subs for which we've printed declarations. # That means we can omit parentheses from the arguments. # +# subs_deparsed +# Keeps track of fully qualified names of all deparsed subs. +# # parens: -p # linenums: -l # unquote: -q @@ -237,6 +260,9 @@ sub todo { $seq = 0; } push @{$self->{'subs_todo'}}, [$seq, $cv, $is_form]; + unless ($is_form || class($cv->STASH) eq 'SPECIAL') { + $self->{'subs_deparsed'}{$cv->STASH->NAME."::".$cv->GV->NAME} = 1; + } } sub next_todo { @@ -468,6 +494,8 @@ sub new { $self->{'files'}{$1} = 1; } elsif ($arg eq "-p") { $self->{'parens'} = 1; + } elsif ($arg eq "-P") { + $self->{'noproto'} = 1; } elsif ($arg eq "-l") { $self->{'linenums'} = 1; } elsif ($arg eq "-q") { @@ -481,9 +509,15 @@ sub new { return $self; } -sub WARN_MASK () { - # Mask out the bits that C uses - $warnings::Bits{all} | $warnings::DeadBits{all}; +{ + # Mask out the bits that L uses + my $WARN_MASK; + BEGIN { + $WARN_MASK = $warnings::Bits{all} | $warnings::DeadBits{all}; + } + sub WARN_MASK () { + return $WARN_MASK; + } } # Initialise the contextual information, either from @@ -506,10 +540,23 @@ sub compile { my(@args) = @_; return sub { my $self = B::Deparse->new(@args); + # First deparse command-line args + if (defined $^I) { # deparse -i + print q(BEGIN { $^I = ).perlstring($^I).qq(; }\n); + } + if ($^W) { # deparse -w + print qq(BEGIN { \$^W = $^W; }\n); + } + if ($/ ne "\n" or defined $O::savebackslash) { # deparse -l and -0 + my $fs = perlstring($/) || 'undef'; + my $bs = perlstring($O::savebackslash) || 'undef'; + print qq(BEGIN { \$/ = $fs; \$\\ = $bs; }\n); + } my @BEGINs = B::begin_av->isa("B::AV") ? B::begin_av->ARRAY : (); + my @CHECKs = B::check_av->isa("B::AV") ? B::check_av->ARRAY : (); my @INITs = B::init_av->isa("B::AV") ? B::init_av->ARRAY : (); my @ENDs = B::end_av->isa("B::AV") ? B::end_av->ARRAY : (); - for my $block (@BEGINs, @INITs, @ENDs) { + for my $block (@BEGINs, @CHECKs, @INITs, @ENDs) { $self->todo($block, 0); } $self->stash_subs(); @@ -528,9 +575,11 @@ sub compile { # Print __DATA__ section, if necessary no strict 'refs'; - if (defined *{$self->{'curstash'}."::DATA"}{IO}) { + my $laststash = defined $self->{'curcop'} + ? $self->{'curcop'}->stash->NAME : $self->{'curstash'}; + if (defined *{$laststash."::DATA"}{IO}) { print "__DATA__\n"; - print readline(*{$self->{'curstash'}."::DATA"}); + print readline(*{$laststash."::DATA"}); } } } @@ -538,7 +587,7 @@ sub compile { sub coderef2text { my $self = shift; my $sub = shift; - croak "Usage: ->coderef2text(CODEREF)" unless ref($sub) eq "CODE"; + croak "Usage: ->coderef2text(CODEREF)" unless UNIVERSAL::isa($sub, "CODE"); $self->init(); return $self->indent($self->deparse_sub(svref_2object($sub))); @@ -592,13 +641,13 @@ sub ambient_pragmas { elsif ($name eq 're') { require re; if ($val eq 'none') { - $hint_bits &= ~re::bits(qw/taint eval asciirange/); + $hint_bits &= ~re::bits(qw/taint eval/); next(); } my @names; if ($val eq 'all') { - @names = qw/taint eval asciirange/; + @names = qw/taint eval/; } elsif (ref $val) { @names = @$val; @@ -611,7 +660,7 @@ sub ambient_pragmas { elsif ($name eq 'warnings') { if ($val eq 'none') { - $warning_bits = "\0"x12; + $warning_bits = $warnings::NONE; next(); } @@ -623,7 +672,7 @@ sub ambient_pragmas { @names = split/\s+/, $val; } - $warning_bits = "\0"x12 if !defined ($warning_bits); + $warning_bits = $warnings::NONE if !defined ($warning_bits); $warning_bits |= warnings::bits(@names); } @@ -747,10 +796,13 @@ sub deparse_format { my @text; local($self->{'curcv'}) = $form; local($self->{'curcvlex'}); + local($self->{'in_format'}) = 1; local(@$self{qw'curstash warnings hints'}) - = @$self{'curstash warnings hints'}; + = @$self{qw'curstash warnings hints'}; my $op = $form->ROOT; my $kid; + return "\f." if $op->first->name eq 'stub' + || $op->first->name eq 'nextstate'; $op = $op->first->first; # skip leavewrite, lineseq while (not null $op) { $op = $op->sibling; # skip nextstate @@ -907,87 +959,17 @@ sub maybe_my { # The following OPs don't have functions: # pp_padany -- does not exist after parsing -# pp_rcatline -- does not exist - -sub pp_enter { # see also leave - cluck "unexpected OP_ENTER"; - return "XXX"; -} -sub pp_pushmark { # see also list - cluck "unexpected OP_PUSHMARK"; - return "XXX"; -} - -sub pp_leavesub { # see also deparse_sub - cluck "unexpected OP_LEAVESUB"; - return "XXX"; -} - -sub pp_leavewrite { # see also deparse_format - cluck "unexpected OP_LEAVEWRITE"; - return "XXX"; -} - -sub pp_method { # see also entersub - cluck "unexpected OP_METHOD"; - return "XXX"; -} - -sub pp_regcmaybe { # see also regcomp - cluck "unexpected OP_REGCMAYBE"; - return "XXX"; -} - -sub pp_regcreset { # see also regcomp - cluck "unexpected OP_REGCRESET"; - return "XXX"; -} - -sub pp_substcont { # see also subst - cluck "unexpected OP_SUBSTCONT"; - return "XXX"; -} - -sub pp_grepstart { # see also grepwhile - cluck "unexpected OP_GREPSTART"; - return "XXX"; -} - -sub pp_mapstart { # see also mapwhile - cluck "unexpected OP_MAPSTART"; - return "XXX"; -} - -sub pp_flip { # see also flop - cluck "unexpected OP_FLIP"; - return "XXX"; -} - -sub pp_iter { # see also leaveloop - cluck "unexpected OP_ITER"; - return "XXX"; -} - -sub pp_enteriter { # see also leaveloop - cluck "unexpected OP_ENTERITER"; - return "XXX"; -} - -sub pp_enterloop { # see also leaveloop - cluck "unexpected OP_ENTERLOOP"; - return "XXX"; -} - -sub pp_leaveeval { # see also entereval - cluck "unexpected OP_LEAVEEVAL"; - return "XXX"; +sub AUTOLOAD { + if ($AUTOLOAD =~ s/^.*::pp_//) { + warn "unexpected OP_".uc $AUTOLOAD; + return "XXX"; + } else { + die "Undefined subroutine $AUTOLOAD called"; + } } -sub pp_entertry { # see also leavetry - cluck "unexpected OP_ENTERTRY"; - return "XXX"; -} +sub DESTROY {} # Do not AUTOLOAD # $root should be the op which represents the root of whatever # we're sequencing here. If it's undefined, then we don't append @@ -1038,7 +1020,7 @@ sub lineseq { } my $body = join(";\n", grep {length} @exprs); my $subs = ""; - if (defined $root && defined $limit_seq) { + if (defined $root && defined $limit_seq && !$self->{'in_format'}) { $subs = join "\n", $self->seq_subs($limit_seq); } return join(";\n", grep {length} $body, $subs); @@ -1099,15 +1081,16 @@ sub gv_name { Carp::confess() if $gv->isa("B::CV"); my $stash = $gv->STASH->NAME; my $name = $gv->SAFENAME; - if ($stash eq $self->{'curstash'} or $globalnames{$name} + if (($stash eq 'main' && $globalnames{$name}) + or ($stash eq $self->{'curstash'} && !$globalnames{$name}) or $name =~ /^[^A-Za-z_]/) { $stash = ""; } else { $stash = $stash . "::"; } - if ($name =~ /^\^../) { - $name = "{$name}"; # ${^WARNING_BITS} etc + if ($name =~ /^(\^..|{)/) { + $name = "{$name}"; # ${^WARNING_BITS}, etc and ${ } return $stash . $name; } @@ -1147,7 +1130,10 @@ sub lex_in_scope { sub populate_curcvlex { my $self = shift; for (my $cv = $self->{'curcv'}; class($cv) eq "CV"; $cv = $cv->OUTSIDE) { - my @padlist = $cv->PADLIST->ARRAY; + my $padlist = $cv->PADLIST; + # an undef CV still in lexical chain + next if class($padlist) eq "SPECIAL"; + my @padlist = $padlist->ARRAY; my @ns = $padlist[0]->ARRAY; for (my $i=0; $i<@ns; ++$i) { @@ -1158,8 +1144,10 @@ sub populate_curcvlex { next; } my $name = $ns[$i]->PVX; - my $seq_st = $ns[$i]->NVX; - my $seq_en = int($ns[$i]->IVX); + my ($seq_st, $seq_en) = + ($ns[$i]->FLAGS & SVf_FAKE) + ? (0, 999999) + : ($ns[$i]->NVX, $ns[$i]->IVX); push @{$self->{'curcvlex'}{$name}}, [$seq_st, $seq_en]; } @@ -1172,7 +1160,7 @@ sub find_scope_en { ((find_scope(@_))[1]); } # Recurses down the tree, looking for pad variable introductions and COPs sub find_scope { my ($self, $op, $scope_st, $scope_en) = @_; -Carp::cluck() if !defined $op; + carp("Undefined op in find_scope") if !defined $op; return ($scope_st, $scope_en) unless $op->flags & OPf_KIDS; for (my $o=$op->first; $$o; $o=$o->sibling) { @@ -1250,10 +1238,10 @@ sub pp_nextstate { my $warnings = $op->warnings; my $warning_bits; if ($warnings->isa("B::SPECIAL") && $$warnings == 4) { - $warning_bits = $warnings::Bits{"all"}; + $warning_bits = $warnings::Bits{"all"} & WARN_MASK; } elsif ($warnings->isa("B::SPECIAL") && $$warnings == 5) { - $warning_bits = "\0"x12; + $warning_bits = $warnings::NONE; } elsif ($warnings->isa("B::SPECIAL")) { $warning_bits = undef; @@ -1284,7 +1272,7 @@ sub declare_warnings { elsif (($to & WARN_MASK) eq "\0"x length($to)) { return "no warnings;\n"; } - return "BEGIN {\${^WARNING_BITS} = ".cstring($to)."}\n"; + return "BEGIN {\${^WARNING_BITS} = ".perlstring($to)."}\n"; } sub declare_hints { @@ -1573,7 +1561,6 @@ sub pp_scalar { sub padval { my $self = shift; my $targ = shift; - #cluck "curcv was undef" unless $self->{curcv}; return (($self->{'curcv'}->PADLIST->ARRAY)[1]->ARRAY)[$targ]; } @@ -1628,6 +1615,12 @@ sub pp_readline { return $self->unop($op, $cx, "readline"); } +sub pp_rcatline { + my $self = shift; + my($op) = @_; + return "<" . $self->gv_name($op->gv) . ">"; +} + # Unary operators that can occur as pseudo-listops inside double quotes sub dq_unop { my $self = shift; @@ -1818,7 +1811,8 @@ sub binop { ($left, $right) = ($right, $left); } $left = $self->deparse_binop_left($op, $left, $prec); - $left = "($left)" if $flags & LIST_CONTEXT && $left =~ /^\$/; + $left = "($left)" if $flags & LIST_CONTEXT + && $left !~ /^(my|our|local|)[\@\(]/; $right = $self->deparse_binop_right($op, $right, $prec); return $self->maybe_parens("$left $opname$eq $right", $cx, $prec); } @@ -1963,6 +1957,7 @@ sub logop { sub pp_and { logop(@_, "and", 3, "&&", 11, "if") } sub pp_or { logop(@_, "or", 2, "||", 10, "unless") } +sub pp_dor { logop(@_, "err", 2, "//", 10, "") } # xor is syntactically a logop, but it's really a binop (contrary to # old versions of opcode.pl). Syntax is what matters here. @@ -1979,7 +1974,8 @@ sub logassignop { } sub pp_andassign { logassignop(@_, "&&=") } -sub pp_orassign { logassignop(@_, "||=") } +sub pp_orassign { logassignop(@_, "||=") } +sub pp_dorassign { logassignop(@_, "//=") } sub listop { my $self = shift; @@ -1989,6 +1985,7 @@ sub listop { my $kid = $op->first->sibling; return $name if null $kid; my $first; + $name = "socketpair" if $name eq "sockpair"; if (defined prototype("CORE::$name") && prototype("CORE::$name") =~ /^;?\*/ && $kid->name eq "rv2gv") { @@ -2187,7 +2184,7 @@ sub mapop { $kid = $kid->sibling; for (; !null($kid); $kid = $kid->sibling) { $expr = $self->deparse($kid, 6); - push @exprs, $expr if $expr; + push @exprs, $expr if defined $expr; } return $self->maybe_parens_func($name, $code . join(", ", @exprs), $cx, 5); } @@ -2206,8 +2203,15 @@ sub pp_list { # This assumes that no other private flags equal 128, and that # OPs that store things other than flags in their op_private, # like OP_AELEMFAST, won't be immediate children of a list. - unless ($lop->private & OPpLVAL_INTRO + # + # OP_ENTERSUB can break this logic, so check for it. + # I suspect that open and exit can too. + + if (!($lop->private & (OPpLVAL_INTRO|OPpOUR_INTRO) or $lop->name eq "undef") + or $lop->name eq "entersub" + or $lop->name eq "exit" + or $lop->name eq "open") { $local = ""; # or not last; @@ -2215,8 +2219,10 @@ sub pp_list { if ($lop->name =~ /^pad[ash]v$/) { # my() ($local = "", last) if $local eq "local" || $local eq "our"; $local = "my"; - } elsif ($op->name =~ /^(gv|rv2)[ash]v$/ - && $op->private & OPpOUR_INTRO) { # our() + } elsif ($lop->name =~ /^(gv|rv2)[ash]v$/ + && $lop->private & OPpOUR_INTRO + or $lop->name eq "null" && $lop->first->name eq "gvsv" + && $lop->first->private & OPpOUR_INTRO) { # our() ($local = "", last) if $local eq "my" || $local eq "local"; $local = "our"; } elsif ($lop->name ne "undef") { # local() @@ -2412,10 +2418,12 @@ sub pp_leavetry { BEGIN { eval "sub OP_CONST () {" . opnumber("const") . "}" } BEGIN { eval "sub OP_STRINGIFY () {" . opnumber("stringify") . "}" } +BEGIN { eval "sub OP_RV2SV () {" . opnumber("rv2sv") . "}" } +BEGIN { eval "sub OP_LIST () {" . opnumber("list") . "}" } sub pp_null { my $self = shift; - my($op, $cx) = @_; + my($op, $cx, $flags) = @_; if (class($op) eq "OP") { # old value is lost return $self->{'ex_const'} if $op->targ == OP_CONST; @@ -2438,7 +2446,20 @@ sub pp_null { . $self->deparse($op->first->sibling, 20), $cx, 20); } elsif ($op->flags & OPf_SPECIAL && $cx == 0 && !$op->targ) { - return "do {\n\t". $self->deparse($op->first, $cx) ."\n\b};"; + if ($flags) { + return $self->deparse($op->first, $cx); + } + else { + return "do {\n\t". $self->deparse($op->first, $cx) ."\n\b};"; + } + } elsif (!null($op->first->sibling) and + $op->first->sibling->name eq "null" and + class($op->first->sibling) eq "UNOP" and + $op->first->sibling->first->flags & OPf_STACKED and + $op->first->sibling->first->name eq "rcatline") { + return $self->maybe_parens($self->deparse($op->first, 18) . " .= " + . $self->deparse($op->first->sibling, 18), + $cx, 18); } else { return $self->deparse($op->first, $cx); } @@ -2520,6 +2541,11 @@ sub pp_aelemfast { sub rv2x { my $self = shift; my($op, $cx, $type) = @_; + + if (class($op) eq 'NULL' || !$op->can("first")) { + carp("Unexpected op in pp_rv2x"); + return 'XXX'; + } my $kid = $op->first; my $str = $self->deparse($kid, 0); return $self->stash_variable($type, $str) if is_scalar($kid); @@ -2543,7 +2569,17 @@ sub pp_av2arylen { } # skip down to the old, ex-rv2cv -sub pp_rv2cv { $_[0]->rv2x($_[1]->first->first->sibling, $_[2], "&") } +sub pp_rv2cv { + my ($self, $op, $cx) = @_; + if (!null($op->first) && $op->first->name eq 'null' && + $op->first->targ eq OP_LIST) + { + return $self->rv2x($op->first->first->sibling, $cx, "&") + } + else { + return $self->rv2x($op, $cx, "") + } +} sub pp_rv2av { my $self = shift; @@ -2622,6 +2658,16 @@ sub elem { # $idx =~ s/^\((.*)\)$/$1/ if $self->{'parens'}; + # Hash-element braces will autoquote a bareword inside themselves. + # We need to make sure that C<$hash{warn()}> doesn't come out as + # C<$hash{warn}>, which has a quite different meaning. Currently + # B::Deparse will always quote strings, even if the string was a + # bareword in the original (i.e. the OPpCONST_BARE flag is ignored + # for constant strings.) So we can cheat slightly here - if we see + # a bareword, we know that it is supposed to be a function call. + # + $idx =~ s/^([A-Za-z_]\w*)$/$1()/; + return "\$" . $array . $left . $idx . $right; } @@ -2725,7 +2771,8 @@ sub method { } else { $obj = $kid; $kid = $kid->sibling; - for (; not null $kid->sibling; $kid = $kid->sibling) { + for (; !null ($kid->sibling) && $kid->name ne "method_named"; + $kid = $kid->sibling) { push @exprs, $self->deparse($kid, 6); } $meth = $kid; @@ -2745,7 +2792,7 @@ sub method { } my $args = join(", ", @exprs); $kid = $obj . "->" . $meth; - if ($args) { + if (length $args) { return $kid . "(" . $args . ")"; # parens mandatory } else { return $kid; @@ -2756,14 +2803,15 @@ sub method { # or ("", $args_after_prototype_demunging) if it does. sub check_proto { my $self = shift; + return "&" if $self->{'noproto'}; my($proto, @args) = @_; my($arg, $real); my $doneok = 0; my @reals; # An unbackslashed @ or % gobbles up the rest of the args - $proto =~ s/([^\\]|^)([@%])(.*)$/$1$2/; + 1 while $proto =~ s/(?name =~ /^s?refgen$/ and !null($real = $arg->first) and - ($chr eq "\$" && is_scalar($real->first) - or ($chr eq "\@" + ($chr =~ /\$/ && is_scalar($real->first) + or ($chr =~ /@/ + && class($real->first->sibling) ne 'NULL' && $real->first->sibling->name =~ /^(rv2|pad)av$/) - or ($chr eq "%" + or ($chr =~ /%/ + && class($real->first->sibling) ne 'NULL' && $real->first->sibling->name =~ /^(rv2|pad)hv$/) - #or ($chr eq "&" # This doesn't work + #or ($chr =~ /&/ # This doesn't work # && $real->first->name eq "rv2cv") - or ($chr eq "*" + or ($chr =~ /\*/ && $real->first->name eq "rv2gv"))) { push @reals, $self->deparse($real, 6); @@ -2835,7 +2885,7 @@ sub pp_entersub { my $prefix = ""; my $amper = ""; my($kid, @exprs); - if ($op->flags & OPf_SPECIAL) { + if ($op->flags & OPf_SPECIAL && !($op->flags & OPf_MOD)) { $prefix = "do "; } elsif ($op->private & OPpENTERSUB_AMPER) { $amper = "&"; @@ -2857,7 +2907,7 @@ sub pp_entersub { } $simple = 1; # only calls of named functions can be prototyped $kid = $self->deparse($kid, 24); - } elsif (is_scalar $kid->first) { + } elsif (is_scalar ($kid->first) && $kid->first->name ne 'rv2cv') { $amper = "&"; $kid = $self->deparse($kid, 24); } else { @@ -2868,10 +2918,21 @@ sub pp_entersub { # Doesn't matter how many prototypes there are, if # they haven't happened yet! - my $declared = exists $self->{'subs_declared'}{$kid}; - if (!$declared && defined($proto)) { - # Avoid "too early to check prototype" warning - ($amper, $proto) = ('&'); + my $declared; + { + no strict 'refs'; + no warnings 'uninitialized'; + $declared = exists $self->{'subs_declared'}{$kid} + || ( + defined &{ %{$self->{'curstash'}."::"}->{$kid} } + && !exists + $self->{'subs_deparsed'}{$self->{'curstash'}."::".$kid} + && defined prototype $self->{'curstash'}."::".$kid + ); + if (!$declared && defined($proto)) { + # Avoid "too early to check prototype" warning + ($amper, $proto) = ('&'); + } } my $args; @@ -2891,7 +2952,7 @@ sub pp_entersub { } } else { # glob() invocations can be translated into calls of - # CORE::GLOBAL::glob with an second parameter, a number. + # CORE::GLOBAL::glob with a second parameter, a number. # Reverse this. if ($kid eq "CORE::GLOBAL::glob") { $kid = "glob"; @@ -2927,13 +2988,12 @@ sub uninterp { return $str; } -# the same, but treat $|, $), $( and $ at the end of the string differently -sub re_uninterp { - my($str) = @_; - +{ +my $bal; +BEGIN { use re "eval"; # Matches any string which is balanced with respect to {braces} - my $bal = qr( + $bal = qr( (?: [^\\{}] | \\\\ @@ -2941,6 +3001,11 @@ sub re_uninterp { | \{(??{$bal})\} )* )x; +} + +# the same, but treat $|, $), $( and $ at the end of the string differently +sub re_uninterp { + my($str) = @_; $str =~ s/ ( ^|\G # $1 @@ -2958,24 +3023,99 @@ sub re_uninterp { | \\[uUlLQE] ) - /length($4) ? "$1$2$4" : "$1$2\\$3"/xeg; + /defined($4) && length($4) ? "$1$2$4" : "$1$2\\$3"/xeg; + + return $str; +} + +# This is for regular expressions with the /x modifier +# We have to leave comments unmangled. +sub re_uninterp_extended { + my($str) = @_; + + $str =~ s/ + ( ^|\G # $1 + | [^\\] + ) + + ( # $2 + (?:\\\\)* + ) + + ( # $3 + ( \(\?\??\{$bal\}\) # $4 (skip over (?{}) and (??{}) blocks) + | \#[^\n]* # (skip over comments) + ) + | [\$\@] + (?!\||\)|\(|$|\s) + | \\[uUlLQE] + ) + + /defined($4) && length($4) ? "$1$2$4" : "$1$2\\$3"/xeg; return $str; } +} + +my %unctrl = # portable to to EBCDIC + ( + "\c@" => '\c@', # unused + "\cA" => '\cA', + "\cB" => '\cB', + "\cC" => '\cC', + "\cD" => '\cD', + "\cE" => '\cE', + "\cF" => '\cF', + "\cG" => '\cG', + "\cH" => '\cH', + "\cI" => '\cI', + "\cJ" => '\cJ', + "\cK" => '\cK', + "\cL" => '\cL', + "\cM" => '\cM', + "\cN" => '\cN', + "\cO" => '\cO', + "\cP" => '\cP', + "\cQ" => '\cQ', + "\cR" => '\cR', + "\cS" => '\cS', + "\cT" => '\cT', + "\cU" => '\cU', + "\cV" => '\cV', + "\cW" => '\cW', + "\cX" => '\cX', + "\cY" => '\cY', + "\cZ" => '\cZ', + "\c[" => '\c[', # unused + "\c\\" => '\c\\', # unused + "\c]" => '\c]', # unused + "\c_" => '\c_', # unused + ); # character escapes, but not delimiters that might need to be escaped sub escape_str { # ASCII, UTF8 my($str) = @_; - $str =~ s/(.)/ord($1)>255 ? sprintf("\\x{%x}", ord($1)) : $1/eg; + $str =~ s/(.)/ord($1) > 255 ? sprintf("\\x{%x}", ord($1)) : $1/eg; $str =~ s/\a/\\a/g; -# $str =~ s/\cH/\\b/g; # \b means someting different in a regex +# $str =~ s/\cH/\\b/g; # \b means something different in a regex $str =~ s/\t/\\t/g; $str =~ s/\n/\\n/g; $str =~ s/\e/\\e/g; $str =~ s/\f/\\f/g; $str =~ s/\r/\\r/g; - $str =~ s/([\cA-\cZ])/'\\c' . chr(ord('@') + ord($1))/ge; - $str =~ s/([\0\033-\037\177-\377])/'\\' . sprintf("%03o", ord($1))/ge; + $str =~ s/([\cA-\cZ])/$unctrl{$1}/ge; + $str =~ s/([[:^print:]])/sprintf("\\%03o", ord($1))/ge; + return $str; +} + +# For regexes with the /x modifier. +# Leave whitespace unmangled. +sub escape_extended_re { + my($str) = @_; + $str =~ s/(.)/ord($1) > 255 ? sprintf("\\x{%x}", ord($1)) : $1/eg; + $str =~ s/([[:^print:]])/ + ($1 =~ y! \t\n!!) ? $1 : sprintf("\\%03o", ord($1))/ge; + $str =~ s/\n/\n\f/g; return $str; } @@ -2992,7 +3132,7 @@ sub re_unback { my($str) = @_; # the insane complexity here is due to the behaviour of "\c\" - $str =~ s/(^|[^\\]|\\c\\)(?FLAGS & SVf_IOK) { return $sv->int_value; } elsif ($sv->FLAGS & SVf_NOK) { - return $sv->NV; + # try the default stringification + my $r = "".$sv->NV; + if ($r =~ /e/) { + # If it's in scientific notation, we might have lost information + return sprintf("%.20e", $sv->NV); + } + return $r; } elsif ($sv->FLAGS & SVf_ROK && $sv->can("RV")) { return "\\(" . const($sv->RV) . ")"; # constant folded } elsif ($sv->FLAGS & SVf_POK) { @@ -3097,13 +3244,13 @@ sub dq { } elsif ($type eq "concat") { my $first = $self->dq($op->first); my $last = $self->dq($op->last); + # Disambiguate "${foo}bar", "${foo}{bar}", "${foo}[1]" - if ($last =~ /^[A-Z\\\^\[\]_?]/) { - $first =~ s/([\$@])\^$/${1}{^}/; # "${^}W" etc - } - elsif ($last =~ /^[{\[\w]/) { - $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/; - } + ($last =~ /^[A-Z\\\^\[\]_?]/ && + $first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc + || ($last =~ /^[{\[\w_]/ && + $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/); + return $first . $last; } elsif ($type eq "uc") { return '\U' . $self->dq($op->first->sibling) . '\E'; @@ -3169,10 +3316,13 @@ sub double_delim { } } +# Only used by tr///, so backslashes hyphens sub pchr { # ASCII my($n) = @_; if ($n == ord '\\') { return '\\\\'; + } elsif ($n == ord "-") { + return "\\-"; } elsif ($n >= ord(' ') and $n <= ord('~')) { return chr($n); } elsif ($n == ord "\a") { @@ -3215,12 +3365,10 @@ sub collapse { return $str; } -# XXX This has trouble with hyphens in the replacement (tr/bac/-AC/), -# and backslashes. - sub tr_decode_byte { my($table, $flags) = @_; - my(@table) = unpack("s256", $table); + my(@table) = unpack("s*", $table); + splice @table, 0x100, 1; # Number of subsequent elements my($c, $tr, @from, @to, @delfrom, $delhyphen); if ($table[ord "-"] != -1 and $table[ord("-") - 1] == -1 || $table[ord("-") + 1] == -1) @@ -3234,7 +3382,7 @@ sub tr_decode_byte { $delhyphen = 1; } } - for ($c = 0; $c < 256; $c++) { + for ($c = 0; $c < @table; $c++) { $tr = $table[$c]; if ($tr >= 0) { push @from, $c; push @to, $tr; @@ -3266,6 +3414,8 @@ sub tr_chr { my $x = shift; if ($x == ord "-") { return "\\-"; + } elsif ($x == ord "\\") { + return "\\\\"; } else { return chr $x; } @@ -3388,32 +3538,36 @@ sub pp_trans { # Like dq(), but different sub re_dq { my $self = shift; - my $op = shift; + my ($op, $extended) = @_; + my $type = $op->name; if ($type eq "const") { return '$[' if $op->private & OPpCONST_ARYBASE; - return re_uninterp(escape_str(re_unback($self->const_sv($op)->as_string))); + my $unbacked = re_unback($self->const_sv($op)->as_string); + return re_uninterp_extended(escape_extended_re($unbacked)) + if $extended; + return re_uninterp(escape_str($unbacked)); } elsif ($type eq "concat") { - my $first = $self->re_dq($op->first); - my $last = $self->re_dq($op->last); + my $first = $self->re_dq($op->first, $extended); + my $last = $self->re_dq($op->last, $extended); + # Disambiguate "${foo}bar", "${foo}{bar}", "${foo}[1]" - if ($last =~ /^[A-Z\\\^\[\]_?]/) { - $first =~ s/([\$@])\^$/${1}{^}/; - } - elsif ($last =~ /^[{\[\w]/) { - $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/; - } + ($last =~ /^[A-Z\\\^\[\]_?]/ && + $first =~ s/([\$@])\^$/${1}{^}/) # "${^}W" etc + || ($last =~ /^[{\[\w_]/ && + $first =~ s/([\$@])([A-Za-z_]\w*)$/${1}{$2}/); + return $first . $last; } elsif ($type eq "uc") { - return '\U' . $self->re_dq($op->first->sibling) . '\E'; + return '\U' . $self->re_dq($op->first->sibling, $extended) . '\E'; } elsif ($type eq "lc") { - return '\L' . $self->re_dq($op->first->sibling) . '\E'; + return '\L' . $self->re_dq($op->first->sibling, $extended) . '\E'; } elsif ($type eq "ucfirst") { - return '\u' . $self->re_dq($op->first->sibling); + return '\u' . $self->re_dq($op->first->sibling, $extended); } elsif ($type eq "lcfirst") { - return '\l' . $self->re_dq($op->first->sibling); + return '\l' . $self->re_dq($op->first->sibling, $extended); } elsif ($type eq "quotemeta") { - return '\Q' . $self->re_dq($op->first->sibling) . '\E'; + return '\Q' . $self->re_dq($op->first->sibling, $extended) . '\E'; } elsif ($type eq "join") { return $self->deparse($op->last, 26); # was join($", @ary) } else { @@ -3421,13 +3575,54 @@ sub re_dq { } } -sub pp_regcomp { +sub pure_string { + my ($self, $op) = @_; + my $type = $op->name; + + if ($type eq 'const') { + return 1; + } + elsif ($type =~ /^[ul]c(first)?$/ || $type eq 'quotemeta') { + return $self->pure_string($op->first->sibling); + } + elsif ($type eq 'join') { + my $join_op = $op->first->sibling; # Skip pushmark + return 0 unless $join_op->name eq 'null' && $join_op->targ eq OP_RV2SV; + + my $gvop = $join_op->first; + return 0 unless $gvop->name eq 'gvsv'; + return 0 unless '"' eq $self->gv_name($self->gv_or_padgv($gvop)); + + return 0 unless ${$join_op->sibling} eq ${$op->last}; + return 0 unless $op->last->name =~ /^(rv2|pad)av$/; + } + elsif ($type eq 'concat') { + return $self->pure_string($op->first) + && $self->pure_string($op->last); + } + elsif (is_scalar($op) || $type =~ /^[ah]elem(fast)?$/) { + return 1; + } + else { + return 0; + } + + return 1; +} + +sub regcomp { my $self = shift; - my($op, $cx) = @_; + my($op, $cx, $extended) = @_; my $kid = $op->first; $kid = $kid->first if $kid->name eq "regcmaybe"; $kid = $kid->first if $kid->name eq "regcreset"; - return $self->re_dq($kid); + return ($self->re_dq($kid, $extended), 1) if $self->pure_string($kid); + return ($self->deparse($kid, $cx), 0); +} + +sub pp_regcomp { + my ($self, $op, $cx) = @_; + return (($self->regcomp($op, $cx, 0))[0]); } # osmic acid -- see osmium tetroxide @@ -3447,10 +3642,19 @@ sub matchop { $var = $self->deparse($kid, 20); $kid = $kid->sibling; } + my $quote = 1; + my $extended = ($op->pmflags & PMf_EXTENDED); if (null $kid) { - $re = re_uninterp(escape_str(re_unback($op->precomp))); + my $unbacked = re_unback($op->precomp); + if ($extended) { + $re = re_uninterp_extended(escape_extended_re($unbacked)); + } else { + $re = re_uninterp(escape_str(re_unback($op->precomp))); + } + } elsif ($kid->name ne 'regcomp') { + carp("found ".$kid->name." where regcomp expected"); } else { - $re = $self->deparse($kid, 1); + ($re, $quote) = $self->regcomp($kid, 1, $extended); } my $flags = ""; $flags .= "c" if $op->pmflags & PMf_CONTINUE; @@ -3464,10 +3668,10 @@ sub matchop { if ($op->pmflags & PMf_ONCE) { # only one kind of delimiter works here $re =~ s/\?/\\?/g; $re = "?$re?"; - } else { + } elsif ($quote) { $re = single_delim($name, $delim, $re); } - $re = $re . $flags; + $re = $re . $flags if $quote; if ($binop) { return $self->maybe_parens("$var =~ $re", $cx, 20); } else { @@ -3484,7 +3688,10 @@ sub pp_split { my($op, $cx) = @_; my($kid, @exprs, $ary, $expr); $kid = $op->first; - if ($ {$kid->pmreplroot}) { + # under ithreads pmreplroot is an integer, not an SV + my $replroot = $kid->pmreplroot; + if ( ( ref($replroot) && $$replroot ) || + ( !ref($replroot) && $replroot ) ) { $ary = $self->stash_variable('@', $self->gv_name($kid->pmreplroot)); } for (; !null($kid); $kid = $kid->sibling) { @@ -3538,15 +3745,22 @@ sub pp_subst { $flags .= "e"; } if ($op->pmflags & PMf_EVAL) { - $repl = $self->deparse($repl, 0); + $repl = $self->deparse($repl, 0, 1); } else { $repl = $self->dq($repl); } } + my $extended = ($op->pmflags & PMf_EXTENDED); if (null $kid) { - $re = re_uninterp(escape_str(re_unback($op->precomp))); + my $unbacked = re_unback($op->precomp); + if ($extended) { + $re = re_uninterp_extended(escape_extended_re($unbacked)); + } + else { + $re = re_uninterp(escape_str($unbacked)); + } } else { - $re = $self->deparse($kid, 1); + ($re) = $self->regcomp($kid, 1, $extended); } $flags .= "e" if $op->pmflags & PMf_EVAL; $flags .= "g" if $op->pmflags & PMf_GLOBAL; @@ -3554,7 +3768,7 @@ sub pp_subst { $flags .= "m" if $op->pmflags & PMf_MULTILINE; $flags .= "o" if $op->pmflags & PMf_KEEP; $flags .= "s" if $op->pmflags & PMf_SINGLELINE; - $flags .= "x" if $op->pmflags & PMf_EXTENDED; + $flags .= "x" if $extended; $flags = $substwords{$flags} if $substwords{$flags}; if ($binop) { return $self->maybe_parens("$var =~ s" @@ -3628,6 +3842,22 @@ C will print which probably isn't what you intended (the C<'???'> is a sign that perl optimized away a constant value). +=item B<-P> + +Disable prototype checking. With this option, all function calls are +deparsed as if no prototype was defined for them. In other words, + + perl -MO=Deparse,-P -e 'sub foo (\@) { 1 } foo @x' + +will print + + sub foo (\@) { + 1; + } + &foo(\@x); + +making clear how the parameters are actually passed to C. + =item B<-q> Expand double-quoted strings into the corresponding combinations of @@ -3952,36 +4182,22 @@ than in the input file. =item * -Lvalue method calls are not yet fully supported. (Ordinary lvalue -subroutine calls ought to be okay though.) - -=item * +In fact, the above is a specific instance of a more general problem: +we can't guarantee to produce BEGIN blocks or C declarations in +exactly the right place. So if you use a module which affects compilation +(such as by over-riding keywords, overloading constants or whatever) +then the output code might not work as intended. -If you have a regex which is anything other than a literal of some -kind, B::Deparse will produce incorrect output. -e.g. C<$foo =~ give_me_a_regex()> will come back as -C<$foo =~ /give_me_a_regex()/> - -=item * - - m{ #foo - bar }x - -comes out as - - m/#foo\n bar/x) - -which isn't right. +This is the most serious outstanding problem, and will be very hard +to fix. =item * If a keyword is over-ridden, and your program explicitly calls the built-in version by using CORE::keyword, the output of B::Deparse -will not reflect this. - -=item * - -tr/// doesn't correctly handle wide characters +will not reflect this. If you run the resulting code, it will call +the over-ridden version rather than the built-in one. (Maybe there +should be an option to B print keyword calls as C.) =item * @@ -3996,18 +4212,35 @@ The obvious fix doesn't work, because these are different: =item * Constants (other than simple strings or numbers) don't work properly. -Examples that fail include: +Pathological examples that fail (and probably always will) include: use constant E2BIG => ($!=7); use constant x=>\$x; print x +The following could (and should) be made to work: + + use constant regex => qr/blah/; + print regex; + +=item * + +An input file that uses source filtering probably won't be deparsed into +runnable code, because it will still include the B declaration +for the source filtering module, even though the code that is +produced is already ordinary Perl which shouldn't be filtered again. + +=item * + +There are probably many more bugs on non-ASCII platforms (EBCDIC). + =back =head1 AUTHOR Stephen McCamant , based on an earlier version by Malcolm Beattie , with -contributions from Gisle Aas, James Duncan, Albert Dvornik, Hugo van -der Sanden, Gurusamy Sarathy, and Nick Ing-Simmons. +contributions from Gisle Aas, James Duncan, Albert Dvornik, Robin +Houston, Hugo van der Sanden, Gurusamy Sarathy, Nick Ing-Simmons, +and Rafael Garcia-Suarez. =cut