From: Jarkko Hietaniemi Date: Wed, 19 Dec 2001 14:25:27 +0000 (+0000) Subject: B::Assembler/B::Disassembler patches and test; X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f4abc3e7120c79388800ae3eaccafb9461d38553;p=p5sagit%2Fp5-mst-13.2.git B::Assembler/B::Disassembler patches and test; from Wolfgang Laun. TODO: getting perlcc working. p4raw-id: //depot/perl@13802 --- diff --git a/MANIFEST b/MANIFEST index 293c2d4..b320b65 100644 --- a/MANIFEST +++ b/MANIFEST @@ -92,6 +92,7 @@ ext/B/ramblings/reg.alloc Compiler ramblings: register allocation ext/B/ramblings/runtime.porting Compiler ramblings: porting PP enging ext/B/README Compiler backend README ext/B/t/asmdata.t See if B::Asmdata works +ext/B/t/assembler.t See if B::Assembler, B::Disassembler comply ext/B/t/b.t See if B works ext/B/t/bblock.t See if B::Bblock works ext/B/t/debug.t See if B::Debug works diff --git a/ext/B/B/Assembler.pm b/ext/B/B/Assembler.pm index 86f0962..10ae81b 100644 --- a/ext/B/B/Assembler.pm +++ b/ext/B/B/Assembler.pm @@ -14,7 +14,7 @@ require ByteLoader; # we just need its $VERSIOM @ISA = qw(Exporter); @EXPORT_OK = qw(assemble_fh newasm endasm assemble); -$VERSION = 0.03; +$VERSION = 0.04; use strict; my %opnumber; @@ -34,6 +34,15 @@ sub error { my $debug = 0; sub debug { $debug = shift } +sub limcheck($$$$){ + my( $val, $lo, $hi, $loc ) = @_; + if( $val < $lo || $hi < $val ){ + error "argument for $loc outside [$lo, $hi]: $val"; + $val = $hi; + } + return $val; +} + # # First define all the data conversion subs to which Asmdata will refer # @@ -47,32 +56,46 @@ sub B::Asmdata::PUT_U8 { $c = substr($c, 0, 1); } } else { + $arg = limcheck( $arg, 0, 0xff, 'U8' ); $c = chr($arg); } return $c; } -sub B::Asmdata::PUT_U16 { pack("S", $_[0]) } -sub B::Asmdata::PUT_U32 { pack("L", $_[0]) } -sub B::Asmdata::PUT_I32 { pack("L", $_[0]) } -sub B::Asmdata::PUT_NV { sprintf("%s\0", $_[0]) } # "%lf" loses precision and pack('d',...) +sub B::Asmdata::PUT_U16 { + my $arg = limcheck( $_[0], 0, 0xffff, 'U16' ); + pack("S", $arg); +} +sub B::Asmdata::PUT_U32 { + my $arg = limcheck( $_[0], 0, 0xffffffff, 'U32' ); + pack("L", $arg); +} +sub B::Asmdata::PUT_I32 { + my $arg = limcheck( $_[0], -0x80000000, 0x7fffffff, 'I32' ); + pack("L", $arg); +} +sub B::Asmdata::PUT_NV { sprintf("%s\0", $_[0]) } # "%lf" looses precision and pack('d',...) # may not even be portable between compilers -sub B::Asmdata::PUT_objindex { pack("L", $_[0]) } # could allow names here +sub B::Asmdata::PUT_objindex { # could allow names here + my $arg = limcheck( $_[0], 0, 0xffffffff, '*index' ); + pack("L", $arg); +} sub B::Asmdata::PUT_svindex { &B::Asmdata::PUT_objindex } sub B::Asmdata::PUT_opindex { &B::Asmdata::PUT_objindex } sub B::Asmdata::PUT_pvindex { &B::Asmdata::PUT_objindex } sub B::Asmdata::PUT_strconst { my $arg = shift; - $arg = uncstring($arg); - if (!defined($arg)) { + my $str = uncstring($arg); + if (!defined($str)) { error "bad string constant: $arg"; - return ""; + $str = ''; } - if ($arg =~ s/\0//g) { + if ($str =~ s/\0//g) { error "string constant argument contains NUL: $arg"; + $str = ''; } - return $arg . "\0"; + return $str . "\0"; } sub B::Asmdata::PUT_pvcontents { @@ -82,9 +105,12 @@ sub B::Asmdata::PUT_pvcontents { } sub B::Asmdata::PUT_PV { my $arg = shift; - $arg = uncstring($arg); - error "bad string argument: $arg" unless defined($arg); - return pack("L", length($arg)) . $arg; + my $str = uncstring($arg); + if( ! defined($str) ){ + error "bad string argument: $arg"; + $str = ''; + } + return pack("L", length($str)) . $str; } sub B::Asmdata::PUT_comment_t { my $arg = shift; @@ -111,9 +137,14 @@ sub B::Asmdata::PUT_op_tr_array { return pack("S256", @ary); } # XXX Check this works +# Note: $arg >> 32 is a no-op on 32-bit systems sub B::Asmdata::PUT_IV64 { my $arg = shift; - return pack("LL", $arg >> 32, $arg & 0xffffffff); + return pack("LL", ($arg >> 16) >>16 , $arg & 0xffffffff); +} + +sub B::Asmdata::PUT_IV { + $Config{ivsize} == 4 ? &B::Asmdata::PUT_I32 : &B::Asmdata::PUT_IV64; } my %unesc = (n => "\n", r => "\r", t => "\t", a => "\a", @@ -164,14 +195,13 @@ sub gen_header { sub parse_statement { my $stmt = shift; my ($insn, $arg) = $stmt =~ m{ - (?sx) ^\s* # allow (but ignore) leading whitespace (.*?) # Instruction continues up until... (?: # ...an optional whitespace+argument group \s+ # first whitespace. (.*) # The argument is all the rest (newlines included). )?$ # anchor at end-of-line - }; + }sx; if (defined($arg)) { if ($arg =~ s/^0x(?=[0-9a-fA-F]+$)//) { $arg = hex($arg); @@ -247,11 +277,12 @@ sub assemble { $quotedline =~ s/"/\\"/g; $out->(assemble_insn("comment", qq("$quotedline"))); } - $line = strip_comments($line) or next; - ($insn, $arg) = parse_statement($line); - $out->(assemble_insn($insn, $arg)); - if ($debug) { - $out->(assemble_insn("nop", undef)); + if( $line = strip_comments($line) ){ + ($insn, $arg) = parse_statement($line); + $out->(assemble_insn($insn, $arg)); + if ($debug) { + $out->(assemble_insn("nop", undef)); + } } } diff --git a/ext/B/B/Disassembler.pm b/ext/B/B/Disassembler.pm index 7fc4ac7..b8b5262 100644 --- a/ext/B/B/Disassembler.pm +++ b/ext/B/B/Disassembler.pm @@ -6,10 +6,11 @@ # License or the Artistic License, as specified in the README file. package B::Disassembler::BytecodeStream; -our $VERSION = '1.00'; +our $VERSION = '1.01'; use FileHandle; use Carp; +use Config qw(%Config); use B qw(cstring cast_I32); @ISA = qw(FileHandle); sub readn { @@ -31,54 +32,65 @@ sub GET_U16 { my $fh = shift; my $str = $fh->readn(2); croak "reached EOF while reading U16" unless length($str) == 2; - return unpack("n", $str); + return unpack("S", $str); } sub GET_NV { my $fh = shift; - my $str = $fh->readn(8); - croak "reached EOF while reading NV" unless length($str) == 8; - return unpack("N", $str); + my ($str, $c); + while (defined($c = $fh->getc) && $c ne "\0") { + $str .= $c; + } + croak "reached EOF while reading double" unless defined($c); + return $str; } sub GET_U32 { my $fh = shift; my $str = $fh->readn(4); croak "reached EOF while reading U32" unless length($str) == 4; - return unpack("N", $str); + return unpack("L", $str); } sub GET_I32 { my $fh = shift; my $str = $fh->readn(4); croak "reached EOF while reading I32" unless length($str) == 4; - return cast_I32(unpack("N", $str)); + return cast_I32(unpack("L", $str)); } sub GET_objindex { my $fh = shift; my $str = $fh->readn(4); croak "reached EOF while reading objindex" unless length($str) == 4; - return unpack("N", $str); + return unpack("L", $str); } sub GET_opindex { my $fh = shift; my $str = $fh->readn(4); croak "reached EOF while reading opindex" unless length($str) == 4; - return unpack("N", $str); + return unpack("L", $str); } sub GET_svindex { my $fh = shift; my $str = $fh->readn(4); croak "reached EOF while reading svindex" unless length($str) == 4; - return unpack("N", $str); + return unpack("L", $str); +} + +sub GET_pvindex { + my $fh = shift; + my $str = $fh->readn(4); + croak "reached EOF while reading pvindex" unless length($str) == 4; + return unpack("L", $str); } sub GET_strconst { my $fh = shift; my ($str, $c); + $str = ''; while (defined($c = $fh->getc) && $c ne "\0") { $str .= $c; } @@ -125,29 +137,51 @@ sub GET_none {} sub GET_op_tr_array { my $fh = shift; - my @ary = unpack("n256", $fh->readn(256 * 2)); + my @ary = unpack("S256", $fh->readn(256 * 2)); return join(",", @ary); } sub GET_IV64 { my $fh = shift; - my ($hi, $lo) = unpack("NN", $fh->readn(8)); - return sprintf("0x%4x%04x", $hi, $lo); # cheat + my ($hi, $lo) = unpack("LL", $fh->readn(8)); + return sprintf("0x%x%08x", $hi, $lo); # cheat +} + +sub GET_IV { + $Config{ivsize} == 4 ? &GET_I32 : &GET_IV64; } package B::Disassembler; use Exporter; @ISA = qw(Exporter); -@EXPORT_OK = qw(disassemble_fh); +@EXPORT_OK = qw(disassemble_fh get_header); use Carp; use strict; use B::Asmdata qw(%insn_data @insn_name); +our( $magic, $archname, $blversion, $ivsize, $ptrsize, $byteorder ); + +sub dis_header($){ + my( $fh ) = @_; + $magic = $fh->GET_U32(); + warn( "bad magic" ) if $magic != 0x43424c50; + $archname = $fh->GET_strconst(); + $blversion = $fh->GET_strconst(); + $ivsize = $fh->GET_U32(); + $ptrsize = $fh->GET_U32(); + $byteorder = $fh->GET_strconst(); +} + +sub get_header(){ + return( $magic, $archname, $blversion, $ivsize, $ptrsize, $byteorder ); +} + sub disassemble_fh { my ($fh, $out) = @_; my ($c, $getmeth, $insn, $arg); bless $fh, "B::Disassembler::BytecodeStream"; + dis_header( $fh ); while (defined($c = $fh->getc)) { $c = ord($c); $insn = $insn_name[$c]; diff --git a/ext/B/t/assembler.t b/ext/B/t/assembler.t new file mode 100644 index 0000000..6bec7e0 --- /dev/null +++ b/ext/B/t/assembler.t @@ -0,0 +1,374 @@ +#!./perl -w + +=pod + +=head1 TEST FOR B::Assembler.pm AND B::Disassembler.pm + +=head2 Description + +The general idea is to test by assembling a choice set of assembler +instructions, then disassemble them, and check that we've completed the +round trip. Also, error checking of Assembler.pm is tested by feeding +it assorted errors. + +Since Assembler.pm likes to assemble a file, we comply by writing a +text file. This file contains three sections: + + testing operand categories + use each opcode + erronous assembler instructions + +An "operand category" is identified by the suffix of the PUT_/GET_ +subroutines as shown in the C<%Asmdata::insn_data> initialization, e.g. +opcode C has operand category C: + + insn_data{ldsv} = [1, \&PUT_svindex, "GET_svindex"]; + +Because Disassembler.pm also assumes input from a file, we write the +resulting object code to a file. And disassembled output is written to +yet another text file which is then compared to the original input. +(Erronous assembler instructions still generate code, but this is not +written to the object file; therefore disassembly bails out at the first +instruction in error.) + +All files are kept in memory by using TIEHASH. + + +=head2 Caveats + +An error where Assembler.pm and Disassembler.pm agree but Assembler.pm +generates invalid object code will not be detected. + +Due to the way this test has been set up, failure of a single test +could cause all subsequent tests to fail as well: After an unexpected +assembler error no output is written, and disassembled lines will be +out of sync for all lines thereafter. + +Not all possibilities for writing a valid operand value can be tested +because disassembly results in a uniform representation. + + +=head2 Maintenance + +New opcodes are added automatically. + +A new operand category will cause this program to die ("no operand list +for XXX"). The cure is to add suitable entries to C<%goodlist> and +C<%badlist>. (Since the data in Asmdata.pm is autogenerated, it may also +happen that the corresponding assembly or disassembly subroutine is +missing.) Note that an empty array as a C<%goodlist> entry means that +opcodes of the operand category do not take an operand (and therefore the +corresponding entry in C<%badlist> should have one). An C entry +in C<%badlist> means that any value is acceptable (and thus there is no +way to cause an error). + +Set C<$dbg> to debug this test. + +=cut + +package VirtFile; +use strict; + +# Note: This is NOT a general purpose package. It implements +# sequential text and binary file i/o in a rather simple form. + +sub TIEHANDLE($;$){ + my( $class, $data ) = @_; + my $obj = { data => defined( $data ) ? $data : '', + pos => 0 }; + return bless( $obj, $class ); +} + +sub PRINT($@){ + my( $self ) = shift; + $self->{data} .= join( '', @_ ); +} + +sub WRITE($$;$$){ + my( $self, $buf, $len, $offset ) = @_; + unless( defined( $len ) ){ + $len = length( $buf ); + $offset = 0; + } + unless( defined( $offset ) ){ + $offset = 0; + } + $self->{data} .= substr( $buf, $offset, $len ); + return $len; +} + + +sub GETC($){ + my( $self ) = @_; + return undef() if $self->{pos} >= length( $self->{data} ); + return substr( $self->{data}, $self->{pos}++, 1 ); +} + +sub READLINE($){ + my( $self ) = @_; + return undef() if $self->{pos} >= length( $self->{data} ); + my $lfpos = index( $self->{data}, "\n", $self->{pos} ); + if( $lfpos < 0 ){ + $lfpos = length( $self->{data} ); + } + my $pos = $self->{pos}; + $self->{pos} = $lfpos + 1; + return substr( $self->{data}, $pos, $self->{pos} - $pos ); +} + +sub READ($@){ + my $self = shift(); + my $bufref = \$_[0]; + my( undef, $len, $offset ) = @_; + if( $offset ){ + die( "offset beyond end of buffer\n" ) + if ! defined( $$bufref ) || $offset > length( $$bufref ); + } else { + $$bufref = ''; + $offset = 0; + } + my $remlen = length( $self->{data} ) - $self->{pos}; + $len = $remlen if $remlen < $len; + return 0 unless $len; + substr( $$bufref, $offset, $len ) = + substr( $self->{data}, $self->{pos}, $len ); + $self->{pos} += $len; + return $len; +} + +sub TELL($){ + my $self = shift(); + return $self->{pos}; +} + +sub CLOSE($){ + my( $self ) = @_; + $self->{pos} = 0; +} + +1; + +package main; + +use strict; +use Test::More; +use Config qw(%Config); + +use B::Asmdata qw( %insn_data ); +use B::Assembler qw( &assemble_fh ); +use B::Disassembler qw( &disassemble_fh &get_header ); + +my( %opsByType, @code2name ); +my( $lineno, $dbg, $firstbadline, @descr ); +$dbg = 0; # debug switch + +# $SIG{__WARN__} handler to catch Assembler error messages +# +my $warnmsg; +sub catchwarn($){ + $warnmsg = $_[0]; + print "error: $warnmsg\n" if $dbg; +} + +# Callback for writing assembled bytes. This is where we check +# that we do get an error. +# +sub putobj($){ + if( ++$lineno >= $firstbadline ){ + ok( $warnmsg && $warnmsg =~ /^\d+:\s/, $descr[$lineno] ); + undef( $warnmsg ); + } else { + my $l = syswrite( OBJ, $_[0] ); + } +} + +# Callback for writing a disassembled statement. +# +sub putdis(@){ + my $line = join( ' ', @_ ); + ++$lineno; + print DIS "$line\n"; + printf "%5d %s\n", $lineno, $line if $dbg; +} + +# Generate assembler instructions from a hash of operand types: each +# existing entry contains a list of good or bad operand values. The +# corresponding opcodes can be found in %opsByType. +# +sub gen_type($$$){ + my( $href, $descref, $text ) = @_; + for my $odt ( keys( %opsByType ) ){ + my $opcode = $opsByType{$odt}->[0]; + my $sel = $odt; + $sel =~ s/^GET_//; + die( "no operand list for $sel\n" ) unless exists( $href->{$sel} ); + if( defined( $href->{$sel} ) ){ + if( @{$href->{$sel}} ){ + for my $od ( @{$href->{$sel}} ){ + ++$lineno; + $descref->[$lineno] = "$text: $code2name[$opcode] $od"; + print ASM "$code2name[$opcode] $od\n"; + printf "%5d %s %s\n", $lineno, $code2name[$opcode], $od if $dbg; + } + } else { + ++$lineno; + $descref->[$lineno] = "$text: $code2name[$opcode]"; + print ASM "$code2name[$opcode]\n"; + printf "%5d %s\n", $lineno, $code2name[$opcode] if $dbg; + } + } + } +} + +# Interesting operand values +# +my %goodlist = ( +comment_t => [ '"a comment"' ], # no \n +none => [], +svindex => [ 0x7fffffff, 0 ], +opindex => [ 0x7fffffff, 0 ], +pvindex => [ 0x7fffffff, 0 ], +U32 => [ 0xffffffff, 0 ], +U8 => [ 0xff, 0 ], +PV => [ '""', '"a string"', ], +I32 => [ -0x80000000, 0x7fffffff ], +IV64 => [ '0x000000000', '0x0ffffffff', '0x000000001' ], # disass formats 0x%09x +IV => $Config{ivsize} == 4 ? + [ -0x80000000, 0x7fffffff ] : + [ '0x000000000', '0x0ffffffff', '0x000000001' ], +NV => [ 1.23456789E3 ], +U16 => [ 0xffff, 0 ], +pvcontents => [], +strconst => [ '""', '"another string"' ], # no NUL +op_tr_array => [ join( ',', 0..255 ) ], + ); + +# Erronous operand values +# +my %badlist = ( +comment_t => [ '"multi-line\ncomment"' ], # no \n +none => [ '"spurious arg"' ], +svindex => [ 0xffffffff * 2, -1 ], +opindex => [ 0xffffffff * 2, -2 ], +pvindex => [ 0xffffffff * 2, -3 ], +U32 => [ 0xffffffff * 2, -4 ], +U16 => [ 0x5ffff, -5 ], +U8 => [ 0x6ff, -6 ], +PV => [ 'no quote"' ], +I32 => [ -0x80000001, 0x80000000 ], +IV64 => undef, # PUT_IV64 doesn't check - no integrity there +IV => $Config{ivsize} == 4 ? + [ -0x80000001, 0x80000000 ] : undef, +NV => undef, # PUT_NV accepts anything - it shouldn't, real-ly +pvcontents => [ '"spurious arg"' ], +strconst => [ 'no quote"', '"with NUL '."\0".' char"' ], # no NUL +op_tr_array => [ join( ',', 1..42 ) ], + ); + + +# Determine all operand types from %Asmdata::insn_data +# +for my $opname ( keys( %insn_data ) ){ + my ( $opcode, $put, $getname ) = @{$insn_data{$opname}}; + push( @{$opsByType{$getname}}, $opcode ); + $code2name[$opcode] = $opname; +} + + +# Write instruction(s) for correct operand values each operand type class +# +$lineno = 0; +tie( *ASM, 'VirtFile' ); +gen_type( \%goodlist, \@descr, 'round trip' ); + +# Write one instruction for each opcode. +# +for my $opcode ( 0..$#code2name ){ + next unless defined( $code2name[$opcode] ); + my $sel = $insn_data{$code2name[$opcode]}->[2]; + $sel =~ s/^GET_//; + die( "no operand list for $sel\n" ) unless exists( $goodlist{$sel} ); + if( defined( $goodlist{$sel} ) ){ + ++$lineno; + if( @{$goodlist{$sel}} ){ + my $od = $goodlist{$sel}[0]; + $descr[$lineno] = "round trip: $code2name[$opcode] $od"; + print ASM "$code2name[$opcode] $od\n"; + printf "%5d %s %s\n", $lineno, $code2name[$opcode], $od if $dbg; + } else { + $descr[$lineno] = "round trip: $code2name[$opcode]"; + print ASM "$code2name[$opcode]\n"; + printf "%5d %s\n", $lineno, $code2name[$opcode] if $dbg; + } + } +} + +# Write instruction(s) for incorrect operand values each operand type class +# +$firstbadline = $lineno + 1; +gen_type( \%badlist, \@descr, 'asm error' ); + +# invalid opcode is an odd-man-out ;-) +# +++$lineno; +$descr[$lineno] = "asm error: Gollum"; +print ASM "Gollum\n"; +printf "%5d %s\n", $lineno, 'Gollum' if $dbg; + +close( ASM ); + +# Now that we have defined all of our tests: plan +# +plan( tests => $lineno ); +print "firstbadline=$firstbadline\n" if $dbg; + +# assemble (guard against warnings and death from assembly errors) +# +$SIG{'__WARN__'} = \&catchwarn; + +$lineno = -1; # account for the assembly header +tie( *OBJ, 'VirtFile' ); +eval { assemble_fh( \*ASM, \&putobj ); }; +print "eval: $@" if $dbg; +close( ASM ); +close( OBJ ); +$SIG{'__WARN__'} = 'DEFAULT'; + +# disassemble +# +print "--- disassembling ---\n" if $dbg; +$lineno = 0; +tie( *DIS, 'VirtFile' ); +disassemble_fh( \*OBJ, \&putdis ); +close( OBJ ); +close( DIS ); + +# get header (for debugging only) +# +if( $dbg ){ + my( $magic, $archname, $blversion, $ivsize, $ptrsize, $byteorder ) = + get_header(); + printf "Magic: 0x%08x\n", $magic; + print "Architecture: $archname\n"; + print "Byteloader V: $blversion\n"; + print "ivsize: $ivsize\n"; + print "ptrsize: $ptrsize\n"; + print "Byteorder: $byteorder\n"; +} + +# check by comparing files line by line +# +print "--- checking ---\n" if $dbg; +$lineno = 0; +my( $asmline, $disline ); +while( defined( $asmline = ) ){ + $disline = ; + ++$lineno; + last if $lineno eq $firstbadline; # bail out where errors begin + ok( $asmline eq $disline, $descr[$lineno] ); + printf "%5d %s\n", $lineno, $asmline if $dbg; +} +close( ASM ); +close( DIS ); + +__END__