@ISA = qw(Exporter);
@EXPORT_OK = qw(assemble_fh newasm endasm assemble);
-$VERSION = 0.03;
+$VERSION = 0.04;
use strict;
my %opnumber;
my $debug = 0;
sub debug { $debug = shift }
+sub limcheck($$$$){
+ my( $val, $lo, $hi, $loc ) = @_;
+ if( $val < $lo || $hi < $val ){
+ error "argument for $loc outside [$lo, $hi]: $val";
+ $val = $hi;
+ }
+ return $val;
+}
+
#
# First define all the data conversion subs to which Asmdata will refer
#
$c = substr($c, 0, 1);
}
} else {
+ $arg = limcheck( $arg, 0, 0xff, 'U8' );
$c = chr($arg);
}
return $c;
}
-sub B::Asmdata::PUT_U16 { pack("S", $_[0]) }
-sub B::Asmdata::PUT_U32 { pack("L", $_[0]) }
-sub B::Asmdata::PUT_I32 { pack("L", $_[0]) }
-sub B::Asmdata::PUT_NV { sprintf("%s\0", $_[0]) } # "%lf" loses precision and pack('d',...)
+sub B::Asmdata::PUT_U16 {
+ my $arg = limcheck( $_[0], 0, 0xffff, 'U16' );
+ pack("S", $arg);
+}
+sub B::Asmdata::PUT_U32 {
+ my $arg = limcheck( $_[0], 0, 0xffffffff, 'U32' );
+ pack("L", $arg);
+}
+sub B::Asmdata::PUT_I32 {
+ my $arg = limcheck( $_[0], -0x80000000, 0x7fffffff, 'I32' );
+ pack("L", $arg);
+}
+sub B::Asmdata::PUT_NV { sprintf("%s\0", $_[0]) } # "%lf" looses precision and pack('d',...)
# may not even be portable between compilers
-sub B::Asmdata::PUT_objindex { pack("L", $_[0]) } # could allow names here
+sub B::Asmdata::PUT_objindex { # could allow names here
+ my $arg = limcheck( $_[0], 0, 0xffffffff, '*index' );
+ pack("L", $arg);
+}
sub B::Asmdata::PUT_svindex { &B::Asmdata::PUT_objindex }
sub B::Asmdata::PUT_opindex { &B::Asmdata::PUT_objindex }
sub B::Asmdata::PUT_pvindex { &B::Asmdata::PUT_objindex }
sub B::Asmdata::PUT_strconst {
my $arg = shift;
- $arg = uncstring($arg);
- if (!defined($arg)) {
+ my $str = uncstring($arg);
+ if (!defined($str)) {
error "bad string constant: $arg";
- return "";
+ $str = '';
}
- if ($arg =~ s/\0//g) {
+ if ($str =~ s/\0//g) {
error "string constant argument contains NUL: $arg";
+ $str = '';
}
- return $arg . "\0";
+ return $str . "\0";
}
sub B::Asmdata::PUT_pvcontents {
}
sub B::Asmdata::PUT_PV {
my $arg = shift;
- $arg = uncstring($arg);
- error "bad string argument: $arg" unless defined($arg);
- return pack("L", length($arg)) . $arg;
+ my $str = uncstring($arg);
+ if( ! defined($str) ){
+ error "bad string argument: $arg";
+ $str = '';
+ }
+ return pack("L", length($str)) . $str;
}
sub B::Asmdata::PUT_comment_t {
my $arg = shift;
return pack("S256", @ary);
}
# XXX Check this works
+# Note: $arg >> 32 is a no-op on 32-bit systems
sub B::Asmdata::PUT_IV64 {
my $arg = shift;
- return pack("LL", $arg >> 32, $arg & 0xffffffff);
+ return pack("LL", ($arg >> 16) >>16 , $arg & 0xffffffff);
+}
+
+sub B::Asmdata::PUT_IV {
+ $Config{ivsize} == 4 ? &B::Asmdata::PUT_I32 : &B::Asmdata::PUT_IV64;
}
my %unesc = (n => "\n", r => "\r", t => "\t", a => "\a",
sub parse_statement {
my $stmt = shift;
my ($insn, $arg) = $stmt =~ m{
- (?sx)
^\s* # allow (but ignore) leading whitespace
(.*?) # Instruction continues up until...
(?: # ...an optional whitespace+argument group
\s+ # first whitespace.
(.*) # The argument is all the rest (newlines included).
)?$ # anchor at end-of-line
- };
+ }sx;
if (defined($arg)) {
if ($arg =~ s/^0x(?=[0-9a-fA-F]+$)//) {
$arg = hex($arg);
$quotedline =~ s/"/\\"/g;
$out->(assemble_insn("comment", qq("$quotedline")));
}
- $line = strip_comments($line) or next;
- ($insn, $arg) = parse_statement($line);
- $out->(assemble_insn($insn, $arg));
- if ($debug) {
- $out->(assemble_insn("nop", undef));
+ if( $line = strip_comments($line) ){
+ ($insn, $arg) = parse_statement($line);
+ $out->(assemble_insn($insn, $arg));
+ if ($debug) {
+ $out->(assemble_insn("nop", undef));
+ }
}
}
# License or the Artistic License, as specified in the README file.
package B::Disassembler::BytecodeStream;
-our $VERSION = '1.00';
+our $VERSION = '1.01';
use FileHandle;
use Carp;
+use Config qw(%Config);
use B qw(cstring cast_I32);
@ISA = qw(FileHandle);
sub readn {
my $fh = shift;
my $str = $fh->readn(2);
croak "reached EOF while reading U16" unless length($str) == 2;
- return unpack("n", $str);
+ return unpack("S", $str);
}
sub GET_NV {
my $fh = shift;
- my $str = $fh->readn(8);
- croak "reached EOF while reading NV" unless length($str) == 8;
- return unpack("N", $str);
+ my ($str, $c);
+ while (defined($c = $fh->getc) && $c ne "\0") {
+ $str .= $c;
+ }
+ croak "reached EOF while reading double" unless defined($c);
+ return $str;
}
sub GET_U32 {
my $fh = shift;
my $str = $fh->readn(4);
croak "reached EOF while reading U32" unless length($str) == 4;
- return unpack("N", $str);
+ return unpack("L", $str);
}
sub GET_I32 {
my $fh = shift;
my $str = $fh->readn(4);
croak "reached EOF while reading I32" unless length($str) == 4;
- return cast_I32(unpack("N", $str));
+ return cast_I32(unpack("L", $str));
}
sub GET_objindex {
my $fh = shift;
my $str = $fh->readn(4);
croak "reached EOF while reading objindex" unless length($str) == 4;
- return unpack("N", $str);
+ return unpack("L", $str);
}
sub GET_opindex {
my $fh = shift;
my $str = $fh->readn(4);
croak "reached EOF while reading opindex" unless length($str) == 4;
- return unpack("N", $str);
+ return unpack("L", $str);
}
sub GET_svindex {
my $fh = shift;
my $str = $fh->readn(4);
croak "reached EOF while reading svindex" unless length($str) == 4;
- return unpack("N", $str);
+ return unpack("L", $str);
+}
+
+sub GET_pvindex {
+ my $fh = shift;
+ my $str = $fh->readn(4);
+ croak "reached EOF while reading pvindex" unless length($str) == 4;
+ return unpack("L", $str);
}
sub GET_strconst {
my $fh = shift;
my ($str, $c);
+ $str = '';
while (defined($c = $fh->getc) && $c ne "\0") {
$str .= $c;
}
sub GET_op_tr_array {
my $fh = shift;
- my @ary = unpack("n256", $fh->readn(256 * 2));
+ my @ary = unpack("S256", $fh->readn(256 * 2));
return join(",", @ary);
}
sub GET_IV64 {
my $fh = shift;
- my ($hi, $lo) = unpack("NN", $fh->readn(8));
- return sprintf("0x%4x%04x", $hi, $lo); # cheat
+ my ($hi, $lo) = unpack("LL", $fh->readn(8));
+ return sprintf("0x%x%08x", $hi, $lo); # cheat
+}
+
+sub GET_IV {
+ $Config{ivsize} == 4 ? &GET_I32 : &GET_IV64;
}
package B::Disassembler;
use Exporter;
@ISA = qw(Exporter);
-@EXPORT_OK = qw(disassemble_fh);
+@EXPORT_OK = qw(disassemble_fh get_header);
use Carp;
use strict;
use B::Asmdata qw(%insn_data @insn_name);
+our( $magic, $archname, $blversion, $ivsize, $ptrsize, $byteorder );
+
+sub dis_header($){
+ my( $fh ) = @_;
+ $magic = $fh->GET_U32();
+ warn( "bad magic" ) if $magic != 0x43424c50;
+ $archname = $fh->GET_strconst();
+ $blversion = $fh->GET_strconst();
+ $ivsize = $fh->GET_U32();
+ $ptrsize = $fh->GET_U32();
+ $byteorder = $fh->GET_strconst();
+}
+
+sub get_header(){
+ return( $magic, $archname, $blversion, $ivsize, $ptrsize, $byteorder );
+}
+
sub disassemble_fh {
my ($fh, $out) = @_;
my ($c, $getmeth, $insn, $arg);
bless $fh, "B::Disassembler::BytecodeStream";
+ dis_header( $fh );
while (defined($c = $fh->getc)) {
$c = ord($c);
$insn = $insn_name[$c];
--- /dev/null
+#!./perl -w
+
+=pod
+
+=head1 TEST FOR B::Assembler.pm AND B::Disassembler.pm
+
+=head2 Description
+
+The general idea is to test by assembling a choice set of assembler
+instructions, then disassemble them, and check that we've completed the
+round trip. Also, error checking of Assembler.pm is tested by feeding
+it assorted errors.
+
+Since Assembler.pm likes to assemble a file, we comply by writing a
+text file. This file contains three sections:
+
+ testing operand categories
+ use each opcode
+ erronous assembler instructions
+
+An "operand category" is identified by the suffix of the PUT_/GET_
+subroutines as shown in the C<%Asmdata::insn_data> initialization, e.g.
+opcode C<ldsv> has operand category C<svindex>:
+
+ insn_data{ldsv} = [1, \&PUT_svindex, "GET_svindex"];
+
+Because Disassembler.pm also assumes input from a file, we write the
+resulting object code to a file. And disassembled output is written to
+yet another text file which is then compared to the original input.
+(Erronous assembler instructions still generate code, but this is not
+written to the object file; therefore disassembly bails out at the first
+instruction in error.)
+
+All files are kept in memory by using TIEHASH.
+
+
+=head2 Caveats
+
+An error where Assembler.pm and Disassembler.pm agree but Assembler.pm
+generates invalid object code will not be detected.
+
+Due to the way this test has been set up, failure of a single test
+could cause all subsequent tests to fail as well: After an unexpected
+assembler error no output is written, and disassembled lines will be
+out of sync for all lines thereafter.
+
+Not all possibilities for writing a valid operand value can be tested
+because disassembly results in a uniform representation.
+
+
+=head2 Maintenance
+
+New opcodes are added automatically.
+
+A new operand category will cause this program to die ("no operand list
+for XXX"). The cure is to add suitable entries to C<%goodlist> and
+C<%badlist>. (Since the data in Asmdata.pm is autogenerated, it may also
+happen that the corresponding assembly or disassembly subroutine is
+missing.) Note that an empty array as a C<%goodlist> entry means that
+opcodes of the operand category do not take an operand (and therefore the
+corresponding entry in C<%badlist> should have one). An C<undef> entry
+in C<%badlist> means that any value is acceptable (and thus there is no
+way to cause an error).
+
+Set C<$dbg> to debug this test.
+
+=cut
+
+package VirtFile;
+use strict;
+
+# Note: This is NOT a general purpose package. It implements
+# sequential text and binary file i/o in a rather simple form.
+
+sub TIEHANDLE($;$){
+ my( $class, $data ) = @_;
+ my $obj = { data => defined( $data ) ? $data : '',
+ pos => 0 };
+ return bless( $obj, $class );
+}
+
+sub PRINT($@){
+ my( $self ) = shift;
+ $self->{data} .= join( '', @_ );
+}
+
+sub WRITE($$;$$){
+ my( $self, $buf, $len, $offset ) = @_;
+ unless( defined( $len ) ){
+ $len = length( $buf );
+ $offset = 0;
+ }
+ unless( defined( $offset ) ){
+ $offset = 0;
+ }
+ $self->{data} .= substr( $buf, $offset, $len );
+ return $len;
+}
+
+
+sub GETC($){
+ my( $self ) = @_;
+ return undef() if $self->{pos} >= length( $self->{data} );
+ return substr( $self->{data}, $self->{pos}++, 1 );
+}
+
+sub READLINE($){
+ my( $self ) = @_;
+ return undef() if $self->{pos} >= length( $self->{data} );
+ my $lfpos = index( $self->{data}, "\n", $self->{pos} );
+ if( $lfpos < 0 ){
+ $lfpos = length( $self->{data} );
+ }
+ my $pos = $self->{pos};
+ $self->{pos} = $lfpos + 1;
+ return substr( $self->{data}, $pos, $self->{pos} - $pos );
+}
+
+sub READ($@){
+ my $self = shift();
+ my $bufref = \$_[0];
+ my( undef, $len, $offset ) = @_;
+ if( $offset ){
+ die( "offset beyond end of buffer\n" )
+ if ! defined( $$bufref ) || $offset > length( $$bufref );
+ } else {
+ $$bufref = '';
+ $offset = 0;
+ }
+ my $remlen = length( $self->{data} ) - $self->{pos};
+ $len = $remlen if $remlen < $len;
+ return 0 unless $len;
+ substr( $$bufref, $offset, $len ) =
+ substr( $self->{data}, $self->{pos}, $len );
+ $self->{pos} += $len;
+ return $len;
+}
+
+sub TELL($){
+ my $self = shift();
+ return $self->{pos};
+}
+
+sub CLOSE($){
+ my( $self ) = @_;
+ $self->{pos} = 0;
+}
+
+1;
+
+package main;
+
+use strict;
+use Test::More;
+use Config qw(%Config);
+
+use B::Asmdata qw( %insn_data );
+use B::Assembler qw( &assemble_fh );
+use B::Disassembler qw( &disassemble_fh &get_header );
+
+my( %opsByType, @code2name );
+my( $lineno, $dbg, $firstbadline, @descr );
+$dbg = 0; # debug switch
+
+# $SIG{__WARN__} handler to catch Assembler error messages
+#
+my $warnmsg;
+sub catchwarn($){
+ $warnmsg = $_[0];
+ print "error: $warnmsg\n" if $dbg;
+}
+
+# Callback for writing assembled bytes. This is where we check
+# that we do get an error.
+#
+sub putobj($){
+ if( ++$lineno >= $firstbadline ){
+ ok( $warnmsg && $warnmsg =~ /^\d+:\s/, $descr[$lineno] );
+ undef( $warnmsg );
+ } else {
+ my $l = syswrite( OBJ, $_[0] );
+ }
+}
+
+# Callback for writing a disassembled statement.
+#
+sub putdis(@){
+ my $line = join( ' ', @_ );
+ ++$lineno;
+ print DIS "$line\n";
+ printf "%5d %s\n", $lineno, $line if $dbg;
+}
+
+# Generate assembler instructions from a hash of operand types: each
+# existing entry contains a list of good or bad operand values. The
+# corresponding opcodes can be found in %opsByType.
+#
+sub gen_type($$$){
+ my( $href, $descref, $text ) = @_;
+ for my $odt ( keys( %opsByType ) ){
+ my $opcode = $opsByType{$odt}->[0];
+ my $sel = $odt;
+ $sel =~ s/^GET_//;
+ die( "no operand list for $sel\n" ) unless exists( $href->{$sel} );
+ if( defined( $href->{$sel} ) ){
+ if( @{$href->{$sel}} ){
+ for my $od ( @{$href->{$sel}} ){
+ ++$lineno;
+ $descref->[$lineno] = "$text: $code2name[$opcode] $od";
+ print ASM "$code2name[$opcode] $od\n";
+ printf "%5d %s %s\n", $lineno, $code2name[$opcode], $od if $dbg;
+ }
+ } else {
+ ++$lineno;
+ $descref->[$lineno] = "$text: $code2name[$opcode]";
+ print ASM "$code2name[$opcode]\n";
+ printf "%5d %s\n", $lineno, $code2name[$opcode] if $dbg;
+ }
+ }
+ }
+}
+
+# Interesting operand values
+#
+my %goodlist = (
+comment_t => [ '"a comment"' ], # no \n
+none => [],
+svindex => [ 0x7fffffff, 0 ],
+opindex => [ 0x7fffffff, 0 ],
+pvindex => [ 0x7fffffff, 0 ],
+U32 => [ 0xffffffff, 0 ],
+U8 => [ 0xff, 0 ],
+PV => [ '""', '"a string"', ],
+I32 => [ -0x80000000, 0x7fffffff ],
+IV64 => [ '0x000000000', '0x0ffffffff', '0x000000001' ], # disass formats 0x%09x
+IV => $Config{ivsize} == 4 ?
+ [ -0x80000000, 0x7fffffff ] :
+ [ '0x000000000', '0x0ffffffff', '0x000000001' ],
+NV => [ 1.23456789E3 ],
+U16 => [ 0xffff, 0 ],
+pvcontents => [],
+strconst => [ '""', '"another string"' ], # no NUL
+op_tr_array => [ join( ',', 0..255 ) ],
+ );
+
+# Erronous operand values
+#
+my %badlist = (
+comment_t => [ '"multi-line\ncomment"' ], # no \n
+none => [ '"spurious arg"' ],
+svindex => [ 0xffffffff * 2, -1 ],
+opindex => [ 0xffffffff * 2, -2 ],
+pvindex => [ 0xffffffff * 2, -3 ],
+U32 => [ 0xffffffff * 2, -4 ],
+U16 => [ 0x5ffff, -5 ],
+U8 => [ 0x6ff, -6 ],
+PV => [ 'no quote"' ],
+I32 => [ -0x80000001, 0x80000000 ],
+IV64 => undef, # PUT_IV64 doesn't check - no integrity there
+IV => $Config{ivsize} == 4 ?
+ [ -0x80000001, 0x80000000 ] : undef,
+NV => undef, # PUT_NV accepts anything - it shouldn't, real-ly
+pvcontents => [ '"spurious arg"' ],
+strconst => [ 'no quote"', '"with NUL '."\0".' char"' ], # no NUL
+op_tr_array => [ join( ',', 1..42 ) ],
+ );
+
+
+# Determine all operand types from %Asmdata::insn_data
+#
+for my $opname ( keys( %insn_data ) ){
+ my ( $opcode, $put, $getname ) = @{$insn_data{$opname}};
+ push( @{$opsByType{$getname}}, $opcode );
+ $code2name[$opcode] = $opname;
+}
+
+
+# Write instruction(s) for correct operand values each operand type class
+#
+$lineno = 0;
+tie( *ASM, 'VirtFile' );
+gen_type( \%goodlist, \@descr, 'round trip' );
+
+# Write one instruction for each opcode.
+#
+for my $opcode ( 0..$#code2name ){
+ next unless defined( $code2name[$opcode] );
+ my $sel = $insn_data{$code2name[$opcode]}->[2];
+ $sel =~ s/^GET_//;
+ die( "no operand list for $sel\n" ) unless exists( $goodlist{$sel} );
+ if( defined( $goodlist{$sel} ) ){
+ ++$lineno;
+ if( @{$goodlist{$sel}} ){
+ my $od = $goodlist{$sel}[0];
+ $descr[$lineno] = "round trip: $code2name[$opcode] $od";
+ print ASM "$code2name[$opcode] $od\n";
+ printf "%5d %s %s\n", $lineno, $code2name[$opcode], $od if $dbg;
+ } else {
+ $descr[$lineno] = "round trip: $code2name[$opcode]";
+ print ASM "$code2name[$opcode]\n";
+ printf "%5d %s\n", $lineno, $code2name[$opcode] if $dbg;
+ }
+ }
+}
+
+# Write instruction(s) for incorrect operand values each operand type class
+#
+$firstbadline = $lineno + 1;
+gen_type( \%badlist, \@descr, 'asm error' );
+
+# invalid opcode is an odd-man-out ;-)
+#
+++$lineno;
+$descr[$lineno] = "asm error: Gollum";
+print ASM "Gollum\n";
+printf "%5d %s\n", $lineno, 'Gollum' if $dbg;
+
+close( ASM );
+
+# Now that we have defined all of our tests: plan
+#
+plan( tests => $lineno );
+print "firstbadline=$firstbadline\n" if $dbg;
+
+# assemble (guard against warnings and death from assembly errors)
+#
+$SIG{'__WARN__'} = \&catchwarn;
+
+$lineno = -1; # account for the assembly header
+tie( *OBJ, 'VirtFile' );
+eval { assemble_fh( \*ASM, \&putobj ); };
+print "eval: $@" if $dbg;
+close( ASM );
+close( OBJ );
+$SIG{'__WARN__'} = 'DEFAULT';
+
+# disassemble
+#
+print "--- disassembling ---\n" if $dbg;
+$lineno = 0;
+tie( *DIS, 'VirtFile' );
+disassemble_fh( \*OBJ, \&putdis );
+close( OBJ );
+close( DIS );
+
+# get header (for debugging only)
+#
+if( $dbg ){
+ my( $magic, $archname, $blversion, $ivsize, $ptrsize, $byteorder ) =
+ get_header();
+ printf "Magic: 0x%08x\n", $magic;
+ print "Architecture: $archname\n";
+ print "Byteloader V: $blversion\n";
+ print "ivsize: $ivsize\n";
+ print "ptrsize: $ptrsize\n";
+ print "Byteorder: $byteorder\n";
+}
+
+# check by comparing files line by line
+#
+print "--- checking ---\n" if $dbg;
+$lineno = 0;
+my( $asmline, $disline );
+while( defined( $asmline = <ASM> ) ){
+ $disline = <DIS>;
+ ++$lineno;
+ last if $lineno eq $firstbadline; # bail out where errors begin
+ ok( $asmline eq $disline, $descr[$lineno] );
+ printf "%5d %s\n", $lineno, $asmline if $dbg;
+}
+close( ASM );
+close( DIS );
+
+__END__