Can't printf U8s as UVs.
[p5sagit/p5-mst-13.2.git] / ext / B / B / Assembler.pm
CommitLineData
a798dbf2 1# Assembler.pm
2#
3# Copyright (c) 1996 Malcolm Beattie
4#
5# You may distribute under the terms of either the GNU General Public
6# License or the Artistic License, as specified in the README file.
059a8bb7 7
a798dbf2 8package B::Assembler;
9use Exporter;
10use B qw(ppname);
11use B::Asmdata qw(%insn_data @insn_name);
059a8bb7 12use Config qw(%Config);
13require ByteLoader; # we just need its $VERSIOM
a798dbf2 14
15@ISA = qw(Exporter);
059a8bb7 16@EXPORT_OK = qw(assemble_fh newasm endasm assemble);
88d01e8d 17$VERSION = 0.03;
a798dbf2 18
19use strict;
20my %opnumber;
21my ($i, $opname);
22for ($i = 0; defined($opname = ppname($i)); $i++) {
23 $opnumber{$opname} = $i;
24}
25
059a8bb7 26my($linenum, $errors, $out); # global state, set up by newasm
a798dbf2 27
28sub error {
29 my $str = shift;
30 warn "$linenum: $str\n";
31 $errors++;
32}
33
34my $debug = 0;
35sub debug { $debug = shift }
36
37#
38# First define all the data conversion subs to which Asmdata will refer
39#
40
41sub B::Asmdata::PUT_U8 {
42 my $arg = shift;
43 my $c = uncstring($arg);
44 if (defined($c)) {
45 if (length($c) != 1) {
46 error "argument for U8 is too long: $c";
47 $c = substr($c, 0, 1);
48 }
49 } else {
50 $c = chr($arg);
51 }
52 return $c;
53}
54
059a8bb7 55sub B::Asmdata::PUT_U16 { pack("S", $_[0]) }
56sub B::Asmdata::PUT_U32 { pack("L", $_[0]) }
57sub B::Asmdata::PUT_I32 { pack("L", $_[0]) }
0222aa7d 58sub B::Asmdata::PUT_NV { sprintf("%s\0", $_[0]) } # "%lf" loses precision and pack('d',...)
059a8bb7 59 # may not even be portable between compilers
60sub B::Asmdata::PUT_objindex { pack("L", $_[0]) } # could allow names here
cf743617 61sub B::Asmdata::PUT_svindex { &B::Asmdata::PUT_objindex }
62sub B::Asmdata::PUT_opindex { &B::Asmdata::PUT_objindex }
059a8bb7 63sub B::Asmdata::PUT_pvindex { &B::Asmdata::PUT_objindex }
a798dbf2 64
65sub B::Asmdata::PUT_strconst {
66 my $arg = shift;
67 $arg = uncstring($arg);
68 if (!defined($arg)) {
69 error "bad string constant: $arg";
70 return "";
71 }
72 if ($arg =~ s/\0//g) {
73 error "string constant argument contains NUL: $arg";
74 }
75 return $arg . "\0";
76}
77
78sub B::Asmdata::PUT_pvcontents {
79 my $arg = shift;
80 error "extraneous argument: $arg" if defined $arg;
81 return "";
82}
83sub B::Asmdata::PUT_PV {
84 my $arg = shift;
85 $arg = uncstring($arg);
86 error "bad string argument: $arg" unless defined($arg);
059a8bb7 87 return pack("L", length($arg)) . $arg;
a798dbf2 88}
d53d2b93 89sub B::Asmdata::PUT_comment_t {
a798dbf2 90 my $arg = shift;
91 $arg = uncstring($arg);
92 error "bad string argument: $arg" unless defined($arg);
93 if ($arg =~ s/\n//g) {
94 error "comment argument contains linefeed: $arg";
95 }
96 return $arg . "\n";
97}
059a8bb7 98sub B::Asmdata::PUT_double { sprintf("%s\0", $_[0]) } # see PUT_NV above
a798dbf2 99sub B::Asmdata::PUT_none {
100 my $arg = shift;
101 error "extraneous argument: $arg" if defined $arg;
102 return "";
103}
104sub B::Asmdata::PUT_op_tr_array {
105 my $arg = shift;
106 my @ary = split(/\s*,\s*/, $arg);
107 if (@ary != 256) {
108 error "wrong number of arguments to op_tr_array";
109 @ary = (0) x 256;
110 }
059a8bb7 111 return pack("S256", @ary);
a798dbf2 112}
113# XXX Check this works
114sub B::Asmdata::PUT_IV64 {
115 my $arg = shift;
059a8bb7 116 return pack("LL", $arg >> 32, $arg & 0xffffffff);
a798dbf2 117}
118
119my %unesc = (n => "\n", r => "\r", t => "\t", a => "\a",
120 b => "\b", f => "\f", v => "\013");
121
122sub uncstring {
123 my $s = shift;
124 $s =~ s/^"// and $s =~ s/"$// or return undef;
125 $s =~ s/\\(\d\d\d|.)/length($1) == 3 ? chr(oct($1)) : ($unesc{$1}||$1)/eg;
126 return $s;
127}
128
129sub strip_comments {
130 my $stmt = shift;
131 # Comments only allowed in instructions which don't take string arguments
132 $stmt =~ s{
133 (?sx) # Snazzy extended regexp coming up. Also, treat
134 # string as a single line so .* eats \n characters.
135 ^\s* # Ignore leading whitespace
136 (
137 [^"]* # A double quote '"' indicates a string argument. If we
138 # find a double quote, the match fails and we strip nothing.
139 )
140 \s*\# # Any amount of whitespace plus the comment marker...
141 .*$ # ...which carries on to end-of-string.
142 }{$1}; # Keep only the instruction and optional argument.
143 return $stmt;
144}
145
059a8bb7 146# create the ByteCode header: magic, archname, ByteLoader $VERSION, ivsize,
147# ptrsize, byteorder
148# nvtype is irrelevant (floats are stored as strings)
149# byteorder is strconst not U32 because of varying size issues
150
151sub gen_header {
152 my $header = "";
153
154 $header .= B::Asmdata::PUT_U32(0x43424c50); # 'PLBC'
155 $header .= B::Asmdata::PUT_strconst('"' . $Config{archname}. '"');
156 $header .= B::Asmdata::PUT_strconst(qq["$ByteLoader::VERSION"]);
157 $header .= B::Asmdata::PUT_U32($Config{ivsize});
158 $header .= B::Asmdata::PUT_U32($Config{ptrsize});
159 $header .= B::Asmdata::PUT_strconst(sprintf(qq["0x%s"], $Config{byteorder}));
160
161 $header;
162}
163
a798dbf2 164sub parse_statement {
165 my $stmt = shift;
166 my ($insn, $arg) = $stmt =~ m{
167 (?sx)
168 ^\s* # allow (but ignore) leading whitespace
169 (.*?) # Instruction continues up until...
170 (?: # ...an optional whitespace+argument group
171 \s+ # first whitespace.
172 (.*) # The argument is all the rest (newlines included).
173 )?$ # anchor at end-of-line
174 };
175 if (defined($arg)) {
176 if ($arg =~ s/^0x(?=[0-9a-fA-F]+$)//) {
177 $arg = hex($arg);
178 } elsif ($arg =~ s/^0(?=[0-7]+$)//) {
179 $arg = oct($arg);
180 } elsif ($arg =~ /^pp_/) {
181 $arg =~ s/\s*$//; # strip trailing whitespace
182 my $opnum = $opnumber{$arg};
183 if (defined($opnum)) {
184 $arg = $opnum;
185 } else {
186 error qq(No such op type "$arg");
187 $arg = 0;
188 }
189 }
190 }
191 return ($insn, $arg);
192}
193
194sub assemble_insn {
195 my ($insn, $arg) = @_;
196 my $data = $insn_data{$insn};
197 if (defined($data)) {
198 my ($bytecode, $putsub) = @{$data}[0, 1];
199 my $argcode = &$putsub($arg);
200 return chr($bytecode).$argcode;
201 } else {
202 error qq(no such instruction "$insn");
203 return "";
204 }
205}
206
207sub assemble_fh {
208 my ($fh, $out) = @_;
059a8bb7 209 my $line;
210 my $asm = newasm($out);
a798dbf2 211 while ($line = <$fh>) {
059a8bb7 212 assemble($line);
a798dbf2 213 }
059a8bb7 214 endasm();
215}
216
217sub newasm {
218 my($outsub) = @_;
219
220 die "Invalid printing routine for B::Assembler\n" unless ref $outsub eq 'CODE';
221 die <<EOD if ref $out;
222Can't have multiple byteassembly sessions at once!
223 (perhaps you forgot an endasm()?)
224EOD
225
226 $linenum = $errors = 0;
227 $out = $outsub;
228
229 $out->(gen_header());
230}
231
232sub endasm {
a798dbf2 233 if ($errors) {
059a8bb7 234 die "There were $errors assembly errors\n";
235 }
236 $linenum = $errors = $out = 0;
237}
238
239sub assemble {
240 my($line) = @_;
241 my ($insn, $arg);
242 $linenum++;
243 chomp $line;
244 if ($debug) {
245 my $quotedline = $line;
246 $quotedline =~ s/\\/\\\\/g;
247 $quotedline =~ s/"/\\"/g;
248 $out->(assemble_insn("comment", qq("$quotedline")));
249 }
250 $line = strip_comments($line) or next;
251 ($insn, $arg) = parse_statement($line);
252 $out->(assemble_insn($insn, $arg));
253 if ($debug) {
254 $out->(assemble_insn("nop", undef));
a798dbf2 255 }
256}
257
2581;
7f20e9dd 259
260__END__
261
262=head1 NAME
263
264B::Assembler - Assemble Perl bytecode
265
266=head1 SYNOPSIS
267
059a8bb7 268 use B::Assembler qw(newasm endasm assemble);
269 newasm(\&printsub); # sets up for assembly
270 assemble($buf); # assembles one line
271 endasm(); # closes down
272
273 use B::Assembler qw(assemble_fh);
274 assemble_fh($fh, \&printsub); # assemble everything in $fh
7f20e9dd 275
276=head1 DESCRIPTION
277
278See F<ext/B/B/Assembler.pm>.
279
059a8bb7 280=head1 AUTHORS
7f20e9dd 281
282Malcolm Beattie, C<mbeattie@sable.ox.ac.uk>
059a8bb7 283Per-statement interface by Benjamin Stuhl, C<sho_pi@hotmail.com>
7f20e9dd 284
285=cut