Commit | Line | Data |
6badd1a5 |
1 | package Opcode; |
2 | |
3b825e41 |
3 | use 5.006_001; |
6badd1a5 |
4 | |
b75c8c73 |
5 | use strict; |
6 | |
4eb3f1b8 |
7 | our($VERSION, @ISA, @EXPORT_OK); |
6badd1a5 |
8 | |
87fc0556 |
9 | $VERSION = "1.10"; |
6badd1a5 |
10 | |
6badd1a5 |
11 | use Carp; |
12 | use Exporter (); |
9426adcd |
13 | use XSLoader (); |
6badd1a5 |
14 | |
15 | BEGIN { |
b75c8c73 |
16 | @ISA = qw(Exporter); |
6badd1a5 |
17 | @EXPORT_OK = qw( |
18 | opset ops_to_opset |
19 | opset_to_ops opset_to_hex invert_opset |
20 | empty_opset full_opset |
21 | opdesc opcodes opmask define_optag |
22 | opmask_add verify_opset opdump |
23 | ); |
24 | } |
25 | |
68dc0745 |
26 | sub opset (;@); |
27 | sub opset_to_hex ($); |
28 | sub opdump (;$); |
6badd1a5 |
29 | use subs @EXPORT_OK; |
30 | |
4eb3f1b8 |
31 | XSLoader::load 'Opcode', $VERSION; |
6badd1a5 |
32 | |
33 | _init_optags(); |
34 | |
68dc0745 |
35 | sub ops_to_opset { opset @_ } # alias for old name |
6badd1a5 |
36 | |
37 | sub opset_to_hex ($) { |
38 | return "(invalid opset)" unless verify_opset($_[0]); |
39 | unpack("h*",$_[0]); |
40 | } |
41 | |
42 | sub opdump (;$) { |
43 | my $pat = shift; |
44 | # handy utility: perl -MOpcode=opdump -e 'opdump File' |
45 | foreach(opset_to_ops(full_opset)) { |
46 | my $op = sprintf " %12s %s\n", $_, opdesc($_); |
47 | next if defined $pat and $op !~ m/$pat/i; |
48 | print $op; |
49 | } |
50 | } |
51 | |
52 | |
53 | |
54 | sub _init_optags { |
55 | my(%all, %seen); |
56 | @all{opset_to_ops(full_opset)} = (); # keys only |
57 | |
7a57407b |
58 | local($_); |
6badd1a5 |
59 | local($/) = "\n=cut"; # skip to optags definition section |
60 | <DATA>; |
61 | $/ = "\n="; # now read in 'pod section' chunks |
62 | while(<DATA>) { |
63 | next unless m/^item\s+(:\w+)/; |
64 | my $tag = $1; |
65 | |
66 | # Split into lines, keep only indented lines |
67 | my @lines = grep { m/^\s/ } split(/\n/); |
68 | foreach (@lines) { s/--.*// } # delete comments |
69 | my @ops = map { split ' ' } @lines; # get op words |
70 | |
71 | foreach(@ops) { |
72 | warn "$tag - $_ already tagged in $seen{$_}\n" if $seen{$_}; |
73 | $seen{$_} = $tag; |
74 | delete $all{$_}; |
75 | } |
76 | # opset will croak on invalid names |
77 | define_optag($tag, opset(@ops)); |
78 | } |
79 | close(DATA); |
80 | warn "Untagged opnames: ".join(' ',keys %all)."\n" if %all; |
81 | } |
82 | |
83 | |
84 | 1; |
85 | |
86 | __DATA__ |
87 | |
88 | =head1 NAME |
89 | |
90 | Opcode - Disable named opcodes when compiling perl code |
91 | |
92 | =head1 SYNOPSIS |
93 | |
94 | use Opcode; |
95 | |
96 | |
97 | =head1 DESCRIPTION |
98 | |
99 | Perl code is always compiled into an internal format before execution. |
100 | |
101 | Evaluating perl code (e.g. via "eval" or "do 'file'") causes |
102 | the code to be compiled into an internal format and then, |
103 | provided there was no error in the compilation, executed. |
104 | The internal format is based on many distinct I<opcodes>. |
105 | |
106 | By default no opmask is in effect and any code can be compiled. |
107 | |
108 | The Opcode module allow you to define an I<operator mask> to be in |
109 | effect when perl I<next> compiles any code. Attempting to compile code |
110 | which contains a masked opcode will cause the compilation to fail |
111 | with an error. The code will not be executed. |
112 | |
113 | =head1 NOTE |
114 | |
115 | The Opcode module is not usually used directly. See the ops pragma and |
116 | Safe modules for more typical uses. |
117 | |
118 | =head1 WARNING |
119 | |
120 | The authors make B<no warranty>, implied or otherwise, about the |
121 | suitability of this software for safety or security purposes. |
122 | |
123 | The authors shall not in any case be liable for special, incidental, |
124 | consequential, indirect or other similar damages arising from the use |
125 | of this software. |
126 | |
127 | Your mileage will vary. If in any doubt B<do not use it>. |
128 | |
129 | |
130 | =head1 Operator Names and Operator Lists |
131 | |
132 | The canonical list of operator names is the contents of the array |
4369b173 |
133 | PL_op_name defined and initialised in file F<opcode.h> of the Perl |
6badd1a5 |
134 | source distribution (and installed into the perl library). |
135 | |
136 | Each operator has both a terse name (its opname) and a more verbose or |
137 | recognisable descriptive name. The opdesc function can be used to |
138 | return a list of descriptions for a list of operators. |
139 | |
140 | Many of the functions and methods listed below take a list of |
141 | operators as parameters. Most operator lists can be made up of several |
142 | types of element. Each element can be one of |
143 | |
144 | =over 8 |
145 | |
146 | =item an operator name (opname) |
147 | |
148 | Operator names are typically small lowercase words like enterloop, |
149 | leaveloop, last, next, redo etc. Sometimes they are rather cryptic |
150 | like gv2cv, i_ncmp and ftsvtx. |
151 | |
152 | =item an operator tag name (optag) |
153 | |
154 | Operator tags can be used to refer to groups (or sets) of operators. |
7b8d334a |
155 | Tag names always begin with a colon. The Opcode module defines several |
6badd1a5 |
156 | optags and the user can define others using the define_optag function. |
157 | |
158 | =item a negated opname or optag |
159 | |
160 | An opname or optag can be prefixed with an exclamation mark, e.g., !mkdir. |
161 | Negating an opname or optag means remove the corresponding ops from the |
162 | accumulated set of ops at that point. |
163 | |
164 | =item an operator set (opset) |
165 | |
7c011d3a |
166 | An I<opset> as a binary string of approximately 44 bytes which holds a |
6badd1a5 |
167 | set or zero or more operators. |
168 | |
169 | The opset and opset_to_ops functions can be used to convert from |
170 | a list of operators to an opset and I<vice versa>. |
171 | |
172 | Wherever a list of operators can be given you can use one or more opsets. |
173 | See also Manipulating Opsets below. |
174 | |
175 | =back |
176 | |
177 | |
178 | =head1 Opcode Functions |
179 | |
180 | The Opcode package contains functions for manipulating operator names |
181 | tags and sets. All are available for export by the package. |
182 | |
183 | =over 8 |
184 | |
185 | =item opcodes |
186 | |
187 | In a scalar context opcodes returns the number of opcodes in this |
7c011d3a |
188 | version of perl (around 350 for perl-5.7.0). |
6badd1a5 |
189 | |
190 | In a list context it returns a list of all the operator names. |
191 | (Not yet implemented, use @names = opset_to_ops(full_opset).) |
192 | |
193 | =item opset (OP, ...) |
194 | |
195 | Returns an opset containing the listed operators. |
196 | |
197 | =item opset_to_ops (OPSET) |
198 | |
199 | Returns a list of operator names corresponding to those operators in |
200 | the set. |
201 | |
202 | =item opset_to_hex (OPSET) |
203 | |
204 | Returns a string representation of an opset. Can be handy for debugging. |
205 | |
206 | =item full_opset |
207 | |
208 | Returns an opset which includes all operators. |
209 | |
210 | =item empty_opset |
211 | |
212 | Returns an opset which contains no operators. |
213 | |
214 | =item invert_opset (OPSET) |
215 | |
216 | Returns an opset which is the inverse set of the one supplied. |
217 | |
218 | =item verify_opset (OPSET, ...) |
219 | |
220 | Returns true if the supplied opset looks like a valid opset (is the |
221 | right length etc) otherwise it returns false. If an optional second |
222 | parameter is true then verify_opset will croak on an invalid opset |
223 | instead of returning false. |
224 | |
225 | Most of the other Opcode functions call verify_opset automatically |
226 | and will croak if given an invalid opset. |
227 | |
228 | =item define_optag (OPTAG, OPSET) |
229 | |
230 | Define OPTAG as a symbolic name for OPSET. Optag names always start |
231 | with a colon C<:>. |
232 | |
233 | The optag name used must not be defined already (define_optag will |
234 | croak if it is already defined). Optag names are global to the perl |
235 | process and optag definitions cannot be altered or deleted once |
236 | defined. |
237 | |
238 | It is strongly recommended that applications using Opcode should use a |
239 | leading capital letter on their tag names since lowercase names are |
240 | reserved for use by the Opcode module. If using Opcode within a module |
241 | you should prefix your tags names with the name of your module to |
242 | ensure uniqueness and thus avoid clashes with other modules. |
243 | |
244 | =item opmask_add (OPSET) |
245 | |
246 | Adds the supplied opset to the current opmask. Note that there is |
247 | currently I<no> mechanism for unmasking ops once they have been masked. |
248 | This is intentional. |
249 | |
250 | =item opmask |
251 | |
252 | Returns an opset corresponding to the current opmask. |
253 | |
254 | =item opdesc (OP, ...) |
255 | |
256 | This takes a list of operator names and returns the corresponding list |
257 | of operator descriptions. |
258 | |
259 | =item opdump (PAT) |
260 | |
261 | Dumps to STDOUT a two column list of op names and op descriptions. |
262 | If an optional pattern is given then only lines which match the |
263 | (case insensitive) pattern will be output. |
264 | |
265 | It's designed to be used as a handy command line utility: |
266 | |
267 | perl -MOpcode=opdump -e opdump |
268 | perl -MOpcode=opdump -e 'opdump Eval' |
269 | |
270 | =back |
271 | |
272 | =head1 Manipulating Opsets |
273 | |
274 | Opsets may be manipulated using the perl bit vector operators & (and), | (or), |
275 | ^ (xor) and ~ (negate/invert). |
276 | |
277 | However you should never rely on the numerical position of any opcode |
278 | within the opset. In other words both sides of a bit vector operator |
279 | should be opsets returned from Opcode functions. |
280 | |
281 | Also, since the number of opcodes in your current version of perl might |
282 | not be an exact multiple of eight, there may be unused bits in the last |
283 | byte of an upset. This should not cause any problems (Opcode functions |
284 | ignore those extra bits) but it does mean that using the ~ operator |
285 | will typically not produce the same 'physical' opset 'string' as the |
286 | invert_opset function. |
287 | |
288 | |
289 | =head1 TO DO (maybe) |
290 | |
291 | $bool = opset_eq($opset1, $opset2) true if opsets are logically eqiv |
292 | |
293 | $yes = opset_can($opset, @ops) true if $opset has all @ops set |
294 | |
295 | @diff = opset_diff($opset1, $opset2) => ('foo', '!bar', ...) |
296 | |
297 | =cut |
298 | |
299 | # the =cut above is used by _init_optags() to get here quickly |
300 | |
301 | =head1 Predefined Opcode Tags |
302 | |
303 | =over 5 |
304 | |
305 | =item :base_core |
306 | |
307 | null stub scalar pushmark wantarray const defined undef |
308 | |
309 | rv2sv sassign |
310 | |
311 | rv2av aassign aelem aelemfast aslice av2arylen |
312 | |
313 | rv2hv helem hslice each values keys exists delete |
314 | |
315 | preinc i_preinc predec i_predec postinc i_postinc postdec i_postdec |
316 | int hex oct abs pow multiply i_multiply divide i_divide |
317 | modulo i_modulo add i_add subtract i_subtract |
318 | |
319 | left_shift right_shift bit_and bit_xor bit_or negate i_negate |
320 | not complement |
321 | |
322 | lt i_lt gt i_gt le i_le ge i_ge eq i_eq ne i_ne ncmp i_ncmp |
323 | slt sgt sle sge seq sne scmp |
324 | |
325 | substr vec stringify study pos length index rindex ord chr |
326 | |
327 | ucfirst lcfirst uc lc quotemeta trans chop schop chomp schomp |
328 | |
8782bef2 |
329 | match split qr |
6badd1a5 |
330 | |
331 | list lslice splice push pop shift unshift reverse |
332 | |
c963b151 |
333 | cond_expr flip flop andassign orassign dorassign and or dor xor |
6badd1a5 |
334 | |
7399586d |
335 | warn die lineseq nextstate scope enter leave setstate |
6badd1a5 |
336 | |
337 | rv2cv anoncode prototype |
338 | |
cd06dffe |
339 | entersub leavesub leavesublv return method method_named -- XXX loops via recursion? |
6badd1a5 |
340 | |
341 | leaveeval -- needed for Safe to operate, is safe without entereval |
342 | |
343 | =item :base_mem |
344 | |
345 | These memory related ops are not included in :base_core because they |
346 | can easily be used to implement a resource attack (e.g., consume all |
347 | available memory). |
348 | |
349 | concat repeat join range |
350 | |
351 | anonlist anonhash |
352 | |
3c4b39be |
353 | Note that despite the existence of this optag a memory resource attack |
6badd1a5 |
354 | may still be possible using only :base_core ops. |
355 | |
356 | Disabling these ops is a I<very> heavy handed way to attempt to prevent |
357 | a memory resource attack. It's probable that a specific memory limit |
358 | mechanism will be added to perl in the near future. |
359 | |
360 | =item :base_loop |
361 | |
362 | These loop ops are not included in :base_core because they can easily be |
363 | used to implement a resource attack (e.g., consume all available CPU time). |
364 | |
365 | grepstart grepwhile |
366 | mapstart mapwhile |
367 | enteriter iter |
e897d888 |
368 | enterloop leaveloop unstack |
6badd1a5 |
369 | last next redo |
370 | goto |
371 | |
372 | =item :base_io |
373 | |
374 | These ops enable I<filehandle> (rather than filename) based input and |
375 | output. These are safe on the assumption that only pre-existing |
e866b74b |
376 | filehandles are available for use. Usually, to create new filehandles |
377 | other ops such as open would need to be enabled, if you don't take into |
378 | account the magical open of ARGV. |
6badd1a5 |
379 | |
380 | readline rcatline getc read |
381 | |
382 | formline enterwrite leavewrite |
383 | |
0d863452 |
384 | print say sysread syswrite send recv |
96e4d5b1 |
385 | |
8903cb82 |
386 | eof tell seek sysseek |
6badd1a5 |
387 | |
388 | readdir telldir seekdir rewinddir |
389 | |
390 | =item :base_orig |
391 | |
392 | These are a hotchpotch of opcodes still waiting to be considered |
393 | |
394 | gvsv gv gelem |
395 | |
396 | padsv padav padhv padany |
397 | |
87fc0556 |
398 | once |
399 | |
6badd1a5 |
400 | rv2gv refgen srefgen ref |
401 | |
402 | bless -- could be used to change ownership of objects (reblessing) |
403 | |
2cd61cdb |
404 | pushre regcmaybe regcreset regcomp subst substcont |
6badd1a5 |
405 | |
406 | sprintf prtf -- can core dump |
407 | |
408 | crypt |
409 | |
410 | tie untie |
411 | |
412 | dbmopen dbmclose |
413 | sselect select |
414 | pipe_op sockpair |
415 | |
416 | getppid getpgrp setpgrp getpriority setpriority localtime gmtime |
417 | |
418 | entertry leavetry -- can be used to 'hide' fatal errors |
419 | |
0d863452 |
420 | entergiven leavegiven |
421 | enterwhen leavewhen |
422 | break continue |
423 | smartmatch |
424 | |
53e06cf0 |
425 | custom -- where should this go |
426 | |
6badd1a5 |
427 | =item :base_math |
428 | |
429 | These ops are not included in :base_core because of the risk of them being |
430 | used to generate floating point exceptions (which would have to be caught |
431 | using a $SIG{FPE} handler). |
432 | |
433 | atan2 sin cos exp log sqrt |
434 | |
435 | These ops are not included in :base_core because they have an effect |
436 | beyond the scope of the compartment. |
437 | |
438 | rand srand |
439 | |
1f5895a1 |
440 | =item :base_thread |
441 | |
554b3eca |
442 | These ops are related to multi-threading. |
1f5895a1 |
443 | |
5b9081af |
444 | lock |
1f5895a1 |
445 | |
6badd1a5 |
446 | =item :default |
447 | |
448 | A handy tag name for a I<reasonable> default set of ops. (The current ops |
449 | allowed are unstable while development continues. It will change.) |
450 | |
e866b74b |
451 | :base_core :base_mem :base_loop :base_orig :base_thread |
452 | |
453 | This list used to contain :base_io prior to Opcode 1.07. |
6badd1a5 |
454 | |
455 | If safety matters to you (and why else would you be using the Opcode module?) |
456 | then you should not rely on the definition of this, or indeed any other, optag! |
457 | |
6badd1a5 |
458 | =item :filesys_read |
459 | |
460 | stat lstat readlink |
461 | |
462 | ftatime ftblk ftchr ftctime ftdir fteexec fteowned fteread |
463 | ftewrite ftfile ftis ftlink ftmtime ftpipe ftrexec ftrowned |
464 | ftrread ftsgid ftsize ftsock ftsuid fttty ftzero ftrwrite ftsvtx |
465 | |
466 | fttext ftbinary |
467 | |
468 | fileno |
469 | |
470 | =item :sys_db |
471 | |
472 | ghbyname ghbyaddr ghostent shostent ehostent -- hosts |
473 | gnbyname gnbyaddr gnetent snetent enetent -- networks |
474 | gpbyname gpbynumber gprotoent sprotoent eprotoent -- protocols |
475 | gsbyname gsbyport gservent sservent eservent -- services |
476 | |
477 | gpwnam gpwuid gpwent spwent epwent getlogin -- users |
478 | ggrnam ggrgid ggrent sgrent egrent -- groups |
479 | |
480 | =item :browse |
481 | |
482 | A handy tag name for a I<reasonable> default set of ops beyond the |
483 | :default optag. Like :default (and indeed all the other optags) its |
484 | current definition is unstable while development continues. It will change. |
485 | |
486 | The :browse tag represents the next step beyond :default. It it a |
487 | superset of the :default ops and adds :filesys_read the :sys_db. |
488 | The intent being that scripts can access more (possibly sensitive) |
489 | information about your system but not be able to change it. |
490 | |
491 | :default :filesys_read :sys_db |
492 | |
493 | =item :filesys_open |
494 | |
495 | sysopen open close |
496 | umask binmode |
497 | |
498 | open_dir closedir -- other dir ops are in :base_io |
499 | |
500 | =item :filesys_write |
501 | |
502 | link unlink rename symlink truncate |
503 | |
504 | mkdir rmdir |
505 | |
506 | utime chmod chown |
507 | |
508 | fcntl -- not strictly filesys related, but possibly as dangerous? |
509 | |
510 | =item :subprocess |
511 | |
512 | backtick system |
513 | |
514 | fork |
515 | |
516 | wait waitpid |
517 | |
f812a825 |
518 | glob -- access to Cshell via <`rm *`> |
519 | |
6badd1a5 |
520 | =item :ownprocess |
521 | |
522 | exec exit kill |
523 | |
524 | time tms -- could be used for timing attacks (paranoid?) |
525 | |
526 | =item :others |
527 | |
528 | This tag holds groups of assorted specialist opcodes that don't warrant |
529 | having optags defined for them. |
530 | |
531 | SystemV Interprocess Communications: |
532 | |
533 | msgctl msgget msgrcv msgsnd |
534 | |
535 | semctl semget semop |
536 | |
537 | shmctl shmget shmread shmwrite |
538 | |
539 | =item :still_to_be_decided |
540 | |
541 | chdir |
542 | flock ioctl |
543 | |
544 | socket getpeername ssockopt |
545 | bind connect listen accept shutdown gsockopt getsockname |
546 | |
547 | sleep alarm -- changes global timer state and signal handling |
548 | sort -- assorted problems including core dumps |
549 | tied -- can be used to access object implementing a tie |
550 | pack unpack -- can be used to create/use memory pointers |
551 | |
552 | entereval -- can be used to hide code from initial compile |
553 | require dofile |
554 | |
555 | caller -- get info about calling environment and args |
556 | |
557 | reset |
558 | |
559 | dbstate -- perl -d version of nextstate(ment) opcode |
560 | |
561 | =item :dangerous |
562 | |
563 | This tag is simply a bucket for opcodes that are unlikely to be used via |
3c4b39be |
564 | a tag name but need to be tagged for completeness and documentation. |
6badd1a5 |
565 | |
566 | syscall dump chroot |
567 | |
6badd1a5 |
568 | =back |
569 | |
570 | =head1 SEE ALSO |
571 | |
86780939 |
572 | L<ops> -- perl pragma interface to Opcode module. |
6badd1a5 |
573 | |
86780939 |
574 | L<Safe> -- Opcode and namespace limited execution compartments |
6badd1a5 |
575 | |
576 | =head1 AUTHORS |
577 | |
578 | Originally designed and implemented by Malcolm Beattie, |
579 | mbeattie@sable.ox.ac.uk as part of Safe version 1. |
580 | |
581 | Split out from Safe module version 1, named opcode tags and other |
7b8d334a |
582 | changes added by Tim Bunce. |
6badd1a5 |
583 | |
584 | =cut |
585 | |