Commit | Line | Data |
a798dbf2 |
1 | package B::Xref; |
2 | |
3 | =head1 NAME |
4 | |
5 | B::Xref - Generates cross reference reports for Perl programs |
6 | |
7 | =head1 SYNOPSIS |
8 | |
9 | perl -MO=Xref[,OPTIONS] foo.pl |
10 | |
11 | =head1 DESCRIPTION |
12 | |
13 | The B::Xref module is used to generate a cross reference listing of all |
14 | definitions and uses of variables, subroutines and formats in a Perl program. |
15 | It is implemented as a backend for the Perl compiler. |
16 | |
17 | The report generated is in the following format: |
18 | |
19 | File filename1 |
20 | Subroutine subname1 |
21 | Package package1 |
22 | object1 C<line numbers> |
23 | object2 C<line numbers> |
24 | ... |
25 | Package package2 |
26 | ... |
27 | |
28 | Each B<File> section reports on a single file. Each B<Subroutine> section |
29 | reports on a single subroutine apart from the special cases |
30 | "(definitions)" and "(main)". These report, respectively, on subroutine |
31 | definitions found by the initial symbol table walk and on the main part of |
32 | the program or module external to all subroutines. |
33 | |
34 | The report is then grouped by the B<Package> of each variable, |
35 | subroutine or format with the special case "(lexicals)" meaning |
36 | lexical variables. Each B<object> name (implicitly qualified by its |
37 | containing B<Package>) includes its type character(s) at the beginning |
38 | where possible. Lexical variables are easier to track and even |
39 | included dereferencing information where possible. |
40 | |
41 | The C<line numbers> are a comma separated list of line numbers (some |
42 | preceded by code letters) where that object is used in some way. |
43 | Simple uses aren't preceded by a code letter. Introductions (such as |
44 | where a lexical is first defined with C<my>) are indicated with the |
45 | letter "i". Subroutine and method calls are indicated by the character |
46 | "&". Subroutine definitions are indicated by "s" and format |
47 | definitions by "f". |
48 | |
49 | =head1 OPTIONS |
50 | |
51 | Option words are separated by commas (not whitespace) and follow the |
52 | usual conventions of compiler backend options. |
53 | |
54 | =over 8 |
55 | |
56 | =item C<-oFILENAME> |
57 | |
58 | Directs output to C<FILENAME> instead of standard output. |
59 | |
60 | =item C<-r> |
61 | |
62 | Raw output. Instead of producing a human-readable report, outputs a line |
63 | in machine-readable form for each definition/use of a variable/sub/format. |
64 | |
65 | =item C<-D[tO]> |
66 | |
67 | (Internal) debug options, probably only useful if C<-r> included. |
68 | The C<t> option prints the object on the top of the stack as it's |
69 | being tracked. The C<O> option prints each operator as it's being |
70 | processed in the execution order of the program. |
71 | |
72 | =back |
73 | |
74 | =head1 BUGS |
75 | |
76 | Non-lexical variables are quite difficult to track through a program. |
77 | Sometimes the type of a non-lexical variable's use is impossible to |
78 | determine. Introductions of non-lexical non-scalars don't seem to be |
79 | reported properly. |
80 | |
81 | =head1 AUTHOR |
82 | |
83 | Malcolm Beattie, mbeattie@sable.ox.ac.uk. |
84 | |
85 | =cut |
86 | |
87 | use strict; |
88 | use B qw(peekop class comppadlist main_start svref_2object walksymtable); |
89 | |
90 | # Constants (should probably be elsewhere) |
91 | sub OPpLVAL_INTRO () { 128 } |
92 | sub SVf_POK () { 0x40000 } |
93 | |
94 | sub UNKNOWN { ["?", "?", "?"] } |
95 | |
96 | my @pad; # lexicals in current pad |
97 | # as ["(lexical)", type, name] |
98 | my %done; # keyed by $$op: set when each $op is done |
99 | my $top = UNKNOWN; # shadows top element of stack as |
100 | # [pack, type, name] (pack can be "(lexical)") |
101 | my $file; # shadows current filename |
102 | my $line; # shadows current line number |
103 | my $subname; # shadows current sub name |
104 | my %table; # Multi-level hash to record all uses etc. |
105 | my @todo = (); # List of CVs that need processing |
106 | |
107 | my %code = (intro => "i", used => "", |
108 | subdef => "s", subused => "&", |
109 | formdef => "f", meth => "->"); |
110 | |
111 | |
112 | # Options |
113 | my ($debug_op, $debug_top, $nodefs, $raw); |
114 | |
115 | sub process { |
116 | my ($var, $event) = @_; |
117 | my ($pack, $type, $name) = @$var; |
118 | if ($type eq "*") { |
119 | if ($event eq "used") { |
120 | return; |
121 | } elsif ($event eq "subused") { |
122 | $type = "&"; |
123 | } |
124 | } |
125 | $type =~ s/(.)\*$/$1/g; |
126 | if ($raw) { |
127 | printf "%-16s %-12s %5d %-12s %4s %-16s %s\n", |
128 | $file, $subname, $line, $pack, $type, $name, $event; |
129 | } else { |
130 | # Wheee |
131 | push(@{$table{$file}->{$subname}->{$pack}->{$type.$name}->{$event}}, |
132 | $line); |
133 | } |
134 | } |
135 | |
136 | sub load_pad { |
137 | my $padlist = shift; |
138 | my ($namelistav, @namelist, $ix); |
139 | @pad = (); |
140 | return if class($padlist) eq "SPECIAL"; |
141 | ($namelistav) = $padlist->ARRAY; |
142 | @namelist = $namelistav->ARRAY; |
143 | for ($ix = 1; $ix < @namelist; $ix++) { |
144 | my $namesv = $namelist[$ix]; |
145 | next if class($namesv) eq "SPECIAL"; |
146 | my ($type, $name) = $namesv->PV =~ /^(.)(.*)$/; |
147 | $pad[$ix] = ["(lexical)", $type, $name]; |
148 | } |
149 | } |
150 | |
151 | sub xref { |
152 | my $start = shift; |
153 | my $op; |
154 | for ($op = $start; $$op; $op = $op->next) { |
155 | last if $done{$$op}++; |
156 | warn sprintf("top = [%s, %s, %s]\n", @$top) if $debug_top; |
157 | warn peekop($op), "\n" if $debug_op; |
158 | my $ppname = $op->ppaddr; |
159 | if ($ppname =~ /^pp_(or|and|mapwhile|grepwhile)$/) { |
160 | xref($op->other); |
161 | } elsif ($ppname eq "pp_match" || $ppname eq "pp_subst") { |
162 | xref($op->pmreplstart); |
163 | } elsif ($ppname eq "pp_substcont") { |
164 | xref($op->other->pmreplstart); |
165 | $op = $op->other; |
166 | redo; |
167 | } elsif ($ppname eq "pp_cond_expr") { |
168 | # pp_cond_expr never returns op_next |
169 | xref($op->true); |
170 | $op = $op->false; |
171 | redo; |
172 | } elsif ($ppname eq "pp_enterloop") { |
173 | xref($op->redoop); |
174 | xref($op->nextop); |
175 | xref($op->lastop); |
176 | } elsif ($ppname eq "pp_subst") { |
177 | xref($op->pmreplstart); |
178 | } else { |
179 | no strict 'refs'; |
180 | &$ppname($op) if defined(&$ppname); |
181 | } |
182 | } |
183 | } |
184 | |
185 | sub xref_cv { |
186 | my $cv = shift; |
187 | my $pack = $cv->GV->STASH->NAME; |
188 | $subname = ($pack eq "main" ? "" : "$pack\::") . $cv->GV->NAME; |
189 | load_pad($cv->PADLIST); |
190 | xref($cv->START); |
191 | $subname = "(main)"; |
192 | } |
193 | |
194 | sub xref_object { |
195 | my $cvref = shift; |
196 | xref_cv(svref_2object($cvref)); |
197 | } |
198 | |
199 | sub xref_main { |
200 | $subname = "(main)"; |
201 | load_pad(comppadlist); |
202 | xref(main_start); |
203 | while (@todo) { |
204 | xref_cv(shift @todo); |
205 | } |
206 | } |
207 | |
208 | sub pp_nextstate { |
209 | my $op = shift; |
210 | $file = $op->filegv->SV->PV; |
211 | $line = $op->line; |
212 | $top = UNKNOWN; |
213 | } |
214 | |
215 | sub pp_padsv { |
216 | my $op = shift; |
217 | $top = $pad[$op->targ]; |
218 | process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used"); |
219 | } |
220 | |
221 | sub pp_padav { pp_padsv(@_) } |
222 | sub pp_padhv { pp_padsv(@_) } |
223 | |
224 | sub deref { |
225 | my ($var, $as) = @_; |
226 | $var->[1] = $as . $var->[1]; |
227 | process($var, "used"); |
228 | } |
229 | |
230 | sub pp_rv2cv { deref($top, "&"); } |
231 | sub pp_rv2hv { deref($top, "%"); } |
232 | sub pp_rv2sv { deref($top, "\$"); } |
233 | sub pp_rv2av { deref($top, "\@"); } |
234 | sub pp_rv2gv { deref($top, "*"); } |
235 | |
236 | sub pp_gvsv { |
237 | my $op = shift; |
238 | my $gv = $op->gv; |
239 | $top = [$gv->STASH->NAME, '$', $gv->NAME]; |
240 | process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used"); |
241 | } |
242 | |
243 | sub pp_gv { |
244 | my $op = shift; |
245 | my $gv = $op->gv; |
246 | $top = [$gv->STASH->NAME, "*", $gv->NAME]; |
247 | process($top, $op->private & OPpLVAL_INTRO ? "intro" : "used"); |
248 | } |
249 | |
250 | sub pp_const { |
251 | my $op = shift; |
252 | my $sv = $op->sv; |
253 | $top = ["?", "", |
254 | (class($sv) ne "SPECIAL" && $sv->FLAGS & SVf_POK) ? $sv->PV : "?"]; |
255 | } |
256 | |
257 | sub pp_method { |
258 | my $op = shift; |
259 | $top = ["(method)", "->".$top->[1], $top->[2]]; |
260 | } |
261 | |
262 | sub pp_entersub { |
263 | my $op = shift; |
264 | if ($top->[1] eq "m") { |
265 | process($top, "meth"); |
266 | } else { |
267 | process($top, "subused"); |
268 | } |
269 | $top = UNKNOWN; |
270 | } |
271 | |
272 | # |
273 | # Stuff for cross referencing definitions of variables and subs |
274 | # |
275 | |
276 | sub B::GV::xref { |
277 | my $gv = shift; |
278 | my $cv = $gv->CV; |
279 | if ($$cv) { |
280 | #return if $done{$$cv}++; |
281 | $file = $gv->FILEGV->SV->PV; |
282 | $line = $gv->LINE; |
283 | process([$gv->STASH->NAME, "&", $gv->NAME], "subdef"); |
284 | push(@todo, $cv); |
285 | } |
286 | my $form = $gv->FORM; |
287 | if ($$form) { |
288 | return if $done{$$form}++; |
289 | $file = $gv->FILEGV->SV->PV; |
290 | $line = $gv->LINE; |
291 | process([$gv->STASH->NAME, "", $gv->NAME], "formdef"); |
292 | } |
293 | } |
294 | |
295 | sub xref_definitions { |
296 | my ($pack, %exclude); |
297 | return if $nodefs; |
298 | $subname = "(definitions)"; |
299 | foreach $pack (qw(B O AutoLoader DynaLoader Config DB VMS |
300 | strict vars FileHandle Exporter Carp)) { |
301 | $exclude{$pack."::"} = 1; |
302 | } |
303 | no strict qw(vars refs); |
304 | walksymtable(\%{"main::"}, "xref", sub { !defined($exclude{$_[0]}) }); |
305 | } |
306 | |
307 | sub output { |
308 | return if $raw; |
309 | my ($file, $subname, $pack, $name, $ev, $perfile, $persubname, |
310 | $perpack, $pername, $perev); |
311 | foreach $file (sort(keys(%table))) { |
312 | $perfile = $table{$file}; |
313 | print "File $file\n"; |
314 | foreach $subname (sort(keys(%$perfile))) { |
315 | $persubname = $perfile->{$subname}; |
316 | print " Subroutine $subname\n"; |
317 | foreach $pack (sort(keys(%$persubname))) { |
318 | $perpack = $persubname->{$pack}; |
319 | print " Package $pack\n"; |
320 | foreach $name (sort(keys(%$perpack))) { |
321 | $pername = $perpack->{$name}; |
322 | my @lines; |
323 | foreach $ev (qw(intro formdef subdef meth subused used)) { |
324 | $perev = $pername->{$ev}; |
325 | if (defined($perev) && @$perev) { |
326 | my $code = $code{$ev}; |
327 | push(@lines, map("$code$_", @$perev)); |
328 | } |
329 | } |
330 | printf " %-16s %s\n", $name, join(", ", @lines); |
331 | } |
332 | } |
333 | } |
334 | } |
335 | } |
336 | |
337 | sub compile { |
338 | my @options = @_; |
339 | my ($option, $opt, $arg); |
340 | OPTION: |
341 | while ($option = shift @options) { |
342 | if ($option =~ /^-(.)(.*)/) { |
343 | $opt = $1; |
344 | $arg = $2; |
345 | } else { |
346 | unshift @options, $option; |
347 | last OPTION; |
348 | } |
349 | if ($opt eq "-" && $arg eq "-") { |
350 | shift @options; |
351 | last OPTION; |
352 | } elsif ($opt eq "o") { |
353 | $arg ||= shift @options; |
354 | open(STDOUT, ">$arg") or return "$arg: $!\n"; |
355 | } elsif ($opt eq "d") { |
356 | $nodefs = 1; |
357 | } elsif ($opt eq "r") { |
358 | $raw = 1; |
359 | } elsif ($opt eq "D") { |
360 | $arg ||= shift @options; |
361 | foreach $arg (split(//, $arg)) { |
362 | if ($arg eq "o") { |
363 | B->debug(1); |
364 | } elsif ($arg eq "O") { |
365 | $debug_op = 1; |
366 | } elsif ($arg eq "t") { |
367 | $debug_top = 1; |
368 | } |
369 | } |
370 | } |
371 | } |
372 | if (@options) { |
373 | return sub { |
374 | my $objname; |
375 | xref_definitions(); |
376 | foreach $objname (@options) { |
377 | $objname = "main::$objname" unless $objname =~ /::/; |
378 | eval "xref_object(\\&$objname)"; |
379 | die "xref_object(\\&$objname) failed: $@" if $@; |
380 | } |
381 | output(); |
382 | } |
383 | } else { |
384 | return sub { |
385 | xref_definitions(); |
386 | xref_main(); |
387 | output(); |
388 | } |
389 | } |
390 | } |
391 | |
392 | 1; |