Commit | Line | Data |
3fea05b9 |
1 | #!/usr/bin/perl -w |
2 | # |
3 | # `Diff' program in Perl |
4 | # Copyright 1998 M-J. Dominus. (mjd-perl-diff@plover.com) |
5 | # |
6 | # This program is free software; you can redistribute it and/or modify it |
7 | # under the same terms as Perl itself. |
8 | # |
9 | # Altered to output in `context diff' format (but without context) |
10 | # September 1998 Christian Murphy (cpm@muc.de) |
11 | # |
12 | # Command-line arguments and context lines feature added |
13 | # September 1998 Amir D. Karger (karger@bead.aecom.yu.edu) |
14 | # |
15 | # In this file, "item" usually means "line of text", and "item number" usually |
16 | # means "line number". But theoretically the code could be used more generally |
17 | use strict; |
18 | |
19 | use Algorithm::Diff qw(diff); |
20 | use File::stat; |
21 | use vars qw ($opt_C $opt_c $opt_u $opt_U); |
22 | use Getopt::Std; |
23 | |
24 | my $usage = << "ENDUSAGE"; |
25 | Usage: $0 [{-c | -u}] [{-C | -U} lines] oldfile newfile |
26 | -c will do a context diff with 3 lines of context |
27 | -C will do a context diff with 'lines' lines of context |
28 | -u will do a unified diff with 3 lines of context |
29 | -U will do a unified diff with 'lines' lines of context |
30 | ENDUSAGE |
31 | |
32 | getopts('U:C:cu') or bag("$usage"); |
33 | bag("$usage") unless @ARGV == 2; |
34 | my ($file1, $file2) = @ARGV; |
35 | if (defined $opt_C || defined $opt_c) { |
36 | $opt_c = ""; # -c on if -C given on command line |
37 | $opt_u = undef; |
38 | } elsif (defined $opt_U || defined $opt_u) { |
39 | $opt_u = ""; # -u on if -U given on command line |
40 | $opt_c = undef; |
41 | } else { |
42 | $opt_c = ""; # by default, do context diff, not old diff |
43 | } |
44 | |
45 | my ($char1, $char2); # string to print before file names |
46 | my $Context_Lines; # lines of context to print |
47 | if (defined $opt_c) { |
48 | $Context_Lines = defined $opt_C ? $opt_C : 3; |
49 | $char1 = '*' x 3; $char2 = '-' x 3; |
50 | } elsif (defined $opt_u) { |
51 | $Context_Lines = defined $opt_U ? $opt_U : 3; |
52 | $char1 = '-' x 3; $char2 = '+' x 3; |
53 | } |
54 | |
55 | # After we've read up to a certain point in each file, the number of items |
56 | # we've read from each file will differ by $FLD (could be 0) |
57 | my $File_Length_Difference = 0; |
58 | |
59 | open (F1, $file1) or bag("Couldn't open $file1: $!"); |
60 | open (F2, $file2) or bag("Couldn't open $file2: $!"); |
61 | my (@f1, @f2); |
62 | chomp(@f1 = <F1>); |
63 | close F1; |
64 | chomp(@f2 = <F2>); |
65 | close F2; |
66 | |
67 | # diff yields lots of pieces, each of which is basically a Block object |
68 | my $diffs = diff(\@f1, \@f2); |
69 | exit 0 unless @$diffs; |
70 | |
71 | my $st = stat($file1); |
72 | print "$char1 $file1\t", scalar localtime($st->mtime), "\n"; |
73 | $st = stat($file2); |
74 | print "$char2 $file2\t", scalar localtime($st->mtime), "\n"; |
75 | |
76 | my ($hunk,$oldhunk); |
77 | # Loop over hunks. If a hunk overlaps with the last hunk, join them. |
78 | # Otherwise, print out the old one. |
79 | foreach my $piece (@$diffs) { |
80 | $hunk = new Hunk ($piece, $Context_Lines); |
81 | next unless $oldhunk; |
82 | |
83 | if ($hunk->does_overlap($oldhunk)) { |
84 | $hunk->prepend_hunk($oldhunk); |
85 | } else { |
86 | $oldhunk->output_diff(\@f1, \@f2); |
87 | } |
88 | |
89 | } continue { |
90 | $oldhunk = $hunk; |
91 | } |
92 | |
93 | # print the last hunk |
94 | $oldhunk->output_diff(\@f1, \@f2); |
95 | exit 1; |
96 | # END MAIN PROGRAM |
97 | |
98 | sub bag { |
99 | my $msg = shift; |
100 | $msg .= "\n"; |
101 | warn $msg; |
102 | exit 2; |
103 | } |
104 | |
105 | # Package Hunk. A Hunk is a group of Blocks which overlap because of the |
106 | # context surrounding each block. (So if we're not using context, every |
107 | # hunk will contain one block.) |
108 | { |
109 | package Hunk; |
110 | |
111 | sub new { |
112 | # Arg1 is output from &LCS::diff (which corresponds to one Block) |
113 | # Arg2 is the number of items (lines, e.g.,) of context around each block |
114 | # |
115 | # This subroutine changes $File_Length_Difference |
116 | # |
117 | # Fields in a Hunk: |
118 | # blocks - a list of Block objects |
119 | # start - index in file 1 where first block of the hunk starts |
120 | # end - index in file 1 where last block of the hunk ends |
121 | # |
122 | # Variables: |
123 | # before_diff - how much longer file 2 is than file 1 due to all hunks |
124 | # until but NOT including this one |
125 | # after_diff - difference due to all hunks including this one |
126 | my ($class, $piece, $context_items) = @_; |
127 | |
128 | my $block = new Block ($piece); # this modifies $FLD! |
129 | |
130 | my $before_diff = $File_Length_Difference; # BEFORE this hunk |
131 | my $after_diff = $before_diff + $block->{"length_diff"}; |
132 | $File_Length_Difference += $block->{"length_diff"}; |
133 | |
134 | # @remove_array and @insert_array hold the items to insert and remove |
135 | # Save the start & beginning of each array. If the array doesn't exist |
136 | # though (e.g., we're only adding items in this block), then figure |
137 | # out the line number based on the line number of the other file and |
138 | # the current difference in file lenghts |
139 | my @remove_array = $block->remove; |
140 | my @insert_array = $block->insert; |
141 | my ($a1, $a2, $b1, $b2, $start1, $start2, $end1, $end2); |
142 | $a1 = @remove_array ? $remove_array[0 ]->{"item_no"} : -1; |
143 | $a2 = @remove_array ? $remove_array[-1]->{"item_no"} : -1; |
144 | $b1 = @insert_array ? $insert_array[0 ]->{"item_no"} : -1; |
145 | $b2 = @insert_array ? $insert_array[-1]->{"item_no"} : -1; |
146 | |
147 | $start1 = $a1 == -1 ? $b1 - $before_diff : $a1; |
148 | $end1 = $a2 == -1 ? $b2 - $after_diff : $a2; |
149 | $start2 = $b1 == -1 ? $a1 + $before_diff : $b1; |
150 | $end2 = $b2 == -1 ? $a2 + $after_diff : $b2; |
151 | |
152 | # At first, a hunk will have just one Block in it |
153 | my $hunk = { |
154 | "start1" => $start1, |
155 | "start2" => $start2, |
156 | "end1" => $end1, |
157 | "end2" => $end2, |
158 | "blocks" => [$block], |
159 | }; |
160 | bless $hunk, $class; |
161 | |
162 | $hunk->flag_context($context_items); |
163 | |
164 | return $hunk; |
165 | } |
166 | |
167 | # Change the "start" and "end" fields to note that context should be added |
168 | # to this hunk |
169 | sub flag_context { |
170 | my ($hunk, $context_items) = @_; |
171 | return unless $context_items; # no context |
172 | |
173 | # add context before |
174 | my $start1 = $hunk->{"start1"}; |
175 | my $num_added = $context_items > $start1 ? $start1 : $context_items; |
176 | $hunk->{"start1"} -= $num_added; |
177 | $hunk->{"start2"} -= $num_added; |
178 | |
179 | # context after |
180 | my $end1 = $hunk->{"end1"}; |
181 | $num_added = ($end1+$context_items > $#f1) ? |
182 | $#f1 - $end1 : |
183 | $context_items; |
184 | $hunk->{"end1"} += $num_added; |
185 | $hunk->{"end2"} += $num_added; |
186 | } |
187 | |
188 | # Is there an overlap between hunk arg0 and old hunk arg1? |
189 | # Note: if end of old hunk is one less than beginning of second, they overlap |
190 | sub does_overlap { |
191 | my ($hunk, $oldhunk) = @_; |
192 | return "" unless $oldhunk; # first time through, $oldhunk is empty |
193 | |
194 | # Do I actually need to test both? |
195 | return ($hunk->{"start1"} - $oldhunk->{"end1"} <= 1 || |
196 | $hunk->{"start2"} - $oldhunk->{"end2"} <= 1); |
197 | } |
198 | |
199 | # Prepend hunk arg1 to hunk arg0 |
200 | # Note that arg1 isn't updated! Only arg0 is. |
201 | sub prepend_hunk { |
202 | my ($hunk, $oldhunk) = @_; |
203 | |
204 | $hunk->{"start1"} = $oldhunk->{"start1"}; |
205 | $hunk->{"start2"} = $oldhunk->{"start2"}; |
206 | |
207 | unshift (@{$hunk->{"blocks"}}, @{$oldhunk->{"blocks"}}); |
208 | } |
209 | |
210 | |
211 | # DIFF OUTPUT ROUTINES. THESE ROUTINES CONTAIN DIFF FORMATTING INFO... |
212 | sub output_diff { |
213 | if (defined $main::opt_u) {&output_unified_diff(@_)} |
214 | elsif (defined $main::opt_c) {&output_context_diff(@_)} |
215 | else {die "unknown diff"} |
216 | } |
217 | |
218 | sub output_unified_diff { |
219 | my ($hunk, $fileref1, $fileref2) = @_; |
220 | my @blocklist; |
221 | |
222 | # Calculate item number range. |
223 | my $range1 = $hunk->unified_range(1); |
224 | my $range2 = $hunk->unified_range(2); |
225 | print "@@ -$range1 +$range2 @@\n"; |
226 | |
227 | # Outlist starts containing the hunk of file 1. |
228 | # Removing an item just means putting a '-' in front of it. |
229 | # Inserting an item requires getting it from file2 and splicing it in. |
230 | # We splice in $num_added items. Remove blocks use $num_added because |
231 | # splicing changed the length of outlist. |
232 | # We remove $num_removed items. Insert blocks use $num_removed because |
233 | # their item numbers---corresponding to positions in file *2*--- don't take |
234 | # removed items into account. |
235 | my $low = $hunk->{"start1"}; |
236 | my $hi = $hunk->{"end1"}; |
237 | my ($num_added, $num_removed) = (0,0); |
238 | my @outlist = @$fileref1[$low..$hi]; |
239 | map {s/^/ /} @outlist; # assume it's just context |
240 | |
241 | foreach my $block (@{$hunk->{"blocks"}}) { |
242 | foreach my $item ($block->remove) { |
243 | my $op = $item->{"sign"}; # - |
244 | my $offset = $item->{"item_no"} - $low + $num_added; |
245 | $outlist[$offset] =~ s/^ /$op/; |
246 | $num_removed++; |
247 | } |
248 | foreach my $item ($block->insert) { |
249 | my $op = $item->{"sign"}; # + |
250 | my $i = $item->{"item_no"}; |
251 | my $offset = $i - $hunk->{"start2"} + $num_removed; |
252 | splice(@outlist,$offset,0,"$op$$fileref2[$i]"); |
253 | $num_added++; |
254 | } |
255 | } |
256 | |
257 | map {s/$/\n/} @outlist; # add \n's |
258 | print @outlist; |
259 | |
260 | } |
261 | |
262 | sub output_context_diff { |
263 | my ($hunk, $fileref1, $fileref2) = @_; |
264 | my @blocklist; |
265 | |
266 | print "***************\n"; |
267 | # Calculate item number range. |
268 | my $range1 = $hunk->context_range(1); |
269 | my $range2 = $hunk->context_range(2); |
270 | |
271 | # Print out file 1 part for each block in context diff format if there are |
272 | # any blocks that remove items |
273 | print "*** $range1 ****\n"; |
274 | my $low = $hunk->{"start1"}; |
275 | my $hi = $hunk->{"end1"}; |
276 | if (@blocklist = grep {$_->remove} @{$hunk->{"blocks"}}) { |
277 | my @outlist = @$fileref1[$low..$hi]; |
278 | map {s/^/ /} @outlist; # assume it's just context |
279 | foreach my $block (@blocklist) { |
280 | my $op = $block->op; # - or ! |
281 | foreach my $item ($block->remove) { |
282 | $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/; |
283 | } |
284 | } |
285 | map {s/$/\n/} @outlist; # add \n's |
286 | print @outlist; |
287 | } |
288 | |
289 | print "--- $range2 ----\n"; |
290 | $low = $hunk->{"start2"}; |
291 | $hi = $hunk->{"end2"}; |
292 | if (@blocklist = grep {$_->insert} @{$hunk->{"blocks"}}) { |
293 | my @outlist = @$fileref2[$low..$hi]; |
294 | map {s/^/ /} @outlist; # assume it's just context |
295 | foreach my $block (@blocklist) { |
296 | my $op = $block->op; # + or ! |
297 | foreach my $item ($block->insert) { |
298 | $outlist[$item->{"item_no"} - $low] =~ s/^ /$op/; |
299 | } |
300 | } |
301 | map {s/$/\n/} @outlist; # add \n's |
302 | print @outlist; |
303 | } |
304 | } |
305 | |
306 | sub context_range { |
307 | # Generate a range of item numbers to print. Only print 1 number if the range |
308 | # has only one item in it. Otherwise, it's 'start,end' |
309 | my ($hunk, $flag) = @_; |
310 | my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"}); |
311 | $start++; $end++; # index from 1, not zero |
312 | my $range = ($start < $end) ? "$start,$end" : $end; |
313 | return $range; |
314 | } |
315 | |
316 | sub unified_range { |
317 | # Generate a range of item numbers to print for unified diff |
318 | # Print number where block starts, followed by number of lines in the block |
319 | # (don't print number of lines if it's 1) |
320 | my ($hunk, $flag) = @_; |
321 | my ($start, $end) = ($hunk->{"start$flag"},$hunk->{"end$flag"}); |
322 | $start++; $end++; # index from 1, not zero |
323 | my $length = $end - $start + 1; |
324 | my $first = $length < 2 ? $end : $start; # strange, but correct... |
325 | my $range = $length== 1 ? $first : "$first,$length"; |
326 | return $range; |
327 | } |
328 | } # end Package Hunk |
329 | |
330 | # Package Block. A block is an operation removing, adding, or changing |
331 | # a group of items. Basically, this is just a list of changes, where each |
332 | # change adds or deletes a single item. |
333 | # (Change could be a separate class, but it didn't seem worth it) |
334 | { |
335 | package Block; |
336 | sub new { |
337 | # Input is a chunk from &Algorithm::LCS::diff |
338 | # Fields in a block: |
339 | # length_diff - how much longer file 2 is than file 1 due to this block |
340 | # Each change has: |
341 | # sign - '+' for insert, '-' for remove |
342 | # item_no - number of the item in the file (e.g., line number) |
343 | # We don't bother storing the text of the item |
344 | # |
345 | my ($class,$chunk) = @_; |
346 | my @changes = (); |
347 | |
348 | # This just turns each change into a hash. |
349 | foreach my $item (@$chunk) { |
350 | my ($sign, $item_no, $text) = @$item; |
351 | my $hashref = {"sign" => $sign, "item_no" => $item_no}; |
352 | push @changes, $hashref; |
353 | } |
354 | |
355 | my $block = { "changes" => \@changes }; |
356 | bless $block, $class; |
357 | |
358 | $block->{"length_diff"} = $block->insert - $block->remove; |
359 | return $block; |
360 | } |
361 | |
362 | |
363 | # LOW LEVEL FUNCTIONS |
364 | sub op { |
365 | # what kind of block is this? |
366 | my $block = shift; |
367 | my $insert = $block->insert; |
368 | my $remove = $block->remove; |
369 | |
370 | $remove && $insert and return '!'; |
371 | $remove and return '-'; |
372 | $insert and return '+'; |
373 | warn "unknown block type"; |
374 | return '^'; # context block |
375 | } |
376 | |
377 | # Returns a list of the changes in this block that remove items |
378 | # (or the number of removals if called in scalar context) |
379 | sub remove { return grep {$_->{"sign"} eq '-'} @{shift->{"changes"}}; } |
380 | |
381 | # Returns a list of the changes in this block that insert items |
382 | sub insert { return grep {$_->{"sign"} eq '+'} @{shift->{"changes"}}; } |
383 | |
384 | } # end of package Block |
385 | |