1 package Text::Diff::Table;
6 use vars qw{$VERSION @ISA @EXPORT_OK};
9 @ISA = qw( Text::Diff::Base Exporter );
10 @EXPORT_OK = qw( expand_tabs );
15 $_ eq '"' || $_ eq '$' ? qq{'$_'}
16 : $_ eq "\\" ? qq{"\\\\"}
21 map( sprintf( "\\x%02x", $_ ), ( 0..31, 127..255 ) ),
22 # map( "\\c$_", "A".."Z"),
23 "\\t", "\\n", "\\r", "\\f", "\\b", "\\a", "\\e"
24 ## NOTE: "\\\\" is not here because some things are explicitly
25 ## escaped before escape() is called and we don't want to
26 ## double-escape "\". Also, in most texts, leaving "\" more
27 ## readable makes sense.
33 $s =~ s{(\t)(\t*)|([^\t]+)}{
35 my $spaces = " " x ( 8 - $count % 8 + 8 * length $2 );
48 sub trim_trailing_line_ends($) {
50 $s =~ s/[\r\n]+(?!\n)$//;
57 ## use utf8 if available. don't if not.
58 my $escaper = <<'EOCODE';
65 : sprintf( "\\x{%04x}", $_ );
71 unless ( eval $escaper ) {
72 $escaper =~ s/ *use *utf8 *;\n// or die "Can't drop use utf8;";
73 eval $escaper or die $@;
79 return bless { @_ }, $proto
82 my $missing_elt = [ "", "" ];
86 my @seqs = ( shift, shift );
87 my $ops = shift; ## Leave sequences in @_[0,1]
92 my $opcode = $_->[Text::Diff::OPCODE()];
93 if ( $opcode eq " " ) {
94 push @A, $missing_elt while @A < @B;
95 push @B, $missing_elt while @B < @A;
97 push @A, [ $_->[0] + ( $options->{OFFSET_A} || 0), $seqs[0][$_->[0]] ]
98 if $opcode eq " " || $opcode eq "-";
99 push @B, [ $_->[1] + ( $options->{OFFSET_B} || 0), $seqs[1][$_->[1]] ]
100 if $opcode eq " " || $opcode eq "+";
103 push @A, $missing_elt while @A < @B;
104 push @B, $missing_elt while @B < @A;
107 my ( $A, $B ) = (shift @A, shift @B );
109 ## Do minimal cleaning on identical elts so these look "normal":
110 ## tabs are expanded, trailing newelts removed, etc. For differing
111 ## elts, make invisible characters visible if the invisible characters
113 my $elt_type = $B == $missing_elt ? "A" :
114 $A == $missing_elt ? "B" :
115 $A->[1] eq $B->[1] ? "="
117 if ( $elt_type ne "*" ) {
118 if ( $elt_type eq "=" || $A->[1] =~ /\S/ || $B->[1] =~ /\S/ ) {
119 $A->[1] = escape trim_trailing_line_ends expand_tabs $A->[1];
120 $B->[1] = escape trim_trailing_line_ends expand_tabs $B->[1];
123 $A->[1] = escape $A->[1];
124 $B->[1] = escape $B->[1];
128 ## not using \z here for backcompat reasons.
129 $A->[1] =~ /^(\s*?)([^ \t].*?)?(\s*)(?![\n\r])$/s;
130 my ( $l_ws_A, $body_A, $t_ws_A ) = ( $1, $2, $3 );
131 $body_A = "" unless defined $body_A;
132 $B->[1] =~ /^(\s*?)([^ \t].*?)?(\s*)(?![\n\r])$/s;
133 my ( $l_ws_B, $body_B, $t_ws_B ) = ( $1, $2, $3 );
134 $body_B = "" unless defined $body_B;
138 if ( $l_ws_A ne $l_ws_B ) {
139 ## Make leading tabs visible. Other non-' ' chars
140 ## will be dealt with in escape(), but this prevents
141 ## tab expansion from hiding tabs by making them
143 $added_escapes = 1 if $l_ws_A =~ s/\t/\\t/g;
144 $added_escapes = 1 if $l_ws_B =~ s/\t/\\t/g;
147 if ( $t_ws_A ne $t_ws_B ) {
148 ## Only trailing whitespace gets the \s treatment
149 ## to make it obvious what's going on.
150 $added_escapes = 1 if $t_ws_A =~ s/ /\\s/g;
151 $added_escapes = 1 if $t_ws_B =~ s/ /\\s/g;
152 $added_escapes = 1 if $t_ws_A =~ s/\t/\\t/g;
153 $added_escapes = 1 if $t_ws_B =~ s/\t/\\t/g;
156 $t_ws_A = $t_ws_B = "";
159 my $do_tab_escape = $added_escapes || do {
160 my $expanded_A = expand_tabs join( $body_A, $l_ws_A, $t_ws_A );
161 my $expanded_B = expand_tabs join( $body_B, $l_ws_B, $t_ws_B );
162 $expanded_A eq $expanded_B;
165 my $do_back_escape = $do_tab_escape || do {
166 my ( $unescaped_A, $escaped_A,
167 $unescaped_B, $escaped_B
170 join( "", /(\\.)/g ),
174 expand_tabs join( $body_A, $l_ws_A, $t_ws_A ),
175 expand_tabs join( $body_B, $l_ws_B, $t_ws_B );
176 $unescaped_A ne $unescaped_B && $escaped_A eq $escaped_B;
179 if ( $do_back_escape ) {
180 $body_A =~ s/\\/\\\\/g;
181 $body_B =~ s/\\/\\\\/g;
184 my $line_A = join $body_A, $l_ws_A, $t_ws_A;
185 my $line_B = join $body_B, $l_ws_B, $t_ws_B;
187 unless ( $do_tab_escape ) {
188 $line_A = expand_tabs $line_A;
189 $line_B = expand_tabs $line_B;
192 $A->[1] = escape $line_A;
193 $B->[1] = escape $line_B;
196 push @elts, [ @$A, @$B, $elt_type ];
199 push @{$self->{ELTS}}, @elts, ["bar"];
209 my @seqs = (shift,shift);
214 if ( defined $options->{FILENAME_A} || defined $options->{FILENAME_B} ) {
215 push @heading_lines, [
218 ( "", escape( defined $_ ? $_ : "<undef>" ) );
220 ( @{$options}{qw( FILENAME_A FILENAME_B)} )
226 if ( defined $options->{MTIME_A} || defined $options->{MTIME_B} ) {
227 push @heading_lines, [
231 ( defined $_ && length $_ )
237 @{$options}{qw( MTIME_A MTIME_B )}
243 if ( defined $options->{INDEX_LABEL} ) {
244 push @heading_lines, [ "", "", "", "", "=" ] unless @heading_lines;
245 $heading_lines[-1]->[0] = $heading_lines[-1]->[2] =
246 $options->{INDEX_LABEL};
249 ## Not ushifting on to @{$self->{ELTS}} in case it's really big. Want
250 ## to avoid the overhead.
252 my $four_column_mode = 0;
253 for my $cols ( @heading_lines, @{$self->{ELTS}} ) {
254 next if $cols->[-1] eq "bar";
255 if ( $cols->[0] ne $cols->[2] ) {
256 $four_column_mode = 1;
261 unless ( $four_column_mode ) {
262 for my $cols ( @heading_lines, @{$self->{ELTS}} ) {
263 next if $cols->[-1] eq "bar";
269 for my $cols ( @heading_lines, @{$self->{ELTS}} ) {
270 next if $cols->[-1] eq "bar";
271 for my $i (0..($#$cols-1)) {
272 $w[$i] = length $cols->[$i]
273 if defined $cols->[$i] && length $cols->[$i] > $w[$i];
277 my %fmts = $four_column_mode
279 "=" => "| %$w[0]s|%-$w[1]s | %$w[2]s|%-$w[3]s |\n",
280 "A" => "* %$w[0]s|%-$w[1]s * %$w[2]s|%-$w[3]s |\n",
281 "B" => "| %$w[0]s|%-$w[1]s * %$w[2]s|%-$w[3]s *\n",
282 "*" => "* %$w[0]s|%-$w[1]s * %$w[2]s|%-$w[3]s *\n",
285 "=" => "| %$w[0]s|%-$w[1]s |%-$w[2]s |\n",
286 "A" => "* %$w[0]s|%-$w[1]s |%-$w[2]s |\n",
287 "B" => "| %$w[0]s|%-$w[1]s |%-$w[2]s *\n",
288 "*" => "* %$w[0]s|%-$w[1]s |%-$w[2]s *\n",
291 $fmts{bar} = sprintf $fmts{"="}, "", "", "", "";
292 $fmts{bar} =~ s/\S/+/g;
293 $fmts{bar} =~ s/ /-/g;
296 sprintf( $fmts{$_->[-1]}, @$_ )
300 @heading_lines ? ["bar"] : (),
305 @{$self->{ELTS}} = [];
316 Text::Diff::Table - Text::Diff plugin to generate "table" format output
322 diff \@a, $b { STYLE => "Table" };
326 This is a plugin output formatter for Text::Diff that generates "table" style
329 +--+----------------------------------+--+------------------------------+
330 | |../Test-Differences-0.2/MANIFEST | |../Test-Differences/MANIFEST |
331 | |Thu Dec 13 15:38:49 2001 | |Sat Dec 15 02:09:44 2001 |
332 +--+----------------------------------+--+------------------------------+
334 | 1|Differences.pm | 2|Differences.pm |
335 | 2|MANIFEST | 3|MANIFEST |
336 | | * 4|MANIFEST.SKIP *
337 | 3|Makefile.PL | 5|Makefile.PL |
338 | | * 6|t/00escape.t *
339 | 4|t/00flatten.t | 7|t/00flatten.t |
340 | 5|t/01text_vs_data.t | 8|t/01text_vs_data.t |
341 | 6|t/10test.t | 9|t/10test.t |
342 +--+----------------------------------+--+------------------------------+
344 This format also goes to some pains to highlight "invisible" characters on
345 differing elements by selectively escaping whitespace. Each element is split
346 in to three segments (leading whitespace, body, trailing whitespace). If
347 whitespace differs in a segement, that segment is whitespace escaped.
349 Here is an example of the selective whitespace.
351 +--+--------------------------+--------------------------+
352 | |demo_ws_A.txt |demo_ws_B.txt |
353 | |Fri Dec 21 08:36:32 2001 |Fri Dec 21 08:36:50 2001 |
354 +--+--------------------------+--------------------------+
355 | 1|identical |identical |
356 * 2| spaced in | also spaced in *
357 * 3|embedded space |embedded tab *
358 | 4|identical |identical |
359 * 5| spaced in |\ttabbed in *
360 * 6|trailing spaces\s\s\n |trailing tabs\t\t\n *
361 | 7|identical |identical |
362 * 8|lf line\n |crlf line\r\n *
363 * 9|embedded ws |embedded\tws *
364 +--+--------------------------+--------------------------+
366 Here's why the lines do or do not have whitespace escaped:
370 =item lines 1, 4, 7 don't differ, no need.
372 =item lines 2, 3 differ in non-whitespace, no need.
374 =item lines 5, 6, 8, 9 all have subtle ws changes.
378 Whether or not line 3 should have that tab character escaped is a judgement
379 call; so far I'm choosing not to.
383 Table formatting requires buffering the entire diff in memory in order to
384 calculate column widths. This format should only be used for smaller
387 Assumes tab stops every 8 characters, as $DIETY intended.
389 Assumes all character codes >= 127 need to be escaped as hex codes, ie that the
390 user's terminal is ASCII, and not even "high bit ASCII", capable. This can be
391 made an option when the need arises.
393 Assumes that control codes (character codes 0..31) that don't have slash-letter
394 escapes ("\n", "\r", etc) in Perl are best presented as hex escapes ("\x01")
395 instead of octal ("\001") or control-code ("\cA") escapes.
399 Barrie Slaymaker E<lt>barries@slaysys.comE<gt>
403 Copyright 2001 Barrie Slaymaker, All Rights Reserved.
405 You may use this software under the terms of the GNU public license, any
406 version, or the Artistic license.