extras/slurp_bench.pl

   1 #!/usr/local/bin/perl
   2
   3 use strict ;
   4 use warnings ;
   5
   6 use Getopt::Long ;
   7 use Benchmark qw( timethese cmpthese ) ;
   8 use Carp ;
   9 use FileHandle ;
  10 use Fcntl qw( :DEFAULT :seek );
  11
  12 use File::Slurp () ;
  13
  14 my $file = 'slurp_data' ;
  15
  16 my %opts ;
  17
  18 parse_options() ;
  19
  20 run_benchmarks() ;
  21
  22 unlink $file ;
  23
  24 exit ;
  25
  26 my( @lines, $text, $size ) ;
  27
  28 sub run_benchmarks {
  29
  30         foreach my $size ( @{$opts{size_list}} ) {
  31
  32                 @lines = ( 'a' x 80 . "\n") x ( $size / 81 + 1 ) ;
  33                 $text = join( '', @lines ) ;
  34
  35                 my $overage = length($text) - $size ;
  36                 substr( $text, -$overage, $overage, '' ) ;
  37                 substr( $lines[-1], -$overage, $overage, '' ) ;
  38
  39                 File::Slurp::write_file( $file, $text ) ;
  40
  41 #               bench_list_slurp( $size ) if $opts{list} && $opts{slurp} ;
  42 #               bench_scalar_slurp( $size ) if $opts{scalar} && $opts{slurp} ;
  43                 bench_spew_list()
  44 #               bench_scalar_spew( $size ) if $opts{scalar} && $opts{spew} ;
  45         }
  46 }
  47
  48 sub bench_spew_list {
  49
  50         return unless $opts{list} && $opts{spew} ;
  51
  52         print "\n\nWriting (Spew) a list of lines: Size = $size bytes\n\n" ;
  53
  54         my $result = timethese( $opts{duration}, {
  55
  56                 'FS::write_file' =>
  57                         sub { File::Slurp::write_file( $file, @lines ) },
  58
  59                 'print' =>
  60                         sub { print_file( $file, @lines ) },
  61
  62                 'print/join' =>
  63                         sub { print_join_file( $file, @lines ) },
  64
  65                 'syswrite/join' =>
  66                         sub { syswrite_join_file( $file, @lines ) },
  67
  68                 'original write_file' =>
  69                         sub { orig_write_file( $file, @lines ) },
  70
  71         } ) ;
  72
  73         cmpthese( $result ) ;
  74 }
  75
  76 # sub bench_scalar_spew {
  77
  78 #       my ( $size ) = @_ ;
  79
  80 #       print "\n\nScalar Spew of $size file\n\n" ;
  81
  82 #       my $result = timethese( $dur, {
  83
  84 #               new =>
  85 #                       sub { File::Slurp::write_file( $file, $text ) },
  86
  87 #               new_ref =>
  88 #                       sub { File::Slurp::write_file( $file, \$text ) },
  89
  90 #               print_file =>
  91 #                       sub { print_file( $file, $text ) },
  92
  93 #               print_join_file =>
  94 #                       sub { print_join_file( $file, $text ) },
  95
  96 #               syswrite_file =>
  97 #                       sub { syswrite_file( $file, $text ) },
  98
  99 #               syswrite_file2 =>
 100 #                       sub { syswrite_file2( $file, $text ) },
 101
 102 #               orig_write_file =>
 103 #                       sub { orig_write_file( $file, $text ) },
 104
 105 #       } ) ;
 106
 107 #       cmpthese( $result ) ;
 108 # }
 109
 110 # sub bench_scalar_slurp {
 111
 112 #       my ( $size ) = @_ ;
 113
 114 #       print "\n\nScalar Slurp of $size file\n\n" ;
 115
 116 #       my $buffer ;
 117
 118 #       my $result = timethese( $dur, {
 119
 120 #               new =>
 121 #                       sub { my $text = File::Slurp::read_file( $file ) },
 122
 123 #               new_buf_ref =>
 124 #                       sub { my $text ;
 125 #                          File::Slurp::read_file( $file, buf_ref => \$text ) },
 126 #               new_buf_ref2 =>
 127 #                       sub {
 128 #                          File::Slurp::read_file( $file, buf_ref => \$buffer ) },
 129 #               new_scalar_ref =>
 130 #                       sub { my $text =
 131 #                           File::Slurp::read_file( $file, scalar_ref => 1 ) },
 132
 133 #               read_file =>
 134 #                       sub { my $text = read_file( $file ) },
 135
 136 #               sysread_file =>
 137 #                       sub { my $text = sysread_file( $file ) },
 138
 139 #               orig_read_file =>
 140 #                       sub { my $text = orig_read_file( $file ) },
 141
 142 #               'Slurp.pm scalar' =>
 143 #                       sub { my $text = slurp_scalar( $file ) },
 144
 145 #               file_contents =>
 146 #                       sub { my $text = file_contents( $file ) },
 147
 148 #               file_contents_no_OO =>
 149 #                       sub { my $text = file_contents_no_OO( $file ) },
 150 #       } ) ;
 151
 152 #       cmpthese( $result ) ;
 153 # }
 154
 155 # sub bench_list_slurp {
 156
 157 #       my ( $size ) = @_ ;
 158
 159 #       print "\n\nList Slurp of $size file\n\n" ;
 160
 161 #       my $result = timethese( $dur, {
 162
 163 #               new =>
 164 #                       sub { my @lines = File::Slurp::read_file( $file ) },
 165
 166 #               new_array_ref =>
 167 #                       sub { my $lines_ref =
 168 #                            File::Slurp::read_file( $file, array_ref => 1 ) },
 169
 170 #               new_in_anon_array =>
 171 #                       sub { my $lines_ref =
 172 #                            [ File::Slurp::read_file( $file ) ] },
 173
 174 #               read_file =>
 175 #                       sub { my @lines = read_file( $file ) },
 176
 177 #               sysread_file =>
 178 #                       sub { my @lines = sysread_file( $file ) },
 179
 180 #               orig_read_file =>
 181 #                       sub { my @lines = orig_read_file( $file ) },
 182
 183 #               'Slurp.pm to array' =>
 184 #                       sub { my @lines = slurp_array( $file ) },
 185
 186 #               orig_slurp_to_array_ref =>
 187 #                       sub { my $lines_ref = orig_slurp_to_array( $file ) },
 188 #       } ) ;
 189
 190 #       cmpthese( $result ) ;
 191 # }
 192
 193
 194 ###########################
 195 # write file benchmark subs
 196 ###########################
 197
 198
 199 sub print_file {
 200
 201         my( $file_name ) = shift ;
 202
 203         local( *FH ) ;
 204
 205         open( FH, ">$file_name" ) || carp "can't create $file_name $!" ;
 206
 207         print FH @_ ;
 208 }
 209
 210 sub print_file2 {
 211
 212         my( $file_name ) = shift ;
 213
 214         local( *FH ) ;
 215
 216         my $mode = ( -e $file_name ) ? '<' : '>' ;
 217
 218         open( FH, "+$mode$file_name" ) || carp "can't create $file_name $!" ;
 219
 220         print FH @_ ;
 221 }
 222
 223 sub print_join_file {
 224
 225         my( $file_name ) = shift ;
 226
 227         local( *FH ) ;
 228
 229         my $mode = ( -e $file_name ) ? '<' : '>' ;
 230
 231         open( FH, "+$mode$file_name" ) || carp "can't create $file_name $!" ;
 232
 233         print FH join( '', @_ ) ;
 234 }
 235
 236
 237 sub syswrite_join_file {
 238
 239         my( $file_name ) = shift ;
 240
 241         local( *FH ) ;
 242
 243         open( FH, ">$file_name" ) || carp "can't create $file_name $!" ;
 244
 245         syswrite( FH, join( '', @_ ) ) ;
 246 }
 247
 248 sub sysopen_syswrite_join_file {
 249
 250         my( $file_name ) = shift ;
 251
 252         local( *FH ) ;
 253
 254         sysopen( FH, $file_name, O_WRONLY | O_CREAT ) ||
 255                                 carp "can't create $file_name $!" ;
 256
 257         syswrite( FH, join( '', @_ ) ) ;
 258 }
 259
 260 sub orig_write_file
 261 {
 262         my ($f, @data) = @_;
 263
 264         local(*F);
 265
 266         open(F, ">$f") || croak "open >$f: $!";
 267         (print F @data) || croak "write $f: $!";
 268         close(F) || croak "close $f: $!";
 269         return 1;
 270 }
 271
 272
 273 #######################
 274 # top level subs for script
 275
 276 #######################
 277
 278 sub parse_options {
 279
 280         my $result = GetOptions (\%opts, qw(
 281                 iterations|i=s
 282                 direction|d=s
 283                 context|c=s
 284                 sizes|s=s
 285                 legend|key|l|k
 286                 help|usage
 287         ) ) ;
 288
 289         usage() unless $result ;
 290
 291         usage() if $opts{help} ;
 292
 293         legend() if $opts{legend} ;
 294
 295 # set defaults
 296
 297         $opts{direction} ||= 'both' ;
 298         $opts{context} ||= 'both' ;
 299         $opts{iterations} ||= -2 ;
 300         $opts{sizes} ||= '500,10k,1m' ;
 301
 302         if ( $opts{direction} eq 'both' ) {
 303
 304                 $opts{spew} = 1 ;
 305                 $opts{slurp} = 1 ;
 306         }
 307         elsif ( $opts{direction} eq 'in' ) {
 308
 309                 $opts{slurp} = 1 ;
 310
 311         }
 312         elsif ( $opts{direction} eq 'out' ) {
 313
 314                 $opts{spew} = 1 ;
 315         }
 316         else {
 317
 318                 usage( "Unknown direction: $opts{direction}" ) ;
 319         }
 320
 321         if ( $opts{context} eq 'both' ) {
 322
 323                 $opts{list} = 1 ;
 324                 $opts{scalar} = 1 ;
 325         }
 326         elsif ( $opts{context} eq 'scalar' ) {
 327
 328                 $opts{scalar} = 1 ;
 329
 330         }
 331         elsif ( $opts{context} eq 'list' ) {
 332
 333                 $opts{list} = 1 ;
 334         }
 335         else {
 336
 337                 usage( "Unknown context: $opts{context}" ) ;
 338         }
 339
 340         if ( $opts{context} eq 'both' ) {
 341
 342                 $opts{list} = 1 ;
 343                 $opts{scalar} = 1 ;
 344         }
 345         elsif ( $opts{context} eq 'scalar' ) {
 346
 347                 $opts{scalar} = 1 ;
 348
 349         }
 350         elsif ( $opts{context} eq 'list' ) {
 351
 352                 $opts{list} = 1 ;
 353         }
 354         else {
 355
 356                 usage( "Unknown context: $opts{context}" ) ;
 357         }
 358
 359         foreach my $size ( split ',', ( $opts{sizes} ) ) {
 360
 361
 362 # check for valid size and suffix. grab both.
 363
 364                 usage( "Illegal size: $size") unless $size =~ /^(\d+)([km])?$/ ;
 365
 366 # handle suffix multipliers
 367
 368                 $size =  $1 * (( $2 eq 'k' ) ? 1024 : 1024*1024) if $2 ;
 369
 370                 push( @{$opts{size_list}}, $size ) ;
 371         }
 372
 373 #use Data::Dumper ;
 374 #print Dumper \%opts ;
 375 }
 376
 377 sub legend {
 378
 379         die <<'LEGEND' ;
 380 k
 381 Key to Slurp/Spew Benchmarks
 382
 383
 384 Write a list of lines to a file
 385
 386         Key                     Description/Source
 387         ---                     ------------------
 388
 389         FS:write_file           Current File::Slurp::write_file
 390         FS:write_file_ref       Current File::Slurp::write_file (scalar ref)
 391         print                   Open a file and call print()
 392         syswrite/join           Open a file, call syswrite on joined lines
 393         sysopen/syswrite        Sysopen a file, call syswrite on joined lines
 394         original write_file     Original (pre 9999.*) File::Slurp::write_file
 395
 396
 397 LEGEND
 398 }
 399
 400 sub usage {
 401
 402         my( $err ) = @_ ;
 403
 404         $err ||= '' ;
 405
 406         die <<DIE ;
 407 $err
 408 Usage: $0 [--iterations=<iter>] [--direction=<dir>] [--context=<con>]
 409           [--sizes=<size_list>] [--legend] [--help]
 410
 411         --iterations=<iter>     Run the benchmarks this many iterations
 412         -i=<iter>               A positive number is iteration count,
 413                                 a negative number is minimum CPU time in
 414                                 seconds. Default is -2 (run for 2 CPU seconds).
 415
 416         --direction=<dir>       Which direction to slurp: 'in', 'out' or 'both'.
 417         -d=<dir>                Default is 'both'.
 418
 419         --context=<con>         Which context is used for slurping: 'list',
 420         -c=<con>                'scalar' or 'both'. Default is 'both'.
 421
 422         --sizes=<size_list>     What sizes will be used in slurping (either
 423         -s=<size_list>          direction). This is a comma separated list of
 424                                 integers. You can use 'k' or 'm' as suffixes
 425                                 for 1024 and 1024**2. Default is '500,1k,1m'.
 426
 427         --legend                Print out a legend of all the benchmark entries.
 428         --key
 429         -l
 430         -k
 431
 432         --help                  Print this help text
 433         --usage
 434 DIE
 435
 436 }
 437
 438 __END__
 439
 440
 441 sub bench_scalar_spew {
 442
 443         my ( $size ) = @_ ;
 444
 445         print "\n\nScalar Spew of $size file\n\n" ;
 446
 447         my $result = timethese( $dur, {
 448
 449                 new =>
 450                         sub { File::Slurp::write_file( $file, $text ) },
 451
 452                 new_ref =>
 453                         sub { File::Slurp::write_file( $file, \$text ) },
 454
 455                 print_file =>
 456                         sub { print_file( $file, $text ) },
 457
 458                 print_join_file =>
 459                         sub { print_join_file( $file, $text ) },
 460
 461                 syswrite_file =>
 462                         sub { syswrite_file( $file, $text ) },
 463
 464                 syswrite_file2 =>
 465                         sub { syswrite_file2( $file, $text ) },
 466
 467                 orig_write_file =>
 468                         sub { orig_write_file( $file, $text ) },
 469
 470         } ) ;
 471
 472         cmpthese( $result ) ;
 473 }
 474
 475 sub bench_scalar_slurp {
 476
 477         my ( $size ) = @_ ;
 478
 479         print "\n\nScalar Slurp of $size file\n\n" ;
 480
 481         my $buffer ;
 482
 483         my $result = timethese( $dur, {
 484
 485                 new =>
 486                         sub { my $text = File::Slurp::read_file( $file ) },
 487
 488                 new_buf_ref =>
 489                         sub { my $text ;
 490                            File::Slurp::read_file( $file, buf_ref => \$text ) },
 491                 new_buf_ref2 =>
 492                         sub {
 493                            File::Slurp::read_file( $file, buf_ref => \$buffer ) },
 494                 new_scalar_ref =>
 495                         sub { my $text =
 496                             File::Slurp::read_file( $file, scalar_ref => 1 ) },
 497
 498                 read_file =>
 499                         sub { my $text = read_file( $file ) },
 500
 501                 sysread_file =>
 502                         sub { my $text = sysread_file( $file ) },
 503
 504                 orig_read_file =>
 505                         sub { my $text = orig_read_file( $file ) },
 506
 507                 orig_slurp =>
 508                         sub { my $text = orig_slurp_to_scalar( $file ) },
 509
 510                 file_contents =>
 511                         sub { my $text = file_contents( $file ) },
 512
 513                 file_contents_no_OO =>
 514                         sub { my $text = file_contents_no_OO( $file ) },
 515         } ) ;
 516
 517         cmpthese( $result ) ;
 518 }
 519
 520 sub bench_list_slurp {
 521
 522         my ( $size ) = @_ ;
 523
 524         print "\n\nList Slurp of $size file\n\n" ;
 525
 526         my $result = timethese( $dur, {
 527
 528                 new =>
 529                         sub { my @lines = File::Slurp::read_file( $file ) },
 530
 531                 new_array_ref =>
 532                         sub { my $lines_ref =
 533                              File::Slurp::read_file( $file, array_ref => 1 ) },
 534
 535                 new_in_anon_array =>
 536                         sub { my $lines_ref =
 537                              [ File::Slurp::read_file( $file ) ] },
 538
 539                 read_file =>
 540                         sub { my @lines = read_file( $file ) },
 541
 542                 sysread_file =>
 543                         sub { my @lines = sysread_file( $file ) },
 544
 545                 orig_read_file =>
 546                         sub { my @lines = orig_read_file( $file ) },
 547
 548                 orig_slurp_to_array =>
 549                         sub { my @lines = orig_slurp_to_array( $file ) },
 550
 551                 orig_slurp_to_array_ref =>
 552                         sub { my $lines_ref = orig_slurp_to_array( $file ) },
 553         } ) ;
 554
 555         cmpthese( $result ) ;
 556 }
 557
 558 ######################################
 559 # uri's old fast slurp
 560
 561 sub read_file {
 562
 563         my( $file_name ) = shift ;
 564
 565         local( *FH ) ;
 566         open( FH, $file_name ) || carp "can't open $file_name $!" ;
 567
 568         return <FH> if wantarray ;
 569
 570         my $buf ;
 571
 572         read( FH, $buf, -s FH ) ;
 573         return $buf ;
 574 }
 575
 576 sub sysread_file {
 577
 578         my( $file_name ) = shift ;
 579
 580         local( *FH ) ;
 581         open( FH, $file_name ) || carp "can't open $file_name $!" ;
 582
 583         return <FH> if wantarray ;
 584
 585         my $buf ;
 586
 587         sysread( FH, $buf, -s FH ) ;
 588         return $buf ;
 589 }
 590
 591 ######################################
 592 # from File::Slurp.pm on cpan
 593
 594 sub orig_read_file
 595 {
 596         my ($file) = @_;
 597
 598         local($/) = wantarray ? $/ : undef;
 599         local(*F);
 600         my $r;
 601         my (@r);
 602
 603         open(F, "<$file") || croak "open $file: $!";
 604         @r = <F>;
 605         close(F) || croak "close $file: $!";
 606
 607         return $r[0] unless wantarray;
 608         return @r;
 609 }
 610
 611
 612 ######################################
 613 # from Slurp.pm on cpan
 614
 615 sub slurp {
 616     local( $/, @ARGV ) = ( wantarray ? $/ : undef, @_ );
 617     return <ARGV>;
 618 }
 619
 620 sub slurp_array {
 621     my @array = slurp( @_ );
 622     return wantarray ? @array : \@array;
 623 }
 624
 625 sub slurp_scalar {
 626     my $scalar = slurp( @_ );
 627     return $scalar;
 628 }
 629
 630 ######################################
 631 # very slow slurp code used by a client
 632
 633 sub file_contents {
 634     my $file = shift;
 635     my $fh = new FileHandle $file or
 636         warn("Util::file_contents:Can't open file $file"), return '';
 637     return join '', <$fh>;
 638 }
 639
 640 # same code but doesn't use FileHandle.pm
 641
 642 sub file_contents_no_OO {
 643     my $file = shift;
 644
 645         local( *FH ) ;
 646         open( FH, $file ) || carp "can't open $file $!" ;
 647
 648     return join '', <FH>;
 649 }
 650
 651 ##########################