extras/slurp_bench.pl

   1 #!/usr/local/bin/perl
   2
   3 use strict ;
   4 use warnings ;
   5
   6 use Getopt::Long ;
   7 use Benchmark qw( timethese cmpthese ) ;
   8 use Carp ;
   9 use FileHandle ;
  10 use Fcntl qw( :DEFAULT :seek );
  11
  12 use File::Slurp () ;
  13
  14 my $file = 'slurp_data' ;
  15 my( @lines, $text ) ;
  16
  17 my %opts ;
  18
  19 parse_options() ;
  20
  21 run_benchmarks() ;
  22
  23 unlink $file ;
  24
  25 exit ;
  26
  27 sub run_benchmarks {
  28
  29         foreach my $size ( @{$opts{size_list}} ) {
  30
  31                 @lines = ( 'a' x 80 . "\n") x ( $size / 81 + 1 ) ;
  32                 $text = join( '', @lines ) ;
  33
  34                 my $overage = length($text) - $size ;
  35                 substr( $text, -$overage, $overage, '' ) ;
  36                 substr( $lines[-1], -$overage, $overage, '' ) ;
  37
  38                 if ( $opts{slurp} ) {
  39
  40                         File::Slurp::write_file( $file, $text ) ;
  41
  42                         bench_list_slurp( $size ) if $opts{list} ;
  43                         bench_scalar_slurp( $size ) if $opts{scalar} ;
  44                 }
  45
  46                 if ( $opts{spew} ) {
  47
  48                         bench_spew_list( $size ) if $opts{list} ;
  49                         bench_scalar_spew( $size ) if $opts{scalar} ;
  50                 }
  51         }
  52 }
  53
  54 sub bench_spew_list {
  55
  56         my( $size ) = @_ ;
  57
  58         print "\n\nWriting (Spew) a list of lines: Size = $size bytes\n\n" ;
  59
  60         my $result = timethese( $opts{iterations}, {
  61                 'FS::write_file' =>
  62                         sub { File::Slurp::write_file( $file, @lines ) },
  63                 'FS::write_file Aref' =>
  64                         sub { File::Slurp::write_file( $file, \@lines ) },
  65                 'print' =>
  66                         sub { print_file( $file, @lines ) },
  67                 'print/join' =>
  68                         sub { print_join_file( $file, @lines ) },
  69                 'syswrite/join' =>
  70                         sub { syswrite_join_file( $file, @lines ) },
  71                 'original write_file' =>
  72                         sub { orig_write_file( $file, @lines ) },
  73         } ) ;
  74
  75         cmpthese( $result ) ;
  76 }
  77
  78 sub print_file {
  79
  80         my( $file_name ) = shift ;
  81
  82         unlink $file_name ;
  83
  84         local( *FH ) ;
  85         open( FH, ">$file_name" ) || carp "can't create $file_name $!" ;
  86
  87         print FH @_ ;
  88 }
  89
  90 sub print_join_file {
  91
  92         my( $file_name ) = shift ;
  93
  94         unlink $file_name ;
  95
  96         local( *FH ) ;
  97         open( FH, ">$file_name" ) || carp "can't create $file_name $!" ;
  98
  99         print FH join( '', @_ ) ;
 100 }
 101
 102 sub syswrite_join_file {
 103
 104         my( $file_name ) = shift ;
 105
 106         unlink $file_name ;
 107
 108         local( *FH ) ;
 109         open( FH, ">$file_name" ) || carp "can't create $file_name $!" ;
 110
 111         syswrite( FH, join( '', @_ ) ) ;
 112 }
 113
 114 sub sysopen_syswrite_join_file {
 115
 116         my( $file_name ) = shift ;
 117
 118         unlink $file_name ;
 119
 120         local( *FH ) ;
 121         sysopen( FH, $file_name, O_WRONLY | O_CREAT ) ||
 122                                 carp "can't create $file_name $!" ;
 123
 124         syswrite( FH, join( '', @_ ) ) ;
 125 }
 126
 127 sub orig_write_file
 128 {
 129         my ($f, @data) = @_;
 130
 131         unlink $f ;
 132
 133         local(*F);
 134
 135         open(F, ">$f") || croak "open >$f: $!";
 136         (print F @data) || croak "write $f: $!";
 137         close(F) || croak "close $f: $!";
 138         return 1;
 139 }
 140
 141 # sub bench_scalar_spew {
 142
 143 #       my ( $size ) = @_ ;
 144
 145 #       print "\n\nScalar Spew of $size file\n\n" ;
 146
 147 #       my $result = timethese( $dur, {
 148
 149 #               new =>
 150 #                       sub { File::Slurp::write_file( $file, $text ) },
 151
 152 #               new_ref =>
 153 #                       sub { File::Slurp::write_file( $file, \$text ) },
 154
 155 #               print_file =>
 156 #                       sub { print_file( $file, $text ) },
 157
 158 #               print_join_file =>
 159 #                       sub { print_join_file( $file, $text ) },
 160
 161 #               syswrite_file =>
 162 #                       sub { syswrite_file( $file, $text ) },
 163
 164 #               syswrite_file2 =>
 165 #                       sub { syswrite_file2( $file, $text ) },
 166
 167 #               orig_write_file =>
 168 #                       sub { orig_write_file( $file, $text ) },
 169
 170 #       } ) ;
 171
 172 #       cmpthese( $result ) ;
 173 # }
 174
 175 # sub bench_scalar_slurp {
 176
 177 #       my ( $size ) = @_ ;
 178
 179 #       print "\n\nScalar Slurp of $size file\n\n" ;
 180
 181 #       my $buffer ;
 182
 183 #       my $result = timethese( $dur, {
 184
 185 #               new =>
 186 #                       sub { my $text = File::Slurp::read_file( $file ) },
 187
 188 #               new_buf_ref =>
 189 #                       sub { my $text ;
 190 #                          File::Slurp::read_file( $file, buf_ref => \$text ) },
 191 #               new_buf_ref2 =>
 192 #                       sub {
 193 #                          File::Slurp::read_file( $file, buf_ref => \$buffer ) },
 194 #               new_scalar_ref =>
 195 #                       sub { my $text =
 196 #                           File::Slurp::read_file( $file, scalar_ref => 1 ) },
 197
 198 #               read_file =>
 199 #                       sub { my $text = read_file( $file ) },
 200
 201 #               sysread_file =>
 202 #                       sub { my $text = sysread_file( $file ) },
 203
 204 #               orig_read_file =>
 205 #                       sub { my $text = orig_read_file( $file ) },
 206
 207 #               'Slurp.pm scalar' =>
 208 #                       sub { my $text = slurp_scalar( $file ) },
 209
 210 #               file_contents =>
 211 #                       sub { my $text = file_contents( $file ) },
 212
 213 #               file_contents_no_OO =>
 214 #                       sub { my $text = file_contents_no_OO( $file ) },
 215 #       } ) ;
 216
 217 #       cmpthese( $result ) ;
 218 # }
 219
 220 # sub bench_list_slurp {
 221
 222 #       my ( $size ) = @_ ;
 223
 224 #       print "\n\nList Slurp of $size file\n\n" ;
 225
 226 #       my $result = timethese( $dur, {
 227
 228 #               new =>
 229 #                       sub { my @lines = File::Slurp::read_file( $file ) },
 230
 231 #               new_array_ref =>
 232 #                       sub { my $lines_ref =
 233 #                            File::Slurp::read_file( $file, array_ref => 1 ) },
 234
 235 #               new_in_anon_array =>
 236 #                       sub { my $lines_ref =
 237 #                            [ File::Slurp::read_file( $file ) ] },
 238
 239 #               read_file =>
 240 #                       sub { my @lines = read_file( $file ) },
 241
 242 #               sysread_file =>
 243 #                       sub { my @lines = sysread_file( $file ) },
 244
 245 #               orig_read_file =>
 246 #                       sub { my @lines = orig_read_file( $file ) },
 247
 248 #               'Slurp.pm to array' =>
 249 #                       sub { my @lines = slurp_array( $file ) },
 250
 251 #               orig_slurp_to_array_ref =>
 252 #                       sub { my $lines_ref = orig_slurp_to_array( $file ) },
 253 #       } ) ;
 254
 255 #       cmpthese( $result ) ;
 256 # }
 257
 258
 259 ###########################
 260 # write file benchmark subs
 261 ###########################
 262
 263
 264
 265 #######################
 266 # top level subs for script
 267
 268 #######################
 269
 270 sub parse_options {
 271
 272         my $result = GetOptions (\%opts, qw(
 273                 iterations|i=s
 274                 direction|d=s
 275                 context|c=s
 276                 sizes|s=s
 277                 legend|key|l|k
 278                 help|usage
 279         ) ) ;
 280
 281         usage() unless $result ;
 282
 283         usage() if $opts{help} ;
 284
 285         legend() if $opts{legend} ;
 286
 287 # set defaults
 288
 289         $opts{direction} ||= 'both' ;
 290         $opts{context} ||= 'both' ;
 291         $opts{iterations} ||= -2 ;
 292         $opts{sizes} ||= '500,10k,1m' ;
 293
 294         if ( $opts{direction} eq 'both' ) {
 295
 296                 $opts{spew} = 1 ;
 297                 $opts{slurp} = 1 ;
 298         }
 299         elsif ( $opts{direction} eq 'in' ) {
 300
 301                 $opts{slurp} = 1 ;
 302
 303         }
 304         elsif ( $opts{direction} eq 'out' ) {
 305
 306                 $opts{spew} = 1 ;
 307         }
 308         else {
 309
 310                 usage( "Unknown direction: $opts{direction}" ) ;
 311         }
 312
 313         if ( $opts{context} eq 'both' ) {
 314
 315                 $opts{list} = 1 ;
 316                 $opts{scalar} = 1 ;
 317         }
 318         elsif ( $opts{context} eq 'scalar' ) {
 319
 320                 $opts{scalar} = 1 ;
 321
 322         }
 323         elsif ( $opts{context} eq 'list' ) {
 324
 325                 $opts{list} = 1 ;
 326         }
 327         else {
 328
 329                 usage( "Unknown context: $opts{context}" ) ;
 330         }
 331
 332         if ( $opts{context} eq 'both' ) {
 333
 334                 $opts{list} = 1 ;
 335                 $opts{scalar} = 1 ;
 336         }
 337         elsif ( $opts{context} eq 'scalar' ) {
 338
 339                 $opts{scalar} = 1 ;
 340
 341         }
 342         elsif ( $opts{context} eq 'list' ) {
 343
 344                 $opts{list} = 1 ;
 345         }
 346         else {
 347
 348                 usage( "Unknown context: $opts{context}" ) ;
 349         }
 350
 351         foreach my $size ( split ',', ( $opts{sizes} ) ) {
 352
 353
 354 # check for valid size and suffix. grab both.
 355
 356                 usage( "Illegal size: $size") unless $size =~ /^(\d+)([km])?$/ ;
 357
 358 # handle suffix multipliers
 359
 360                 $size =  $1 * (( $2 eq 'k' ) ? 1024 : 1024*1024) if $2 ;
 361
 362                 push( @{$opts{size_list}}, $size ) ;
 363         }
 364
 365 #use Data::Dumper ;
 366 #print Dumper \%opts ;
 367 }
 368
 369 sub legend {
 370
 371         die <<'LEGEND' ;
 372 Legend for the Slurp Benchmark Entries
 373
 374 In all cases below 'FS' or 'F::S' means the current File::Slurp module
 375 is being used in the benchmark. The full name and description will say
 376 which options are being used.
 377
 378 These benchmarks write a list of lines to a file. Use the direction option
 379 of 'out' or 'both' and the context option is 'list' or 'both'.
 380
 381         Key                     Description/Source
 382         ---                     ------------------
 383         FS::write_file          Current F::S write_file
 384         FS::write_file Aref     Current F::S write_file on array ref of data
 385         print                   Open a file and call print() on the list data
 386         print/join              Open a file and call print() on the joined
 387                                 list data
 388         syswrite/join           Open a file, call syswrite on joined list data
 389         sysopen/syswrite        Sysopen a file, call syswrite on joined
 390                                 list data
 391         original write_file     write_file code from original File::Slurp
 392                                 (pre-version 9999.*)
 393
 394 LEGEND
 395 }
 396
 397 sub usage {
 398
 399         my( $err ) = @_ ;
 400
 401         $err ||= '' ;
 402
 403         die <<DIE ;
 404 $err
 405 Usage: $0 [--iterations=<iter>] [--direction=<dir>] [--context=<con>]
 406           [--sizes=<size_list>] [--legend] [--help]
 407
 408         --iterations=<iter>     Run the benchmarks this many iterations
 409         -i=<iter>               A positive number is iteration count,
 410                                 a negative number is minimum CPU time in
 411                                 seconds. Default is -2 (run for 2 CPU seconds).
 412
 413         --direction=<dir>       Which direction to slurp: 'in', 'out' or 'both'.
 414         -d=<dir>                Default is 'both'.
 415
 416         --context=<con>         Which context is used for slurping: 'list',
 417         -c=<con>                'scalar' or 'both'. Default is 'both'.
 418
 419         --sizes=<size_list>     What sizes will be used in slurping (either
 420         -s=<size_list>          direction). This is a comma separated list of
 421                                 integers. You can use 'k' or 'm' as suffixes
 422                                 for 1024 and 1024**2. Default is '500,1k,1m'.
 423
 424         --legend                Print out a legend of all the benchmark entries.
 425         --key
 426         -l
 427         -k
 428
 429         --help                  Print this help text
 430         --usage
 431 DIE
 432
 433 }
 434
 435 __END__
 436
 437
 438 sub bench_scalar_spew {
 439
 440         my ( $size ) = @_ ;
 441
 442         print "\n\nScalar Spew of $size file\n\n" ;
 443
 444         my $result = timethese( $dur, {
 445
 446                 new =>
 447                         sub { File::Slurp::write_file( $file, $text ) },
 448
 449                 new_ref =>
 450                         sub { File::Slurp::write_file( $file, \$text ) },
 451
 452                 print_file =>
 453                         sub { print_file( $file, $text ) },
 454
 455                 print_join_file =>
 456                         sub { print_join_file( $file, $text ) },
 457
 458                 syswrite_file =>
 459                         sub { syswrite_file( $file, $text ) },
 460
 461                 syswrite_file2 =>
 462                         sub { syswrite_file2( $file, $text ) },
 463
 464                 orig_write_file =>
 465                         sub { orig_write_file( $file, $text ) },
 466
 467         } ) ;
 468
 469         cmpthese( $result ) ;
 470 }
 471
 472 sub bench_scalar_slurp {
 473
 474         my ( $size ) = @_ ;
 475
 476         print "\n\nScalar Slurp of $size file\n\n" ;
 477
 478         my $buffer ;
 479
 480         my $result = timethese( $dur, {
 481
 482                 new =>
 483                         sub { my $text = File::Slurp::read_file( $file ) },
 484
 485                 new_buf_ref =>
 486                         sub { my $text ;
 487                            File::Slurp::read_file( $file, buf_ref => \$text ) },
 488                 new_buf_ref2 =>
 489                         sub {
 490                            File::Slurp::read_file( $file, buf_ref => \$buffer ) },
 491                 new_scalar_ref =>
 492                         sub { my $text =
 493                             File::Slurp::read_file( $file, scalar_ref => 1 ) },
 494
 495                 read_file =>
 496                         sub { my $text = read_file( $file ) },
 497
 498                 sysread_file =>
 499                         sub { my $text = sysread_file( $file ) },
 500
 501                 orig_read_file =>
 502                         sub { my $text = orig_read_file( $file ) },
 503
 504                 orig_slurp =>
 505                         sub { my $text = orig_slurp_to_scalar( $file ) },
 506
 507                 file_contents =>
 508                         sub { my $text = file_contents( $file ) },
 509
 510                 file_contents_no_OO =>
 511                         sub { my $text = file_contents_no_OO( $file ) },
 512         } ) ;
 513
 514         cmpthese( $result ) ;
 515 }
 516
 517 sub bench_list_slurp {
 518
 519         my ( $size ) = @_ ;
 520
 521         print "\n\nList Slurp of $size file\n\n" ;
 522
 523         my $result = timethese( $dur, {
 524
 525                 new =>
 526                         sub { my @lines = File::Slurp::read_file( $file ) },
 527
 528                 new_array_ref =>
 529                         sub { my $lines_ref =
 530                              File::Slurp::read_file( $file, array_ref => 1 ) },
 531
 532                 new_in_anon_array =>
 533                         sub { my $lines_ref =
 534                              [ File::Slurp::read_file( $file ) ] },
 535
 536                 read_file =>
 537                         sub { my @lines = read_file( $file ) },
 538
 539                 sysread_file =>
 540                         sub { my @lines = sysread_file( $file ) },
 541
 542                 orig_read_file =>
 543                         sub { my @lines = orig_read_file( $file ) },
 544
 545                 orig_slurp_to_array =>
 546                         sub { my @lines = orig_slurp_to_array( $file ) },
 547
 548                 orig_slurp_to_array_ref =>
 549                         sub { my $lines_ref = orig_slurp_to_array( $file ) },
 550         } ) ;
 551
 552         cmpthese( $result ) ;
 553 }
 554
 555 ######################################
 556 # uri's old fast slurp
 557
 558 sub read_file {
 559
 560         my( $file_name ) = shift ;
 561
 562         local( *FH ) ;
 563         open( FH, $file_name ) || carp "can't open $file_name $!" ;
 564
 565         return <FH> if wantarray ;
 566
 567         my $buf ;
 568
 569         read( FH, $buf, -s FH ) ;
 570         return $buf ;
 571 }
 572
 573 sub sysread_file {
 574
 575         my( $file_name ) = shift ;
 576
 577         local( *FH ) ;
 578         open( FH, $file_name ) || carp "can't open $file_name $!" ;
 579
 580         return <FH> if wantarray ;
 581
 582         my $buf ;
 583
 584         sysread( FH, $buf, -s FH ) ;
 585         return $buf ;
 586 }
 587
 588 ######################################
 589 # from File::Slurp.pm on cpan
 590
 591 sub orig_read_file
 592 {
 593         my ($file) = @_;
 594
 595         local($/) = wantarray ? $/ : undef;
 596         local(*F);
 597         my $r;
 598         my (@r);
 599
 600         open(F, "<$file") || croak "open $file: $!";
 601         @r = <F>;
 602         close(F) || croak "close $file: $!";
 603
 604         return $r[0] unless wantarray;
 605         return @r;
 606 }
 607
 608
 609 ######################################
 610 # from Slurp.pm on cpan
 611
 612 sub slurp {
 613     local( $/, @ARGV ) = ( wantarray ? $/ : undef, @_ );
 614     return <ARGV>;
 615 }
 616
 617 sub slurp_array {
 618     my @array = slurp( @_ );
 619     return wantarray ? @array : \@array;
 620 }
 621
 622 sub slurp_scalar {
 623     my $scalar = slurp( @_ );
 624     return $scalar;
 625 }
 626
 627 ######################################
 628 # very slow slurp code used by a client
 629
 630 sub file_contents {
 631     my $file = shift;
 632     my $fh = new FileHandle $file or
 633         warn("Util::file_contents:Can't open file $file"), return '';
 634     return join '', <$fh>;
 635 }
 636
 637 # same code but doesn't use FileHandle.pm
 638
 639 sub file_contents_no_OO {
 640     my $file = shift;
 641
 642         local( *FH ) ;
 643         open( FH, $file ) || carp "can't open $file $!" ;
 644
 645     return join '', <FH>;
 646 }
 647
 648 ##########################