From: Jos I. Boumans Date: Fri, 28 Jan 2005 18:32:44 +0000 (+0100) Subject: Add IPC::Run 0.80. X-Git-Url: http://git.shadowcat.co.uk/gitweb/gitweb.cgi?a=commitdiff_plain;h=f75b6f69235c1ecfc10e61352bbcd27ae7ec7c66;p=p5sagit%2Fp5-mst-13.2.git Add IPC::Run 0.80. Subject: [PATCH] IPC::Run 0.80 integration (was Re: [RFC] More core integration) From: "Jos I. Boumans" Message-Id: <9E6ECDBE-7152-11D9-B82E-000A956B0E06@xs4all.net> with PERL_CORE test premables hacked for better $^X portability (we hope.) p4raw-id: //depot/perl@23954 --- diff --git a/MANIFEST b/MANIFEST index c111398..db8a898 100644 --- a/MANIFEST +++ b/MANIFEST @@ -1469,6 +1469,28 @@ lib/IPC/Open2.pm Open a two-ended pipe lib/IPC/Open2.t See if IPC::Open2 works lib/IPC/Open3.pm Open a three-ended pipe! lib/IPC/Open3.t See if IPC::Open3 works +lib/IPC/Run/Debug.pm IPC::Run +lib/IPC/Run/IO.pm IPC::Run +lib/IPC/Run.pm IPC::Run +lib/IPC/Run/t/adopt.t IPC::Run +lib/IPC/Run/t/binmode.t IPC::Run +lib/IPC/Run/t/bogus.t IPC::Run +lib/IPC/Run/t/filter.t IPC::Run +lib/IPC/Run/t/harness.t IPC::Run +lib/IPC/Run/Timer.pm IPC::Run +lib/IPC/Run/t/io.t IPC::Run +lib/IPC/Run/t/kill_kill.t IPC::Run +lib/IPC/Run/t/parallel.t IPC::Run +lib/IPC/Run/t/pty.t IPC::Run +lib/IPC/Run/t/pump.t IPC::Run +lib/IPC/Run/t/run.t IPC::Run +lib/IPC/Run/t/signal.t IPC::Run +lib/IPC/Run/t/timeout.t IPC::Run +lib/IPC/Run/t/timer.t IPC::Run +lib/IPC/Run/t/win32_compile.t IPC::Run +lib/IPC/Run/Win32Helper.pm IPC::Run +lib/IPC/Run/Win32IO.pm IPC::Run +lib/IPC/Run/Win32Pump.pm IPC::Run lib/less.pm For "use less" lib/less.t See if less support works lib/lib_pm.PL For "use lib", produces lib/lib.pm diff --git a/lib/IPC/Run.pm b/lib/IPC/Run.pm new file mode 100644 index 0000000..fd21836 --- /dev/null +++ b/lib/IPC/Run.pm @@ -0,0 +1,4476 @@ +package IPC::Run ; +# +# Copyright (c) 1999 by Barrie Slaymaker, barries@slaysys.com +# +# You may distribute under the terms of either the GNU General Public +# License or the Artistic License, as specified in the README file. +# + +$VERSION = 0.80; + +=head1 NAME + +IPC::Run - system() and background procs w/ piping, redirs, ptys (Unix, Win32) + +=head1 SYNOPSIS + + ## First,a command to run: + my @cat = qw( cat ) ; + + ## Using run() instead of system(): + use IPC::Run qw( run timeout ) ; + + run \@cmd, \$in, \$out, \$err, timeout( 10 ) or die "cat: $?" + + # Can do I/O to sub refs and filenames, too: + run \@cmd, '<', "in.txt", \&out, \&err or die "cat: $?" + run \@cat, '<', "in.txt", '>>', "out.txt", '2>>', "err.txt" ; + + + # Redirecting using psuedo-terminals instad of pipes. + run \@cat, 'pty>', \$out_and_err ; + + ## Scripting subprocesses (like Expect): + + use IPC::Run qw( start pump finish timeout ) ; + + # Incrementally read from / write to scalars. + # $in is drained as it is fed to cat's stdin, + # $out accumulates cat's stdout + # $err accumulates cat's stderr + # $h is for "harness". + my $h = start \@cat, \$in, \$out, \$err, timeout( 10 ) ; + + $in .= "some input\n" ; + pump $h until $out =~ /input\n/g ; + + $in .= "some more input\n" ; + pump $h until $out =~ /\G.*more input\n/ ; + + $in .= "some final input\n" ; + finish $h or die "cat returned $?" ; + + warn $err if $err ; + print $out ; ## All of cat's output + + # Piping between children + run \@cat, '|', \@gzip ; + + # Multiple children simultaneously (run() blocks until all + # children exit, use start() for background execution): + run \@foo1, '&', \@foo2 ; + + # Calling \&set_up_child in the child before it executes the + # command (only works on systems with true fork() & exec()) + # exceptions thrown in set_up_child() will be propagated back + # to the parent and thrown from run(). + run \@cat, \$in, \$out, + init => \&set_up_child ; + + # Read from / write to file handles you open and close + open IN, 'out.txt' or die $! ; + print OUT "preamble\n" ; + run \@cat, \*IN, \*OUT or die "cat returned $?" ; + print OUT "postamble\n" ; + close IN ; + close OUT ; + + # Create pipes for you to read / write (like IPC::Open2 & 3). + $h = start + \@cat, + 'pipe', \*OUT, + '2>pipe', \*ERR + or die "cat returned $?" ; + print IN "some input\n" ; + close IN ; + print , ; + finish $h ; + + # Mixing input and output modes + run \@cat, 'in.txt', \&catch_some_out, \*ERR_LOG ) ; + + # Other redirection constructs + run \@cat, '>&', \$out_and_err ; + run \@cat, '2>&1' ; + run \@cat, '0<&3' ; + run \@cat, '<&-' ; + run \@cat, '3<', \$in3 ; + run \@cat, '4>', \$out4 ; + # etc. + + # Passing options: + run \@cat, 'in.txt', debug => 1 ; + + # Call this system's shell, returns TRUE on 0 exit code + # THIS IS THE OPPOSITE SENSE OF system()'s RETURN VALUE + run "cat a b c" or die "cat returned $?" ; + + # Launch a sub process directly, no shell. Can't do redirection + # with this form, it's here to behave like system() with an + # inverted result. + $r = run "cat a b c" ; + + # Read from a file in to a scalar + run io( "filename", 'r', \$recv ) ; + run io( \*HANDLE, 'r', \$recv ) ; + +=head1 DESCRIPTION + +IPC::Run allows you run and interact with child processes using files, pipes, +and pseudo-ttys. Both system()-style and scripted usages are supported and +may be mixed. Likewise, functional and OO API styles are both supported and +may be mixed. + +Various redirection operators reminiscent of those seen on common Unix and DOS +command lines are provided. + +Before digging in to the details a few LIMITATIONS are important enough +to be mentioned right up front: + +=over + +=item Win32 Support + +Win32 support is working but B, but does pass all relevant tests +on NT 4.0. See L. + +=item pty Support + +If you need pty support, IPC::Run should work well enough most of the +time, but IO::Pty is being improved, and IPC::Run will be improved to +use IO::Pty's new features when it is release. + +The basic problem is that the pty needs to initialize itself before the +parent writes to the master pty, or the data written gets lost. So +IPC::Run does a sleep(1) in the parent after forking to (hopefully) give +the child a chance to run. This is a kludge that works well on non +heavily loaded systems :(. + +ptys are not supported yet under Win32, but will be emulated... + +=item Debugging Tip + +You may use the environment variable C to see what's going on +under the hood: + + $ IPCRUNDEBUG=basic myscript # prints minimal debugging + $ IPCRUNDEBUG=data myscript # prints all data reads/writes + $ IPCRUNDEBUG=details myscript # prints lots of low-level details + $ IPCRUNDEBUG=gory myscript # (Win32 only) prints data moving through + # the helper processes. + +=back + +We now return you to your regularly scheduled documentation. + +=head2 Harnesses + +Child processes and I/O handles are gathered in to a harness, then +started and run until the processing is finished or aborted. + +=head2 run() vs. start(); pump(); finish(); + +There are two modes you can run harnesses in: run() functions as an +enhanced system(), and start()/pump()/finish() allow for background +processes and scripted interactions with them. + +When using run(), all data to be sent to the harness is set up in +advance (though one can feed subprocesses input from subroutine refs to +get around this limitation). The harness is run and all output is +collected from it, then any child processes are waited for: + + run \@cmd, \< and C<$err> in our examples. + +Regular expressions can be used to wait for appropriate output in +several ways. The C example in the previous section demonstrates +how to pump() until some string appears in the output. Here's an +example that uses C to fetch files from a remote server: + + $h = harness \@smbclient, \$in, \$out ; + + $in = "cd /src\n" ; + $h->pump until $out =~ /^smb.*> \Z/m ; + die "error cding to /src:\n$out" if $out =~ "ERR" ; + $out = '' ; + + $in = "mget *\n" ; + $h->pump until $out =~ /^smb.*> \Z/m ; + die "error retrieving files:\n$out" if $out =~ "ERR" ; + + $in = "quit\n" ; + $h->finish ; + +Notice that we carefully clear $out after the first command/response +cycle? That's because IPC::Run does not delete $out when we continue, +and we don't want to trip over the old output in the second +command/response cycle. + +Say you want to accumulate all the output in $out and analyze it +afterwards. Perl offers incremental regular expression matching using +the C and pattern matching idiom and the C<\G> assertion. +IPC::Run is careful not to disturb the current C value for +scalars it appends data to, so we could modify the above so as not to +destroy $out by adding a couple of C modifiers. The C keeps us +from tripping over the previous prompt and the C keeps us from +resetting the prior match position if the expected prompt doesn't +materialize immediately: + + $h = harness \@smbclient, \$in, \$out ; + + $in = "cd /src\n" ; + $h->pump until $out =~ /^smb.*> \Z/mgc ; + die "error cding to /src:\n$out" if $out =~ "ERR" ; + + $in = "mget *\n" ; + $h->pump until $out =~ /^smb.*> \Z/mgc ; + die "error retrieving files:\n$out" if $out =~ "ERR" ; + + $in = "quit\n" ; + $h->finish ; + + analyze( $out ) ; + +When using this technique, you may want to preallocate $out to have +plenty of memory or you may find that the act of growing $out each time +new input arrives causes an C slowdown as $out grows. +Say we expect no more than 10,000 characters of input at the most. To +preallocate memory to $out, do something like: + + my $out = "x" x 10_000 ; + $out = "" ; + +C will allocate at least 10,000 characters' worth of space, then +mark the $out as having 0 length without freeing all that yummy RAM. + +=head2 Timeouts and Timers + +More than likely, you don't want your subprocesses to run forever, and +sometimes it's nice to know that they're going a little slowly. +Timeouts throw exceptions after a some time has elapsed, timers merely +cause pump() to return after some time has elapsed. Neither is +reset/restarted automatically. + +Timeout objects are created by calling timeout( $interval ) and passing +the result to run(), start() or harness(). The timeout period starts +ticking just after all the child processes have been fork()ed or +spawn()ed, and are polled for expiration in run(), pump() and finish(). +If/when they expire, an exception is thrown. This is typically useful +to keep a subprocess from taking too long. + +If a timeout occurs in run(), all child processes will be terminated and +all file/pipe/ptty descriptors opened by run() will be closed. File +descriptors opened by the parent process and passed in to run() are not +closed in this event. + +If a timeout occurs in pump(), pump_nb(), or finish(), it's up to you to +decide whether to kill_kill() all the children or to implement some more +graceful fallback. No I/O will be closed in pump(), pump_nb() or +finish() by such an exception (though I/O is often closed down in those +routines during the natural course of events). + +Often an exception is too harsh. timer( $interval ) creates timer +objects that merely prevent pump() from blocking forever. This can be +useful for detecting stalled I/O or printing a soothing message or "." +to pacify an anxious user. + +Timeouts and timers can both be restarted at any time using the timer's +start() method (this is not the start() that launches subprocesses). To +restart a timer, you need to keep a reference to the timer: + + ## Start with a nice long timeout to let smbclient connect. If + ## pump or finish take too long, an exception will be thrown. + + my $h ; + eval { + $h = harness \@smbclient, \$in, \$out, \$err, ( my $t = timeout 30 ) ; + sleep 11 ; # No effect: timer not running yet + + start $h ; + $in = "cd /src\n" ; + pump $h until ! length $in ; + + $in = "ls\n" ; + ## Now use a short timeout, since this should be faster + $t->start( 5 ) ; + pump $h until ! length $in ; + + $t->start( 10 ) ; ## Give smbclient a little while to shut down. + $h->finish ; + } ; + if ( $@ ) { + my $x = $@ ; ## Preserve $@ in case another exception occurs + $h->kill_kill ; ## kill it gently, then brutally if need be, or just + ## brutally on Win32. + die $x ; + } + +Timeouts and timers are I checked once the subprocesses are shut +down; they will not expire in the interval between the last valid +process and when IPC::Run scoops up the processes' result codes, for +instance. + +=head2 Spawning synchronization, child exception propagation + +start() pauses the parent until the child executes the command or CODE +reference and propagates any exceptions thrown (including exec() +failure) back to the parent. This has several pleasant effects: any +exceptions thrown in the child, including exec() failure, come flying +out of start() or run() as though they had ocurred in the parent. + +This includes exceptions your code thrown from init subs. In this +example: + + eval { + run \@cmd, init => sub { die "blast it! foiled again!" } ; + } ; + print $@ ; + +the exception "blast it! foiled again" will be thrown from the child +process (preventing the exec()) and printed by the parent. + +In situations like + + run \@cmd1, "|", \@cmd2, "|", \@cmd3 ; + +@cmd1 will be initted and exec()ed before @cmd2, and @cmd2 before @cmd3. +This can save time and prevent oddball errors emitted by later commands +when earlier commands fail to execute. Note that IPC::Run doesn't start +any commands unless it can find the executables referenced by all +commands. These executables must pass both the C<-f> and C<-x> tests +described in L. + +Another nice effect is that init() subs can take their time doing things +and there will be no problems caused by a parent continuing to execute +before a child's init() routine is complete. Say the init() routine +needs to open a socket or a temp file that the parent wants to connect +to; without this synchronization, the parent will need to implement a +retry loop to wait for the child to run, since often, the parent gets a +lot of things done before the child's first timeslice is allocated. + +This is also quite necessary for pseudo-tty initialization, which needs +to take place before the parent writes to the child via pty. Writes +that occur before the pty is set up can get lost. + +A final, minor, nicety is that debugging output from the child will be +emitted before the parent continues on, making for much clearer debugging +output in complex situations. + +The only drawback I can conceive of is that the parent can't continue to +operate while the child is being initted. If this ever becomes a +problem in the field, we can implement an option to avoid this behavior, +but I don't expect it to. + +B: executing CODE references isn't supported on Win32, see +L for details. + +=head2 Syntax + +run(), start(), and harness() can all take a harness specification +as input. A harness specification is either a single string to be passed +to the systems' shell: + + run "echo 'hi there'" ; + +or a list of commands, io operations, and/or timers/timeouts to execute. +Consecutive commands must be separated by a pipe operator '|' or an '&'. +External commands are passed in as array references, and, on systems +supporting fork(), Perl code may be passed in as subs: + + run \@cmd ; + run \@cmd1, '|', \@cmd2 ; + run \@cmd1, '&', \@cmd2 ; + run \&sub1 ; + run \&sub1, '|', \&sub2 ; + run \&sub1, '&', \&sub2 ; + +'|' pipes the stdout of \@cmd1 the stdin of \@cmd2, just like a +shell pipe. '&' does not. Child processes to the right of a '&' +will have their stdin closed unless it's redirected-to. + +L objects may be passed in as well, whether or not +child processes are also specified: + + run io( "infile", ">", \$in ), io( "outfile", "<", \$in ) ; + +as can L objects: + + run \@cmd, io( "outfile", "<", \$in ), timeout( 10 ) ; + +Commands may be followed by scalar, sub, or i/o handle references for +redirecting +child process input & output: + + run \@cmd, \undef, \$out ; + run \@cmd, \$in, \$out ; + run \@cmd1, \&in, '|', \@cmd2, \*OUT ; + run \@cmd1, \*IN, '|', \@cmd2, \&out ; + +This is known as succinct redirection syntax, since run(), start() +and harness(), figure out which file descriptor to redirect and how. +File descriptor 0 is presumed to be an input for +the child process, all others are outputs. The assumed file +descriptor always starts at 0, unless the command is being piped to, +in which case it starts at 1. + +To be explicit about your redirects, or if you need to do more complex +things, there's also a redirection operator syntax: + + run \@cmd, '<', \undef, '>', \$out ; + run \@cmd, '<', \undef, '>&', \$out_and_err ; + run( + \@cmd1, + '<', \$in, + '|', \@cmd2, + \$out + ) ; + +Operator syntax is required if you need to do something other than simple +redirection to/from scalars or subs, like duping or closing file descriptors +or redirecting to/from a named file. The operators are covered in detail +below. + +After each \@cmd (or \&foo), parsing begins in succinct mode and toggles to +operator syntax mode when an operator (ie plain scalar, not a ref) is seen. +Once in +operator syntax mode, parsing only reverts to succinct mode when a '|' or +'&' is seen. + +In succinct mode, each parameter after the \@cmd specifies what to +do with the next highest file descriptor. These File descriptor start +with 0 (stdin) unless stdin is being piped to (C<'|', \@cmd>), in which +case they start with 1 (stdout). Currently, being on the left of +a pipe (C<\@cmd, \$out, \$err, '|'>) does I cause stdout to be +skipped, though this may change since it's not as DWIMerly as it +could be. Only stdin is assumed to be an +input in succinct mode, all others are assumed to be outputs. + +If no piping or redirection is specified for a child, it will inherit +the parent's open file handles as dictated by your system's +close-on-exec behavior and the $^F flag, except that processes after a +'&' will not inherit the parent's stdin. Also note that $^F does not +affect file desciptors obtained via POSIX, since it only applies to +full-fledged Perl file handles. Such processes will have their stdin +closed unless it has been redirected-to. + +If you want to close a child processes stdin, you may do any of: + + run \@cmd, \undef ; + run \@cmd, \"" ; + run \@cmd, '<&-' ; + run \@cmd, '0<&-' ; + +Redirection is done by placing redirection specifications immediately +after a command or child subroutine: + + run \@cmd1, \$in, '|', \@cmd2, \$out ; + run \@cmd1, '<', \$in, '|', \@cmd2, '>', \$out ; + +If you omit the redirection operators, descriptors are counted +starting at 0. Descriptor 0 is assumed to be input, all others +are outputs. A leading '|' consumes descriptor 0, so this +works as expected. + + run \@cmd1, \$in, '|', \@cmd2, \$out ; + +The parameter following a redirection operator can be a scalar ref, +a subroutine ref, a file name, an open filehandle, or a closed +filehandle. + +If it's a scalar ref, the child reads input from or sends output to +that variable: + + $in = "Hello World.\n" ; + run \@cat, \$in, \$out ; + print $out ; + +Scalars used in incremental (start()/pump()/finish()) applications are treated +as queues: input is removed from input scalers, resulting in them dwindling +to '', and output is appended to output scalars. This is not true of +harnesses run() in batch mode. + +It's usually wise to append new input to be sent to the child to the input +queue, and you'll often want to zap output queues to '' before pumping. + + $h = start \@cat, \$in ; + $in = "line 1\n" ; + pump $h ; + $in .= "line 2\n" ; + pump $h ; + $in .= "line 3\n" ; + finish $h ; + +The final call to finish() must be there: it allows the child process(es) +to run to completion and waits for their exit values. + +=head1 OBSTINATE CHILDREN + +Interactive applications are usually optimized for human use. This +can help or hinder trying to interact with them through modules like +IPC::Run. Frequently, programs alter their behavior when they detect +that stdin, stdout, or stderr are not connected to a tty, assuming that +they are being run in batch mode. Whether this helps or hurts depends +on which optimizations change. And there's often no way of telling +what a program does in these areas other than trial and error and, +occasionally, reading the source. This includes different versions +and implementations of the same program. + +All hope is not lost, however. Most programs behave in reasonably +tractable manners, once you figure out what it's trying to do. + +Here are some of the issues you might need to be aware of. + +=over + +=item * + +fflush()ing stdout and stderr + +This lets the user see stdout and stderr immediately. Many programs +undo this optimization if stdout is not a tty, making them harder to +manage by things like IPC::Run. + +Many programs decline to fflush stdout or stderr if they do not +detect a tty there. Some ftp commands do this, for instance. + +If this happens to you, look for a way to force interactive behavior, +like a command line switch or command. If you can't, you will +need to use a pseudo terminal ('pty>'). + +=item * + +false prompts + +Interactive programs generally do not guarantee that output from user +commands won't contain a prompt string. For example, your shell prompt +might be a '$', and a file named '$' might be the only file in a directory +listing. + +This can make it hard to guarantee that your output parser won't be fooled +into early termination of results. + +To help work around this, you can see if the program can alter it's +prompt, and use something you feel is never going to occur in actual +practice. + +You should also look for your prompt to be the only thing on a line: + + pump $h until $out =~ /^\s?\z/m ; + +(use C<(?!\n)\Z> in place of C<\z> on older perls). + +You can also take the approach that IPC::ChildSafe takes and emit a +command with known output after each 'real' command you issue, then +look for this known output. See new_appender() and new_chunker() for +filters that can help with this task. + +If it's not convenient or possibly to alter a prompt or use a known +command/response pair, you might need to autodetect the prompt in case +the local version of the child program is different then the one +you tested with, or if the user has control over the look & feel of +the prompt. + +=item * + +Refusing to accept input unless stdin is a tty. + +Some programs, for security reasons, will only accept certain types +of input from a tty. su, notable, will not prompt for a password unless +it's connected to a tty. + +If this is your situation, use a pseudo terminal ('pty>'). + +=item * + +Not prompting unless connected to a tty. + +Some programs don't prompt unless stdin or stdout is a tty. See if you can +turn prompting back on. If not, see if you can come up with a command that +you can issue after every real command and look for it's output, as +IPC::ChildSafe does. There are two filters included with IPC::Run that +can help with doing this: appender and chunker (see new_appender() and +new_chunker()). + +=item * + +Different output format when not connected to a tty. + +Some commands alter their formats to ease machine parsability when they +aren't connected to a pipe. This is actually good, but can be surprising. + +=back + +=head1 PSEUDO TERMINALS + +On systems providing pseudo terminals under /dev, IPC::Run can use IO::Pty +(available on CPAN) to provide a terminal environment to subprocesses. +This is necessary when the subprocess really wants to think it's connected +to a real terminal. + +=head2 CAVEATS + +Psuedo-terminals are not pipes, though they are similar. Here are some +differences to watch out for. + +=over + +=item Echoing + +Sending to stdin will cause an echo on stdout, which occurs before each +line is passed to the child program. There is currently no way to +disable this, although the child process can and should disable it for +things like passwords. + +=item Shutdown + +IPC::Run cannot close a pty until all output has been collected. This +means that it is not possible to send an EOF to stdin by half-closing +the pty, as we can when using a pipe to stdin. + +This means that you need to send the child process an exit command or +signal, or run() / finish() will time out. Be careful not to expect a +prompt after sending the exit command. + +=item Command line editing + +Some subprocesses, notable shells that depend on the user's prompt +settings, will reissue the prompt plus the command line input so far +once for each character. + +=item '>pty>' means '&>pty>', not '1>pty>' + +The pseudo terminal redirects both stdout and stderr unless you specify +a file descriptor. If you want to grab stderr separately, do this: + + start \@cmd, 'pty>', \$out, '2>', \$err ; + +=item stdin, stdout, and stderr not inherited + +Child processes harnessed to a pseudo terminal have their stdin, stdout, +and stderr completely closed before any redirection operators take +effect. This casts of the bonds of the controlling terminal. This is +not done when using pipes. + +Right now, this affects all children in a harness that has a pty in use, +even if that pty would not affect a particular child. That's a bug and +will be fixed. Until it is, it's best not to mix-and-match children. + +=back + +=head2 Redirection Operators + + Operator SHNP Description + ======== ==== =========== + <, N< SHN Redirects input to a child's fd N (0 assumed) + + >, N> SHN Redirects output from a child's fd N (1 assumed) + >>, N>> SHN Like '>', but appends to scalars or named files + >&, &> SHN Redirects stdout & stderr from a child process + + pty, N>pty S Like '>', but uses a pseudo-tty instead of a pipe + + N<&M Dups input fd N to input fd M + M>&N Dups output fd N to input fd M + N<&- Closes fd N + + pipe, N>pipe P Pipe opens H for caller to read, write, close. + +'N' and 'M' are placeholders for integer file descriptor numbers. The +terms 'input' and 'output' are from the child process's perspective. + +The SHNP field indicates what parameters an operator can take: + + S: \$scalar or \&function references. Filters may be used with + these operators (and only these). + H: \*HANDLE or IO::Handle for caller to open, and close + N: "file name". + P: \*HANDLE opened by IPC::Run as the parent end of a pipe, but read + and written to and closed by the caller (like IPC::Open3). + +=over + +=item Redirecting input: [n]<, [n] +below for more information. + +The : The handle returned is actually a socket handle, so you can +use select() on it. + +=item Redirecting output: [n]>, [n]>>, [n]>&[m], [n]>pipe + +You can redirect any output the child emits +to a scalar variable, subroutine, file handle, or file name. You +can have &run truncate or append to named files or scalars. If +you are redirecting stdin as well, or if the command is on the +receiving end of a pipeline ('|'), you can omit the redirection +operator: + + @ls = ( 'ls' ) ; + run \@ls, \undef, \$out + or die "ls returned $?" ; + + run \@ls, \undef, \&out ; ## Calls &out each time some output + ## is received from the child's + ## when undef is returned. + + run \@ls, \undef, '2>ls.err' ; + run \@ls, '2>', 'ls.err' ; + +The two parameter form guarantees that the filename +will not be interpreted as a redirection operator: + + run \@ls, '>', "&more" ; + run \@ls, '2>', ">foo\n" ; + +You can pass file handles you've opened for writing: + + open( *OUT, ">out.txt" ) ; + open( *ERR, ">err.txt" ) ; + run \@cat, \*OUT, \*ERR ; + +Passing a scalar reference and a code reference requires a little +more work, but allows you to capture all of the output in a scalar +or each piece of output by a callback: + +These two do the same things: + + run( [ 'ls' ], '2>', sub { $err_out .= $_[0] } ) ; + +does the same basic thing as: + + run( [ 'ls' ], '2>', \$err_out ) ; + +The subroutine will be called each time some data is read from the child. + +The >pipe operator is different in concept than the other '>' operators, +although it's syntax is similar: + + $h = start \@cat, $in, '>pipe', \*OUT, '2>pipe', \*ERR ; + $in = "hello world\n" ; + finish $h ; + print ; + print ; + close OUT ; + close ERR ; + +causes two pipe to be created, with one end attached to cat's stdout +and stderr, respectively, and the other left open on OUT and ERR, so +that the script can manually +read(), select(), etc. on them. This is like +the behavior of IPC::Open2 and IPC::Open3. + +B: The handle returned is actually a socket handle, so you can +use select() on it. + +=item Duplicating output descriptors: >&m, n>&m + +This duplicates output descriptor number n (default is 1 if n is omitted) +from descriptor number m. + +=item Duplicating input descriptors: <&m, n<&m + +This duplicates input descriptor number n (default is 0 if n is omitted) +from descriptor number m + +=item Closing descriptors: <&-, 3<&- + +This closes descriptor number n (default is 0 if n is omitted). The +following commands are equivalent: + + run \@cmd, \undef ; + run \@cmd, '<&-' ; + run \@cmd, ', >&, &>pipe, >pipe& + +The following pairs of commands are equivalent: + + run \@cmd, '>&', \$out ; run \@cmd, '>', \$out, '2>&1' ; + run \@cmd, '>&', 'out.txt' ; run \@cmd, '>', 'out.txt', '2>&1' ; + +etc. + +File descriptor numbers are not permitted to the left or the right of +these operators, and the '&' may occur on either end of the operator. + +The '&>pipe' and '>pipe&' variants behave like the '>pipe' operator, except +that both stdout and stderr write to the created pipe. + +=item Redirection Filters + +Both input redirections and output redirections that use scalars or +subs as endpoints may have an arbitrary number of filter subs placed +between them and the child process. This is useful if you want to +receive output in chunks, or if you want to massage each chunk of +data sent to the child. To use this feature, you must use operator +syntax: + + run( + \@cmd + '<', \&in_filter_2, \&in_filter_1, $in, + '>', \&out_filter_1, \&in_filter_2, $out, + ) ; + +This capability is not provided for IO handles or named files. + +Two filters are provided by IPC::Run: appender and chunker. Because +these may take an argument, you need to use the constructor functions +new_appender() and new_chunker() rather than using \& syntax: + + run( + \@cmd + '<', new_appender( "\n" ), $in, + '>', new_chunker, $out, + ) ; + +=back + +=head2 Just doing I/O + +If you just want to do I/O to a handle or file you open yourself, you +may specify a filehandle or filename instead of a command in the harness +specification: + + run io( "filename", '>', \$recv ) ; + + $h = start io( $io, '>', \$recv ) ; + + $h = harness \@cmd, '&', io( "file", '<', \$send ) ; + +=head2 Options + +Options are passed in as name/value pairs: + + run \@cat, \$in, debug => 1 ; + +If you pass the debug option, you may want to pass it in first, so you +can see what parsing is going on: + + run debug => 1, \@cat, \$in ; + +=over + +=item debug + +Enables debugging output in parent and child. Debugging info is emitted +to the STDERR that was present when IPC::Run was first Ced (it's +Ced out of the way so that it can be redirected in children without +having debugging output emitted on it). + +=back + +=head1 RETURN VALUES + +harness() and start() return a reference to an IPC::Run harness. This is +blessed in to the IPC::Run package, so you may make later calls to +functions as members if you like: + + $h = harness( ... ) ; + $h->start ; + $h->pump ; + $h->finish ; + + $h = start( .... ) ; + $h->pump ; + ... + +Of course, using method call syntax lets you deal with any IPC::Run +subclasses that might crop up, but don't hold your breath waiting for +any. + +run() and finish() return TRUE when all subcommands exit with a 0 result +code. B. + +All routines raise exceptions (via die()) when error conditions are +recognized. A non-zero command result is not treated as an error +condition, since some commands are tests whose results are reported +in their exit codes. + +=head1 ROUTINES + +=over + +=cut + +@ISA = qw( Exporter ) ; + +## We use @EXPORT for the end user's convenience: there's only one function +## exported, it's homonymous with the module, it's an unusual name, and +## it can be suppressed by "use IPC::Run () ;". + +my @FILTER_IMP = qw( input_avail get_more_input ) ; +my @FILTERS = qw( + new_appender + new_chunker + new_string_source + new_string_sink +) ; +my @API = qw( + run + harness start pump pumpable finish + signal kill_kill reap_nb + io timer timeout + close_terminal + binary +) ; + +@EXPORT_OK = ( @API, @FILTER_IMP, @FILTERS, qw( filter_tests Win32_MODE ) ) ; +%EXPORT_TAGS = ( + 'filter_imp' => \@FILTER_IMP, + 'all' => \@EXPORT_OK, + 'filters' => \@FILTERS, + 'api' => \@API, +) ; + +use strict ; + +use IPC::Run::Debug; +use Exporter ; +use Fcntl ; +use POSIX () ; +use Symbol ; +use Carp ; +use File::Spec ; +use IO::Handle ; +require IPC::Run::IO ; +require IPC::Run::Timer ; +use UNIVERSAL qw( isa ) ; + +use constant Win32_MODE => $^O =~ /os2|Win32/i ; + +BEGIN { + if ( Win32_MODE ) { + eval "use IPC::Run::Win32Helper; 1;" + or ( $@ && die ) or die "$!" ; + } + else { + eval "use File::Basename; 1;" or die $! ; + } +} + + +sub input_avail() ; +sub get_more_input() ; + +############################################################################### + +## +## State machine states, set in $self->{STATE} +## +## These must be in ascending order numerically +## +sub _newed() {0} +sub _harnessed(){1} +sub _finished() {2} ## _finished behave almost exactly like _harnessed +sub _started() {3} + +## +## Which fds have been opened in the parent. This may have extra fds, since +## we aren't all that rigorous about closing these off, but that's ok. This +## is used on Unixish OSs to close all fds in the child that aren't needed +## by that particular child. +my %fds ; + +## There's a bit of hackery going on here. +## +## We want to have any code anywhere be able to emit +## debugging statements without knowing what harness the code is +## being called in/from, since we'd need to pass a harness around to +## everything. +## +## Thus, $cur_self was born. + +use vars qw( $cur_self ) ; + +sub _debug_fd { + return fileno STDERR unless defined $cur_self ; + + if ( _debugging && ! defined $cur_self->{DEBUG_FD} ) { + my $fd = select STDERR ; $| = 1 ; select $fd ; + $cur_self->{DEBUG_FD} = POSIX::dup fileno STDERR ; + _debug( "debugging fd is $cur_self->{DEBUG_FD}\n" ) + if _debugging_details ; + } + + return fileno STDERR unless defined $cur_self->{DEBUG_FD} ; + + return $cur_self->{DEBUG_FD} +} + +sub DESTROY { + ## We absolutely do not want to do anything else here. We are likely + ## to be in a child process and we don't want to do things like kill_kill + ## ourself or cause other destruction. + my IPC::Run $self = shift ; + POSIX::close $self->{DEBUG_FD} if defined $self->{DEBUG_FD} ; + $self->{DEBUG_FD} = undef ; +} + +## +## Support routines (NOT METHODS) +## +my %cmd_cache ; + +sub _search_path { + my ( $cmd_name ) = @_ ; + if ( File::Spec->file_name_is_absolute( $cmd_name ) && -x $cmd_name) { + _debug "'", $cmd_name, "' is absolute" + if _debugging_details ; + return $cmd_name ; + } + + my $dirsep = + ( Win32_MODE + ? '[/\\\\]' + : $^O =~ /MacOS/ + ? ':' + : $^O =~ /VMS/ + ? '[\[\]]' + : '/' + ) ; + + if ( Win32_MODE + && ( $cmd_name =~ /$dirsep/ ) + && ( $cmd_name !~ /\..+$/ ) ## Only run if cmd_name has no extension? + ) { + for ( split /;/, $ENV{PATHEXT} || ".COM;.BAT;.EXE" ) { + my $name = "$cmd_name$_"; + $cmd_name = $name, last if -f $name && -x _; + } + } + + if ( $cmd_name =~ /($dirsep)/ ) { + _debug "'$cmd_name' contains '$1'" if _debugging; + croak "file not found: $cmd_name" unless -e $cmd_name ; + croak "not a file: $cmd_name" unless -f $cmd_name ; + croak "permission denied: $cmd_name" unless -x $cmd_name ; + return $cmd_name ; + } + + if ( exists $cmd_cache{$cmd_name} ) { + _debug "'$cmd_name' found in cache: '$cmd_cache{$cmd_name}'" + if _debugging; + return $cmd_cache{$cmd_name} if -x $cmd_cache{$cmd_name} ; + _debug "'$cmd_cache{$cmd_name}' no longer executable, searching..." + if _debugging; + delete $cmd_cache{$cmd_name} ; + } + + my @searched_in ; + + ## This next bit is Unix/Win32 specific, unfortunately. + ## There's been some conversation about extending File::Spec to provide + ## a universal interface to PATH, but I haven't seen it yet. + my $re = Win32_MODE ? qr/;/ : qr/:/ ; + +LOOP: + for ( split( $re, $ENV{PATH}, -1 ) ) { + $_ = "." unless length $_ ; + push @searched_in, $_ ; + + my $prospect = File::Spec->catfile( $_, $cmd_name ) ; + my @prospects ; + + @prospects = + ( Win32_MODE && ! ( -f $prospect && -x _ ) ) + ? map "$prospect$_", split /;/, $ENV{PATHEXT} || ".COM;.BAT;.EXE" + : ( $prospect ) ; + + for my $found ( @prospects ) { + if ( -f $found && -x _ ) { + $cmd_cache{$cmd_name} = $found ; + last LOOP ; + } + } + } + + if ( exists $cmd_cache{$cmd_name} ) { + _debug "'", $cmd_name, "' added to cache: '", $cmd_cache{$cmd_name}, "'" + if _debugging_details ; + return $cmd_cache{$cmd_name} ; + } + + croak "Command '$cmd_name' not found in " . join( ", ", @searched_in ) ; +} + + +sub _empty($) { ! ( defined $_[0] && length $_[0] ) } + +## 'safe' versions of otherwise fun things to do. See also IPC::Run::Win32Helper. +sub _close { + confess 'undef' unless defined $_[0] ; + no strict 'refs' ; + my $fd = $_[0] =~ /^\d+$/ ? $_[0] : fileno $_[0] ; + my $r = POSIX::close $fd ; + $r = $r ? '' : " ERROR $!" ; + delete $fds{$fd} ; + _debug "close( $fd ) = " . ( $r || 0 ) if _debugging_details ; +} + +sub _dup { + confess 'undef' unless defined $_[0] ; + my $r = POSIX::dup( $_[0] ) ; + croak "$!: dup( $_[0] )" unless defined $r ; + $r = 0 if $r eq '0 but true' ; + _debug "dup( $_[0] ) = $r" if _debugging_details ; + $fds{$r} = 1 ; + return $r ; +} + + +sub _dup2_rudely { + confess 'undef' unless defined $_[0] && defined $_[1] ; + my $r = POSIX::dup2( $_[0], $_[1] ) ; + croak "$!: dup2( $_[0], $_[1] )" unless defined $r ; + $r = 0 if $r eq '0 but true' ; + _debug "dup2( $_[0], $_[1] ) = $r" if _debugging_details ; + $fds{$r} = 1 ; + return $r ; +} + +sub _exec { + confess 'undef passed' if grep !defined, @_ ; +# exec @_ or croak "$!: exec( " . join( ', ', @_ ) . " )" ; + _debug 'exec()ing ', join " ", map "'$_'", @_ if _debugging_details ; + +# { +## Commented out since we don't call this on Win32. +# # This works around the bug where 5.6.1 complains +# # "Can't exec ...: No error" after an exec on NT, where +# # exec() is simulated and actually returns in Perl's C +# # code, though Perl's &exec does not... +# no warnings "exec" ; +# +# # Just in case the no warnings workaround +# # stops beign a workaround, we don't want +# # old values of $! causing spurious strerr() +# # messages to appear in the "Can't exec" message +# undef $! ; + exec @_ ; +# } +# croak "$!: exec( " . join( ', ', map "'$_'", @_ ) . " )" ; + ## Fall through so $! can be reported to parent. +} + + +sub _sysopen { + confess 'undef' unless defined $_[0] && defined $_[1] ; +_debug sprintf( "O_RDONLY=0x%02x ", O_RDONLY ), +sprintf( "O_WRONLY=0x%02x ", O_WRONLY ), +sprintf( "O_RDWR=0x%02x ", O_RDWR ), +sprintf( "O_TRUNC=0x%02x ", O_TRUNC), +sprintf( "O_CREAT=0x%02x ", O_CREAT), +sprintf( "O_APPEND=0x%02x ", O_APPEND), +if _debugging_details ; + my $r = POSIX::open( $_[0], $_[1], 0644 ) ; + croak "$!: open( $_[0], ", sprintf( "0x%03x", $_[1] ), " )" unless defined $r ; + _debug "open( $_[0], ", sprintf( "0x%03x", $_[1] ), " ) = $r" + if _debugging_data ; + $fds{$r} = 1 ; + return $r ; +} + +sub _pipe { + ## Normal, blocking write for pipes that we read and the child writes, + ## since most children expect writes to stdout to block rather than + ## do a partial write. + my ( $r, $w ) = POSIX::pipe ; + croak "$!: pipe()" unless defined $r ; + _debug "pipe() = ( $r, $w ) " if _debugging_details ; + $fds{$r} = $fds{$w} = 1 ; + return ( $r, $w ) ; +} + +sub _pipe_nb { + ## For pipes that we write, unblock the write side, so we can fill a buffer + ## and continue to select(). + ## Contributed by Borislav Deianov , with minor + ## bugfix on fcntl result by me. + local ( *R, *W ) ; + my $f = pipe( R, W ) ; + croak "$!: pipe()" unless defined $f ; + my ( $r, $w ) = ( fileno R, fileno W ) ; + _debug "pipe_nb pipe() = ( $r, $w )" if _debugging_details ; + unless ( Win32_MODE ) { + ## POSIX::fcntl doesn't take fd numbers, so gotta use Perl's and + ## then _dup the originals (which get closed on leaving this block) + my $fres = fcntl( W, &F_SETFL, O_WRONLY | O_NONBLOCK ); + croak "$!: fcntl( $w, F_SETFL, O_NONBLOCK )" unless $fres ; + _debug "fcntl( $w, F_SETFL, O_NONBLOCK )" if _debugging_details ; + } + ( $r, $w ) = ( _dup( $r ), _dup( $w ) ) ; + _debug "pipe_nb() = ( $r, $w )" if _debugging_details ; + return ( $r, $w ) ; +} + +sub _pty { + require IO::Pty ; + my $pty = IO::Pty->new() ; + croak "$!: pty ()" unless $pty ; + $pty->autoflush() ; + $pty->blocking( 0 ) or croak "$!: pty->blocking ( 0 )" ; + _debug "pty() = ( ", $pty->fileno, ", ", $pty->slave->fileno, " )" + if _debugging_details ; + $fds{$pty->fileno} = $fds{$pty->slave->fileno} = 1 ; + return $pty ; +} + + +sub _read { + confess 'undef' unless defined $_[0] ; + my $s = '' ; + my $r = POSIX::read( $_[0], $s, 10_000 ) ; + croak "$!: read( $_[0] )" if not($r) and $! != POSIX::EINTR; + $r ||= 0; + _debug "read( $_[0] ) = $r chars '$s'" if _debugging_data ; + return $s ; +} + + +## A METHOD, not a function. +sub _spawn { + my IPC::Run $self = shift ; + my ( $kid ) = @_ ; + + _debug "opening sync pipe ", $kid->{PID} if _debugging_details ; + my $sync_reader_fd ; + ( $sync_reader_fd, $self->{SYNC_WRITER_FD} ) = _pipe ; + $kid->{PID} = fork() ; + croak "$! during fork" unless defined $kid->{PID} ; + + unless ( $kid->{PID} ) { + ## _do_kid_and_exit closes sync_reader_fd since it closes all unwanted and + ## unloved fds. + $self->_do_kid_and_exit( $kid ) ; + } + _debug "fork() = ", $kid->{PID} if _debugging_details ; + + ## Wait for kid to get to it's exec() and see if it fails. + _close $self->{SYNC_WRITER_FD} ; + my $sync_pulse = _read $sync_reader_fd ; + _close $sync_reader_fd ; + + if ( ! defined $sync_pulse || length $sync_pulse ) { + if ( waitpid( $kid->{PID}, 0 ) >= 0 ) { + $kid->{RESULT} = $? ; + } + else { + $kid->{RESULT} = -1 ; + } + $sync_pulse = + "error reading synchronization pipe for $kid->{NUM}, pid $kid->{PID}" + unless length $sync_pulse ; + croak $sync_pulse ; + } + return $kid->{PID} ; + +## Wait for pty to get set up. This is a hack until we get synchronous +## selects. +if ( keys %{$self->{PTYS}} && $IO::Pty::VERSION < 0.9 ) { +_debug "sleeping to give pty a chance to init, will fix when newer IO::Pty arrives." ; +sleep 1 ; +} +} + + +sub _write { + confess 'undef' unless defined $_[0] && defined $_[1] ; + my $r = POSIX::write( $_[0], $_[1], length $_[1] ) ; + croak "$!: write( $_[0], '$_[1]' )" unless $r ; + _debug "write( $_[0], '$_[1]' ) = $r" if _debugging_data ; + return $r ; +} + + +=item run + +Run takes a harness or harness specification and runs it, pumping +all input to the child(ren), closing the input pipes when no more +input is available, collecting all output that arrives, until the +pipes delivering output are closed, then waiting for the children to +exit and reaping their result codes. + +You may think of C as being like + + start( ... )->finish() ; + +, though there is one subtle difference: run() does not +set \$input_scalars to '' like finish() does. If an exception is thrown +from run(), all children will be killed off "gently", and then "annihilated" +if they do not go gently (in to that dark night. sorry). + +If any exceptions are thrown, this does a L before propogating +them. + +=cut + +use vars qw( $in_run ); ## No, not Enron ;) + +sub run { + local $in_run = 1; ## Allow run()-only optimizations. + my IPC::Run $self = start( @_ ); + my $r = eval { + $self->{clear_ins} = 0 ; + $self->finish ; + } ; + if ( $@ ) { + my $x = $@ ; + $self->kill_kill ; + die $x ; + } + return $r ; +} + + +=item signal + + ## To send it a specific signal by name ("USR1"): + signal $h, "USR1" ; + $h->signal ( "USR1" ) ; + +If $signal is provided and defined, sends a signal to all child processes. Try +not to send numeric signals, use C<"KILL"> instead of C<9>, for instance. +Numeric signals aren't portable. + +Throws an exception if $signal is undef. + +This will I clean up the harness, C it if you kill it. + +Normally TERM kills a process gracefully (this is what the command line utility +C does by default), INT is sent by one of the keys C<^C>, C or +CDelE>, and C is used to kill a process and make it coredump. + +The C signal is often used to get a process to "restart", rereading +config files, and C and C for really application-specific things. + +Often, running C (that's a lower case "L") on the command line will +list the signals present on your operating system. + +B: The signal subsystem is not at all portable. We *may* offer +to simulate C and C on some operating systems, submit code +to me if you want this. + +B: Up to and including perl v5.6.1, doing almost anything in a +signal handler could be dangerous. The most safe code avoids all +mallocs and system calls, usually by preallocating a flag before +entering the signal handler, altering the flag's value in the +handler, and responding to the changed value in the main system: + + my $got_usr1 = 0 ; + sub usr1_handler { ++$got_signal } + + $SIG{USR1} = \&usr1_handler ; + while () { sleep 1 ; print "GOT IT" while $got_usr1-- ; } + +Even this approach is perilous if ++ and -- aren't atomic on your system +(I've never heard of this on any modern CPU large enough to run perl). + +=cut + +sub signal { + my IPC::Run $self = shift ; + + local $cur_self = $self ; + + $self->_kill_kill_kill_pussycat_kill unless @_ ; + + Carp::cluck "Ignoring extra parameters passed to kill()" if @_ > 1 ; + + my ( $signal ) = @_ ; + croak "Undefined signal passed to signal" unless defined $signal ; + for ( grep $_->{PID} && ! defined $_->{RESULT}, @{$self->{KIDS}} ) { + _debug "sending $signal to $_->{PID}" + if _debugging; + kill $signal, $_->{PID} + or _debugging && _debug "$! sending $signal to $_->{PID}" ; + } + + return ; +} + + +=item kill_kill + + ## To kill off a process: + $h->kill_kill ; + kill_kill $h ; + + ## To specify the grace period other than 30 seconds: + kill_kill $h, grace => 5 ; + + ## To send QUIT instead of KILL if a process refuses to die: + kill_kill $h, coup_d_grace => "QUIT" ; + +Sends a C, waits for all children to exit for up to 30 seconds, then +sends a C to any that survived the C. + +Will wait for up to 30 more seconds for the OS to sucessfully C the +processes. + +The 30 seconds may be overriden by setting the C option, this +overrides both timers. + +The harness is then cleaned up. + +The doubled name indicates that this function may kill again and avoids +colliding with the core Perl C function. + +Returns a 1 if the C was sufficient, or a 0 if C was +required. Throws an exception if C did not permit the children +to be reaped. + +B: The grace period is actually up to 1 second longer than that +given. This is because the granularity of C