package Text::ParseWords;
require 5.000;
-require Exporter;
-require AutoLoader;
use Carp;
-@ISA = qw(Exporter AutoLoader);
+require AutoLoader;
+*AUTOLOAD = \&AutoLoader::AUTOLOAD;
+
+require Exporter;
+@ISA = qw(Exporter);
@EXPORT = qw(shellwords quotewords);
@EXPORT_OK = qw(old_shellwords);
-# This code needs updating to use new Perl 5 features (regexp etc).
+=head1 NAME
-# ParseWords.pm
-#
-# Usage:
-# use ParseWords;
-# @words = &quotewords($delim, $keep, @lines);
-# @words = &shellwords(@lines);
-# @words = &old_shellwords(@lines);
-
-# Hal Pomeranz (pomeranz@netcom.com), 23 March 1994
-# Permission to use and distribute under the same terms as Perl.
-# No warranty expressed or implied.
-
-# Basically an update and generalization of the old shellwords.pl.
-# Much code shamelessly stolen from the old version (author unknown).
-#
-# &quotewords() accepts a delimiter (which can be a regular expression)
-# and a list of lines and then breaks those lines up into a list of
-# words ignoring delimiters that appear inside quotes.
-#
-# The $keep argument is a boolean flag. If true, the quotes are kept
-# with each word, otherwise quotes are stripped in the splitting process.
-# $keep also defines whether unprotected backslashes are retained.
-#
+Text::ParseWords - parse text into an array of tokens
-1;
-__END__
+=head1 SYNOPSIS
+ use Text::ParseWords;
+ @words = &quotewords($delim, $keep, @lines);
+ @words = &shellwords(@lines);
+ @words = &old_shellwords(@lines);
-sub shellwords {
+=head1 DESCRIPTION
+
+&quotewords() accepts a delimiter (which can be a regular expression)
+and a list of lines and then breaks those lines up into a list of
+words ignoring delimiters that appear inside quotes.
+
+The $keep argument is a boolean flag. If true, the quotes are kept
+with each word, otherwise quotes are stripped in the splitting process.
+$keep also defines whether unprotected backslashes are retained.
+
+A &shellwords() replacement is included to demonstrate the new package.
+This version differs from the original in that it will _NOT_ default
+to using $_ if no arguments are given. I personally find the old behavior
+to be a mis-feature.
+
+&quotewords() works by simply jamming all of @lines into a single
+string in $_ and then pulling off words a bit at a time until $_
+is exhausted.
+
+=head1 AUTHORS
+
+Hal Pomeranz (pomeranz@netcom.com), 23 March 1994
- # A &shellwords() replacement is included to demonstrate the new package.
- # This version differs from the original in that it will _NOT_ default
- # to using $_ if no arguments are given. I personally find the old behavior
- # to be a mis-feature.
+Basically an update and generalization of the old shellwords.pl.
+Much code shamelessly stolen from the old version (author unknown).
+=cut
+
+1;
+__END__
+
+sub shellwords {
local(@lines) = @_;
$lines[$#lines] =~ s/\s+$//;
&quotewords('\s+', 0, @lines);
sub quotewords {
-# &quotewords() works by simply jamming all of @lines into a single
-# string in $_ and then pulling off words a bit at a time until $_
-# is exhausted.
-#
# The inner "for" loop builds up each word (or $field) one $snippet
# at a time. A $snippet is a quoted string, a backslashed character,
# or an unquoted string. We fall out of the "for" loop when we reach
# the end of $_ or when we strip a leading delimiter off of $_; the
# finished $field is then pushed onto @words. A quote at the start of
# $_ that matches neither quoted-string pattern is reported with croak
# as an unmatched quote. Unquoted text is consumed one character at a
# time, stopping at a delimiter, a quote or a backslash. One character
# at a time behavior was necessary if the delimiter was going to be a
# regexp (love to hear it if you can figure out a better way).
- local($delim, $keep, @lines) = @_;
- local(@words,$snippet,$field,$_);
+ my ($delim, $keep, @lines) = @_;
+ my (@words, $snippet, $field);
+
+ local $_ = join ('', @lines);
- $_ = join('', @lines);
- while ($_) {
+ while (length) {
$field = '';
+
for (;;) {
- $snippet = '';
- if (s/^"(([^"\\]|\\[\\"])*)"//) {
+ $snippet = '';
+
+ if (s/^"([^"\\]*(\\.[^"\\]*)*)"//) {
$snippet = $1;
- $snippet = "\"$snippet\"" if ($keep);
+ $snippet = qq|"$snippet"| if $keep;
}
- elsif (s/^'(([^'\\]|\\[\\'])*)'//) {
+ elsif (s/^'([^'\\]*(\\.[^'\\]*)*)'//) {
$snippet = $1;
- $snippet = "'$snippet'" if ($keep);
+ $snippet = "'$snippet'" if $keep;
}
elsif (/^["']/) {
- croak "Unmatched quote";
+ croak 'Unmatched quote';
}
- elsif (s/^\\(.)//) {
- $snippet = $1;
- $snippet = "\\$snippet" if ($keep);
- }
- elsif (!$_ || s/^$delim//) {
- last;
+ elsif (s/^\\(.)//) {
+ $snippet = $1;
+ $snippet = "\\$snippet" if $keep;
+ }
+ elsif (!length || s/^$delim//) {
+ last;
}
else {
- while ($_ && !(/^$delim/ || /^['"\\]/)) {
- $snippet .= substr($_, 0, 1);
- substr($_, 0, 1) = '';
- }
+ while (length && !(/^$delim/ || /^['"\\]/)) {
+ $snippet .= substr ($_, 0, 1);
+ substr($_, 0, 1) = '';
+ }
}
+
$field .= $snippet;
}
- push(@words, $field);
+
+ push @words, $field;
}
- @words;
+
+ return @words;
}