home *** CD-ROM | disk | FTP | other *** search
- package Text::ParseWords;
-
- require 5.000;
- use Carp;
-
- require AutoLoader;
- *AUTOLOAD = \&AutoLoader::AUTOLOAD;
-
- require Exporter;
- @ISA = qw(Exporter);
- @EXPORT = qw(shellwords quotewords);
- @EXPORT_OK = qw(old_shellwords);
-
- =head1 NAME
-
- Text::ParseWords - parse text into an array of tokens
-
- =head1 SYNOPSIS
-
- use Text::ParseWords;
- @words = "ewords($delim, $keep, @lines);
- @words = &shellwords(@lines);
- @words = &old_shellwords(@lines);
-
- =head1 DESCRIPTION
-
- "ewords() accepts a delimiter (which can be a regular expression)
- and a list of lines and then breaks those lines up into a list of
- words ignoring delimiters that appear inside quotes.
-
- The $keep argument is a boolean flag. If true, the quotes are kept
- with each word, otherwise quotes are stripped in the splitting process.
- $keep also defines whether unprotected backslashes are retained.
-
- A &shellwords() replacement is included to demonstrate the new package.
- This version differs from the original in that it will _NOT_ default
- to using $_ if no arguments are given. I personally find the old behavior
- to be a mis-feature.
-
- "ewords() works by simply jamming all of @lines into a single
- string in $_ and then pulling off words a bit at a time until $_
- is exhausted.
-
- =head1 AUTHORS
-
- Hal Pomeranz (pomeranz@netcom.com), 23 March 1994
-
- Basically an update and generalization of the old shellwords.pl.
- Much code shamelessly stolen from the old version (author unknown).
-
- =cut
-
- 1;
- __END__
-
- sub shellwords {
- local(@lines) = @_;
- $lines[$#lines] =~ s/\s+$//;
- "ewords('\s+', 0, @lines);
- }
-
-
-
- sub quotewords {
-
- # The inner "for" loop builds up each word (or $field) one $snippet
- # at a time. A $snippet is a quoted string, a backslashed character,
- # or an unquoted string. We fall out of the "for" loop when we reach
- # the end of $_ or when we hit a delimiter. Falling out of the "for"
- # loop, we push the $field we've been building up onto the list of
- # @words we'll be returning, and then loop back and pull another word
- # off of $_.
- #
- # The first two cases inside the "for" loop deal with quoted strings.
- # The first case matches a double quoted string, removes it from $_,
- # and assigns the double quoted string to $snippet in the body of the
- # conditional. The second case handles single quoted strings. In
- # the third case we've found a quote at the current beginning of $_,
- # but it didn't match the quoted string regexps in the first two cases,
- # so it must be an unbalanced quote and we croak with an error (which can
- # be caught by eval()).
- #
- # The next case handles backslashed characters, and the next case is the
- # exit case on reaching the end of the string or finding a delimiter.
- #
- # Otherwise, we've found an unquoted thing and we pull of characters one
- # at a time until we reach something that could start another $snippet--
- # a quote of some sort, a backslash, or the delimiter. This one character
- # at a time behavior was necessary if the delimiter was going to be a
- # regexp (love to hear it if you can figure out a better way).
-
- my ($delim, $keep, @lines) = @_;
- my (@words, $snippet, $field);
-
- local $_ = join ('', @lines);
-
- while (length) {
- $field = '';
-
- for (;;) {
- $snippet = '';
-
- if (s/^"([^"\\]*(\\.[^"\\]*)*)"//) {
- $snippet = $1;
- $snippet = qq|"$snippet"| if $keep;
- }
- elsif (s/^'([^'\\]*(\\.[^'\\]*)*)'//) {
- $snippet = $1;
- $snippet = "'$snippet'" if $keep;
- }
- elsif (/^["']/) {
- croak 'Unmatched quote';
- }
- elsif (s/^\\(.)//) {
- $snippet = $1;
- $snippet = "\\$snippet" if $keep;
- }
- elsif (!length || s/^$delim//) {
- last;
- }
- else {
- while (length && !(/^$delim/ || /^['"\\]/)) {
- $snippet .= substr ($_, 0, 1);
- substr($_, 0, 1) = '';
- }
- }
-
- $field .= $snippet;
- }
-
- push @words, $field;
- }
-
- return @words;
- }
-
-
- sub old_shellwords {
-
- # Usage:
- # use ParseWords;
- # @words = old_shellwords($line);
- # or
- # @words = old_shellwords(@lines);
-
- local($_) = join('', @_);
- my(@words,$snippet,$field);
-
- s/^\s+//;
- while ($_ ne '') {
- $field = '';
- for (;;) {
- if (s/^"(([^"\\]|\\.)*)"//) {
- ($snippet = $1) =~ s#\\(.)#$1#g;
- }
- elsif (/^"/) {
- croak "Unmatched double quote: $_";
- }
- elsif (s/^'(([^'\\]|\\.)*)'//) {
- ($snippet = $1) =~ s#\\(.)#$1#g;
- }
- elsif (/^'/) {
- croak "Unmatched single quote: $_";
- }
- elsif (s/^\\(.)//) {
- $snippet = $1;
- }
- elsif (s/^([^\s\\'"]+)//) {
- $snippet = $1;
- }
- else {
- s/^\s+//;
- last;
- }
- $field .= $snippet;
- }
- push(@words, $field);
- }
- @words;
- }
-