2024-10-29 20:49:19 +01:00
|
|
|
#!/usr/bin/env perl
|
|
|
|
|
|
|
|
package SplitLine;
|
|
|
|
|
|
|
|
use 5.020;
|
|
|
|
|
|
|
|
use warnings;
|
|
|
|
use strict;
|
|
|
|
|
|
|
|
use feature 'signatures';
|
|
|
|
no warnings 'experimental::signatures';
|
|
|
|
|
|
|
|
use parent qw(Exporter);
|
|
|
|
our @EXPORT = qw(split_line);
|
|
|
|
|
|
|
|
# splits line into arguments separated by unquoted whitespace.
|
|
|
|
# handles unbalanced quotes by treating them as part of the
|
|
|
|
# argument they were found within.
|
2024-11-04 04:26:56 +01:00
|
|
|
sub split_line($line, %opts) {
|
2024-10-29 20:49:19 +01:00
|
|
|
my %default_opts = (
|
2024-11-04 04:26:56 +01:00
|
|
|
strip_quotes => 0,
|
|
|
|
keep_spaces => 0,
|
|
|
|
preserve_escapes => 0,
|
|
|
|
strip_commas => 0,
|
2024-10-29 20:49:19 +01:00
|
|
|
);
|
|
|
|
|
2024-11-04 04:26:56 +01:00
|
|
|
print STDERR "split: [$line]\n";
|
|
|
|
|
2024-10-29 20:49:19 +01:00
|
|
|
%opts = (%default_opts, %opts);
|
|
|
|
|
2024-11-04 04:26:56 +01:00
|
|
|
return () if not length $line;
|
|
|
|
|
2024-10-29 20:49:19 +01:00
|
|
|
my @chars = split //, $line;
|
|
|
|
|
|
|
|
my @args;
|
2024-11-04 04:26:56 +01:00
|
|
|
my $ch;
|
|
|
|
my $pos;
|
2024-10-29 20:49:19 +01:00
|
|
|
my $quote;
|
2024-11-04 04:26:56 +01:00
|
|
|
my $escaped = 0;
|
|
|
|
my $token = '';
|
|
|
|
my $last_token = '';
|
|
|
|
my $i = 0;
|
2024-10-29 20:49:19 +01:00
|
|
|
my $ignore_quote = 0;
|
2024-11-04 04:26:56 +01:00
|
|
|
my $spaces = 0;
|
|
|
|
my $add_token = 0;
|
|
|
|
my $got_ch = 0;
|
2024-10-29 20:49:19 +01:00
|
|
|
|
|
|
|
while (1) {
|
|
|
|
if ($i >= @chars) {
|
|
|
|
if (defined $quote) {
|
|
|
|
# reached end, but unbalanced quote... reset to beginning of quote and ignore it
|
2024-11-04 04:26:56 +01:00
|
|
|
$i = $pos;
|
2024-10-29 20:49:19 +01:00
|
|
|
$ignore_quote = 1;
|
2024-11-04 04:26:56 +01:00
|
|
|
$quote = undef;
|
|
|
|
$token = $last_token;
|
2024-10-29 20:49:19 +01:00
|
|
|
} else {
|
|
|
|
# add final token and exit
|
2024-11-04 04:26:56 +01:00
|
|
|
$token .= '\\' if $escaped;
|
|
|
|
push @args, $token;
|
2024-10-29 20:49:19 +01:00
|
|
|
last;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
$ch = $chars[$i++];
|
|
|
|
|
|
|
|
$spaces = 0 if $ch ne ' ';
|
|
|
|
|
|
|
|
if ($escaped) {
|
2024-11-04 04:26:56 +01:00
|
|
|
if ($add_token) {
|
|
|
|
push @args, $token;
|
|
|
|
$token = '';
|
|
|
|
$add_token = 0;
|
|
|
|
}
|
|
|
|
|
2024-10-29 20:49:19 +01:00
|
|
|
if ($opts{preserve_escapes}) {
|
|
|
|
$token .= "\\$ch";
|
|
|
|
} else {
|
|
|
|
$token .= $ch;
|
|
|
|
}
|
2024-11-04 04:26:56 +01:00
|
|
|
|
2024-10-29 20:49:19 +01:00
|
|
|
$escaped = 0;
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
|
|
|
|
if ($ch eq '\\') {
|
|
|
|
$escaped = 1;
|
2024-11-04 04:26:56 +01:00
|
|
|
$got_ch = 1;
|
2024-10-29 20:49:19 +01:00
|
|
|
next;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (defined $quote) {
|
|
|
|
if ($ch eq $quote) {
|
|
|
|
# closing quote
|
|
|
|
$token .= $ch unless $opts{strip_quotes};
|
|
|
|
$quote = undef;
|
|
|
|
} else {
|
|
|
|
# still within quoted argument
|
|
|
|
$token .= $ch;
|
|
|
|
}
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (not defined $quote and ($ch eq "'" or $ch eq '"')) {
|
2024-11-04 04:26:56 +01:00
|
|
|
$got_ch = 1;
|
|
|
|
|
|
|
|
if ($add_token) {
|
|
|
|
push @args, $token;
|
|
|
|
$token = '';
|
|
|
|
$add_token = 0;
|
|
|
|
}
|
|
|
|
|
2024-10-29 20:49:19 +01:00
|
|
|
if ($ignore_quote) {
|
|
|
|
# treat unbalanced quote as part of this argument
|
|
|
|
$token .= $ch;
|
|
|
|
$ignore_quote = 0;
|
|
|
|
} else {
|
|
|
|
# begin potential quoted argument
|
2024-11-04 04:26:56 +01:00
|
|
|
$pos = $i - 1;
|
|
|
|
$quote = $ch;
|
2024-10-29 20:49:19 +01:00
|
|
|
$last_token = $token;
|
|
|
|
$token .= $ch unless $opts{strip_quotes};
|
|
|
|
}
|
|
|
|
next;
|
|
|
|
}
|
|
|
|
|
2024-11-04 04:26:56 +01:00
|
|
|
if ($ch eq ' ' or $ch eq "\n" or $ch eq "\t" or ($opts{strip_commas} and $ch eq ',')) {
|
2024-10-29 20:49:19 +01:00
|
|
|
if (++$spaces > 1 and $opts{keep_spaces}) {
|
|
|
|
$token .= $ch;
|
|
|
|
next;
|
|
|
|
} else {
|
2024-11-04 04:26:56 +01:00
|
|
|
if ($opts{keep_spaces} && $ch eq "\n") {
|
|
|
|
$token .= $ch;
|
|
|
|
}
|
|
|
|
|
|
|
|
unless ($opts{strip_commas} and $token eq ',') {
|
|
|
|
$add_token = 1 if $got_ch;;
|
|
|
|
}
|
2024-10-29 20:49:19 +01:00
|
|
|
next;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-11-04 04:26:56 +01:00
|
|
|
if ($add_token) {
|
|
|
|
push @args, $token;
|
|
|
|
$token = '';
|
|
|
|
$add_token = 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
$got_ch = 1;
|
2024-10-29 20:49:19 +01:00
|
|
|
$token .= $ch;
|
|
|
|
}
|
|
|
|
|
2024-11-04 04:26:56 +01:00
|
|
|
use Data::Dumper;
|
|
|
|
print STDERR "split: ", Dumper(\@args), "\n";
|
2024-10-29 20:49:19 +01:00
|
|
|
return @args;
|
|
|
|
}
|
|
|
|
|
|
|
|
1;
|