-
Notifications
You must be signed in to change notification settings - Fork 11
/
Copy pathcsv-check
executable file
·97 lines (81 loc) · 3.42 KB
/
csv-check
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
#!raku
# csv-check: Check validity of CSV file and report
# (m)'18 Copyright H.M.Brand 2007-2018
# Check csv-check-tuxic for a version with the original style
use v6;
use Text::CSV;
our $VERSION = "6.1-ugly"; # 2018-09-06
# Print the command-line synopsis and terminate the program.
#
#   $err - when True (the default) the text is written to STDERR;
#          it is also used as the exit status (True exits 1, False exits 0)
#   :$pn - program name interpolated into the synopsis line
sub usage (Bool $err = True, Str :$pn = "csv-check") {
    # A statement modifier opens no new scope, so the temp stays in force
    # until this sub is left and therefore covers the print below.
    temp $*OUT = $*ERR if $err;

    # The heredoc terminator's indentation is stripped from every line
    print qq:to/EOU/;
    usage: $pn [-s <sep>] [-q <quot>] [-e <esc>] [file.csv]
    -s <sep> use <sep> as seperator char. Auto-detect. Default = ','
    The string "tab" is allowed.
    -q <quot> use <quot> as quotation char. Default = '"'
    The string "undef" will disable quotation.
    -e <esc> use <esc> as escape char. Auto-detect. Default = '"'
    The string "undef" will disable escapes.
    EOU

    exit $err ?? 1 !! 0;
} # usage
# Render "tag = <value>" with ANSI colours: the tag in blue, the value in
# green.  The value is shown in its .perl form so that control characters
# stay visible (a TAB shows up as \t).
#
#   $tag - label printed in front of the value
#   $s   - string to display
sub to-str (Str $tag, Str $s) {
    my Str $literal = $s.perl;

    # Drop the double quotes that .perl wraps around the literal ...
    my Str $unquoted = $literal.subst(/^ '"' (.*) '"' $/, -> $m { ~$m[0] });

    # ... and turn every escaped \" back into a plain "
    my Str $shown = $unquoted.subst(/ "\\" '"' /, '"', :g);

    "\e[34m$tag\e[0m = <\e[32m$shown\e[0m>";
} # to-str
# Guess the separator character from the raw CSV text.
#
# Candidates are tried in the order ",", ";", TAB: first where the candidate
# sits next to a double quote or a digit, then where it sits between word
# characters.  Falls back to "," when nothing matches.
sub guess-sep (Str $data --> Str) {
    $data ~~ m/<["\d]> "," <["\d,]>/ ?? "," !!
    $data ~~ m/<["\d]> ";" <["\d;]>/ ?? ";" !!
    $data ~~ m/<["\d]> "\t" <["\d]> / ?? "\t" !!
    # If neither, then for unquoted strings
    $data ~~ m/ \w "," <[\w,]> / ?? "," !!
    $data ~~ m/ \w ";" <[\w;]> / ?? ";" !!
    $data ~~ m/ \w "\t" <[\w]> / ?? "\t" !! ",";
} # guess-sep

# Entry point: parse every given CSV file and report on it.
#
#   :$help - print the usage text on STDOUT and exit with status 0
#   :$s    - separator; "tab" selects a TAB, empty (default) auto-detects
#   :$q    - quotation char; the string "undef" passes an undefined value
#   :$e    - escape char; the string "undef" passes an undefined value
#   *@f    - files to check; "-" or no file at all reads from STDIN
#
# For each input the report shows the number of rows, the column count(s),
# the separator, quote and line ending in use and whether any field was
# flagged binary.  Differing column counts produce a per-count warning.
sub MAIN (Bool :$help = False, Str :$s = "", Str :$q = '"', Str :$e = '"', *@f) {
    # Strip any leading directory part from the program name
    my Str $pn = $*PROGRAM-NAME;
    $pn ~~ s{ .* "/"} = "";

    $help and usage :$pn, False;

    # Without file arguments, check a single stream from STDIN
    @f.elems or @f.push: "-";

    for @f -> $fn {
        # .slurp replaces the deprecated (removed in 6.d) .slurp-rest
        my Str $data = $fn eq "-" ?? $*IN.slurp !! slurp $fn; # Binary NYI
        my Str $eol;
        my Str $sep = $s eq "tab" ?? "\t" !! $s;
        my Str $quo = $q eq "undef" ?? Str !! $q;
        my Str $esc = $e eq "undef" ?? Str !! $e;
        my Bool $bin = False;
        my Int $rows = 0;
        my %cols;    # number of fields => number of rows with that many fields

        # No sep char passed, try to auto-detect
        unless $sep {
            $sep = guess-sep $data;
        }

        # The trailing line ending is only used in the report below, so it is
        # detected regardless of whether a separator was passed explicitly.
        $data ~~ m/(<[\r\n]>+)$/ and $eol = $0.Str;

        my $csv = Text::CSV.new(:$sep, :$quo, :$esc, :meta);
        my $fh = IO::String.new($data);
        my CSV::Field @row;
        while (@row = $csv.getline($fh)) {
            $rows++;
            %cols{@row.elems}++;
            @row.map(*.is_binary).any and $bin = True;
        }

        # Report findings
        "Checked \e[35m$fn\e[0m with \e[34m$pn $VERSION\e[0m using \e[34mText::CSV {$csv.version}\e[0m".say;
        my $diag = $csv.error-diag;
        # NOTE(review): 2012 is presumably Text::CSV's "EOF" code, i.e. the
        # parser stopped because the input was exhausted - confirm.
        if ($diag.error == 2012 && $csv.eof) {
            my @coll = %cols.keys.sort: { $^a <=> $^b };
            # A single column count is shown as is, several are listed in red
            my $cols = @coll.elems == 1 ?? @coll[0] !! "\e[31m({@coll.join(', ')})\e[0m";
            say "OK: rows: \e[32m$rows\e[0m, columns: \e[32m$cols\e[0m";
            say " {to-str 'sep', $sep}, {to-str 'quo', $quo}, {to-str 'eol', $eol}, bin = $bin";
            if (@coll.elems > 1) {
                "\e[33mWARN: multiple column lengths:\e[0m".say;
                for @coll -> $c {
                    printf " %6d line%s with %4d field%s\n",
                        %cols{$c}, %cols{$c} == 1 ?? " " !! "s",
                        $c, $c == 1 ?? "" !! "s";
                }
            }
        }
        else {
            # NOTE(review): presumably sinking the diagnostic at this
            # verbosity makes Text::CSV print the error - confirm.
            $csv.diag-verbose(9);
            $csv.error-diag.sink;
        }
    }
} # MAIN