-
Notifications
You must be signed in to change notification settings - Fork 150
/
Copy pathnames-counter.pl
120 lines (92 loc) · 3.4 KB
/
names-counter.pl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/perl
#
# File: names-counter
# Abstract: Count the number of unique names in names.csv DB.
# Usage: names-counter [csv-data-file]
# Data Fmt: name,name,name...
# Author: Bill.Costa@unh.edu
# Version: 20110311
#
# Notes: Perl style comments and blank lines in the input
# data stream are ignored.
#
#==============================================================================
# Setup and Global Definitions ==============================================
#==============================================================================
#-- Pragmas ---------------------------
use warnings; # Save me from my own dumb errors.
use strict; # Keep things squeaky clean.
#-- Core Modules ----------------------
use FindBin; # Where the heck are we'z?
#==============================================================================
# Subroutines ===============================================================
#==============================================================================
sub commify
# Commify a number; "Perl Cookbook" 2.17
{
my $text = reverse($_[0]);
$text =~ s/(\d\d\d)(?=\d)(?!\d*\.)/$1,/g;
return(scalar(reverse($text)));
}
#==============================================================================
# Main Line =================================================================
#==============================================================================
#-------------------------------+
# Get/find our input data file. |
#-------------------------------+
my $dataName = $ARGV[0];
my $junk = $ARGV[1];
die("? $0: too many arguments after '$dataName'\n") if (defined($junk));
$dataName = 'names.csv' if (not defined($dataName));
my $inSpec;
if (-e $dataName)
{
$inSpec = $dataName;
}
else
{
$inSpec = "$FindBin::Bin/$dataName";
die("? $0: cannot find input data\n_ $inSpec\n")
if (not -e $inSpec);
}
open(IN, "<$inSpec")
or die("! $0: error opening $inSpec\n_ $!\n");
#-------------------------------+
# Parse each rec, stuff into a |
# hash. |
#-------------------------------+
my $NAME_RE = qr/^[a-z][a-z\.\-\ ]*$/i;
my %cat;
while (defined(my $ln = <IN>))
{
chomp($ln);
chop($ln) if ($ln =~ m/\x0D/); # Eat any trailing ^M too.
$ln =~ s/#.*$//; # Eat comments.
next if ($ln =~ m/^\s*$/); # Eat blank lines.
foreach my $name (split(/,/, $ln))
{
if ($name =~ m/^\s*$/) { warn("- blank field line $. <$ln>\n") }
elsif ($name !~ m/$NAME_RE/) { warn("- invalid char line $. <$ln>\n") }
else { $cat{$name}++; }
}
}
close(IN);
#-------------------------------+
# Report name count and show |
# most references. |
#-------------------------------+
my $nameCnt = scalar(keys(%cat));
die("? $0: no names cataloged\n_ $inSpec\n_ ") if ($nameCnt <= 0);
print(commify($nameCnt), " unique names cataloged in $inSpec\n\n");
my $topLim = 5;
my $head = "Names referenced $topLim or more times:\n";
foreach my $name (sort { $cat{$b} <=> $cat{$a} } keys(%cat))
{
last if ($cat{$name} < $topLim);
print($head);
$head = '';
printf("%8i: %s\n", $cat{$name}, $name);
}
die("? $0: probable data err; no names found with $topLim or more refs\n_ ")
if ($head ne '');
#==[ EOF: names-counter ]==