-
Notifications
You must be signed in to change notification settings - Fork 25
/
Copy pathreposloc
executable file
·448 lines (386 loc) · 11 KB
/
reposloc
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
#!/usr/bin/perl
use strict;
use warnings;
use Cwd qw/getcwd/;
use DateTime;
use DateTime::Duration;
use File::Path qw/rmtree/;
use File::Spec::Functions qw/catfile/;
use File::Temp qw/tmpnam/;
use IO::Handle;
use JSON qw/from_json/;
use POSIX qw/LONG_MAX/;
use Switch;
sub print_help;
sub vcs_clone;
sub vcs_revlist;
sub vcs_checkout;
# Parse arguments.
my %opts;
my $na = shift || '.';
while ($na =~ /^-./) {
switch ($na) {
case '-sparse' { $opts{sparse} = 1 }
case '-noplot' { $opts{noplot} = 1 }
case '-bylang' { $opts{bylang} = 1 }
case '-nocomments' { $opts{nocomments} = 1 }
case '-progress' { $opts{progress} = 1 }
case '-warn' { $opts{warn} = 1 }
case '-verbose' { $opts{verbose} = 1 }
else {
print_help;
exit
}
}
$na = shift || '.';
}
# Figure out what the repositories we want are
my @repos;
if ($na =~ /^-$/) {
# Read the list of repositories from standard input
push @repos, $_ while <STDIN>;
} else {
push @repos, $na;
push @repos, @ARGV;
}
@repos = keys { map {$_ => 1} @repos };
# Get type of repositories
my %vcs;
for my $r (@repos) {
my $v = "";
if ($r =~ /^git::(.*)$/) {
$v = "git";
$r = $1;
} elsif ($na =~ /^hg::(.*)$/) {
$v = "hg";
$r = $1;
} else {
# autodetect based on presence of '.git', '.hg' etc.
if (-d catfile($r, ".git")) {
$v = "git";
} elsif (-d catfile($r, ".hg")) {
$v = "hg";
} else {
die "directory does not appear to be a repository";
}
}
$vcs{$r} = $v;
}
# Clone all repositories and get stats
my %destrepo;
my $calldir = getcwd;
my %commits;
my ($ri, $rt) = (0, $#repos+1);
my @langs;
my %data; # $data{$repo}{$timestamp}
for my $repo (@repos) {
# Clone
$destrepo{$repo} = tmpnam();
unless (vcs_clone $vcs{$repo}, $repo, $destrepo{$repo}) {
rmtree $destrepo{$repo};
delete $destrepo{$repo};
warn "Ignoring repo $repo";
next;
}
# Read list of commits
chdir $destrepo{$repo};
my @ci = vcs_revlist $vcs{$repo};
chdir $calldir;
if ($opts{sparse}) {
my @cs;
# Weed out changes that occured on the same day. We have to do
# this both here (rather than below) so that we don't waste
# time running `sloc` on these changes.
my $last = DateTime->from_epoch(epoch => 0);
for (@ci) {
my ($ts, $rev) = split / /;
my $dt = DateTime->from_epoch(epoch => $ts);
$dt->truncate(to=>"day");
if ($dt->delta_days($last)->in_units('days') >= 1) {
$last = $dt;
push @cs, "$ts $rev";
}
}
@ci = @cs;
}
# We'll construct an in-memory graph for each repository, and then add
# them together at the end.
$ri++;
my ($pci, $pct) = (0, $#ci+1); print STDERR " $ri/$rt\tFetching stats for '$repo' ($pct commits)...\n" if $opts{progress} or $opts{verbose};
chdir $destrepo{$repo};
for (@ci) {
$pci++;
print STDERR " $pci" if $opts{progress} or $opts{verbose};
my ($ts, $rev) = split / /;
vcs_checkout $vcs{$repo}, $rev;
my $json = `sloc -json`;
my $result = from_json $json or die $!;
$data{$repo}{$ts} = $result;
for my $l (keys $result) {
unshift @langs, $l unless grep {$_ eq $l} @langs;
}
}
print STDERR "\n" if $opts{progress} or $opts{verbose};
chdir $calldir;
rmtree $destrepo{$repo};
}
## Post-process and graph
my $out;
my $dfilename = tmpnam();
unless ($opts{noplot}) {
# gnuplot is rather limited, so we'll need to write the data to a
# temporary file.
open $out, ">", $dfilename or die $!;
} else {
$out = \*STDOUT;
}
# Header
print $out "Date ";
if ($opts{bylang}) {
print $out "$_ " for @langs;
} else {
unless ($opts{nocomments}) {
print $out "Code Comments";
} else {
print $out "Code";
}
}
print $out "\n";
# Collect all times in use.
@repos = keys %destrepo;
my %timesh;
my %rtimes;
for my $repo (@repos) {
my @tss = sort keys $data{$repo};
$timesh{$_} = 1 for @tss;
$rtimes{$repo} = \@tss;
}
my @atimes = sort (keys %timesh);
my @times;
unless ($opts{sparse}) {
# Outside of sparse mode, we just want all the times.
@times = @atimes;
} else {
# In sparse mode, we want a total of one time per day in which
# something changed.
my $last = DateTime->from_epoch(epoch => 0);
for my $ts (@atimes) {
my $dt = DateTime->from_epoch(epoch => $ts);
$dt->truncate(to => "day");
if ($dt->delta_days($last)->in_units('days') >= 1) {
$last = $dt;
push @times, $ts;
}
}
}
# Now construct the actual graphs
my %current; # Most recent data for each repo
my ($startts, $endts) = ($times[0], $times[$#times]);
for my $ts (@times) {
# update %current if necessary
for my $repo (@repos) {
my @rts = @{$rtimes{$repo}};
# see if there's anything waiting
my $nt = $rts[0] || LONG_MAX;
while ($nt <= $ts) {
shift @rts;
# and here we update %current
$current{$repo} = $data{$repo}{$nt};
$nt = $rts[0] || LONG_MAX;
}
$rtimes{$repo} = \@rts;
}
print $out "$ts";
if ($opts{bylang}) {
my $v = 0;
for my $lang (@langs) {
for my $repo (@repos) {
my $r = $current{$repo}{$lang};
$v += $r->{CodeLines} if $r;
$v += $r->{CommentLines} if $r and not $opts{nocomments};
}
print $out " $v";
}
} else {
my ($code, $comment) = (0, 0);
for my $repo (@repos) {
my $d = $current{$repo};
next unless $d;
for my $lang (keys $d) {
$code += $d->{$lang}{CodeLines};
$comment += $d->{$lang}{CommentLines};
}
}
print $out " $code";
my $total = $code + $comment;
print $out " $total" unless $opts{nocomments};
}
print $out "\n";
}
unless ($opts{noplot}) {
close $out or die $!;
my $gp;
open $gp, "| gnuplot" or die $!;
$gp->autoflush(1);
my $outfile = catfile($calldir, 'output.svg');
print $gp <<END;
set terminal svg size 800, 580 dynamic enhanced background "#FFFFFF"
set output '$outfile'
set xdata time
set timefmt "%s"
set xrange ["$startts":"$endts"]
set yrange [0:*]
set key left
END
unless ($opts{bylang}) {
if ($opts{nocomments}) {
print $gp "plot '$dfilename' using 1:2 with filledcurves y1=0 title columnheader\n";
} else {
print $gp "plot '$dfilename' using 1:3 with filledcurves y1=0 title columnheader, '' using 1:2 with filledcurves y1=0 title columnheader\n";
}
} else {
# One part for each language
print $gp "plot '$dfilename' ";
for (my $i = $#langs+2; $i > 2; $i--) {
print $gp "using 1:$i with filledcurves y1=0 title columnheader, '' ";
}
print $gp "using 1:2 with filledcurves y1=0 title columnheader\n";
}
print $gp "exit\n";
wait;
unlink $dfilename or die $!;
}
sub print_help {
print <<END;
usage: $0 [OPTIONS] [[VCS::]REPOSITORY] ...
options:
-sparse Iterate on dates rather than commits
-noplot Don't pass to gnuplot; just print to standard output
-bylang Show statistics for individual languages
-nocomments Don't include comment counts
-progress Display progress information
-warn Don't abort on VCS call errors
-verbose Print extra information
END
}
sub vcs_error {
my $err = shift;
die $err unless $opts{warn};
warn $err;
}
sub vcs_clone {
my ($vcs, $repo, $destrepo) = @_;
switch ($vcs) {
case "git" {
my @cmd = ("git", "clone", "-b", "master");
push @cmd, "-q" unless $opts{verbose};
push @cmd, $repo, $destrepo;
print ("> " . (join ' ', @cmd) . "\n") if $opts{verbose};
vcs_error "git clone failed" and return 0 if system @cmd;
}
case "hg" {
my @cmd = ("hg", "clone", "-b", "default");
push @cmd, "-q" unless $opts{verbose};
push @cmd, $repo, $destrepo;
print ("> " . (join ' ', @cmd) . "\n") if $opts{verbose};
vcs_error "mercurial clone failed" and return 0 if system @cmd;
}
}
1
}
sub git_revlist {
my @rl;
open IN, 'git rev-list master --reverse --timestamp |' or die $!;
while (<IN>) {
chomp;
push @rl, $_;
}
close IN or die $!;
return @rl;
}
sub hg_revlist {
my @rl;
open IN, 'hg log -f --template "{date} {node}\n" |' or die $!;
while (<IN>) {
chomp;
my ($d, $rev) = split / /;
my ($ts, $ms) = split /\./, $d;
push @rl, "$ts $rev";
}
close IN or die $!;
return (reverse @rl);
}
sub vcs_revlist {
my ($vcs) = @_;
switch ($vcs) {
case "git" { return git_revlist; }
case "hg" { return hg_revlist; }
}
}
sub vcs_checkout {
my ($vcs, $rev) = @_;
switch ($vcs) {
case "git" { system "git checkout --quiet $rev"; }
case "hg" { system "hg checkout --quiet $rev"; }
}
}
__END__
=head1 NAME
reposloc - get source-lines-of-code statistics from a git or mercurial
repository
=head1 SYNOPSIS
B<reposloc> [I<OPTIONS>] [[I<VCS>::]I<REPOSITORY>] ...
B<reposloc> [I<OPTIONS>] - < repositories.txt
=head1 DESCRIPTION
B<reposloc> performs source-lines-of-code analysis on a repository's history
and produces graphs of the number of lines of code versus time. Supported
version control systems are I<git>(1) and I<hg>(1) - if none is specified,
B<reposloc> will attempt to autodetect the system used. Autodetection does not
work on "bare" repositories. If multiple repositories are specified, they will
be treated as a single large repository. If a single dash is provided as the
repository name, then standard input will be read as a newline-separated list
of repositories.
Detection of URLs in the repositories list is not supported - to automatically
clone and analyze a remote repository, use the I<VCS>:: syntax.
B<reposloc> has two main modes of operation: it can tally total statistics
across languages (and optionally display comments and code as separate data),
or it can tally statistics for each language individually, with the intent of
comparison. The choice of mode has no effect on performance.
=head1 OPTIONS
=over
=item B<-sparse>
By default, B<reposloc> will look at every single commit in the repository. For
large repositories (with more than a few thousand commits) or multiple
repositories, this takes quite a while and is mostly a waste of time. Using
B<-sparse> tells reposloc only to collect one data point per day.
=item B<-noplot>
Dump the resulting data to standard output without handing it off to I<gnuplot>(1).
=item B<-bylang>
Collects data on a per-language basis for comparison between languages, rather
than only collecting total statistics (more useful for broad graphs and
comparing SLoC with comments).
=item B<-nocomments>
In "totals" mode, just graph the number of lines of code, and don't display
data for amount of comments. In by-language mode (with B<-bylang>), doesn't
include comments in each language's statistic.
=item B<-progress>
Print progress information to standard error.
=item B<-warn>
Warn only - do not abort - when external programs return errors. The entire
associated repository will be dropped from the resulting data, to avoid errors
showing up as discontinuities. This is useful for running analysis on large
sets of repositories, not all of which might be 'clean' (or even exist any
more).
=item B<-verbose>
Shows what commands are being run, and instructs those commands to print their
own verbose information. This option may not go well with B<-progress>.
=back
=head1 BUGS
Analysis of branches other than C<master> is not supported.
The B<-warn> flag only affects behaviour on clone failures - the most common,
but not by any means the only.
Please report any further bugs found to the author.
=head1 AUTHOR
Scott Lawrence <bytbox@gmail.com>
=head1 SEE ALSO
I<sloc>(1)