Skip to content

Commit

Permalink
new switch --ignore-regex, #862, #865, #868
Browse files Browse the repository at this point in the history
  • Loading branch information
AlDanial committed Dec 2, 2024
1 parent e35e611 commit e32d240
Show file tree
Hide file tree
Showing 4 changed files with 214 additions and 37 deletions.
117 changes: 99 additions & 18 deletions Unix/cloc
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,7 @@ my (
$opt_ignore_whitespace ,
$opt_ignore_case ,
$opt_ignore_case_ext ,
@opt_ignore_regex ,
$opt_follow_links ,
$opt_autoconf ,
$opt_sum_one ,
Expand Down Expand Up @@ -352,6 +353,7 @@ my $getopt_success = GetOptions( # {{{1
"ignore_whitespace|ignore-whitespace" => \$opt_ignore_whitespace ,
"ignore_case|ignore-case" => \$opt_ignore_case ,
"ignore_case_ext|ignore-case-ext" => \$opt_ignore_case_ext ,
"ignore_regex|ignore-regex=s" => \@opt_ignore_regex ,
"follow_links|follow-links" => \$opt_follow_links ,
"autoconf" => \$opt_autoconf ,
"sum_one|sum-one" => \$opt_sum_one ,
Expand Down Expand Up @@ -456,6 +458,7 @@ load_from_config_file($config_file, # {{{2
\$opt_ignore_whitespace ,
\$opt_ignore_case ,
\$opt_ignore_case_ext ,
\@opt_ignore_regex ,
\$opt_follow_links ,
\$opt_autoconf ,
\$opt_sum_one ,
Expand Down Expand Up @@ -540,6 +543,7 @@ $opt_exclude_ext = "" unless $opt_exclude_ext;
$opt_ignore_whitespace = 0 unless $opt_ignore_whitespace;
$opt_ignore_case = 0 unless $opt_ignore_case;
$opt_ignore_case_ext = 0 unless $opt_ignore_case_ext;
my %ignore_regex = ();
$opt_lang_no_ext = 0 unless $opt_lang_no_ext;
$opt_follow_links = 0 unless $opt_follow_links;
if (defined $opt_diff_timeout) {
Expand Down Expand Up @@ -841,6 +845,8 @@ if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) {
}
check_scale_existence(\%Filters_by_Language, \%Language_by_Extension,
\%Scale_Factor);
parse_ignore_regex(\@opt_ignore_regex, \%Filters_by_Language, \%ignore_regex)
if @opt_ignore_regex;

my $nCounted = 0;

Expand Down Expand Up @@ -1416,7 +1422,7 @@ if ( $max_processes == 0) {
# Multiprocessing is disabled
my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot,
\@files_removed_tot, \@file_pairs_tot,
0, \%Language, \%Ignored);
0, \%Language, \%ignore_regex, \%Ignored);
%Results_by_File = %{$part->{'results_by_file'}};
%Results_by_Language= %{$part->{'results_by_language'}};
%Delta_by_File = %{$part->{'delta_by_file'}};
Expand Down Expand Up @@ -1504,7 +1510,7 @@ if ( $max_processes == 0) {
$pm->start() and next;
my $count_result = count_filesets ( $fset_a, $fset_b,
\@files_added_part, \@files_removed_part,
\@filepairs_part, 1, \%Language, \%Ignored );
\@filepairs_part, 1, \%Language, \%ignore_regex, \%Ignored );
$pm->finish(0 , $count_result);
}
# Wait for processes to finish
Expand Down Expand Up @@ -1634,7 +1640,7 @@ my @sorted_files = sort keys %unique_source_file;

if ( $max_processes == 0) {
# Multiprocessing is disabled
my $part = count_files ( \@sorted_files , 0, \%Language);
my $part = count_files ( \@sorted_files , 0, \%ignore_regex, \%Language);
%Results_by_File = %{$part->{'results_by_file'}};
%Results_by_Language= %{$part->{'results_by_language'}};
%Ignored = ( %Ignored, %{$part->{'ignored'}});
Expand Down Expand Up @@ -1677,7 +1683,7 @@ if ( $max_processes == 0) {
my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes );
while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) {
$pm->start() and next;
my $count_result = count_files ( \@part, 1, \%Language );
my $count_result = count_files ( \@part, 1, \%ignore_regex, \%Language );
$pm->finish(0 , $count_result);
}
# Wait for processes to finish
Expand Down Expand Up @@ -1975,6 +1981,18 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
C++; this switch would count .C files as C rather
than C++ on *nix operating systems). File name
case insensitivity is always true on Windows.
--ignore-regex Ignore lines in source files that match the given
Perl regular expression for the given language(s).
This option can be specified multiple times.
Language names are comma separated and are followed
by the pipe character and the regular expression.
Use * to match all languages.
Examples:
--ignore-regex=\"C,Java,C++|^\\s*[{};]\\s*\$\"
--ignore-regex=\"*|DEBUG|TEST\\s+ONLY\"
These filters are applied after comments are
removed. Use --strip-comments=EXT to create
new files that show these filters applied.
--lang-no-ext=<lang> Count files without extensions using the <lang>
counter. This option overrides internal logic
for files without extensions (where such files
Expand Down Expand Up @@ -2188,6 +2206,7 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
--categorized=<file> Save file sizes in bytes, identified languages
and names of categorized files to <file>.
--counted=<file> Save names of processed source files to <file>.
See also --found, --ignored, --unique.
--diff-alignment=<file> Write to <file> a list of files and file pairs
showing which files were added, removed, and/or
compared during a run with --diff. This switch
Expand All @@ -2198,9 +2217,11 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
regular expressions. An examination of the
source code may be needed for further explanation.
--help Print this usage information and exit.
--found=<file> Save names of every file found to <file>.
--found=<file> Save names of every file found to <file>. See
also --counted, --ignored, --unique.
--ignored=<file> Save names of ignored files and the reason they
were ignored to <file>.
were ignored to <file>. See also --counted,
--found, --unique.
--print-filter-stages Print processed source code before and after
each filter is applied.
--show-ext[=<ext>] Print information about all known (or just the
Expand All @@ -2209,6 +2230,8 @@ Usage: $script [options] <file(s)/dir(s)/git hash(es)> | <set 1> <set 2> | <repo
given) languages and exit.
--show-os Print the value of the operating system mode
and exit. See also --unix, --windows.
--unique=<file> Save names of unique files found to <file>. See
also --counted, --found, --ignored.
-v[=<n>] Verbose switch (optional numeric value).
-verbose[=<n>] Long form of -v.
--version Print the version of this program and exit.
Expand Down Expand Up @@ -2558,7 +2581,7 @@ sub file_extension { # {{{1
}
} # 1}}}
sub count_files { # {{{1
my ($filelist, $counter_type, $language_hash) = @_;
my ($filelist, $counter_type, $rha_ignore_regex, $language_hash) = @_;
print "-> count_files()\n" if $opt_v > 2;
my @p_errors = ();
my %p_ignored = ();
Expand Down Expand Up @@ -2629,7 +2652,8 @@ sub count_files { # {{{1
} else {
($all_line_count,
$blank_count ,
$comment_count ,) = call_counter($file, $Language{$file}, \@Errors);
$comment_count ,) = call_counter($file, $Language{$file},
$rha_ignore_regex, \@Errors);
$code_count = $all_line_count - $blank_count - $comment_count;
}
}
Expand Down Expand Up @@ -2666,6 +2690,7 @@ sub count_filesets { # {{{1
$file_pairs,
$counter_type,
$language_hash,
$rha_ignore_regex,
$rh_Ignored) = @_;
print "-> count_filesets()\n" if $opt_v > 2;
my @p_errors = ();
Expand Down Expand Up @@ -2695,7 +2720,7 @@ sub count_filesets { # {{{1
my ($all_line_count,
$blank_count ,
$comment_count ,
) = call_counter($file, $Lang, \@p_errors);
) = call_counter($file, $Lang, $rha_ignore_regex, \@p_errors);
$already_counted{$file} = 1;
my $code_count = $all_line_count-$blank_count-$comment_count;
if ($opt_by_file) {
Expand Down Expand Up @@ -2755,7 +2780,7 @@ sub count_filesets { # {{{1
my ($all_line_count,
$blank_count ,
$comment_count ,
) = call_counter($f, $this_lang, \@p_errors);
) = call_counter($f, $this_lang, $rha_ignore_regex, \@p_errors);
$p_dbl{ $this_lang }{'comment'}{'added'} += $comment_count;
$p_dbl{ $this_lang }{'blank'}{'added'} += $blank_count;
$p_dbl{ $this_lang }{'code'}{'added'} +=
Expand Down Expand Up @@ -2795,7 +2820,7 @@ sub count_filesets { # {{{1
my ($all_line_count,
$blank_count ,
$comment_count ,
) = call_counter($f, $this_lang, \@p_errors);
) = call_counter($f, $this_lang, $rha_ignore_regex, \@p_errors);
$p_dbl{ $this_lang}{'comment'}{'removed'} += $comment_count;
$p_dbl{ $this_lang}{'blank'}{'removed'} += $blank_count;
$p_dbl{ $this_lang}{'code'}{'removed'} +=
Expand Down Expand Up @@ -3057,18 +3082,18 @@ sub count_filesets { # {{{1
($all_line_count_L,
$blank_count_L ,
$comment_count_L ,
) = call_counter($file_L, $Lang_L, \@Errors);
) = call_counter($file_L, $Lang_L, $rha_ignore_regex, \@Errors);

($all_line_count_R,
$blank_count_R ,
$comment_count_R ,
) = call_counter($file_R, $Lang_R, \@Errors);
) = call_counter($file_R, $Lang_R, $rha_ignore_regex, \@Errors);
} else {
# L and R file contents are identical, no need to diff
($all_line_count_L,
$blank_count_L ,
$comment_count_L ,
) = call_counter($file_L, $Lang_L, \@Errors);
) = call_counter($file_L, $Lang_L, $rha_ignore_regex, \@Errors);
$all_line_count_R = $all_line_count_L;
$blank_count_R = $blank_count_L ;
$comment_count_R = $comment_count_L ;
Expand Down Expand Up @@ -6872,9 +6897,10 @@ sub different_files { # {{{1
return @unique;
} # 1}}}
sub call_counter { # {{{1
my ($file , # in
$language , # in
$ra_Errors, # out
my ($file , # in
$language , # in
$rha_ignore_regex , # in
$ra_Errors , # out
) = @_;

# Logic: pass the file through the following filters:
Expand All @@ -6883,7 +6909,9 @@ sub call_counter { # {{{1
# 3. remove comments using each filter defined for this language
# (example: SQL has two, remove_starts_with(--) and
# remove_c_comments() )
# 4. compute comment lines as
# 4. if ignore regex filters are defined, remove lines that
# match any of them
# 5. compute comment lines as
# total lines - blank lines - lines left over after all
# comment filters have been applied

Expand Down Expand Up @@ -6938,6 +6966,22 @@ sub call_counter { # {{{1
@lines = rm_comments(\@lines, $language, $file,
\%EOL_Continuation_re, $ra_Errors);

# Apply the user-supplied --ignore-regex filters.  This runs after
# rm_comments(), so the patterns see the source with comments already
# removed.  $rha_ignore_regex is a hash keyed by language name whose
# values are lists of regular expressions; only the list stored for
# this file's $language is consulted, and nothing happens when the
# hash is empty or has no entry for $language.
#
# Lines rejected here are dropped from @lines, so the $comment_lines
# computation that follows (total - blank - scalar @lines) accounts
# for them as comment lines rather than code.
if (%{$rha_ignore_regex} and defined($rha_ignore_regex->{$language})) {
my @keep_lines = ();
foreach my $line (@lines) {
my $keep = 1;
# a single matching regex is enough to discard the line
foreach my $regex (@{$rha_ignore_regex->{$language}}) {
if ($line =~ m{$regex}) {
# diagnostic is only emitted at verbosity levels above 4
print "reject '$line' in $file because of '$regex'\n" if $opt_v > 4;
$keep = 0;
last;
}
}
push @keep_lines, $line if $keep;
}
# replace the working set with the surviving lines
@lines = @keep_lines;
}

my $comment_lines = $total_lines - $blank_lines - scalar @lines;
if ($opt_strip_comments) {
my $stripped_file = "";
Expand Down Expand Up @@ -14693,6 +14737,7 @@ sub load_from_config_file { # {{{1
$rs_ignore_whitespace ,
$rs_ignore_case ,
$rs_ignore_case_ext ,
$ra_ignore_regex ,
$rs_follow_links ,
$rs_autoconf ,
$rs_sum_one ,
Expand Down Expand Up @@ -14801,6 +14846,7 @@ sub load_from_config_file { # {{{1
} elsif (!defined ${$rs_ignore_whitespace} and /^(ignore_whitespace|ignore-whitespace)/) { ${$rs_ignore_whitespace} = 1;
} elsif (!defined ${$rs_ignore_case_ext} and /^(ignore_case_ext|ignore-case-ext)/) { ${$rs_ignore_case_ext} = 1;
} elsif (!defined ${$rs_ignore_case} and /^(ignore_case|ignore-case)/) { ${$rs_ignore_case} = 1;
} elsif (! @{$ra_ignore_regex} and /^(?:ignore_regex|ignore-regex)(=|\s+)['"]?(.*?)['"]?$/) { push @{$ra_ignore_regex}, $2;
} elsif (!defined ${$rs_follow_links} and /^(follow_links|follow-links)/) { ${$rs_follow_links} = 1;
} elsif (!defined ${$rs_autoconf} and /^autoconf/) { ${$rs_autoconf} = 1;
} elsif (!defined ${$rs_sum_one} and /^(sum_one|sum-one)/) { ${$rs_sum_one} = 1;
Expand Down Expand Up @@ -15095,6 +15141,41 @@ sub print_format_n { # {{{1
return @prt_lines;
print "<- print_format_n()\n" if $opt_v > 2;
} # 1}}}
sub parse_ignore_regex { # {{{1
    #
    # Convert the list of "language(s)|regex" strings given with
    # --ignore-regex into a hash
    #    $ignore_regex{language} = [list of regex]
    #
    # Each entry has the form  <lang>[,<lang>...]|<regex>  where a
    # language of '*' stands for every language known to
    # %{$rhaa_Filters_by_Language}.  Only the first '|' separates the
    # languages from the pattern, so the pattern itself may contain '|'.
    #
    # Dies with a message naming the offending option value when
    #   - the '|' separator is missing,
    #   - no language is given,
    #   - a language is not a key of %{$rhaa_Filters_by_Language},
    #   - the regular expression is empty or does not compile.
    # Nothing is returned; results are appended to %{$rha_ignore_regex}.

    my ($ra_lang_regex           , # in,  as given on command line
        $rhaa_Filters_by_Language, # in,  hash of filters by language
        $rha_ignore_regex        , # out, hash of regex lists by language
       ) = @_;
    print "-> parse_ignore_regex()\n" if $opt_v > 2;

    foreach my $lang_regex (@{$ra_lang_regex}) {
        die "Missing '|' character in --ignore-regex '$lang_regex'\n"
            unless $lang_regex =~ /\|/;
        my ($lang_list, $regex) = split(/\|/, $lang_regex, 2);
        die "Invalid --ignore-regex: $lang_regex\n"
            unless defined $lang_list and defined $regex;

        # An empty pattern is never what the user wants:  when a match
        # operator's pattern interpolates to the empty string Perl
        # silently reuses the last successfully matched pattern instead.
        die "Empty regular expression in --ignore-regex '$lang_regex'\n"
            unless length $regex;

        # Compile once now so that a malformed pattern is reported
        # up front against the option that supplied it, not later in
        # the middle of counting.  The regex is still stored as the
        # original string; this is purely a validity check.
        if (!defined eval { qr{$regex} }) {
            chomp(my $why = $@);
            die "Invalid regular expression in --ignore-regex ",
                "'$lang_regex': $why\n";
        }

        # Tolerate blanks around the commas ("C, Java|...") but keep
        # blanks inside a name ("Fortran 77").
        my @languages = map { s/^\s+//; s/\s+$//; $_ }
                        split(/,/, $lang_list);
        die "No language given in --ignore-regex '$lang_regex'\n"
            unless @languages;

        # Guards against registering this regex twice for one language,
        # for example when '*' is combined with an explicit name.
        my %already_added = ();
        foreach my $name (@languages) {
            my @targets = ();
            if ($name eq '*') {
                @targets = keys %{$rhaa_Filters_by_Language};
            } else {
                die "Unknown language '$name' in --ignore-regex '$lang_regex'\n"
                    unless defined $rhaa_Filters_by_Language->{$name};
                @targets = ($name);
            }
            foreach my $target (@targets) {
                next if $already_added{$target}++;
                push @{$rha_ignore_regex->{$target}}, $regex;
            }
        }
    }
    print "<- parse_ignore_regex()\n" if $opt_v > 2;
}
# 1}}}
# really_is_pascal, really_is_incpascal, really_is_php from SLOCCount
my %php_files = (); # really_is_php()
sub really_is_pascal { # {{{1
Expand Down
10 changes: 9 additions & 1 deletion Unix/cloc.1.pod
Original file line number Diff line number Diff line change
Expand Up @@ -614,6 +614,7 @@ names of categorized files to FILE.
=item B<--counted=FILE>

Save names of processed source files to FILE.
See also B<--found>, B<--ignored>, B<--unique>.

=item B<--diff-alignment=FILE>

Expand All @@ -636,11 +637,13 @@ Print cloc's internal usage information and exit.

=item B<--found=FILE>

Save names of every file found to FILE.
Save names of every file found to FILE. See also B<--counted>,
B<--ignored>, B<--unique>.

=item B<--ignored=FILE>

Save names of ignored files and the reason they were ignored to FILE.
See also B<--counted>, B<--found>, B<--unique>.

=item B<--print-filter-stages>

Expand All @@ -662,6 +665,11 @@ exit.
Print the value of the operating system mode and exit. See also
B<--unix>, B<--windows>.

=item B<--unique=FILE>

Save names of unique files found to FILE.
See also B<--counted>, B<--found>, B<--ignored>.

=item B<-v[=N]>

Turn on verbose with optional numeric value.
Expand Down
7 changes: 7 additions & 0 deletions Unix/t/01_opts.t
Original file line number Diff line number Diff line change
Expand Up @@ -865,6 +865,13 @@ my @Tests = (
'ref' => '../tests/outputs/issues/851/results.yaml',
},

{
'name' => '--ignore-regex (github issues #862, #865, #868)',
'cd' => '../tests/inputs/issues/862',
'args' => '--ignore-regex="C,Fortran 77|^\\s*([{};]|END)\\s*\$" *.f *.c',
'ref' => '../tests/outputs/issues/862/results.yaml',
},

);

# Special cases:
Expand Down
Loading

0 comments on commit e32d240

Please sign in to comment.