From e32d240a835213e1359f0e8b71eeef1062e42314 Mon Sep 17 00:00:00 2001 From: AlDanial Date: Sun, 1 Dec 2024 17:54:05 -0800 Subject: [PATCH] new switch --ignore-regex, #862, #865, #868 --- Unix/cloc | 117 +++++++++++++++++++++++++++++++++++++++-------- Unix/cloc.1.pod | 10 +++- Unix/t/01_opts.t | 7 +++ cloc | 117 +++++++++++++++++++++++++++++++++++++++-------- 4 files changed, 214 insertions(+), 37 deletions(-) diff --git a/Unix/cloc b/Unix/cloc index 0e158963..f10ae6d1 100755 --- a/Unix/cloc +++ b/Unix/cloc @@ -247,6 +247,7 @@ my ( $opt_ignore_whitespace , $opt_ignore_case , $opt_ignore_case_ext , + @opt_ignore_regex , $opt_follow_links , $opt_autoconf , $opt_sum_one , @@ -352,6 +353,7 @@ my $getopt_success = GetOptions( # {{{1 "ignore_whitespace|ignore-whitespace" => \$opt_ignore_whitespace , "ignore_case|ignore-case" => \$opt_ignore_case , "ignore_case_ext|ignore-case-ext" => \$opt_ignore_case_ext , + "ignore_regex|ignore-regex=s" => \@opt_ignore_regex , "follow_links|follow-links" => \$opt_follow_links , "autoconf" => \$opt_autoconf , "sum_one|sum-one" => \$opt_sum_one , @@ -456,6 +458,7 @@ load_from_config_file($config_file, # {{{2 \$opt_ignore_whitespace , \$opt_ignore_case , \$opt_ignore_case_ext , + \@opt_ignore_regex , \$opt_follow_links , \$opt_autoconf , \$opt_sum_one , @@ -540,6 +543,7 @@ $opt_exclude_ext = "" unless $opt_exclude_ext; $opt_ignore_whitespace = 0 unless $opt_ignore_whitespace; $opt_ignore_case = 0 unless $opt_ignore_case; $opt_ignore_case_ext = 0 unless $opt_ignore_case_ext; +my %ignore_regex = (); $opt_lang_no_ext = 0 unless $opt_lang_no_ext; $opt_follow_links = 0 unless $opt_follow_links; if (defined $opt_diff_timeout) { @@ -841,6 +845,8 @@ if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) { } check_scale_existence(\%Filters_by_Language, \%Language_by_Extension, \%Scale_Factor); +parse_ignore_regex(\@opt_ignore_regex, \%Filters_by_Language, \%ignore_regex) + if @opt_ignore_regex; my $nCounted = 0; @@ -1416,7 
+1422,7 @@ if ( $max_processes == 0) { # Multiprocessing is disabled my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot, \@files_removed_tot, \@file_pairs_tot, - 0, \%Language, \%Ignored); + 0, \%Language, \%ignore_regex, \%Ignored); %Results_by_File = %{$part->{'results_by_file'}}; %Results_by_Language= %{$part->{'results_by_language'}}; %Delta_by_File = %{$part->{'delta_by_file'}}; @@ -1504,7 +1510,7 @@ if ( $max_processes == 0) { $pm->start() and next; my $count_result = count_filesets ( $fset_a, $fset_b, \@files_added_part, \@files_removed_part, - \@filepairs_part, 1, \%Language, \%Ignored ); + \@filepairs_part, 1, \%Language, \%ignore_regex, \%Ignored ); $pm->finish(0 , $count_result); } # Wait for processes to finish @@ -1634,7 +1640,7 @@ my @sorted_files = sort keys %unique_source_file; if ( $max_processes == 0) { # Multiprocessing is disabled - my $part = count_files ( \@sorted_files , 0, \%Language); + my $part = count_files ( \@sorted_files , 0, \%ignore_regex, \%Language); %Results_by_File = %{$part->{'results_by_file'}}; %Results_by_Language= %{$part->{'results_by_language'}}; %Ignored = ( %Ignored, %{$part->{'ignored'}}); @@ -1677,7 +1683,7 @@ if ( $max_processes == 0) { my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes ); while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) { $pm->start() and next; - my $count_result = count_files ( \@part, 1, \%Language ); + my $count_result = count_files ( \@part, 1, \%ignore_regex, \%Language ); $pm->finish(0 , $count_result); } # Wait for processes to finish @@ -1975,6 +1981,18 @@ Usage: $script [options] | | Count files without extensions using the counter. This option overrides internal logic for files without extensions (where such files @@ -2188,6 +2206,7 @@ Usage: $script [options] | | Save file sizes in bytes, identified languages and names of categorized files to . --counted= Save names of processed source files to . + See also --found, --ignored, --unique. 
--diff-alignment= Write to a list of files and file pairs showing which files were added, removed, and/or compared during a run with --diff. This switch @@ -2198,9 +2217,11 @@ Usage: $script [options] | | Save names of every file found to . + --found= Save names of every file found to . See + also --counted, --ignored, --unique. --ignored= Save names of ignored files and the reason they - were ignored to . + were ignored to . See also --counted, + --found, --unique. --print-filter-stages Print processed source code before and after each filter is applied. --show-ext[=] Print information about all known (or just the @@ -2209,6 +2230,8 @@ Usage: $script [options] | | Save names of unique files found to . See + also --counted, --found, --ignored. -v[=] Verbose switch (optional numeric value). -verbose[=] Long form of -v. --version Print the version of this program and exit. @@ -2558,7 +2581,7 @@ sub file_extension { # {{{1 } } # 1}}} sub count_files { # {{{1 - my ($filelist, $counter_type, $language_hash) = @_; + my ($filelist, $counter_type, $rha_ignore_regex, $language_hash) = @_; print "-> count_files()\n" if $opt_v > 2; my @p_errors = (); my %p_ignored = (); @@ -2629,7 +2652,8 @@ sub count_files { # {{{1 } else { ($all_line_count, $blank_count , - $comment_count ,) = call_counter($file, $Language{$file}, \@Errors); + $comment_count ,) = call_counter($file, $Language{$file}, + $rha_ignore_regex, \@Errors); $code_count = $all_line_count - $blank_count - $comment_count; } } @@ -2666,6 +2690,7 @@ sub count_filesets { # {{{1 $file_pairs, $counter_type, $language_hash, + $rha_ignore_regex, $rh_Ignored) = @_; print "-> count_filesets()\n" if $opt_v > 2; my @p_errors = (); @@ -2695,7 +2720,7 @@ sub count_filesets { # {{{1 my ($all_line_count, $blank_count , $comment_count , - ) = call_counter($file, $Lang, \@p_errors); + ) = call_counter($file, $Lang, $rha_ignore_regex, \@p_errors); $already_counted{$file} = 1; my $code_count = $all_line_count-$blank_count-$comment_count; 
if ($opt_by_file) { @@ -2755,7 +2780,7 @@ sub count_filesets { # {{{1 my ($all_line_count, $blank_count , $comment_count , - ) = call_counter($f, $this_lang, \@p_errors); + ) = call_counter($f, $this_lang, $rha_ignore_regex, \@p_errors); $p_dbl{ $this_lang }{'comment'}{'added'} += $comment_count; $p_dbl{ $this_lang }{'blank'}{'added'} += $blank_count; $p_dbl{ $this_lang }{'code'}{'added'} += @@ -2795,7 +2820,7 @@ sub count_filesets { # {{{1 my ($all_line_count, $blank_count , $comment_count , - ) = call_counter($f, $this_lang, \@p_errors); + ) = call_counter($f, $this_lang, $rha_ignore_regex, \@p_errors); $p_dbl{ $this_lang}{'comment'}{'removed'} += $comment_count; $p_dbl{ $this_lang}{'blank'}{'removed'} += $blank_count; $p_dbl{ $this_lang}{'code'}{'removed'} += @@ -3057,18 +3082,18 @@ sub count_filesets { # {{{1 ($all_line_count_L, $blank_count_L , $comment_count_L , - ) = call_counter($file_L, $Lang_L, \@Errors); + ) = call_counter($file_L, $Lang_L, $rha_ignore_regex, \@Errors); ($all_line_count_R, $blank_count_R , $comment_count_R , - ) = call_counter($file_R, $Lang_R, \@Errors); + ) = call_counter($file_R, $Lang_R, $rha_ignore_regex, \@Errors); } else { # L and R file contents are identical, no need to diff ($all_line_count_L, $blank_count_L , $comment_count_L , - ) = call_counter($file_L, $Lang_L, \@Errors); + ) = call_counter($file_L, $Lang_L, $rha_ignore_regex, \@Errors); $all_line_count_R = $all_line_count_L; $blank_count_R = $blank_count_L ; $comment_count_R = $comment_count_L ; @@ -6872,9 +6897,10 @@ sub different_files { # {{{1 return @unique; } # 1}}} sub call_counter { # {{{1 - my ($file , # in - $language , # in - $ra_Errors, # out + my ($file , # in + $language , # in + $rha_ignore_regex , # in + $ra_Errors , # out ) = @_; # Logic: pass the file through the following filters: @@ -6883,7 +6909,9 @@ sub call_counter { # {{{1 # 3. 
remove comments using each filter defined for this language # (example: SQL has two, remove_starts_with(--) and # remove_c_comments() ) - # 4. compute comment lines as + # 4. if ignore regex filters are defined, remove lines that + # match any of them + # 5. compute comment lines as # total lines - blank lines - lines left over after all # comment filters have been applied @@ -6938,6 +6966,22 @@ sub call_counter { # {{{1 @lines = rm_comments(\@lines, $language, $file, \%EOL_Continuation_re, $ra_Errors); + if (%{$rha_ignore_regex} and defined($rha_ignore_regex->{$language})) { + my @keep_lines = (); + foreach my $line (@lines) { + my $keep = 1; + foreach my $regex (@{$rha_ignore_regex->{$language}}) { + if ($line =~ m{$regex}) { +print "reject '$line' in $file because of '$regex'\n" if $opt_v > 4; + $keep = 0; + last; + } + } + push @keep_lines, $line if $keep; + } + @lines = @keep_lines; + } + my $comment_lines = $total_lines - $blank_lines - scalar @lines; if ($opt_strip_comments) { my $stripped_file = ""; @@ -14693,6 +14737,7 @@ sub load_from_config_file { # {{{1 $rs_ignore_whitespace , $rs_ignore_case , $rs_ignore_case_ext , + $ra_ignore_regex , $rs_follow_links , $rs_autoconf , $rs_sum_one , @@ -14801,6 +14846,7 @@ sub load_from_config_file { # {{{1 } elsif (!defined ${$rs_ignore_whitespace} and /^(ignore_whitespace|ignore-whitespace)/) { ${$rs_ignore_whitespace} = 1; } elsif (!defined ${$rs_ignore_case_ext} and /^(ignore_case_ext|ignore-case-ext)/) { ${$rs_ignore_case_ext} = 1; } elsif (!defined ${$rs_ignore_case} and /^(ignore_case|ignore-case)/) { ${$rs_ignore_case} = 1; + } elsif (! 
@{$ra_ignore_regex} and /^(?:ignore_regex|ignore-regex)(=|\s+)['"]?(.*?)['"]?$/) { push @{$ra_ignore_regex}, $2; } elsif (!defined ${$rs_follow_links} and /^(follow_links|follow-links)/) { ${$rs_follow_links} = 1; } elsif (!defined ${$rs_autoconf} and /^autoconf/) { ${$rs_autoconf} = 1; } elsif (!defined ${$rs_sum_one} and /^(sum_one|sum-one)/) { ${$rs_sum_one} = 1; @@ -15095,6 +15141,41 @@ sub print_format_n { # {{{1 return @prt_lines; print "<- print_format_n()\n" if $opt_v > 2; } # 1}}} +sub parse_ignore_regex { # {{{1 + # + # Convert the list of "language(s)|regex" into a hash + # $ignore_regex{language} = [list of regex] + + my ($ra_lang_regex , # in, as given on command line + $rhaa_Filters_by_Language, # in, hash of filters by language + $rha_ignore_regex) = @_; + print "-> parse_ignore_regex()\n" if $opt_v > 2; + + foreach my $lang_regex (@{$ra_lang_regex}) { + die "Missing '|' character in --ignore-regex '$lang_regex'\n" + unless $lang_regex =~ /\|/; + my ($lang, $regex) = split(/\|/, $lang_regex, 2); + die "Invalid --ignore-regex: $lang_regex\n" + unless defined $lang and defined $regex; + my @languages = split(/,/, $lang); + foreach my $lang (@languages) { + if ($lang eq '*') { + foreach my $lang (keys %{$rhaa_Filters_by_Language}) { + push @{$rha_ignore_regex->{$lang}}, $regex; + } + } else { + die "Unknown language '$lang' in --ignore-regex '$lang_regex'\n" + unless defined $rhaa_Filters_by_Language->{$lang}; + push @{$rha_ignore_regex->{$lang}}, $regex; + } +#print "lang=$lang regex=[$regex]\n"; + } + } +#use Data::Dumper; +#print Dumper($rha_ignore_regex); + print "<- parse_ignore_regex()\n" if $opt_v > 2; +} +# 1}}} # really_is_pascal, really_is_incpascal, really_is_php from SLOCCount my %php_files = (); # really_is_php() sub really_is_pascal { # {{{1 diff --git a/Unix/cloc.1.pod b/Unix/cloc.1.pod index e4bf2b7a..b8d150f3 100644 --- a/Unix/cloc.1.pod +++ b/Unix/cloc.1.pod @@ -614,6 +614,7 @@ names of categorized files to FILE. 
=item B<--counted=FILE> Save names of processed source files to FILE. +See also B<--found>, B<--ignored>, B<--unique>. =item B<--diff-alignment=FILE> @@ -636,11 +637,13 @@ Print cloc's internal usage information and exit. =item B<--found=FILE> -Save names of every file found to FILE. +Save names of every file found to FILE. See also B<--counted>, +B<--ignored>, B<--unique>. =item B<--ignored=FILE> Save names of ignored files and the reason they were ignored to FILE. +See also B<--counted>, B<--found>, B<--unique>. =item B<--print-filter-stages> @@ -662,6 +665,11 @@ exit. Print the value of the operating system mode and exit. See also B<--unix>, B<--windows>. +=item B<--unique=FILE> + +Save names of unique files found to FILE. +See also B<--counted>, B<--found>, B<--ignored>. + =item B<-v[=N]> Turn on verbose with optional numeric value. diff --git a/Unix/t/01_opts.t b/Unix/t/01_opts.t index 1b668abe..dd55ffbe 100755 --- a/Unix/t/01_opts.t +++ b/Unix/t/01_opts.t @@ -865,6 +865,13 @@ my @Tests = ( 'ref' => '../tests/outputs/issues/851/results.yaml', }, + { + 'name' => '--ignore-regex (github issues #862, #865, #868)', + 'cd' => '../tests/inputs/issues/862', + 'args' => '--ignore-regex="C,Fortran 77|^\\s*([{};]|END)\\s*\$" *.f *.c', + 'ref' => '../tests/outputs/issues/862/results.yaml', + }, + ); # Special cases: diff --git a/cloc b/cloc index 5fcf4454..4a5df13c 100755 --- a/cloc +++ b/cloc @@ -248,6 +248,7 @@ my ( $opt_ignore_whitespace , $opt_ignore_case , $opt_ignore_case_ext , + @opt_ignore_regex , $opt_follow_links , $opt_autoconf , $opt_sum_one , @@ -353,6 +354,7 @@ my $getopt_success = GetOptions( # {{{1 "ignore_whitespace|ignore-whitespace" => \$opt_ignore_whitespace , "ignore_case|ignore-case" => \$opt_ignore_case , "ignore_case_ext|ignore-case-ext" => \$opt_ignore_case_ext , + "ignore_regex|ignore-regex=s" => \@opt_ignore_regex , "follow_links|follow-links" => \$opt_follow_links , "autoconf" => \$opt_autoconf , "sum_one|sum-one" 
=> \$opt_sum_one , @@ -457,6 +459,7 @@ load_from_config_file($config_file, # {{{2 \$opt_ignore_whitespace , \$opt_ignore_case , \$opt_ignore_case_ext , + \@opt_ignore_regex , \$opt_follow_links , \$opt_autoconf , \$opt_sum_one , @@ -531,6 +534,7 @@ $opt_exclude_ext = "" unless $opt_exclude_ext; $opt_ignore_whitespace = 0 unless $opt_ignore_whitespace; $opt_ignore_case = 0 unless $opt_ignore_case; $opt_ignore_case_ext = 0 unless $opt_ignore_case_ext; +my %ignore_regex = (); $opt_lang_no_ext = 0 unless $opt_lang_no_ext; $opt_follow_links = 0 unless $opt_follow_links; if (defined $opt_diff_timeout) { @@ -831,6 +835,8 @@ if ($opt_lang_no_ext and !defined $Filters_by_Language{$opt_lang_no_ext}) { } check_scale_existence(\%Filters_by_Language, \%Language_by_Extension, \%Scale_Factor); +parse_ignore_regex(\@opt_ignore_regex, \%Filters_by_Language, \%ignore_regex) + if @opt_ignore_regex; my $nCounted = 0; @@ -1406,7 +1412,7 @@ if ( $max_processes == 0) { # Multiprocessing is disabled my $part = count_filesets ( $fset_a, $fset_b, \@files_added_tot, \@files_removed_tot, \@file_pairs_tot, - 0, \%Language, \%Ignored); + 0, \%Language, \%ignore_regex, \%Ignored); %Results_by_File = %{$part->{'results_by_file'}}; %Results_by_Language= %{$part->{'results_by_language'}}; %Delta_by_File = %{$part->{'delta_by_file'}}; @@ -1494,7 +1500,7 @@ if ( $max_processes == 0) { $pm->start() and next; my $count_result = count_filesets ( $fset_a, $fset_b, \@files_added_part, \@files_removed_part, - \@filepairs_part, 1, \%Language, \%Ignored ); + \@filepairs_part, 1, \%Language, \%ignore_regex, \%Ignored ); $pm->finish(0 , $count_result); } # Wait for processes to finish @@ -1624,7 +1630,7 @@ my @sorted_files = sort keys %unique_source_file; if ( $max_processes == 0) { # Multiprocessing is disabled - my $part = count_files ( \@sorted_files , 0, \%Language); + my $part = count_files ( \@sorted_files , 0, \%ignore_regex, \%Language); %Results_by_File = %{$part->{'results_by_file'}}; 
%Results_by_Language= %{$part->{'results_by_language'}}; %Ignored = ( %Ignored, %{$part->{'ignored'}}); @@ -1667,7 +1673,7 @@ if ( $max_processes == 0) { my $num_files_per_part = ceil ( ( scalar @sorted_files ) / $num_processes ); while ( my @part = splice @sorted_files, 0 , $num_files_per_part ) { $pm->start() and next; - my $count_result = count_files ( \@part, 1, \%Language ); + my $count_result = count_files ( \@part, 1, \%ignore_regex, \%Language ); $pm->finish(0 , $count_result); } # Wait for processes to finish @@ -1965,6 +1971,18 @@ Usage: $script [options] | | Count files without extensions using the counter. This option overrides internal logic for files without extensions (where such files @@ -2178,6 +2196,7 @@ Usage: $script [options] | | Save file sizes in bytes, identified languages and names of categorized files to . --counted= Save names of processed source files to . + See also --found, --ignored, --unique. --diff-alignment= Write to a list of files and file pairs showing which files were added, removed, and/or compared during a run with --diff. This switch @@ -2188,9 +2207,11 @@ Usage: $script [options] | | Save names of every file found to . + --found= Save names of every file found to . See + also --counted, --ignored, --unique. --ignored= Save names of ignored files and the reason they - were ignored to . + were ignored to . See also --counted, + --found, --unique. --print-filter-stages Print processed source code before and after each filter is applied. --show-ext[=] Print information about all known (or just the @@ -2199,6 +2220,8 @@ Usage: $script [options] | | Save names of unique files found to . See + also --counted, --found, --ignored. -v[=] Verbose switch (optional numeric value). -verbose[=] Long form of -v. --version Print the version of this program and exit. 
@@ -2548,7 +2571,7 @@ sub file_extension { # {{{1 } } # 1}}} sub count_files { # {{{1 - my ($filelist, $counter_type, $language_hash) = @_; + my ($filelist, $counter_type, $rha_ignore_regex, $language_hash) = @_; print "-> count_files()\n" if $opt_v > 2; my @p_errors = (); my %p_ignored = (); @@ -2619,7 +2642,8 @@ sub count_files { # {{{1 } else { ($all_line_count, $blank_count , - $comment_count ,) = call_counter($file, $Language{$file}, \@Errors); + $comment_count ,) = call_counter($file, $Language{$file}, + $rha_ignore_regex, \@Errors); $code_count = $all_line_count - $blank_count - $comment_count; } } @@ -2656,6 +2680,7 @@ sub count_filesets { # {{{1 $file_pairs, $counter_type, $language_hash, + $rha_ignore_regex, $rh_Ignored) = @_; print "-> count_filesets()\n" if $opt_v > 2; my @p_errors = (); @@ -2685,7 +2710,7 @@ sub count_filesets { # {{{1 my ($all_line_count, $blank_count , $comment_count , - ) = call_counter($file, $Lang, \@p_errors); + ) = call_counter($file, $Lang, $rha_ignore_regex, \@p_errors); $already_counted{$file} = 1; my $code_count = $all_line_count-$blank_count-$comment_count; if ($opt_by_file) { @@ -2745,7 +2770,7 @@ sub count_filesets { # {{{1 my ($all_line_count, $blank_count , $comment_count , - ) = call_counter($f, $this_lang, \@p_errors); + ) = call_counter($f, $this_lang, $rha_ignore_regex, \@p_errors); $p_dbl{ $this_lang }{'comment'}{'added'} += $comment_count; $p_dbl{ $this_lang }{'blank'}{'added'} += $blank_count; $p_dbl{ $this_lang }{'code'}{'added'} += @@ -2785,7 +2810,7 @@ sub count_filesets { # {{{1 my ($all_line_count, $blank_count , $comment_count , - ) = call_counter($f, $this_lang, \@p_errors); + ) = call_counter($f, $this_lang, $rha_ignore_regex, \@p_errors); $p_dbl{ $this_lang}{'comment'}{'removed'} += $comment_count; $p_dbl{ $this_lang}{'blank'}{'removed'} += $blank_count; $p_dbl{ $this_lang}{'code'}{'removed'} += @@ -3047,18 +3072,18 @@ sub count_filesets { # {{{1 ($all_line_count_L, $blank_count_L , $comment_count_L , - 
) = call_counter($file_L, $Lang_L, \@Errors); + ) = call_counter($file_L, $Lang_L, $rha_ignore_regex, \@Errors); ($all_line_count_R, $blank_count_R , $comment_count_R , - ) = call_counter($file_R, $Lang_R, \@Errors); + ) = call_counter($file_R, $Lang_R, $rha_ignore_regex, \@Errors); } else { # L and R file contents are identical, no need to diff ($all_line_count_L, $blank_count_L , $comment_count_L , - ) = call_counter($file_L, $Lang_L, \@Errors); + ) = call_counter($file_L, $Lang_L, $rha_ignore_regex, \@Errors); $all_line_count_R = $all_line_count_L; $blank_count_R = $blank_count_L ; $comment_count_R = $comment_count_L ; @@ -6862,9 +6887,10 @@ sub different_files { # {{{1 return @unique; } # 1}}} sub call_counter { # {{{1 - my ($file , # in - $language , # in - $ra_Errors, # out + my ($file , # in + $language , # in + $rha_ignore_regex , # in + $ra_Errors , # out ) = @_; # Logic: pass the file through the following filters: @@ -6873,7 +6899,9 @@ sub call_counter { # {{{1 # 3. remove comments using each filter defined for this language # (example: SQL has two, remove_starts_with(--) and # remove_c_comments() ) - # 4. compute comment lines as + # 4. if ignore regex filters are defined, remove lines that + # match any of them + # 5. 
compute comment lines as # total lines - blank lines - lines left over after all # comment filters have been applied @@ -6928,6 +6956,22 @@ sub call_counter { # {{{1 @lines = rm_comments(\@lines, $language, $file, \%EOL_Continuation_re, $ra_Errors); + if (%{$rha_ignore_regex} and defined($rha_ignore_regex->{$language})) { + my @keep_lines = (); + foreach my $line (@lines) { + my $keep = 1; + foreach my $regex (@{$rha_ignore_regex->{$language}}) { + if ($line =~ m{$regex}) { +print "reject '$line' in $file because of '$regex'\n" if $opt_v > 4; + $keep = 0; + last; + } + } + push @keep_lines, $line if $keep; + } + @lines = @keep_lines; + } + my $comment_lines = $total_lines - $blank_lines - scalar @lines; if ($opt_strip_comments) { my $stripped_file = ""; @@ -14683,6 +14727,7 @@ sub load_from_config_file { # {{{1 $rs_ignore_whitespace , $rs_ignore_case , $rs_ignore_case_ext , + $ra_ignore_regex , $rs_follow_links , $rs_autoconf , $rs_sum_one , @@ -14791,6 +14836,7 @@ sub load_from_config_file { # {{{1 } elsif (!defined ${$rs_ignore_whitespace} and /^(ignore_whitespace|ignore-whitespace)/) { ${$rs_ignore_whitespace} = 1; } elsif (!defined ${$rs_ignore_case_ext} and /^(ignore_case_ext|ignore-case-ext)/) { ${$rs_ignore_case_ext} = 1; } elsif (!defined ${$rs_ignore_case} and /^(ignore_case|ignore-case)/) { ${$rs_ignore_case} = 1; + } elsif (! 
@{$ra_ignore_regex} and /^(?:ignore_regex|ignore-regex)(=|\s+)['"]?(.*?)['"]?$/) { push @{$ra_ignore_regex}, $2; } elsif (!defined ${$rs_follow_links} and /^(follow_links|follow-links)/) { ${$rs_follow_links} = 1; } elsif (!defined ${$rs_autoconf} and /^autoconf/) { ${$rs_autoconf} = 1; } elsif (!defined ${$rs_sum_one} and /^(sum_one|sum-one)/) { ${$rs_sum_one} = 1; @@ -15085,6 +15131,41 @@ sub print_format_n { # {{{1 return @prt_lines; print "<- print_format_n()\n" if $opt_v > 2; } # 1}}} +sub parse_ignore_regex { # {{{1 + # + # Convert the list of "language(s)|regex" into a hash + # $ignore_regex{language} = [list of regex] + + my ($ra_lang_regex , # in, as given on command line + $rhaa_Filters_by_Language, # in, hash of filters by language + $rha_ignore_regex) = @_; + print "-> parse_ignore_regex()\n" if $opt_v > 2; + + foreach my $lang_regex (@{$ra_lang_regex}) { + die "Missing '|' character in --ignore-regex '$lang_regex'\n" + unless $lang_regex =~ /\|/; + my ($lang, $regex) = split(/\|/, $lang_regex, 2); + die "Invalid --ignore-regex: $lang_regex\n" + unless defined $lang and defined $regex; + my @languages = split(/,/, $lang); + foreach my $lang (@languages) { + if ($lang eq '*') { + foreach my $lang (keys %{$rhaa_Filters_by_Language}) { + push @{$rha_ignore_regex->{$lang}}, $regex; + } + } else { + die "Unknown language '$lang' in --ignore-regex '$lang_regex'\n" + unless defined $rhaa_Filters_by_Language->{$lang}; + push @{$rha_ignore_regex->{$lang}}, $regex; + } +#print "lang=$lang regex=[$regex]\n"; + } + } +#use Data::Dumper; +#print Dumper($rha_ignore_regex); + print "<- parse_ignore_regex()\n" if $opt_v > 2; +} +# 1}}} # really_is_pascal, really_is_incpascal, really_is_php from SLOCCount my %php_files = (); # really_is_php() sub really_is_pascal { # {{{1