From 845caa2ca27c87ee9efa8c2e4efa00213f11786f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Julien=20=C3=89LIE?= Date: Wed, 27 Dec 2023 14:00:24 +0100 Subject: [PATCH] scanspool: Add -r flag to remove articles in error see #288 --- doc/pod/news.pod | 11 +++++----- doc/pod/scanspool.pod | 11 +++++++++- frontends/scanspool.in | 46 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 59 insertions(+), 9 deletions(-) diff --git a/doc/pod/news.pod b/doc/pod/news.pod index 2e51ec34c..ca4687b8a 100644 --- a/doc/pod/news.pod +++ b/doc/pod/news.pod @@ -34,6 +34,12 @@ as a format like C was expected. =item * +B<scanspool> now detects empty files in a tradspool news spool, correctly +parses continuation lines in header fields, and can automatically remove +articles reported to have a problem (when run with the new B<-r> flag). + +=item * + Fixed a hang when posting articles if COMPRESS DEFLATE is active but TLS is not. Thanks to Enrik Berkhan for the patch for B. @@ -99,11 +105,6 @@ Jesse Rehmer for the bug report and Bo Lindbergh for the fix. =item * -B<scanspool> now detects empty files in a tradspool news spool, and correctly -parses continuation lines in header fields. - -=item * - B no longer malfunctions nor throttles when the maximum number of file descriptors supported by the system is reached. If needing to use more file descriptors than the default system limit, a new C option diff --git a/doc/pod/scanspool.pod b/doc/pod/scanspool.pod index 23dbd131d..6f1cadb50 100644 --- a/doc/pod/scanspool.pod +++ b/doc/pod/scanspool.pod @@ -4,7 +4,7 @@ scanspool - Perform a sanity scan over all articles in news spool =head1 SYNOPSIS -B<scanspool> [B<-cnv>] [B<-a> I<active-file>] [B<-s> I<spool-dir>] +B<scanspool> [B<-cnrv>] [B<-a> I<active-file>] [B<-s> I<spool-dir>] =head1 DESCRIPTION @@ -91,6 +91,15 @@ scan the Newsgroups header field of the articles. Don't throttle B<innd> while scanning. +=item B<-r> + +Remove articles reported to have a problem. To see the list of articles in +question, run B<scanspool> without this flag first. 
+ +Be warned that using this flag may result in inaccessible articles if articles +are removed whereas they are perfectly valid articles. Have a look at them to +ensure they can be safely removed, and then use this flag. + =item B<-s> I The root of the spool tree; I is the default. diff --git a/frontends/scanspool.in b/frontends/scanspool.in index 3f6f1c5e4..f34e2996d 100644 --- a/frontends/scanspool.in +++ b/frontends/scanspool.in @@ -93,7 +93,7 @@ $VERSION =~ s/INN //; $0 =~ s!.*/!!; my $usage = "Usage: - $0 [-cnv] [-a active-file] [-s spool-dir] + $0 [-cnrv] [-a active-file] [-s spool-dir] Perform a sanity scan over all articles in @@ -101,6 +101,7 @@ Options: -a active-file active file to use (default /active) -c check article filenames, don't scan the articles -n don't throttle innd + -r remove articles reported to have a problem -s spool-dir spool tree (default ) -v verbose mode verbose messages begin with a tab @@ -123,7 +124,7 @@ my $reason = "running scanspool"; # throttle reason # parse args # my %opt; -getopts("a:s:vcn", \%opt) || die $usage; +getopts("a:cnrs:v", \%opt) || die $usage; $active = $opt{'a'} if defined $opt{'a'}; $spool = $opt{'s'} if defined $opt{'s'}; @@ -252,6 +253,26 @@ sub problem { return; } +# rm_file - remove a file +# +# Remove the file given as argument, if the -r flag is used. +# Otherwise, it is a no-op. +# Report a fatal error to stderr and exit if the removal fails. +# +# usage: +# rm_file(filename) +# +sub rm_file { + my ($filename) = @_; + + if (defined $opt{'r'}) { + unlink("$filename") or fatal(4, "cannot remove $filename"); + problem("$filename: successfully removed"); + } + + return; +} + # fatal - report a fatal error to stderr and exit # # Print a message to stderr. 
The message has the program name prepended @@ -308,6 +329,7 @@ sub check_spool { my $aline; # header line from an article my $newsgroupsField; # in a continuation line, reading Newsgroups my @group; # array of groups from the Newsgroups header field + my $remove; # mark an article as to be removed my $FINDFILE; # find command pipe handle # if verbose, say what we are doing @@ -367,12 +389,14 @@ sub check_spool { if (!defined $gname2type{$artgrp} || $gname2type{$artgrp} =~ /[=jx]/o) { if ($preverrgrp ne $artgrp) { - problem("$artgrp: not an active group directory"); + problem("$artgrp: not an active group directory (probably " + . "a removed newsgroup; see articles with -v)"); $preverrgrp = $artgrp; } if (defined $opt{'v'}) { problem("$filename: article found in non-active directory"); } + rm_file($filename); next; } @@ -381,18 +405,26 @@ sub check_spool { $artnum =~ s#^.+/##o; if ($artnum =~ m/^0/o) { problem("$filename: article basename starts with a 0"); + rm_file($filename); + next if defined $opt{'r'}; } if (defined $gname2type{$artgrp}) { if ($lowart{$artgrp} > $highart{$artgrp}) { problem("$filename: active indicates group should be empty"); + rm_file($filename); + next if defined $opt{'r'}; } elsif ($artnum < $lowart{$artgrp}) { problem("$filename: article number is too low " . "(first article has number $lowart{$artgrp} " . "in the active file)"); + rm_file($filename); + next if defined $opt{'r'}; } elsif ($artnum > $highart{$artgrp}) { problem("$filename: article number is too high " . "(last article has number $highart{$artgrp} " . 
"in the active file"); + rm_file($filename); + next if defined $opt{'r'}; } } @@ -407,6 +439,7 @@ sub check_spool { if (-z "$filename") { problem("WARNING: $filename: empty file"); + rm_file($filename); next; } @@ -419,6 +452,7 @@ sub check_spool { @group = (); $newsgroupsField = 0; + $remove = 0; # read until the Newsgroups header field is found AREADLINE: @@ -440,6 +474,7 @@ sub check_spool { } elsif ($aline =~ /^\r?\n$/o) { # end of headers problem("WARNING: $filename: no Newsgroups header field"); + $remove++; last; } else { # do not continue parsing the headers as we have just found out @@ -448,6 +483,7 @@ sub check_spool { } if (eof $ARTICLE) { problem("WARNING: $filename: article with no body"); + $remove++; } } @@ -469,11 +505,15 @@ sub check_spool { # no group or group alias was found problem("$filename: does not belong in $artgrp" . " according to its Newsgroups header field"); + $remove++; } } # close the article close $ARTICLE; + + # remove the article if asked to + rm_file($filename) if $remove > 0; } # all done with the find