diff --git a/doc/pod/inn.conf.pod b/doc/pod/inn.conf.pod index 4589fd094..c3c43d46d 100644 --- a/doc/pod/inn.conf.pod +++ b/doc/pod/inn.conf.pod @@ -299,6 +299,10 @@ will be accepted on the local machine, but articles rejected by the filter will I<not> be fed to any peers specified in F<newsfeeds> with the C<Af> flag. The default value is false. +If this parameter is set, you may also use the I<filtered> key of storage +method entries in F<storage.conf> to store filtered articles in dedicated +storage classes. + =item I<hiscachesize> If set to a value other than C<0>, a hash of recently received Message-IDs diff --git a/doc/pod/install.pod b/doc/pod/install.pod index 023bbe168..593b8e13a 100644 --- a/doc/pod/install.pod +++ b/doc/pod/install.pod @@ -1319,6 +1319,7 @@ A storage class definition looks like this: expires: <mintime>[,<maxtime>] options: <options> exactmatch: <bool> + filtered: <bool> } <methodname> is the name of the storage method to use to store articles in diff --git a/doc/pod/news.pod b/doc/pod/news.pod index 870d39779..15a674fea 100644 --- a/doc/pod/news.pod +++ b/doc/pod/news.pod @@ -52,6 +52,13 @@ how to use the module. =item * +Added a new I<filtered> option in F<storage.conf> to store articles which have +been rejected by a Perl or Python filter for B<innd> in a separate storage +class, when I<dontrejectfiltered> is set to true in F<inn.conf>. Thanks to +Christoph Biedl for the feature. + +=item * + The B<delayer> program, which previously was in the F<contrib> directory, is now installed by default. It notably permits generating a delayed feed, for instance to give cancel control articles and NoCeM notices time to arrive, and diff --git a/doc/pod/storage.conf.pod b/doc/pod/storage.conf.pod index 852bf589f..562e2f57b 100644 --- a/doc/pod/storage.conf.pod +++ b/doc/pod/storage.conf.pod @@ -35,6 +35,7 @@ entries, the first one will be used. Each entry is formatted as follows: expires: <mintime>[,<maxtime>] options: <options> exactmatch: <bool> + filtered: <bool> } If spaces or tabs are included in a value, that value must be enclosed in @@ -137,6 +138,21 @@ wildmat as described above.) This is a boolean value; C<true>, C<yes> and C<on> are usable to enable this key. 
The case of these values is not significant. The default is false. +=item I<filtered>: <bool> + +If this key is set to true, the article must have been rejected by any +enabled article filters (Perl or Python) for B<innd>. This also requires that +I<dontrejectfiltered> is set to true in F<inn.conf>. Filtered articles are +usually stored in a small CNFS buffer, or another storage method with a rather +tight expiration policy. This is a boolean value; C<true>, C<yes> and C<on> +are usable to enable this key. The case of these values is not significant. +The default is false. + +If all the storage classes have this key set to false, filtered articles are +stored in the same storage class as accepted articles. It is only when at +least one storage class has this key set to true that filtered articles and +accepted articles are no longer stored mixed together in any storage class. + =back If an article matches all of the constraints of an entry, it is stored diff --git a/frontends/cnfsstat.in b/frontends/cnfsstat.in index 667c24ccb..751752a3c 100644 --- a/frontends/cnfsstat.in +++ b/frontends/cnfsstat.in @@ -159,28 +159,30 @@ if ($lastconftime < $maxtime) { my $logline; my $header_printed = 0; -my ($gr, $cl, $min, $max); +my ($gr, $cl, $min, $max, $filtered); if ($oclass) { if ($class{$oclass}) { if (!$header_printed) { if ($stor{$oclass}) { - ($gr, $cl, $min, $max) = split(/:/, $stor{$oclass}); + ($gr, $cl, $min, $max, undef, $filtered) + = split(/:/, $stor{$oclass}); } else { - ($gr, $cl, $min, $max) = ('', $oclass, 0, 0); + ($gr, $cl, $min, $max, $filtered) = ('', $oclass, 0, 0, 0); } # Remove leading and trailing double quotes, if present. + my $filtered_s = $filtered ? ", filtered only" : ""; $gr =~ s/"?([^"]*)"?/$1/g; if ($use_syslog) { if ($min || $max) { $logline = sprintf( "Class %s for groups matching \"%s\" " - . "article size min/max: %d/%d", - $oclass, $gr, $min, $max, + . 
"article size min/max: %d/%d%s", + $oclass, $gr, $min, $max, $filtered_s, ); } else { $logline = sprintf( - "Class %s for groups matching \"%s\"", - $oclass, $gr, + "Class %s for groups matching \"%s\"%s", + $oclass, $gr, $filtered_s, ); } } else { @@ -189,6 +191,9 @@ if ($oclass) { if ($min || $max) { print STDOUT ", article size min/max: $min/$max"; } + if ($filtered) { + print STDOUT ", filtered articles only"; + } print STDOUT "\n"; } $header_printed = 1; @@ -213,19 +218,22 @@ if ($oclass) { } else { # Print all Classes my %buffDone; foreach my $c (@storsort) { - ($gr, $cl, $min, $max) = split(/:/, $stor{$c}); + ($gr, $cl, $min, $max, undef, $filtered) = split(/:/, $stor{$c}); + my $filtered_s = $filtered ? ", filtered only" : ""; # Remove leading and trailing double quotes, if present. $gr =~ s/"?([^"]*)"?/$1/g; if ($use_syslog) { if ($min || $max) { $logline = sprintf( "Class %s for groups matching \"%s\" " - . "article size min/max: %d/%d", - $c, $gr, $min, $max, + . "article size min/max: %d/%d%s", + $c, $gr, $min, $max, $filtered_s, ); } else { - $logline - = sprintf("Class %s for groups matching \"%s\"", $c, $gr); + $logline = sprintf( + "Class %s for groups matching \"%s\"%s", $c, $gr, + $filtered_s, + ); } } else { print STDOUT "Class $c"; @@ -233,6 +241,9 @@ if ($oclass) { if ($min || $max) { print STDOUT ", article size min/max: $min/$max"; } + if ($filtered) { + print STDOUT ", filtered articles only"; + } print STDOUT "\n"; } @buffers = split(/,/, $class{$c}); @@ -357,10 +368,16 @@ sub read_storageconf { $key{'SIZE'} .= ",0" unless $key{'SIZE'} =~ /,/; $key{'SIZE'} =~ s/,/:/; + $key{'FILTERED'} + = defined $key{'FILTERED'} + ? $key{'FILTERED'} =~ /^(true|yes|on)$/ + ? 1 + : 0 + : 0; if (!defined $stor{ $key{'OPTIONS'} }) { $stor{ $key{'OPTIONS'} } = "$key{'NEWSGROUPS'}:$key{'CLASS'}:" - . "$key{'SIZE'}:$key{'OPTIONS'}"; + . 
"$key{'SIZE'}:$key{'OPTIONS'}:$key{'FILTERED'}"; push(@storsort, $key{'OPTIONS'}); } } diff --git a/include/inn/storage.h b/include/inn/storage.h index 56230792a..9bfd51820 100644 --- a/include/inn/storage.h +++ b/include/inn/storage.h @@ -50,15 +50,16 @@ typedef struct { void *private; /* A pointer to method specific data */ time_t arrived; /* The time when the article arrived */ time_t expires; /* The time when the article will be expired */ + bool filtered; /* Article was marked by a filter */ char *groups; /* Where Newsgroups header field body starts */ int groupslen; /* Length of Newsgroups header field body */ TOKEN *token; /* A pointer to the article's TOKEN */ } ARTHANDLE; /* Initializer for the ARTHANDLE structure. */ -#define ARTHANDLE_INITIALIZER \ - { \ - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 \ +#define ARTHANDLE_INITIALIZER \ + { \ + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 \ } #define SMERR_NOERROR 0 diff --git a/innd/art.c b/innd/art.c index be51e7bf1..1ede0738c 100644 --- a/innd/art.c +++ b/innd/art.c @@ -435,7 +435,7 @@ ARTheaderpcmp(const void *p1, const void *p2) /* Write an article using the storage api. Put it together in memory and call out to the api. 
*/ static TOKEN -ARTstore(CHANNEL *cp) +ARTstore(CHANNEL *cp, bool filtered) { struct buffer *Article = &cp->In; ARTDATA *data = &cp->Data; @@ -557,6 +557,7 @@ ARTstore(CHANNEL *cp) arth.arrived = (time_t) 0; arth.token = (TOKEN *) NULL; arth.expires = data->Expires; + arth.filtered = filtered; if (innconf->storeonxref) { arth.groups = data->Replic; arth.groupslen = data->ReplicLength; @@ -1694,7 +1695,7 @@ HashFeedMatch(HASHFEEDLIST *hf, char *MessageID) */ static void ARTpropagate(ARTDATA *data, const char **hops, int hopcount, char **list, - bool ControlStore, bool OverviewCreated, bool Filtered) + bool ControlStore, bool OverviewCreated, bool filtered) { HDRCONTENT *hc = data->HdrContent; SITE *sp, *funnel; @@ -1815,7 +1816,7 @@ ARTpropagate(ARTDATA *data, const char **hops, int hopcount, char **list, } /* Handle dontrejectfiltered. */ - if (Filtered && sp->DropFiltered) + if (filtered && sp->DropFiltered) continue; /* Write that the site is getting it, and flag to send it. */ @@ -2576,7 +2577,7 @@ ARTpost(CHANNEL *cp) for (i = 0; (ngp = GroupPointers[i]) != NULL; i++) ngp->PostCount = 0; - token = ARTstore(cp); + token = ARTstore(cp, Filtered); /* Change trailing '\r\n' to '\0\n' of all system header fields. */ for (i = 0; i < MAX_ARTHEADER; i++) { if (HDR_FOUND(i)) { diff --git a/samples/storage.conf b/samples/storage.conf index 76b08115c..0df2f7a62 100644 --- a/samples/storage.conf +++ b/samples/storage.conf @@ -6,14 +6,15 @@ ## ## method { ## newsgroups: -## class: +## class: ## size: [,] ## expires: [,] ## options: ## exactmatch: +## filtered: ## } ## -## See the storage.conf man page for more information. +## See the storage.conf manual page for more information. ## ## Only newsgroups, class, and (for CNFS, to specify the metacycbuff) ## options are required; the other keys are optional. 
If any CNFS @@ -47,3 +48,13 @@ method tradspool { # class: 2 # options: TEXT #} + +## If dontrejectfiltered is set to true in inn.conf, keep filtered articles +## in this class. A third metacycbuff is used for them. + +#method cnfs { +# newsgroups: * +# class: 3 +# filtered: true +# options: FILTERED +#} diff --git a/scripts/inncheck.in b/scripts/inncheck.in index db187b610..6887e688a 100644 --- a/scripts/inncheck.in +++ b/scripts/inncheck.in @@ -1052,6 +1052,7 @@ sub storage_conf { 'expires' => 'mintime[,maxtime] definition', 'options' => 'string', 'exactmatch' => 'boolean', + 'filtered' => 'boolean', } } ); diff --git a/storage/interface.c b/storage/interface.c index 81e7a25b4..d5393af26 100644 --- a/storage/interface.c +++ b/storage/interface.c @@ -36,6 +36,7 @@ static unsigned int typetoindex[256]; int SMerrno; char *SMerrorstr = NULL; static bool Initialized = false; +static bool filteredKeyUsed = false; bool SMopenmode = false; bool SMpreopen = false; @@ -261,6 +262,7 @@ ParseTime(char *tmbuf) #define SMexpire 14 #define SMoptions 15 #define SMexactmatch 16 +#define SMfiltered 17 static CONFTOKEN smtoks[] = { {SMlbrace, (char *) "{" }, @@ -272,6 +274,7 @@ static CONFTOKEN smtoks[] = { {SMexpire, (char *) "expires:" }, {SMoptions, (char *) "options:" }, {SMexactmatch, (char *) "exactmatch:"}, + {SMfiltered, (char *) "filtered:" }, {0, NULL } }; @@ -298,6 +301,7 @@ SMreadconfig(void) char *options = 0; int inbrace; bool exactmatch = false; + bool filtered = false; /* if innconf isn't already read in, do so. 
*/ if (innconf == NULL) { @@ -321,6 +325,7 @@ SMreadconfig(void) } free(path); + filteredKeyUsed = false; inbrace = 0; while ((tok = CONFgettoken(smtoks, f)) != NULL) { if (!inbrace) { @@ -351,6 +356,7 @@ SMreadconfig(void) minexpire = 0; maxexpire = 0; exactmatch = false; + filtered = false; } else { type = tok->type; @@ -403,6 +409,13 @@ SMreadconfig(void) || strcasecmp(p, "on") == 0) exactmatch = true; break; + case SMfiltered: + if (strcasecmp(p, "true") == 0 || strcasecmp(p, "yes") == 0 + || strcasecmp(p, "on") == 0) { + filtered = true; + filteredKeyUsed = true; + } + break; default: SMseterror(SMERR_CONFIG, "Unknown keyword in method declaration"); @@ -448,6 +461,7 @@ SMreadconfig(void) sub->minexpire = minexpire; sub->maxexpire = maxexpire; sub->exactmatch = exactmatch; + sub->filtered = filtered; free(method); method = 0; @@ -643,6 +657,7 @@ SMgetsub(const ARTHANDLE article) && (!sub->maxsize || (article.len <= sub->maxsize)) && (!sub->minexpire || article.expires >= sub->minexpire) && (!sub->maxexpire || (article.expires <= sub->maxexpire)) + && (!filteredKeyUsed || article.filtered == sub->filtered) && MatchGroups(article.groups, article.groupslen, sub->pattern, sub->exactmatch)) { if (InitMethod(typetoindex[sub->type])) diff --git a/storage/interface.h b/storage/interface.h index b3cc35ca6..51beab910 100644 --- a/storage/interface.h +++ b/storage/interface.h @@ -38,13 +38,12 @@ typedef struct __S_SUB__ { time_t maxexpire; /* Maximum expire offset to send method */ int numpatterns; /* Number of patterns in patterns */ int class; /* Number of the storage class for this subscription */ - char *pattern; /* Wildmat pattern to check against the - groups to determine if the article - should go to this method */ - char *options; /* additional options specific to the - method */ - bool exactmatch; /* all newsgroups to which article belongs + char *pattern; /* Wildmat pattern to check against the groups to + determine if the article should go to this method */ + 
char *options; /* Additional options specific to the method */ + bool exactmatch; /* All newsgroups to which article belongs should match the patterns */ + bool filtered; /* Article was marked by a filter */ struct __S_SUB__ *next; } STORAGE_SUB;