Skip to content

Commit

Permalink
make newsfeed update delay adaptive per-feed
Browse files Browse the repository at this point in the history
  • Loading branch information
tejing1 committed Nov 13, 2024
1 parent 2cfac43 commit 568479d
Show file tree
Hide file tree
Showing 5 changed files with 325 additions and 54 deletions.
3 changes: 0 additions & 3 deletions homeConfigurations/tejing/news/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -39,9 +39,6 @@ in
# Enable my sfeed module
my.sfeed.enable = true;

# Update every hour, at a non-round time
my.sfeed.update = "*:45:35";

# Pass useful args through to submodule config
my.sfeed.rc._module.args = { inherit pkgs my; };
my.sfeed.rc.imports = [
Expand Down
11 changes: 5 additions & 6 deletions homeConfigurations/tejing/news/rc.public.nix
Original file line number Diff line number Diff line change
Expand Up @@ -56,15 +56,14 @@ in
};

# Gets around cloudflare's bot detection, last time I checked
fetch.imitate_browser = {
helper.curl_cf = {
code = ''
curl --no-alpn -fsSLm 15 -A ${escapeShellArg innocuousUserAgent} "$2"'';
curl --no-alpn -fsSL -A ${escapeShellArg innocuousUserAgent} "$@"'';
inputs = [ pkgs.curl ];
};
fetch.imitate_browser_post = {
fetch.imitate_browser = {
code = ''
curl --no-alpn -fsSLm 15 -A ${escapeShellArg innocuousUserAgent} -X POST "$2"'';
inputs = [ pkgs.curl ];
curl_cf -m 15 "$2"'';
};

# A good general pattern for scraping html pages
Expand Down Expand Up @@ -114,7 +113,7 @@ in
helper.normalize_dates = {
code = ''awk -F'\t' -v OFS=$'\t' -f '' + pkgs.writeText "sfeed_process_dates.awk" ''
BEGIN {
datecmd = "${pkgs.coreutils}/bin/date -f- +%s"
datecmd = "${pkgs.coreutils}/bin/date -uf- +%s"
PROCINFO[datecmd, "pty"] = 1
}
{
Expand Down
Binary file modified homeConfigurations/tejing/news/rc.secret.nix
Binary file not shown.
233 changes: 188 additions & 45 deletions homeModules/sfeed/default.nix
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ let
optionalString optionalAttrs splitString mapAttrsToList groupBy'
nameValuePair removeSuffix unique filterAttrs makeBinPath
mkEnableOption mkOption mkIf;
inherit (lib.types) attrsOf submodule str ints listOf package nullOr submoduleWith;
inherit (lib.types) attrsOf submodule str ints float listOf package nullOr submoduleWith;
inherit (pkgs) resholve;

# Changing these 2 lines will safely relocate this module's options
Expand All @@ -33,8 +33,7 @@ let
defineFunctions = funcs: concatMapStringsSep "\n"
({name, value}: ''
${name}() {
${indentString " " ''
${removeSuffix "\n" value}''}
${indentString " " (removeSuffix "\n" value)}
}'')
(if isList funcs
then funcs
Expand All @@ -44,12 +43,9 @@ let
# like branches
caseStatement = word: default: branches: ''
case ${word} in
${indentString " " ''
${concatMapStringsSep "\n" ({ patterns, commands }: ''
${indentString " " (concatMapStringsSep "\n" ({ patterns, commands }: ''
${patterns})
${indentString " " ''
${removeSuffix "\n" commands}
;;''}'')
${indentString " " (removeSuffix "\n" commands + "\n;;")}'')
(mapAttrsToList
(_: patterns:
{ patterns = concatMapStringsSep "|" escapeShellArg patterns;
Expand All @@ -63,44 +59,116 @@ let
(mapAttrsToList
(pattern: commands: { inherit pattern commands; })
branches))
++ [ { patterns = "*"; commands = default; } ])}''}
++ [ { patterns = "*"; commands = default; } ]))}
esac'';

# Code for the feed() function
feed_function = defineFunctions {
feed = ''
[ "$(jobs | wc -l)" -ge ${toString cfg.jobs} ] && wait -nf
_feed "$@" &
settings = concatMapStringsSep "\n" ({name,value}: "${name}=${escapeShellArg value}") (mapAttrsToList nameValuePair {
maxjobs = cfg.jobs;
sfeedpath="${config.home.homeDirectory}/.sfeed/feeds";
stamppath="${config.home.homeDirectory}/.sfeed/stamps";
});

makedirs = concatMapStringsSep "\n" (dir: "mkdir -p ${dir}") [
''"$stamppath"''
];

cp_function = defineFunctions {
cp_function = ''
test -n "$(declare -f "$1")" || return
eval "''${_/#"$1"/"$2"}"
'';
};

# Code for the _feed() function
feed_function = ''
cp_function _feed __feed
${defineFunctions {
"_feed" = ''
local name="$(printf '%s' "$1" | tr '/' '_')"
local file="$sfeedpath/$name"
local stamp="$stamppath/$name"
if __feed "$@"; then
touch "$file"
rm -f "$stamp"
return 0
else
touch "$stamp"
return 1
fi
'';
}}'';

# Code for the adaptfeed() function
adaptfeed_function = defineFunctions {
adaptfeed = ''
local file="''${sfeedpath}/$(printf '%s' "''${10}" | tr '/' '_')"
local stamp="''${sfeedpath%%/feeds}/stamps/$(printf '%s' "''${10}" | tr '/' '_')"
mkdir -p "''${sfeedpath%%/feeds}/stamps"
if [ -e "$file" ]; then
local lastchecked="$(stat -c'%Y' "$file")"
else
local lastchecked=0
fi
if [ -e "$stamp" ]; then
local lastfailed="$(stat -c'%Y' "$stamp")"
else
local lastfailed=0
fi
# Temporarily set pipefail
local restore_opts="$(shopt -po pipefail)";set -o pipefail
if sort -k1,1n "$file" | awk -F'\t' -v recentdiv="$1" -v recentmin="$2" -v regressiondiv="$3" -v regressionmin="$4" -v maxdelay="$5" -v firststeplength="$6" -v weightdoublingtime="$7" -v backofffactor="$8" -v deferprobablility="$9" -v lastchecked="$lastchecked" -v lastfailed="$lastfailed" -f ${./regressions.awk}; then
# Restore pipefail setting
eval "$restore_opts"
else
# Restore pipefail setting
eval "$restore_opts"
shift 9
feed "$@" || return
fi
'';
};

# Command to go in the feeds() function, for a given feed
feed_command = name: {url, baseurl, encoding, adapt, ... }:
(if adapt.enable then
"adaptfeed " + escapeShellArgs [
adapt.recentdiv
adapt.recentmin
adapt.regressiondiv
adapt.regressionmin
adapt.maxdelay
adapt.firststeplength
adapt.weightdoublingtime
adapt.backofffactor
adapt.deferprobablility
]
else
"feed"
) + " " + escapeShellArgs (
[ name url ] ++
(if encoding != ""
then [ baseurl encoding ]
else
(if baseurl != ""
then [ baseurl ]
else []
)
)
);

# Code for the feeds() function
feeds_function = defineFunctions {
feeds = if length (attrValues cfg.rc.feeds) == 0 then ":" else
(concatMapStringsSep "\n"
(feed_args: "feed " + escapeShellArgs feed_args)
(mapAttrsToList
(name: {url, baseurl ? "", encoding ? "", ... }:
[ name url ] ++
(if encoding != ""
then [ baseurl encoding ]
else
(if baseurl != ""
then [ baseurl ]
else [])))
cfg.rc.feeds));
concatStringsSep "\n" (mapAttrsToList feed_command cfg.rc.feeds);
};

# Code for all helper functions defined through cfg.rc.helper
helper_functions = defineFunctions
(mapAttrs (_: v: v.code) cfg.rc.helper
// optionalAttrs
(any
(feed: any (name: hasAttr name feed) overrideableFuncs)
(attrValues cfg.rc.feeds))
{cp_function = ''
test -n "$(declare -f "$1")" || return
eval "''${_/#"$1"/"$2"}"'';});
helper_functions = defineFunctions (mapAttrs (_: v: v.code) cfg.rc.helper);

# Code to define all configured implementations for ${name} and override the ${name}() function
functionOverride = name: optionalString
Expand All @@ -124,15 +192,19 @@ let
# Generate the entire sfeedrc file
sfeedrc = resholve.writeScript "sfeedrc" {
interpreter = "none";
inputs = unique (concatMap (x: if x ? inputs then x.inputs else []) (concatMap attrValues (attrValues cfg.rc)));
inputs = unique ([ pkgs.gawk ] ++ concatMap (x: if x ? inputs then x.inputs else []) (concatMap attrValues (attrValues cfg.rc)));
execer = unique (concatMap (x: if x ? execer then x.execer else []) (concatMap attrValues (attrValues cfg.rc)));
fake.function = [ "_feed" ] ++ map (name: "__${name}") overrideableFuncs;
fake.function = [ "feed" "__feed" ] ++ map (name: "__${name}") overrideableFuncs;
}
(concatStringsSep "\n\n"
(map (removeSuffix "\n")
(filter (x: x != "") (
[
settings
makedirs
cp_function
feed_function
adaptfeed_function
feeds_function
helper_functions
] ++ map functionOverride overrideableFuncs))));
Expand Down Expand Up @@ -160,6 +232,64 @@ let
type = str;
description = "URL of the feed";
};
baseurl = mkOption {
type = str;
default = "";
description = "Base URL of the feed. If set to the empty string, the feed url is used.";
};
encoding = mkOption {
type = str;
default = "";
description = "Encoding of the feed. If set to the empty string, it is autodetected, falling back to utf-8.";
};
adapt = {
enable = mkEnableOption "adapting the timing of checks for this feed to its history" // {default = true;};
recentdiv = mkOption {
type = ints.positive;
default = 15;
description = "Divide the time from latest entry to latest check by this to determine recentdelay";
};
recentmin = mkOption {
type = ints.unsigned;
default = 60*60*24;
description = "Clamp recentdelay to be at least this";
};
regressiondiv = mkOption {
type = ints.positive;
default = 50;
description = "Divide the regression-estimated time per release by this to determine regressiondelay";
};
regressionmin = mkOption {
type = ints.unsigned;
default = 60*30;
description = "Clamp regressiondelay to be at least this";
};
maxdelay = mkOption {
type = ints.unsigned;
default = 60*60*24*30;
description = "Clamp the minimum of recentdelay and regressiondelay to be at most this, giving the final check delay";
};
firststeplength = mkOption {
type = ints.unsigned;
default = 60*60;
description = "The regression should consider the history to start this amount of time before the first entry";
};
weightdoublingtime = mkOption {
type = ints.positive;
default = 60*60*24*90;
description = "The doubling time of the exponential time-weighting used in the regression";
};
backofffactor = mkOption {
type = float;
default = 1.2;
description = "Check again after a failure if time since last success is more than this multiple of the time between last success and last failure.";
};
deferprobablility = mkOption {
type = float;
default = 0.1;
description = "Probability (between 0 and 1) with which to randomly defer checks when they are otherwise considered ready. Breaks up cadence between feeds over time.";
};
};
} // listToAttrs (map (func: nameValuePair func (mkOption {
type = nullOr str;
default = null;
Expand Down Expand Up @@ -205,14 +335,24 @@ in
{
options = putopt {
enable = mkEnableOption "sfeed";
update = mkOption {
type = str;
default = "hourly";
description = "systemd calendar event string describing when to update feeds (see man systemd.time)";
update.averagedelay = mkOption {
type = ints.positive;
default = 600;
description = "Average delay, in seconds, between runs of the service.";
};
update.deviation = mkOption {
type = ints.unsigned;
default = 30;
description = "How long, in seconds, to randomly deviate either way on the delay.";
};
update.accuracy = mkOption {
type = ints.unsigned;
default = 5;
description = "How long, in seconds, to non-randomly deviate either way on the delay to coalesce with other wakeups.";
};
jobs = mkOption {
type = ints.positive;
default = 12;
default = 16;
description = "number of simultaneous fetches to run";
};
rc = mkOption {
Expand All @@ -231,12 +371,13 @@ in
home.file.".sfeed/sfeedrc".source = sfeedrc;

systemd.user.services.sfeed_update = {
Unit.Description = "news feed update";
Unit.Description = "news feed updater";
Service.Environment = [ "PATH=${makeBinPath (attrValues {
inherit (pkgs)
sfeed
curl
glibc # for iconv
findutils # for xargs
coreutils
;
})}" ];
Expand All @@ -245,8 +386,10 @@ in
systemd.user.timers.sfeed_update = {
Unit.Description = "news feed update timer";
Install.WantedBy = [ "timers.target" ];
Timer.OnCalendar = cfg.update;
Timer.Persistent = true;
Timer.OnActiveSec = 0;
Timer.OnUnitInactiveSec = cfg.update.averagedelay - cfg.update.deviation - cfg.update.accuracy;
Timer.RandomizedDelaySec = 2 * cfg.update.deviation;
Timer.AccuracySec = 2 * cfg.update.accuracy;
};
};
}
Loading

0 comments on commit 568479d

Please sign in to comment.