From 81a2a7090fafa11e4ef15aaa2ef59cd743dda954 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lloren=C3=A7=20Muntaner?= Date: Fri, 8 Nov 2024 07:36:15 +0100 Subject: [PATCH] Fix dynamic captcha when config changes to longer intervals (#2688) * Fix dynamic captcha when config changes to longer intervals * Fix issue * Add test and improve comments * Fix format --- .../src/storage/registration_rates.rs | 271 +++++++++++++++++- 1 file changed, 256 insertions(+), 15 deletions(-) diff --git a/src/internet_identity/src/storage/registration_rates.rs b/src/internet_identity/src/storage/registration_rates.rs index ce93d58229..69180a905b 100644 --- a/src/internet_identity/src/storage/registration_rates.rs +++ b/src/internet_identity/src/storage/registration_rates.rs @@ -11,9 +11,13 @@ pub struct RegistrationRates { /// Min heap of registration timestamps to calculate the reference registration rate. /// [prune_data] removes values that are older than now minus the reference rate interval whenever new data is added. reference_rate_data: MinHeap, + /// The interval between the moment of prunning and the oldest data point pruned for the `reference_rate_data` + reference_last_pruned_interval_ns: Option, /// Min heap of registration timestamps to calculate the current registration rate. /// [prune_data] removes values that are older than now minus the current rate interval whenever new data is added. current_rate_data: MinHeap, + /// The interval between the moment of prunning and the oldest data point pruned for the `current_rate_data` + current_last_pruned_interval_ns: Option, } #[derive(Debug, Clone, PartialEq)] @@ -21,11 +25,17 @@ pub struct NormalizedRegistrationRates { pub reference_rate_per_second: f64, pub current_rate_per_second: f64, pub captcha_threshold_rate: f64, + pub is_reference_data_insufficient: bool, + pub is_current_data_insufficient: bool, } impl NormalizedRegistrationRates { pub fn captcha_required(&self) -> bool { - self.current_rate_per_second > self.captcha_threshold_rate + // The data might be insuficcient when the config changes to a longer window. + // In that case, we trigger the captcha to be on the safe side. + self.is_current_data_insufficient + || self.is_reference_data_insufficient + || self.current_rate_per_second > self.captcha_threshold_rate } } @@ -42,7 +52,9 @@ impl RegistrationRates { ) -> Self { Self { reference_rate_data, + reference_last_pruned_interval_ns: None, current_rate_data, + current_last_pruned_interval_ns: None, } } pub fn new_registration(&mut self) { @@ -76,12 +88,24 @@ impl RegistrationRates { self.current_rate_data.len(), config.current_rate_retention_ns, ); + let captcha_threshold_rate = reference_rate_per_second * config.threshold_multiplier; let rates = NormalizedRegistrationRates { reference_rate_per_second, current_rate_per_second, captcha_threshold_rate, + // The only moment we won't have last pruned interval is when we upgrade to the code with this data. + // In that case, assume we have sufficient data because we won't change the config at the same time. + is_reference_data_insufficient: self + .reference_last_pruned_interval_ns + .map(|interval| interval < config.reference_rate_retention_ns) + .unwrap_or(false), + is_current_data_insufficient: self + .current_last_pruned_interval_ns + .map(|interval| interval < config.current_rate_retention_ns) + .unwrap_or(false), }; + Some(rates) } @@ -89,14 +113,18 @@ impl RegistrationRates { let Some(data_retention) = dynamic_captcha_config() else { return; }; - prune_data( + // If prunning doesn't return a a new interval, do not change the interval. + self.reference_last_pruned_interval_ns = prune_data( &mut self.reference_rate_data, data_retention.reference_rate_retention_ns, - ); - prune_data( + ) + .or(self.reference_last_pruned_interval_ns); + // If prunning doesn't return a a new interval, do not change the interval. + self.current_last_pruned_interval_ns = prune_data( &mut self.current_rate_data, data_retention.current_rate_retention_ns, - ); + ) + .or(self.current_last_pruned_interval_ns); } } @@ -122,10 +150,12 @@ fn dynamic_captcha_config() -> Option { } } -fn prune_data(data: &mut MinHeap, interval_ns: u64) { +/// Prune data older than the `now - interval` and return the longest interval pruned +fn prune_data(data: &mut MinHeap, interval_ns: u64) -> Option { const MAX_TO_PRUNE: usize = 100; let now = time(); + let mut latest_pruned_interval = None; for _ in 0..MAX_TO_PRUNE { let Some(timestamp) = data.peek() else { break; @@ -136,8 +166,12 @@ fn prune_data(data: &mut MinHeap, interval_ns: u64) { if timestamp > (now - interval_ns) { break; } + + latest_pruned_interval = Some(now - timestamp); data.pop(); } + + latest_pruned_interval } #[cfg(not(test))] @@ -169,7 +203,7 @@ mod test { #[test] fn should_calculate_rates() { - let mut registration_rates = setup(); + let mut registration_rates = setup(100, 1000); // no data -> 0 rates assert_eq!( @@ -178,6 +212,8 @@ mod test { reference_rate_per_second: 0.0, current_rate_per_second: 0.0, captcha_threshold_rate: 0.0, + is_current_data_insufficient: false, + is_reference_data_insufficient: false, } ); @@ -189,6 +225,8 @@ mod test { reference_rate_per_second: 0.001, // 1 / 1000, as per config current_rate_per_second: 0.01, // 1 / 100, as per config captcha_threshold_rate: 0.0012, // 20% more than the reference rate, as per config + is_current_data_insufficient: false, + is_reference_data_insufficient: false, } ); @@ -202,13 +240,15 @@ mod test { reference_rate_per_second: 0.002, // 2 / 1000, as per config current_rate_per_second: 0.02, // 2 / 100, as per config captcha_threshold_rate: 0.0024, // 20% more than the reference rate, as per config + is_current_data_insufficient: false, + is_reference_data_insufficient: false, } ); } #[test] fn should_calculate_rates_many_data_points() { - let mut registration_rates = setup(); + let mut registration_rates = setup(100, 1000); for _ in 0..1000 { registration_rates.new_registration(); TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); @@ -220,13 +260,139 @@ mod test { reference_rate_per_second: 1.0, current_rate_per_second: 1.0, captcha_threshold_rate: 1.2, // 20% more than the reference rate, as per config + is_current_data_insufficient: false, + is_reference_data_insufficient: false, } ); } + #[test] + fn should_enable_captcha_if_current_rate_could_have_missing_data() { + let mut registration_rates = setup(100, 1000); + + // Add data so that reference rate has enough data. + for _ in 0..1000 { + registration_rates.new_registration(); + TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); + } + + TIME.with_borrow_mut(|t| *t += Duration::from_secs(10).as_nanos() as u64); + registration_rates.new_registration(); + + // State of the current registrations heap: [10] + + TIME.with_borrow_mut(|t| *t += Duration::from_secs(60).as_nanos() as u64); + registration_rates.new_registration(); + + // State of the current registrations heap: [10, 70] + + TIME.with_borrow_mut(|t| *t += Duration::from_secs(10).as_nanos() as u64); + registration_rates.new_registration(); + + // State of the current registrations heap: [10, 70, 80] + + TIME.with_borrow_mut(|t| *t += Duration::from_secs(20).as_nanos() as u64); + registration_rates.new_registration(); + + // State of the current registrations heap: [10, 70, 80, 100] + + TIME.with_borrow_mut(|t| *t += Duration::from_secs(90).as_nanos() as u64); + registration_rates.new_registration(); + + // State of the current registrations heap: [100, 190]. 10, 70 and 80 got pruned. + + assert!(!registration_rates + .registration_rates() + .expect("reference rates is not defined") + .captcha_required()); + + // Change the config for current rate interval from 100 to 150 + // Timestamps [100, 190] are there. However, 70 and 80 should have been counted with a config of 150. + // They were pruned because the config at that time was 100. Therefore, there is missing data when we go to 150. + let new_current_interval_s = 150; + state::persistent_state_mut(|ps| { + ps.captcha_config = CaptchaConfig { + max_unsolved_captchas: 500, + captcha_trigger: CaptchaTrigger::Dynamic { + threshold_pct: 10, + current_rate_sampling_interval_s: new_current_interval_s, + reference_rate_sampling_interval_s: 100, + }, + } + }); + + // Captcha should be disable because we could have insufficient data + assert!(registration_rates + .registration_rates() + .expect("reference rates is not defined") + .captcha_required()); + } + + #[test] + fn should_enable_captcha_if_reference_rate_could_have_missing_data() { + let mut registration_rates = setup(100, 100); + + // Add data so that reference rate has enough data. + for _ in 0..100 { + registration_rates.new_registration(); + TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); + } + + TIME.with_borrow_mut(|t| *t += Duration::from_secs(10).as_nanos() as u64); + registration_rates.new_registration(); + + // State of the current registrations heap: [10] + + TIME.with_borrow_mut(|t| *t += Duration::from_secs(60).as_nanos() as u64); + registration_rates.new_registration(); + + // State of the current registrations heap: [10, 70] + + TIME.with_borrow_mut(|t| *t += Duration::from_secs(10).as_nanos() as u64); + registration_rates.new_registration(); + + // State of the current registrations heap: [10, 70, 80] + + TIME.with_borrow_mut(|t| *t += Duration::from_secs(20).as_nanos() as u64); + registration_rates.new_registration(); + + // State of the current registrations heap: [10, 70, 80, 100] + + TIME.with_borrow_mut(|t| *t += Duration::from_secs(90).as_nanos() as u64); + registration_rates.new_registration(); + + // State of the current registrations heap: [100, 190]. 10, 70 and 80 got pruned. + + assert!(!registration_rates + .registration_rates() + .expect("reference rates is not defined") + .captcha_required()); + + // Change the config for current rate interval from 100 to 150 + // Timestamps [100, 190] are there. However, 70 and 80 should have been counted with a config of 150. + // They were pruned because the config at that time was 100. Therefore, there is missing data when we go to 150. + let new_reference_interval_s = 150; + state::persistent_state_mut(|ps| { + ps.captcha_config = CaptchaConfig { + max_unsolved_captchas: 500, + captcha_trigger: CaptchaTrigger::Dynamic { + threshold_pct: 10, + current_rate_sampling_interval_s: 100, + reference_rate_sampling_interval_s: new_reference_interval_s, + }, + } + }); + + // Captcha should be disable because we could have insufficient data + assert!(registration_rates + .registration_rates() + .expect("reference rates is not defined") + .captcha_required()); + } + #[test] fn should_only_use_recent_data_for_current_rate() { - let mut registration_rates = setup(); + let mut registration_rates = setup(100, 1000); // initialize reference rate with 1 registration / second for _ in 0..1000 { registration_rates.new_registration(); @@ -247,13 +413,15 @@ mod test { current_rate_per_second: 3.0, reference_rate_per_second: 1.2, // increases too, but more slowly captcha_threshold_rate: 1.44, // reference rate * 1.2 (as per config) + is_current_data_insufficient: false, + is_reference_data_insufficient: false, } ); } #[test] - fn previous_config_should_not_affect_new_config() { - let mut registration_rates = setup(); + fn previous_config_should_not_affect_new_config_shorter_window() { + let mut registration_rates = setup(100, 1000); // initialize reference rate with 1 registration / second for _ in 0..1000 { registration_rates.new_registration(); @@ -274,6 +442,8 @@ mod test { current_rate_per_second: 3.0, reference_rate_per_second: 1.2, // increases too, but more slowly captcha_threshold_rate: 1.44, // reference rate * 1.2 (as per config) + is_current_data_insufficient: false, + is_reference_data_insufficient: false, } ); @@ -304,9 +474,73 @@ mod test { ); } + #[test] + fn changing_longer_reference_window_enables_captcha() { + let mut registration_rates = setup(100, 1000); + // initialize reference rate with 1 registration / second + for _ in 0..1000 { + registration_rates.new_registration(); + TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); + } + + assert!(!registration_rates + .registration_rates() + .expect("reference rates is not defined") + .captcha_required()); + + let longer_reference_window = 2000; + state::persistent_state_mut(|ps| { + ps.captcha_config = CaptchaConfig { + max_unsolved_captchas: 500, + captcha_trigger: CaptchaTrigger::Dynamic { + threshold_pct: 10, + current_rate_sampling_interval_s: 100, + reference_rate_sampling_interval_s: longer_reference_window, + }, + } + }); + + assert!(registration_rates + .registration_rates() + .expect("reference rates is not defined") + .captcha_required()); + } + + #[test] + fn changing_longer_current_window_enables_captcha() { + let mut registration_rates = setup(100, 1000); + // initialize reference rate with 1 registration / second + for _ in 0..1000 { + registration_rates.new_registration(); + TIME.with_borrow_mut(|t| *t += Duration::from_secs(1).as_nanos() as u64); + } + + assert!(!registration_rates + .registration_rates() + .expect("reference rates is not defined") + .captcha_required()); + + let longer_current_window = 200; + state::persistent_state_mut(|ps| { + ps.captcha_config = CaptchaConfig { + max_unsolved_captchas: 500, + captcha_trigger: CaptchaTrigger::Dynamic { + threshold_pct: 10, + current_rate_sampling_interval_s: longer_current_window, + reference_rate_sampling_interval_s: 1000, + }, + } + }); + + assert!(registration_rates + .registration_rates() + .expect("reference rates is not defined") + .captcha_required()); + } + #[test] fn should_prune_old_data() { - let mut registration_rates = setup(); + let mut registration_rates = setup(100, 1000); // add 100 registrations at t0 for _ in 0..100 { @@ -322,6 +556,8 @@ mod test { current_rate_per_second: 1.0, reference_rate_per_second: 0.1, captcha_threshold_rate: 0.12, + is_current_data_insufficient: false, + is_reference_data_insufficient: false, } ); @@ -337,6 +573,8 @@ mod test { current_rate_per_second: 0.01, reference_rate_per_second: 0.001, captcha_threshold_rate: 0.0012, + is_current_data_insufficient: false, + is_reference_data_insufficient: false, } ); } @@ -375,7 +613,10 @@ mod test { assert!(registration_rates.registration_rates().is_none()); } - fn setup() -> RegistrationRates { + fn setup( + current_rate_sampling_interval_s: u64, + reference_rate_sampling_interval_s: u64, + ) -> RegistrationRates { reset_time(); // setup config @@ -384,8 +625,8 @@ mod test { max_unsolved_captchas: 500, captcha_trigger: CaptchaTrigger::Dynamic { threshold_pct: 20, - current_rate_sampling_interval_s: 100, - reference_rate_sampling_interval_s: 1000, + current_rate_sampling_interval_s, + reference_rate_sampling_interval_s, }, } });