From 4d8a283e6642a7a07b5fcb487768cb021f18e396 Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Mon, 15 Apr 2024 15:58:59 +0800 Subject: [PATCH 1/2] algo: scale resource to 0~100 in re Signed-off-by: Zach Zhu --- .../portrait/horizontal/predictive/replicas_estimator.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/algorithm/kapacity/portrait/horizontal/predictive/replicas_estimator.py b/algorithm/kapacity/portrait/horizontal/predictive/replicas_estimator.py index f4b0902..17fd51b 100644 --- a/algorithm/kapacity/portrait/horizontal/predictive/replicas_estimator.py +++ b/algorithm/kapacity/portrait/horizontal/predictive/replicas_estimator.py @@ -113,6 +113,13 @@ def preprocess_data(self): df.sort_values(by=self.time_col, inplace=True) df = df.reset_index(drop=True) + # scale resource to 0~100 + resource_max = df[self.resource_col].max() + resource_scaling_factor = 1 if resource_max <= 100 else 10**np.ceil(np.log10(resource_max / 100)) + self.logger.info(f'resource scaling factor: {resource_scaling_factor}') + df[self.resource_col] = df[self.resource_col] / resource_scaling_factor + self.resource_target = self.resource_target / resource_scaling_factor + features = self.traffic_cols self.logger.info(f'checkout before filtering NaN: ' From 53ddab4551665d56bc6849c1fd57e6c7b3d1c208 Mon Sep 17 00:00:00 2001 From: Zach Zhu Date: Mon, 15 Apr 2024 16:01:54 +0800 Subject: [PATCH 2/2] algo: introduce estimation threshold control to re Signed-off-by: Zach Zhu --- .../portrait/horizontal/predictive/main.py | 16 ++++++++-- .../predictive/replicas_estimator.py | 32 +++++++++++++++---- 2 files changed, 39 insertions(+), 9 deletions(-) diff --git a/algorithm/kapacity/portrait/horizontal/predictive/main.py b/algorithm/kapacity/portrait/horizontal/predictive/main.py index 9ced49c..fb0e7d5 100644 --- a/algorithm/kapacity/portrait/horizontal/predictive/main.py +++ b/algorithm/kapacity/portrait/horizontal/predictive/main.py @@ -83,6 +83,16 @@ def parse_args(): 
parser.add_argument('--re-test-dataset-size-in-seconds', help='size of test dataset in seconds for replicas estimation model', required=False, default=86400) + parser.add_argument('--re-min-correlation-allowed', + help='minimum allowed correlation of replicas estimation model, ' + 'the estimation would fail if the model\'s correlation is lower than this threshold, ' + 'this arg should be a float number within range [0, 1]', + required=False, default=0.9) + parser.add_argument('--re-max-mse-allowed', + help='maximum allowed MSE of replicas estimation model, ' + 'the estimation would fail if the model\'s MSE is larger than this threshold, ' + 'this arg should be a float number within range [0, +∞)', + required=False, default=10.0) parser.add_argument('--scaling-freq', help='frequency of scaling, the duration should be larger than the frequency' 'of the time series forecasting model', required=True) @@ -131,12 +141,14 @@ def predict_replicas(args, metric_ctx, pred_traffics): traffic_col, metric_ctx.resource_target, int(args.re_time_delta_hours), - int(args.re_test_dataset_size_in_seconds)) + int(args.re_test_dataset_size_in_seconds), + float(args.re_min_correlation_allowed), + float(args.re_max_mse_allowed)) if 'NO_RESULT' in pred['rule_code'].unique(): raise RuntimeError('there exist points that no replica number would meet the resource target, please consider setting a more reasonable resource target') return pred except estimator.EstimationException as e: - raise RuntimeError("replicas estimation failed, this may be caused by insufficient or irregular history data") from e + raise RuntimeError(f'replicas estimation failed, this may be caused by insufficient or irregular history data, detailed estimation info: {e.info}') from e def merge_history_dict(history_dict): diff --git a/algorithm/kapacity/portrait/horizontal/predictive/replicas_estimator.py b/algorithm/kapacity/portrait/horizontal/predictive/replicas_estimator.py index 17fd51b..e012b1e 100644 --- 
a/algorithm/kapacity/portrait/horizontal/predictive/replicas_estimator.py +++ b/algorithm/kapacity/portrait/horizontal/predictive/replicas_estimator.py @@ -635,7 +635,12 @@ def bin2str(x): class EstimationException(Exception): - pass + def __init__(self, message, info): + self.message = message + self.info = info + + def __str__(self): + return self.message def estimate(data: pd.DataFrame, @@ -646,7 +651,9 @@ def estimate(data: pd.DataFrame, traffic_cols: list[str], resource_target: float, time_delta_hours: int, - test_dataset_size_in_seconds: int = 86400) -> pd.DataFrame: + test_dataset_size_in_seconds: int = 86400, + min_correlation_allowed: float = 0.9, + max_mse_allowed: float = 10.0) -> pd.DataFrame: logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s: %(message)s') logger = logging.getLogger() @@ -667,19 +674,30 @@ def estimate(data: pd.DataFrame, estimator.test() logger.info(f'********* testing cost time: {time.time() - st10} *********') - if (estimator.pearsonr[0] >= 0.9 and estimator.pearsonr[1] < 0.01 - and estimator.big_e_10 == 0 and estimator.mse < 10): + logger.info(f'********* [linear] correlation: {estimator.pearsonr[0]}, significance: {estimator.pearsonr[1]}, big_e_10: {estimator.big_e_10}, mse: {estimator.mse} *********') + logger.info(f'********* [residual] correlation: {estimator.pearsonr_rf[0]}, significance: {estimator.pearsonr_rf[1]}, big_e_10: {estimator.big_e_10_rf}, mse: {estimator.mse_rf} *********') + + if (estimator.pearsonr[0] >= min_correlation_allowed and estimator.pearsonr[1] < 0.01 + and estimator.big_e_10 == 0 and estimator.mse <= max_mse_allowed): st10 = time.time() estimator.policy_linear() logger.info(f'********* linear policy cost time: {time.time() - st10} *********') return estimator.output - elif (estimator.pearsonr_rf[0] >= 0.9 and estimator.pearsonr_rf[1] < 0.01 and estimator.big_e_10_rf == 0 - and estimator.mse_rf < 10 and estimator.pearsonr[0] >= 0.6 and estimator.pearsonr[1] < 0.01): + elif 
(estimator.pearsonr_rf[0] >= min_correlation_allowed and estimator.pearsonr_rf[1] < 0.01 and estimator.big_e_10_rf == 0 + and estimator.mse_rf <= max_mse_allowed and estimator.pearsonr[0] >= 0.6 and estimator.pearsonr[1] < 0.01): st10 = time.time() estimator.policy_residual() logger.info(f'********* residual policy cost time: {time.time() - st10} *********') return estimator.output else: - raise EstimationException("no policy fits") + raise EstimationException('no policy fits', + {'linear': {'correlation': estimator.pearsonr[0], + 'significance': estimator.pearsonr[1], + 'big_e_10': estimator.big_e_10, + 'mse': estimator.mse}, + 'residual': {'correlation': estimator.pearsonr_rf[0], + 'significance': estimator.pearsonr_rf[1], + 'big_e_10': estimator.big_e_10_rf, + 'mse': estimator.mse_rf}})