From cce50290fa5e5321fd4e70747015c0bea473ce78 Mon Sep 17 00:00:00 2001
From: Niels Nuyttens
Date: Mon, 12 Feb 2024 13:43:36 +0100
Subject: [PATCH] Fix stupid web editor merge

---
 .../confidence_based/metrics.py | 39 +++++++++++++------
 1 file changed, 27 insertions(+), 12 deletions(-)

diff --git a/nannyml/performance_estimation/confidence_based/metrics.py b/nannyml/performance_estimation/confidence_based/metrics.py
index 204d70a5..5dcd94dd 100644
--- a/nannyml/performance_estimation/confidence_based/metrics.py
+++ b/nannyml/performance_estimation/confidence_based/metrics.py
@@ -277,14 +277,13 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict:
         chunk_record[f'sampling_error_{column_name}'] = metric_estimate_sampling_error
         chunk_record[f'realized_{column_name}'] = self._realized_performance(chunk_data)
         chunk_record[f'upper_confidence_boundary_{column_name}'] = np.minimum(
-            np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit,
-            estimated_metric_value + SAMPLING_ERROR_RANGE * metric_estimate_sampling_error,
-        )
-
-        chunk_record[f'lower_confidence_boundary_{column_name}'] = np.maximum(
-            -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit,
-            estimated_metric_value - SAMPLING_ERROR_RANGE * metric_estimate_sampling_error,
-        )
+            np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit,
+            estimated_metric_value + SAMPLING_ERROR_RANGE * metric_estimate_sampling_error,
+        )
+        chunk_record[f'lower_confidence_boundary_{column_name}'] = np.maximum(
+            -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit,
+            estimated_metric_value - SAMPLING_ERROR_RANGE * metric_estimate_sampling_error,
+        )
         chunk_record[f'upper_threshold_{column_name}'] = self.upper_threshold_value
         chunk_record[f'lower_threshold_{column_name}'] = self.lower_threshold_value
         chunk_record[f'alert_{column_name}'] = self.alert(estimated_metric_value)
@@ -340,6 +339,7 @@ def create(cls, key: str, use_case: ProblemType, **kwargs) -> Metric:
     @classmethod
     def register(cls, metric: str, use_case: ProblemType) -> Callable:
         """Register a Metric in the MetricFactory registry."""
+
         def inner_wrapper(wrapped_class: Type[Metric]) -> Type[Metric]:
             if metric in cls.registry:
                 if use_case in cls.registry[metric]:
@@ -355,6 +355,7 @@ def inner_wrapper(wrapped_class: Type[Metric]) -> Type[Metric]:
 @MetricFactory.register('roc_auc', ProblemType.CLASSIFICATION_BINARY)
 class BinaryClassificationAUROC(Metric):
     """CBPE binary classification AUROC Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -450,6 +451,7 @@ def estimate_roc_auc(y_pred_proba: pd.Series) -> float:
 @MetricFactory.register('f1', ProblemType.CLASSIFICATION_BINARY)
 class BinaryClassificationF1(Metric):
     """CBPE binary classification f1 Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -536,6 +538,7 @@ def estimate_f1(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> float:
 @MetricFactory.register('precision', ProblemType.CLASSIFICATION_BINARY)
 class BinaryClassificationPrecision(Metric):
     """CBPE binary classification precision Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -622,6 +625,7 @@ def estimate_precision(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> floa
 @MetricFactory.register('recall', ProblemType.CLASSIFICATION_BINARY)
 class BinaryClassificationRecall(Metric):
     """CBPE binary classification recall Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -707,6 +711,7 @@ def estimate_recall(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> float:
 @MetricFactory.register('specificity', ProblemType.CLASSIFICATION_BINARY)
 class BinaryClassificationSpecificity(Metric):
     """CBPE binary classification specificity Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -793,6 +798,7 @@ def estimate_specificity(y_pred: pd.DataFrame, y_pred_proba: pd.DataFrame) -> fl
 @MetricFactory.register('accuracy', ProblemType.CLASSIFICATION_BINARY)
 class BinaryClassificationAccuracy(Metric):
     """CBPE binary classification accuracy Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -860,6 +866,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
 @MetricFactory.register('confusion_matrix', ProblemType.CLASSIFICATION_BINARY)
 class BinaryClassificationConfusionMatrix(Metric):
     """CBPE binary classification confusion matrix Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -1324,7 +1331,7 @@ def get_true_pos_info(self, chunk_data: pd.DataFrame) -> Dict:
 
         true_pos_info['lower_confidence_boundary_true_positive'] = np.maximum(
             -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit,
-            estimated_true_positives - SAMPLING_ERROR_RANGE * sampling_error_true_positives
+            estimated_true_positives - SAMPLING_ERROR_RANGE * sampling_error_true_positives,
         )
 
         true_pos_info['upper_threshold_true_positive'] = self.true_positive_upper_threshold
@@ -1372,7 +1379,7 @@ def get_true_neg_info(self, chunk_data: pd.DataFrame) -> Dict:
 
         true_neg_info['lower_confidence_boundary_true_negative'] = np.maximum(
             -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit,
-            estimated_true_negatives - SAMPLING_ERROR_RANGE * sampling_error_true_negatives
+            estimated_true_negatives - SAMPLING_ERROR_RANGE * sampling_error_true_negatives,
         )
 
         true_neg_info['upper_threshold_true_negative'] = self.true_negative_upper_threshold
@@ -1526,6 +1533,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
 @MetricFactory.register('business_value', ProblemType.CLASSIFICATION_BINARY)
 class BinaryClassificationBusinessValue(Metric):
     """CBPE binary classification business value Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -1717,6 +1725,7 @@ def _ensure_targets(self, data: pd.DataFrame) -> Optional[pd.DataFrame]:
 @MetricFactory.register('roc_auc', ProblemType.CLASSIFICATION_MULTICLASS)
 class MulticlassClassificationAUROC(_MulticlassClassificationMetric):
     """CBPE multiclass classification AUROC Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -1783,6 +1792,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
 @MetricFactory.register('f1', ProblemType.CLASSIFICATION_MULTICLASS)
 class MulticlassClassificationF1(_MulticlassClassificationMetric):
     """CBPE multiclass classification f1 Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -1852,6 +1862,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
 @MetricFactory.register('precision', ProblemType.CLASSIFICATION_MULTICLASS)
 class MulticlassClassificationPrecision(_MulticlassClassificationMetric):
     """CBPE multiclass classification precision Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -1921,6 +1932,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
 @MetricFactory.register('recall', ProblemType.CLASSIFICATION_MULTICLASS)
 class MulticlassClassificationRecall(_MulticlassClassificationMetric):
     """CBPE multiclass classification recall Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -1990,6 +2002,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
 @MetricFactory.register('specificity', ProblemType.CLASSIFICATION_MULTICLASS)
 class MulticlassClassificationSpecificity(_MulticlassClassificationMetric):
     """CBPE multiclass classification specificity Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -2063,6 +2076,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
 @MetricFactory.register('accuracy', ProblemType.CLASSIFICATION_MULTICLASS)
 class MulticlassClassificationAccuracy(_MulticlassClassificationMetric):
     """CBPE multiclass classification accuracy Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -2129,6 +2143,7 @@ def _realized_performance(self, data: pd.DataFrame) -> float:
 @MetricFactory.register('confusion_matrix', ProblemType.CLASSIFICATION_MULTICLASS)
 class MulticlassClassificationConfusionMatrix(Metric):
     """CBPE multiclass classification confusion matrix Metric Class."""
+
     def __init__(
         self,
         y_pred_proba: ModelOutputsType,
@@ -2343,7 +2358,7 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict:
                 )
                 chunk_record[f'upper_confidence_boundary_true_{true_class}_pred_{pred_class}'] = min(
                     np.inf if self.upper_threshold_value_limit is None else self.upper_threshold_value_limit,
-                    upper_confidence_boundary
+                    upper_confidence_boundary,
                 )
 
                 lower_confidence_boundary = (
@@ -2353,7 +2368,7 @@ def get_chunk_record(self, chunk_data: pd.DataFrame) -> Dict:
                 )
                 chunk_record[f'lower_confidence_boundary_true_{true_class}_pred_{pred_class}'] = max(
                     -np.inf if self.lower_threshold_value_limit is None else self.lower_threshold_value_limit,
-                    lower_confidence_boundary
+                    lower_confidence_boundary,
                 )
 
                 chunk_record[f'upper_threshold_true_{true_class}_pred_{pred_class}'] = self.alert_thresholds[
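
Note (not part of the patch): the hunks above only re-indent and re-punctuate existing code; behaviour is unchanged. For readers skimming the diff, the pattern being reformatted is the clamped confidence band CBPE builds around each estimated metric. Below is a minimal, self-contained Python sketch of that pattern; the function name and example numbers are illustrative only, and the value of SAMPLING_ERROR_RANGE is an assumption for the sketch rather than taken from the patch.

    import numpy as np

    SAMPLING_ERROR_RANGE = 3  # assumed band width, in units of sampling error

    def confidence_band(estimate, sampling_error, lower_limit=None, upper_limit=None):
        # Upper boundary: estimate plus the band, clamped to the metric's upper value limit (if any).
        upper = np.minimum(
            np.inf if upper_limit is None else upper_limit,
            estimate + SAMPLING_ERROR_RANGE * sampling_error,
        )
        # Lower boundary: estimate minus the band, clamped to the metric's lower value limit (if any).
        lower = np.maximum(
            -np.inf if lower_limit is None else lower_limit,
            estimate - SAMPLING_ERROR_RANGE * sampling_error,
        )
        return lower, upper

    # Example: an estimated AUROC of 0.97 with sampling error 0.02 is clipped at 1.0 on top.
    print(confidence_band(0.97, 0.02, lower_limit=0.0, upper_limit=1.0))  # ~(0.91, 1.0)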