diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt index 820d7f7f225..e9b4e4a7e30 100644 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt @@ -41,7 +41,7 @@ interface ReportProcessor { /** The default implementation of [ReportProcessor]. */ companion object Default : ReportProcessor { private val logger: Logger = Logger.getLogger(this::class.java.name) - const private val PYTHON_LIBRARY_RESOURCE_NAME = + private const val PYTHON_LIBRARY_RESOURCE_NAME = "src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.zip" private val resourcePath: Path = this::class.java.classLoader.getJarResourcePath(PYTHON_LIBRARY_RESOURCE_NAME) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py index 5d218c8d9f5..48206af40de 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py @@ -14,65 +14,72 @@ from collections import defaultdict -class Measurement: - """Represents a measurement with a mean value and a standard deviation""" - value: float - sigma: float - metric_name: str +class Measurement: + """Represents a measurement with a mean value and a standard deviation""" + value: float + sigma: float + name: str - def __init__(self, value: float, sigma: float, metric_name: str): - self.value = value - self.sigma = sigma - self.metric_name = metric_name + def __init__(self, value: float, sigma: float, name: str): + self.value = value + self.sigma = sigma + self.name = name - def __repr__(self): - return 'Measurement({:.2f}, {:.2f}, {})\n'.format(self.value, self.sigma, self.metric_name) + def __repr__(self): + return 'Measurement({:.2f}, {:.2f}, {})\n'.format(self.value, self.sigma, + self.name) class SetMeasurementsSpec: - """Stores information about the relationships between sets and their - measurements.""" + """Stores information about relationships between sets and measurements. + + This class maintains data about subset relationships, cover relationships, + and measurements associated with sets. It provides methods to add and + retrieve this information. - __subsets_by_set: dict[int, list[int]] - # https://en.wikipedia.org/wiki/Cover_(topology) - __covers_by_set: dict[int, list[list[int]]] - __measurements_by_set: dict[int, list[Measurement]] + Attributes: + _subsets_by_set: A dictionary mapping a set ID to a list of its subset + set IDs. + _covers_by_set: A dictionary mapping a set ID to a list of its covers, + where each cover is a list of set IDs. See + https://en.wikipedia.org/wiki/Cover_(topology). + _measurements_by_set: A dictionary mapping a set ID to a list of + Measurement objects associated with that set. + """ - def __init__(self): - self.__subsets_by_set = defaultdict(list[int]) - self.__covers_by_set = defaultdict(list[list[int]]) - self.__measurements_by_set = defaultdict(list[Measurement]) + def __init__(self): + self._subsets_by_set = defaultdict(list[int]) + self._covers_by_set = defaultdict(list[list[int]]) + self._measurements_by_set = defaultdict(list[Measurement]) - def add_subset_relation(self, parent_set_id: int, child_set_id: int): - self.__subsets_by_set[parent_set_id].append(child_set_id) + def add_subset_relation(self, parent_set_id: int, child_set_id: int): + self._subsets_by_set[parent_set_id].append(child_set_id) - def add_cover(self, parent: int, children: list[int]): - self.__covers_by_set[parent].append(children) - for child in children: - self.add_subset_relation(parent, child) + def add_cover(self, parent: int, children: list[int]): + self._covers_by_set[parent].append(children) - def add_measurement(self, set_id: int, measurement: Measurement): - self.__measurements_by_set[set_id].append(measurement) + def add_measurement(self, set_id: int, measurement: Measurement): + self._measurements_by_set[set_id].append(measurement) - def all_sets(self) -> set[int]: - return set(i for i in self.__measurements_by_set.keys()) + def all_sets(self) -> set[int]: + return set(i for i in self._measurements_by_set.keys()) - def get_covers_of_set(self, set_id: int): - return self.__covers_by_set[set_id] + def get_covers_of_set(self, set_id: int) -> list[list[int]]: + return self._covers_by_set[set_id] - def get_subsets(self, parent_set_id): - return self.__subsets_by_set[parent_set_id] + def get_subsets(self, parent_set_id: int) -> list[int]: + return self._subsets_by_set[parent_set_id] - def get_measurements(self, measured_set_id): - return self.__measurements_by_set.get(measured_set_id) + def get_measurements(self, measured_set_id: int) -> list[Measurement]: + return self._measurements_by_set.get(measured_set_id) - def get_measurement_metric(self, measured_set_id): - measurement = self.__measurements_by_set.get(measured_set_id) - return measurement[0].metric_name + def get_measurement_metric(self, measured_set_id: int) -> str: + measurement = self._measurements_by_set.get(measured_set_id) + return measurement[0].name - def __repr__(self): - return (('SetMeasurementsSpec(' - 'subsets_by_set={},covers_by_set={},measurements_by_set={})') - .format(self.__subsets_by_set, self.__covers_by_set, - self.__measurements_by_set)) + def __repr__(self): + return (('SetMeasurementsSpec(' + 'subsets_by_set={},covers_by_set={},measurements_by_set={})') + .format(self._subsets_by_set, self._covers_by_set, + self._measurements_by_set)) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py index d7f624deae2..a8d73ed8864 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py @@ -20,175 +20,186 @@ from typing import Any SOLVER = "highs" - +MAX_ATTEMPTS = 10 SEMAPHORE = Semaphore() + class SolutionNotFoundError(ValueError): - __non_solution: Solution + _non_solution: Solution - def __init__(self, non_solution: Solution): - super().__init__(non_solution) - self.__non_solution = non_solution + def __init__(self, non_solution: Solution): + super().__init__(non_solution) + self._non_solution = non_solution - def get_non_solution_details(self): - return self.__non_solution + def get_non_solution_details(self): + return self._non_solution class Solver: - def __init__(self, set_measurement_spec: SetMeasurementsSpec): - variable_index_by_set_id = Solver.__map_sets_to_variables( - set_measurement_spec) - self.num_variables = len(variable_index_by_set_id) - self.__init_qp(self.num_variables) - self.__add_covers(set_measurement_spec, variable_index_by_set_id) - self.__add_subsets(set_measurement_spec, variable_index_by_set_id) - self.__add_measurement_targets(set_measurement_spec, - variable_index_by_set_id) - self.__init_base_value(set_measurement_spec, variable_index_by_set_id) - - self.variable_map = dict( - (variable_index_by_set_id[i], i) for i in variable_index_by_set_id) - - def __init_base_value(self, set_measurement_spec, variable_index_by_set_id): - mean_measurement_by_variable: dict[int, float] = {} - for measured_set in set_measurement_spec.all_sets(): - mean_measurement_by_variable[ - variable_index_by_set_id[measured_set]] = ( - sum(v.value - for v in set_measurement_spec.get_measurements( - measured_set)) / len( - set_measurement_spec.get_measurements(measured_set))) - self.base_value = np.array(list( - (mean_measurement_by_variable[i] - for i in range(0, self.num_variables)))) - - def __add_measurement_targets(self, set_measurement_spec, - variable_index_by_set_id): - for (measured_set, variable) in variable_index_by_set_id.items(): - variables = np.zeros(self.num_variables) - variables[variable] = 1 - for measurement in set_measurement_spec.get_measurements( - measured_set): - if abs(measurement.sigma) == 0: - self.__add_eq_term(variables, measurement.value) - else: - self.__add_loss_term( - np.multiply(variables, 1 / measurement.sigma), - -measurement.value / measurement.sigma) - - @staticmethod - def __map_sets_to_variables(set_measurement_spec) -> dict[int, int]: - variable_index_by_set_id: dict[int, int] = {} - num_variables = 0 - for measured_set in set_measurement_spec.all_sets(): - variable_index_by_set_id[measured_set] = num_variables - num_variables += 1 - return variable_index_by_set_id - - def __init_qp(self, num_variables): - self.num_variables = num_variables - # Minimize 1/2 x^T P x + q^T x - self.P = np.zeros(shape=(num_variables, num_variables)) - self.q = np.zeros(shape=(1, num_variables)) - # subject to G x <= h - self.G = [] - self.h = [] - # and A x = h - self.A = [] - self.b = [] - - def __add_subsets(self, set_measurement_spec, variable_index_by_set_id): - for measured_set in set_measurement_spec.all_sets(): - for subset in set(set_measurement_spec.get_subsets(measured_set)): - self.__add_parent_gt_child_term( - variable_index_by_set_id[measured_set], - variable_index_by_set_id[subset]) - - def __add_covers(self, set_measurement_spec, variable_index_by_set_id): - for measured_set in set_measurement_spec.all_sets(): - for cover in set_measurement_spec.get_covers_of_set(measured_set): - self.__add_cover_set_constraint( - list(variable_index_by_set_id[i] for i in cover), - variable_index_by_set_id[measured_set]) - - def __add_cover_set_constraint(self, cover_variables: list[int], - set_variable: int): - variables = np.zeros(self.num_variables) - variables.put(cover_variables, -1) - variables[set_variable] = 1 - self.__add_gt_term(variables) - - def __is_feasible(self, vector: np.array) -> bool: - for i, g in enumerate(self.G): - if np.dot(vector, g) > self.h[i][0]: - return False - return True - - def __add_parent_gt_child_term(self, parent: int, child: int): - variables = np.zeros(self.num_variables) - variables.put(parent, -1) - variables[child] = 1 - self.__add_gt_term(variables) - - def __add_loss_term(self, variables, k: float): - for v1, coeff1 in enumerate(variables): - self.q[0][v1] += coeff1 * k - for v2, coeff2 in enumerate(variables): - self.P[v1][v2] += coeff1 * coeff2 - - def __add_eq_term(self, variables, k: float): - self.A.append(variables) - self.b.append(k) - - def __add_gt_term(self, variables): - self.G.append(variables) - self.h.append([0]) - - def __solve(self): - x0 = np.random.randn(self.num_variables) - return self.__solve_with_initial_value(x0) - - def __solve_with_initial_value(self, x0) -> Solution: - problem = self.__problem() - solution = solve_problem(problem, solver=SOLVER, verbose=False) - return solution - - def __problem(self): - problem: Problem - if len(self.A) > 0: - problem = Problem( - self.P, self.q, np.array(self.G), np.array(self.h), - np.array(self.A), np.array(self.b)) + def __init__(self, set_measurement_spec: SetMeasurementsSpec): + variable_index_by_set_id = Solver._map_sets_to_variables( + set_measurement_spec) + self.num_variables = len(variable_index_by_set_id) + self._init_qp(self.num_variables) + self._add_covers(set_measurement_spec, variable_index_by_set_id) + self._add_subsets(set_measurement_spec, variable_index_by_set_id) + self._add_measurement_targets(set_measurement_spec, + variable_index_by_set_id) + self._init_base_value(set_measurement_spec, variable_index_by_set_id) + + self.variable_map = dict( + (variable_index_by_set_id[i], i) for i in variable_index_by_set_id) + + def _init_base_value(self, set_measurement_spec: SetMeasurementsSpec, + variable_index_by_set_id: dict[int, int]): + mean_measurement_by_variable: dict[int, float] = {} + for measured_set in set_measurement_spec.all_sets(): + mean_measurement_by_variable[ + variable_index_by_set_id[measured_set]] = ( + sum(v.value + for v in set_measurement_spec.get_measurements( + measured_set)) / len( + set_measurement_spec.get_measurements(measured_set))) + self.base_value = np.array(list( + (mean_measurement_by_variable[i] + for i in range(0, self.num_variables)))) + + def _add_measurement_targets(self, set_measurement_spec: SetMeasurementsSpec, + variable_index_by_set_id: dict[int, int]): + for (measured_set, variable) in variable_index_by_set_id.items(): + variables = np.zeros(self.num_variables) + variables[variable] = 1 + for measurement in set_measurement_spec.get_measurements( + measured_set): + if abs(measurement.sigma) == 0: + self._add_eq_term(variables, measurement.value) else: - problem = Problem( - self.P, self.q, np.array(self.G), np.array(self.h)) - return problem - - def solve(self) -> Solution: - if self.__is_feasible(self.base_value): - solution = Solution(x=self.base_value, - found=True, - extras={'status': 'trivial'}, - problem=self.__problem()) + self._add_loss_term( + np.multiply(variables, 1 / measurement.sigma), + -measurement.value / measurement.sigma) + + @staticmethod + def _map_sets_to_variables(set_measurement_spec: SetMeasurementsSpec) -> dict[int, int]: + variable_index_by_set_id: dict[int, int] = {} + num_variables = 0 + for measured_set in set_measurement_spec.all_sets(): + variable_index_by_set_id[measured_set] = num_variables + num_variables += 1 + return variable_index_by_set_id + + def _init_qp(self, num_variables: int): + self.num_variables = num_variables + # Minimize 1/2 x^T P x + q^T x + self.P = np.zeros(shape=(num_variables, num_variables)) + self.q = np.zeros(shape=(1, num_variables)) + # subject to G x <= h + self.G = [] + self.h = [] + # and A x = h + self.A = [] + self.b = [] + + def _add_subsets(self, set_measurement_spec: SetMeasurementsSpec, + variable_index_by_set_id: dict[int, int]): + for measured_set in set_measurement_spec.all_sets(): + for subset in set(set_measurement_spec.get_subsets(measured_set)): + self._add_parent_gt_child_term( + variable_index_by_set_id[measured_set], + variable_index_by_set_id[subset]) + + def _add_covers(self, set_measurement_spec: SetMeasurementsSpec, + variable_index_by_set_id: dict[int, int]): + for measured_set in set_measurement_spec.all_sets(): + for cover in set_measurement_spec.get_covers_of_set(measured_set): + self._add_cover_set_constraint( + list(variable_index_by_set_id[i] for i in cover), + variable_index_by_set_id[measured_set]) + + def _add_cover_set_constraint(self, cover_variables: set[int], + set_variable: int): + variables = np.zeros(self.num_variables) + variables.put(cover_variables, -1) + variables[set_variable] = 1 + self._add_gt_term(variables) + + def _is_feasible(self, vector: np.array) -> bool: + for i, g in enumerate(self.G): + if np.dot(vector, g) > self.h[i][0]: + return False + return True + + def _add_parent_gt_child_term(self, parent: int, child: int): + variables = np.zeros(self.num_variables) + variables.put(parent, -1) + variables[child] = 1 + self._add_gt_term(variables) + + def _add_loss_term(self, variables: np.array, k: float): + for v1, coeff1 in enumerate(variables): + self.q[0][v1] += coeff1 * k + for v2, coeff2 in enumerate(variables): + self.P[v1][v2] += coeff1 * coeff2 + + def _add_eq_term(self, variables: np.array, k: float): + self.A.append(variables) + self.b.append(k) + + def _add_gt_term(self, variables: np.array): + self.G.append(variables) + self.h.append([0]) + + def _solve(self): + x0 = np.random.randn(self.num_variables) + return self._solve_with_initial_value(x0) + + def _solve_with_initial_value(self, x0) -> Solution: + problem = self._problem() + solution = solve_problem(problem, solver=SOLVER, verbose=False) + return solution + + def _problem(self): + problem: Problem + if len(self.A) > 0: + problem = Problem( + self.P, self.q, np.array(self.G), np.array(self.h), + np.array(self.A), np.array(self.b)) + else: + problem = Problem( + self.P, self.q, np.array(self.G), np.array(self.h)) + return problem + + def solve(self) -> Solution: + attempt_count = 0 + if self._is_feasible(self.base_value): + solution = Solution(x=self.base_value, + found=True, + extras={'status': 'trivial'}, + problem=self._problem()) + else: + while attempt_count < MAX_ATTEMPTS: + # TODO: check if qpsolvers is thread safe, + # and remove this semaphore. + SEMAPHORE.acquire() + solution = self._solve() + SEMAPHORE.release() + + if solution.found: + break else: - # TODO: check if qpsolvers is thread safe, - # and remove this semaphore. - SEMAPHORE.acquire() - solution = self.__solve() - SEMAPHORE.release() - - if not solution.found: - raise SolutionNotFoundError(solution) - - return solution - - def translate_solution(self, solution: Solution) -> dict[int, float]: - result: dict[int, Any] = {} - for var in range(0, self.num_variables): - result[self.variable_map[var]] = solution.x[var] - return result - - def solve_and_translate(self): - solution = self.solve() - return self.translate_solution(solution) \ No newline at end of file + attempt_count += 1 + + if not solution.found: + raise SolutionNotFoundError(solution) + + return solution + + def translate_solution(self, solution: Solution) -> dict[int, float]: + result: dict[int, float] = {} + for var in range(0, self.num_variables): + result[self.variable_map[var]] = solution.x[var] + return result + + def solve_and_translate(self) -> dict[int, float]: + solution = self.solve() + return self.translate_solution(solution) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index 3c15e588c15..272e80d8646 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -18,347 +18,604 @@ from noiseninja.noised_measurements import SetMeasurementsSpec, Measurement from noiseninja.solver import Solver -from typing import FrozenSet +from qpsolvers import Solution +from typing import Any, FrozenSet, Tuple from itertools import combinations from functools import reduce -class MetricReport: - """Represents a metric sub-report view (e.g. MRC, AMI, etc) - within a report. +def get_subset_relationships(edp_combinations: list[FrozenSet[str]]) -> list[ + Tuple[FrozenSet[str], FrozenSet[str]]]: + """Returns a list of tuples where first element in the tuple is the parent + and second element is the subset.""" + subset_relationships = [] + for comb1, comb2 in combinations(edp_combinations, 2): + if comb1.issubset(comb2): + subset_relationships.append((comb2, comb1)) + elif comb2.issubset(comb1): + subset_relationships.append((comb1, comb2)) + return subset_relationships + + +def is_cover(target_set: FrozenSet[str], + possible_cover: list[FrozenSet[str]]) -> bool: + """Checks if a collection of sets covers a target set. + + Args: + target_set: The set that should be covered. + possible_cover: A collection of sets that may cover the target set. + + Returns: + True if the union of the sets in `possible_cover` equals `target_set`, + False otherwise. + """ + union_of_possible_cover = reduce( + lambda x, y: x.union(y), possible_cover + ) + return union_of_possible_cover == target_set + + +def get_covers(target_set: FrozenSet[str], other_sets: list[FrozenSet[str]]) -> \ +list[Tuple[FrozenSet[str], list[FrozenSet[str]]]]: + """Finds all combinations of sets from `other_sets` that cover `target_set`. + + This function identifies all possible combinations of sets within `other_sets` + whose union equals the `target_set`. It only considers sets that are subsets of + the `target_set`. + + Args: + target_set: The set that needs to be covered. + other_sets: A collection of sets that may be used to cover the `target_set`. + + Returns: + A list of tuples, where each tuple represents a covering relationship. + The first element of the tuple is the `target_set`, and the second element + is a tuple containing the sets from `other_sets` that cover it. + """ + + def generate_all_length_combinations(data: list[Any]) -> list[ + tuple[Any, ...]]: + """Generates all possible combinations of elements from a list. + + Args: + data: A list of elements. + + Returns: + A list of tuples, where each tuple represents a combination of elements. """ + return [ + comb for r in range(1, len(data) + 1) for comb in + combinations(data, r) + ] + + cover_relationship = [] + all_subsets_of_possible_covered = [other_set for other_set in other_sets + if + other_set.issubset(target_set)] + possible_covers = generate_all_length_combinations( + all_subsets_of_possible_covered) + for possible_cover in possible_covers: + if is_cover(target_set, possible_cover): + cover_relationship.append((target_set, possible_cover)) + return cover_relationship + + +def get_cover_relationships(edp_combinations: list[FrozenSet[str]]) -> list[ + Tuple[FrozenSet[str], list[FrozenSet[str]]]]: + """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). + For each set (s_i) in the list, enumerate combinations of all sets excluding this one. + For each of these considered combinations, take their union and check if it is equal to + s_i. If so, this combination is a cover of s_i. + """ + cover_relationships = [] + for i in range(len(edp_combinations)): + possible_covered = edp_combinations[i] + other_sets = edp_combinations[:i] + edp_combinations[i + 1:] + cover_relationship = get_covers(possible_covered, other_sets) + cover_relationships.extend(cover_relationship) + return cover_relationships - __reach_time_series_by_edp_combination: dict[FrozenSet[str], list[Measurement]] - def __init__( - self, - reach_time_series_by_edp_combination: dict[FrozenSet[str], list[Measurement]], - ): - num_periods = len(next(iter(reach_time_series_by_edp_combination.values()))) - for series in reach_time_series_by_edp_combination.values(): - if len(series) != num_periods: - raise ValueError( - "all time series must have the same length {1: d} vs {2: d}".format( - len(series), len(num_periods) - ) - ) - - self.__reach_time_series_by_edp_combination = ( - reach_time_series_by_edp_combination - ) +class MetricReport: + """Represents a metric sub-report view (e.g., MRC, AMI) within a report. + + This class stores and provides access to reach measurements for different + EDP (Event, Data Provider, and Platform) combinations. It holds two types + of reach data: + + * Cumulative reach over time, represented as a time series. + * Reach for the whole campaign. + + Attributes: + _reach_time_series: A dictionary mapping EDP combinations (represented + as frozensets of strings) to lists of Measurement + objects, where each list represents a time series of + reach values. + _reach_whole_campaign: A dictionary mapping EDP combinations to + Measurement objects representing the reach for + the whole campaign. + """ - def sample_with_noise(self) -> "MetricReport": - """ - :return: a new MetricReport where measurements have been resampled - according to their mean and variance. - """ - return MetricReport( - reach_time_series_by_edp_combination={ - edp_comb: [ - MetricReport.__sample_with_noise(measurement) - for measurement in self.__reach_time_series_by_edp_combination[ - edp_comb - ] - ] - for edp_comb in self.__reach_time_series_by_edp_combination.keys() - } + def __init__( + self, + reach_time_series: dict[FrozenSet[str], list[Measurement]], + reach_whole_campaign: dict[FrozenSet[str], Measurement], + ): + num_periods = len(next(iter(reach_time_series.values()))) + for series in reach_time_series.values(): + if len(series) != num_periods: + raise ValueError( + "All time series must have the same length {1: d} vs {2: d}".format( + len(series), len(num_periods) + ) ) - def get_edp_comb_measurement(self, edp_comb: str, period: int): - return self.__reach_time_series_by_edp_combination[edp_comb][period] - - def get_edp_combs(self): - return list(self.__reach_time_series_by_edp_combination.keys()) - - def get_num_edp_combs(self): - return len(self.__reach_time_series_by_edp_combination.keys()) - - def get_number_of_periods(self): - return len(next(iter(self.__reach_time_series_by_edp_combination.values()))) - - def get_subset_relationships(self): - """Returns a list of tuples where first element in the tuple is the parent - and second element is the subset.""" - subset_relationships = [] - edp_combinations = list(self.__reach_time_series_by_edp_combination) - - for comb1, comb2 in combinations(edp_combinations, 2): - if comb1.issubset(comb2): - subset_relationships.append((comb2, comb1)) - elif comb2.issubset(comb1): - subset_relationships.append((comb1, comb2)) - return subset_relationships - - def get_cover_relationships(self): - """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). - For each set (s_i) in the list, enumerate combinations of all sets excluding this one. - For each of these considered combinations, take their union and check if it is equal to - s_i. If so, this combination is a cover of s_i. - """ - - def generate_all_length_combinations(data): - return [ - comb for r in range(1, len(data) + 1) for comb in combinations(data, r) + self._reach_time_series = reach_time_series + self._reach_whole_campaign = reach_whole_campaign + + def sample_with_noise(self) -> "MetricReport": + """ + :return: a new MetricReport where measurements have been resampled + according to their mean and variance. + """ + return MetricReport( + reach_time_series={ + edp_combination: [ + MetricReport._sample_with_noise(measurement) + for measurement in self._reach_time_series[ + edp_combination + ] ] + for edp_combination in + self._reach_time_series.keys() + } + ) - cover_relationships = [] - edp_combinations = list(self.__reach_time_series_by_edp_combination) - for i in range(len(edp_combinations)): - possible_covered = edp_combinations[i] - other_sets = edp_combinations[:i] + edp_combinations[i + 1 :] - all_subsets_of_possible_covered = [other_set for other_set in other_sets if other_set.issubset(possible_covered)] - possible_covers = generate_all_length_combinations(all_subsets_of_possible_covered) - for possible_cover in possible_covers: - union_of_possible_cover = reduce( - lambda x, y: x.union(y), possible_cover - ) - if union_of_possible_cover == possible_covered: - cover_relationships.append((possible_covered, possible_cover)) - return cover_relationships - - @staticmethod - def __sample_with_noise(measurement: Measurement): - return Measurement( - measurement.value + random.gauss(0, measurement.sigma), measurement.sigma - ) + def get_cumulative_measurement(self, edp_combination: FrozenSet[str], + period: int) -> Measurement: + return self._reach_time_series[edp_combination][ + period] + + def get_whole_campaign_measurement(self, + edp_combination: FrozenSet[str]) -> Measurement: + return self._reach_whole_campaign[edp_combination] + + def get_cumulative_edp_combinations(self) -> set[FrozenSet[str]]: + return set(self._reach_time_series.keys()) + + def get_whole_campaign_edp_combinations(self) -> set[FrozenSet[str]]: + return set(self._reach_whole_campaign.keys()) + + def get_cumulative_edp_combinations_count(self) -> int: + return len(self._reach_time_series.keys()) + + def get_whole_campaign_edp_combinations_count(self) -> int: + return len(self._reach_whole_campaign.keys()) + + def get_number_of_periods(self) -> int: + return len(next(iter(self._reach_time_series.values()))) + + def get_cumulative_subset_relationships(self) -> list[ + Tuple[FrozenSet[str], FrozenSet[str]]]: + return get_subset_relationships(list(self._reach_time_series)) + + def get_whole_campaign_subset_relationships(self) -> list[ + Tuple[FrozenSet[str], FrozenSet[str]]]: + return get_subset_relationships(list(self._reach_whole_campaign)) + + def get_cumulative_cover_relationships(self) -> list[ + Tuple[FrozenSet[str], list[FrozenSet[str]]]]: + return get_cover_relationships(list(self._reach_time_series)) + + def get_whole_campaign_cover_relationships(self) -> list[ + Tuple[FrozenSet[str], list[FrozenSet[str]]]]: + return get_cover_relationships(list(self._reach_whole_campaign)) + + @staticmethod + def _sample_with_noise(measurement: Measurement) -> Measurement: + return Measurement( + measurement.value + random.gauss(0, measurement.sigma), + measurement.sigma + ) class Report: - """ - Represents a full report, consisting of multiple MetricReports, - which may have set relationships between each other. + """Represents a full report with multiple MetricReports and set relationships. + + This class aggregates multiple MetricReport objects, and the subset relation + between the the metrics. + + Attributes: + _metric_reports: A dictionary mapping metric names (e.g., "MRC", "AMI") + to their corresponding MetricReport objects. + _metric_subsets_by_parent: A dictionary defining subset relationships + between metrics. Each key is a parent metric, + and the value is a list of its child metrics. + _cumulative_inconsistency_allowed_edp_combinations: A set of EDP + combinations for + which inconsistencies + in cumulative + measurements are + allowed. This is for + TV measurements. """ - __metric_reports: dict[str, MetricReport] - __metric_subsets_by_parent: dict[str, list[str]] - __metric_index: dict[str, int] - __edp_comb_index: dict[str, int] - - def __init__( - self, - metric_reports: dict[str, MetricReport], - metric_subsets_by_parent: dict[str, list[str]], - cumulative_inconsistency_allowed_edp_combs: set[str], + def __init__( + self, + metric_reports: dict[str, MetricReport], + metric_subsets_by_parent: dict[str, list[str]], + cumulative_inconsistency_allowed_edp_combinations: set[str], + ): + """ + Args: + metric_reports: a dictionary mapping metric types to a MetricReport + metric_subsets_by_parent: a dictionary containing subset + relationship between the metrics. .e.g. ami >= [custom, mrc] + cumulative_inconsistency_allowed_edps : a set containing edp keys that won't + be forced to have self cumulative reaches be increasing + """ + self._metric_reports = metric_reports + self._metric_subsets_by_parent = metric_subsets_by_parent + self._cumulative_inconsistency_allowed_edp_combinations = ( + cumulative_inconsistency_allowed_edp_combinations + ) + + # All metrics in the set relationships must have a corresponding report. + for parent in metric_subsets_by_parent.keys(): + if not (parent in metric_reports): + raise ValueError( + "key {1} does not have a corresponding report".format(parent) + ) + for child in metric_subsets_by_parent[parent]: + if not (child in metric_reports): + raise ValueError( + "key {1} does not have a corresponding report".format(child) + ) + + self._metric_index = {} + for index, metric in enumerate(metric_reports.keys()): + self._metric_index[metric] = index + + self._edp_combination_index = {} + for index, edp_combination in enumerate( + next(iter(metric_reports.values())).get_cumulative_edp_combinations() ): - """ - Args: - metric_reports: a dictionary mapping metric types to a MetricReport - metric_subsets_by_parent: a dictionary containing subset - relationship between the metrics. .e.g. ami >= [custom, mrc] - cumulative_inconsistency_allowed_edps : a set containing edp keys that won't - be forced to have self cumulative reaches be increasing - """ - self.__metric_reports = metric_reports - self.__metric_subsets_by_parent = metric_subsets_by_parent - self.__cumulative_inconsistency_allowed_edp_combs = ( - cumulative_inconsistency_allowed_edp_combs + self._edp_combination_index[edp_combination] = index + + self._num_edp_combinations = len(self._edp_combination_index.keys()) + self._num_periods = next( + iter(metric_reports.values())).get_number_of_periods() + + # Assign an index to each measurement. + measurement_index = 0 + self._measurement_name_to_index = {} + for metric in metric_reports.keys(): + for edp_combination in metric_reports[ + metric].get_whole_campaign_edp_combinations(): + measurement = metric_reports[metric].get_whole_campaign_measurement( + edp_combination) + self._measurement_name_to_index[measurement.name] = measurement_index + measurement_index += 1 + for edp_combination in metric_reports[ + metric].get_cumulative_edp_combinations(): + for period in range(0, self._num_periods): + measurement = metric_reports[metric].get_cumulative_measurement( + edp_combination, period) + self._measurement_name_to_index[measurement.name] = measurement_index + measurement_index += 1 + + self._num_vars = measurement_index + + def get_metric_report(self, metric: str) -> "MetricReport": + return self._metric_reports[metric] + + def get_metrics(self) -> set[str]: + return set(self._metric_reports.keys()) + + def get_corrected_report(self) -> "Report": + """Returns a corrected, consistent report. + Note all measurements in the corrected report are set to have 0 variance + """ + spec = self.to_set_measurement_spec() + solution = Solver(spec).solve_and_translate() + return self.report_from_solution(solution, spec) + + def report_from_solution(self, solution: Solution, + spec: SetMeasurementsSpec) -> "Report": + return Report( + metric_reports={ + metric: self._metric_report_from_solution(metric, solution) + for metric in self._metric_reports + }, + metric_subsets_by_parent=self._metric_subsets_by_parent, + cumulative_inconsistency_allowed_edp_combinations=self._cumulative_inconsistency_allowed_edp_combinations, + ) + + def sample_with_noise(self) -> "Report": + """Returns a new report sampled according to the mean and variance of + all metrics in this report. Useful to bootstrap sample reports. + """ + return Report( + metric_reports={ + i: self._metric_reports[i].sample_with_noise() + for i in self._metric_reports + }, + metric_subsets_by_parent=self._metric_subsets_by_parent, + cumulative_inconsistency_allowed_edp_combinations=self._cumulative_inconsistency_allowed_edp_combinations, + ) + + def to_array(self) -> np.array: + """Returns an array representation of all the mean measurement values + in this report + """ + array = np.zeros(self._num_vars) + for metric in self._metric_reports: + for edp_combination in self._metric_reports[ + metric].get_cumulative_edp_combinations(): + for period in range(0, self._num_periods): + array.put( + self._get_measurement_index( + self._metric_reports[metric] + .get_cumulative_measurement(edp_combination, period) + ), + self._metric_reports[metric] + .get_cumulative_measurement(edp_combination, period) + .value, + ) + for edp_combination in self._metric_reports[ + metric].get_whole_campaign_edp_combinations(): + array.put( + self._get_measurement_index( + self._metric_reports[metric] + .get_whole_campaign_measurement(edp_combination) + ), + self._metric_reports[metric] + .get_whole_campaign_measurement(edp_combination) + .value, ) - - # All metrics in the set relationships must have a corresponding report. - for parent in metric_subsets_by_parent.keys(): - if not (parent in metric_reports): - raise ValueError( - "key {1} does not have a corresponding report".format(parent) - ) - for child in metric_subsets_by_parent[parent]: - if not (child in metric_reports): - raise ValueError( - "key {1} does not have a corresponding report".format(child) - ) - - self.__metric_index = {} - for index, metric in enumerate(metric_reports.keys()): - self.__metric_index[metric] = index - - self.__edp_comb_index = {} - for index, edp_comb in enumerate( - next(iter(metric_reports.values())).get_edp_combs() - ): - self.__edp_comb_index[edp_comb] = index - - self.__num_edp_combs = len(self.__edp_comb_index.keys()) - self.__num_periods = next(iter(metric_reports.values())).get_number_of_periods() - - num_vars_per_period = (self.__num_edp_combs + 1) * len(metric_reports.keys()) - self.__num_vars = self.__num_periods * num_vars_per_period - - def get_metric_report(self, metric: str) -> MetricReport: - return self.__metric_reports[metric] - - def get_metrics(self) -> set[str]: - return set(self.__metric_reports.keys()) - - def get_corrected_report(self) -> "Report": - """Returns a corrected, consistent report. - Note all measurements in the corrected report are set to have 0 variance - """ - spec = self.to_set_measurement_spec() - solution = Solver(spec).solve_and_translate() - return self.report_from_solution(solution, spec) - - def report_from_solution(self, solution, spec): - return Report( - metric_reports={ - metric: self.__metric_report_from_solution(metric, solution, spec) - for metric in self.__metric_reports - }, - metric_subsets_by_parent=self.__metric_subsets_by_parent, - cumulative_inconsistency_allowed_edp_combs=self.__cumulative_inconsistency_allowed_edp_combs, + return array + + def to_set_measurement_spec(self) -> SetMeasurementsSpec: + spec = SetMeasurementsSpec() + self._add_measurements_to_spec(spec) + self._add_set_relations_to_spec(spec) + return spec + + def _add_cover_relations_to_spec(self, spec: SetMeasurementsSpec): + # sum of subsets >= union for each period + for metric in self._metric_reports: + for cover_relationship in self._metric_reports[ + metric].get_cumulative_cover_relationships(): + covered_parent = cover_relationship[0] + covering_children = cover_relationship[1] + for period in range(0, self._num_periods): + spec.add_cover( + children=list(self._get_cumulative_measurement_index( + metric, covering_child, period) + for covering_child in covering_children), + parent=self._get_cumulative_measurement_index( + metric, covered_parent, period), + ) + for cover_relationship in self._metric_reports[ + metric].get_whole_campaign_cover_relationships(): + covered_parent = cover_relationship[0] + covering_children = cover_relationship[1] + spec.add_cover( + children=list(self._get_whole_campaign_measurement_index( + metric, covering_child) + for covering_child in covering_children), + parent=self._get_whole_campaign_measurement_index( + metric, covered_parent), ) - def sample_with_noise(self) -> "Report": - """Returns a new report sampled according to the mean and variance of - all metrics in this report. Useful to bootstrap sample reports. - """ - return Report( - metric_reports={ - i: self.__metric_reports[i].sample_with_noise() - for i in self.__metric_reports - }, - metric_subsets_by_parent=self.__metric_subsets_by_parent, - cumulative_inconsistency_allowed_edp_combs=self.__cumulative_inconsistency_allowed_edp_combs, + def _add_subset_relations_to_spec(self, spec: SetMeasurementsSpec): + # Adds relations for cumulative measurements. + for metric in self._metric_reports: + for subset_relationship in self._metric_reports[ + metric + ].get_cumulative_subset_relationships(): + parent_edp_combination = subset_relationship[0] + child_edp_combination = subset_relationship[1] + for period in range(0, self._num_periods): + spec.add_subset_relation( + child_set_id=self._get_measurement_index( + self._metric_reports[ + metric].get_cumulative_measurement( + child_edp_combination, period)), + parent_set_id=self._get_measurement_index( + self._metric_reports[ + metric].get_cumulative_measurement( + parent_edp_combination, period)), + ) + + # Adds relations for whole campaign measurements. + for subset_relationship in self._metric_reports[ + metric + ].get_whole_campaign_subset_relationships(): + parent_edp_combination = subset_relationship[0] + child_edp_combination = subset_relationship[1] + spec.add_subset_relation( + child_set_id=self._get_measurement_index( + self._metric_reports[ + metric].get_whole_campaign_measurement( + child_edp_combination)), + parent_set_id=self._get_measurement_index( + self._metric_reports[ + metric].get_whole_campaign_measurement( + parent_edp_combination)), ) - def to_array(self) -> np.array: - """Returns an array representation of all the mean measurement values - in this report - """ - array = np.zeros(self.__num_vars) - for metric in self.__metric_reports: - for period in range(0, self.__num_periods): - for edp_comb in self.__edp_comb_index: - edp_comb_ind = self.__edp_comb_index[edp_comb] - array.put( - self.__get_var_index( - period, self.__metric_index[metric], edp_comb_ind - ), - self.__metric_reports[metric] - .get_edp_comb_measurement(edp_comb, period) - .value, - ) - return array - - def to_set_measurement_spec(self): - spec = SetMeasurementsSpec() - self.__add_measurements_to_spec(spec) - self.__add_set_relations_to_spec(spec) - return spec - - def __add_set_relations_to_spec(self, spec): - for period in range(0, self.__num_periods): - - # sum of subsets >= union for each period - for metric in self.__metric_reports: - metric_ind = self.__metric_index[metric] - for cover_relationship in self.__metric_reports[ - metric - ].get_cover_relationships(): - covered_parent = cover_relationship[0] - covering_children = cover_relationship[1] - spec.add_cover( - children=list( - self.__get_var_index( - period, - metric_ind, - self.__edp_comb_index[covering_child], - ) - for covering_child in covering_children - ), - parent=self.__get_var_index( - period, metric_ind, self.__edp_comb_index[covered_parent] - ), - ) - - # subset <= union - for metric in self.__metric_reports: - metric_ind = self.__metric_index[metric] - for subset_relationship in self.__metric_reports[ - metric - ].get_subset_relationships(): - parent_edp_comb = subset_relationship[0] - child_edp_comb = subset_relationship[1] - spec.add_subset_relation( - child_set_id=self.__get_var_index( - period, metric_ind, self.__edp_comb_index[child_edp_comb] - ), - parent_set_id=self.__get_var_index( - period, metric_ind, self.__edp_comb_index[parent_edp_comb] - ), - ) - - # metric1>=metric#2 - for parent_metric in self.__metric_subsets_by_parent: - for child_metric in self.__metric_subsets_by_parent[parent_metric]: - for edp_comb in self.__edp_comb_index: - edp_comb_ind = self.__edp_comb_index[edp_comb] - spec.add_subset_relation( - child_set_id=self.__get_var_index( - period, self.__metric_index[child_metric], edp_comb_ind - ), - parent_set_id=self.__get_var_index( - period, self.__metric_index[parent_metric], edp_comb_ind - ), - ) - - # period1 <= period2 - for edp_comb in self.__edp_comb_index: - if ( - len(edp_comb) == 1 - and next(iter(edp_comb)) - in self.__cumulative_inconsistency_allowed_edp_combs - ): - continue - if period >= self.__num_periods - 1: - continue - for metric in range(0, len(self.__metric_index.keys())): - edp_comb_ind = self.__edp_comb_index[edp_comb] - spec.add_subset_relation( - child_set_id=self.__get_var_index(period, metric, edp_comb_ind), - parent_set_id=self.__get_var_index( - period + 1, metric, edp_comb_ind - ), - ) - - def __add_measurements_to_spec(self, spec): - for metric in self.__metric_reports: - for period in range(0, self.__num_periods): - for edp_comb in self.__edp_comb_index: - edp_comb_ind = self.__edp_comb_index[edp_comb] - spec.add_measurement( - self.__get_var_index( - period, self.__metric_index[metric], edp_comb_ind - ), - self.__metric_reports[metric].get_edp_comb_measurement( - edp_comb, period - ), - ) - - def __get_var_index(self, period: int, metric: int, edp: int): - return ( - metric * self.__num_edp_combs * self.__num_periods - + edp * self.__num_periods - + period + # TODO(@ple13):Use timestamp to check if the last cumulative measurement covers + # the whole campaign. If yes, make sure that the two measurements are equal + # instead of less than or equal. + def _add_cumulative_whole_campaign_relations_to_spec(self, + spec: SetMeasurementsSpec): + # Adds relations between cumulative and whole campaign measurements. + # For an edp combination, the last cumulative measurement is less than or + # equal to the whole campaign measurement. + for metric in self._metric_reports: + for edp_combination in self._metric_reports[ + metric].get_cumulative_edp_combinations().intersection( + self._metric_reports[ + metric].get_whole_campaign_edp_combinations()): + spec.add_subset_relation( + child_set_id=self._get_measurement_index( + self._metric_reports[ + metric].get_cumulative_measurement( + edp_combination, (self._num_periods - 1))), + parent_set_id=self._get_measurement_index( + self._metric_reports[ + metric].get_whole_campaign_measurement( + edp_combination)), ) - def __metric_report_from_solution(self, metric, solution, spec): - solution_time_series = {} - for edp_comb in self.__edp_comb_index: - edp_comb_ind = self.__edp_comb_index[edp_comb] - solution_time_series[edp_comb] = [ - Measurement( - solution[ - self.__get_var_index( - period, self.__metric_index[metric], edp_comb_ind - ) - ], - 0, - spec.get_measurement_metric( - self.__get_var_index( - period, self.__metric_index[metric], edp_comb_ind - ) - ), - ) - for period in range(0, self.__num_periods) - ] + def _add_metric_relations_to_spec(self, spec: SetMeasurementsSpec): + # metric1>=metric#2 + for parent_metric in self._metric_subsets_by_parent: + for child_metric in self._metric_subsets_by_parent[parent_metric]: + # Handles cumulative measurements of common edp combinations. + for edp_combination in self._metric_reports[ + parent_metric].get_cumulative_edp_combinations().intersection( + self._metric_reports[ + child_metric].get_cumulative_edp_combinations()): + for period in range(0, self._num_periods): + spec.add_subset_relation( + child_set_id=self._get_measurement_index( + self._metric_reports[ + child_metric].get_cumulative_measurement( + edp_combination, period)), + parent_set_id=self._get_measurement_index( + self._metric_reports[ + parent_metric].get_cumulative_measurement( + edp_combination, period)), + ) + # Handles whole campaign measurements of common edp combinations. + for edp_combination in self._metric_reports[ + parent_metric].get_whole_campaign_edp_combinations().intersection( + self._metric_reports[ + child_metric].get_whole_campaign_edp_combinations()): + spec.add_subset_relation( + child_set_id=self._get_measurement_index( + self._metric_reports[ + child_metric].get_whole_campaign_measurement( + edp_combination)), + parent_set_id=self._get_measurement_index( + self._metric_reports[ + parent_metric].get_whole_campaign_measurement( + edp_combination)), + ) + + def _add_cumulative_relations_to_spec(self, spec: SetMeasurementsSpec): + for metric in self._metric_reports.keys(): + for edp_combination in self._metric_reports[ + metric].get_cumulative_edp_combinations(): + if ( + len(edp_combination) == 1 + and next(iter(edp_combination)) + in self._cumulative_inconsistency_allowed_edp_combinations + ): + continue + for period in range(0, self._num_periods): + if period >= self._num_periods - 1: + continue + spec.add_subset_relation( + child_set_id=self._get_measurement_index( + self._metric_reports[ + metric].get_cumulative_measurement( + edp_combination, period)), + parent_set_id=self._get_measurement_index( + self._metric_reports[ + metric].get_cumulative_measurement( + edp_combination, period + 1)), + ) + + def _add_set_relations_to_spec(self, spec: SetMeasurementsSpec): + # sum of subsets >= union for each period. + self._add_cover_relations_to_spec(spec) + + # subset <= union. + self._add_subset_relations_to_spec(spec) + + # metric1>=metric#2. + self._add_metric_relations_to_spec(spec) + + # period1 <= period2. + self._add_cumulative_relations_to_spec(spec) + + # Last cumulative measurement <= whole campaign measurement. + self._add_cumulative_whole_campaign_relations_to_spec(spec) + + def _add_measurements_to_spec(self, spec: SetMeasurementsSpec): + for metric in self._metric_reports.keys(): + for edp_combination in self._metric_reports[ + metric].get_cumulative_edp_combinations(): + for period in range(0, self._num_periods): + measurement = self._metric_reports[ + metric].get_cumulative_measurement(edp_combination, period) + spec.add_measurement( + self._get_measurement_index(measurement), + measurement, + ) + for edp_combination in self._metric_reports[ + metric].get_whole_campaign_edp_combinations(): + measurement = self._metric_reports[ + metric].get_whole_campaign_measurement(edp_combination) + spec.add_measurement( + self._get_measurement_index(measurement), + measurement, + ) - return MetricReport(reach_time_series_by_edp_combination=solution_time_series) + def _get_measurement_index(self, measurement: Measurement) -> int: + return self._measurement_name_to_index[measurement.name] + + def _get_cumulative_measurement_index(self, metric: str, + edp_combination: FrozenSet[str], period: int) -> int: + return self._get_measurement_index( + self._metric_reports[metric].get_cumulative_measurement( + edp_combination, period) + ) + + def _get_whole_campaign_measurement_index(self, metric: str, + edp_combination: FrozenSet[str]) -> int: + return self._get_measurement_index( + self._metric_reports[metric].get_whole_campaign_measurement( + edp_combination) + ) + + def _metric_report_from_solution(self, metric: str, + solution: Solution) -> "MetricReport": + solution_time_series = {} + solution_whole_campaign = {} + for edp_combination in self._metric_reports[ + metric].get_cumulative_edp_combinations(): + solution_time_series[edp_combination] = [ + Measurement( + solution[ + self._get_measurement_index(self._metric_reports[ + metric].get_cumulative_measurement( + edp_combination, period)) + ], + self._metric_reports[metric].get_cumulative_measurement( + edp_combination, period).sigma, + self._metric_reports[metric].get_cumulative_measurement( + edp_combination, period).name, + ) + for period in range(0, self._num_periods) + ] + for edp_combination in self._metric_reports[ + metric].get_whole_campaign_edp_combinations(): + solution_whole_campaign[edp_combination] = Measurement( + solution[ + self._get_measurement_index(self._metric_reports[ + metric].get_whole_campaign_measurement( + edp_combination)) + ], + self._metric_reports[metric].get_whole_campaign_measurement( + edp_combination).sigma, + self._metric_reports[metric].get_whole_campaign_measurement( + edp_combination).name, + ) + return MetricReport( + reach_time_series=solution_time_series, + reach_whole_campaign=solution_whole_campaign, + ) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py index 7d26a3a53b3..d2b83364a82 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py @@ -13,96 +13,24 @@ # limitations under the License. import json -import math -import pandas as pd import sys from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import report_summary_pb2 -from functools import partial from noiseninja.noised_measurements import Measurement from report.report import Report, MetricReport +from typing import FrozenSet # This is a demo script that has the following assumptions : -# 1. There are 2 EDPs one with Name Google, the other Linear TV. -# 2. CUSTOM filters are not yet supported in this tool. -# 3. AMI is a parent of MRC and there are no other relationships between metrics. -# 4. The standard deviation for all Measurements are assumed to be 1 -# 5. Frequency results are not corrected. -# 6. Impression results are not corrected. +# 1. CUSTOM filters are not yet supported in this tool. +# 2. AMI is a parent of MRC and there are no other relationships between metrics. +# 3. Impression results are not corrected. SIGMA = 1 -AMI_FILTER = "AMI" -MRC_FILTER = "MRC" - -# TODO(uakyol) : Read the EDP names dynamically from the excel sheet -# TODO(uakyol) : Make this work for 3 EDPs -EDP_ONE = "Google" -EDP_TWO = "Linear TV" -TOTAL_CAMPAIGN = "Total Campaign" - -edp_names = [EDP_ONE, EDP_TWO] - -CUML_REACH_PREFIX = "Cuml. Reach" - -EDP_MAP = { - edp_name: {"sheet": f"{CUML_REACH_PREFIX} ({edp_name})", "ind": ind} - for ind, edp_name in enumerate(edp_names + [TOTAL_CAMPAIGN]) -} - -CUML_REACH_COL_NAME = "Cumulative Reach 1+" -TOTAL_REACH_COL_NAME = "Total Reach (1+)" -FILTER_COL_NAME = "Impression Filter" - ami = "ami" mrc = "mrc" -def createMeasurements(rows, reach_col_name, sigma, metric=""): - # These rows are already sorted by timestamp. - return [ - Measurement(measured_value, sigma, metric) - for measured_value in list(rows[reach_col_name]) - ] - - -def getMeasurements(df, reach_col_name, sigma): - ami_rows = df[df[FILTER_COL_NAME] == AMI_FILTER] - mrc_rows = df[df[FILTER_COL_NAME] == MRC_FILTER] - - ami_measurements = createMeasurements(ami_rows, reach_col_name, sigma) - mrc_measurements = createMeasurements(mrc_rows, reach_col_name, sigma) - - return (ami_measurements, mrc_measurements) - - -def readExcel(excel_file_path, unnoised_edps): - measurements = {} - dfs = pd.read_excel(excel_file_path, sheet_name=None) - for edp in EDP_MAP: - sigma = 0 if edp in unnoised_edps else SIGMA - - cumilative_sheet_name = EDP_MAP[edp]["sheet"] - ( - cumilative_ami_measurements, - cumilative_mrc_measurements) = getMeasurements( - dfs[cumilative_sheet_name], CUML_REACH_COL_NAME, sigma - ) - - (total_ami_measurements, total_mrc_measurements) = getMeasurements( - dfs[edp], TOTAL_REACH_COL_NAME, sigma - ) - - # There has to be 1 row for AMI and MRC metrics in the total reach sheet. - assert len(total_mrc_measurements) == 1 and len(total_ami_measurements) == 1 - - measurements[edp] = { - AMI_FILTER: cumilative_ami_measurements + total_ami_measurements, - MRC_FILTER: cumilative_mrc_measurements + total_mrc_measurements, - } - return (measurements, dfs) - - # Processes a report summary and returns a consistent one. # # Currently, the function only supports ami and mrc measurements and primitive @@ -110,8 +38,10 @@ def readExcel(excel_file_path, unnoised_edps): # TODO(@ple13): Extend the function to support custom measurements and composite # set operations such as difference, incremental. def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): - ami_measurements: Dict[FrozenSet[str], List[Measurement]] = {} - mrc_measurements: Dict[FrozenSet[str], List[Measurement]] = {} + cumulative_ami_measurements: Dict[FrozenSet[str], List[Measurement]] = {} + cumulative_mrc_measurements: Dict[FrozenSet[str], List[Measurement]] = {} + total_ami_measurements: Dict[FrozenSet[str], Measurement] = {} + total_mrc_measurements: Dict[FrozenSet[str], Measurement] = {} # Processes cumulative measurements first. for entry in report_summary.measurement_details: @@ -123,41 +53,36 @@ def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): for result in entry.measurement_results ] if entry.measurement_policy == "ami": - ami_measurements[data_providers] = measurements + cumulative_ami_measurements[data_providers] = measurements elif entry.measurement_policy == "mrc": - mrc_measurements[data_providers] = measurements + cumulative_mrc_measurements[data_providers] = measurements - edp_comb_list = ami_measurements.keys() - if len(edp_comb_list) == 0: - edp_comb_list = mrc_measurements.keys() - - # Processes non-cumulative union measurements. + # Processes total union measurements. for entry in report_summary.measurement_details: - if (entry.set_operation == "union") and ( - entry.is_cumulative == False) and ( - frozenset(entry.data_providers) in edp_comb_list): + if (entry.set_operation == "union") and (entry.is_cumulative == False): measurements = [ - Measurement(result.reach, result.standard_deviation, - result.metric) + Measurement(result.reach, result.standard_deviation, result.metric) for result in entry.measurement_results ] if entry.measurement_policy == "ami": - ami_measurements[frozenset(entry.data_providers)].extend( - measurements) + total_ami_measurements[frozenset(entry.data_providers)] = measurements[ + 0] elif entry.measurement_policy == "mrc": - mrc_measurements[frozenset(entry.data_providers)].extend( - measurements) + total_mrc_measurements[frozenset(entry.data_providers)] = measurements[ + 0] # Builds the report based on the above measurements. report = Report( { - policy: MetricReport(measurements) - for policy, measurements in - [("ami", ami_measurements), ("mrc", mrc_measurements)] - if measurements # Only include if measurements is not empty + policy: MetricReport(cumulative_measurements, total_measurements) + for policy, cumulative_measurements, total_measurements in + [("ami", cumulative_ami_measurements, total_ami_measurements), + ("mrc", cumulative_mrc_measurements, total_mrc_measurements)] + if cumulative_measurements + # Only include if measurements is not empty }, metric_subsets_by_parent={ami: [mrc]}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) # Gets the corrected report. @@ -168,132 +93,17 @@ def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): measurements_policies = corrected_report.get_metrics() for policy in measurements_policies: metric_report = corrected_report.get_metric_report(policy) - for edp in metric_report.get_edp_combs(): + for edp_combination in metric_report.get_cumulative_edp_combinations(): for index in range(metric_report.get_number_of_periods()): - entry = metric_report.get_edp_comb_measurement(edp, index) - metric_name_to_value.update( - {entry.metric_name: int(entry.value)}) + entry = metric_report.get_cumulative_measurement(edp_combination, index) + metric_name_to_value.update({entry.name: int(entry.value)}) + for edp_combination in metric_report.get_whole_campaign_edp_combinations(): + entry = metric_report.get_whole_campaign_measurement(edp_combination) + metric_name_to_value.update({entry.name: int(entry.value)}) return metric_name_to_value -def getCorrectedReport(measurements): - report = Report( - { - ami: MetricReport( - reach_time_series_by_edp_combination={ - frozenset({EDP_ONE, EDP_TWO}): measurements[TOTAL_CAMPAIGN][ - AMI_FILTER - ], - frozenset({EDP_ONE}): measurements[EDP_ONE][AMI_FILTER], - frozenset({EDP_TWO}): measurements[EDP_TWO][AMI_FILTER], - } - ), - mrc: MetricReport( - reach_time_series_by_edp_combination={ - frozenset({EDP_ONE, EDP_TWO}): measurements[TOTAL_CAMPAIGN][ - MRC_FILTER - ], - frozenset({EDP_ONE}): measurements[EDP_ONE][MRC_FILTER], - frozenset({EDP_TWO}): measurements[EDP_TWO][MRC_FILTER], - } - ), - }, - # AMI is a parent of MRC - metric_subsets_by_parent={ami: [mrc]}, - cumulative_inconsistency_allowed_edp_combs={}, - ) - - return report.get_corrected_report() - - -def correctSheetMetric(df, rows, func): - for period, (index, row) in enumerate(rows.iterrows()): - df.at[index, CUML_REACH_COL_NAME] = math.ceil(func(period).value) - - -def correctCumSheet(df, ami_func, mrc_func): - ami_rows = df[df[FILTER_COL_NAME] == AMI_FILTER] - mrc_rows = df[df[FILTER_COL_NAME] == MRC_FILTER] - correctSheetMetric(df, ami_rows, ami_func) - correctSheetMetric(df, mrc_rows, mrc_func) - return df - - -def correctTotSheet(df, ami_val, mrc_val): - ami_rows = df[df[FILTER_COL_NAME] == AMI_FILTER] - mrc_rows = df[df[FILTER_COL_NAME] == MRC_FILTER] - - # There has to be 1 row for AMI and MRC metrics in the total reach sheet. - assert ami_rows.shape[0] == 1 and mrc_rows.shape[0] == 1 - df.at[ami_rows.index[0], TOTAL_REACH_COL_NAME] = math.ceil(ami_val) - df.at[mrc_rows.index[0], TOTAL_REACH_COL_NAME] = math.ceil(mrc_val) - return df - - -def buildCorrectedExcel(correctedReport, excel): - ami_metric_report = correctedReport.get_metric_report(ami) - mrc_metric_report = correctedReport.get_metric_report(mrc) - - for edp in EDP_MAP: - edp_index = EDP_MAP[edp]["ind"] - amiFunc = ( - partial(ami_metric_report.get_edp_comb_measurement, - frozenset({EDP_ONE, EDP_TWO})) - if (edp == TOTAL_CAMPAIGN) - else partial(ami_metric_report.get_edp_comb_measurement, - frozenset({edp})) - ) - mrcFunc = ( - partial(mrc_metric_report.get_edp_comb_measurement, - frozenset({EDP_ONE, EDP_TWO})) - if (edp == TOTAL_CAMPAIGN) - else partial(mrc_metric_report.get_edp_comb_measurement, - frozenset({edp})) - ) - - cumilative_sheet_name = EDP_MAP[edp]["sheet"] - excel[cumilative_sheet_name] = correctCumSheet( - excel[cumilative_sheet_name], amiFunc, mrcFunc - ) - - # The last value of the corrected measurement series is the total reach. - totAmiVal = ( - ami_metric_report.get_edp_comb_measurement( - frozenset({EDP_ONE, EDP_TWO}), -1).value - if (edp == TOTAL_CAMPAIGN) - else ami_metric_report.get_edp_comb_measurement(frozenset({edp}), - -1).value - ) - totMrcVal = ( - mrc_metric_report.get_edp_comb_measurement( - frozenset({EDP_ONE, EDP_TWO}), -1).value - if (edp == TOTAL_CAMPAIGN) - else mrc_metric_report.get_edp_comb_measurement(frozenset({edp}), - -1).value - ) - total_sheet_name = edp - excel[total_sheet_name] = correctTotSheet( - excel[total_sheet_name], totAmiVal, totMrcVal - ) - return excel - - -def writeCorrectedExcel(path, corrected_excel): - with pd.ExcelWriter(path) as writer: - # Write each dataframe to a different sheet - for sheet_name in corrected_excel: - corrected_excel[sheet_name].to_excel( - writer, sheet_name=sheet_name, index=False - ) - - -def correctExcelFile(path_to_report, unnoised_edps): - (measurements, excel) = readExcel(path_to_report, unnoised_edps) - correctedReport = getCorrectedReport(measurements) - return buildCorrectedExcel(correctedReport, excel) - - def main(): report_summary = report_summary_pb2.ReportSummary() # Read the encoded serialized report summary from stdin and convert it back to diff --git a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_large.json b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_large.json index 9178812b1b8..d5472682c60 100644 --- a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_large.json +++ b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_large.json @@ -92,7 +92,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } @@ -215,7 +215,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } @@ -338,7 +338,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } @@ -457,7 +457,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } @@ -559,7 +559,7 @@ "reach": { "value": "31569000", "univariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 } }, "frequencyHistogram": { @@ -576,7 +576,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } @@ -682,7 +682,7 @@ "reach": { "value": "31569000", "univariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 } }, "frequencyHistogram": { @@ -699,7 +699,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py index 9dcb7eafd61..251fd22879e 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py @@ -14,20 +14,122 @@ import unittest -from noiseninja.noised_measurements import Measurement -from report.report import Report, MetricReport +from noiseninja.noised_measurements import Measurement, SetMeasurementsSpec +from report.report import Report, MetricReport, is_cover, get_covers EXPECTED_PRECISION = 3 EDP_ONE = "EDP_ONE" EDP_TWO = "EDP_TWO" EDP_THREE = "EDP_THREE" +SAMPLE_REPORT = Report( + metric_reports={ + "ami": MetricReport( + reach_time_series={ + frozenset({EDP_ONE}): [Measurement(1, 0, "measurement_01"), + Measurement(1, 0, "measurement_02")], + frozenset({EDP_TWO}): [Measurement(1, 0, "measurement_03"), + Measurement(1, 0, "measurement_04")], + frozenset({EDP_THREE}): [ + Measurement(1, 0, "measurement_05"), + Measurement(1, 0, "measurement_06")], + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1, 0, "measurement_07"), + Measurement(1, 0, "measurement_08")], + }, + reach_whole_campaign={ + frozenset({EDP_ONE}): Measurement(1, 0, "measurement_09"), + frozenset({EDP_TWO}): Measurement(1, 0, "measurement_10"), + frozenset({EDP_THREE}): + Measurement(1, 0, "measurement_11"), + frozenset({EDP_ONE, EDP_TWO}): + Measurement(1, 0, "measurement_12"), + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): + Measurement(1, 0, "measurement_13"), + }, + ), + "mrc": MetricReport( + reach_time_series={ + frozenset({EDP_ONE}): [Measurement(1, 0, "measurement_14"), + Measurement(1, 0, "measurement_15")], + frozenset({EDP_TWO}): [Measurement(1, 0, "measurement_16"), + Measurement(1, 0, "measurement_17")], + frozenset({EDP_THREE}): [ + Measurement(1, 0, "measurement_18"), + Measurement(1, 0, "measurement_19")], + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1, 0, "measurement_20"), + Measurement(1, 0, "measurement_21")], + }, + reach_whole_campaign={ + frozenset({EDP_ONE}): Measurement(1, 0, "measurement_22"), + frozenset({EDP_TWO}): Measurement(1, 0, "measurement_23"), + frozenset({EDP_THREE}): + Measurement(1, 0, "measurement_24"), + frozenset({EDP_TWO, EDP_THREE}): + Measurement(1, 0, "measurement_25"), + }, + ) + }, + metric_subsets_by_parent={"ami": ["mrc"]}, + cumulative_inconsistency_allowed_edp_combinations={}, +) + class TestReport(unittest.TestCase): + def test_is_cover_returns_true_for_valid_cover_sets(self): + self.assertTrue(is_cover(frozenset({"EDP_ONE", "EDP_TWO", "EDP_THREE"}), + (frozenset({"EDP_ONE"}), frozenset({"EDP_TWO"}), + frozenset({"EDP_THREE"})))) + self.assertTrue(is_cover(frozenset({"EDP_ONE", "EDP_TWO", "EDP_THREE"}), + (frozenset({"EDP_ONE"}), frozenset({"EDP_TWO"}), + frozenset({"EDP_THREE"}), + frozenset({"EDP_ONE", "EDP_TWO"})))) + + def test_is_cover_returns_false_for_invalid_cover_sets(self): + self.assertFalse(is_cover(frozenset({"EDP_ONE", "EDP_TWO", "EDP_THREE"}), + (frozenset({"EDP_ONE"}), + frozenset({"EDP_THREE"})))) + + def test_get_cover_returns_all_cover_sets(self): + target = frozenset({"EDP_ONE", "EDP_TWO", "EDP_THREE"}) + other_sets = (frozenset({"EDP_ONE"}), frozenset({"EDP_TWO"}), + frozenset({"EDP_THREE"}), + frozenset({"EDP_ONE", "EDP_TWO"})) + + expected = [ + ( + frozenset({'EDP_TWO', 'EDP_THREE', 'EDP_ONE'}), + (frozenset({'EDP_THREE'}), frozenset({'EDP_TWO', 'EDP_ONE'})) + ), + ( + frozenset({'EDP_TWO', 'EDP_THREE', 'EDP_ONE'}), ( + frozenset({'EDP_ONE'}), frozenset({'EDP_TWO'}), + frozenset({'EDP_THREE'})) + ), + ( + frozenset({'EDP_TWO', 'EDP_THREE', 'EDP_ONE'}), ( + frozenset({'EDP_ONE'}), frozenset({'EDP_THREE'}), + frozenset({'EDP_TWO', 'EDP_ONE'})) + ), + ( + frozenset({'EDP_TWO', 'EDP_THREE', 'EDP_ONE'}), ( + frozenset({'EDP_TWO'}), frozenset({'EDP_THREE'}), + frozenset({'EDP_TWO', 'EDP_ONE'})) + ), + ( + frozenset({'EDP_TWO', 'EDP_THREE', 'EDP_ONE'}), ( + frozenset({'EDP_ONE'}), frozenset({'EDP_TWO'}), + frozenset({'EDP_THREE'}), frozenset({'EDP_TWO', 'EDP_ONE'})) + ) + ] + + cover_relationship = get_covers(target, other_sets) + self.assertEqual(expected, cover_relationship) def test_get_cover_relationships(self): metric_report = MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE}): [Measurement(1, 0, "measurement_01")], frozenset({EDP_TWO}): [Measurement(1, 0, "measurement_02")], frozenset({EDP_THREE}): [Measurement(1, 0, "measurement_03")], @@ -39,7 +141,8 @@ def test_get_cover_relationships(self): Measurement(1, 0, "measurement_06")], frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ Measurement(1, 0, "measurement_07")], - } + }, + reach_whole_campaign={}, ) expected = [ @@ -431,55 +534,237 @@ def test_get_cover_relationships(self): ), ), ] - self.assertEqual(metric_report.get_cover_relationships(), expected) + self.assertEqual(metric_report.get_cumulative_cover_relationships(), + expected) - def test_get_corrected_single_metric_report(self): + def test_add_cover_relationships(self): + report = SAMPLE_REPORT + name_to_index = report._measurement_name_to_index + + expected_covers_by_set = { + name_to_index["measurement_07"]: [ + [name_to_index["measurement_01"], + name_to_index["measurement_03"], + name_to_index["measurement_05"]] + ], + name_to_index["measurement_08"]: [ + [name_to_index["measurement_02"], + name_to_index["measurement_04"], + name_to_index["measurement_06"]] + ], + name_to_index["measurement_12"]: [ + [name_to_index["measurement_09"], + name_to_index["measurement_10"]] + ], + name_to_index["measurement_13"]: [ + [name_to_index["measurement_11"], + name_to_index["measurement_12"]], + [name_to_index["measurement_09"], + name_to_index["measurement_10"], + name_to_index["measurement_11"]], + [name_to_index["measurement_09"], + name_to_index["measurement_11"], + name_to_index["measurement_12"]], + [name_to_index["measurement_10"], + name_to_index["measurement_11"], + name_to_index["measurement_12"]], + [name_to_index["measurement_10"], + name_to_index["measurement_09"], + name_to_index["measurement_11"], + name_to_index["measurement_12"]] + ], + name_to_index["measurement_20"]: [ + [name_to_index["measurement_14"], + name_to_index["measurement_16"], + name_to_index["measurement_18"]] + ], + name_to_index["measurement_21"]: [ + [name_to_index["measurement_15"], + name_to_index["measurement_17"], + name_to_index["measurement_19"]] + ], + name_to_index["measurement_25"]: [ + [name_to_index["measurement_23"], + name_to_index["measurement_24"]] + ], + } + + spec = SetMeasurementsSpec() + report._add_cover_relations_to_spec(spec) + self.assertEqual(len(spec._subsets_by_set), 0) + self.assertEqual(expected_covers_by_set.keys(), spec._covers_by_set.keys()) + for key in spec._covers_by_set.keys(): + self.assertEqual({tuple(sorted(inner_list)) for inner_list in + expected_covers_by_set[key]}, + {tuple(sorted(inner_list)) for inner_list in + spec._covers_by_set[key]}) + + def test_add_subset_relationships(self): + report = SAMPLE_REPORT + name_to_index = report._measurement_name_to_index + + expected_subsets_by_set = { + name_to_index["measurement_07"]: [name_to_index["measurement_01"], + name_to_index["measurement_03"], + name_to_index["measurement_05"]], + name_to_index["measurement_08"]: [name_to_index["measurement_02"], + name_to_index["measurement_04"], + name_to_index["measurement_06"]], + name_to_index["measurement_12"]: [name_to_index["measurement_09"], + name_to_index["measurement_10"]], + name_to_index["measurement_13"]: [name_to_index["measurement_09"], + name_to_index["measurement_10"], + name_to_index["measurement_11"], + name_to_index["measurement_12"]], + name_to_index["measurement_20"]: [name_to_index["measurement_14"], + name_to_index["measurement_16"], + name_to_index["measurement_18"]], + name_to_index["measurement_21"]: [name_to_index["measurement_15"], + name_to_index["measurement_17"], + name_to_index["measurement_19"]], + name_to_index["measurement_25"]: [name_to_index["measurement_23"], + name_to_index["measurement_24"]], + } + + spec = SetMeasurementsSpec() + report._add_subset_relations_to_spec(spec) + + self.assertEqual(len(spec._covers_by_set), 0) + self.assertEqual(expected_subsets_by_set.keys(), + spec._subsets_by_set.keys()) + for key in spec._subsets_by_set.keys(): + self.assertEqual(sorted(expected_subsets_by_set[key]), + sorted(spec._subsets_by_set[key])) + + def test_add_cumulative_subset_relationships(self): + report = SAMPLE_REPORT + name_to_index = report._measurement_name_to_index + + expected_subsets_by_set = { + name_to_index["measurement_02"]: [name_to_index["measurement_01"]], + name_to_index["measurement_04"]: [name_to_index["measurement_03"]], + name_to_index["measurement_06"]: [name_to_index["measurement_05"]], + name_to_index["measurement_08"]: [name_to_index["measurement_07"]], + name_to_index["measurement_15"]: [name_to_index["measurement_14"]], + name_to_index["measurement_17"]: [name_to_index["measurement_16"]], + name_to_index["measurement_19"]: [name_to_index["measurement_18"]], + name_to_index["measurement_21"]: [name_to_index["measurement_20"]], + } + spec = SetMeasurementsSpec() + report._add_cumulative_relations_to_spec(spec) + + self.assertEqual(len(spec._covers_by_set), 0) + self.assertEqual(expected_subsets_by_set.keys(), + spec._subsets_by_set.keys()) + for key in spec._subsets_by_set.keys(): + self.assertEqual(sorted(expected_subsets_by_set[key]), + sorted(spec._subsets_by_set[key])) + + def test_add_metric_relationships(self): + report = SAMPLE_REPORT + name_to_index = report._measurement_name_to_index + + expected_subsets_by_set = { + name_to_index["measurement_01"]: [name_to_index["measurement_14"]], + name_to_index["measurement_02"]: [name_to_index["measurement_15"]], + name_to_index["measurement_03"]: [name_to_index["measurement_16"]], + name_to_index["measurement_04"]: [name_to_index["measurement_17"]], + name_to_index["measurement_05"]: [name_to_index["measurement_18"]], + name_to_index["measurement_06"]: [name_to_index["measurement_19"]], + name_to_index["measurement_07"]: [name_to_index["measurement_20"]], + name_to_index["measurement_08"]: [name_to_index["measurement_21"]], + name_to_index["measurement_09"]: [name_to_index["measurement_22"]], + name_to_index["measurement_10"]: [name_to_index["measurement_23"]], + name_to_index["measurement_11"]: [name_to_index["measurement_24"]], + } + + spec = SetMeasurementsSpec() + report._add_metric_relations_to_spec(spec) + + self.assertEqual(len(spec._covers_by_set), 0) + self.assertEqual(expected_subsets_by_set.keys(), + spec._subsets_by_set.keys()) + for key in spec._subsets_by_set.keys(): + self.assertEqual(sorted(expected_subsets_by_set[key]), + sorted(spec._subsets_by_set[key])) + + def test_add_cumulative_whole_campaign_relationships(self): + report = SAMPLE_REPORT + name_to_index = report._measurement_name_to_index + + expected_subsets_by_set = { + name_to_index["measurement_09"]: [name_to_index["measurement_02"]], + name_to_index["measurement_10"]: [name_to_index["measurement_04"]], + name_to_index["measurement_11"]: [name_to_index["measurement_06"]], + name_to_index["measurement_13"]: [name_to_index["measurement_08"]], + name_to_index["measurement_22"]: [name_to_index["measurement_15"]], + name_to_index["measurement_23"]: [name_to_index["measurement_17"]], + name_to_index["measurement_24"]: [name_to_index["measurement_19"]], + } + + spec = SetMeasurementsSpec() + report._add_cumulative_whole_campaign_relations_to_spec(spec) + + self.assertEqual(len(spec._covers_by_set), 0) + self.assertEqual(expected_subsets_by_set.keys(), + spec._subsets_by_set.keys()) + for key in spec._subsets_by_set.keys(): + self.assertEqual(sorted(expected_subsets_by_set[key]), + sorted(spec._subsets_by_set[key])) + + def test_get_corrected_single_metric_report(self): ami = "ami" report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(50, 1, "measurement_01")], frozenset({EDP_ONE}): [ Measurement(48, 0, "measurement_02")], frozenset({EDP_TWO}): [Measurement(1, 1, "measurement_03")], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) + # The corrected report should be consistent: + # 1. reach[edp1][0] <= reach[edp1 U edp2][0] + # 2. reach[edp2][0] <= reach[edp1 U edp2][0] + # 3. reach[edp1 U edp2][0] <= reach[edp1][0] + reach[edp2][0]. corrected = report.get_corrected_report() expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(49.5, 1, "measurement_01")], frozenset({EDP_ONE}): [ Measurement(48, 0, "measurement_02")], frozenset({EDP_TWO}): [ Measurement(1.5, 1, "measurement_03")], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def test_can_correct_time_series(self): ami = "ami" report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(0.00, 1, "measurement_01"), Measurement(3.30, 1, "measurement_02"), @@ -490,19 +775,25 @@ def test_can_correct_time_series(self): Measurement(3.30, 1, "measurement_05"), Measurement(0.00, 1, "measurement_06"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) + # The corrected report should be consistent: + # 1. All the time series reaches are monotonic increasing, e.g. + # reach[edp1][i] <= reach[edp1][i+1]. + # 2. Reach of the child set is less than or equal to reach of the parent set + # for all period, e.g. reach[edp1][i] <= reach[edp1 U edp2][i]. corrected = report.get_corrected_report() expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(0.00, 1, "measurement_01"), Measurement(1.65, 1, "measurement_02"), @@ -513,21 +804,22 @@ def test_can_correct_time_series(self): Measurement(1.65, 1, "measurement_05"), Measurement(1.65, 1, "measurement_06"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def test_can_correct_time_series_for_three_edps(self): ami = "ami" report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ # 1 way comb frozenset({EDP_ONE}): [ Measurement(0.00, 1, "measurement_01"), @@ -566,19 +858,30 @@ def test_can_correct_time_series_for_three_edps(self): Measurement(8.0, 1, "measurement_20"), Measurement(11.90, 1, "measurement_21"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) + # The corrected report should be consistent: + # 1. All the time series reaches are monotonic increasing, e.g. + # reach[edp1][i] <= reach[edp1][i+1]. + # 2. Reach of the cover set is less than or equal to the sum of reach of + # sets it covers. For example: for each period i it is true that + # reach[edp1 U edp2][i] <= reach[edp1][i] + reach[edp2][i], + # or reach[edp1 U edp2 U edp3][i] <= reach[edp1 U edp2][i] + reach[edp3][i], + # etc. + # 3. Reach of the child set is less than or equal to reach of the parent set + # for all period, e.g. reach[edp1][i] <= reach[edp1 U edp2][i]. corrected = report.get_corrected_report() expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ # 1 way comb frozenset({EDP_ONE}): [ Measurement(0.10, 1.00, "measurement_01"), @@ -617,21 +920,271 @@ def test_can_correct_time_series_for_three_edps(self): Measurement(8.00, 1.00, "measurement_20"), Measurement(11.90, 1.00, "measurement_21"), ], + }, + reach_whole_campaign={}, + ) + }, + metric_subsets_by_parent={}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + + def test_correct_report_with_both_time_series_and_whole_campaign_measurements_three_edps( + self): + ami = "ami" + + report = Report( + metric_reports={ + ami: MetricReport( + reach_time_series={ + # 1 way comb + frozenset({EDP_ONE}): [ + Measurement(0.00, 1, "measurement_01"), + Measurement(3.30, 1, "measurement_02"), + ], + frozenset({EDP_TWO}): [ + Measurement(0.00, 1, "measurement_04"), + Measurement(2.30, 1, "measurement_05"), + ], + frozenset({EDP_THREE}): [ + Measurement(1.00, 1, "measurement_07"), + Measurement(3.30, 1, "measurement_08"), + ], + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(0.00, 1, "measurement_10"), + Measurement(5.30, 1, "measurement_11"), + ], + frozenset({EDP_TWO, EDP_THREE}): [ + Measurement(0.70, 1, "measurement_13"), + Measurement(6.30, 1, "measurement_14"), + ], + frozenset({EDP_ONE, EDP_THREE}): [ + Measurement(1.20, 1, "measurement_16"), + Measurement(7.00, 1, "measurement_17"), + ], + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1.10, 1, "measurement_19"), + Measurement(8.0, 1, "measurement_20"), + ], + }, + reach_whole_campaign={ + # 1 way comb + frozenset({EDP_ONE}): + Measurement(4.00, 1.00, "measurement_03"), + frozenset({EDP_TWO}): + Measurement(3.3333, 1.00, "measurement_06"), + frozenset({EDP_THREE}): + Measurement(5.3333, 1.00, "measurement_09"), + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): + Measurement(6.90, 1.00, "measurement_12"), + frozenset({EDP_TWO, EDP_THREE}): + Measurement(8.66666, 1.00, "measurement_15"), + frozenset({EDP_ONE, EDP_THREE}): + Measurement(8.90, 1.00, "measurement_18"), + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): + Measurement(11.90, 1.00, "measurement_21"), } ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + # The corrected report should be consistent: + # 1. All the time series reaches are monotonic increasing, e.g. + # reach[edp1][i] <= reach[edp1][i+1]. + # 2. Reach of the cover set is less than or equal to the sum of reach of + # sets it covers. For example: for each period i it is true that + # reach[edp1 U edp2][i] <= reach[edp1][i] + reach[edp2][i], + # or reach[edp1 U edp2 U edp3][i] <= reach[edp1 U edp2][i] + reach[edp3][i], + # etc. + # 3. Reach of the child set is less than or equal to reach of the parent set + # for all period, e.g. reach[edp1][i] <= reach[edp1 U edp2][i]. + # 4. Time series reaches are less than or equal to whole campaign reach, + # e.g. cumulative_reach[edp1][1] <= whole_campaign_reach[edp1]. + corrected = report.get_corrected_report() + + expected = Report( + metric_reports={ + ami: MetricReport( + reach_time_series={ + # 1 way comb + frozenset({EDP_ONE}): [ + Measurement(0.10, 1.00, "measurement_01"), + Measurement(3.362, 1.00, "measurement_02"), + ], + frozenset({EDP_TWO}): [ + Measurement(0.00, 1.00, "measurement_04"), + Measurement(2.512, 1.00, "measurement_05"), + ], + frozenset({EDP_THREE}): [ + Measurement(0.95, 1.00, "measurement_07"), + Measurement(3.5749, 1.00, "measurement_08"), + ], + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(0.10, 1.00, "measurement_10"), + Measurement(5.30, 1.00, "measurement_11"), + ], + frozenset({EDP_TWO, EDP_THREE}): [ + Measurement(0.95, 1.00, "measurement_13"), + Measurement(6.087, 1.00, "measurement_14"), + ], + frozenset({EDP_ONE, EDP_THREE}): [ + Measurement(1.05, 1.00, "measurement_16"), + Measurement(6.937, 1.00, "measurement_17"), + ], + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1.05, 1.00, "measurement_19"), + Measurement(8.00, 1.00, "measurement_20"), + ], + }, + reach_whole_campaign={ + # 1 way comb + frozenset({EDP_ONE}): + Measurement(4.00, 1.00, "measurement_03"), + frozenset({EDP_TWO}): + Measurement(3.3333, 1.00, "measurement_06"), + frozenset({EDP_THREE}): + Measurement(5.3333, 1.00, "measurement_09"), + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): + Measurement(6.90, 1.00, "measurement_12"), + frozenset({EDP_TWO, EDP_THREE}): + Measurement(8.66666, 1.00, "measurement_15"), + frozenset({EDP_ONE, EDP_THREE}): + Measurement(8.90, 1.00, "measurement_18"), + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): + Measurement(11.90, 1.00, "measurement_21"), + }, + ) + }, + metric_subsets_by_parent={}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + + def test_correct_report_with_whole_campaign_has_more_edp_combinations(self): + ami = "ami" + + report = Report( + metric_reports={ + ami: MetricReport( + reach_time_series={ + # 1 way comb + frozenset({EDP_ONE}): [ + Measurement(0.00, 1, "measurement_01"), + Measurement(3.30, 1, "measurement_02"), + ], + frozenset({EDP_TWO}): [ + Measurement(0.00, 1, "measurement_04"), + Measurement(2.30, 1, "measurement_05"), + ], + frozenset({EDP_THREE}): [ + Measurement(1.00, 1, "measurement_07"), + Measurement(3.30, 1, "measurement_08"), + ], + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1.10, 1, "measurement_19"), + Measurement(8.0, 1, "measurement_20"), + ], + }, + reach_whole_campaign={ + # 1 way comb + frozenset({EDP_ONE}): + Measurement(4.00, 1.00, "measurement_03"), + frozenset({EDP_TWO}): + Measurement(3.3333, 1.00, "measurement_06"), + frozenset({EDP_THREE}): + Measurement(5.3333, 1.00, "measurement_09"), + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): + Measurement(6.90, 1.00, "measurement_12"), + frozenset({EDP_TWO, EDP_THREE}): + Measurement(8.66666, 1.00, "measurement_15"), + frozenset({EDP_ONE, EDP_THREE}): + Measurement(8.90, 1.00, "measurement_18"), + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): + Measurement(11.90, 1.00, "measurement_21"), + }, + ) + }, + metric_subsets_by_parent={}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + # The corrected report should be consistent between time series reaches and + # whole campaign reach: time series reaches are less than or equal to whole + # campaign reach, e.g. cumulative_reach[edp1][1] <= + # whole_campaign_reach[edp1]. + corrected = report.get_corrected_report() + + expected = Report( + metric_reports={ + ami: MetricReport( + reach_time_series={ + # 1 way comb + frozenset({EDP_ONE}): [ + Measurement(0.025, 1.00, "measurement_01"), + Measurement(3.30, 1.00, "measurement_02"), + ], + frozenset({EDP_TWO}): [ + Measurement(0.025, 1.00, "measurement_04"), + Measurement(2.30, 1.00, "measurement_05"), + ], + frozenset({EDP_THREE}): [ + Measurement(1.025, 1.00, "measurement_07"), + Measurement(3.30, 1.00, "measurement_08"), + ], + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1.075, 1.00, "measurement_19"), + Measurement(8.00, 1.00, "measurement_20"), + ], + }, + reach_whole_campaign={ + # 1 way comb + frozenset({EDP_ONE}): + Measurement(4.00, 1.00, "measurement_03"), + frozenset({EDP_TWO}): + Measurement(3.3333, 1.00, "measurement_06"), + frozenset({EDP_THREE}): + Measurement(5.3333, 1.00, "measurement_09"), + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): + Measurement(6.90, 1.00, "measurement_12"), + frozenset({EDP_TWO, EDP_THREE}): + Measurement(8.66666, 1.00, "measurement_15"), + frozenset({EDP_ONE, EDP_THREE}): + Measurement(8.90, 1.00, "measurement_18"), + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): + Measurement(11.90, 1.00, "measurement_21"), + }, + ) + }, + metric_subsets_by_parent={}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def test_allows_incorrect_time_series(self): ami = "ami" report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_TWO}): [ Measurement(0.00, 1, "measurement_01"), Measurement(3.30, 1, "measurement_02"), @@ -642,19 +1195,24 @@ def test_allows_incorrect_time_series(self): Measurement(3.30, 1, "measurement_05"), Measurement(1.00, 1, "measurement_06"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs=set(frozenset({EDP_ONE})), + cumulative_inconsistency_allowed_edp_combinations=set( + frozenset({EDP_ONE})), ) corrected = report.get_corrected_report() + # The corrected report should be consistent: all the time series reaches are + # monotonic increasing, e.g. reach[edp1][i] <= reach[edp1][i+1], except for + # the one in the exception list, e.g. edp1. expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_TWO}): [ Measurement(0.00, 1, "measurement_01"), Measurement(3.30, 1, "measurement_02"), @@ -665,14 +1223,16 @@ def test_allows_incorrect_time_series(self): Measurement(3.30, 1, "measurement_05"), Measurement(1.00, 1, "measurement_06"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs=set(frozenset({EDP_ONE})), + cumulative_inconsistency_allowed_edp_combinations=set( + frozenset({EDP_ONE})), ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def test_can_correct_related_metrics(self): ami = "ami" @@ -680,56 +1240,126 @@ def test_can_correct_related_metrics(self): report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(51, 1, "measurement_01")], frozenset({EDP_ONE}): [ Measurement(50, 1, "measurement_02")], - } + }, + reach_whole_campaign={}, ), mrc: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(52, 1, "measurement_03")], frozenset({EDP_ONE}): [ Measurement(51, 1, "measurement_04")], - } + }, + reach_whole_campaign={}, ), }, # AMI is a parent of MRC metric_subsets_by_parent={ami: [mrc]}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) + # The corrected report should be consistent for metric relations: MRC + # measurements are less than or equal to the AMI measurements, e.g. + # mrc_reach[edp1][0] <= ami_reach[edp1][0]. corrected = report.get_corrected_report() expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(51.5, 1, "measurement_01")], frozenset({EDP_ONE}): [ Measurement(50.5, 1, "measurement_02")], - } + }, + reach_whole_campaign={}, ), mrc: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(51.5, 1, "measurement_03")], frozenset({EDP_ONE}): [ Measurement(50.5, 1, "measurement_04")], - } + }, + reach_whole_campaign={}, ), }, # AMI is a parent of MRC metric_subsets_by_parent={ami: [mrc]}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) - def __assertMeasurementAlmostEquals( + def test_get_corrected_multiple_metric_report_with_different_edp_combinations( + self): + report = Report( + metric_reports={ + "ami": MetricReport( + reach_time_series={ + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(50, 1, "measurement_01")], + frozenset({EDP_ONE}): [ + Measurement(48, 0, "measurement_02")], + frozenset({EDP_TWO}): [ + Measurement(1, 1, "measurement_03")], + }, + reach_whole_campaign={}, + ), + "mrc": MetricReport( + reach_time_series={ + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(45, 1, "measurement_04")], + frozenset({EDP_TWO}): [ + Measurement(2, 1, "measurement_05")], + }, + reach_whole_campaign={}, + ), + }, + metric_subsets_by_parent={"ami": ["mrc"]}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + # The corrected report should be consistent for metric relations: MRC + # measurements are less than or equal to the AMI measurements, e.g. + # mrc_reach[edp1][0] <= ami_reach[edp1][0]. + corrected = report.get_corrected_report() + + expected = Report( + metric_reports={ + "ami": MetricReport( + reach_time_series={ + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(49.667, 1, "measurement_01")], + frozenset({EDP_ONE}): [ + Measurement(48, 0, "measurement_02")], + frozenset({EDP_TWO}): [ + Measurement(1.667, 1, "measurement_03")], + }, + reach_whole_campaign={}, + ), + "mrc": MetricReport( + reach_time_series={ + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(45, 1, "measurement_04")], + frozenset({EDP_TWO}): [ + Measurement(1.667, 1, "measurement_05")], + }, + reach_whole_campaign={}, + ), + }, + metric_subsets_by_parent={"ami": ["mrc"]}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + + def _assertMeasurementAlmostEquals( self, expected: Measurement, actual: Measurement, msg ): if expected.sigma == 0: @@ -739,25 +1369,35 @@ def __assertMeasurementAlmostEquals( expected.value, actual.value, places=EXPECTED_PRECISION, msg=msg ) - def __assertMetricReportsAlmostEqual( + def _assertMetricReportsAlmostEqual( self, expected: MetricReport, actual: MetricReport, msg ): - self.assertEqual(expected.get_num_edp_combs(), actual.get_num_edp_combs()) + self.assertEqual(expected.get_cumulative_edp_combinations_count(), + actual.get_cumulative_edp_combinations_count()) self.assertEqual( expected.get_number_of_periods(), actual.get_number_of_periods() ) - for period in range(0, expected.get_number_of_periods()): - for edp_comb in expected.get_edp_combs(): - self.__assertMeasurementAlmostEquals( - expected.get_edp_comb_measurement(edp_comb, period), - actual.get_edp_comb_measurement(edp_comb, period), + for edp_combination in expected.get_cumulative_edp_combinations(): + for period in range(0, expected.get_number_of_periods()): + self._assertMeasurementAlmostEquals( + expected.get_cumulative_measurement(edp_combination, period), + actual.get_cumulative_measurement(edp_combination, period), msg, ) - def __assertReportsAlmostEqual(self, expected: Report, actual: Report, msg): + self.assertEqual(expected.get_whole_campaign_edp_combinations_count(), + actual.get_whole_campaign_edp_combinations_count()) + for edp_combination in expected.get_whole_campaign_edp_combinations(): + self._assertMeasurementAlmostEquals( + expected.get_whole_campaign_measurement(edp_combination), + actual.get_whole_campaign_measurement(edp_combination), + msg, + ) + + def _assertReportsAlmostEqual(self, expected: Report, actual: Report, msg): self.assertEqual(expected.get_metrics(), actual.get_metrics()) for metric in expected.get_metrics(): - self.__assertMetricReportsAlmostEqual( + self._assertMetricReportsAlmostEqual( expected.get_metric_report(metric), actual.get_metric_report(metric), msg, diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py index a9277a6039a..afee65f8d97 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. +import numpy as np import unittest from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import \ report_summary_pb2 -from tools.post_process_origin_report import correctExcelFile, readExcel, \ - processReportSummary +from tools.post_process_origin_report import processReportSummary EDP_MAP = { "edp1": {"edp1"}, @@ -45,6 +45,13 @@ 31775635, 31917650, 31478465, 31784354, 31542065], } +SIGMAS = { + 'edp1': 1.0, + 'edp2': 1.0, + 'union': 1.0, +} + + class TestOriginReport(unittest.TestCase): def test_report_summary_is_corrected_successfully(self): report_summary = report_summary_pb2.ReportSummary() @@ -58,8 +65,9 @@ def test_report_summary_is_corrected_successfully(self): for i in range(len(AMI_MEASUREMENTS[edp]) - 1): ami_result = ami_measurement_detail.measurement_results.add() ami_result.reach = AMI_MEASUREMENTS[edp][i] - ami_result.standard_deviation = 1.0 - ami_result.metric = "metric_" + edp + "_ami_" + str(i).zfill(5) + ami_result.standard_deviation = SIGMAS[edp] + ami_result.metric = "cumulative_metric_" + edp + "_ami_" + str(i).zfill( + 5) mrc_measurement_detail = report_summary.measurement_details.add() mrc_measurement_detail.measurement_policy = "mrc" @@ -69,8 +77,9 @@ def test_report_summary_is_corrected_successfully(self): for i in range(len(MRC_MEASUREMENTS[edp]) - 1): mrc_result = mrc_measurement_detail.measurement_results.add() mrc_result.reach = MRC_MEASUREMENTS[edp][i] - mrc_result.standard_deviation = 1.0 - mrc_result.metric = "metric_" + edp + "_mrc_" + str(i).zfill(5) + mrc_result.standard_deviation = SIGMAS[edp] + mrc_result.metric = "cumulative_metric_" + edp + "_mrc_" + str(i).zfill( + 5) for edp in EDP_MAP: ami_measurement_detail = report_summary.measurement_details.add() @@ -80,9 +89,8 @@ def test_report_summary_is_corrected_successfully(self): ami_measurement_detail.data_providers.extend(EDP_MAP[edp]) ami_result = ami_measurement_detail.measurement_results.add() ami_result.reach = AMI_MEASUREMENTS[edp][len(AMI_MEASUREMENTS[edp]) - 1] - ami_result.standard_deviation = 1.0 - ami_result.metric = "metric_" + edp + "_ami_" + str( - len(AMI_MEASUREMENTS[edp]) - 1).zfill(5) + ami_result.standard_deviation = SIGMAS[edp] + ami_result.metric = "total_metric_" + edp + "_ami_" mrc_measurement_detail = report_summary.measurement_details.add() mrc_measurement_detail.measurement_policy = "mrc" @@ -91,43 +99,81 @@ def test_report_summary_is_corrected_successfully(self): mrc_measurement_detail.data_providers.extend(EDP_MAP[edp]) mrc_result = mrc_measurement_detail.measurement_results.add() mrc_result.reach = MRC_MEASUREMENTS[edp][len(MRC_MEASUREMENTS[edp]) - 1] - mrc_result.standard_deviation = 1.0 - mrc_result.metric = "metric_" + edp + "_mrc_" + str( - len(MRC_MEASUREMENTS[edp]) - 1).zfill(5) + mrc_result.standard_deviation = SIGMAS[edp] + mrc_result.metric = "total_metric_" + edp + "_mrc_" corrected_measurements_map = processReportSummary(report_summary) + # Verifies that the updated reach values are consistent. for edp in EDP_MAP: - ami_metric_prefix = "metric_" + edp + "_ami_" - mrc_metric_prefix = "metric_" + edp + "_mrc_" + cumulative_ami_metric_prefix = "cumulative_metric_" + edp + "_ami_" + cumulative_mrc_metric_prefix = "cumulative_metric_" + edp + "_mrc_" + total_ami_metric = "total_metric_" + edp + "_ami_" + total_mrc_metric = "total_metric_" + edp + "_mrc_" # Verifies that cumulative measurements are consistent. - for i in range(len(AMI_MEASUREMENTS) - 1): + for i in range(len(AMI_MEASUREMENTS) - 2): self.assertTrue( - corrected_measurements_map[ami_metric_prefix + str(i).zfill(5)] <= - corrected_measurements_map[ami_metric_prefix + str(i + 1).zfill(5)]) + corrected_measurements_map[ + cumulative_ami_metric_prefix + str(i).zfill(5)] <= + corrected_measurements_map[ + cumulative_ami_metric_prefix + str(i + 1).zfill(5)]) self.assertTrue( - corrected_measurements_map[mrc_metric_prefix + str(i).zfill(5)] <= - corrected_measurements_map[mrc_metric_prefix + str(i + 1).zfill(5)]) + corrected_measurements_map[ + cumulative_mrc_metric_prefix + str(i).zfill(5)] <= + corrected_measurements_map[ + cumulative_mrc_metric_prefix + str(i + 1).zfill(5)]) # Verifies that the mrc measurements is less than or equal to the ami ones. - for i in range(len(AMI_MEASUREMENTS)): + for i in range(len(AMI_MEASUREMENTS) - 1): self.assertTrue( - corrected_measurements_map[mrc_metric_prefix + str(i).zfill(5)] <= - corrected_measurements_map[ami_metric_prefix + str(i).zfill(5)] + corrected_measurements_map[ + cumulative_mrc_metric_prefix + str(i).zfill(5)] <= + corrected_measurements_map[ + cumulative_ami_metric_prefix + str(i).zfill(5)] ) + # Verifies that the total reach is greater than or equal to the last + # cumulative reach. + index = len(AMI_MEASUREMENTS) - 1 + self.assertTrue( + corrected_measurements_map[ + cumulative_ami_metric_prefix + str(index).zfill(5)] <= + corrected_measurements_map[total_ami_metric] + ) + self.assertTrue( + corrected_measurements_map[ + cumulative_mrc_metric_prefix + str(index).zfill(5)] <= + corrected_measurements_map[total_mrc_metric] + ) # Verifies that the union reach is less than or equal to the sum of # individual reaches. for i in range(len(AMI_MEASUREMENTS) - 1): self.assertTrue( - corrected_measurements_map["metric_union_ami_" + str(i).zfill(5)] <= - corrected_measurements_map["metric_edp1_ami_" + str(i).zfill(5)] + - corrected_measurements_map["metric_edp2_ami_" + str(i).zfill(5)] + corrected_measurements_map[ + "cumulative_metric_union_ami_" + str(i).zfill(5)] <= + corrected_measurements_map[ + "cumulative_metric_edp1_ami_" + str(i).zfill(5)] + + corrected_measurements_map[ + "cumulative_metric_edp2_ami_" + str(i).zfill(5)] ) self.assertTrue( - corrected_measurements_map["metric_union_mrc_" + str(i).zfill(5)] <= - corrected_measurements_map["metric_edp1_mrc_" + str(i).zfill(5)] + - corrected_measurements_map["metric_edp2_mrc_" + str(i).zfill(5)] + corrected_measurements_map[ + "cumulative_metric_union_mrc_" + str(i).zfill(5)] <= + corrected_measurements_map[ + "cumulative_metric_edp1_mrc_" + str(i).zfill(5)] + + corrected_measurements_map[ + "cumulative_metric_edp2_mrc_" + str(i).zfill(5)] ) + self.assertTrue( + corrected_measurements_map["total_metric_union_ami_"] <= + corrected_measurements_map["total_metric_edp1_ami_"] + + corrected_measurements_map["total_metric_edp2_ami_"] + ) + self.assertTrue( + corrected_measurements_map["total_metric_union_mrc_"] <= + corrected_measurements_map["total_metric_edp1_mrc_"] + + corrected_measurements_map["total_metric_edp2_mrc_"] + ) + if __name__ == "__main__": unittest.main()