From b9240b2106a253c04d28ce5c0b9b299d8032fa58 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Fri, 11 Oct 2024 18:57:45 +0000 Subject: [PATCH 01/18] Migrate dp consistency out of experimental. --- experimental/dp_consistency/BUILD.bazel | 0 experimental/dp_consistency/README.md | 148 ------------------ .../reporting/postprocess/BUILD.bazel | 28 ---- .../reporting/postprocess/BUILD.bazel | 43 ----- .../reporting/postprocess/tools/BUILD.bazel | 10 -- .../postprocessing/v2alpha/BUILD.bazel | 10 ++ .../v2alpha}/ReportConversion.kt | 10 +- .../v2alpha/ReportProcessorImpl.kt | 41 ++--- .../postprocessing/v2alpha}/BUILD.bazel | 0 .../v2alpha}/report_summary.proto | 4 +- .../src => src}/main/python/__init__.py | 0 .../src => src}/main/python/wfa/__init__.py | 0 .../main/python/wfa/measurement/__init__.py | 0 .../wfa/measurement/reporting/__init__.py | 0 .../reporting/postprocessing}/__init__.py | 0 .../postprocessing}/noiseninja/BUILD.bazel | 0 .../postprocessing}/noiseninja/__init__.py | 0 .../noiseninja/noised_measurements.py | 0 .../postprocessing}/noiseninja/solver.py | 0 .../postprocessing}/report/BUILD.bazel | 2 +- .../postprocessing}/report/__init__.py | 0 .../postprocessing}/report/report.py | 0 .../postprocessing}/tools/BUILD.bazel | 6 +- .../postprocessing}/tools/__init__.py | 0 .../tools/post_process_origin_report.py | 4 +- .../postprocessing/v2alpha/BUILD.bazel | 40 +++++ .../v2alpha}/ReportConversionTest.kt | 13 +- .../v2alpha/ReportProcessorImplTest.kt | 14 +- .../report_with_failed_measurement.json | 0 .../report_with_unspecified_state.json | 0 .../v2alpha}/sample_report_large.json | 0 .../v2alpha}/sample_report_small.json | 0 .../postprocessing}/noiseninja/BUILD.bazel | 2 +- .../postprocessing}/noiseninja/test_solver.py | 0 .../postprocessing}/report/BUILD.bazel | 2 +- .../postprocessing}/report/test_report.py | 0 .../postprocessing/tools/BUILD.bazel | 10 ++ .../tools/test_post_process_origin_report.py | 2 +- 38 files changed, 111 insertions(+), 278 deletions(-) delete mode 100644 experimental/dp_consistency/BUILD.bazel delete mode 100644 experimental/dp_consistency/README.md delete mode 100644 experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/BUILD.bazel delete mode 100644 experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/BUILD.bazel delete mode 100644 experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/tools/BUILD.bazel rename {experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess => src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha}/ReportConversion.kt (94%) rename experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/ReportPostProcessing.kt => src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt (85%) rename {experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess => src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha}/BUILD.bazel (100%) rename {experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess => src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha}/report_summary.proto (89%) rename {experimental/dp_consistency/src => src}/main/python/__init__.py (100%) rename {experimental/dp_consistency/src => src}/main/python/wfa/__init__.py (100%) rename {experimental/dp_consistency/src => src}/main/python/wfa/measurement/__init__.py (100%) rename 
{experimental/dp_consistency/src => src}/main/python/wfa/measurement/reporting/__init__.py (100%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/__init__.py (100%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/noiseninja/BUILD.bazel (100%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/noiseninja/__init__.py (100%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/noiseninja/noised_measurements.py (100%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/noiseninja/solver.py (100%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/report/BUILD.bazel (65%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/report/__init__.py (100%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/report/report.py (100%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/tools/BUILD.bazel (57%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/tools/__init__.py (100%) rename {experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess => src/main/python/wfa/measurement/reporting/postprocessing}/tools/post_process_origin_report.py (98%) rename {experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess => src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha}/ReportConversionTest.kt (91%) rename experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/ReportPostProcessingTest.kt => src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt (87%) rename {experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess => src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha}/report_with_failed_measurement.json (100%) rename {experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess => src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha}/report_with_unspecified_state.json (100%) rename {experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess => src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha}/sample_report_large.json (100%) rename {experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess => src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha}/sample_report_small.json (100%) rename {experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess => src/test/python/wfa/measurement/reporting/postprocessing}/noiseninja/BUILD.bazel (60%) rename 
{experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess => src/test/python/wfa/measurement/reporting/postprocessing}/noiseninja/test_solver.py (100%) rename {experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess => src/test/python/wfa/measurement/reporting/postprocessing}/report/BUILD.bazel (57%) rename {experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess => src/test/python/wfa/measurement/reporting/postprocessing}/report/test_report.py (100%) create mode 100644 src/test/python/wfa/measurement/reporting/postprocessing/tools/BUILD.bazel rename {experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess => src/test/python/wfa/measurement/reporting/postprocessing}/tools/test_post_process_origin_report.py (98%) diff --git a/experimental/dp_consistency/BUILD.bazel b/experimental/dp_consistency/BUILD.bazel deleted file mode 100644 index e69de29bb2d..00000000000 diff --git a/experimental/dp_consistency/README.md b/experimental/dp_consistency/README.md deleted file mode 100644 index 4f4a975c261..00000000000 --- a/experimental/dp_consistency/README.md +++ /dev/null @@ -1,148 +0,0 @@ -# Quadratic Programming for Consistency Enforcement in Noisy Data - -This project provides a Python implementation of a quadratic programming (QP) approach to address inconsistencies that -arise in data due to noise. The method focuses on ensuring consistency between measurement results, particularly when -dealing with sets and their subsets or unions. - -## Core Idea - -The central idea is to formulate the problem of inconsistency removal as a QP optimization problem. The objective is to -minimize the variance-adjusted L2 norm between the original (noisy) measurements and the adjusted estimates. This -adjustment process aims to find the most likely set of consistent values, given the observed noisy measurements and -their inherent uncertainties (variances). - -## Mathematical Formulation - -### Objective Function - -The objective function to be minimized is: - -$\|{ \dfrac{Y_i - \mu_{M_j}}{\sigma_{M_j}} }\|_2$ - -where: - -* $Y_i$: Adjusted estimates for each set -* $M_j$: Original (noisy) measurements, note each set could have been measured more than once. -* $\sigma_{M_j}$: Standard deviations of the measurements - -### Constraints - -The optimization is subject to several constraints: - -1. **Subset Constraints:** The measurements for subsets within a set must be less than or equal to the - measurement of the parent set. -2. **Cover Constraints:** The sum of measurements for sets forming a cover must be greater than or equal to the - measurement of the parent set they cover. -3. **Non-negativity:** All adjusted estimates (Y_i) must be greater than or equal to zero. -4. **Population Bound:** All adjusted estimates (Y_i) must be less than or equal to the total population (P). -5. **Zero-Variance Equality:** Measurements with zero variance are considered exact and are enforced as equality - constraints (Y_i = M_j). Note: This is useful if one of the data sources does not have noise added. - -## Interpretation - -* The variance adjustment in the objective function allows for larger changes to measurements with higher variance ( - greater uncertainty) and encourages smaller changes to measurements with lower variance. 
-* Under the assumption of normally distributed measurement noise, this approach is equivalent to finding the maximum - likelihood estimate (MLE) of the true values, meaning it selects the most likely consistent solution given the - observed data and their uncertainties. - -## Application to Origin's report - -### Measurements ### - -Origin baseline measurements are either direct (single EDP measurements), or union measurements for more than one EDP. -Each measurement is also associated with a time period and a metric (e.g. mrc, ami, etc.). - -To compute unique reach for each EDP (when the number of EDPs is larger than 2), we must also compute the union of all -EDPs but one, for each EDP, for each measurement period and metric. - -For TV measurements we always set the variance to 0, to indicate that TV measurements do not have DP noise. - -### Cover Relationships ### - -For each metric and period, it identifies the set representing the union of all entities/dimensions (EDPs) and the -individual sets for each EDP, and adds a "cover" relationship to the spec, indicating that the union set's measurement -should be greater than or equal to the sum of the measurements of its constituent EDP sets. - -We do the same for the unions computed for unique reach purposes, also including the relationship that the -all-but-edp-x set and the set x are a cover of the union of all EDPs. - -### Subset Relationships ### - -For each metric, period, and EDP, it establishes a subset relationship between the set representing that specific EDP -and the union set for that metric and period, for all unions that include that EDP for that metric and period. - -If there's a predefined hierarchy among metrics (where some metrics are inherently greater than or equal to others), it -enforces this relationship for each period and EDP, including the measured unions. For example: $MRC \le AMI$. - -It asserts that the cumulative reach measurements in one period are less than or equal to those in the -subsequent period. It adds subset relationships to reflect this for each metric and EDP, including the union set. - -## Implementation - -The methodology is implemented using Python and the [qpsolvers](https://github.com/qpsolvers/qpsolvers) - library to solve the QP problem. The qpsolvers package offers a common interface over many QP implementations, which -makes it easy to swap the underlying solver engine over time. - -The `SetMeasurementsSpec` class in the [noised_measurements](src/noiseninja/noised_measurements.py) package is used to -efficiently store and manage the relationships between sets and their -measurements, facilitating the automatic generation of constraints for the optimization. - -## Experimental Results - -Simulation results demonstrate a reduction in variance across all variables in the report, indicating improved -consistency after applying this method. More details will be added soon. - -## Key Advantages - -* **Statistically Sound:** Based on MLE, providing a principled approach to handling noisy data. -* **Flexible:** Can accommodate various types of constraints and relationships between sets. -* **Efficient:** Leverages state-of-the-art QP solvers for fast and accurate solutions. - -## Key Classes - -- **`Measurement`:** - - Represents a single measurement with a mean value (`value`) and a standard deviation (`sigma`). - -- **`SetMeasurementsSpec`:** - - Stores information about the relationships between sets and their measurements.
- - Tracks subset relationships (`subsets_by_set`), cover relationships (`covers_by_set`), and the measurements - associated with each set (`measurements_by_set`). - - Provides methods to add these relationships and measurements incrementally. - -- **`Solver`:** - - Takes in a (`SetMeasurementSpec`) and translates it to a quadratic program, and has methods to run QP and return - the solution - -## Prepare the environment, install dependencies and run correction on an Origin Report - -``` -git clone https://github.com/world-federation-of-advertisers/experimental.git -cd experimental -python3 -m venv ../noisecorrectionenv -source ../noisecorrectionenv/bin/activate -cd dp_consistency -pip3 install -r requirements.txt -python3 -m src.main.python.tools.correctoriginreport --path_to_report=/path/to/Origin_Report.xlsx --unnoised_edps "Linear TV" -``` -This will correct the report and create a corrected a file called `Origin_Report_corrected.xlsx` in this folder. - -## Usage Example - -```python -from collections import defaultdict -# ... (Your classes: Measurement, SetMeasurementsSpec) -from qpsolvers import solve_qp -import numpy as np - - -# ... (Load or create your data) - -# Create a SetMeasurementsSpec and populate it -spec = SetMeasurementsSpec() -spec.add_subset_relation(1, 2) # Set 2 is a subset of set 1 -spec.add_cover(3, [4, 5]) # Sets 4 and 5 form a cover of set 3 -# ... (Add measurements for each set) - -# Formulate and solve the QP problem -# ... (Similar to the previous example, but use the data from spec) diff --git a/experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/BUILD.bazel b/experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/BUILD.bazel deleted file mode 100644 index 879afcb5306..00000000000 --- a/experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/BUILD.bazel +++ /dev/null @@ -1,28 +0,0 @@ -load("@wfa_rules_kotlin_jvm//kotlin:defs.bzl", "kt_jvm_library") - -package(default_visibility = ["//visibility:public"]) - -kt_jvm_library( - name = "report_post_processing", - srcs = ["ReportPostProcessing.kt"], - resources = [ - "//experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools:post_process_origin_report_pyzip", - ], - deps = [ - ":report_conversion", - "//experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess:report_summary_kt_jvm_proto", - "//src/main/proto/wfa/measurement/reporting/v2alpha:report_kt_jvm_proto", - "@wfa_common_jvm//src/main/kotlin/org/wfanet/measurement/common", - ], -) - -kt_jvm_library( - name = "report_conversion", - srcs = ["ReportConversion.kt"], - deps = [ - "//experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess:report_summary_kt_jvm_proto", - "//src/main/proto/wfa/measurement/reporting/v2alpha:report_kt_jvm_proto", - "@maven//:com_google_protobuf_protobuf_java_util", - "@wfa_common_jvm//imports/java/com/google/gson", - ], -) diff --git a/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/BUILD.bazel b/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/BUILD.bazel deleted file mode 100644 index 0da1b25547e..00000000000 --- a/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/BUILD.bazel +++ /dev/null @@ -1,43 +0,0 @@ -load("@wfa_rules_kotlin_jvm//kotlin:defs.bzl", "kt_jvm_test") - -package(default_testonly = True) - -kt_jvm_test( - name = 
"report_post_processing_test", - srcs = ["ReportPostProcessingTest.kt"], - data = [":sample_reports"], - test_class = "org.wfanet.measurement.reporting.postprocessing.ReportPostProcessingTest", - deps = [ - "//experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess:report_post_processing", - "//experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess:report_summary_kt_jvm_proto", - "//src/main/proto/wfa/measurement/reporting/v2alpha:report_kt_jvm_proto", - "@wfa_common_jvm//imports/java/com/google/common/truth", - "@wfa_common_jvm//imports/java/com/google/common/truth/extensions/proto", - "@wfa_common_jvm//imports/java/com/google/protobuf", - "@wfa_common_jvm//imports/java/org/junit", - "@wfa_common_jvm//imports/kotlin/kotlin/test", - "@wfa_common_jvm//src/main/kotlin/org/wfanet/measurement/common", - ], -) - -kt_jvm_test( - name = "report_conversion_test", - srcs = ["ReportConversionTest.kt"], - data = [":sample_reports"], - test_class = "org.wfanet.measurement.reporting.postprocessing.ReportConversionTest", - deps = [ - "//experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess:report_conversion", - "//experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess:report_summary_kt_jvm_proto", - "//src/main/proto/wfa/measurement/reporting/v2alpha:report_kt_jvm_proto", - "@wfa_common_jvm//imports/java/com/google/common/truth", - "@wfa_common_jvm//imports/java/com/google/common/truth/extensions/proto", - "@wfa_common_jvm//imports/java/org/junit", - "@wfa_common_jvm//imports/kotlin/kotlin/test", - "@wfa_common_jvm//src/main/kotlin/org/wfanet/measurement/common", - ], -) - -filegroup( - name = "sample_reports", - srcs = glob(["*.json"]), -) diff --git a/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/tools/BUILD.bazel b/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/tools/BUILD.bazel deleted file mode 100644 index 3bb6787f023..00000000000 --- a/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/tools/BUILD.bazel +++ /dev/null @@ -1,10 +0,0 @@ -load("@rules_python//python:defs.bzl", "py_test") - -py_test( - name = "test_post_process_origin_report", - srcs = ["test_post_process_origin_report.py"], - deps = [ - "//experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess:report_summary_py_pb2", - "//experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools:post_process_origin_report", - ], -) diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel index 4498a075819..fea967abbe1 100644 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel @@ -9,6 +9,16 @@ MAVEN_COORDINATES = "org.wfanet.measurement.reporting:postprocessing-v2alpha:" + kt_jvm_library( name = "postprocessing", srcs = glob(["*.kt"]), + resources = [ + "//src/main/python/wfa/measurement/reporting/postprocessing/tools:post_process_origin_report_pyzip", + ], + deps = [ + "//src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha:report_summary_kt_jvm_proto", + "//src/main/proto/wfa/measurement/reporting/v2alpha:report_kt_jvm_proto", + "@maven//:com_google_protobuf_protobuf_java_util", + 
"@wfa_common_jvm//imports/java/com/google/gson", + "@wfa_common_jvm//src/main/kotlin/org/wfanet/measurement/common", + ], tags = ["maven_coordinates=" + MAVEN_COORDINATES], ) diff --git a/experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/ReportConversion.kt b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt similarity index 94% rename from experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/ReportConversion.kt rename to src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt index 2ef2b830078..5f343d75b4e 100644 --- a/experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/ReportConversion.kt +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt @@ -12,14 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.wfanet.measurement.reporting.postprocessing +package org.wfanet.measurement.reporting.postprocessing.v2alpha import com.google.protobuf.InvalidProtocolBufferException import com.google.protobuf.util.JsonFormat -import org.wfanet.measurement.reporting.MeasurementDetailKt -import org.wfanet.measurement.reporting.ReportSummary -import org.wfanet.measurement.reporting.measurementDetail -import org.wfanet.measurement.reporting.reportSummary +import org.wfanet.measurement.reporting.postprocessing.v2alpha.MeasurementDetailKt +import org.wfanet.measurement.reporting.postprocessing.v2alpha.ReportSummary +import org.wfanet.measurement.reporting.postprocessing.v2alpha.measurementDetail +import org.wfanet.measurement.reporting.postprocessing.v2alpha.reportSummary import org.wfanet.measurement.reporting.v2alpha.Metric import org.wfanet.measurement.reporting.v2alpha.Report diff --git a/experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/ReportPostProcessing.kt b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt similarity index 85% rename from experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/ReportPostProcessing.kt rename to src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt index 931d0920288..9a9beb763e7 100644 --- a/experimental/dp_consistency/src/main/kotlin/org/wfanet/measurement/reporting/postprocess/ReportPostProcessing.kt +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -package org.wfanet.measurement.reporting.postprocessing +package org.wfanet.measurement.reporting.postprocessing.v2alpha import com.google.gson.GsonBuilder import java.io.BufferedReader @@ -25,26 +25,16 @@ import java.util.logging.Logger import kotlin.io.path.name import org.wfanet.measurement.common.getJarResourcePath import org.wfanet.measurement.common.toJson -import org.wfanet.measurement.reporting.ReportSummary +import org.wfanet.measurement.reporting.postprocessing.v2alpha.ReportSummary import org.wfanet.measurement.reporting.v2alpha.Report import org.wfanet.measurement.reporting.v2alpha.copy import org.wfanet.measurement.reporting.v2alpha.report -/** Corrects noisy measurements in a report. 
*/ -object ReportPostProcessing { - private val logger: Logger = Logger.getLogger(this::class.java.name) - const private val PYTHON_LIBRARY_RESOURCE_NAME = - "experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools/post_process_origin_report.zip" - private val resourcePath: Path = - this::class.java.classLoader.getJarResourcePath(PYTHON_LIBRARY_RESOURCE_NAME) - ?: error("$PYTHON_LIBRARY_RESOURCE_NAME not found in JAR") - private val tempFile = File.createTempFile(resourcePath.name, "").apply { deleteOnExit() } - - init { - // Copies python zip package from JAR to local directory. - Files.copy(resourcePath, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING) - } - +/** + * An implementation of [ReportProcessor] that takes a serialized [Report] in JSON format and + * returns the a [Report] of which all measurements are consistent. + */ +class ReportProcessorImpl : ReportProcessor { /** * Corrects the inconsistent measurements in the [report] and returns a corrected report in JSON * format. @@ -52,7 +42,7 @@ object ReportPostProcessing { * @param report standard JSON serialization of a Report message. * @return a corrected report, serialized as a standard JSON string. */ - fun processReportJson(report: String): String { + override fun processReportJson(report: String): String { return processReport(ReportConversion.getReportFromJsonString(report)).toJson() } @@ -179,4 +169,19 @@ object ReportPostProcessing { } return updatedReport } + + companion object { + private val logger: Logger = Logger.getLogger(this::class.java.name) + const private val PYTHON_LIBRARY_RESOURCE_NAME = + "src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.zip" + private val resourcePath: Path = + this::class.java.classLoader.getJarResourcePath(PYTHON_LIBRARY_RESOURCE_NAME) + ?: error("$PYTHON_LIBRARY_RESOURCE_NAME not found in JAR") + private val tempFile = File.createTempFile(resourcePath.name, "").apply { deleteOnExit() } + + init { + // Copies python zip package from JAR to local directory. 
+ Files.copy(resourcePath, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING) + } + } } diff --git a/experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess/BUILD.bazel b/src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha/BUILD.bazel similarity index 100% rename from experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess/BUILD.bazel rename to src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha/BUILD.bazel diff --git a/experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess/report_summary.proto b/src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha/report_summary.proto similarity index 89% rename from experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess/report_summary.proto rename to src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha/report_summary.proto index eecfcd5c630..b77b9281c49 100644 --- a/experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess/report_summary.proto +++ b/src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha/report_summary.proto @@ -14,9 +14,9 @@ syntax = "proto3"; -package wfa.measurement.reporting; +package wfa.measurement.reporting.postprocessing.v2alpha; -option java_package = "org.wfanet.measurement.reporting"; +option java_package = "org.wfanet.measurement.reporting.postprocessing.v2alpha"; option java_multiple_files = true; option java_outer_classname = "ReportSummaryProto"; diff --git a/experimental/dp_consistency/src/main/python/__init__.py b/src/main/python/__init__.py similarity index 100% rename from experimental/dp_consistency/src/main/python/__init__.py rename to src/main/python/__init__.py diff --git a/experimental/dp_consistency/src/main/python/wfa/__init__.py b/src/main/python/wfa/__init__.py similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/__init__.py rename to src/main/python/wfa/__init__.py diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/__init__.py b/src/main/python/wfa/measurement/__init__.py similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/measurement/__init__.py rename to src/main/python/wfa/measurement/__init__.py diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/__init__.py b/src/main/python/wfa/measurement/reporting/__init__.py similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/__init__.py rename to src/main/python/wfa/measurement/reporting/__init__.py diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/__init__.py b/src/main/python/wfa/measurement/reporting/postprocessing/__init__.py similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/__init__.py rename to src/main/python/wfa/measurement/reporting/postprocessing/__init__.py diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/noiseninja/BUILD.bazel b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/BUILD.bazel similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/noiseninja/BUILD.bazel rename to src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/BUILD.bazel diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/noiseninja/__init__.py 
b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/__init__.py similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/noiseninja/__init__.py rename to src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/__init__.py diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/noiseninja/noised_measurements.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/noiseninja/noised_measurements.py rename to src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/noiseninja/solver.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/noiseninja/solver.py rename to src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/report/BUILD.bazel b/src/main/python/wfa/measurement/reporting/postprocessing/report/BUILD.bazel similarity index 65% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/report/BUILD.bazel rename to src/main/python/wfa/measurement/reporting/postprocessing/report/BUILD.bazel index 5eeb42afe3b..40e28cbfc09 100644 --- a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/report/BUILD.bazel +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/BUILD.bazel @@ -6,6 +6,6 @@ py_library( imports = ["../"], visibility = ["//visibility:public"], deps = [ - "//experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/noiseninja", + "//src/main/python/wfa/measurement/reporting/postprocessing/noiseninja", ], ) diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/report/__init__.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/__init__.py similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/report/__init__.py rename to src/main/python/wfa/measurement/reporting/postprocessing/report/__init__.py diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/report/report.py rename to src/main/python/wfa/measurement/reporting/postprocessing/report/report.py diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools/BUILD.bazel b/src/main/python/wfa/measurement/reporting/postprocessing/tools/BUILD.bazel similarity index 57% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools/BUILD.bazel rename to src/main/python/wfa/measurement/reporting/postprocessing/tools/BUILD.bazel index 0d37f8cce76..8cced02faa5 100644 --- a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools/BUILD.bazel +++ 
b/src/main/python/wfa/measurement/reporting/postprocessing/tools/BUILD.bazel @@ -6,7 +6,7 @@ package(default_visibility = ["//visibility:public"]) filegroup( name = "post_process_origin_report_pyzip", srcs = [ - "//experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools:post_process_origin_report", + "//src/main/python/wfa/measurement/reporting/postprocessing/tools:post_process_origin_report", ], output_group = "python_zip_file", ) @@ -15,8 +15,8 @@ py_binary( name = "post_process_origin_report", srcs = ["post_process_origin_report.py"], deps = [ - "//experimental/dp_consistency/src/main/proto/wfa/measurement/reporting/postprocess:report_summary_py_pb2", - "//experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/report", + "//src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha:report_summary_py_pb2", + "//src/main/python/wfa/measurement/reporting/postprocessing/report", requirement("openpyxl"), requirement("pandas"), ], diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools/__init__.py b/src/main/python/wfa/measurement/reporting/postprocessing/tools/__init__.py similarity index 100% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools/__init__.py rename to src/main/python/wfa/measurement/reporting/postprocessing/tools/__init__.py diff --git a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools/post_process_origin_report.py b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py similarity index 98% rename from experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools/post_process_origin_report.py rename to src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py index 969cb96db6c..7d26a3a53b3 100644 --- a/experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/tools/post_process_origin_report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py @@ -12,14 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-import base64 import json import math import pandas as pd import sys -from experimental.dp_consistency.src.main.proto.wfa.measurement.reporting.postprocess import \ - report_summary_pb2 +from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import report_summary_pb2 from functools import partial from noiseninja.noised_measurements import Measurement from report.report import Report, MetricReport diff --git a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel index 31b7fbe0276..af70122377d 100644 --- a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel +++ b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel @@ -14,3 +14,43 @@ kt_jvm_test( "@wfa_common_jvm//imports/kotlin/kotlin/test", ], ) + +kt_jvm_test( + name = "report_processor_impl_test", + srcs = ["ReportProcessorImplTest.kt"], + data = [":sample_reports"], + test_class = "org.wfanet.measurement.reporting.postprocessing.v2alpha.ReportProcessorImplTest", + deps = [ + "//src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha:postprocessing", + "//src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha:report_summary_kt_jvm_proto", + "//src/main/proto/wfa/measurement/reporting/v2alpha:report_kt_jvm_proto", + "@wfa_common_jvm//imports/java/com/google/common/truth", + "@wfa_common_jvm//imports/java/com/google/common/truth/extensions/proto", + "@wfa_common_jvm//imports/java/com/google/protobuf", + "@wfa_common_jvm//imports/java/org/junit", + "@wfa_common_jvm//imports/kotlin/kotlin/test", + "@wfa_common_jvm//src/main/kotlin/org/wfanet/measurement/common", + ], +) + +kt_jvm_test( + name = "report_conversion_test", + srcs = ["ReportConversionTest.kt"], + data = [":sample_reports"], + test_class = "org.wfanet.measurement.reporting.postprocessing.v2alpha.ReportConversionTest", + deps = [ + "//src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha:postprocessing", + "//src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha:report_summary_kt_jvm_proto", + "//src/main/proto/wfa/measurement/reporting/v2alpha:report_kt_jvm_proto", + "@wfa_common_jvm//imports/java/com/google/common/truth", + "@wfa_common_jvm//imports/java/com/google/common/truth/extensions/proto", + "@wfa_common_jvm//imports/java/org/junit", + "@wfa_common_jvm//imports/kotlin/kotlin/test", + "@wfa_common_jvm//src/main/kotlin/org/wfanet/measurement/common", + ], +) + +filegroup( + name = "sample_reports", + srcs = glob(["*.json"]), +) diff --git a/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/ReportConversionTest.kt b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversionTest.kt similarity index 91% rename from experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/ReportConversionTest.kt rename to src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversionTest.kt index e7f0c75d1ef..87c91733b47 100644 --- a/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/ReportConversionTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversionTest.kt @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-package org.wfanet.measurement.reporting.postprocessing +package org.wfanet.measurement.reporting.postprocessing.v2alpha import com.google.common.truth.Truth.assertThat import com.google.common.truth.extensions.proto.ProtoTruth.assertThat @@ -23,9 +23,9 @@ import org.junit.Test import org.junit.runner.RunWith import org.junit.runners.JUnit4 import org.wfanet.measurement.common.getRuntimePath -import org.wfanet.measurement.reporting.MeasurementDetailKt.measurementResult -import org.wfanet.measurement.reporting.measurementDetail -import org.wfanet.measurement.reporting.reportSummary +import org.wfanet.measurement.reporting.postprocessing.v2alpha.MeasurementDetailKt.measurementResult +import org.wfanet.measurement.reporting.postprocessing.v2alpha.measurementDetail +import org.wfanet.measurement.reporting.postprocessing.v2alpha.reportSummary @RunWith(JUnit4::class) class ReportConversionTest { @@ -101,8 +101,6 @@ class ReportConversionTest { getRuntimePath( Paths.get( "wfa_measurement_system", - "experimental", - "dp_consistency", "src", "test", "kotlin", @@ -110,7 +108,8 @@ class ReportConversionTest { "wfanet", "measurement", "reporting", - "postprocess", + "postprocessing", + "v2alpha", ) )!! } diff --git a/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/ReportPostProcessingTest.kt b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt similarity index 87% rename from experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/ReportPostProcessingTest.kt rename to src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt index 5fba1e1220c..ed15ded8ea5 100644 --- a/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/ReportPostProcessingTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt @@ -12,7 +12,7 @@ * the License. 
*/ -package org.wfanet.measurement.reporting.postprocessing +package org.wfanet.measurement.reporting.postprocessing.v2alpha import com.google.common.truth.Truth.assertThat import java.nio.file.Path @@ -21,11 +21,11 @@ import org.junit.Test import org.junit.runner.RunWith import org.junit.runners.JUnit4 import org.wfanet.measurement.common.getRuntimePath -import org.wfanet.measurement.reporting.measurementDetail +import org.wfanet.measurement.reporting.postprocessing.v2alpha.measurementDetail import org.wfanet.measurement.reporting.v2alpha.Report @RunWith(JUnit4::class) -class ReportPostProcessingTest { +class ReportProcessorImplTest { @Test fun `run correct report successfully`() { val reportFile = TEST_DATA_RUNTIME_DIR.resolve("sample_report_large.json").toFile() @@ -33,7 +33,8 @@ class ReportPostProcessingTest { val report = ReportConversion.getReportFromJsonString(reportAsJson) assertThat(report.hasConsistentMeasurements()).isEqualTo(false) - val updatedReportAsJson = ReportPostProcessing.processReportJson(reportAsJson) + val reportProcessor = ReportProcessorImpl() + val updatedReportAsJson = reportProcessor.processReportJson(reportAsJson) val updatedReport = ReportConversion.getReportFromJsonString(updatedReportAsJson) assertThat(updatedReport.hasConsistentMeasurements()).isEqualTo(true) } @@ -43,8 +44,6 @@ class ReportPostProcessingTest { getRuntimePath( Paths.get( "wfa_measurement_system", - "experimental", - "dp_consistency", "src", "test", "kotlin", @@ -52,7 +51,8 @@ class ReportPostProcessingTest { "wfanet", "measurement", "reporting", - "postprocess", + "postprocessing", + "v2alpha", ) )!! diff --git a/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/report_with_failed_measurement.json b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/report_with_failed_measurement.json similarity index 100% rename from experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/report_with_failed_measurement.json rename to src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/report_with_failed_measurement.json diff --git a/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/report_with_unspecified_state.json b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/report_with_unspecified_state.json similarity index 100% rename from experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/report_with_unspecified_state.json rename to src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/report_with_unspecified_state.json diff --git a/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/sample_report_large.json b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_large.json similarity index 100% rename from experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/sample_report_large.json rename to src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_large.json diff --git a/experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/sample_report_small.json b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_small.json similarity index 100% rename from experimental/dp_consistency/src/test/kotlin/org/wfanet/measurement/reporting/postprocess/sample_report_small.json rename to 
src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_small.json diff --git a/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/noiseninja/BUILD.bazel b/src/test/python/wfa/measurement/reporting/postprocessing/noiseninja/BUILD.bazel similarity index 60% rename from experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/noiseninja/BUILD.bazel rename to src/test/python/wfa/measurement/reporting/postprocessing/noiseninja/BUILD.bazel index f50243a66a2..4047fcbe597 100644 --- a/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/noiseninja/BUILD.bazel +++ b/src/test/python/wfa/measurement/reporting/postprocessing/noiseninja/BUILD.bazel @@ -5,6 +5,6 @@ py_test( size = "small", srcs = ["test_solver.py"], deps = [ - "//experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/noiseninja", + "//src/main/python/wfa/measurement/reporting/postprocessing/noiseninja", ], ) diff --git a/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/noiseninja/test_solver.py b/src/test/python/wfa/measurement/reporting/postprocessing/noiseninja/test_solver.py similarity index 100% rename from experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/noiseninja/test_solver.py rename to src/test/python/wfa/measurement/reporting/postprocessing/noiseninja/test_solver.py diff --git a/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/report/BUILD.bazel b/src/test/python/wfa/measurement/reporting/postprocessing/report/BUILD.bazel similarity index 57% rename from experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/report/BUILD.bazel rename to src/test/python/wfa/measurement/reporting/postprocessing/report/BUILD.bazel index 947e84d6fcd..b3ef4e46c6b 100644 --- a/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/report/BUILD.bazel +++ b/src/test/python/wfa/measurement/reporting/postprocessing/report/BUILD.bazel @@ -4,6 +4,6 @@ py_test( name = "test_report", srcs = ["test_report.py"], deps = [ - "//experimental/dp_consistency/src/main/python/wfa/measurement/reporting/postprocess/report", + "//src/main/python/wfa/measurement/reporting/postprocessing/report", ], ) diff --git a/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/report/test_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py similarity index 100% rename from experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/report/test_report.py rename to src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/tools/BUILD.bazel b/src/test/python/wfa/measurement/reporting/postprocessing/tools/BUILD.bazel new file mode 100644 index 00000000000..1b6126d0a56 --- /dev/null +++ b/src/test/python/wfa/measurement/reporting/postprocessing/tools/BUILD.bazel @@ -0,0 +1,10 @@ +load("@rules_python//python:defs.bzl", "py_test") + +py_test( + name = "test_post_process_origin_report", + srcs = ["test_post_process_origin_report.py"], + deps = [ + "//src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha:report_summary_py_pb2", + "//src/main/python/wfa/measurement/reporting/postprocessing/tools:post_process_origin_report", + ], +) diff --git 
a/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/tools/test_post_process_origin_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py similarity index 98% rename from experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/tools/test_post_process_origin_report.py rename to src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py index 548501fc1d9..a9277a6039a 100644 --- a/experimental/dp_consistency/src/test/python/wfa/measurement/reporting/postprocess/tools/test_post_process_origin_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py @@ -14,7 +14,7 @@ import unittest -from experimental.dp_consistency.src.main.proto.wfa.measurement.reporting.postprocess import \ +from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import \ report_summary_pb2 from tools.post_process_origin_report import correctExcelFile, readExcel, \ processReportSummary From 4986073446964a7b5545d522d69f0374c63e6515 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Mon, 14 Oct 2024 19:25:13 +0000 Subject: [PATCH 02/18] Address comments. --- .../postprocessing/v2alpha/BUILD.bazel | 1 - .../v2alpha/ReportConversion.kt | 8 ++--- .../v2alpha/ReportProcessorImpl.kt | 31 +++++++++---------- .../v2alpha/ReportConversionTest.kt | 2 -- .../v2alpha/ReportProcessorImplTest.kt | 4 +-- 5 files changed, 19 insertions(+), 27 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel index fea967abbe1..67c329b347d 100644 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel @@ -15,7 +15,6 @@ kt_jvm_library( deps = [ "//src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha:report_summary_kt_jvm_proto", "//src/main/proto/wfa/measurement/reporting/v2alpha:report_kt_jvm_proto", - "@maven//:com_google_protobuf_protobuf_java_util", "@wfa_common_jvm//imports/java/com/google/gson", "@wfa_common_jvm//src/main/kotlin/org/wfanet/measurement/common", ], diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt index 5f343d75b4e..3b8bf5bf44f 100644 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt @@ -16,10 +16,6 @@ package org.wfanet.measurement.reporting.postprocessing.v2alpha import com.google.protobuf.InvalidProtocolBufferException import com.google.protobuf.util.JsonFormat -import org.wfanet.measurement.reporting.postprocessing.v2alpha.MeasurementDetailKt -import org.wfanet.measurement.reporting.postprocessing.v2alpha.ReportSummary -import org.wfanet.measurement.reporting.postprocessing.v2alpha.measurementDetail -import org.wfanet.measurement.reporting.postprocessing.v2alpha.reportSummary import org.wfanet.measurement.reporting.v2alpha.Metric import org.wfanet.measurement.reporting.v2alpha.Report @@ -51,6 +47,7 @@ object ReportConversion { return getReportFromJsonString(reportAsJsonString).toReportSummaries() } + // TODO(@ple13): Move this function to a separate 
package that handles the tags. fun getMeasurementPolicy(tag: String): String { when { "measurement_policy=AMI" in tag -> return "ami" @@ -60,6 +57,7 @@ object ReportConversion { } } + // TODO(@ple13): Move this function to a separate package that handles the tags. fun getSetOperation(tag: String): String { val parts = tag.split(", ") val setOperationPart = parts.find { it.startsWith("set_operation=") } @@ -67,10 +65,12 @@ object ReportConversion { ?: error("Set operation must be specified.") } + // TODO(@ple13): Move this function to a separate package that handles the tags. fun isCumulative(tag: String): Boolean { return tag.contains("cumulative=true") } + // TODO(@ple13): Move this function to a separate package that handles the tags. fun getTargets(tag: String): List { val parts = tag.split(", ") val targetPart = parts.find { it.startsWith("target=") } diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt index 9a9beb763e7..9de407ec3de 100644 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt @@ -25,7 +25,6 @@ import java.util.logging.Logger import kotlin.io.path.name import org.wfanet.measurement.common.getJarResourcePath import org.wfanet.measurement.common.toJson -import org.wfanet.measurement.reporting.postprocessing.v2alpha.ReportSummary import org.wfanet.measurement.reporting.v2alpha.Report import org.wfanet.measurement.reporting.v2alpha.copy import org.wfanet.measurement.reporting.v2alpha.report @@ -34,7 +33,20 @@ import org.wfanet.measurement.reporting.v2alpha.report * An implementation of [ReportProcessor] that takes a serialized [Report] in JSON format and * returns the a [Report] of which all measurements are consistent. */ -class ReportProcessorImpl : ReportProcessor { +object ReportProcessorImpl : ReportProcessor { + private val logger: Logger = Logger.getLogger(this::class.java.name) + const private val PYTHON_LIBRARY_RESOURCE_NAME = + "src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.zip" + private val resourcePath: Path = + this::class.java.classLoader.getJarResourcePath(PYTHON_LIBRARY_RESOURCE_NAME) + ?: error("$PYTHON_LIBRARY_RESOURCE_NAME not found in JAR") + private val tempFile = File.createTempFile(resourcePath.name, "").apply { deleteOnExit() } + + init { + // Copies python zip package from JAR to local directory. + Files.copy(resourcePath, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING) + } + /** * Corrects the inconsistent measurements in the [report] and returns a corrected report in JSON * format. @@ -169,19 +181,4 @@ class ReportProcessorImpl : ReportProcessor { } return updatedReport } - - companion object { - private val logger: Logger = Logger.getLogger(this::class.java.name) - const private val PYTHON_LIBRARY_RESOURCE_NAME = - "src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.zip" - private val resourcePath: Path = - this::class.java.classLoader.getJarResourcePath(PYTHON_LIBRARY_RESOURCE_NAME) - ?: error("$PYTHON_LIBRARY_RESOURCE_NAME not found in JAR") - private val tempFile = File.createTempFile(resourcePath.name, "").apply { deleteOnExit() } - - init { - // Copies python zip package from JAR to local directory. 
- Files.copy(resourcePath, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING) - } - } } diff --git a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversionTest.kt b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversionTest.kt index 87c91733b47..bba550f77a4 100644 --- a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversionTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversionTest.kt @@ -24,8 +24,6 @@ import org.junit.runner.RunWith import org.junit.runners.JUnit4 import org.wfanet.measurement.common.getRuntimePath import org.wfanet.measurement.reporting.postprocessing.v2alpha.MeasurementDetailKt.measurementResult -import org.wfanet.measurement.reporting.postprocessing.v2alpha.measurementDetail -import org.wfanet.measurement.reporting.postprocessing.v2alpha.reportSummary @RunWith(JUnit4::class) class ReportConversionTest { diff --git a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt index ed15ded8ea5..25c21aa415b 100644 --- a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt @@ -21,7 +21,6 @@ import org.junit.Test import org.junit.runner.RunWith import org.junit.runners.JUnit4 import org.wfanet.measurement.common.getRuntimePath -import org.wfanet.measurement.reporting.postprocessing.v2alpha.measurementDetail import org.wfanet.measurement.reporting.v2alpha.Report @RunWith(JUnit4::class) @@ -33,8 +32,7 @@ class ReportProcessorImplTest { val report = ReportConversion.getReportFromJsonString(reportAsJson) assertThat(report.hasConsistentMeasurements()).isEqualTo(false) - val reportProcessor = ReportProcessorImpl() - val updatedReportAsJson = reportProcessor.processReportJson(reportAsJson) + val updatedReportAsJson = ReportProcessorImpl.processReportJson(reportAsJson) val updatedReport = ReportConversion.getReportFromJsonString(updatedReportAsJson) assertThat(updatedReport.hasConsistentMeasurements()).isEqualTo(true) } From 44a7ff28c5c3573dba497f1ea4c8add9d16f2f81 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Mon, 14 Oct 2024 19:27:39 +0000 Subject: [PATCH 03/18] Fix Lint. 
--- .../measurement/reporting/postprocessing/v2alpha/BUILD.bazel | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel index 67c329b347d..67fe7f5da2e 100644 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel @@ -12,13 +12,13 @@ kt_jvm_library( resources = [ "//src/main/python/wfa/measurement/reporting/postprocessing/tools:post_process_origin_report_pyzip", ], + tags = ["maven_coordinates=" + MAVEN_COORDINATES], deps = [ "//src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha:report_summary_kt_jvm_proto", "//src/main/proto/wfa/measurement/reporting/v2alpha:report_kt_jvm_proto", "@wfa_common_jvm//imports/java/com/google/gson", "@wfa_common_jvm//src/main/kotlin/org/wfanet/measurement/common", ], - tags = ["maven_coordinates=" + MAVEN_COORDINATES], ) maven_export( From a45b1e12ee386889f22804972349991cde814813 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Fri, 18 Oct 2024 21:01:32 +0000 Subject: [PATCH 04/18] Address comments. --- .../v2alpha/ReportConversion.kt | 8 +- .../postprocessing/v2alpha/ReportProcessor.kt | 167 ++++++++++++++++ .../v2alpha/ReportProcessorImpl.kt | 184 ------------------ .../postprocessing/v2alpha/BUILD.bazel | 6 +- ...ssorImplTest.kt => ReportProcessorTest.kt} | 4 +- 5 files changed, 176 insertions(+), 193 deletions(-) delete mode 100644 src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt rename src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/{ReportProcessorImplTest.kt => ReportProcessorTest.kt} (95%) diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt index 3b8bf5bf44f..948e99642ba 100644 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportConversion.kt @@ -47,7 +47,7 @@ object ReportConversion { return getReportFromJsonString(reportAsJsonString).toReportSummaries() } - // TODO(@ple13): Move this function to a separate package that handles the tags. + // TODO(@ple13): Move this function to a separate Origin-specific package. fun getMeasurementPolicy(tag: String): String { when { "measurement_policy=AMI" in tag -> return "ami" @@ -57,7 +57,7 @@ object ReportConversion { } } - // TODO(@ple13): Move this function to a separate package that handles the tags. + // TODO(@ple13): Move this function to a separate Origin-specific package. fun getSetOperation(tag: String): String { val parts = tag.split(", ") val setOperationPart = parts.find { it.startsWith("set_operation=") } @@ -65,12 +65,12 @@ object ReportConversion { ?: error("Set operation must be specified.") } - // TODO(@ple13): Move this function to a separate package that handles the tags. + // TODO(@ple13): Move this function to a separate Origin-specific package. fun isCumulative(tag: String): Boolean { return tag.contains("cumulative=true") } - // TODO(@ple13): Move this function to a separate package that handles the tags. + // TODO(@ple13): Move this function to a separate Origin-specific package. 
fun getTargets(tag: String): List { val parts = tag.split(", ") val targetPart = parts.find { it.startsWith("target=") } diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt index e66f9995685..c1ed19eec1b 100644 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt @@ -14,6 +14,20 @@ package org.wfanet.measurement.reporting.postprocessing.v2alpha +import com.google.gson.GsonBuilder +import java.io.BufferedReader +import java.io.File +import java.io.InputStreamReader +import java.nio.file.Files +import java.nio.file.Path +import java.nio.file.StandardCopyOption +import java.util.logging.Logger +import kotlin.io.path.name +import org.wfanet.measurement.common.getJarResourcePath +import org.wfanet.measurement.common.toJson +import org.wfanet.measurement.reporting.v2alpha.Report +import org.wfanet.measurement.reporting.v2alpha.copy + /** Corrects the inconsistent measurements in a serialized [Report]. */ interface ReportProcessor { /** @@ -23,4 +37,157 @@ interface ReportProcessor { * @return The corrected serialized [Report] in JSON format. */ fun processReportJson(report: String): String + + companion object Default : ReportProcessor { + private val logger: Logger = Logger.getLogger(this::class.java.name) + const private val PYTHON_LIBRARY_RESOURCE_NAME = + "src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.zip" + private val resourcePath: Path = + this::class.java.classLoader.getJarResourcePath(PYTHON_LIBRARY_RESOURCE_NAME) + ?: error("$PYTHON_LIBRARY_RESOURCE_NAME not found in JAR") + private val tempFile = File.createTempFile(resourcePath.name, "").apply { deleteOnExit() } + + init { + // Copies python zip package from JAR to local directory. + Files.copy(resourcePath, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING) + } + + /** + * Corrects the inconsistent measurements in the [report] and returns a corrected report in JSON + * format. + * + * @param report standard JSON serialization of a Report message. + * @return a corrected report, serialized as a standard JSON string. + */ + override fun processReportJson(report: String): String { + return processReport(ReportConversion.getReportFromJsonString(report)).toJson() + } + + /** + * Corrects the inconsistent measurements in the [report] and returns a corrected report . + * + * @param report a Report message. + * @return a corrected Report. + */ + private fun processReport(report: Report): Report { + val reportSummaries = report.toReportSummaries() + val correctedMeasurementsMap = mutableMapOf() + for (reportSummary in reportSummaries) { + correctedMeasurementsMap.putAll(processReportSummary(reportSummary)) + } + val updatedReport = updateReport(report, correctedMeasurementsMap) + return updatedReport + } + + /** + * Corrects the inconsistent measurements in the [reportSummary] and returns a map of metric names + * to corrected reach values. + * + * Each metric name is tied to a measurement. + */ + private fun processReportSummary(reportSummary: ReportSummary): Map { + logger.info { "Start processing report.." } + + // TODO(bazelbuild/bazel#17629): Execute the Python zip directly once this bug is fixed. 
+ val processBuilder = ProcessBuilder("python3", tempFile.toPath().toString()) + + val process = processBuilder.start() + + // Write the process' argument to its stdin. + process.outputStream.use { outputStream -> + reportSummary.writeTo(outputStream) + outputStream.flush() + } + + // Reads the output of the above process. + val processOutput = + BufferedReader(InputStreamReader(process.inputStream)).use { it.readText() } + + val exitCode = process.waitFor() + require(exitCode == 0) { "Failed to process the report with exitCode $exitCode." } + + logger.info { "Finished processing report.." } + + // Converts the process output to the correction map. + val correctedMeasurementsMap = mutableMapOf() + GsonBuilder().create().fromJson(processOutput, Map::class.java).forEach { (key, value) -> + correctedMeasurementsMap[key as String] = (value as Double).toLong() + } + + return correctedMeasurementsMap + } + + /** + * Updates a [MetricCalculationResult] with corrected reach values from the + * [correctedMeasurementsMap]. + * + * Only the reach-only and reach-and-frequency resultAttributes in the [MetricCalculationResult] + * will be updated. + */ + private fun updateMetricCalculationResult( + metricCalculationResult: Report.MetricCalculationResult, + correctedMeasurementsMap: Map, + ): Report.MetricCalculationResult { + val updatedMetricCalculationResult = + metricCalculationResult.copy { + resultAttributes.clear() + resultAttributes += + metricCalculationResult.resultAttributesList.map { entry -> + entry.copy { + // The result attribute is updated only if its metric is in the correction map. + if (entry.metric in correctedMeasurementsMap) { + val correctedReach = correctedMeasurementsMap.getValue(entry.metric) + when { + entry.metricResult.hasReach() -> { + metricResult = + metricResult.copy { reach = reach.copy { value = correctedReach } } + } + + entry.metricResult.hasReachAndFrequency() -> { + val scale: Double = + correctedReach / entry.metricResult.reachAndFrequency.reach.value.toDouble() + metricResult = + metricResult.copy { + reachAndFrequency = + reachAndFrequency.copy { + reach = reach.copy { value = correctedReach } + frequencyHistogram = + frequencyHistogram.copy { + bins.clear() + bins += + entry.metricResult.reachAndFrequency.frequencyHistogram.binsList + .map { bin -> + bin.copy { + binResult = + binResult.copy { value = bin.binResult.value * scale } + } + } + } + } + } + } + + else -> {} + } + } + } + } + } + return updatedMetricCalculationResult + } + + /** Returns a [Report] with updated reach values from the [correctedMeasurementsMap]. 
*/ + private fun updateReport(report: Report, correctedMeasurementsMap: Map): Report { + val correctedMetricCalculationResults = + report.metricCalculationResultsList.map { result -> + updateMetricCalculationResult(result, correctedMeasurementsMap) + } + val updatedReport = + report.copy { + metricCalculationResults.clear() + metricCalculationResults += correctedMetricCalculationResults + } + return updatedReport + } + } } diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt deleted file mode 100644 index 9de407ec3de..00000000000 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImpl.kt +++ /dev/null @@ -1,184 +0,0 @@ -// Copyright 2024 The Cross-Media Measurement Authors -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. - -package org.wfanet.measurement.reporting.postprocessing.v2alpha - -import com.google.gson.GsonBuilder -import java.io.BufferedReader -import java.io.File -import java.io.InputStreamReader -import java.nio.file.Files -import java.nio.file.Path -import java.nio.file.StandardCopyOption -import java.util.logging.Logger -import kotlin.io.path.name -import org.wfanet.measurement.common.getJarResourcePath -import org.wfanet.measurement.common.toJson -import org.wfanet.measurement.reporting.v2alpha.Report -import org.wfanet.measurement.reporting.v2alpha.copy -import org.wfanet.measurement.reporting.v2alpha.report - -/** - * An implementation of [ReportProcessor] that takes a serialized [Report] in JSON format and - * returns the a [Report] of which all measurements are consistent. - */ -object ReportProcessorImpl : ReportProcessor { - private val logger: Logger = Logger.getLogger(this::class.java.name) - const private val PYTHON_LIBRARY_RESOURCE_NAME = - "src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.zip" - private val resourcePath: Path = - this::class.java.classLoader.getJarResourcePath(PYTHON_LIBRARY_RESOURCE_NAME) - ?: error("$PYTHON_LIBRARY_RESOURCE_NAME not found in JAR") - private val tempFile = File.createTempFile(resourcePath.name, "").apply { deleteOnExit() } - - init { - // Copies python zip package from JAR to local directory. - Files.copy(resourcePath, tempFile.toPath(), StandardCopyOption.REPLACE_EXISTING) - } - - /** - * Corrects the inconsistent measurements in the [report] and returns a corrected report in JSON - * format. - * - * @param report standard JSON serialization of a Report message. - * @return a corrected report, serialized as a standard JSON string. - */ - override fun processReportJson(report: String): String { - return processReport(ReportConversion.getReportFromJsonString(report)).toJson() - } - - /** - * Corrects the inconsistent measurements in the [report] and returns a corrected report . - * - * @param report a Report message. - * @return a corrected Report. 
- */ - private fun processReport(report: Report): Report { - val reportSummaries = report.toReportSummaries() - val correctedMeasurementsMap = mutableMapOf() - for (reportSummary in reportSummaries) { - correctedMeasurementsMap.putAll(processReportSummary(reportSummary)) - } - val updatedReport = updateReport(report, correctedMeasurementsMap) - return updatedReport - } - - /** - * Corrects the inconsistent measurements in the [reportSummary] and returns a map of metric names - * to corrected reach values. - * - * Each metric name is tied to a measurement. - */ - private fun processReportSummary(reportSummary: ReportSummary): Map { - logger.info { "Start processing report.." } - - // TODO(bazelbuild/bazel#17629): Execute the Python zip directly once this bug is fixed. - val processBuilder = ProcessBuilder("python3", tempFile.toPath().toString()) - - val process = processBuilder.start() - - // Write the process' argument to its stdin. - process.outputStream.use { outputStream -> - reportSummary.writeTo(outputStream) - outputStream.flush() - } - - // Reads the output of the above process. - val processOutput = BufferedReader(InputStreamReader(process.inputStream)).use { it.readText() } - - val exitCode = process.waitFor() - require(exitCode == 0) { "Failed to process the report with exitCode $exitCode." } - - logger.info { "Finished processing report.." } - - // Converts the process output to the correction map. - val correctedMeasurementsMap = mutableMapOf() - GsonBuilder().create().fromJson(processOutput, Map::class.java).forEach { (key, value) -> - correctedMeasurementsMap[key as String] = (value as Double).toLong() - } - - return correctedMeasurementsMap - } - - /** - * Updates a [MetricCalculationResult] with corrected reach values from the - * [correctedMeasurementsMap]. - * - * Only the reach-only and reach-and-frequency resultAttributes in the [MetricCalculationResult] - * will be updated. - */ - private fun updateMetricCalculationResult( - metricCalculationResult: Report.MetricCalculationResult, - correctedMeasurementsMap: Map, - ): Report.MetricCalculationResult { - val updatedMetricCalculationResult = - metricCalculationResult.copy { - resultAttributes.clear() - resultAttributes += - metricCalculationResult.resultAttributesList.map { entry -> - entry.copy { - // The result attribute is updated only if its metric is in the correction map. - if (entry.metric in correctedMeasurementsMap) { - val correctedReach = correctedMeasurementsMap.getValue(entry.metric) - when { - entry.metricResult.hasReach() -> { - metricResult = - metricResult.copy { reach = reach.copy { value = correctedReach } } - } - entry.metricResult.hasReachAndFrequency() -> { - val scale: Double = - correctedReach / entry.metricResult.reachAndFrequency.reach.value.toDouble() - metricResult = - metricResult.copy { - reachAndFrequency = - reachAndFrequency.copy { - reach = reach.copy { value = correctedReach } - frequencyHistogram = - frequencyHistogram.copy { - bins.clear() - bins += - entry.metricResult.reachAndFrequency.frequencyHistogram.binsList - .map { bin -> - bin.copy { - binResult = - binResult.copy { value = bin.binResult.value * scale } - } - } - } - } - } - } - else -> {} - } - } - } - } - } - return updatedMetricCalculationResult - } - - /** Returns a [Report] with updated reach values from the [correctedMeasurementsMap]. 
*/ - private fun updateReport(report: Report, correctedMeasurementsMap: Map): Report { - val correctedMetricCalculationResults = - report.metricCalculationResultsList.map { result -> - updateMetricCalculationResult(result, correctedMeasurementsMap) - } - val updatedReport = - report.copy { - metricCalculationResults.clear() - metricCalculationResults += correctedMetricCalculationResults - } - return updatedReport - } -} diff --git a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel index af70122377d..ec0e30236b7 100644 --- a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel +++ b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/BUILD.bazel @@ -16,10 +16,10 @@ kt_jvm_test( ) kt_jvm_test( - name = "report_processor_impl_test", - srcs = ["ReportProcessorImplTest.kt"], + name = "report_processor_test", + srcs = ["ReportProcessorTest.kt"], data = [":sample_reports"], - test_class = "org.wfanet.measurement.reporting.postprocessing.v2alpha.ReportProcessorImplTest", + test_class = "org.wfanet.measurement.reporting.postprocessing.v2alpha.ReportProcessorTest", deps = [ "//src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha:postprocessing", "//src/main/proto/wfa/measurement/reporting/postprocessing/v2alpha:report_summary_kt_jvm_proto", diff --git a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorTest.kt similarity index 95% rename from src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt rename to src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorTest.kt index 25c21aa415b..360be0a85de 100644 --- a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorImplTest.kt +++ b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessorTest.kt @@ -24,7 +24,7 @@ import org.wfanet.measurement.common.getRuntimePath import org.wfanet.measurement.reporting.v2alpha.Report @RunWith(JUnit4::class) -class ReportProcessorImplTest { +class ReportProcessorTest { @Test fun `run correct report successfully`() { val reportFile = TEST_DATA_RUNTIME_DIR.resolve("sample_report_large.json").toFile() @@ -32,7 +32,7 @@ class ReportProcessorImplTest { val report = ReportConversion.getReportFromJsonString(reportAsJson) assertThat(report.hasConsistentMeasurements()).isEqualTo(false) - val updatedReportAsJson = ReportProcessorImpl.processReportJson(reportAsJson) + val updatedReportAsJson = ReportProcessor.processReportJson(reportAsJson) val updatedReport = ReportConversion.getReportFromJsonString(updatedReportAsJson) assertThat(updatedReport.hasConsistentMeasurements()).isEqualTo(true) } From 147a626e42949f7676728eace580f26ab5ec1a77 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Fri, 18 Oct 2024 21:07:27 +0000 Subject: [PATCH 05/18] Add object description. 
--- .../reporting/postprocessing/v2alpha/ReportProcessor.kt | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt index c1ed19eec1b..820d7f7f225 100644 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt @@ -38,6 +38,7 @@ interface ReportProcessor { */ fun processReportJson(report: String): String + /** The default implementation of [ReportProcessor]. */ companion object Default : ReportProcessor { private val logger: Logger = Logger.getLogger(this::class.java.name) const private val PYTHON_LIBRARY_RESOURCE_NAME = @@ -80,8 +81,8 @@ interface ReportProcessor { } /** - * Corrects the inconsistent measurements in the [reportSummary] and returns a map of metric names - * to corrected reach values. + * Corrects the inconsistent measurements in the [reportSummary] and returns a map of metric + * names to corrected reach values. * * Each metric name is tied to a measurement. */ @@ -142,7 +143,6 @@ interface ReportProcessor { metricResult = metricResult.copy { reach = reach.copy { value = correctedReach } } } - entry.metricResult.hasReachAndFrequency() -> { val scale: Double = correctedReach / entry.metricResult.reachAndFrequency.reach.value.toDouble() @@ -166,7 +166,6 @@ interface ReportProcessor { } } } - else -> {} } } From 8b53b0476cd83aa665e1e8d0684b673612e1757a Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Fri, 25 Oct 2024 18:33:46 +0000 Subject: [PATCH 06/18] refactor report post processing. --- .../postprocessing/v2alpha/ReportProcessor.kt | 2 +- .../noiseninja/noised_measurements.py | 10 +- .../reporting/postprocessing/report/report.py | 757 ++++++++++-------- .../tools/post_process_origin_report.py | 43 +- .../postprocessing/report/test_report.py | 31 +- .../tools/test_post_process_origin_report.py | 2 + 6 files changed, 485 insertions(+), 360 deletions(-) diff --git a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt index 820d7f7f225..e9b4e4a7e30 100644 --- a/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt +++ b/src/main/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/ReportProcessor.kt @@ -41,7 +41,7 @@ interface ReportProcessor { /** The default implementation of [ReportProcessor]. 
*/ companion object Default : ReportProcessor { private val logger: Logger = Logger.getLogger(this::class.java.name) - const private val PYTHON_LIBRARY_RESOURCE_NAME = + private const val PYTHON_LIBRARY_RESOURCE_NAME = "src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.zip" private val resourcePath: Path = this::class.java.classLoader.getJarResourcePath(PYTHON_LIBRARY_RESOURCE_NAME) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py index 5d218c8d9f5..fa804cbb5dc 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py @@ -19,15 +19,15 @@ class Measurement: value: float sigma: float - metric_name: str + name: str - def __init__(self, value: float, sigma: float, metric_name: str): + def __init__(self, value: float, sigma: float, name: str): self.value = value self.sigma = sigma - self.metric_name = metric_name + self.name = name def __repr__(self): - return 'Measurement({:.2f}, {:.2f}, {})\n'.format(self.value, self.sigma, self.metric_name) + return 'Measurement({:.2f}, {:.2f}, {})\n'.format(self.value, self.sigma, self.name) class SetMeasurementsSpec: @@ -69,7 +69,7 @@ def get_measurements(self, measured_set_id): def get_measurement_metric(self, measured_set_id): measurement = self.__measurements_by_set.get(measured_set_id) - return measurement[0].metric_name + return measurement[0].name def __repr__(self): return (('SetMeasurementsSpec(' diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index 3c15e588c15..73e194ab7ed 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -24,341 +24,458 @@ class MetricReport: - """Represents a metric sub-report view (e.g. MRC, AMI, etc) - within a report. - """ + """Represents a metric sub-report view (e.g. MRC, AMI, etc) + within a report. 
+ """ + + __reach_time_series_by_edp_combination: dict[ + FrozenSet[str], list[Measurement]] + __reach_whole_campaign_by_edp_combination: dict[FrozenSet[str], Measurement] + + def __init__( + self, + reach_time_series_by_edp_combination: dict[ + FrozenSet[str], list[Measurement]], + reach_whole_campaign_by_edp_combination: dict[ + FrozenSet[str], Measurement] = None, + ): + num_periods = len(next(iter(reach_time_series_by_edp_combination.values()))) + for series in reach_time_series_by_edp_combination.values(): + if len(series) != num_periods: + raise ValueError( + "all time series must have the same length {1: d} vs {2: d}".format( + len(series), len(num_periods) + ) + ) - __reach_time_series_by_edp_combination: dict[FrozenSet[str], list[Measurement]] + self.__reach_time_series_by_edp_combination = ( + reach_time_series_by_edp_combination + ) - def __init__( - self, - reach_time_series_by_edp_combination: dict[FrozenSet[str], list[Measurement]], - ): - num_periods = len(next(iter(reach_time_series_by_edp_combination.values()))) - for series in reach_time_series_by_edp_combination.values(): - if len(series) != num_periods: - raise ValueError( - "all time series must have the same length {1: d} vs {2: d}".format( - len(series), len(num_periods) - ) - ) - - self.__reach_time_series_by_edp_combination = ( - reach_time_series_by_edp_combination - ) + if reach_whole_campaign_by_edp_combination is None: + reach_whole_campaign_by_edp_combination = {} + self.__reach_whole_campaign_by_edp_combination = ( + reach_whole_campaign_by_edp_combination + ) - def sample_with_noise(self) -> "MetricReport": - """ - :return: a new MetricReport where measurements have been resampled - according to their mean and variance. - """ - return MetricReport( - reach_time_series_by_edp_combination={ - edp_comb: [ - MetricReport.__sample_with_noise(measurement) - for measurement in self.__reach_time_series_by_edp_combination[ - edp_comb - ] + def sample_with_noise(self) -> "MetricReport": + """ + :return: a new MetricReport where measurements have been resampled + according to their mean and variance. + """ + return MetricReport( + reach_time_series_by_edp_combination={ + edp_combination: [ + MetricReport.__sample_with_noise(measurement) + for measurement in self.__reach_time_series_by_edp_combination[ + edp_combination ] - for edp_comb in self.__reach_time_series_by_edp_combination.keys() - } - ) - - def get_edp_comb_measurement(self, edp_comb: str, period: int): - return self.__reach_time_series_by_edp_combination[edp_comb][period] - - def get_edp_combs(self): - return list(self.__reach_time_series_by_edp_combination.keys()) - - def get_num_edp_combs(self): - return len(self.__reach_time_series_by_edp_combination.keys()) - - def get_number_of_periods(self): - return len(next(iter(self.__reach_time_series_by_edp_combination.values()))) - - def get_subset_relationships(self): - """Returns a list of tuples where first element in the tuple is the parent - and second element is the subset.""" - subset_relationships = [] - edp_combinations = list(self.__reach_time_series_by_edp_combination) - - for comb1, comb2 in combinations(edp_combinations, 2): - if comb1.issubset(comb2): - subset_relationships.append((comb2, comb1)) - elif comb2.issubset(comb1): - subset_relationships.append((comb1, comb2)) - return subset_relationships - - def get_cover_relationships(self): - """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). 
- For each set (s_i) in the list, enumerate combinations of all sets excluding this one. - For each of these considered combinations, take their union and check if it is equal to - s_i. If so, this combination is a cover of s_i. - """ - - def generate_all_length_combinations(data): - return [ - comb for r in range(1, len(data) + 1) for comb in combinations(data, r) ] + for edp_combination in + self.__reach_time_series_by_edp_combination.keys() + } + ) - cover_relationships = [] - edp_combinations = list(self.__reach_time_series_by_edp_combination) - for i in range(len(edp_combinations)): - possible_covered = edp_combinations[i] - other_sets = edp_combinations[:i] + edp_combinations[i + 1 :] - all_subsets_of_possible_covered = [other_set for other_set in other_sets if other_set.issubset(possible_covered)] - possible_covers = generate_all_length_combinations(all_subsets_of_possible_covered) - for possible_cover in possible_covers: - union_of_possible_cover = reduce( - lambda x, y: x.union(y), possible_cover - ) - if union_of_possible_cover == possible_covered: - cover_relationships.append((possible_covered, possible_cover)) - return cover_relationships - - @staticmethod - def __sample_with_noise(measurement: Measurement): - return Measurement( - measurement.value + random.gauss(0, measurement.sigma), measurement.sigma - ) + def get_cumulative_measurements(self, edp_combination: str): + """Returns the cumulative measurements for the given EDP combination. -class Report: + Args: edp_combination: The EDP combination string. + Returns: A list of cumulative measurements if found, otherwise None. """ - Represents a full report, consisting of multiple MetricReports, - which may have set relationships between each other. + return self.__reach_time_series_by_edp_combination[edp_combination] + + def get_cumulative_measurement(self, edp_combination: str, period: int): + return self.__reach_time_series_by_edp_combination[edp_combination][ + period] + + def get_whole_campaign_measurement(self, edp_combination: str): + return self.__reach_whole_campaign_by_edp_combination[edp_combination] + + def get_cumulative_edp_combinations(self): + return list(self.__reach_time_series_by_edp_combination.keys()) + + def get_whole_campaign_edp_combinations(self): + return list(self.__reach_whole_campaign_by_edp_combination.keys()) + + def get_cumulative_edp_combinations_count(self): + return len(self.__reach_time_series_by_edp_combination.keys()) + + def get_whole_campaign_edp_combinations_count(self): + return len(self.__reach_whole_campaign_by_edp_combination.keys()) + + def get_number_of_periods(self): + return len(next(iter(self.__reach_time_series_by_edp_combination.values()))) + + def get_cumulative_subset_relationships(self): + """Returns a list of tuples where first element in the tuple is the parent + and second element is the subset.""" + subset_relationships = [] + edp_combinations = list(self.__reach_time_series_by_edp_combination) + + for comb1, comb2 in combinations(edp_combinations, 2): + if comb1.issubset(comb2): + subset_relationships.append((comb2, comb1)) + elif comb2.issubset(comb1): + subset_relationships.append((comb1, comb2)) + return subset_relationships + + def get_whole_campaign_subset_relationships(self): + """Returns a list of tuples where first element in the tuple is the parent + and second element is the subset.""" + subset_relationships = [] + edp_combinations = list(self.__reach_whole_campaign_by_edp_combination) + + for comb1, comb2 in combinations(edp_combinations, 2): + if 
comb1.issubset(comb2): + subset_relationships.append((comb2, comb1)) + elif comb2.issubset(comb1): + subset_relationships.append((comb1, comb2)) + return subset_relationships + + def get_cover_relationships(self): + """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). + For each set (s_i) in the list, enumerate combinations of all sets excluding this one. + For each of these considered combinations, take their union and check if it is equal to + s_i. If so, this combination is a cover of s_i. """ - __metric_reports: dict[str, MetricReport] - __metric_subsets_by_parent: dict[str, list[str]] - __metric_index: dict[str, int] - __edp_comb_index: dict[str, int] - - def __init__( - self, - metric_reports: dict[str, MetricReport], - metric_subsets_by_parent: dict[str, list[str]], - cumulative_inconsistency_allowed_edp_combs: set[str], - ): - """ - Args: - metric_reports: a dictionary mapping metric types to a MetricReport - metric_subsets_by_parent: a dictionary containing subset - relationship between the metrics. .e.g. ami >= [custom, mrc] - cumulative_inconsistency_allowed_edps : a set containing edp keys that won't - be forced to have self cumulative reaches be increasing - """ - self.__metric_reports = metric_reports - self.__metric_subsets_by_parent = metric_subsets_by_parent - self.__cumulative_inconsistency_allowed_edp_combs = ( - cumulative_inconsistency_allowed_edp_combs + def generate_all_length_combinations(data): + return [ + comb for r in range(1, len(data) + 1) for comb in + combinations(data, r) + ] + + cover_relationships = [] + edp_combinations = list(self.__reach_time_series_by_edp_combination) + for i in range(len(edp_combinations)): + possible_covered = edp_combinations[i] + other_sets = edp_combinations[:i] + edp_combinations[i + 1:] + all_subsets_of_possible_covered = [other_set for other_set in other_sets + if + other_set.issubset(possible_covered)] + possible_covers = generate_all_length_combinations( + all_subsets_of_possible_covered) + for possible_cover in possible_covers: + union_of_possible_cover = reduce( + lambda x, y: x.union(y), possible_cover ) + if union_of_possible_cover == possible_covered: + cover_relationships.append((possible_covered, possible_cover)) + return cover_relationships - # All metrics in the set relationships must have a corresponding report. 
- for parent in metric_subsets_by_parent.keys(): - if not (parent in metric_reports): - raise ValueError( - "key {1} does not have a corresponding report".format(parent) - ) - for child in metric_subsets_by_parent[parent]: - if not (child in metric_reports): - raise ValueError( - "key {1} does not have a corresponding report".format(child) - ) - - self.__metric_index = {} - for index, metric in enumerate(metric_reports.keys()): - self.__metric_index[metric] = index - - self.__edp_comb_index = {} - for index, edp_comb in enumerate( - next(iter(metric_reports.values())).get_edp_combs() - ): - self.__edp_comb_index[edp_comb] = index - - self.__num_edp_combs = len(self.__edp_comb_index.keys()) - self.__num_periods = next(iter(metric_reports.values())).get_number_of_periods() - - num_vars_per_period = (self.__num_edp_combs + 1) * len(metric_reports.keys()) - self.__num_vars = self.__num_periods * num_vars_per_period - - def get_metric_report(self, metric: str) -> MetricReport: - return self.__metric_reports[metric] - - def get_metrics(self) -> set[str]: - return set(self.__metric_reports.keys()) - - def get_corrected_report(self) -> "Report": - """Returns a corrected, consistent report. - Note all measurements in the corrected report are set to have 0 variance - """ - spec = self.to_set_measurement_spec() - solution = Solver(spec).solve_and_translate() - return self.report_from_solution(solution, spec) - - def report_from_solution(self, solution, spec): - return Report( - metric_reports={ - metric: self.__metric_report_from_solution(metric, solution, spec) - for metric in self.__metric_reports - }, - metric_subsets_by_parent=self.__metric_subsets_by_parent, - cumulative_inconsistency_allowed_edp_combs=self.__cumulative_inconsistency_allowed_edp_combs, - ) + @staticmethod + def __sample_with_noise(measurement: Measurement): + return Measurement( + measurement.value + random.gauss(0, measurement.sigma), + measurement.sigma + ) - def sample_with_noise(self) -> "Report": - """Returns a new report sampled according to the mean and variance of - all metrics in this report. Useful to bootstrap sample reports. - """ - return Report( - metric_reports={ - i: self.__metric_reports[i].sample_with_noise() - for i in self.__metric_reports - }, - metric_subsets_by_parent=self.__metric_subsets_by_parent, - cumulative_inconsistency_allowed_edp_combs=self.__cumulative_inconsistency_allowed_edp_combs, - ) - def to_array(self) -> np.array: - """Returns an array representation of all the mean measurement values - in this report - """ - array = np.zeros(self.__num_vars) - for metric in self.__metric_reports: - for period in range(0, self.__num_periods): - for edp_comb in self.__edp_comb_index: - edp_comb_ind = self.__edp_comb_index[edp_comb] - array.put( - self.__get_var_index( - period, self.__metric_index[metric], edp_comb_ind - ), - self.__metric_reports[metric] - .get_edp_comb_measurement(edp_comb, period) - .value, - ) - return array - - def to_set_measurement_spec(self): - spec = SetMeasurementsSpec() - self.__add_measurements_to_spec(spec) - self.__add_set_relations_to_spec(spec) - return spec - - def __add_set_relations_to_spec(self, spec): +class Report: + """ + Represents a full report, consisting of multiple MetricReports, + which may have set relationships between each other. 
+ """ + + __metric_reports: dict[str, MetricReport] + __metric_subsets_by_parent: dict[str, list[str]] + __metric_index: dict[str, int] + __edp_combination_index: dict[str, int] + + def __init__( + self, + metric_reports: dict[str, MetricReport], + metric_subsets_by_parent: dict[str, list[str]], + cumulative_inconsistency_allowed_edp_combinations: set[str], + ): + """ + Args: + metric_reports: a dictionary mapping metric types to a MetricReport + metric_subsets_by_parent: a dictionary containing subset + relationship between the metrics. .e.g. ami >= [custom, mrc] + cumulative_inconsistency_allowed_edps : a set containing edp keys that won't + be forced to have self cumulative reaches be increasing + """ + self.__metric_reports = metric_reports + self.__metric_subsets_by_parent = metric_subsets_by_parent + self.__cumulative_inconsistency_allowed_edp_combinations = ( + cumulative_inconsistency_allowed_edp_combinations + ) + + # All metrics in the set relationships must have a corresponding report. + for parent in metric_subsets_by_parent.keys(): + if not (parent in metric_reports): + raise ValueError( + "key {1} does not have a corresponding report".format(parent) + ) + for child in metric_subsets_by_parent[parent]: + if not (child in metric_reports): + raise ValueError( + "key {1} does not have a corresponding report".format(child) + ) + + self.__metric_index = {} + for index, metric in enumerate(metric_reports.keys()): + self.__metric_index[metric] = index + + self.__edp_combination_index = {} + for index, edp_combination in enumerate( + next(iter(metric_reports.values())).get_cumulative_edp_combinations() + ): + self.__edp_combination_index[edp_combination] = index + + self.__num_edp_combinations = len(self.__edp_combination_index.keys()) + self.__num_periods = next( + iter(metric_reports.values())).get_number_of_periods() + + num_vars_per_period = (self.__num_edp_combinations + 1) * len( + metric_reports.keys()) + self.__num_vars = self.__num_periods * num_vars_per_period + + measurement_index = 0 + self.__measurement_name_to_index = {} + for metric in metric_reports.keys(): + for edp_combination in metric_reports[ + metric].get_cumulative_edp_combinations(): + for measurement in metric_reports[metric].get_cumulative_measurements( + edp_combination): + self.__measurement_name_to_index[measurement.name] = measurement_index + measurement_index += 1 + for edp_combination in metric_reports[ + metric].get_whole_campaign_edp_combinations(): + measurement = metric_reports[metric].get_whole_campaign_measurement( + edp_combination) + self.__measurement_name_to_index[measurement.name] = measurement_index + measurement_index += 1 + + def get_metric_report(self, metric: str) -> MetricReport: + return self.__metric_reports[metric] + + def get_metrics(self) -> set[str]: + return set(self.__metric_reports.keys()) + + def get_corrected_report(self) -> "Report": + """Returns a corrected, consistent report. 
+ Note all measurements in the corrected report are set to have 0 variance + """ + spec = self.to_set_measurement_spec() + solution = Solver(spec).solve_and_translate() + return self.report_from_solution(solution, spec) + + def report_from_solution(self, solution, spec): + return Report( + metric_reports={ + metric: self.__metric_report_from_solution(metric, solution) + for metric in self.__metric_reports + }, + metric_subsets_by_parent=self.__metric_subsets_by_parent, + cumulative_inconsistency_allowed_edp_combinations=self.__cumulative_inconsistency_allowed_edp_combinations, + ) + + def sample_with_noise(self) -> "Report": + """Returns a new report sampled according to the mean and variance of + all metrics in this report. Useful to bootstrap sample reports. + """ + return Report( + metric_reports={ + i: self.__metric_reports[i].sample_with_noise() + for i in self.__metric_reports + }, + metric_subsets_by_parent=self.__metric_subsets_by_parent, + cumulative_inconsistency_allowed_edp_combinations=self.__cumulative_inconsistency_allowed_edp_combinations, + ) + + def to_array(self) -> np.array: + """Returns an array representation of all the mean measurement values + in this report + """ + array = np.zeros(self.__num_vars) + for metric in self.__metric_reports: + for period in range(0, self.__num_periods): + for edp_combination in self.__edp_combination_index: + array.put( + self.__get_measurement_index( + self.__metric_reports[metric] + .get_cumulative_measurement(edp_combination, period) + ), + self.__metric_reports[metric] + .get_cumulative_measurement(edp_combination, period) + .value, + ) + return array + + def to_set_measurement_spec(self): + spec = SetMeasurementsSpec() + self.__add_measurements_to_spec(spec) + self.__add_set_relations_to_spec(spec) + return spec + + def __add_cover_relations_to_spec(self, spec): + # sum of subsets >= union for each period + for metric in self.__metric_reports: + for cover_relationship in self.__metric_reports[ + metric].get_cover_relationships(): + covered_parent = cover_relationship[0] + covering_children = cover_relationship[1] for period in range(0, self.__num_periods): - - # sum of subsets >= union for each period - for metric in self.__metric_reports: - metric_ind = self.__metric_index[metric] - for cover_relationship in self.__metric_reports[ - metric - ].get_cover_relationships(): - covered_parent = cover_relationship[0] - covering_children = cover_relationship[1] - spec.add_cover( - children=list( - self.__get_var_index( - period, - metric_ind, - self.__edp_comb_index[covering_child], - ) - for covering_child in covering_children - ), - parent=self.__get_var_index( - period, metric_ind, self.__edp_comb_index[covered_parent] - ), - ) - - # subset <= union - for metric in self.__metric_reports: - metric_ind = self.__metric_index[metric] - for subset_relationship in self.__metric_reports[ - metric - ].get_subset_relationships(): - parent_edp_comb = subset_relationship[0] - child_edp_comb = subset_relationship[1] - spec.add_subset_relation( - child_set_id=self.__get_var_index( - period, metric_ind, self.__edp_comb_index[child_edp_comb] - ), - parent_set_id=self.__get_var_index( - period, metric_ind, self.__edp_comb_index[parent_edp_comb] - ), - ) - - # metric1>=metric#2 - for parent_metric in self.__metric_subsets_by_parent: - for child_metric in self.__metric_subsets_by_parent[parent_metric]: - for edp_comb in self.__edp_comb_index: - edp_comb_ind = self.__edp_comb_index[edp_comb] - spec.add_subset_relation( - child_set_id=self.__get_var_index( 
- period, self.__metric_index[child_metric], edp_comb_ind - ), - parent_set_id=self.__get_var_index( - period, self.__metric_index[parent_metric], edp_comb_ind - ), - ) - - # period1 <= period2 - for edp_comb in self.__edp_comb_index: - if ( - len(edp_comb) == 1 - and next(iter(edp_comb)) - in self.__cumulative_inconsistency_allowed_edp_combs - ): - continue - if period >= self.__num_periods - 1: - continue - for metric in range(0, len(self.__metric_index.keys())): - edp_comb_ind = self.__edp_comb_index[edp_comb] - spec.add_subset_relation( - child_set_id=self.__get_var_index(period, metric, edp_comb_ind), - parent_set_id=self.__get_var_index( - period + 1, metric, edp_comb_ind - ), - ) - - def __add_measurements_to_spec(self, spec): - for metric in self.__metric_reports: - for period in range(0, self.__num_periods): - for edp_comb in self.__edp_comb_index: - edp_comb_ind = self.__edp_comb_index[edp_comb] - spec.add_measurement( - self.__get_var_index( - period, self.__metric_index[metric], edp_comb_ind - ), - self.__metric_reports[metric].get_edp_comb_measurement( - edp_comb, period - ), - ) - - def __get_var_index(self, period: int, metric: int, edp: int): - return ( - metric * self.__num_edp_combs * self.__num_periods - + edp * self.__num_periods - + period + spec.add_cover( + children=list(self.__get_cumulative_measurement_index( + metric, covering_child, period) + for covering_child in covering_children), + parent=self.__get_cumulative_measurement_index( + metric, covered_parent, period), + ) + + def __add_subset_relations_to_spec(self, spec): + for metric in self.__metric_reports: + metric_ind = self.__metric_index[metric] + for subset_relationship in self.__metric_reports[ + metric + ].get_cumulative_subset_relationships(): + parent_edp_combination = subset_relationship[0] + child_edp_combination = subset_relationship[1] + for period in range(0, self.__num_periods): + spec.add_subset_relation( + child_set_id=self.__get_measurement_index( + self.__metric_reports[ + metric].get_cumulative_measurement( + child_edp_combination, period)), + parent_set_id=self.__get_measurement_index( + self.__metric_reports[ + metric].get_cumulative_measurement( + parent_edp_combination, period)), + ) + + # for subset_relationship in self.__metric_reports[metric].get_whole_campaign_subset_relationships(): + # parent_edp_combination = subset_relationship[0] + # child_edp_combination = subset_relationship[1] + # for period in range(0, self.__num_periods): + # spec.add_subset_relation( + # child_set_id=self.__get_measurement_index( + # self.__metric_reports[ + # metric].get_cumulative_measurement( + # child_edp_combination, period)), + # parent_set_id=self.__get_measurement_index( + # self.__metric_reports[ + # metric].get_cumulative_measurement( + # parent_edp_combination, period)), + # ) + + def __add_metric_relations_to_spec(self, spec): + # metric1>=metric#2 + for parent_metric in self.__metric_subsets_by_parent: + for child_metric in self.__metric_subsets_by_parent[parent_metric]: + for edp_combination in self.__metric_reports[ + parent_metric].get_cumulative_edp_combinations(): + for period in range(0, self.__num_periods): + spec.add_subset_relation( + child_set_id=self.__get_measurement_index( + self.__metric_reports[ + child_metric].get_cumulative_measurement( + edp_combination, period)), + parent_set_id=self.__get_measurement_index( + self.__metric_reports[ + parent_metric].get_cumulative_measurement( + edp_combination, period)), + ) + + def __add_periodic_relations_to_spec(self, spec): + for 
metric in self.__metric_reports.keys(): + for edp_combination in self.__edp_combination_index: + if ( + len(edp_combination) == 1 + and next(iter(edp_combination)) + in self.__cumulative_inconsistency_allowed_edp_combinations + ): + continue + for period in range(0, self.__num_periods): + if period >= self.__num_periods - 1: + continue + spec.add_subset_relation( + child_set_id=self.__get_measurement_index( + self.__metric_reports[ + metric].get_cumulative_measurement( + edp_combination, period)), + parent_set_id=self.__get_measurement_index( + self.__metric_reports[ + metric].get_cumulative_measurement( + edp_combination, period + 1)), + ) + + def __add_set_relations_to_spec(self, spec): + # sum of subsets >= union for each period. + self.__add_cover_relations_to_spec(spec) + + # subset <= union. + self.__add_subset_relations_to_spec(spec) + + # metric1>=metric#2. + self.__add_metric_relations_to_spec(spec) + + # period1 <= period2 + self.__add_periodic_relations_to_spec(spec) + + def __add_measurements_to_spec(self, spec): + for metric in self.__metric_reports.keys(): + for edp_combination in self.__metric_reports[ + metric].get_cumulative_edp_combinations(): + for measurement in self.__metric_reports[ + metric].get_cumulative_measurements(edp_combination): + spec.add_measurement( + self.__get_measurement_index(measurement), + measurement, + ) + for edp_combination in self.__metric_reports[ + metric].get_whole_campaign_edp_combinations(): + measurement = self.__metric_reports[ + metric].get_whole_campaign_measurement(edp_combination) + spec.add_measurement( + self.__get_measurement_index(measurement), + measurement, ) - def __metric_report_from_solution(self, metric, solution, spec): - solution_time_series = {} - for edp_comb in self.__edp_comb_index: - edp_comb_ind = self.__edp_comb_index[edp_comb] - solution_time_series[edp_comb] = [ - Measurement( - solution[ - self.__get_var_index( - period, self.__metric_index[metric], edp_comb_ind - ) - ], - 0, - spec.get_measurement_metric( - self.__get_var_index( - period, self.__metric_index[metric], edp_comb_ind - ) - ), - ) - for period in range(0, self.__num_periods) - ] - - return MetricReport(reach_time_series_by_edp_combination=solution_time_series) + def __get_measurement_index(self, measurement: Measurement): + return self.__measurement_name_to_index[measurement.name] + + def __get_cumulative_measurement_index(self, metric: str, + edp_combination: str, period: int): + return self.__get_measurement_index( + self.__metric_reports[metric].get_cumulative_measurement( + edp_combination, period) + ) + + def __get_whole_campaign_measurement_index(self, metric: str, + edp_combination: str): + return self.__get_measurement_index( + self.__metric_reports[metric].get_whole_campaign_measurement( + edp_combination) + ) + + def __get_var_index(self, period: int, metric: int, edp: int): + return ( + metric * self.__num_edp_combinations * self.__num_periods + + edp * self.__num_periods + + period + ) + + def __metric_report_from_solution(self, metric, solution): + solution_time_series = {} + for edp_combination in self.__edp_combination_index: + solution_time_series[edp_combination] = [ + Measurement( + solution[ + self.__get_measurement_index(self.__metric_reports[ + metric].get_cumulative_measurement( + edp_combination, period)) + ], + self.__metric_reports[metric].get_cumulative_measurement( + edp_combination, period).sigma, + self.__metric_reports[metric].get_cumulative_measurement( + edp_combination, period).name, + ) + for period in range(0, 
self.__num_periods) + ] + + return MetricReport( + reach_time_series_by_edp_combination=solution_time_series) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py index 7d26a3a53b3..f18052a5a43 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py @@ -17,7 +17,8 @@ import pandas as pd import sys -from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import report_summary_pb2 +from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import \ + report_summary_pb2 from functools import partial from noiseninja.noised_measurements import Measurement from report.report import Report, MetricReport @@ -127,15 +128,15 @@ def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): elif entry.measurement_policy == "mrc": mrc_measurements[data_providers] = measurements - edp_comb_list = ami_measurements.keys() - if len(edp_comb_list) == 0: - edp_comb_list = mrc_measurements.keys() + edp_combination_list = ami_measurements.keys() + if len(edp_combination_list) == 0: + edp_combination_list = mrc_measurements.keys() # Processes non-cumulative union measurements. for entry in report_summary.measurement_details: if (entry.set_operation == "union") and ( entry.is_cumulative == False) and ( - frozenset(entry.data_providers) in edp_comb_list): + frozenset(entry.data_providers) in edp_combination_list): measurements = [ Measurement(result.reach, result.standard_deviation, result.metric) @@ -157,7 +158,7 @@ def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): if measurements # Only include if measurements is not empty }, metric_subsets_by_parent={ami: [mrc]}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) # Gets the corrected report. 
@@ -168,11 +169,11 @@ def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): measurements_policies = corrected_report.get_metrics() for policy in measurements_policies: metric_report = corrected_report.get_metric_report(policy) - for edp in metric_report.get_edp_combs(): + for edp in metric_report.get_cumulative_edp_combinations(): for index in range(metric_report.get_number_of_periods()): - entry = metric_report.get_edp_comb_measurement(edp, index) + entry = metric_report.get_cumulative_measurement(edp, index) metric_name_to_value.update( - {entry.metric_name: int(entry.value)}) + {entry.name: int(entry.value)}) return metric_name_to_value @@ -201,7 +202,7 @@ def getCorrectedReport(measurements): }, # AMI is a parent of MRC metric_subsets_by_parent={ami: [mrc]}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) return report.get_corrected_report() @@ -238,17 +239,17 @@ def buildCorrectedExcel(correctedReport, excel): for edp in EDP_MAP: edp_index = EDP_MAP[edp]["ind"] amiFunc = ( - partial(ami_metric_report.get_edp_comb_measurement, + partial(ami_metric_report.get_cumulative_measurement, frozenset({EDP_ONE, EDP_TWO})) if (edp == TOTAL_CAMPAIGN) - else partial(ami_metric_report.get_edp_comb_measurement, + else partial(ami_metric_report.get_cumulative_measurement, frozenset({edp})) ) mrcFunc = ( - partial(mrc_metric_report.get_edp_comb_measurement, + partial(mrc_metric_report.get_cumulative_measurement, frozenset({EDP_ONE, EDP_TWO})) if (edp == TOTAL_CAMPAIGN) - else partial(mrc_metric_report.get_edp_comb_measurement, + else partial(mrc_metric_report.get_cumulative_measurement, frozenset({edp})) ) @@ -259,18 +260,20 @@ def buildCorrectedExcel(correctedReport, excel): # The last value of the corrected measurement series is the total reach. 
totAmiVal = ( - ami_metric_report.get_edp_comb_measurement( + ami_metric_report.get_cumulative_measurement( frozenset({EDP_ONE, EDP_TWO}), -1).value if (edp == TOTAL_CAMPAIGN) - else ami_metric_report.get_edp_comb_measurement(frozenset({edp}), - -1).value + else ami_metric_report.get_cumulative_measurement( + frozenset({edp}), + -1).value ) totMrcVal = ( - mrc_metric_report.get_edp_comb_measurement( + mrc_metric_report.get_cumulative_measurement( frozenset({EDP_ONE, EDP_TWO}), -1).value if (edp == TOTAL_CAMPAIGN) - else mrc_metric_report.get_edp_comb_measurement(frozenset({edp}), - -1).value + else mrc_metric_report.get_cumulative_measurement( + frozenset({edp}), + -1).value ) total_sheet_name = edp excel[total_sheet_name] = correctTotSheet( diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py index 9dcb7eafd61..8d19e5e44db 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py @@ -450,7 +450,7 @@ def test_get_corrected_single_metric_report(self): ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) corrected = report.get_corrected_report() @@ -469,7 +469,7 @@ def test_get_corrected_single_metric_report(self): ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) @@ -494,7 +494,7 @@ def test_can_correct_time_series(self): ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) corrected = report.get_corrected_report() @@ -517,7 +517,7 @@ def test_can_correct_time_series(self): ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) @@ -570,7 +570,7 @@ def test_can_correct_time_series_for_three_edps(self): ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) corrected = report.get_corrected_report() @@ -621,7 +621,7 @@ def test_can_correct_time_series_for_three_edps(self): ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) @@ -646,7 +646,8 @@ def test_allows_incorrect_time_series(self): ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs=set(frozenset({EDP_ONE})), + cumulative_inconsistency_allowed_edp_combinations=set( + frozenset({EDP_ONE})), ) corrected = report.get_corrected_report() @@ -669,7 +670,8 @@ def test_allows_incorrect_time_series(self): ) }, metric_subsets_by_parent={}, - cumulative_inconsistency_allowed_edp_combs=set(frozenset({EDP_ONE})), + cumulative_inconsistency_allowed_edp_combinations=set( + frozenset({EDP_ONE})), ) self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) @@ -698,7 +700,7 @@ def test_can_correct_related_metrics(self): }, # AMI is a parent of MRC metric_subsets_by_parent={ami: [mrc]}, - cumulative_inconsistency_allowed_edp_combs={}, + 
cumulative_inconsistency_allowed_edp_combinations={}, ) corrected = report.get_corrected_report() @@ -724,7 +726,7 @@ def test_can_correct_related_metrics(self): }, # AMI is a parent of MRC metric_subsets_by_parent={ami: [mrc]}, - cumulative_inconsistency_allowed_edp_combs={}, + cumulative_inconsistency_allowed_edp_combinations={}, ) self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) @@ -742,15 +744,16 @@ def __assertMeasurementAlmostEquals( def __assertMetricReportsAlmostEqual( self, expected: MetricReport, actual: MetricReport, msg ): - self.assertEqual(expected.get_num_edp_combs(), actual.get_num_edp_combs()) + self.assertEqual(expected.get_cumulative_edp_combinations_count(), + actual.get_cumulative_edp_combinations_count()) self.assertEqual( expected.get_number_of_periods(), actual.get_number_of_periods() ) for period in range(0, expected.get_number_of_periods()): - for edp_comb in expected.get_edp_combs(): + for edp_comb in expected.get_cumulative_edp_combinations(): self.__assertMeasurementAlmostEquals( - expected.get_edp_comb_measurement(edp_comb, period), - actual.get_edp_comb_measurement(edp_comb, period), + expected.get_cumulative_measurement(edp_comb, period), + actual.get_cumulative_measurement(edp_comb, period), msg, ) diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py index a9277a6039a..09a35b29948 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py @@ -96,6 +96,8 @@ def test_report_summary_is_corrected_successfully(self): len(MRC_MEASUREMENTS[edp]) - 1).zfill(5) corrected_measurements_map = processReportSummary(report_summary) + for key, value in corrected_measurements_map.items(): + print(f"{key}: {value}") # Verifies that the updated reach values are consistent. for edp in EDP_MAP: ami_metric_prefix = "metric_" + edp + "_ami_" From e888bb16f4ebaa0c0a2205fded4724779cdbd080 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Tue, 29 Oct 2024 11:10:12 +0000 Subject: [PATCH 07/18] separate cumulative measurements from total measurements. --- .../reporting/postprocessing/report/report.py | 174 +++++++----- .../tools/post_process_origin_report.py | 52 ++-- .../postprocessing/report/test_report.py | 248 +++++++++++++++++- .../tools/test_post_process_origin_report.py | 84 ++++-- 4 files changed, 446 insertions(+), 112 deletions(-) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index 0bfdf63c4c6..bb9f0cd749e 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -23,6 +23,19 @@ from functools import reduce +def get_subset_relationships(edp_combinations: list[str]): + """Returns a list of tuples where first element in the tuple is the parent + and second element is the subset.""" + subset_relationships = [] + + for comb1, comb2 in combinations(edp_combinations, 2): + if comb1.issubset(comb2): + subset_relationships.append((comb2, comb1)) + elif comb2.issubset(comb1): + subset_relationships.append((comb1, comb2)) + return subset_relationships + + class MetricReport: """Represents a metric sub-report view (e.g. 
MRC, AMI, etc) within a report. @@ -76,14 +89,6 @@ def sample_with_noise(self) -> "MetricReport": } ) - def get_cumulative_measurements(self, edp_combination: str): - """Returns the cumulative measurements for the given EDP combination. - - Args: edp_combination: The EDP combination string. - Returns: A list of cumulative measurements if found, otherwise None. - """ - return self.__reach_time_series_by_edp_combination[edp_combination] - def get_cumulative_measurement(self, edp_combination: str, period: int): return self.__reach_time_series_by_edp_combination[edp_combination][ period] @@ -92,10 +97,10 @@ def get_whole_campaign_measurement(self, edp_combination: str): return self.__reach_whole_campaign_by_edp_combination[edp_combination] def get_cumulative_edp_combinations(self): - return list(self.__reach_time_series_by_edp_combination.keys()) + return set(self.__reach_time_series_by_edp_combination.keys()) def get_whole_campaign_edp_combinations(self): - return list(self.__reach_whole_campaign_by_edp_combination.keys()) + return set(self.__reach_whole_campaign_by_edp_combination.keys()) def get_cumulative_edp_combinations_count(self): return len(self.__reach_time_series_by_edp_combination.keys()) @@ -107,30 +112,12 @@ def get_number_of_periods(self): return len(next(iter(self.__reach_time_series_by_edp_combination.values()))) def get_cumulative_subset_relationships(self): - """Returns a list of tuples where first element in the tuple is the parent - and second element is the subset.""" - subset_relationships = [] edp_combinations = list(self.__reach_time_series_by_edp_combination) - - for comb1, comb2 in combinations(edp_combinations, 2): - if comb1.issubset(comb2): - subset_relationships.append((comb2, comb1)) - elif comb2.issubset(comb1): - subset_relationships.append((comb1, comb2)) - return subset_relationships + return get_subset_relationships(edp_combinations) def get_whole_campaign_subset_relationships(self): - """Returns a list of tuples where first element in the tuple is the parent - and second element is the subset.""" - subset_relationships = [] edp_combinations = list(self.__reach_whole_campaign_by_edp_combination) - - for comb1, comb2 in combinations(edp_combinations, 2): - if comb1.issubset(comb2): - subset_relationships.append((comb2, comb1)) - elif comb2.issubset(comb1): - subset_relationships.append((comb1, comb2)) - return subset_relationships + return get_subset_relationships(edp_combinations) def get_cover_relationships(self): """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). @@ -232,13 +219,15 @@ def __init__( metric_reports.keys()) self.__num_vars = self.__num_periods * num_vars_per_period + # Assign an index to each measurement. measurement_index = 0 self.__measurement_name_to_index = {} for metric in metric_reports.keys(): for edp_combination in metric_reports[ metric].get_cumulative_edp_combinations(): - for measurement in metric_reports[metric].get_cumulative_measurements( - edp_combination): + for period in range(0, self.__num_periods): + measurement = metric_reports[metric].get_cumulative_measurement( + edp_combination, period) self.__measurement_name_to_index[measurement.name] = measurement_index measurement_index += 1 for edp_combination in metric_reports[ @@ -327,8 +316,8 @@ def __add_cover_relations_to_spec(self, spec): ) def __add_subset_relations_to_spec(self, spec): + # Adds relations for cumulative measurements. 
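# For reference, a small hypothetical example of what get_subset_relationships
# (defined at module level above) produces for the keys of a reach dictionary.
# Each tuple is (parent, subset); pairs with no containment relation are
# skipped. The EDP names are placeholders.
example_combinations = [
    frozenset({"edp1"}), frozenset({"edp2"}), frozenset({"edp1", "edp2"})
]
assert get_subset_relationships(example_combinations) == [
    (frozenset({"edp1", "edp2"}), frozenset({"edp1"})),
    (frozenset({"edp1", "edp2"}), frozenset({"edp2"})),
]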
for metric in self.__metric_reports: - metric_ind = self.__metric_index[metric] for subset_relationship in self.__metric_reports[ metric ].get_cumulative_subset_relationships(): @@ -346,27 +335,55 @@ def __add_subset_relations_to_spec(self, spec): parent_edp_combination, period)), ) - # for subset_relationship in self.__metric_reports[metric].get_whole_campaign_subset_relationships(): - # parent_edp_combination = subset_relationship[0] - # child_edp_combination = subset_relationship[1] - # for period in range(0, self.__num_periods): - # spec.add_subset_relation( - # child_set_id=self.__get_measurement_index( - # self.__metric_reports[ - # metric].get_cumulative_measurement( - # child_edp_combination, period)), - # parent_set_id=self.__get_measurement_index( - # self.__metric_reports[ - # metric].get_cumulative_measurement( - # parent_edp_combination, period)), - # ) + # Adds relations for whole campaign measurements. + for subset_relationship in self.__metric_reports[ + metric + ].get_whole_campaign_subset_relationships(): + parent_edp_combination = subset_relationship[0] + child_edp_combination = subset_relationship[1] + spec.add_subset_relation( + child_set_id=self.__get_measurement_index( + self.__metric_reports[ + metric].get_whole_campaign_measurement( + child_edp_combination)), + parent_set_id=self.__get_measurement_index( + self.__metric_reports[ + metric].get_whole_campaign_measurement( + parent_edp_combination)), + ) + + # TODO(@ple13):Use timestamp to check if the last cumulative measurement covers + # the whole campaign. If yes, make sure that the two measurements are equal + # instead of less than or equal. + def __add_cumulative_whole_campaign_relations_to_spec(self, spec): + # Adds relations between cumulative and whole campaign measurements. + # For an edp combination, the last cumulative measurement is less than or + # equal to the whole campaign measurement. + for metric in self.__metric_reports: + for edp_combination in self.__metric_reports[ + metric].get_cumulative_edp_combinations().intersection( + self.__metric_reports[ + metric].get_whole_campaign_edp_combinations()): + spec.add_subset_relation( + child_set_id=self.__get_measurement_index( + self.__metric_reports[ + metric].get_cumulative_measurement( + edp_combination, (self.__num_periods - 1))), + parent_set_id=self.__get_measurement_index( + self.__metric_reports[ + metric].get_whole_campaign_measurement( + edp_combination)), + ) def __add_metric_relations_to_spec(self, spec): # metric1>=metric#2 for parent_metric in self.__metric_subsets_by_parent: for child_metric in self.__metric_subsets_by_parent[parent_metric]: + # Handles cumulative measurements of common edp combinations. for edp_combination in self.__metric_reports[ - parent_metric].get_cumulative_edp_combinations(): + parent_metric].get_cumulative_edp_combinations().intersection( + self.__metric_reports[ + child_metric].get_cumulative_edp_combinations()): for period in range(0, self.__num_periods): spec.add_subset_relation( child_set_id=self.__get_measurement_index( @@ -378,10 +395,26 @@ def __add_metric_relations_to_spec(self, spec): parent_metric].get_cumulative_measurement( edp_combination, period)), ) + # Handles whole campaign measurements of common edp combinations. 
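# Each of these relations is recorded on the SetMeasurementsSpec as a
# (parent, child) pair of measurement indices, meaning "child <= parent".
# A minimal sketch; the indices and measurement values are hypothetical and
# stand in for the lookups normally done via __get_measurement_index.
from noiseninja.noised_measurements import Measurement, SetMeasurementsSpec

example_spec = SetMeasurementsSpec()
last_cumulative_id, whole_campaign_id = 0, 1
example_spec.add_measurement(
    last_cumulative_id,
    Measurement(240.0, 1.0, "cumulative_metric_edp1_ami_00004"))
example_spec.add_measurement(
    whole_campaign_id, Measurement(260.0, 1.0, "total_metric_edp1_ami_"))
# "last cumulative reach <= whole-campaign reach"
example_spec.add_subset_relation(
    parent_set_id=whole_campaign_id, child_set_id=last_cumulative_id)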
+ for edp_combination in self.__metric_reports[ + parent_metric].get_whole_campaign_edp_combinations().intersection( + self.__metric_reports[ + child_metric].get_whole_campaign_edp_combinations()): + spec.add_subset_relation( + child_set_id=self.__get_measurement_index( + self.__metric_reports[ + child_metric].get_whole_campaign_measurement( + edp_combination)), + parent_set_id=self.__get_measurement_index( + self.__metric_reports[ + parent_metric].get_whole_campaign_measurement( + edp_combination)), + ) - def __add_periodic_relations_to_spec(self, spec): + def __add_cumulative_relations_to_spec(self, spec): for metric in self.__metric_reports.keys(): - for edp_combination in self.__edp_combination_index: + for edp_combination in self.__metric_reports[ + metric].get_cumulative_edp_combinations(): if ( len(edp_combination) == 1 and next(iter(edp_combination)) @@ -412,15 +445,19 @@ def __add_set_relations_to_spec(self, spec): # metric1>=metric#2. self.__add_metric_relations_to_spec(spec) - # period1 <= period2 - self.__add_periodic_relations_to_spec(spec) + # period1 <= period2. + self.__add_cumulative_relations_to_spec(spec) + + # Last cumulative measurement <= whole campaign measurement. + self.__add_cumulative_whole_campaign_relations_to_spec(spec) def __add_measurements_to_spec(self, spec): for metric in self.__metric_reports.keys(): for edp_combination in self.__metric_reports[ metric].get_cumulative_edp_combinations(): - for measurement in self.__metric_reports[ - metric].get_cumulative_measurements(edp_combination): + for period in range(0, self.__num_periods): + measurement = self.__metric_reports[ + metric].get_cumulative_measurement(edp_combination, period) spec.add_measurement( self.__get_measurement_index(measurement), measurement, @@ -451,16 +488,11 @@ def __get_whole_campaign_measurement_index(self, metric: str, edp_combination) ) - def __get_var_index(self, period: int, metric: int, edp: int): - return ( - metric * self.__num_edp_combinations * self.__num_periods - + edp * self.__num_periods - + period - ) - def __metric_report_from_solution(self, metric, solution): solution_time_series = {} - for edp_combination in self.__edp_combination_index: + solution_whole_campaign = {} + for edp_combination in self.__metric_reports[ + metric].get_cumulative_edp_combinations(): solution_time_series[edp_combination] = [ Measurement( solution[ @@ -475,6 +507,20 @@ def __metric_report_from_solution(self, metric, solution): ) for period in range(0, self.__num_periods) ] - + for edp_combination in self.__metric_reports[ + metric].get_whole_campaign_edp_combinations(): + solution_whole_campaign[edp_combination] = Measurement( + solution[ + self.__get_measurement_index(self.__metric_reports[ + metric].get_whole_campaign_measurement( + edp_combination)) + ], + self.__metric_reports[metric].get_whole_campaign_measurement( + edp_combination).sigma, + self.__metric_reports[metric].get_whole_campaign_measurement( + edp_combination).name, + ) return MetricReport( - reach_time_series_by_edp_combination=solution_time_series) + reach_time_series_by_edp_combination=solution_time_series, + reach_whole_campaign_by_edp_combination=solution_whole_campaign, + ) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py index 4c0266b7fb3..267616d442b 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py +++ 
b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py @@ -17,7 +17,8 @@ import pandas as pd import sys -from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import report_summary_pb2 +from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import \ + report_summary_pb2 from functools import partial from noiseninja.noised_measurements import Measurement from report.report import Report, MetricReport @@ -110,8 +111,10 @@ def readExcel(excel_file_path, unnoised_edps): # TODO(@ple13): Extend the function to support custom measurements and composite # set operations such as difference, incremental. def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): - ami_measurements: Dict[FrozenSet[str], List[Measurement]] = {} - mrc_measurements: Dict[FrozenSet[str], List[Measurement]] = {} + cumulative_ami_measurements: Dict[FrozenSet[str], List[Measurement]] = {} + cumulative_mrc_measurements: Dict[FrozenSet[str], List[Measurement]] = {} + total_ami_measurements: Dict[FrozenSet[str], Measurement] = {} + total_mrc_measurements: Dict[FrozenSet[str], Measurement] = {} # Processes cumulative measurements first. for entry in report_summary.measurement_details: @@ -123,38 +126,39 @@ def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): for result in entry.measurement_results ] if entry.measurement_policy == "ami": - ami_measurements[data_providers] = measurements + cumulative_ami_measurements[data_providers] = measurements elif entry.measurement_policy == "mrc": - mrc_measurements[data_providers] = measurements + cumulative_mrc_measurements[data_providers] = measurements - edp_combination_list = ami_measurements.keys() + edp_combination_list = cumulative_ami_measurements.keys() if len(edp_combination_list) == 0: - edp_combination_list = mrc_measurements.keys() + edp_combination_list = cumulative_mrc_measurements.keys() - # Processes non-cumulative union measurements. + # Processes total union measurements. for entry in report_summary.measurement_details: if (entry.set_operation == "union") and ( - entry.is_cumulative == False) and ( - frozenset(entry.data_providers) in edp_combination_list): + entry.is_cumulative == False): measurements = [ Measurement(result.reach, result.standard_deviation, result.metric) for result in entry.measurement_results ] if entry.measurement_policy == "ami": - ami_measurements[frozenset(entry.data_providers)].extend( - measurements) + total_ami_measurements[frozenset(entry.data_providers)] = measurements[ + 0] elif entry.measurement_policy == "mrc": - mrc_measurements[frozenset(entry.data_providers)].extend( - measurements) + total_mrc_measurements[frozenset(entry.data_providers)] = measurements[ + 0] # Builds the report based on the above measurements. 
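# The two dictionaries assembled above have the following shape; the values
# here are purely illustrative. A cumulative entry maps an EDP combination to
# one Measurement per reporting period, while a total entry maps it to a
# single whole-campaign Measurement (Measurement is imported from
# noiseninja.noised_measurements above).
example_cumulative = {
    frozenset({"edp1"}): [
        Measurement(100.0, 1.0, "cumulative_metric_edp1_ami_00000"),
        Measurement(250.0, 1.0, "cumulative_metric_edp1_ami_00001"),
    ],
}
example_total = {
    frozenset({"edp1"}): Measurement(260.0, 1.0, "total_metric_edp1_ami_"),
}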
report = Report( { - policy: MetricReport(measurements) - for policy, measurements in - [("ami", ami_measurements), ("mrc", mrc_measurements)] - if measurements # Only include if measurements is not empty + policy: MetricReport(cumulative_measurements, total_measurements) + for policy, cumulative_measurements, total_measurements in + [("ami", cumulative_ami_measurements, total_ami_measurements), + ("mrc", cumulative_mrc_measurements, total_mrc_measurements)] + if cumulative_measurements + # Only include if measurements is not empty }, metric_subsets_by_parent={ami: [mrc]}, cumulative_inconsistency_allowed_edp_combinations={}, @@ -173,6 +177,10 @@ def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): entry = metric_report.get_cumulative_measurement(edp, index) metric_name_to_value.update( {entry.name: int(entry.value)}) + for edp in metric_report.get_whole_campaign_edp_combinations(): + entry = metric_report.get_whole_campaign_measurement(edp) + metric_name_to_value.update( + {entry.name: int(entry.value)}) return metric_name_to_value @@ -263,16 +271,16 @@ def buildCorrectedExcel(correctedReport, excel): frozenset({EDP_ONE, EDP_TWO}), -1).value if (edp == TOTAL_CAMPAIGN) else ami_metric_report.get_cumulative_measurement( - frozenset({edp}), - -1).value + frozenset({edp}), + -1).value ) totMrcVal = ( mrc_metric_report.get_cumulative_measurement( frozenset({EDP_ONE, EDP_TWO}), -1).value if (edp == TOTAL_CAMPAIGN) else mrc_metric_report.get_cumulative_measurement( - frozenset({edp}), - -1).value + frozenset({edp}), + -1).value ) total_sheet_name = edp excel[total_sheet_name] = correctTotSheet( diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py index 8d19e5e44db..f4b5e7b6619 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py @@ -626,6 +626,241 @@ def test_can_correct_time_series_for_three_edps(self): self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + def test_correct_report_with_both_time_series_and_whole_campaign_measurements_three_edps( + self): + ami = "ami" + + report = Report( + metric_reports={ + ami: MetricReport( + reach_time_series_by_edp_combination={ + # 1 way comb + frozenset({EDP_ONE}): [ + Measurement(0.00, 1, "measurement_01"), + Measurement(3.30, 1, "measurement_02"), + ], + frozenset({EDP_TWO}): [ + Measurement(0.00, 1, "measurement_04"), + Measurement(2.30, 1, "measurement_05"), + ], + frozenset({EDP_THREE}): [ + Measurement(1.00, 1, "measurement_07"), + Measurement(3.30, 1, "measurement_08"), + ], + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(0.00, 1, "measurement_10"), + Measurement(5.30, 1, "measurement_11"), + ], + frozenset({EDP_TWO, EDP_THREE}): [ + Measurement(0.70, 1, "measurement_13"), + Measurement(6.30, 1, "measurement_14"), + ], + frozenset({EDP_ONE, EDP_THREE}): [ + Measurement(1.20, 1, "measurement_16"), + Measurement(7.00, 1, "measurement_17"), + ], + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1.10, 1, "measurement_19"), + Measurement(8.0, 1, "measurement_20"), + ], + }, + reach_whole_campaign_by_edp_combination={ + # 1 way comb + frozenset({EDP_ONE}): Measurement(4.00, 1.00, + "measurement_03"), + frozenset({EDP_TWO}): Measurement(3.3333, 1.00, + "measurement_06"), + frozenset({EDP_THREE}): Measurement(5.3333, 1.00, + 
"measurement_09"), + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): Measurement(6.90, 1.00, + "measurement_12"), + frozenset({EDP_TWO, EDP_THREE}): Measurement(8.66666, 1.00, + "measurement_15"), + frozenset({EDP_ONE, EDP_THREE}): Measurement(8.90, 1.00, + "measurement_18"), + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): Measurement(11.90, + 1.00, + "measurement_21"), + } + ) + }, + metric_subsets_by_parent={}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + corrected = report.get_corrected_report() + + expected = Report( + metric_reports={ + ami: MetricReport( + reach_time_series_by_edp_combination={ + # 1 way comb + frozenset({EDP_ONE}): [ + Measurement(0.10, 1.00, "measurement_01"), + Measurement(3.362, 1.00, "measurement_02"), + ], + frozenset({EDP_TWO}): [ + Measurement(0.00, 1.00, "measurement_04"), + Measurement(2.512, 1.00, "measurement_05"), + ], + frozenset({EDP_THREE}): [ + Measurement(0.95, 1.00, "measurement_07"), + Measurement(3.5749, 1.00, "measurement_08"), + ], + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(0.10, 1.00, "measurement_10"), + Measurement(5.30, 1.00, "measurement_11"), + ], + frozenset({EDP_TWO, EDP_THREE}): [ + Measurement(0.95, 1.00, "measurement_13"), + Measurement(6.087, 1.00, "measurement_14"), + ], + frozenset({EDP_ONE, EDP_THREE}): [ + Measurement(1.05, 1.00, "measurement_16"), + Measurement(6.937, 1.00, "measurement_17"), + ], + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1.05, 1.00, "measurement_19"), + Measurement(8.00, 1.00, "measurement_20"), + ], + }, + reach_whole_campaign_by_edp_combination={ + # 1 way comb + frozenset({EDP_ONE}): Measurement(4.00, 1.00, + "measurement_03"), + frozenset({EDP_TWO}): Measurement(3.3333, 1.00, + "measurement_06"), + frozenset({EDP_THREE}): Measurement(5.3333, 1.00, + "measurement_09"), + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): Measurement(6.90, 1.00, + "measurement_12"), + frozenset({EDP_TWO, EDP_THREE}): Measurement(8.66666, 1.00, + "measurement_15"), + frozenset({EDP_ONE, EDP_THREE}): Measurement(8.90, 1.00, + "measurement_18"), + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): Measurement(11.90, + 1.00, + "measurement_21"), + }, + ) + }, + metric_subsets_by_parent={}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + + def test_correct_report_with_whole_campaign_has_more_edp_combinations(self): + ami = "ami" + + report = Report( + metric_reports={ + ami: MetricReport( + reach_time_series_by_edp_combination={ + # 1 way comb + frozenset({EDP_ONE}): [ + Measurement(0.00, 1, "measurement_01"), + Measurement(3.30, 1, "measurement_02"), + ], + frozenset({EDP_TWO}): [ + Measurement(0.00, 1, "measurement_04"), + Measurement(2.30, 1, "measurement_05"), + ], + frozenset({EDP_THREE}): [ + Measurement(1.00, 1, "measurement_07"), + Measurement(3.30, 1, "measurement_08"), + ], + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1.10, 1, "measurement_19"), + Measurement(8.0, 1, "measurement_20"), + ], + }, + reach_whole_campaign_by_edp_combination={ + # 1 way comb + frozenset({EDP_ONE}): + Measurement(4.00, 1.00, "measurement_03"), + frozenset({EDP_TWO}): + Measurement(3.3333, 1.00, "measurement_06"), + frozenset({EDP_THREE}): + Measurement(5.3333, 1.00, "measurement_09"), + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): + Measurement(6.90, 1.00, "measurement_12"), + frozenset({EDP_TWO, EDP_THREE}): + Measurement(8.66666, 
1.00, "measurement_15"), + frozenset({EDP_ONE, EDP_THREE}): + Measurement(8.90, 1.00, "measurement_18"), + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): + Measurement(11.90, 1.00, "measurement_21"), + } + ) + }, + metric_subsets_by_parent={}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + corrected = report.get_corrected_report() + + expected = Report( + metric_reports={ + ami: MetricReport( + reach_time_series_by_edp_combination={ + # 1 way comb + frozenset({EDP_ONE}): [ + Measurement(0.025, 1.00, "measurement_01"), + Measurement(3.30, 1.00, "measurement_02"), + ], + frozenset({EDP_TWO}): [ + Measurement(0.025, 1.00, "measurement_04"), + Measurement(2.30, 1.00, "measurement_05"), + ], + frozenset({EDP_THREE}): [ + Measurement(1.025, 1.00, "measurement_07"), + Measurement(3.30, 1.00, "measurement_08"), + ], + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1.075, 1.00, "measurement_19"), + Measurement(8.00, 1.00, "measurement_20"), + ], + }, + reach_whole_campaign_by_edp_combination={ + # 1 way comb + frozenset({EDP_ONE}): + Measurement(4.00, 1.00, "measurement_03"), + frozenset({EDP_TWO}): + Measurement(3.3333, 1.00, "measurement_06"), + frozenset({EDP_THREE}): + Measurement(5.3333, 1.00, "measurement_09"), + # 2 way combs + frozenset({EDP_ONE, EDP_TWO}): + Measurement(6.90, 1.00, "measurement_12"), + frozenset({EDP_TWO, EDP_THREE}): + Measurement(8.66666, 1.00, "measurement_15"), + frozenset({EDP_ONE, EDP_THREE}): + Measurement(8.90, 1.00, "measurement_18"), + # 3 way comb + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): + Measurement(11.90, 1.00, "measurement_21"), + }, + ) + }, + metric_subsets_by_parent={}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + def test_allows_incorrect_time_series(self): ami = "ami" report = Report( @@ -647,7 +882,7 @@ def test_allows_incorrect_time_series(self): }, metric_subsets_by_parent={}, cumulative_inconsistency_allowed_edp_combinations=set( - frozenset({EDP_ONE})), + frozenset({EDP_ONE})), ) corrected = report.get_corrected_report() @@ -671,7 +906,7 @@ def test_allows_incorrect_time_series(self): }, metric_subsets_by_parent={}, cumulative_inconsistency_allowed_edp_combinations=set( - frozenset({EDP_ONE})), + frozenset({EDP_ONE})), ) self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) @@ -757,6 +992,15 @@ def __assertMetricReportsAlmostEqual( msg, ) + self.assertEqual(expected.get_whole_campaign_edp_combinations_count(), + actual.get_whole_campaign_edp_combinations_count()) + for edp_comb in expected.get_whole_campaign_edp_combinations(): + self.__assertMeasurementAlmostEquals( + expected.get_whole_campaign_measurement(edp_comb), + actual.get_whole_campaign_measurement(edp_comb), + msg, + ) + def __assertReportsAlmostEqual(self, expected: Report, actual: Report, msg): self.assertEqual(expected.get_metrics(), actual.get_metrics()) for metric in expected.get_metrics(): diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py index 09a35b29948..ef729b3a972 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py @@ -59,7 +59,8 @@ def test_report_summary_is_corrected_successfully(self): 
ami_result = ami_measurement_detail.measurement_results.add() ami_result.reach = AMI_MEASUREMENTS[edp][i] ami_result.standard_deviation = 1.0 - ami_result.metric = "metric_" + edp + "_ami_" + str(i).zfill(5) + ami_result.metric = "cumulative_metric_" + edp + "_ami_" + str(i).zfill( + 5) mrc_measurement_detail = report_summary.measurement_details.add() mrc_measurement_detail.measurement_policy = "mrc" @@ -70,7 +71,8 @@ def test_report_summary_is_corrected_successfully(self): mrc_result = mrc_measurement_detail.measurement_results.add() mrc_result.reach = MRC_MEASUREMENTS[edp][i] mrc_result.standard_deviation = 1.0 - mrc_result.metric = "metric_" + edp + "_mrc_" + str(i).zfill(5) + mrc_result.metric = "cumulative_metric_" + edp + "_mrc_" + str(i).zfill( + 5) for edp in EDP_MAP: ami_measurement_detail = report_summary.measurement_details.add() @@ -81,8 +83,7 @@ def test_report_summary_is_corrected_successfully(self): ami_result = ami_measurement_detail.measurement_results.add() ami_result.reach = AMI_MEASUREMENTS[edp][len(AMI_MEASUREMENTS[edp]) - 1] ami_result.standard_deviation = 1.0 - ami_result.metric = "metric_" + edp + "_ami_" + str( - len(AMI_MEASUREMENTS[edp]) - 1).zfill(5) + ami_result.metric = "total_metric_" + edp + "_ami_" mrc_measurement_detail = report_summary.measurement_details.add() mrc_measurement_detail.measurement_policy = "mrc" @@ -92,44 +93,79 @@ def test_report_summary_is_corrected_successfully(self): mrc_result = mrc_measurement_detail.measurement_results.add() mrc_result.reach = MRC_MEASUREMENTS[edp][len(MRC_MEASUREMENTS[edp]) - 1] mrc_result.standard_deviation = 1.0 - mrc_result.metric = "metric_" + edp + "_mrc_" + str( - len(MRC_MEASUREMENTS[edp]) - 1).zfill(5) + mrc_result.metric = "total_metric_" + edp + "_mrc_" corrected_measurements_map = processReportSummary(report_summary) - for key, value in corrected_measurements_map.items(): - print(f"{key}: {value}") # Verifies that the updated reach values are consistent. for edp in EDP_MAP: - ami_metric_prefix = "metric_" + edp + "_ami_" - mrc_metric_prefix = "metric_" + edp + "_mrc_" + cumulative_ami_metric_prefix = "cumulative_metric_" + edp + "_ami_" + cumulative_mrc_metric_prefix = "cumulative_metric_" + edp + "_mrc_" + total_ami_metric = "total_metric_" + edp + "_ami_" + total_mrc_metric = "total_metric_" + edp + "_mrc_" # Verifies that cumulative measurements are consistent. - for i in range(len(AMI_MEASUREMENTS) - 1): + for i in range(len(AMI_MEASUREMENTS) - 2): self.assertTrue( - corrected_measurements_map[ami_metric_prefix + str(i).zfill(5)] <= - corrected_measurements_map[ami_metric_prefix + str(i + 1).zfill(5)]) + corrected_measurements_map[ + cumulative_ami_metric_prefix + str(i).zfill(5)] <= + corrected_measurements_map[ + cumulative_ami_metric_prefix + str(i + 1).zfill(5)]) self.assertTrue( - corrected_measurements_map[mrc_metric_prefix + str(i).zfill(5)] <= - corrected_measurements_map[mrc_metric_prefix + str(i + 1).zfill(5)]) + corrected_measurements_map[ + cumulative_mrc_metric_prefix + str(i).zfill(5)] <= + corrected_measurements_map[ + cumulative_mrc_metric_prefix + str(i + 1).zfill(5)]) # Verifies that the mrc measurements is less than or equal to the ami ones. 
- for i in range(len(AMI_MEASUREMENTS)): + for i in range(len(AMI_MEASUREMENTS) - 1): self.assertTrue( - corrected_measurements_map[mrc_metric_prefix + str(i).zfill(5)] <= - corrected_measurements_map[ami_metric_prefix + str(i).zfill(5)] + corrected_measurements_map[ + cumulative_mrc_metric_prefix + str(i).zfill(5)] <= + corrected_measurements_map[ + cumulative_ami_metric_prefix + str(i).zfill(5)] ) + # Verifies that the total reach is greater than or equal to the last + # cumulative reach. + index = len(AMI_MEASUREMENTS) - 1 + self.assertTrue( + corrected_measurements_map[ + cumulative_ami_metric_prefix + str(index).zfill(5)] <= + corrected_measurements_map[total_ami_metric] + ) + self.assertTrue( + corrected_measurements_map[ + cumulative_mrc_metric_prefix + str(index).zfill(5)] <= + corrected_measurements_map[total_mrc_metric] + ) # Verifies that the union reach is less than or equal to the sum of # individual reaches. for i in range(len(AMI_MEASUREMENTS) - 1): self.assertTrue( - corrected_measurements_map["metric_union_ami_" + str(i).zfill(5)] <= - corrected_measurements_map["metric_edp1_ami_" + str(i).zfill(5)] + - corrected_measurements_map["metric_edp2_ami_" + str(i).zfill(5)] + corrected_measurements_map[ + "cumulative_metric_union_ami_" + str(i).zfill(5)] <= + corrected_measurements_map[ + "cumulative_metric_edp1_ami_" + str(i).zfill(5)] + + corrected_measurements_map[ + "cumulative_metric_edp2_ami_" + str(i).zfill(5)] ) self.assertTrue( - corrected_measurements_map["metric_union_mrc_" + str(i).zfill(5)] <= - corrected_measurements_map["metric_edp1_mrc_" + str(i).zfill(5)] + - corrected_measurements_map["metric_edp2_mrc_" + str(i).zfill(5)] + corrected_measurements_map[ + "cumulative_metric_union_mrc_" + str(i).zfill(5)] <= + corrected_measurements_map[ + "cumulative_metric_edp1_mrc_" + str(i).zfill(5)] + + corrected_measurements_map[ + "cumulative_metric_edp2_mrc_" + str(i).zfill(5)] ) + self.assertTrue( + corrected_measurements_map["total_metric_union_ami_"] <= + corrected_measurements_map["total_metric_edp1_ami_"] + + corrected_measurements_map["total_metric_edp2_ami_"] + ) + self.assertTrue( + corrected_measurements_map["total_metric_union_mrc_"] <= + corrected_measurements_map["total_metric_edp1_mrc_"] + + corrected_measurements_map["total_metric_edp2_mrc_"] + ) + if __name__ == "__main__": unittest.main() From f1dc5187b5667d9b5876861dccd36abcd766b1d4 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Tue, 29 Oct 2024 11:21:57 +0000 Subject: [PATCH 08/18] Format code. 
--- .../tools/post_process_origin_report.py | 14 ++--- .../postprocessing/report/test_report.py | 60 +++++++++---------- 2 files changed, 35 insertions(+), 39 deletions(-) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py index 267616d442b..461cdea75d6 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py @@ -172,15 +172,13 @@ def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): measurements_policies = corrected_report.get_metrics() for policy in measurements_policies: metric_report = corrected_report.get_metric_report(policy) - for edp in metric_report.get_cumulative_edp_combinations(): + for edp_combination in metric_report.get_cumulative_edp_combinations(): for index in range(metric_report.get_number_of_periods()): - entry = metric_report.get_cumulative_measurement(edp, index) - metric_name_to_value.update( - {entry.name: int(entry.value)}) - for edp in metric_report.get_whole_campaign_edp_combinations(): - entry = metric_report.get_whole_campaign_measurement(edp) - metric_name_to_value.update( - {entry.name: int(entry.value)}) + entry = metric_report.get_cumulative_measurement(edp_combination, index) + metric_name_to_value.update({entry.name: int(entry.value)}) + for edp_combination in metric_report.get_whole_campaign_edp_combinations(): + entry = metric_report.get_whole_campaign_measurement(edp_combination) + metric_name_to_value.update({entry.name: int(entry.value)}) return metric_name_to_value diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py index f4b5e7b6619..680be682af1 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py @@ -668,23 +668,22 @@ def test_correct_report_with_both_time_series_and_whole_campaign_measurements_th }, reach_whole_campaign_by_edp_combination={ # 1 way comb - frozenset({EDP_ONE}): Measurement(4.00, 1.00, - "measurement_03"), - frozenset({EDP_TWO}): Measurement(3.3333, 1.00, - "measurement_06"), - frozenset({EDP_THREE}): Measurement(5.3333, 1.00, - "measurement_09"), + frozenset({EDP_ONE}): + Measurement(4.00, 1.00, "measurement_03"), + frozenset({EDP_TWO}): + Measurement(3.3333, 1.00, "measurement_06"), + frozenset({EDP_THREE}): + Measurement(5.3333, 1.00, "measurement_09"), # 2 way combs - frozenset({EDP_ONE, EDP_TWO}): Measurement(6.90, 1.00, - "measurement_12"), - frozenset({EDP_TWO, EDP_THREE}): Measurement(8.66666, 1.00, - "measurement_15"), - frozenset({EDP_ONE, EDP_THREE}): Measurement(8.90, 1.00, - "measurement_18"), + frozenset({EDP_ONE, EDP_TWO}): + Measurement(6.90, 1.00, "measurement_12"), + frozenset({EDP_TWO, EDP_THREE}): + Measurement(8.66666, 1.00, "measurement_15"), + frozenset({EDP_ONE, EDP_THREE}): + Measurement(8.90, 1.00, "measurement_18"), # 3 way comb - frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): Measurement(11.90, - 1.00, - "measurement_21"), + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): + Measurement(11.90, 1.00, "measurement_21"), } ) }, @@ -732,23 +731,22 @@ def test_correct_report_with_both_time_series_and_whole_campaign_measurements_th }, 
reach_whole_campaign_by_edp_combination={ # 1 way comb - frozenset({EDP_ONE}): Measurement(4.00, 1.00, - "measurement_03"), - frozenset({EDP_TWO}): Measurement(3.3333, 1.00, - "measurement_06"), - frozenset({EDP_THREE}): Measurement(5.3333, 1.00, - "measurement_09"), + frozenset({EDP_ONE}): + Measurement(4.00, 1.00, "measurement_03"), + frozenset({EDP_TWO}): + Measurement(3.3333, 1.00, "measurement_06"), + frozenset({EDP_THREE}): + Measurement(5.3333, 1.00, "measurement_09"), # 2 way combs - frozenset({EDP_ONE, EDP_TWO}): Measurement(6.90, 1.00, - "measurement_12"), - frozenset({EDP_TWO, EDP_THREE}): Measurement(8.66666, 1.00, - "measurement_15"), - frozenset({EDP_ONE, EDP_THREE}): Measurement(8.90, 1.00, - "measurement_18"), + frozenset({EDP_ONE, EDP_TWO}): + Measurement(6.90, 1.00, "measurement_12"), + frozenset({EDP_TWO, EDP_THREE}): + Measurement(8.66666, 1.00, "measurement_15"), + frozenset({EDP_ONE, EDP_THREE}): + Measurement(8.90, 1.00, "measurement_18"), # 3 way comb - frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): Measurement(11.90, - 1.00, - "measurement_21"), + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): + Measurement(11.90, 1.00, "measurement_21"), }, ) }, @@ -802,7 +800,7 @@ def test_correct_report_with_whole_campaign_has_more_edp_combinations(self): # 3 way comb frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): Measurement(11.90, 1.00, "measurement_21"), - } + }, ) }, metric_subsets_by_parent={}, From f0948d432eb589efc84f3d8441c12418e475b97d Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Tue, 29 Oct 2024 11:36:55 +0000 Subject: [PATCH 09/18] Handle cover relation for total measurements. --- .../reporting/postprocessing/report/report.py | 79 ++++++++++++------- .../postprocessing/report/test_report.py | 2 +- 2 files changed, 50 insertions(+), 31 deletions(-) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index bb9f0cd749e..ecf7bf3b8ac 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -36,6 +36,37 @@ def get_subset_relationships(edp_combinations: list[str]): return subset_relationships +def get_cover_relationships(edp_combinations): + """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). + For each set (s_i) in the list, enumerate combinations of all sets excluding this one. + For each of these considered combinations, take their union and check if it is equal to + s_i. If so, this combination is a cover of s_i. + """ + + def generate_all_length_combinations(data): + return [ + comb for r in range(1, len(data) + 1) for comb in + combinations(data, r) + ] + + cover_relationships = [] + for i in range(len(edp_combinations)): + possible_covered = edp_combinations[i] + other_sets = edp_combinations[:i] + edp_combinations[i + 1:] + all_subsets_of_possible_covered = [other_set for other_set in other_sets + if + other_set.issubset(possible_covered)] + possible_covers = generate_all_length_combinations( + all_subsets_of_possible_covered) + for possible_cover in possible_covers: + union_of_possible_cover = reduce( + lambda x, y: x.union(y), possible_cover + ) + if union_of_possible_cover == possible_covered: + cover_relationships.append((possible_covered, possible_cover)) + return cover_relationships + + class MetricReport: """Represents a metric sub-report view (e.g. MRC, AMI, etc) within a report. 
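For intuition, here is a small, self-contained example of the cover computation added above; the EDP names are placeholders and the import path is the one used elsewhere in these patches.

from report.report import get_cover_relationships

combos = [frozenset({"edp1"}), frozenset({"edp2"}), frozenset({"edp1", "edp2"})]
# Only the union set has a cover here: the pair ({edp1}, {edp2}) whose union
# reproduces it. The singletons have no covering sub-collections.
assert get_cover_relationships(combos) == [
    (frozenset({"edp1", "edp2"}),
     (frozenset({"edp1"}), frozenset({"edp2"}))),
]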
@@ -119,36 +150,13 @@ def get_whole_campaign_subset_relationships(self): edp_combinations = list(self.__reach_whole_campaign_by_edp_combination) return get_subset_relationships(edp_combinations) - def get_cover_relationships(self): - """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). - For each set (s_i) in the list, enumerate combinations of all sets excluding this one. - For each of these considered combinations, take their union and check if it is equal to - s_i. If so, this combination is a cover of s_i. - """ - - def generate_all_length_combinations(data): - return [ - comb for r in range(1, len(data) + 1) for comb in - combinations(data, r) - ] - - cover_relationships = [] + def get_cumulative_cover_relationships(self): edp_combinations = list(self.__reach_time_series_by_edp_combination) - for i in range(len(edp_combinations)): - possible_covered = edp_combinations[i] - other_sets = edp_combinations[:i] + edp_combinations[i + 1:] - all_subsets_of_possible_covered = [other_set for other_set in other_sets - if - other_set.issubset(possible_covered)] - possible_covers = generate_all_length_combinations( - all_subsets_of_possible_covered) - for possible_cover in possible_covers: - union_of_possible_cover = reduce( - lambda x, y: x.union(y), possible_cover - ) - if union_of_possible_cover == possible_covered: - cover_relationships.append((possible_covered, possible_cover)) - return cover_relationships + return get_cover_relationships(edp_combinations) + + def get_whole_campaign_cover_relationships(self): + edp_combinations = list(self.__reach_whole_campaign_by_edp_combination) + return get_cover_relationships(edp_combinations) @staticmethod def __sample_with_noise(measurement: Measurement): @@ -303,7 +311,7 @@ def __add_cover_relations_to_spec(self, spec): # sum of subsets >= union for each period for metric in self.__metric_reports: for cover_relationship in self.__metric_reports[ - metric].get_cover_relationships(): + metric].get_cumulative_cover_relationships(): covered_parent = cover_relationship[0] covering_children = cover_relationship[1] for period in range(0, self.__num_periods): @@ -314,6 +322,17 @@ def __add_cover_relations_to_spec(self, spec): parent=self.__get_cumulative_measurement_index( metric, covered_parent, period), ) + for cover_relationship in self.__metric_reports[ + metric].get_whole_campaign_cover_relationships(): + covered_parent = cover_relationship[0] + covering_children = cover_relationship[1] + spec.add_cover( + children=list(self.__get_whole_campaign_measurement_index( + metric, covering_child) + for covering_child in covering_children), + parent=self.__get_whole_campaign_measurement_index( + metric, covered_parent), + ) def __add_subset_relations_to_spec(self, spec): # Adds relations for cumulative measurements. 
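Each cover found this way is handed to the solver spec as a parent measurement index plus the indices of its covering children, which later becomes the "sum of the covering subsets >= union" constraint noted above. A minimal sketch with hard-coded indices standing in for the real __get_*_measurement_index lookups:

from noiseninja.noised_measurements import SetMeasurementsSpec

example_spec = SetMeasurementsSpec()
union_id, edp1_id, edp2_id = 0, 1, 2  # hypothetical measurement indices
example_spec.add_cover(children=[edp1_id, edp2_id], parent=union_id)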
diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py index 680be682af1..150438b90ba 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py @@ -431,7 +431,7 @@ def test_get_cover_relationships(self): ), ), ] - self.assertEqual(metric_report.get_cover_relationships(), expected) + self.assertEqual(metric_report.get_cumulative_cover_relationships(), expected) def test_get_corrected_single_metric_report(self): From b3fb89f6277efcb24afd4d40d3ce799b12184e80 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Tue, 29 Oct 2024 12:12:06 +0000 Subject: [PATCH 10/18] Fix unit tests. --- .../reporting/postprocessing/report/report.py | 20 ++-- .../postprocessing/report/test_report.py | 91 +++++++++++++++---- 2 files changed, 88 insertions(+), 23 deletions(-) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index ecf7bf3b8ac..88e2a3cbdbc 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -223,10 +223,6 @@ def __init__( self.__num_periods = next( iter(metric_reports.values())).get_number_of_periods() - num_vars_per_period = (self.__num_edp_combinations + 1) * len( - metric_reports.keys()) - self.__num_vars = self.__num_periods * num_vars_per_period - # Assign an index to each measurement. measurement_index = 0 self.__measurement_name_to_index = {} @@ -245,6 +241,8 @@ def __init__( self.__measurement_name_to_index[measurement.name] = measurement_index measurement_index += 1 + self.__num_vars = measurement_index + def get_metric_report(self, metric: str) -> MetricReport: return self.__metric_reports[metric] @@ -288,8 +286,8 @@ def to_array(self) -> np.array: """ array = np.zeros(self.__num_vars) for metric in self.__metric_reports: - for period in range(0, self.__num_periods): - for edp_combination in self.__edp_combination_index: + for edp_combination in self.__metric_reports[metric].get_cumulative_edp_combinations(): + for period in range(0, self.__num_periods): array.put( self.__get_measurement_index( self.__metric_reports[metric] @@ -299,6 +297,16 @@ def to_array(self) -> np.array: .get_cumulative_measurement(edp_combination, period) .value, ) + for edp_combination in self.__metric_reports[metric].get_whole_campaign_edp_combinations(): + array.put( + self.__get_measurement_index( + self.__metric_reports[metric] + .get_whole_campaign_measurement(edp_combination) + ), + self.__metric_reports[metric] + .get_whole_campaign_measurement(edp_combination) + .value, + ) return array def to_set_measurement_spec(self): diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py index 150438b90ba..5fc9f879018 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py @@ -431,7 +431,8 @@ def test_get_cover_relationships(self): ), ), ] - self.assertEqual(metric_report.get_cumulative_cover_relationships(), expected) + self.assertEqual(metric_report.get_cumulative_cover_relationships(), + expected) def 
test_get_corrected_single_metric_report(self): @@ -669,21 +670,21 @@ def test_correct_report_with_both_time_series_and_whole_campaign_measurements_th reach_whole_campaign_by_edp_combination={ # 1 way comb frozenset({EDP_ONE}): - Measurement(4.00, 1.00, "measurement_03"), + Measurement(4.00, 1.00, "measurement_03"), frozenset({EDP_TWO}): - Measurement(3.3333, 1.00, "measurement_06"), + Measurement(3.3333, 1.00, "measurement_06"), frozenset({EDP_THREE}): - Measurement(5.3333, 1.00, "measurement_09"), + Measurement(5.3333, 1.00, "measurement_09"), # 2 way combs frozenset({EDP_ONE, EDP_TWO}): - Measurement(6.90, 1.00, "measurement_12"), + Measurement(6.90, 1.00, "measurement_12"), frozenset({EDP_TWO, EDP_THREE}): - Measurement(8.66666, 1.00, "measurement_15"), + Measurement(8.66666, 1.00, "measurement_15"), frozenset({EDP_ONE, EDP_THREE}): - Measurement(8.90, 1.00, "measurement_18"), + Measurement(8.90, 1.00, "measurement_18"), # 3 way comb frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): - Measurement(11.90, 1.00, "measurement_21"), + Measurement(11.90, 1.00, "measurement_21"), } ) }, @@ -732,21 +733,21 @@ def test_correct_report_with_both_time_series_and_whole_campaign_measurements_th reach_whole_campaign_by_edp_combination={ # 1 way comb frozenset({EDP_ONE}): - Measurement(4.00, 1.00, "measurement_03"), + Measurement(4.00, 1.00, "measurement_03"), frozenset({EDP_TWO}): - Measurement(3.3333, 1.00, "measurement_06"), + Measurement(3.3333, 1.00, "measurement_06"), frozenset({EDP_THREE}): - Measurement(5.3333, 1.00, "measurement_09"), + Measurement(5.3333, 1.00, "measurement_09"), # 2 way combs frozenset({EDP_ONE, EDP_TWO}): - Measurement(6.90, 1.00, "measurement_12"), + Measurement(6.90, 1.00, "measurement_12"), frozenset({EDP_TWO, EDP_THREE}): - Measurement(8.66666, 1.00, "measurement_15"), + Measurement(8.66666, 1.00, "measurement_15"), frozenset({EDP_ONE, EDP_THREE}): - Measurement(8.90, 1.00, "measurement_18"), + Measurement(8.90, 1.00, "measurement_18"), # 3 way comb frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): - Measurement(11.90, 1.00, "measurement_21"), + Measurement(11.90, 1.00, "measurement_21"), }, ) }, @@ -964,6 +965,62 @@ def test_can_correct_related_metrics(self): self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + def test_get_corrected_multiple_metric_report_with_different_edp_combinations( + self): + report = Report( + metric_reports={ + "ami": MetricReport( + reach_time_series_by_edp_combination={ + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(50, 1, "measurement_01")], + frozenset({EDP_ONE}): [ + Measurement(48, 0, "measurement_02")], + frozenset({EDP_TWO}): [ + Measurement(1, 1, "measurement_03")], + } + ), + "mrc": MetricReport( + reach_time_series_by_edp_combination={ + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(45, 1, "measurement_04")], + frozenset({EDP_TWO}): [ + Measurement(2, 1, "measurement_05")], + } + ), + }, + metric_subsets_by_parent={"ami": ["mrc"]}, + cumulative_inconsistency_allowed_edp_combinations={}, + ) + + corrected = report.get_corrected_report() + + expected = Report( + metric_reports={ + "ami": MetricReport( + reach_time_series_by_edp_combination={ + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(49.667, 1, "measurement_01")], + frozenset({EDP_ONE}): [ + Measurement(48, 0, "measurement_02")], + frozenset({EDP_TWO}): [ + Measurement(1.667, 1, "measurement_03")], + } + ), + "mrc": MetricReport( + reach_time_series_by_edp_combination={ + frozenset({EDP_ONE, EDP_TWO}): [ + Measurement(45, 1, "measurement_04")], + frozenset({EDP_TWO}): 
[ + Measurement(1.667, 1, "measurement_05")], } ), }, metric_subsets_by_parent={"ami": ["mrc"]}, cumulative_inconsistency_allowed_edp_combinations={}, ) + + self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + def __assertMeasurementAlmostEquals( self, expected: Measurement, actual: Measurement, msg ): @@ -982,8 +1039,8 @@ def __assertMetricReportsAlmostEqual( self.assertEqual( expected.get_number_of_periods(), actual.get_number_of_periods() ) - for period in range(0, expected.get_number_of_periods()): - for edp_comb in expected.get_cumulative_edp_combinations(): + for edp_comb in expected.get_cumulative_edp_combinations(): + for period in range(0, expected.get_number_of_periods()): self.__assertMeasurementAlmostEquals( expected.get_cumulative_measurement(edp_comb, period), actual.get_cumulative_measurement(edp_comb, period), From 5fe7e900c54c4c1489fe041c5b5ab6945fd38c00 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Wed, 30 Oct 2024 06:44:29 +0000 Subject: [PATCH 11/18] Format code.
For each of these considered combinations, take their union and check if it is equal to @@ -227,6 +227,12 @@ def __init__( measurement_index = 0 self.__measurement_name_to_index = {} for metric in metric_reports.keys(): + for edp_combination in metric_reports[ + metric].get_whole_campaign_edp_combinations(): + measurement = metric_reports[metric].get_whole_campaign_measurement( + edp_combination) + self.__measurement_name_to_index[measurement.name] = measurement_index + measurement_index += 1 for edp_combination in metric_reports[ metric].get_cumulative_edp_combinations(): for period in range(0, self.__num_periods): @@ -234,12 +240,6 @@ def __init__( edp_combination, period) self.__measurement_name_to_index[measurement.name] = measurement_index measurement_index += 1 - for edp_combination in metric_reports[ - metric].get_whole_campaign_edp_combinations(): - measurement = metric_reports[metric].get_whole_campaign_measurement( - edp_combination) - self.__measurement_name_to_index[measurement.name] = measurement_index - measurement_index += 1 self.__num_vars = measurement_index diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py index 461cdea75d6..0b72e62edbc 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py @@ -130,17 +130,11 @@ def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): elif entry.measurement_policy == "mrc": cumulative_mrc_measurements[data_providers] = measurements - edp_combination_list = cumulative_ami_measurements.keys() - if len(edp_combination_list) == 0: - edp_combination_list = cumulative_mrc_measurements.keys() - # Processes total union measurements. for entry in report_summary.measurement_details: - if (entry.set_operation == "union") and ( - entry.is_cumulative == False): + if (entry.set_operation == "union") and (entry.is_cumulative == False): measurements = [ - Measurement(result.reach, result.standard_deviation, - result.metric) + Measurement(result.reach, result.standard_deviation, result.metric) for result in entry.measurement_results ] if entry.measurement_policy == "ami": diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py index ef729b3a972..df3f9c956b8 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py @@ -96,6 +96,7 @@ def test_report_summary_is_corrected_successfully(self): mrc_result.metric = "total_metric_" + edp + "_mrc_" corrected_measurements_map = processReportSummary(report_summary) + # Verifies that the updated reach values are consistent. for edp in EDP_MAP: cumulative_ami_metric_prefix = "cumulative_metric_" + edp + "_ami_" From 5963e64d1bc22726b49f8ad66f4ca01d3834a7eb Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Mon, 4 Nov 2024 07:53:38 +0000 Subject: [PATCH 12/18] Address comments. 
--- .../noiseninja/noised_measurements.py | 97 ++-- .../postprocessing/noiseninja/solver.py | 328 +++++++------- .../reporting/postprocessing/report/report.py | 420 +++++++++-------- .../tools/post_process_origin_report.py | 164 ------- .../postprocessing/report/test_report.py | 424 +++++++++++++++--- .../tools/test_post_process_origin_report.py | 56 ++- 6 files changed, 833 insertions(+), 656 deletions(-) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py index fa804cbb5dc..71cb448850b 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py @@ -14,65 +14,72 @@ from collections import defaultdict -class Measurement: - """Represents a measurement with a mean value and a standard deviation""" - value: float - sigma: float - name: str +class Measurement: + """Represents a measurement with a mean value and a standard deviation""" + value: float + sigma: float + name: str - def __init__(self, value: float, sigma: float, name: str): - self.value = value - self.sigma = sigma - self.name = name + def __init__(self, value: float, sigma: float, name: str): + self.value = value + self.sigma = sigma + self.name = name - def __repr__(self): - return 'Measurement({:.2f}, {:.2f}, {})\n'.format(self.value, self.sigma, self.name) + def __repr__(self): + return 'Measurement({:.2f}, {:.2f}, {})\n'.format(self.value, self.sigma, + self.name) class SetMeasurementsSpec: - """Stores information about the relationships between sets and their - measurements.""" + """Stores information about relationships between sets and measurements. + + This class maintains data about subset relationships, cover relationships, + and measurements associated with sets. It provides methods to add and + retrieve this information. - __subsets_by_set: dict[int, list[int]] - # https://en.wikipedia.org/wiki/Cover_(topology) - __covers_by_set: dict[int, list[list[int]]] - __measurements_by_set: dict[int, list[Measurement]] + Attributes: + _subsets_by_set: A dictionary mapping a set ID to a list of its subset + set IDs. + _covers_by_set: A dictionary mapping a set ID to a list of its covers, + where each cover is a list of set IDs. See + https://en.wikipedia.org/wiki/Cover_(topology). + _measurements_by_set: A dictionary mapping a set ID to a list of + Measurement objects associated with that set. 
+ """ - def __init__(self): - self.__subsets_by_set = defaultdict(list[int]) - self.__covers_by_set = defaultdict(list[list[int]]) - self.__measurements_by_set = defaultdict(list[Measurement]) + def __init__(self): + self._subsets_by_set = defaultdict(list[int]) + self._covers_by_set = defaultdict(list[list[int]]) + self._measurements_by_set = defaultdict(list[Measurement]) - def add_subset_relation(self, parent_set_id: int, child_set_id: int): - self.__subsets_by_set[parent_set_id].append(child_set_id) + def add_subset_relation(self, parent_set_id: int, child_set_id: int): + self._subsets_by_set[parent_set_id].append(child_set_id) - def add_cover(self, parent: int, children: list[int]): - self.__covers_by_set[parent].append(children) - for child in children: - self.add_subset_relation(parent, child) + def add_cover(self, parent: int, children: list[int]): + self._covers_by_set[parent].append(children) - def add_measurement(self, set_id: int, measurement: Measurement): - self.__measurements_by_set[set_id].append(measurement) + def add_measurement(self, set_id: int, measurement: Measurement): + self._measurements_by_set[set_id].append(measurement) - def all_sets(self) -> set[int]: - return set(i for i in self.__measurements_by_set.keys()) + def all_sets(self) -> set[int]: + return set(i for i in self._measurements_by_set.keys()) - def get_covers_of_set(self, set_id: int): - return self.__covers_by_set[set_id] + def get_covers_of_set(self, set_id: int): + return self._covers_by_set[set_id] - def get_subsets(self, parent_set_id): - return self.__subsets_by_set[parent_set_id] + def get_subsets(self, parent_set_id): + return self._subsets_by_set[parent_set_id] - def get_measurements(self, measured_set_id): - return self.__measurements_by_set.get(measured_set_id) + def get_measurements(self, measured_set_id): + return self._measurements_by_set.get(measured_set_id) - def get_measurement_metric(self, measured_set_id): - measurement = self.__measurements_by_set.get(measured_set_id) - return measurement[0].name + def get_measurement_metric(self, measured_set_id): + measurement = self._measurements_by_set.get(measured_set_id) + return measurement[0].name - def __repr__(self): - return (('SetMeasurementsSpec(' - 'subsets_by_set={},covers_by_set={},measurements_by_set={})') - .format(self.__subsets_by_set, self.__covers_by_set, - self.__measurements_by_set)) + def __repr__(self): + return (('SetMeasurementsSpec(' + 'subsets_by_set={},covers_by_set={},measurements_by_set={})') + .format(self._subsets_by_set, self._covers_by_set, + self._measurements_by_set)) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py index d7f624deae2..884272ba65a 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py @@ -23,172 +23,180 @@ SEMAPHORE = Semaphore() + class SolutionNotFoundError(ValueError): - __non_solution: Solution + _non_solution: Solution - def __init__(self, non_solution: Solution): - super().__init__(non_solution) - self.__non_solution = non_solution + def __init__(self, non_solution: Solution): + super().__init__(non_solution) + self._non_solution = non_solution - def get_non_solution_details(self): - return self.__non_solution + def get_non_solution_details(self): + return self._non_solution class Solver: - def __init__(self, set_measurement_spec: SetMeasurementsSpec): - 
variable_index_by_set_id = Solver.__map_sets_to_variables( - set_measurement_spec) - self.num_variables = len(variable_index_by_set_id) - self.__init_qp(self.num_variables) - self.__add_covers(set_measurement_spec, variable_index_by_set_id) - self.__add_subsets(set_measurement_spec, variable_index_by_set_id) - self.__add_measurement_targets(set_measurement_spec, - variable_index_by_set_id) - self.__init_base_value(set_measurement_spec, variable_index_by_set_id) - - self.variable_map = dict( - (variable_index_by_set_id[i], i) for i in variable_index_by_set_id) - - def __init_base_value(self, set_measurement_spec, variable_index_by_set_id): - mean_measurement_by_variable: dict[int, float] = {} - for measured_set in set_measurement_spec.all_sets(): - mean_measurement_by_variable[ - variable_index_by_set_id[measured_set]] = ( - sum(v.value - for v in set_measurement_spec.get_measurements( - measured_set)) / len( - set_measurement_spec.get_measurements(measured_set))) - self.base_value = np.array(list( - (mean_measurement_by_variable[i] - for i in range(0, self.num_variables)))) - - def __add_measurement_targets(self, set_measurement_spec, - variable_index_by_set_id): - for (measured_set, variable) in variable_index_by_set_id.items(): - variables = np.zeros(self.num_variables) - variables[variable] = 1 - for measurement in set_measurement_spec.get_measurements( - measured_set): - if abs(measurement.sigma) == 0: - self.__add_eq_term(variables, measurement.value) - else: - self.__add_loss_term( - np.multiply(variables, 1 / measurement.sigma), - -measurement.value / measurement.sigma) - - @staticmethod - def __map_sets_to_variables(set_measurement_spec) -> dict[int, int]: - variable_index_by_set_id: dict[int, int] = {} - num_variables = 0 - for measured_set in set_measurement_spec.all_sets(): - variable_index_by_set_id[measured_set] = num_variables - num_variables += 1 - return variable_index_by_set_id - - def __init_qp(self, num_variables): - self.num_variables = num_variables - # Minimize 1/2 x^T P x + q^T x - self.P = np.zeros(shape=(num_variables, num_variables)) - self.q = np.zeros(shape=(1, num_variables)) - # subject to G x <= h - self.G = [] - self.h = [] - # and A x = h - self.A = [] - self.b = [] - - def __add_subsets(self, set_measurement_spec, variable_index_by_set_id): - for measured_set in set_measurement_spec.all_sets(): - for subset in set(set_measurement_spec.get_subsets(measured_set)): - self.__add_parent_gt_child_term( - variable_index_by_set_id[measured_set], - variable_index_by_set_id[subset]) - - def __add_covers(self, set_measurement_spec, variable_index_by_set_id): - for measured_set in set_measurement_spec.all_sets(): - for cover in set_measurement_spec.get_covers_of_set(measured_set): - self.__add_cover_set_constraint( - list(variable_index_by_set_id[i] for i in cover), - variable_index_by_set_id[measured_set]) - - def __add_cover_set_constraint(self, cover_variables: list[int], - set_variable: int): - variables = np.zeros(self.num_variables) - variables.put(cover_variables, -1) - variables[set_variable] = 1 - self.__add_gt_term(variables) - - def __is_feasible(self, vector: np.array) -> bool: - for i, g in enumerate(self.G): - if np.dot(vector, g) > self.h[i][0]: - return False - return True - - def __add_parent_gt_child_term(self, parent: int, child: int): - variables = np.zeros(self.num_variables) - variables.put(parent, -1) - variables[child] = 1 - self.__add_gt_term(variables) - - def __add_loss_term(self, variables, k: float): - for v1, coeff1 in 
enumerate(variables): - self.q[0][v1] += coeff1 * k - for v2, coeff2 in enumerate(variables): - self.P[v1][v2] += coeff1 * coeff2 - - def __add_eq_term(self, variables, k: float): - self.A.append(variables) - self.b.append(k) - - def __add_gt_term(self, variables): - self.G.append(variables) - self.h.append([0]) - - def __solve(self): - x0 = np.random.randn(self.num_variables) - return self.__solve_with_initial_value(x0) - - def __solve_with_initial_value(self, x0) -> Solution: - problem = self.__problem() - solution = solve_problem(problem, solver=SOLVER, verbose=False) - return solution - - def __problem(self): - problem: Problem - if len(self.A) > 0: - problem = Problem( - self.P, self.q, np.array(self.G), np.array(self.h), - np.array(self.A), np.array(self.b)) + def __init__(self, set_measurement_spec: SetMeasurementsSpec): + variable_index_by_set_id = Solver._map_sets_to_variables( + set_measurement_spec) + self.num_variables = len(variable_index_by_set_id) + self._init_qp(self.num_variables) + self._add_covers(set_measurement_spec, variable_index_by_set_id) + self._add_subsets(set_measurement_spec, variable_index_by_set_id) + self._add_measurement_targets(set_measurement_spec, + variable_index_by_set_id) + self._init_base_value(set_measurement_spec, variable_index_by_set_id) + + self.variable_map = dict( + (variable_index_by_set_id[i], i) for i in variable_index_by_set_id) + + def _init_base_value(self, set_measurement_spec, variable_index_by_set_id): + mean_measurement_by_variable: dict[int, float] = {} + for measured_set in set_measurement_spec.all_sets(): + mean_measurement_by_variable[ + variable_index_by_set_id[measured_set]] = ( + sum(v.value + for v in set_measurement_spec.get_measurements( + measured_set)) / len( + set_measurement_spec.get_measurements(measured_set))) + self.base_value = np.array(list( + (mean_measurement_by_variable[i] + for i in range(0, self.num_variables)))) + + def _add_measurement_targets(self, set_measurement_spec, + variable_index_by_set_id): + for (measured_set, variable) in variable_index_by_set_id.items(): + variables = np.zeros(self.num_variables) + variables[variable] = 1 + for measurement in set_measurement_spec.get_measurements( + measured_set): + if abs(measurement.sigma) == 0: + self._add_eq_term(variables, measurement.value) else: - problem = Problem( - self.P, self.q, np.array(self.G), np.array(self.h)) - return problem - - def solve(self) -> Solution: - if self.__is_feasible(self.base_value): - solution = Solution(x=self.base_value, - found=True, - extras={'status': 'trivial'}, - problem=self.__problem()) + self._add_loss_term( + np.multiply(variables, 1 / measurement.sigma), + -measurement.value / measurement.sigma) + + @staticmethod + def _map_sets_to_variables(set_measurement_spec) -> dict[int, int]: + variable_index_by_set_id: dict[int, int] = {} + num_variables = 0 + for measured_set in set_measurement_spec.all_sets(): + variable_index_by_set_id[measured_set] = num_variables + num_variables += 1 + return variable_index_by_set_id + + def _init_qp(self, num_variables): + self.num_variables = num_variables + # Minimize 1/2 x^T P x + q^T x + self.P = np.zeros(shape=(num_variables, num_variables)) + self.q = np.zeros(shape=(1, num_variables)) + # subject to G x <= h + self.G = [] + self.h = [] + # and A x = h + self.A = [] + self.b = [] + + def _add_subsets(self, set_measurement_spec, variable_index_by_set_id): + for measured_set in set_measurement_spec.all_sets(): + for subset in set(set_measurement_spec.get_subsets(measured_set)): + 
self._add_parent_gt_child_term( + variable_index_by_set_id[measured_set], + variable_index_by_set_id[subset]) + + def _add_covers(self, set_measurement_spec, variable_index_by_set_id): + for measured_set in set_measurement_spec.all_sets(): + for cover in set_measurement_spec.get_covers_of_set(measured_set): + self._add_cover_set_constraint( + list(variable_index_by_set_id[i] for i in cover), + variable_index_by_set_id[measured_set]) + + def _add_cover_set_constraint(self, cover_variables: list[int], + set_variable: int): + variables = np.zeros(self.num_variables) + variables.put(cover_variables, -1) + variables[set_variable] = 1 + self._add_gt_term(variables) + + def _is_feasible(self, vector: np.array) -> bool: + for i, g in enumerate(self.G): + if np.dot(vector, g) > self.h[i][0]: + return False + return True + + def _add_parent_gt_child_term(self, parent: int, child: int): + variables = np.zeros(self.num_variables) + variables.put(parent, -1) + variables[child] = 1 + self._add_gt_term(variables) + + def _add_loss_term(self, variables, k: float): + for v1, coeff1 in enumerate(variables): + self.q[0][v1] += coeff1 * k + for v2, coeff2 in enumerate(variables): + self.P[v1][v2] += coeff1 * coeff2 + + def _add_eq_term(self, variables, k: float): + self.A.append(variables) + self.b.append(k) + + def _add_gt_term(self, variables): + self.G.append(variables) + self.h.append([0]) + + def _solve(self): + x0 = np.random.randn(self.num_variables) + return self._solve_with_initial_value(x0) + + def _solve_with_initial_value(self, x0) -> Solution: + problem = self._problem() + solution = solve_problem(problem, solver=SOLVER, verbose=False) + return solution + + def _problem(self): + problem: Problem + if len(self.A) > 0: + problem = Problem( + self.P, self.q, np.array(self.G), np.array(self.h), + np.array(self.A), np.array(self.b)) + else: + problem = Problem( + self.P, self.q, np.array(self.G), np.array(self.h)) + return problem + + def solve(self) -> Solution: + attempt_count = 0 + if self._is_feasible(self.base_value): + solution = Solution(x=self.base_value, + found=True, + extras={'status': 'trivial'}, + problem=self._problem()) + else: + while attempt_count < 10: + # TODO: check if qpsolvers is thread safe, + # and remove this semaphore. + SEMAPHORE.acquire() + solution = self._solve() + SEMAPHORE.release() + + if solution.found: + break else: - # TODO: check if qpsolvers is thread safe, - # and remove this semaphore. 
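# An aside on how the pieces above fit together, with made-up numbers (this is
# a sketch, not project code; it assumes the same package layout the tests use).
#
# 1) Objective. _add_measurement_targets turns a noisy measurement (value=100,
#    sigma=2) on variable v into a loss term via _add_loss_term, called with
#    the one-hot row scaled by 1/sigma and k = -value/sigma, so
#        P[v][v] += (1/2) * (1/2) = 0.25      q[0][v] += (1/2) * (-50) = -25.0
#    The objective term 0.5 * 0.25 * x^2 - 25 * x is minimized at x = -q/P = 100,
#    exactly the measured value. A zero-sigma measurement instead becomes a hard
#    equality row appended to A and b.
#
# 2) Constraints. With variables indexed (0: union, 1: edp_one, 2: edp_two) for
#    this illustration:
#        parent >= child        ->  G row [-1,  1,  0],  h = 0
#        union <= sum of cover  ->  G row [ 1, -1, -1],  h = 0
#    _is_feasible simply checks G @ x <= h row by row; if the per-set measurement
#    means already pass, solve() returns them as the 'trivial' solution,
#    otherwise the QP is solved (with up to 10 retries).
#
# 3) End to end, on an intentionally inconsistent spec (110 > 60 + 40):
from noiseninja.noised_measurements import Measurement, SetMeasurementsSpec
from noiseninja.solver import Solver

spec = SetMeasurementsSpec()
spec.add_cover(parent=0, children=[1, 2])
spec.add_subset_relation(parent_set_id=0, child_set_id=1)
spec.add_subset_relation(parent_set_id=0, child_set_id=2)
spec.add_measurement(0, Measurement(110.0, 1.0, "union"))
spec.add_measurement(1, Measurement(60.0, 1.0, "edp_one"))
spec.add_measurement(2, Measurement(40.0, 1.0, "edp_two"))

corrected = Solver(spec).solve_and_translate()  # set id -> adjusted value
# Up to solver tolerance, the corrected values satisfy every inequality.
assert corrected[0] <= corrected[1] + corrected[2] + 1e-2
assert corrected[1] <= corrected[0] + 1e-2
assert corrected[2] <= corrected[0] + 1e-2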
- SEMAPHORE.acquire() - solution = self.__solve() - SEMAPHORE.release() - - if not solution.found: - raise SolutionNotFoundError(solution) - - return solution - - def translate_solution(self, solution: Solution) -> dict[int, float]: - result: dict[int, Any] = {} - for var in range(0, self.num_variables): - result[self.variable_map[var]] = solution.x[var] - return result - - def solve_and_translate(self): - solution = self.solve() - return self.translate_solution(solution) \ No newline at end of file + attempt_count += 1 + + if not solution.found: + raise SolutionNotFoundError(solution) + + return solution + + def translate_solution(self, solution: Solution) -> dict[int, float]: + result: dict[int, Any] = {} + for var in range(0, self.num_variables): + result[self.variable_map[var]] = solution.x[var] + return result + + def solve_and_translate(self): + solution = self.solve() + return self.translate_solution(solution) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index 668575d9a90..de9e6590cfd 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -36,71 +36,86 @@ def get_subset_relationships(edp_combinations: list[FrozenSet[str]]): return subset_relationships -def get_cover_relationships(edp_combinations: list[FrozenSet[str]]): - """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). - For each set (s_i) in the list, enumerate combinations of all sets excluding this one. - For each of these considered combinations, take their union and check if it is equal to - s_i. If so, this combination is a cover of s_i. - """ +def is_cover(target_set, possible_cover): + union_of_possible_cover = reduce( + lambda x, y: x.union(y), possible_cover + ) + if union_of_possible_cover == target_set: + return True + else: + return False + +def get_covers(target_set, other_sets): def generate_all_length_combinations(data): return [ comb for r in range(1, len(data) + 1) for comb in combinations(data, r) ] + cover_relationship = [] + all_subsets_of_possible_covered = [other_set for other_set in other_sets + if + other_set.issubset(target_set)] + possible_covers = generate_all_length_combinations( + all_subsets_of_possible_covered) + for possible_cover in possible_covers: + if is_cover(target_set, possible_cover): + cover_relationship.append((target_set, possible_cover)) + return cover_relationship + + +def get_cover_relationships(edp_combinations: list[FrozenSet[str]]): + """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). + For each set (s_i) in the list, enumerate combinations of all sets excluding this one. + For each of these considered combinations, take their union and check if it is equal to + s_i. If so, this combination is a cover of s_i. 
+ """ cover_relationships = [] for i in range(len(edp_combinations)): possible_covered = edp_combinations[i] other_sets = edp_combinations[:i] + edp_combinations[i + 1:] - all_subsets_of_possible_covered = [other_set for other_set in other_sets - if - other_set.issubset(possible_covered)] - possible_covers = generate_all_length_combinations( - all_subsets_of_possible_covered) - for possible_cover in possible_covers: - union_of_possible_cover = reduce( - lambda x, y: x.union(y), possible_cover - ) - if union_of_possible_cover == possible_covered: - cover_relationships.append((possible_covered, possible_cover)) + cover_relationship = get_covers(possible_covered, other_sets) + cover_relationships.extend(cover_relationship) return cover_relationships class MetricReport: - """Represents a metric sub-report view (e.g. MRC, AMI, etc) - within a report. - """ - - __reach_time_series_by_edp_combination: dict[ - FrozenSet[str], list[Measurement]] - __reach_whole_campaign_by_edp_combination: dict[FrozenSet[str], Measurement] + """Represents a metric sub-report view (e.g., MRC, AMI) within a report. + + This class stores and provides access to reach measurements for different + EDP (Event, Data Provider, and Platform) combinations. It holds two types + of reach data: + + * Cumulative reach over time, represented as a time series. + * Reach for the whole campaign. + + Attributes: + _reach_time_series: A dictionary mapping EDP combinations (represented + as frozensets of strings) to lists of Measurement + objects, where each list represents a time series of + reach values. + _reach_whole_campaign: A dictionary mapping EDP combinations to + Measurement objects representing the reach for + the whole campaign. + """ def __init__( self, - reach_time_series_by_edp_combination: dict[ - FrozenSet[str], list[Measurement]], - reach_whole_campaign_by_edp_combination: dict[ - FrozenSet[str], Measurement] = None, + reach_time_series: dict[FrozenSet[str], list[Measurement]], + reach_whole_campaign: dict[FrozenSet[str], Measurement], ): - num_periods = len(next(iter(reach_time_series_by_edp_combination.values()))) - for series in reach_time_series_by_edp_combination.values(): + num_periods = len(next(iter(reach_time_series.values()))) + for series in reach_time_series.values(): if len(series) != num_periods: raise ValueError( - "all time series must have the same length {1: d} vs {2: d}".format( + "All time series must have the same length {1: d} vs {2: d}".format( len(series), len(num_periods) ) ) - self.__reach_time_series_by_edp_combination = ( - reach_time_series_by_edp_combination - ) - - if reach_whole_campaign_by_edp_combination is None: - reach_whole_campaign_by_edp_combination = {} - self.__reach_whole_campaign_by_edp_combination = ( - reach_whole_campaign_by_edp_combination - ) + self._reach_time_series = reach_time_series + self._reach_whole_campaign = reach_whole_campaign def sample_with_noise(self) -> "MetricReport": """ @@ -108,58 +123,54 @@ def sample_with_noise(self) -> "MetricReport": according to their mean and variance. 
""" return MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ edp_combination: [ - MetricReport.__sample_with_noise(measurement) - for measurement in self.__reach_time_series_by_edp_combination[ + MetricReport._sample_with_noise(measurement) + for measurement in self._reach_time_series[ edp_combination ] ] for edp_combination in - self.__reach_time_series_by_edp_combination.keys() + self._reach_time_series.keys() } ) def get_cumulative_measurement(self, edp_combination: str, period: int): - return self.__reach_time_series_by_edp_combination[edp_combination][ + return self._reach_time_series[edp_combination][ period] def get_whole_campaign_measurement(self, edp_combination: str): - return self.__reach_whole_campaign_by_edp_combination[edp_combination] + return self._reach_whole_campaign[edp_combination] def get_cumulative_edp_combinations(self): - return set(self.__reach_time_series_by_edp_combination.keys()) + return set(self._reach_time_series.keys()) def get_whole_campaign_edp_combinations(self): - return set(self.__reach_whole_campaign_by_edp_combination.keys()) + return set(self._reach_whole_campaign.keys()) def get_cumulative_edp_combinations_count(self): - return len(self.__reach_time_series_by_edp_combination.keys()) + return len(self._reach_time_series.keys()) def get_whole_campaign_edp_combinations_count(self): - return len(self.__reach_whole_campaign_by_edp_combination.keys()) + return len(self._reach_whole_campaign.keys()) def get_number_of_periods(self): - return len(next(iter(self.__reach_time_series_by_edp_combination.values()))) + return len(next(iter(self._reach_time_series.values()))) def get_cumulative_subset_relationships(self): - edp_combinations = list(self.__reach_time_series_by_edp_combination) - return get_subset_relationships(edp_combinations) + return get_subset_relationships(list(self._reach_time_series)) def get_whole_campaign_subset_relationships(self): - edp_combinations = list(self.__reach_whole_campaign_by_edp_combination) - return get_subset_relationships(edp_combinations) + return get_subset_relationships(list(self._reach_whole_campaign)) def get_cumulative_cover_relationships(self): - edp_combinations = list(self.__reach_time_series_by_edp_combination) - return get_cover_relationships(edp_combinations) + return get_cover_relationships(list(self._reach_time_series)) def get_whole_campaign_cover_relationships(self): - edp_combinations = list(self.__reach_whole_campaign_by_edp_combination) - return get_cover_relationships(edp_combinations) + return get_cover_relationships(list(self._reach_whole_campaign)) @staticmethod - def __sample_with_noise(measurement: Measurement): + def _sample_with_noise(measurement: Measurement): return Measurement( measurement.value + random.gauss(0, measurement.sigma), measurement.sigma @@ -167,15 +178,24 @@ def __sample_with_noise(measurement: Measurement): class Report: - """ - Represents a full report, consisting of multiple MetricReports, - which may have set relationships between each other. - """ - - __metric_reports: dict[str, MetricReport] - __metric_subsets_by_parent: dict[str, list[str]] - __metric_index: dict[str, int] - __edp_combination_index: dict[str, int] + """Represents a full report with multiple MetricReports and set relationships. + + This class aggregates multiple MetricReport objects, and the subset relation + between the the metrics. + + Attributes: + _metric_reports: A dictionary mapping metric names (e.g., "MRC", "AMI") + to their corresponding MetricReport objects. 
+ _metric_subsets_by_parent: A dictionary defining subset relationships + between metrics. Each key is a parent metric, + and the value is a list of its child metrics. + _cumulative_inconsistency_allowed_edp_combinations: A set of EDP + combinations for + which inconsistencies + in cumulative + measurements are + allowed. + """ def __init__( self, @@ -191,9 +211,9 @@ def __init__( cumulative_inconsistency_allowed_edps : a set containing edp keys that won't be forced to have self cumulative reaches be increasing """ - self.__metric_reports = metric_reports - self.__metric_subsets_by_parent = metric_subsets_by_parent - self.__cumulative_inconsistency_allowed_edp_combinations = ( + self._metric_reports = metric_reports + self._metric_subsets_by_parent = metric_subsets_by_parent + self._cumulative_inconsistency_allowed_edp_combinations = ( cumulative_inconsistency_allowed_edp_combinations ) @@ -209,45 +229,45 @@ def __init__( "key {1} does not have a corresponding report".format(child) ) - self.__metric_index = {} + self._metric_index = {} for index, metric in enumerate(metric_reports.keys()): - self.__metric_index[metric] = index + self._metric_index[metric] = index - self.__edp_combination_index = {} + self._edp_combination_index = {} for index, edp_combination in enumerate( next(iter(metric_reports.values())).get_cumulative_edp_combinations() ): - self.__edp_combination_index[edp_combination] = index + self._edp_combination_index[edp_combination] = index - self.__num_edp_combinations = len(self.__edp_combination_index.keys()) - self.__num_periods = next( + self._num_edp_combinations = len(self._edp_combination_index.keys()) + self._num_periods = next( iter(metric_reports.values())).get_number_of_periods() # Assign an index to each measurement. measurement_index = 0 - self.__measurement_name_to_index = {} + self._measurement_name_to_index = {} for metric in metric_reports.keys(): for edp_combination in metric_reports[ metric].get_whole_campaign_edp_combinations(): measurement = metric_reports[metric].get_whole_campaign_measurement( edp_combination) - self.__measurement_name_to_index[measurement.name] = measurement_index + self._measurement_name_to_index[measurement.name] = measurement_index measurement_index += 1 for edp_combination in metric_reports[ metric].get_cumulative_edp_combinations(): - for period in range(0, self.__num_periods): + for period in range(0, self._num_periods): measurement = metric_reports[metric].get_cumulative_measurement( edp_combination, period) - self.__measurement_name_to_index[measurement.name] = measurement_index + self._measurement_name_to_index[measurement.name] = measurement_index measurement_index += 1 - self.__num_vars = measurement_index + self._num_vars = measurement_index def get_metric_report(self, metric: str) -> MetricReport: - return self.__metric_reports[metric] + return self._metric_reports[metric] def get_metrics(self) -> set[str]: - return set(self.__metric_reports.keys()) + return set(self._metric_reports.keys()) def get_corrected_report(self) -> "Report": """Returns a corrected, consistent report. 
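For orientation, the correction flow that get_corrected_report drives can be sketched from to_set_measurement_spec, Solver.solve_and_translate, and report_from_solution as they appear in this file; the exact body of get_corrected_report is not part of these hunks, so treat this as an outline rather than the literal implementation:

# Outline only; 'report' stands for any Report instance, and the import
# assumes the same package layout the tests use.
from noiseninja.solver import Solver

def correct(report):
  # 1. Collect all measurements plus subset/cover/metric/period relations.
  spec = report.to_set_measurement_spec()
  # 2. Solve the QP; the result maps each measurement index to an adjusted value.
  solution = Solver(spec).solve_and_translate()
  # 3. Rebuild a Report whose Measurements carry the adjusted values.
  return report.report_from_solution(solution, spec)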
@@ -260,11 +280,11 @@ def get_corrected_report(self) -> "Report": def report_from_solution(self, solution, spec): return Report( metric_reports={ - metric: self.__metric_report_from_solution(metric, solution) - for metric in self.__metric_reports + metric: self._metric_report_from_solution(metric, solution) + for metric in self._metric_reports }, - metric_subsets_by_parent=self.__metric_subsets_by_parent, - cumulative_inconsistency_allowed_edp_combinations=self.__cumulative_inconsistency_allowed_edp_combinations, + metric_subsets_by_parent=self._metric_subsets_by_parent, + cumulative_inconsistency_allowed_edp_combinations=self._cumulative_inconsistency_allowed_edp_combinations, ) def sample_with_noise(self) -> "Report": @@ -273,108 +293,110 @@ def sample_with_noise(self) -> "Report": """ return Report( metric_reports={ - i: self.__metric_reports[i].sample_with_noise() - for i in self.__metric_reports + i: self._metric_reports[i].sample_with_noise() + for i in self._metric_reports }, - metric_subsets_by_parent=self.__metric_subsets_by_parent, - cumulative_inconsistency_allowed_edp_combinations=self.__cumulative_inconsistency_allowed_edp_combinations, + metric_subsets_by_parent=self._metric_subsets_by_parent, + cumulative_inconsistency_allowed_edp_combinations=self._cumulative_inconsistency_allowed_edp_combinations, ) def to_array(self) -> np.array: """Returns an array representation of all the mean measurement values in this report """ - array = np.zeros(self.__num_vars) - for metric in self.__metric_reports: - for edp_combination in self.__metric_reports[metric].get_cumulative_edp_combinations(): - for period in range(0, self.__num_periods): + array = np.zeros(self._num_vars) + for metric in self._metric_reports: + for edp_combination in self._metric_reports[ + metric].get_cumulative_edp_combinations(): + for period in range(0, self._num_periods): array.put( - self.__get_measurement_index( - self.__metric_reports[metric] + self._get_measurement_index( + self._metric_reports[metric] .get_cumulative_measurement(edp_combination, period) ), - self.__metric_reports[metric] + self._metric_reports[metric] .get_cumulative_measurement(edp_combination, period) .value, ) - for edp_combination in self.__metric_reports[metric].get_whole_campaign_edp_combinations(): + for edp_combination in self._metric_reports[ + metric].get_whole_campaign_edp_combinations(): array.put( - self.__get_measurement_index( - self.__metric_reports[metric] + self._get_measurement_index( + self._metric_reports[metric] .get_whole_campaign_measurement(edp_combination) ), - self.__metric_reports[metric] + self._metric_reports[metric] .get_whole_campaign_measurement(edp_combination) .value, - ) + ) return array def to_set_measurement_spec(self): spec = SetMeasurementsSpec() - self.__add_measurements_to_spec(spec) - self.__add_set_relations_to_spec(spec) + self._add_measurements_to_spec(spec) + self._add_set_relations_to_spec(spec) return spec - def __add_cover_relations_to_spec(self, spec): + def _add_cover_relations_to_spec(self, spec): # sum of subsets >= union for each period - for metric in self.__metric_reports: - for cover_relationship in self.__metric_reports[ + for metric in self._metric_reports: + for cover_relationship in self._metric_reports[ metric].get_cumulative_cover_relationships(): covered_parent = cover_relationship[0] covering_children = cover_relationship[1] - for period in range(0, self.__num_periods): + for period in range(0, self._num_periods): spec.add_cover( - 
children=list(self.__get_cumulative_measurement_index( + children=list(self._get_cumulative_measurement_index( metric, covering_child, period) for covering_child in covering_children), - parent=self.__get_cumulative_measurement_index( + parent=self._get_cumulative_measurement_index( metric, covered_parent, period), ) - for cover_relationship in self.__metric_reports[ + for cover_relationship in self._metric_reports[ metric].get_whole_campaign_cover_relationships(): covered_parent = cover_relationship[0] covering_children = cover_relationship[1] spec.add_cover( - children=list(self.__get_whole_campaign_measurement_index( + children=list(self._get_whole_campaign_measurement_index( metric, covering_child) for covering_child in covering_children), - parent=self.__get_whole_campaign_measurement_index( + parent=self._get_whole_campaign_measurement_index( metric, covered_parent), ) - def __add_subset_relations_to_spec(self, spec): + def _add_subset_relations_to_spec(self, spec): # Adds relations for cumulative measurements. - for metric in self.__metric_reports: - for subset_relationship in self.__metric_reports[ + for metric in self._metric_reports: + for subset_relationship in self._metric_reports[ metric ].get_cumulative_subset_relationships(): parent_edp_combination = subset_relationship[0] child_edp_combination = subset_relationship[1] - for period in range(0, self.__num_periods): + for period in range(0, self._num_periods): spec.add_subset_relation( - child_set_id=self.__get_measurement_index( - self.__metric_reports[ + child_set_id=self._get_measurement_index( + self._metric_reports[ metric].get_cumulative_measurement( child_edp_combination, period)), - parent_set_id=self.__get_measurement_index( - self.__metric_reports[ + parent_set_id=self._get_measurement_index( + self._metric_reports[ metric].get_cumulative_measurement( parent_edp_combination, period)), ) # Adds relations for whole campaign measurements. - for subset_relationship in self.__metric_reports[ + for subset_relationship in self._metric_reports[ metric ].get_whole_campaign_subset_relationships(): parent_edp_combination = subset_relationship[0] child_edp_combination = subset_relationship[1] spec.add_subset_relation( - child_set_id=self.__get_measurement_index( - self.__metric_reports[ + child_set_id=self._get_measurement_index( + self._metric_reports[ metric].get_whole_campaign_measurement( child_edp_combination)), - parent_set_id=self.__get_measurement_index( - self.__metric_reports[ + parent_set_id=self._get_measurement_index( + self._metric_reports[ metric].get_whole_campaign_measurement( parent_edp_combination)), ) @@ -382,172 +404,172 @@ def __add_subset_relations_to_spec(self, spec): # TODO(@ple13):Use timestamp to check if the last cumulative measurement covers # the whole campaign. If yes, make sure that the two measurements are equal # instead of less than or equal. - def __add_cumulative_whole_campaign_relations_to_spec(self, spec): + def _add_cumulative_whole_campaign_relations_to_spec(self, spec): # Adds relations between cumulative and whole campaign measurements. # For an edp combination, the last cumulative measurement is less than or # equal to the whole campaign measurement. 
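# Concretely, for each metric and each EDP combination that has both a
# cumulative series and a whole-campaign measurement, this emits
#
#   spec.add_subset_relation(
#       child_set_id=<index of the last cumulative measurement>,
#       parent_set_id=<index of the whole-campaign measurement>,
#   )
#
# which the solver enforces as
#   cumulative_reach[num_periods - 1] <= whole_campaign_reach.
# For example, in the SAMPLE_REPORT used by the tests in this patch, the ami
# series for {EDP_ONE} makes "measurement_02" (last cumulative period) a child
# of "measurement_09" (whole campaign), so the corrected report satisfies
# measurement_02 <= measurement_09.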
- for metric in self.__metric_reports: - for edp_combination in self.__metric_reports[ + for metric in self._metric_reports: + for edp_combination in self._metric_reports[ metric].get_cumulative_edp_combinations().intersection( - self.__metric_reports[ + self._metric_reports[ metric].get_whole_campaign_edp_combinations()): spec.add_subset_relation( - child_set_id=self.__get_measurement_index( - self.__metric_reports[ + child_set_id=self._get_measurement_index( + self._metric_reports[ metric].get_cumulative_measurement( - edp_combination, (self.__num_periods - 1))), - parent_set_id=self.__get_measurement_index( - self.__metric_reports[ + edp_combination, (self._num_periods - 1))), + parent_set_id=self._get_measurement_index( + self._metric_reports[ metric].get_whole_campaign_measurement( edp_combination)), ) - def __add_metric_relations_to_spec(self, spec): + def _add_metric_relations_to_spec(self, spec): # metric1>=metric#2 - for parent_metric in self.__metric_subsets_by_parent: - for child_metric in self.__metric_subsets_by_parent[parent_metric]: + for parent_metric in self._metric_subsets_by_parent: + for child_metric in self._metric_subsets_by_parent[parent_metric]: # Handles cumulative measurements of common edp combinations. - for edp_combination in self.__metric_reports[ + for edp_combination in self._metric_reports[ parent_metric].get_cumulative_edp_combinations().intersection( - self.__metric_reports[ + self._metric_reports[ child_metric].get_cumulative_edp_combinations()): - for period in range(0, self.__num_periods): + for period in range(0, self._num_periods): spec.add_subset_relation( - child_set_id=self.__get_measurement_index( - self.__metric_reports[ + child_set_id=self._get_measurement_index( + self._metric_reports[ child_metric].get_cumulative_measurement( edp_combination, period)), - parent_set_id=self.__get_measurement_index( - self.__metric_reports[ + parent_set_id=self._get_measurement_index( + self._metric_reports[ parent_metric].get_cumulative_measurement( edp_combination, period)), ) # Handles whole campaign measurements of common edp combinations. 
- for edp_combination in self.__metric_reports[ + for edp_combination in self._metric_reports[ parent_metric].get_whole_campaign_edp_combinations().intersection( - self.__metric_reports[ + self._metric_reports[ child_metric].get_whole_campaign_edp_combinations()): spec.add_subset_relation( - child_set_id=self.__get_measurement_index( - self.__metric_reports[ + child_set_id=self._get_measurement_index( + self._metric_reports[ child_metric].get_whole_campaign_measurement( edp_combination)), - parent_set_id=self.__get_measurement_index( - self.__metric_reports[ + parent_set_id=self._get_measurement_index( + self._metric_reports[ parent_metric].get_whole_campaign_measurement( edp_combination)), ) - def __add_cumulative_relations_to_spec(self, spec): - for metric in self.__metric_reports.keys(): - for edp_combination in self.__metric_reports[ + def _add_cumulative_relations_to_spec(self, spec): + for metric in self._metric_reports.keys(): + for edp_combination in self._metric_reports[ metric].get_cumulative_edp_combinations(): if ( len(edp_combination) == 1 and next(iter(edp_combination)) - in self.__cumulative_inconsistency_allowed_edp_combinations + in self._cumulative_inconsistency_allowed_edp_combinations ): continue - for period in range(0, self.__num_periods): - if period >= self.__num_periods - 1: + for period in range(0, self._num_periods): + if period >= self._num_periods - 1: continue spec.add_subset_relation( - child_set_id=self.__get_measurement_index( - self.__metric_reports[ + child_set_id=self._get_measurement_index( + self._metric_reports[ metric].get_cumulative_measurement( edp_combination, period)), - parent_set_id=self.__get_measurement_index( - self.__metric_reports[ + parent_set_id=self._get_measurement_index( + self._metric_reports[ metric].get_cumulative_measurement( edp_combination, period + 1)), ) - def __add_set_relations_to_spec(self, spec): + def _add_set_relations_to_spec(self, spec): # sum of subsets >= union for each period. - self.__add_cover_relations_to_spec(spec) + self._add_cover_relations_to_spec(spec) # subset <= union. - self.__add_subset_relations_to_spec(spec) + self._add_subset_relations_to_spec(spec) # metric1>=metric#2. - self.__add_metric_relations_to_spec(spec) + self._add_metric_relations_to_spec(spec) # period1 <= period2. - self.__add_cumulative_relations_to_spec(spec) + self._add_cumulative_relations_to_spec(spec) # Last cumulative measurement <= whole campaign measurement. 
- self.__add_cumulative_whole_campaign_relations_to_spec(spec) + self._add_cumulative_whole_campaign_relations_to_spec(spec) - def __add_measurements_to_spec(self, spec): - for metric in self.__metric_reports.keys(): - for edp_combination in self.__metric_reports[ + def _add_measurements_to_spec(self, spec): + for metric in self._metric_reports.keys(): + for edp_combination in self._metric_reports[ metric].get_cumulative_edp_combinations(): - for period in range(0, self.__num_periods): - measurement = self.__metric_reports[ + for period in range(0, self._num_periods): + measurement = self._metric_reports[ metric].get_cumulative_measurement(edp_combination, period) spec.add_measurement( - self.__get_measurement_index(measurement), + self._get_measurement_index(measurement), measurement, ) - for edp_combination in self.__metric_reports[ + for edp_combination in self._metric_reports[ metric].get_whole_campaign_edp_combinations(): - measurement = self.__metric_reports[ + measurement = self._metric_reports[ metric].get_whole_campaign_measurement(edp_combination) spec.add_measurement( - self.__get_measurement_index(measurement), + self._get_measurement_index(measurement), measurement, ) - def __get_measurement_index(self, measurement: Measurement): - return self.__measurement_name_to_index[measurement.name] + def _get_measurement_index(self, measurement: Measurement): + return self._measurement_name_to_index[measurement.name] - def __get_cumulative_measurement_index(self, metric: str, + def _get_cumulative_measurement_index(self, metric: str, edp_combination: str, period: int): - return self.__get_measurement_index( - self.__metric_reports[metric].get_cumulative_measurement( + return self._get_measurement_index( + self._metric_reports[metric].get_cumulative_measurement( edp_combination, period) ) - def __get_whole_campaign_measurement_index(self, metric: str, + def _get_whole_campaign_measurement_index(self, metric: str, edp_combination: str): - return self.__get_measurement_index( - self.__metric_reports[metric].get_whole_campaign_measurement( + return self._get_measurement_index( + self._metric_reports[metric].get_whole_campaign_measurement( edp_combination) ) - def __metric_report_from_solution(self, metric, solution): + def _metric_report_from_solution(self, metric, solution): solution_time_series = {} solution_whole_campaign = {} - for edp_combination in self.__metric_reports[ + for edp_combination in self._metric_reports[ metric].get_cumulative_edp_combinations(): solution_time_series[edp_combination] = [ Measurement( solution[ - self.__get_measurement_index(self.__metric_reports[ + self._get_measurement_index(self._metric_reports[ metric].get_cumulative_measurement( edp_combination, period)) ], - self.__metric_reports[metric].get_cumulative_measurement( + self._metric_reports[metric].get_cumulative_measurement( edp_combination, period).sigma, - self.__metric_reports[metric].get_cumulative_measurement( + self._metric_reports[metric].get_cumulative_measurement( edp_combination, period).name, ) - for period in range(0, self.__num_periods) + for period in range(0, self._num_periods) ] - for edp_combination in self.__metric_reports[ + for edp_combination in self._metric_reports[ metric].get_whole_campaign_edp_combinations(): solution_whole_campaign[edp_combination] = Measurement( solution[ - self.__get_measurement_index(self.__metric_reports[ + self._get_measurement_index(self._metric_reports[ metric].get_whole_campaign_measurement( edp_combination)) ], - 
self.__metric_reports[metric].get_whole_campaign_measurement( + self._metric_reports[metric].get_whole_campaign_measurement( edp_combination).sigma, - self.__metric_reports[metric].get_whole_campaign_measurement( + self._metric_reports[metric].get_whole_campaign_measurement( edp_combination).name, ) return MetricReport( - reach_time_series_by_edp_combination=solution_time_series, - reach_whole_campaign_by_edp_combination=solution_whole_campaign, + reach_time_series=solution_time_series, + reach_whole_campaign=solution_whole_campaign, ) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py index 0b72e62edbc..212908d4035 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py @@ -59,51 +59,6 @@ mrc = "mrc" -def createMeasurements(rows, reach_col_name, sigma, metric=""): - # These rows are already sorted by timestamp. - return [ - Measurement(measured_value, sigma, metric) - for measured_value in list(rows[reach_col_name]) - ] - - -def getMeasurements(df, reach_col_name, sigma): - ami_rows = df[df[FILTER_COL_NAME] == AMI_FILTER] - mrc_rows = df[df[FILTER_COL_NAME] == MRC_FILTER] - - ami_measurements = createMeasurements(ami_rows, reach_col_name, sigma) - mrc_measurements = createMeasurements(mrc_rows, reach_col_name, sigma) - - return (ami_measurements, mrc_measurements) - - -def readExcel(excel_file_path, unnoised_edps): - measurements = {} - dfs = pd.read_excel(excel_file_path, sheet_name=None) - for edp in EDP_MAP: - sigma = 0 if edp in unnoised_edps else SIGMA - - cumilative_sheet_name = EDP_MAP[edp]["sheet"] - ( - cumilative_ami_measurements, - cumilative_mrc_measurements) = getMeasurements( - dfs[cumilative_sheet_name], CUML_REACH_COL_NAME, sigma - ) - - (total_ami_measurements, total_mrc_measurements) = getMeasurements( - dfs[edp], TOTAL_REACH_COL_NAME, sigma - ) - - # There has to be 1 row for AMI and MRC metrics in the total reach sheet. - assert len(total_mrc_measurements) == 1 and len(total_ami_measurements) == 1 - - measurements[edp] = { - AMI_FILTER: cumilative_ami_measurements + total_ami_measurements, - MRC_FILTER: cumilative_mrc_measurements + total_mrc_measurements, - } - return (measurements, dfs) - - # Processes a report summary and returns a consistent one. 
# # Currently, the function only supports ami and mrc measurements and primitive @@ -177,125 +132,6 @@ def processReportSummary(report_summary: report_summary_pb2.ReportSummary()): return metric_name_to_value -def getCorrectedReport(measurements): - report = Report( - { - ami: MetricReport( - reach_time_series_by_edp_combination={ - frozenset({EDP_ONE, EDP_TWO}): measurements[TOTAL_CAMPAIGN][ - AMI_FILTER - ], - frozenset({EDP_ONE}): measurements[EDP_ONE][AMI_FILTER], - frozenset({EDP_TWO}): measurements[EDP_TWO][AMI_FILTER], - } - ), - mrc: MetricReport( - reach_time_series_by_edp_combination={ - frozenset({EDP_ONE, EDP_TWO}): measurements[TOTAL_CAMPAIGN][ - MRC_FILTER - ], - frozenset({EDP_ONE}): measurements[EDP_ONE][MRC_FILTER], - frozenset({EDP_TWO}): measurements[EDP_TWO][MRC_FILTER], - } - ), - }, - # AMI is a parent of MRC - metric_subsets_by_parent={ami: [mrc]}, - cumulative_inconsistency_allowed_edp_combinations={}, - ) - - return report.get_corrected_report() - - -def correctSheetMetric(df, rows, func): - for period, (index, row) in enumerate(rows.iterrows()): - df.at[index, CUML_REACH_COL_NAME] = math.ceil(func(period).value) - - -def correctCumSheet(df, ami_func, mrc_func): - ami_rows = df[df[FILTER_COL_NAME] == AMI_FILTER] - mrc_rows = df[df[FILTER_COL_NAME] == MRC_FILTER] - correctSheetMetric(df, ami_rows, ami_func) - correctSheetMetric(df, mrc_rows, mrc_func) - return df - - -def correctTotSheet(df, ami_val, mrc_val): - ami_rows = df[df[FILTER_COL_NAME] == AMI_FILTER] - mrc_rows = df[df[FILTER_COL_NAME] == MRC_FILTER] - - # There has to be 1 row for AMI and MRC metrics in the total reach sheet. - assert ami_rows.shape[0] == 1 and mrc_rows.shape[0] == 1 - df.at[ami_rows.index[0], TOTAL_REACH_COL_NAME] = math.ceil(ami_val) - df.at[mrc_rows.index[0], TOTAL_REACH_COL_NAME] = math.ceil(mrc_val) - return df - - -def buildCorrectedExcel(correctedReport, excel): - ami_metric_report = correctedReport.get_metric_report(ami) - mrc_metric_report = correctedReport.get_metric_report(mrc) - - for edp in EDP_MAP: - edp_index = EDP_MAP[edp]["ind"] - amiFunc = ( - partial(ami_metric_report.get_cumulative_measurement, - frozenset({EDP_ONE, EDP_TWO})) - if (edp == TOTAL_CAMPAIGN) - else partial(ami_metric_report.get_cumulative_measurement, - frozenset({edp})) - ) - mrcFunc = ( - partial(mrc_metric_report.get_cumulative_measurement, - frozenset({EDP_ONE, EDP_TWO})) - if (edp == TOTAL_CAMPAIGN) - else partial(mrc_metric_report.get_cumulative_measurement, - frozenset({edp})) - ) - - cumilative_sheet_name = EDP_MAP[edp]["sheet"] - excel[cumilative_sheet_name] = correctCumSheet( - excel[cumilative_sheet_name], amiFunc, mrcFunc - ) - - # The last value of the corrected measurement series is the total reach. 
- totAmiVal = ( - ami_metric_report.get_cumulative_measurement( - frozenset({EDP_ONE, EDP_TWO}), -1).value - if (edp == TOTAL_CAMPAIGN) - else ami_metric_report.get_cumulative_measurement( - frozenset({edp}), - -1).value - ) - totMrcVal = ( - mrc_metric_report.get_cumulative_measurement( - frozenset({EDP_ONE, EDP_TWO}), -1).value - if (edp == TOTAL_CAMPAIGN) - else mrc_metric_report.get_cumulative_measurement( - frozenset({edp}), - -1).value - ) - total_sheet_name = edp - excel[total_sheet_name] = correctTotSheet( - excel[total_sheet_name], totAmiVal, totMrcVal - ) - return excel - - -def writeCorrectedExcel(path, corrected_excel): - with pd.ExcelWriter(path) as writer: - # Write each dataframe to a different sheet - for sheet_name in corrected_excel: - corrected_excel[sheet_name].to_excel( - writer, sheet_name=sheet_name, index=False - ) - - -def correctExcelFile(path_to_report, unnoised_edps): - (measurements, excel) = readExcel(path_to_report, unnoised_edps) - correctedReport = getCorrectedReport(measurements) - return buildCorrectedExcel(correctedReport, excel) - - def main(): report_summary = report_summary_pb2.ReportSummary() # Read the encoded serialized report summary from stdin and convert it back to diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py index 5fc9f879018..8bd01cc7b3f 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py @@ -14,20 +14,122 @@ import unittest -from noiseninja.noised_measurements import Measurement -from report.report import Report, MetricReport +from noiseninja.noised_measurements import Measurement, SetMeasurementsSpec +from report.report import Report, MetricReport, is_cover, get_covers EXPECTED_PRECISION = 3 EDP_ONE = "EDP_ONE" EDP_TWO = "EDP_TWO" EDP_THREE = "EDP_THREE" +SAMPLE_REPORT = Report( + metric_reports={ + "ami": MetricReport( + reach_time_series={ + frozenset({EDP_ONE}): [Measurement(1, 0, "measurement_01"), + Measurement(1, 0, "measurement_02")], + frozenset({EDP_TWO}): [Measurement(1, 0, "measurement_03"), + Measurement(1, 0, "measurement_04")], + frozenset({EDP_THREE}): [ + Measurement(1, 0, "measurement_05"), + Measurement(1, 0, "measurement_06")], + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1, 0, "measurement_07"), + Measurement(1, 0, "measurement_08")], + }, + reach_whole_campaign={ + frozenset({EDP_ONE}): Measurement(1, 0, "measurement_09"), + frozenset({EDP_TWO}): Measurement(1, 0, "measurement_10"), + frozenset({EDP_THREE}): + Measurement(1, 0, "measurement_11"), + frozenset({EDP_ONE, EDP_TWO}): + Measurement(1, 0, "measurement_12"), + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): + Measurement(1, 0, "measurement_13"), + }, + ), + "mrc": MetricReport( + reach_time_series={ + frozenset({EDP_ONE}): [Measurement(1, 0, "measurement_14"), + Measurement(1, 0, "measurement_15")], + frozenset({EDP_TWO}): [Measurement(1, 0, "measurement_16"), + Measurement(1, 0, "measurement_17")], + frozenset({EDP_THREE}): [ + Measurement(1, 0, "measurement_18"), + Measurement(1, 0, "measurement_19")], + frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ + Measurement(1, 0, "measurement_20"), + Measurement(1, 0, "measurement_21")], + }, + reach_whole_campaign={ + frozenset({EDP_ONE}): Measurement(1, 0, "measurement_22"), + frozenset({EDP_TWO}): Measurement(1, 0, "measurement_23"), + frozenset({EDP_THREE}): + 
Measurement(1, 0, "measurement_24"), + frozenset({EDP_TWO, EDP_THREE}): + Measurement(1, 0, "measurement_25"), + }, + ) + }, + metric_subsets_by_parent={"ami": ["mrc"]}, + cumulative_inconsistency_allowed_edp_combinations={}, +) + class TestReport(unittest.TestCase): + def test_is_cover_returns_true_for_valid_cover_sets(self): + self.assertTrue(is_cover(frozenset({"EDP_ONE", "EDP_TWO", "EDP_THREE"}), + (frozenset({"EDP_ONE"}), frozenset({"EDP_TWO"}), + frozenset({"EDP_THREE"})))) + self.assertTrue(is_cover(frozenset({"EDP_ONE", "EDP_TWO", "EDP_THREE"}), + (frozenset({"EDP_ONE"}), frozenset({"EDP_TWO"}), + frozenset({"EDP_THREE"}), + frozenset({"EDP_ONE", "EDP_TWO"})))) + + def test_is_cover_returns_false_for_invalid_cover_sets(self): + self.assertFalse(is_cover(frozenset({"EDP_ONE", "EDP_TWO", "EDP_THREE"}), + (frozenset({"EDP_ONE"}), + frozenset({"EDP_THREE"})))) + + def test_get_cover_returns_all_cover_sets(self): + target = frozenset({"EDP_ONE", "EDP_TWO", "EDP_THREE"}) + other_sets = (frozenset({"EDP_ONE"}), frozenset({"EDP_TWO"}), + frozenset({"EDP_THREE"}), + frozenset({"EDP_ONE", "EDP_TWO"})) + + expected = [ + ( + frozenset({'EDP_TWO', 'EDP_THREE', 'EDP_ONE'}), + (frozenset({'EDP_THREE'}), frozenset({'EDP_TWO', 'EDP_ONE'})) + ), + ( + frozenset({'EDP_TWO', 'EDP_THREE', 'EDP_ONE'}), ( + frozenset({'EDP_ONE'}), frozenset({'EDP_TWO'}), + frozenset({'EDP_THREE'})) + ), + ( + frozenset({'EDP_TWO', 'EDP_THREE', 'EDP_ONE'}), ( + frozenset({'EDP_ONE'}), frozenset({'EDP_THREE'}), + frozenset({'EDP_TWO', 'EDP_ONE'})) + ), + ( + frozenset({'EDP_TWO', 'EDP_THREE', 'EDP_ONE'}), ( + frozenset({'EDP_TWO'}), frozenset({'EDP_THREE'}), + frozenset({'EDP_TWO', 'EDP_ONE'})) + ), + ( + frozenset({'EDP_TWO', 'EDP_THREE', 'EDP_ONE'}), ( + frozenset({'EDP_ONE'}), frozenset({'EDP_TWO'}), + frozenset({'EDP_THREE'}), frozenset({'EDP_TWO', 'EDP_ONE'})) + ) + ] + + cover_relationship = get_covers(target, other_sets) + self.assertEqual(expected, cover_relationship) def test_get_cover_relationships(self): metric_report = MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE}): [Measurement(1, 0, "measurement_01")], frozenset({EDP_TWO}): [Measurement(1, 0, "measurement_02")], frozenset({EDP_THREE}): [Measurement(1, 0, "measurement_03")], @@ -39,7 +141,8 @@ def test_get_cover_relationships(self): Measurement(1, 0, "measurement_06")], frozenset({EDP_ONE, EDP_TWO, EDP_THREE}): [ Measurement(1, 0, "measurement_07")], - } + }, + reach_whole_campaign={}, ) expected = [ @@ -434,20 +537,196 @@ def test_get_cover_relationships(self): self.assertEqual(metric_report.get_cumulative_cover_relationships(), expected) - def test_get_corrected_single_metric_report(self): + def test_add_cover_relationships(self): + report = SAMPLE_REPORT + name_to_index = report._measurement_name_to_index + + expected_covers_by_set = { + name_to_index["measurement_07"]: [ + [name_to_index["measurement_01"], + name_to_index["measurement_03"], + name_to_index["measurement_05"]] + ], + name_to_index["measurement_08"]: [ + [name_to_index["measurement_02"], + name_to_index["measurement_04"], + name_to_index["measurement_06"]] + ], + name_to_index["measurement_12"]: [ + [name_to_index["measurement_09"], + name_to_index["measurement_10"]] + ], + name_to_index["measurement_13"]: [ + [name_to_index["measurement_11"], + name_to_index["measurement_12"]], + [name_to_index["measurement_09"], + name_to_index["measurement_10"], + name_to_index["measurement_11"]], + [name_to_index["measurement_09"], + 
name_to_index["measurement_11"], + name_to_index["measurement_12"]], + [name_to_index["measurement_10"], + name_to_index["measurement_11"], + name_to_index["measurement_12"]], + [name_to_index["measurement_10"], + name_to_index["measurement_09"], + name_to_index["measurement_11"], + name_to_index["measurement_12"]] + ], + name_to_index["measurement_20"]: [ + [name_to_index["measurement_14"], + name_to_index["measurement_16"], + name_to_index["measurement_18"]] + ], + name_to_index["measurement_21"]: [ + [name_to_index["measurement_15"], + name_to_index["measurement_17"], + name_to_index["measurement_19"]] + ], + name_to_index["measurement_25"]: [ + [name_to_index["measurement_23"], + name_to_index["measurement_24"]] + ], + } + + spec = SetMeasurementsSpec() + report._add_cover_relations_to_spec(spec) + self.assertEqual(len(spec._subsets_by_set), 0) + self.assertEqual(expected_covers_by_set.keys(), spec._covers_by_set.keys()) + for key in spec._covers_by_set.keys(): + self.assertEqual({tuple(sorted(inner_list)) for inner_list in + expected_covers_by_set[key]}, + {tuple(sorted(inner_list)) for inner_list in + spec._covers_by_set[key]}) + + def test_add_subset_relationships(self): + report = SAMPLE_REPORT + name_to_index = report._measurement_name_to_index + + expected_subsets_by_set = { + name_to_index["measurement_07"]: [name_to_index["measurement_01"], + name_to_index["measurement_03"], + name_to_index["measurement_05"]], + name_to_index["measurement_08"]: [name_to_index["measurement_02"], + name_to_index["measurement_04"], + name_to_index["measurement_06"]], + name_to_index["measurement_12"]: [name_to_index["measurement_09"], + name_to_index["measurement_10"]], + name_to_index["measurement_13"]: [name_to_index["measurement_09"], + name_to_index["measurement_10"], + name_to_index["measurement_11"], + name_to_index["measurement_12"]], + name_to_index["measurement_20"]: [name_to_index["measurement_14"], + name_to_index["measurement_16"], + name_to_index["measurement_18"]], + name_to_index["measurement_21"]: [name_to_index["measurement_15"], + name_to_index["measurement_17"], + name_to_index["measurement_19"]], + name_to_index["measurement_25"]: [name_to_index["measurement_23"], + name_to_index["measurement_24"]], + } + + spec = SetMeasurementsSpec() + report._add_subset_relations_to_spec(spec) + + self.assertEqual(len(spec._covers_by_set), 0) + self.assertEqual(expected_subsets_by_set.keys(), + spec._subsets_by_set.keys()) + for key in spec._subsets_by_set.keys(): + self.assertEqual(sorted(expected_subsets_by_set[key]), + sorted(spec._subsets_by_set[key])) + + def test_add_cumulative_subset_relationships(self): + report = SAMPLE_REPORT + name_to_index = report._measurement_name_to_index + + expected_subsets_by_set = { + name_to_index["measurement_02"]: [name_to_index["measurement_01"]], + name_to_index["measurement_04"]: [name_to_index["measurement_03"]], + name_to_index["measurement_06"]: [name_to_index["measurement_05"]], + name_to_index["measurement_08"]: [name_to_index["measurement_07"]], + name_to_index["measurement_15"]: [name_to_index["measurement_14"]], + name_to_index["measurement_17"]: [name_to_index["measurement_16"]], + name_to_index["measurement_19"]: [name_to_index["measurement_18"]], + name_to_index["measurement_21"]: [name_to_index["measurement_20"]], + } + + spec = SetMeasurementsSpec() + report._add_cumulative_relations_to_spec(spec) + + self.assertEqual(len(spec._covers_by_set), 0) + self.assertEqual(expected_subsets_by_set.keys(), + spec._subsets_by_set.keys()) + for key 
in spec._subsets_by_set.keys(): + self.assertEqual(sorted(expected_subsets_by_set[key]), + sorted(spec._subsets_by_set[key])) + + def test_add_metric_relationships(self): + report = SAMPLE_REPORT + name_to_index = report._measurement_name_to_index + + expected_subsets_by_set = { + name_to_index["measurement_01"]: [name_to_index["measurement_14"]], + name_to_index["measurement_02"]: [name_to_index["measurement_15"]], + name_to_index["measurement_03"]: [name_to_index["measurement_16"]], + name_to_index["measurement_04"]: [name_to_index["measurement_17"]], + name_to_index["measurement_05"]: [name_to_index["measurement_18"]], + name_to_index["measurement_06"]: [name_to_index["measurement_19"]], + name_to_index["measurement_07"]: [name_to_index["measurement_20"]], + name_to_index["measurement_08"]: [name_to_index["measurement_21"]], + name_to_index["measurement_09"]: [name_to_index["measurement_22"]], + name_to_index["measurement_10"]: [name_to_index["measurement_23"]], + name_to_index["measurement_11"]: [name_to_index["measurement_24"]], + } + + spec = SetMeasurementsSpec() + report._add_metric_relations_to_spec(spec) + + self.assertEqual(len(spec._covers_by_set), 0) + self.assertEqual(expected_subsets_by_set.keys(), + spec._subsets_by_set.keys()) + for key in spec._subsets_by_set.keys(): + self.assertEqual(sorted(expected_subsets_by_set[key]), + sorted(spec._subsets_by_set[key])) + + def test_add_cumulative_whole_campaign_relationships(self): + report = SAMPLE_REPORT + name_to_index = report._measurement_name_to_index + expected_subsets_by_set = { + name_to_index["measurement_09"]: [name_to_index["measurement_02"]], + name_to_index["measurement_10"]: [name_to_index["measurement_04"]], + name_to_index["measurement_11"]: [name_to_index["measurement_06"]], + name_to_index["measurement_13"]: [name_to_index["measurement_08"]], + name_to_index["measurement_22"]: [name_to_index["measurement_15"]], + name_to_index["measurement_23"]: [name_to_index["measurement_17"]], + name_to_index["measurement_24"]: [name_to_index["measurement_19"]], + } + + spec = SetMeasurementsSpec() + report._add_cumulative_whole_campaign_relations_to_spec(spec) + + self.assertEqual(len(spec._covers_by_set), 0) + self.assertEqual(expected_subsets_by_set.keys(), + spec._subsets_by_set.keys()) + for key in spec._subsets_by_set.keys(): + self.assertEqual(sorted(expected_subsets_by_set[key]), + sorted(spec._subsets_by_set[key])) + + def test_get_corrected_single_metric_report(self): ami = "ami" report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(50, 1, "measurement_01")], frozenset({EDP_ONE}): [ Measurement(48, 0, "measurement_02")], frozenset({EDP_TWO}): [Measurement(1, 1, "measurement_03")], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, @@ -459,28 +738,29 @@ def test_get_corrected_single_metric_report(self): expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(49.5, 1, "measurement_01")], frozenset({EDP_ONE}): [ Measurement(48, 0, "measurement_02")], frozenset({EDP_TWO}): [ Measurement(1.5, 1, "measurement_03")], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def 
test_can_correct_time_series(self): ami = "ami" report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(0.00, 1, "measurement_01"), Measurement(3.30, 1, "measurement_02"), @@ -491,7 +771,8 @@ def test_can_correct_time_series(self): Measurement(3.30, 1, "measurement_05"), Measurement(0.00, 1, "measurement_06"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, @@ -503,7 +784,7 @@ def test_can_correct_time_series(self): expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(0.00, 1, "measurement_01"), Measurement(1.65, 1, "measurement_02"), @@ -514,21 +795,22 @@ def test_can_correct_time_series(self): Measurement(1.65, 1, "measurement_05"), Measurement(1.65, 1, "measurement_06"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def test_can_correct_time_series_for_three_edps(self): ami = "ami" report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ # 1 way comb frozenset({EDP_ONE}): [ Measurement(0.00, 1, "measurement_01"), @@ -567,7 +849,8 @@ def test_can_correct_time_series_for_three_edps(self): Measurement(8.0, 1, "measurement_20"), Measurement(11.90, 1, "measurement_21"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, @@ -579,7 +862,7 @@ def test_can_correct_time_series_for_three_edps(self): expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ # 1 way comb frozenset({EDP_ONE}): [ Measurement(0.10, 1.00, "measurement_01"), @@ -618,14 +901,15 @@ def test_can_correct_time_series_for_three_edps(self): Measurement(8.00, 1.00, "measurement_20"), Measurement(11.90, 1.00, "measurement_21"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def test_correct_report_with_both_time_series_and_whole_campaign_measurements_three_edps( self): @@ -634,7 +918,7 @@ def test_correct_report_with_both_time_series_and_whole_campaign_measurements_th report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ # 1 way comb frozenset({EDP_ONE}): [ Measurement(0.00, 1, "measurement_01"), @@ -667,7 +951,7 @@ def test_correct_report_with_both_time_series_and_whole_campaign_measurements_th Measurement(8.0, 1, "measurement_20"), ], }, - reach_whole_campaign_by_edp_combination={ + reach_whole_campaign={ # 1 way comb frozenset({EDP_ONE}): Measurement(4.00, 1.00, "measurement_03"), @@ -697,7 +981,7 @@ def test_correct_report_with_both_time_series_and_whole_campaign_measurements_th expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ # 1 way comb frozenset({EDP_ONE}): [ Measurement(0.10, 1.00, "measurement_01"), @@ -730,7 +1014,7 @@ def test_correct_report_with_both_time_series_and_whole_campaign_measurements_th Measurement(8.00, 1.00, "measurement_20"), ], }, - 
reach_whole_campaign_by_edp_combination={ + reach_whole_campaign={ # 1 way comb frozenset({EDP_ONE}): Measurement(4.00, 1.00, "measurement_03"), @@ -755,7 +1039,7 @@ def test_correct_report_with_both_time_series_and_whole_campaign_measurements_th cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def test_correct_report_with_whole_campaign_has_more_edp_combinations(self): ami = "ami" @@ -763,7 +1047,7 @@ def test_correct_report_with_whole_campaign_has_more_edp_combinations(self): report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ # 1 way comb frozenset({EDP_ONE}): [ Measurement(0.00, 1, "measurement_01"), @@ -783,7 +1067,7 @@ def test_correct_report_with_whole_campaign_has_more_edp_combinations(self): Measurement(8.0, 1, "measurement_20"), ], }, - reach_whole_campaign_by_edp_combination={ + reach_whole_campaign={ # 1 way comb frozenset({EDP_ONE}): Measurement(4.00, 1.00, "measurement_03"), @@ -813,7 +1097,7 @@ def test_correct_report_with_whole_campaign_has_more_edp_combinations(self): expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ # 1 way comb frozenset({EDP_ONE}): [ Measurement(0.025, 1.00, "measurement_01"), @@ -833,7 +1117,7 @@ def test_correct_report_with_whole_campaign_has_more_edp_combinations(self): Measurement(8.00, 1.00, "measurement_20"), ], }, - reach_whole_campaign_by_edp_combination={ + reach_whole_campaign={ # 1 way comb frozenset({EDP_ONE}): Measurement(4.00, 1.00, "measurement_03"), @@ -858,14 +1142,14 @@ def test_correct_report_with_whole_campaign_has_more_edp_combinations(self): cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def test_allows_incorrect_time_series(self): ami = "ami" report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_TWO}): [ Measurement(0.00, 1, "measurement_01"), Measurement(3.30, 1, "measurement_02"), @@ -876,7 +1160,8 @@ def test_allows_incorrect_time_series(self): Measurement(3.30, 1, "measurement_05"), Measurement(1.00, 1, "measurement_06"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, @@ -889,7 +1174,7 @@ def test_allows_incorrect_time_series(self): expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_TWO}): [ Measurement(0.00, 1, "measurement_01"), Measurement(3.30, 1, "measurement_02"), @@ -900,7 +1185,8 @@ def test_allows_incorrect_time_series(self): Measurement(3.30, 1, "measurement_05"), Measurement(1.00, 1, "measurement_06"), ], - } + }, + reach_whole_campaign={}, ) }, metric_subsets_by_parent={}, @@ -908,7 +1194,7 @@ def test_allows_incorrect_time_series(self): frozenset({EDP_ONE})), ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def test_can_correct_related_metrics(self): ami = "ami" @@ -916,20 +1202,22 @@ def test_can_correct_related_metrics(self): report = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ 
Measurement(51, 1, "measurement_01")], frozenset({EDP_ONE}): [ Measurement(50, 1, "measurement_02")], - } + }, + reach_whole_campaign={}, ), mrc: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(52, 1, "measurement_03")], frozenset({EDP_ONE}): [ Measurement(51, 1, "measurement_04")], - } + }, + reach_whole_campaign={}, ), }, # AMI is a parent of MRC @@ -942,20 +1230,22 @@ def test_can_correct_related_metrics(self): expected = Report( metric_reports={ ami: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(51.5, 1, "measurement_01")], frozenset({EDP_ONE}): [ Measurement(50.5, 1, "measurement_02")], - } + }, + reach_whole_campaign={}, ), mrc: MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(51.5, 1, "measurement_03")], frozenset({EDP_ONE}): [ Measurement(50.5, 1, "measurement_04")], - } + }, + reach_whole_campaign={}, ), }, # AMI is a parent of MRC @@ -963,29 +1253,31 @@ def test_can_correct_related_metrics(self): cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) def test_get_corrected_multiple_metric_report_with_different_edp_combinations( self): report = Report( metric_reports={ "ami": MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(50, 1, "measurement_01")], frozenset({EDP_ONE}): [ Measurement(48, 0, "measurement_02")], frozenset({EDP_TWO}): [ Measurement(1, 1, "measurement_03")], - } + }, + reach_whole_campaign={}, ), "mrc": MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(45, 1, "measurement_04")], frozenset({EDP_TWO}): [ Measurement(2, 1, "measurement_05")], - } + }, + reach_whole_campaign={}, ), }, metric_subsets_by_parent={"ami": ["mrc"]}, @@ -997,31 +1289,33 @@ def test_get_corrected_multiple_metric_report_with_different_edp_combinations( expected = Report( metric_reports={ "ami": MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(49.667, 1, "measurement_01")], frozenset({EDP_ONE}): [ Measurement(48, 0, "measurement_02")], frozenset({EDP_TWO}): [ Measurement(1.667, 1, "measurement_03")], - } + }, + reach_whole_campaign={}, ), "mrc": MetricReport( - reach_time_series_by_edp_combination={ + reach_time_series={ frozenset({EDP_ONE, EDP_TWO}): [ Measurement(45, 1, "measurement_04")], frozenset({EDP_TWO}): [ Measurement(1.667, 1, "measurement_05")], - } + }, + reach_whole_campaign={}, ), }, metric_subsets_by_parent={"ami": ["mrc"]}, cumulative_inconsistency_allowed_edp_combinations={}, ) - self.__assertReportsAlmostEqual(expected, corrected, corrected.to_array()) + self._assertReportsAlmostEqual(expected, corrected, corrected.to_array()) - def __assertMeasurementAlmostEquals( + def _assertMeasurementAlmostEquals( self, expected: Measurement, actual: Measurement, msg ): if expected.sigma == 0: @@ -1031,7 +1325,7 @@ def __assertMeasurementAlmostEquals( expected.value, actual.value, places=EXPECTED_PRECISION, msg=msg ) - def __assertMetricReportsAlmostEqual( + def _assertMetricReportsAlmostEqual( self, expected: MetricReport, actual: MetricReport, msg ): 
self.assertEqual(expected.get_cumulative_edp_combinations_count(), @@ -1039,27 +1333,27 @@ def __assertMetricReportsAlmostEqual( self.assertEqual( expected.get_number_of_periods(), actual.get_number_of_periods() ) - for edp_comb in expected.get_cumulative_edp_combinations(): + for edp_combination in expected.get_cumulative_edp_combinations(): for period in range(0, expected.get_number_of_periods()): - self.__assertMeasurementAlmostEquals( - expected.get_cumulative_measurement(edp_comb, period), - actual.get_cumulative_measurement(edp_comb, period), + self._assertMeasurementAlmostEquals( + expected.get_cumulative_measurement(edp_combination, period), + actual.get_cumulative_measurement(edp_combination, period), msg, ) self.assertEqual(expected.get_whole_campaign_edp_combinations_count(), actual.get_whole_campaign_edp_combinations_count()) - for edp_comb in expected.get_whole_campaign_edp_combinations(): - self.__assertMeasurementAlmostEquals( - expected.get_whole_campaign_measurement(edp_comb), - actual.get_whole_campaign_measurement(edp_comb), + for edp_combination in expected.get_whole_campaign_edp_combinations(): + self._assertMeasurementAlmostEquals( + expected.get_whole_campaign_measurement(edp_combination), + actual.get_whole_campaign_measurement(edp_combination), msg, ) - def __assertReportsAlmostEqual(self, expected: Report, actual: Report, msg): + def _assertReportsAlmostEqual(self, expected: Report, actual: Report, msg): self.assertEqual(expected.get_metrics(), actual.get_metrics()) for metric in expected.get_metrics(): - self.__assertMetricReportsAlmostEqual( + self._assertMetricReportsAlmostEqual( expected.get_metric_report(metric), actual.get_metric_report(metric), msg, diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py index df3f9c956b8..de5c23c5cfa 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
+import numpy as np import unittest from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import \ report_summary_pb2 -from tools.post_process_origin_report import correctExcelFile, readExcel, \ - processReportSummary +from tools.post_process_origin_report import processReportSummary EDP_MAP = { "edp1": {"edp1"}, @@ -26,25 +26,35 @@ } AMI_MEASUREMENTS = { - 'edp1': [6333, 3585, 7511, 1037, 0, 10040, 0, 2503, 7907, 0, 0, 0, 0, 1729, - 0, 1322, 0], - 'edp2': [24062000, 29281000, 31569000, 31569000, 31569000, 31569000, - 31569000, 31569000, 31569000, 31569000, 31569000, 31569000, - 31569000, 31569000, 31569000, 31569000, 31569000], - 'union': [24129432, 29152165, 31474050, 31352346, 31685183, 31425302, - 31655739, 31643458, 31438532, 31600739, 31386917, 31785206, - 31627169, 31453865, 31582783, 31806702, 31477620], + 'edp1': [701155, 1387980, 1993909, 2530351, 3004251, 3425139, 3798300, + 4130259, 4425985, 4689161, 4924654, 5134209, 5321144, 5488320, + 5638284, 5772709, 5893108], + 'edp2': [17497550, 26248452, 28434726, 29254557, 29613105, 29781657, + 29863471, 29903985, 29923599, 29933436, 29938318, 29940737, + 29941947, 29942509, 29942840, 29942982, 29943048], + 'union': [17848693, 26596529, 28810116, 29670899, 30076858, 30293844, + 30422560, 30507247, 30567675, 30614303, 30652461, 30684582, + 30712804, 30737507, 30759392, 30778972, 30796521], } MRC_MEASUREMENTS = { - 'edp1': [0, 2196, 2014, 0, 129, 0, 2018, 81, 0, 0, 288, 0, 0, 0, 0, 0, 0], - 'edp2': [24062000, 29281000, 31569000, 31569000, 31569000, 31569000, - 31569000, 31569000, 31569000, 31569000, 31569000, 31569000, - 31569000, 31569000, 31569000, 31569000, 31569000], - 'union': [24299684, 29107595, 31680517, 31513613, 32127776, 31517198, - 31786057, 31225783, 31237872, 31901620, 31720183, 31263524, - 31775635, 31917650, 31478465, 31784354, 31542065], + 'edp1': [630563, 1248838, 1794204, 2276856, 2703592, 3082468, 3418615, + 3717626, 3983983, 4220849, 4432799, 4621453, 4789932, 4940394, + 5075337, 5196132, 5304490], + 'edp2': [15747807, 23623080, 25590863, 26328935, 26651567, 26803189, + 26876867, 26913336, 26930960, 26939827, 26944204, 26946392, + 26947485, 26947981, 26948285, 26948410, 26948472], + 'union': [16063679, 23936163, 25928613, 26703382, 27068800, 27263915, + 27379780, 27456089, 27510475, 27552474, 27586849, 27615813, + 27641241, 27663446, 27683138, 27700680, 27716450], } +SIGMAS = { + 'edp1': 0.1, + 'edp2': 1.0, + 'union': 0.1, +} + + class TestOriginReport(unittest.TestCase): def test_report_summary_is_corrected_successfully(self): report_summary = report_summary_pb2.ReportSummary() @@ -58,9 +68,9 @@ def test_report_summary_is_corrected_successfully(self): for i in range(len(AMI_MEASUREMENTS[edp]) - 1): ami_result = ami_measurement_detail.measurement_results.add() ami_result.reach = AMI_MEASUREMENTS[edp][i] - ami_result.standard_deviation = 1.0 + ami_result.standard_deviation = SIGMAS[edp] ami_result.metric = "cumulative_metric_" + edp + "_ami_" + str(i).zfill( - 5) + 5) mrc_measurement_detail = report_summary.measurement_details.add() mrc_measurement_detail.measurement_policy = "mrc" @@ -70,9 +80,9 @@ def test_report_summary_is_corrected_successfully(self): for i in range(len(MRC_MEASUREMENTS[edp]) - 1): mrc_result = mrc_measurement_detail.measurement_results.add() mrc_result.reach = MRC_MEASUREMENTS[edp][i] - mrc_result.standard_deviation = 1.0 + mrc_result.standard_deviation = SIGMAS[edp] mrc_result.metric = "cumulative_metric_" + edp + "_mrc_" + str(i).zfill( - 5) + 5) for edp in EDP_MAP: 
ami_measurement_detail = report_summary.measurement_details.add() @@ -82,7 +92,7 @@ def test_report_summary_is_corrected_successfully(self): ami_measurement_detail.data_providers.extend(EDP_MAP[edp]) ami_result = ami_measurement_detail.measurement_results.add() ami_result.reach = AMI_MEASUREMENTS[edp][len(AMI_MEASUREMENTS[edp]) - 1] - ami_result.standard_deviation = 1.0 + ami_result.standard_deviation = SIGMAS[edp] ami_result.metric = "total_metric_" + edp + "_ami_" mrc_measurement_detail = report_summary.measurement_details.add() @@ -92,7 +102,7 @@ def test_report_summary_is_corrected_successfully(self): mrc_measurement_detail.data_providers.extend(EDP_MAP[edp]) mrc_result = mrc_measurement_detail.measurement_results.add() mrc_result.reach = MRC_MEASUREMENTS[edp][len(MRC_MEASUREMENTS[edp]) - 1] - mrc_result.standard_deviation = 1.0 + mrc_result.standard_deviation = SIGMAS[edp] mrc_result.metric = "total_metric_" + edp + "_mrc_" corrected_measurements_map = processReportSummary(report_summary) From 0bd05d4249d7b9d2926ea351992f5c6f6e596888 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Mon, 4 Nov 2024 08:09:41 +0000 Subject: [PATCH 13/18] Update test data. --- .../tools/test_post_process_origin_report.py | 37 +++++++++---------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py index de5c23c5cfa..afee65f8d97 100644 --- a/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py +++ b/src/test/python/wfa/measurement/reporting/postprocessing/tools/test_post_process_origin_report.py @@ -26,32 +26,29 @@ } AMI_MEASUREMENTS = { - 'edp1': [701155, 1387980, 1993909, 2530351, 3004251, 3425139, 3798300, - 4130259, 4425985, 4689161, 4924654, 5134209, 5321144, 5488320, - 5638284, 5772709, 5893108], - 'edp2': [17497550, 26248452, 28434726, 29254557, 29613105, 29781657, - 29863471, 29903985, 29923599, 29933436, 29938318, 29940737, - 29941947, 29942509, 29942840, 29942982, 29943048], - 'union': [17848693, 26596529, 28810116, 29670899, 30076858, 30293844, - 30422560, 30507247, 30567675, 30614303, 30652461, 30684582, - 30712804, 30737507, 30759392, 30778972, 30796521], + 'edp1': [6333, 3585, 7511, 1037, 0, 10040, 0, 2503, 7907, 0, 0, 0, 0, 1729, + 0, 1322, 0], + 'edp2': [24062000, 29281000, 31569000, 31569000, 31569000, 31569000, + 31569000, 31569000, 31569000, 31569000, 31569000, 31569000, + 31569000, 31569000, 31569000, 31569000, 31569000], + 'union': [24129432, 29152165, 31474050, 31352346, 31685183, 31425302, + 31655739, 31643458, 31438532, 31600739, 31386917, 31785206, + 31627169, 31453865, 31582783, 31806702, 31477620], } MRC_MEASUREMENTS = { - 'edp1': [630563, 1248838, 1794204, 2276856, 2703592, 3082468, 3418615, - 3717626, 3983983, 4220849, 4432799, 4621453, 4789932, 4940394, - 5075337, 5196132, 5304490], - 'edp2': [15747807, 23623080, 25590863, 26328935, 26651567, 26803189, - 26876867, 26913336, 26930960, 26939827, 26944204, 26946392, - 26947485, 26947981, 26948285, 26948410, 26948472], - 'union': [16063679, 23936163, 25928613, 26703382, 27068800, 27263915, - 27379780, 27456089, 27510475, 27552474, 27586849, 27615813, - 27641241, 27663446, 27683138, 27700680, 27716450], + 'edp1': [0, 2196, 2014, 0, 129, 0, 2018, 81, 0, 0, 288, 0, 0, 0, 0, 0, 0], + 'edp2': [24062000, 29281000, 31569000, 31569000, 31569000, 31569000, + 31569000, 31569000, 31569000, 
31569000, 31569000, 31569000, + 31569000, 31569000, 31569000, 31569000, 31569000], + 'union': [24299684, 29107595, 31680517, 31513613, 32127776, 31517198, + 31786057, 31225783, 31237872, 31901620, 31720183, 31263524, + 31775635, 31917650, 31478465, 31784354, 31542065], } SIGMAS = { - 'edp1': 0.1, + 'edp1': 1.0, 'edp2': 1.0, - 'union': 0.1, + 'union': 1.0, } From f4463cb89fcec43b31b89872f51ac276e9e44a82 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Mon, 4 Nov 2024 17:57:12 +0000 Subject: [PATCH 14/18] Add doc strings for functions. --- .../reporting/postprocessing/report/report.py | 33 +++++++++++++++++++ 1 file changed, 33 insertions(+) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index de9e6590cfd..0911fdf33ac 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -37,6 +37,16 @@ def get_subset_relationships(edp_combinations: list[FrozenSet[str]]): def is_cover(target_set, possible_cover): + """Checks if a collection of sets covers a target set. + + Args: + target_set: The set that should be covered. + possible_cover: A collection of sets that may cover the target set. + + Returns: + True if the union of the sets in `possible_cover` equals `target_set`, + False otherwise. + """ union_of_possible_cover = reduce( lambda x, y: x.union(y), possible_cover ) @@ -47,7 +57,30 @@ def is_cover(target_set, possible_cover): def get_covers(target_set, other_sets): + """Finds all combinations of sets from `other_sets` that cover `target_set`. + + This function identifies all possible combinations of sets within `other_sets` + whose union equals the `target_set`. It only considers sets that are subsets of + the `target_set`. + + Args: + target_set: The set that needs to be covered. + other_sets: A collection of sets that may be used to cover the `target_set`. + + Returns: + A list of tuples, where each tuple represents a covering relationship. + The first element of the tuple is the `target_set`, and the second element + is a tuple containing the sets from `other_sets` that cover it. + """ def generate_all_length_combinations(data): + """Generates all possible combinations of elements from a list. + + Args: + data: The list of elements. + + Returns: + A list of tuples, where each tuple represents a combination of elements. + """ return [ comb for r in range(1, len(data) + 1) for comb in combinations(data, r) From 61d0b00e3018f2a94067aec10a2fe3d6a0153d21 Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Tue, 5 Nov 2024 20:43:27 +0000 Subject: [PATCH 15/18] Address comments. 
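The simplified is_cover() keeps the semantics documented in the previous patch: a collection of sets covers a target set exactly when the union of the collection equals the target, and get_covers() only considers candidate sets that are subsets of the target. A minimal, self-contained sketch of that behavior (it mirrors the docstrings rather than importing the module, and the EDP names are made up for illustration):

from functools import reduce
from itertools import combinations

target = frozenset({"edp1", "edp2", "edp3"})
candidates = [frozenset({"edp1"}), frozenset({"edp2"}),
              frozenset({"edp1", "edp2"}), frozenset({"edp3"})]

def is_cover(target_set, possible_cover):
  # A cover's union must equal the target set exactly.
  return reduce(lambda x, y: x.union(y), possible_cover) == target_set

# Only subsets of the target are considered; every non-empty combination of
# them whose union equals the target is a cover.
subsets = [s for s in candidates if s.issubset(target)]
covers = [combo
          for r in range(1, len(subsets) + 1)
          for combo in combinations(subsets, r)
          if is_cover(target, combo)]
# ({edp1}, {edp2}, {edp3}) and ({edp1, edp2}, {edp3}) are among the covers.
print(len(covers))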
--- .../reporting/postprocessing/noiseninja/solver.py | 4 ++-- .../measurement/reporting/postprocessing/report/report.py | 6 +----- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py index 884272ba65a..764af1c2a90 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py @@ -20,7 +20,7 @@ from typing import Any SOLVER = "highs" - +MAX_ATTEMPTS = 10 SEMAPHORE = Semaphore() @@ -174,7 +174,7 @@ def solve(self) -> Solution: extras={'status': 'trivial'}, problem=self._problem()) else: - while attempt_count < 10: + while attempt_count < MAX_ATTEMPTS: # TODO: check if qpsolvers is thread safe, # and remove this semaphore. SEMAPHORE.acquire() diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index 0911fdf33ac..cadd2af1f28 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -50,11 +50,7 @@ def is_cover(target_set, possible_cover): union_of_possible_cover = reduce( lambda x, y: x.union(y), possible_cover ) - if union_of_possible_cover == target_set: - return True - else: - return False - + return union_of_possible_cover == target_set def get_covers(target_set, other_sets): """Finds all combinations of sets from `other_sets` that cover `target_set`. From e6dbd8e1fcd9e2a3970802af69c1de8407d81101 Mon Sep 17 00:00:00 2001 From: ple13 Date: Tue, 19 Nov 2024 02:05:47 -0500 Subject: [PATCH 16/18] Address comments. --- .../noiseninja/noised_measurements.py | 10 ++-- .../postprocessing/noiseninja/solver.py | 8 +-- .../reporting/postprocessing/report/report.py | 51 ++++++++++--------- .../tools/post_process_origin_report.py | 38 ++------------ .../v2alpha/sample_report_large.json | 16 +++--- 5 files changed, 48 insertions(+), 75 deletions(-) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py index 71cb448850b..48206af40de 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/noised_measurements.py @@ -46,7 +46,7 @@ class SetMeasurementsSpec: https://en.wikipedia.org/wiki/Cover_(topology). _measurements_by_set: A dictionary mapping a set ID to a list of Measurement objects associated with that set. 
- """ + """ def __init__(self): self._subsets_by_set = defaultdict(list[int]) @@ -65,16 +65,16 @@ def add_measurement(self, set_id: int, measurement: Measurement): def all_sets(self) -> set[int]: return set(i for i in self._measurements_by_set.keys()) - def get_covers_of_set(self, set_id: int): + def get_covers_of_set(self, set_id: int) -> list[list[int]]: return self._covers_by_set[set_id] - def get_subsets(self, parent_set_id): + def get_subsets(self, parent_set_id: int) -> list[int]: return self._subsets_by_set[parent_set_id] - def get_measurements(self, measured_set_id): + def get_measurements(self, measured_set_id: int) -> list[Measurement]: return self._measurements_by_set.get(measured_set_id) - def get_measurement_metric(self, measured_set_id): + def get_measurement_metric(self, measured_set_id: int) -> str: measurement = self._measurements_by_set.get(measured_set_id) return measurement[0].name diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py index 764af1c2a90..dd9f019de3e 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py @@ -132,17 +132,17 @@ def _add_parent_gt_child_term(self, parent: int, child: int): variables[child] = 1 self._add_gt_term(variables) - def _add_loss_term(self, variables, k: float): + def _add_loss_term(self, variables: np.array, k: float): for v1, coeff1 in enumerate(variables): self.q[0][v1] += coeff1 * k for v2, coeff2 in enumerate(variables): self.P[v1][v2] += coeff1 * coeff2 - def _add_eq_term(self, variables, k: float): + def _add_eq_term(self, variables: np.array, k: float): self.A.append(variables) self.b.append(k) - def _add_gt_term(self, variables): + def _add_gt_term(self, variables: np.array): self.G.append(variables) self.h.append([0]) @@ -197,6 +197,6 @@ def translate_solution(self, solution: Solution) -> dict[int, float]: result[self.variable_map[var]] = solution.x[var] return result - def solve_and_translate(self): + def solve_and_translate(self) -> dict[int, float]: solution = self.solve() return self.translate_solution(solution) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index cadd2af1f28..27e8b1cde75 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -18,12 +18,12 @@ from noiseninja.noised_measurements import SetMeasurementsSpec, Measurement from noiseninja.solver import Solver -from typing import FrozenSet +from typing import FrozenSet, Tuple from itertools import combinations from functools import reduce -def get_subset_relationships(edp_combinations: list[FrozenSet[str]]): +def get_subset_relationships(edp_combinations: list[FrozenSet[str]]) -> list[Tuple[int, int]]: """Returns a list of tuples where first element in the tuple is the parent and second element is the subset.""" subset_relationships = [] @@ -36,7 +36,7 @@ def get_subset_relationships(edp_combinations: list[FrozenSet[str]]): return subset_relationships -def is_cover(target_set, possible_cover): +def is_cover(target_set: set[int], possible_cover: list[set[int]]) -> bool: """Checks if a collection of sets covers a target set. 
Args: @@ -52,7 +52,8 @@ def is_cover(target_set, possible_cover): ) return union_of_possible_cover == target_set -def get_covers(target_set, other_sets): + +def get_covers(target_set: set[int], other_sets: list[set[int]]) -> list[Tuple[set[int], list[set[int]]]]: """Finds all combinations of sets from `other_sets` that cover `target_set`. This function identifies all possible combinations of sets within `other_sets` @@ -68,7 +69,7 @@ def get_covers(target_set, other_sets): The first element of the tuple is the `target_set`, and the second element is a tuple containing the sets from `other_sets` that cover it. """ - def generate_all_length_combinations(data): + def generate_all_length_combinations(data: list[int]) -> list[set[int]]: """Generates all possible combinations of elements from a list. Args: @@ -94,7 +95,7 @@ def generate_all_length_combinations(data): return cover_relationship -def get_cover_relationships(edp_combinations: list[FrozenSet[str]]): +def get_cover_relationships(edp_combinations: list[FrozenSet[str]]) -> list[Tuple[set[int], list[set[int]]]]: """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). For each set (s_i) in the list, enumerate combinations of all sets excluding this one. For each of these considered combinations, take their union and check if it is equal to @@ -164,42 +165,42 @@ def sample_with_noise(self) -> "MetricReport": } ) - def get_cumulative_measurement(self, edp_combination: str, period: int): + def get_cumulative_measurement(self, edp_combination: str, period: int) -> Measurement: return self._reach_time_series[edp_combination][ period] - def get_whole_campaign_measurement(self, edp_combination: str): + def get_whole_campaign_measurement(self, edp_combination: str) -> Measurement: return self._reach_whole_campaign[edp_combination] - def get_cumulative_edp_combinations(self): + def get_cumulative_edp_combinations(self) -> set[list[int]]: return set(self._reach_time_series.keys()) - def get_whole_campaign_edp_combinations(self): + def get_whole_campaign_edp_combinations(self) -> set[list[int]]: return set(self._reach_whole_campaign.keys()) - def get_cumulative_edp_combinations_count(self): + def get_cumulative_edp_combinations_count(self) -> int: return len(self._reach_time_series.keys()) - def get_whole_campaign_edp_combinations_count(self): + def get_whole_campaign_edp_combinations_count(self) -> int: return len(self._reach_whole_campaign.keys()) - def get_number_of_periods(self): + def get_number_of_periods(self) -> int: return len(next(iter(self._reach_time_series.values()))) - def get_cumulative_subset_relationships(self): + def get_cumulative_subset_relationships(self) -> list[Tuple[int, int]]: return get_subset_relationships(list(self._reach_time_series)) - def get_whole_campaign_subset_relationships(self): + def get_whole_campaign_subset_relationships(self) -> list[Tuple[int, int]]: return get_subset_relationships(list(self._reach_whole_campaign)) - def get_cumulative_cover_relationships(self): + def get_cumulative_cover_relationships(self) -> list[Tuple[set[int], list[set[int]]]]: return get_cover_relationships(list(self._reach_time_series)) - def get_whole_campaign_cover_relationships(self): + def get_whole_campaign_cover_relationships(self) -> list[Tuple[set[int], list[set[int]]]]: return get_cover_relationships(list(self._reach_whole_campaign)) @staticmethod - def _sample_with_noise(measurement: Measurement): + def _sample_with_noise(measurement: Measurement) -> Measurement: return Measurement( 
measurement.value + random.gauss(0, measurement.sigma), measurement.sigma @@ -292,7 +293,7 @@ def __init__( self._num_vars = measurement_index - def get_metric_report(self, metric: str) -> MetricReport: + def get_metric_report(self, metric: str) -> "MetricReport": return self._metric_reports[metric] def get_metrics(self) -> set[str]: @@ -306,7 +307,7 @@ def get_corrected_report(self) -> "Report": solution = Solver(spec).solve_and_translate() return self.report_from_solution(solution, spec) - def report_from_solution(self, solution, spec): + def report_from_solution(self, solution, spec) -> "Report": return Report( metric_reports={ metric: self._metric_report_from_solution(metric, solution) @@ -360,7 +361,7 @@ def to_array(self) -> np.array: ) return array - def to_set_measurement_spec(self): + def to_set_measurement_spec(self) -> SetMeasurementsSpec: spec = SetMeasurementsSpec() self._add_measurements_to_spec(spec) self._add_set_relations_to_spec(spec) @@ -549,24 +550,24 @@ def _add_measurements_to_spec(self, spec): measurement, ) - def _get_measurement_index(self, measurement: Measurement): + def _get_measurement_index(self, measurement: Measurement) -> int: return self._measurement_name_to_index[measurement.name] def _get_cumulative_measurement_index(self, metric: str, - edp_combination: str, period: int): + edp_combination: str, period: int) -> int: return self._get_measurement_index( self._metric_reports[metric].get_cumulative_measurement( edp_combination, period) ) def _get_whole_campaign_measurement_index(self, metric: str, - edp_combination: str): + edp_combination: str) -> int: return self._get_measurement_index( self._metric_reports[metric].get_whole_campaign_measurement( edp_combination) ) - def _metric_report_from_solution(self, metric, solution): + def _metric_report_from_solution(self, metric, solution) -> "MetricReport": solution_time_series = {} solution_whole_campaign = {} for edp_combination in self._metric_reports[ diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py index 212908d4035..d2b83364a82 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/tools/post_process_origin_report.py @@ -13,48 +13,20 @@ # limitations under the License. import json -import math -import pandas as pd import sys -from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import \ - report_summary_pb2 -from functools import partial +from src.main.proto.wfa.measurement.reporting.postprocessing.v2alpha import report_summary_pb2 from noiseninja.noised_measurements import Measurement from report.report import Report, MetricReport +from typing import FrozenSet # This is a demo script that has the following assumptions : -# 1. There are 2 EDPs one with Name Google, the other Linear TV. -# 2. CUSTOM filters are not yet supported in this tool. -# 3. AMI is a parent of MRC and there are no other relationships between metrics. -# 4. The standard deviation for all Measurements are assumed to be 1 -# 5. Frequency results are not corrected. -# 6. Impression results are not corrected. +# 1. CUSTOM filters are not yet supported in this tool. +# 2. AMI is a parent of MRC and there are no other relationships between metrics. +# 3. Impression results are not corrected. 
SIGMA = 1 -AMI_FILTER = "AMI" -MRC_FILTER = "MRC" - -# TODO(uakyol) : Read the EDP names dynamically from the excel sheet -# TODO(uakyol) : Make this work for 3 EDPs -EDP_ONE = "Google" -EDP_TWO = "Linear TV" -TOTAL_CAMPAIGN = "Total Campaign" - -edp_names = [EDP_ONE, EDP_TWO] - -CUML_REACH_PREFIX = "Cuml. Reach" - -EDP_MAP = { - edp_name: {"sheet": f"{CUML_REACH_PREFIX} ({edp_name})", "ind": ind} - for ind, edp_name in enumerate(edp_names + [TOTAL_CAMPAIGN]) -} - -CUML_REACH_COL_NAME = "Cumulative Reach 1+" -TOTAL_REACH_COL_NAME = "Total Reach (1+)" -FILTER_COL_NAME = "Impression Filter" - ami = "ami" mrc = "mrc" diff --git a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_large.json b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_large.json index 9178812b1b8..d5472682c60 100644 --- a/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_large.json +++ b/src/test/kotlin/org/wfanet/measurement/reporting/postprocessing/v2alpha/sample_report_large.json @@ -92,7 +92,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } @@ -215,7 +215,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } @@ -338,7 +338,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } @@ -457,7 +457,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } @@ -559,7 +559,7 @@ "reach": { "value": "31569000", "univariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 } }, "frequencyHistogram": { @@ -576,7 +576,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } @@ -682,7 +682,7 @@ "reach": { "value": "31569000", "univariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 } }, "frequencyHistogram": { @@ -699,7 +699,7 @@ "standardDeviation": 0.28867513459481287 }, "kPlusUnivariateStatistics": { - "standardDeviation": 102011.27564649425 + "standardDeviation": 1.0 }, "relativeKPlusUnivariateStatistics": { } From 2494b0698bc221271a039675bf60b6bcd6b205f0 Mon Sep 17 00:00:00 2001 From: ple13 Date: Tue, 19 Nov 2024 20:54:34 -0500 Subject: [PATCH 17/18] Correct input and output types. 
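For context on the types being tightened here: as I read _add_loss_term() and _add_measurement_targets(), each measurement contributes a ((x - value) / sigma)^2 term to the objective, and the subset/cover relations become linear inequalities, so the correction is a small convex QP handed to qpsolvers. A rough two-variable sketch of that shape, posed directly against qpsolvers (the parent/child values are invented, and it assumes the HiGHS backend behind SOLVER = "highs" is installed):

import numpy as np
from qpsolvers import solve_qp

# x = [parent, child]: the noisy child reach (51) exceeds the noisy parent
# reach (50), so the corrected values must satisfy child <= parent.
values = np.array([50.0, 51.0])
sigmas = np.array([1.0, 1.0])

# 0.5 * x^T P x + q^T x matches sum(((x - value) / sigma)^2) up to a constant.
P = 2.0 * np.diag(1.0 / sigmas**2)
q = -2.0 * values / sigmas**2

# G x <= h encodes child <= parent, i.e. -x_parent + x_child <= 0.
G = np.array([[-1.0, 1.0]])
h = np.array([0.0])

x = solve_qp(P, q, G, h, solver="highs")
print(x)  # approximately [50.5, 50.5]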
--- .../postprocessing/noiseninja/solver.py | 21 ++++---- .../reporting/postprocessing/report/report.py | 54 +++++++++---------- 2 files changed, 39 insertions(+), 36 deletions(-) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py index dd9f019de3e..a8d73ed8864 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/noiseninja/solver.py @@ -51,7 +51,8 @@ def __init__(self, set_measurement_spec: SetMeasurementsSpec): self.variable_map = dict( (variable_index_by_set_id[i], i) for i in variable_index_by_set_id) - def _init_base_value(self, set_measurement_spec, variable_index_by_set_id): + def _init_base_value(self, set_measurement_spec: SetMeasurementsSpec, + variable_index_by_set_id: dict[int, int]): mean_measurement_by_variable: dict[int, float] = {} for measured_set in set_measurement_spec.all_sets(): mean_measurement_by_variable[ @@ -64,8 +65,8 @@ def _init_base_value(self, set_measurement_spec, variable_index_by_set_id): (mean_measurement_by_variable[i] for i in range(0, self.num_variables)))) - def _add_measurement_targets(self, set_measurement_spec, - variable_index_by_set_id): + def _add_measurement_targets(self, set_measurement_spec: SetMeasurementsSpec, + variable_index_by_set_id: dict[int, int]): for (measured_set, variable) in variable_index_by_set_id.items(): variables = np.zeros(self.num_variables) variables[variable] = 1 @@ -79,7 +80,7 @@ def _add_measurement_targets(self, set_measurement_spec, -measurement.value / measurement.sigma) @staticmethod - def _map_sets_to_variables(set_measurement_spec) -> dict[int, int]: + def _map_sets_to_variables(set_measurement_spec: SetMeasurementsSpec) -> dict[int, int]: variable_index_by_set_id: dict[int, int] = {} num_variables = 0 for measured_set in set_measurement_spec.all_sets(): @@ -87,7 +88,7 @@ def _map_sets_to_variables(set_measurement_spec) -> dict[int, int]: num_variables += 1 return variable_index_by_set_id - def _init_qp(self, num_variables): + def _init_qp(self, num_variables: int): self.num_variables = num_variables # Minimize 1/2 x^T P x + q^T x self.P = np.zeros(shape=(num_variables, num_variables)) @@ -99,21 +100,23 @@ def _init_qp(self, num_variables): self.A = [] self.b = [] - def _add_subsets(self, set_measurement_spec, variable_index_by_set_id): + def _add_subsets(self, set_measurement_spec: SetMeasurementsSpec, + variable_index_by_set_id: dict[int, int]): for measured_set in set_measurement_spec.all_sets(): for subset in set(set_measurement_spec.get_subsets(measured_set)): self._add_parent_gt_child_term( variable_index_by_set_id[measured_set], variable_index_by_set_id[subset]) - def _add_covers(self, set_measurement_spec, variable_index_by_set_id): + def _add_covers(self, set_measurement_spec: SetMeasurementsSpec, + variable_index_by_set_id: dict[int, int]): for measured_set in set_measurement_spec.all_sets(): for cover in set_measurement_spec.get_covers_of_set(measured_set): self._add_cover_set_constraint( list(variable_index_by_set_id[i] for i in cover), variable_index_by_set_id[measured_set]) - def _add_cover_set_constraint(self, cover_variables: list[int], + def _add_cover_set_constraint(self, cover_variables: set[int], set_variable: int): variables = np.zeros(self.num_variables) variables.put(cover_variables, -1) @@ -192,7 +195,7 @@ def solve(self) -> Solution: return solution def 
translate_solution(self, solution: Solution) -> dict[int, float]: - result: dict[int, Any] = {} + result: dict[int, float] = {} for var in range(0, self.num_variables): result[self.variable_map[var]] = solution.x[var] return result diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index 27e8b1cde75..033d9da8e3a 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -18,16 +18,16 @@ from noiseninja.noised_measurements import SetMeasurementsSpec, Measurement from noiseninja.solver import Solver -from typing import FrozenSet, Tuple +from qpsolvers import Solution +from typing import Any, FrozenSet, Tuple from itertools import combinations from functools import reduce -def get_subset_relationships(edp_combinations: list[FrozenSet[str]]) -> list[Tuple[int, int]]: +def get_subset_relationships(edp_combinations: list[FrozenSet[str]]) -> list[Tuple[FrozenSet[str], FrozenSet[str]]]: """Returns a list of tuples where first element in the tuple is the parent and second element is the subset.""" subset_relationships = [] - for comb1, comb2 in combinations(edp_combinations, 2): if comb1.issubset(comb2): subset_relationships.append((comb2, comb1)) @@ -36,7 +36,7 @@ def get_subset_relationships(edp_combinations: list[FrozenSet[str]]) -> list[Tup return subset_relationships -def is_cover(target_set: set[int], possible_cover: list[set[int]]) -> bool: +def is_cover(target_set: FrozenSet[str], possible_cover: list[FrozenSet[str]]) -> bool: """Checks if a collection of sets covers a target set. Args: @@ -53,7 +53,7 @@ def is_cover(target_set: set[int], possible_cover: list[set[int]]) -> bool: return union_of_possible_cover == target_set -def get_covers(target_set: set[int], other_sets: list[set[int]]) -> list[Tuple[set[int], list[set[int]]]]: +def get_covers(target_set: FrozenSet[str], other_sets: list[FrozenSet[str]]) -> list[Tuple[FrozenSet[str], list[FrozenSet[str]]]]: """Finds all combinations of sets from `other_sets` that cover `target_set`. This function identifies all possible combinations of sets within `other_sets` @@ -69,11 +69,11 @@ def get_covers(target_set: set[int], other_sets: list[set[int]]) -> list[Tuple[s The first element of the tuple is the `target_set`, and the second element is a tuple containing the sets from `other_sets` that cover it. """ - def generate_all_length_combinations(data: list[int]) -> list[set[int]]: + def generate_all_length_combinations(data: list[Any]) -> list[tuple[Any, ...]]: """Generates all possible combinations of elements from a list. Args: - data: The list of elements. + data: A list of elements. Returns: A list of tuples, where each tuple represents a combination of elements. @@ -95,7 +95,7 @@ def generate_all_length_combinations(data: list[int]) -> list[set[int]]: return cover_relationship -def get_cover_relationships(edp_combinations: list[FrozenSet[str]]) -> list[Tuple[set[int], list[set[int]]]]: +def get_cover_relationships(edp_combinations: list[FrozenSet[str]]) -> list[Tuple[FrozenSet[str], list[FrozenSet[str]]]]: """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). For each set (s_i) in the list, enumerate combinations of all sets excluding this one. 
For each of these considered combinations, take their union and check if it is equal to @@ -165,17 +165,17 @@ def sample_with_noise(self) -> "MetricReport": } ) - def get_cumulative_measurement(self, edp_combination: str, period: int) -> Measurement: + def get_cumulative_measurement(self, edp_combination: FrozenSet[str], period: int) -> Measurement: return self._reach_time_series[edp_combination][ period] - def get_whole_campaign_measurement(self, edp_combination: str) -> Measurement: + def get_whole_campaign_measurement(self, edp_combination: FrozenSet[str]) -> Measurement: return self._reach_whole_campaign[edp_combination] - def get_cumulative_edp_combinations(self) -> set[list[int]]: + def get_cumulative_edp_combinations(self) -> set[FrozenSet[str]]: return set(self._reach_time_series.keys()) - def get_whole_campaign_edp_combinations(self) -> set[list[int]]: + def get_whole_campaign_edp_combinations(self) -> set[FrozenSet[str]]: return set(self._reach_whole_campaign.keys()) def get_cumulative_edp_combinations_count(self) -> int: @@ -187,16 +187,16 @@ def get_whole_campaign_edp_combinations_count(self) -> int: def get_number_of_periods(self) -> int: return len(next(iter(self._reach_time_series.values()))) - def get_cumulative_subset_relationships(self) -> list[Tuple[int, int]]: + def get_cumulative_subset_relationships(self) -> list[Tuple[FrozenSet[str], FrozenSet[str]]]: return get_subset_relationships(list(self._reach_time_series)) - def get_whole_campaign_subset_relationships(self) -> list[Tuple[int, int]]: + def get_whole_campaign_subset_relationships(self) -> list[Tuple[FrozenSet[str], FrozenSet[str]]]: return get_subset_relationships(list(self._reach_whole_campaign)) - def get_cumulative_cover_relationships(self) -> list[Tuple[set[int], list[set[int]]]]: + def get_cumulative_cover_relationships(self) -> list[Tuple[FrozenSet[str], list[FrozenSet[str]]]]: return get_cover_relationships(list(self._reach_time_series)) - def get_whole_campaign_cover_relationships(self) -> list[Tuple[set[int], list[set[int]]]]: + def get_whole_campaign_cover_relationships(self) -> list[Tuple[FrozenSet[str], list[FrozenSet[str]]]]: return get_cover_relationships(list(self._reach_whole_campaign)) @staticmethod @@ -307,7 +307,7 @@ def get_corrected_report(self) -> "Report": solution = Solver(spec).solve_and_translate() return self.report_from_solution(solution, spec) - def report_from_solution(self, solution, spec) -> "Report": + def report_from_solution(self, solution: Solution, spec: SetMeasurementsSpec) -> "Report": return Report( metric_reports={ metric: self._metric_report_from_solution(metric, solution) @@ -367,7 +367,7 @@ def to_set_measurement_spec(self) -> SetMeasurementsSpec: self._add_set_relations_to_spec(spec) return spec - def _add_cover_relations_to_spec(self, spec): + def _add_cover_relations_to_spec(self, spec: SetMeasurementsSpec): # sum of subsets >= union for each period for metric in self._metric_reports: for cover_relationship in self._metric_reports[ @@ -394,7 +394,7 @@ def _add_cover_relations_to_spec(self, spec): metric, covered_parent), ) - def _add_subset_relations_to_spec(self, spec): + def _add_subset_relations_to_spec(self, spec: SetMeasurementsSpec): # Adds relations for cumulative measurements. for metric in self._metric_reports: for subset_relationship in self._metric_reports[ @@ -434,7 +434,7 @@ def _add_subset_relations_to_spec(self, spec): # TODO(@ple13):Use timestamp to check if the last cumulative measurement covers # the whole campaign. 
If yes, make sure that the two measurements are equal # instead of less than or equal. - def _add_cumulative_whole_campaign_relations_to_spec(self, spec): + def _add_cumulative_whole_campaign_relations_to_spec(self, spec: SetMeasurementsSpec): # Adds relations between cumulative and whole campaign measurements. # For an edp combination, the last cumulative measurement is less than or # equal to the whole campaign measurement. @@ -454,7 +454,7 @@ def _add_cumulative_whole_campaign_relations_to_spec(self, spec): edp_combination)), ) - def _add_metric_relations_to_spec(self, spec): + def _add_metric_relations_to_spec(self, spec: SetMeasurementsSpec): # metric1>=metric#2 for parent_metric in self._metric_subsets_by_parent: for child_metric in self._metric_subsets_by_parent[parent_metric]: @@ -490,7 +490,7 @@ def _add_metric_relations_to_spec(self, spec): edp_combination)), ) - def _add_cumulative_relations_to_spec(self, spec): + def _add_cumulative_relations_to_spec(self, spec: SetMeasurementsSpec): for metric in self._metric_reports.keys(): for edp_combination in self._metric_reports[ metric].get_cumulative_edp_combinations(): @@ -514,7 +514,7 @@ def _add_cumulative_relations_to_spec(self, spec): edp_combination, period + 1)), ) - def _add_set_relations_to_spec(self, spec): + def _add_set_relations_to_spec(self, spec: SetMeasurementsSpec): # sum of subsets >= union for each period. self._add_cover_relations_to_spec(spec) @@ -530,7 +530,7 @@ def _add_set_relations_to_spec(self, spec): # Last cumulative measurement <= whole campaign measurement. self._add_cumulative_whole_campaign_relations_to_spec(spec) - def _add_measurements_to_spec(self, spec): + def _add_measurements_to_spec(self, spec: SetMeasurementsSpec): for metric in self._metric_reports.keys(): for edp_combination in self._metric_reports[ metric].get_cumulative_edp_combinations(): @@ -554,20 +554,20 @@ def _get_measurement_index(self, measurement: Measurement) -> int: return self._measurement_name_to_index[measurement.name] def _get_cumulative_measurement_index(self, metric: str, - edp_combination: str, period: int) -> int: + edp_combination: FrozenSet[str], period: int) -> int: return self._get_measurement_index( self._metric_reports[metric].get_cumulative_measurement( edp_combination, period) ) def _get_whole_campaign_measurement_index(self, metric: str, - edp_combination: str) -> int: + edp_combination: FrozenSet[str]) -> int: return self._get_measurement_index( self._metric_reports[metric].get_whole_campaign_measurement( edp_combination) ) - def _metric_report_from_solution(self, metric, solution) -> "MetricReport": + def _metric_report_from_solution(self, metric: str, solution: Solution) -> "MetricReport": solution_time_series = {} solution_whole_campaign = {} for edp_combination in self._metric_reports[ From e9799789bd60bdb277d0936e7f74590772aae58c Mon Sep 17 00:00:00 2001 From: Phi Hung Le Date: Mon, 2 Dec 2024 19:34:39 +0000 Subject: [PATCH 18/18] Address comments. 
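The new test comments spell out the invariants the corrected report is checked against (monotone cumulative series, cover and subset bounds, MRC <= AMI, cumulative <= whole campaign). A small end-to-end sketch of what that means in practice, using the same constructors and import paths as the tests and the post-processing tool (so it assumes the same PYTHONPATH layout; the EDP names, values, and expected outputs below are made up and approximate):

from noiseninja.noised_measurements import Measurement
from report.report import MetricReport, Report

# One-period AMI report where the noisy reach of edp1 alone (52) exceeds the
# noisy union reach (50), violating subset <= parent.
report = Report(
    metric_reports={
        "ami": MetricReport(
            reach_time_series={
                frozenset({"edp1", "edp2"}): [Measurement(50, 1, "union_reach")],
                frozenset({"edp1"}): [Measurement(52, 1, "edp1_reach")],
            },
            reach_whole_campaign={},
        )
    },
    metric_subsets_by_parent={},
    cumulative_inconsistency_allowed_edp_combinations={},
)

# The correction should pull both measurements toward ~51 so the subset
# constraint holds while staying close to the noisy values.
corrected = report.get_corrected_report()
ami = corrected.get_metric_report("ami")
print(ami.get_cumulative_measurement(frozenset({"edp1"}), 0).value)
print(ami.get_cumulative_measurement(frozenset({"edp1", "edp2"}), 0).value)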
--- .../reporting/postprocessing/report/report.py | 46 +++++++++++++------ .../postprocessing/report/test_report.py | 44 ++++++++++++++++++ 2 files changed, 75 insertions(+), 15 deletions(-) diff --git a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py index 033d9da8e3a..272e80d8646 100644 --- a/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py +++ b/src/main/python/wfa/measurement/reporting/postprocessing/report/report.py @@ -24,7 +24,8 @@ from functools import reduce -def get_subset_relationships(edp_combinations: list[FrozenSet[str]]) -> list[Tuple[FrozenSet[str], FrozenSet[str]]]: +def get_subset_relationships(edp_combinations: list[FrozenSet[str]]) -> list[ + Tuple[FrozenSet[str], FrozenSet[str]]]: """Returns a list of tuples where first element in the tuple is the parent and second element is the subset.""" subset_relationships = [] @@ -36,7 +37,8 @@ def get_subset_relationships(edp_combinations: list[FrozenSet[str]]) -> list[Tup return subset_relationships -def is_cover(target_set: FrozenSet[str], possible_cover: list[FrozenSet[str]]) -> bool: +def is_cover(target_set: FrozenSet[str], + possible_cover: list[FrozenSet[str]]) -> bool: """Checks if a collection of sets covers a target set. Args: @@ -53,7 +55,8 @@ def is_cover(target_set: FrozenSet[str], possible_cover: list[FrozenSet[str]]) - return union_of_possible_cover == target_set -def get_covers(target_set: FrozenSet[str], other_sets: list[FrozenSet[str]]) -> list[Tuple[FrozenSet[str], list[FrozenSet[str]]]]: +def get_covers(target_set: FrozenSet[str], other_sets: list[FrozenSet[str]]) -> \ +list[Tuple[FrozenSet[str], list[FrozenSet[str]]]]: """Finds all combinations of sets from `other_sets` that cover `target_set`. This function identifies all possible combinations of sets within `other_sets` @@ -69,7 +72,9 @@ def get_covers(target_set: FrozenSet[str], other_sets: list[FrozenSet[str]]) -> The first element of the tuple is the `target_set`, and the second element is a tuple containing the sets from `other_sets` that cover it. """ - def generate_all_length_combinations(data: list[Any]) -> list[tuple[Any, ...]]: + + def generate_all_length_combinations(data: list[Any]) -> list[ + tuple[Any, ...]]: """Generates all possible combinations of elements from a list. Args: @@ -95,7 +100,8 @@ def generate_all_length_combinations(data: list[Any]) -> list[tuple[Any, ...]]: return cover_relationship -def get_cover_relationships(edp_combinations: list[FrozenSet[str]]) -> list[Tuple[FrozenSet[str], list[FrozenSet[str]]]]: +def get_cover_relationships(edp_combinations: list[FrozenSet[str]]) -> list[ + Tuple[FrozenSet[str], list[FrozenSet[str]]]]: """Returns covers as defined here: # https://en.wikipedia.org/wiki/Cover_(topology). For each set (s_i) in the list, enumerate combinations of all sets excluding this one. 
For each of these considered combinations, take their union and check if it is equal to @@ -165,11 +171,13 @@ def sample_with_noise(self) -> "MetricReport": } ) - def get_cumulative_measurement(self, edp_combination: FrozenSet[str], period: int) -> Measurement: + def get_cumulative_measurement(self, edp_combination: FrozenSet[str], + period: int) -> Measurement: return self._reach_time_series[edp_combination][ period] - def get_whole_campaign_measurement(self, edp_combination: FrozenSet[str]) -> Measurement: + def get_whole_campaign_measurement(self, + edp_combination: FrozenSet[str]) -> Measurement: return self._reach_whole_campaign[edp_combination] def get_cumulative_edp_combinations(self) -> set[FrozenSet[str]]: @@ -187,16 +195,20 @@ def get_whole_campaign_edp_combinations_count(self) -> int: def get_number_of_periods(self) -> int: return len(next(iter(self._reach_time_series.values()))) - def get_cumulative_subset_relationships(self) -> list[Tuple[FrozenSet[str], FrozenSet[str]]]: + def get_cumulative_subset_relationships(self) -> list[ + Tuple[FrozenSet[str], FrozenSet[str]]]: return get_subset_relationships(list(self._reach_time_series)) - def get_whole_campaign_subset_relationships(self) -> list[Tuple[FrozenSet[str], FrozenSet[str]]]: + def get_whole_campaign_subset_relationships(self) -> list[ + Tuple[FrozenSet[str], FrozenSet[str]]]: return get_subset_relationships(list(self._reach_whole_campaign)) - def get_cumulative_cover_relationships(self) -> list[Tuple[FrozenSet[str], list[FrozenSet[str]]]]: + def get_cumulative_cover_relationships(self) -> list[ + Tuple[FrozenSet[str], list[FrozenSet[str]]]]: return get_cover_relationships(list(self._reach_time_series)) - def get_whole_campaign_cover_relationships(self) -> list[Tuple[FrozenSet[str], list[FrozenSet[str]]]]: + def get_whole_campaign_cover_relationships(self) -> list[ + Tuple[FrozenSet[str], list[FrozenSet[str]]]]: return get_cover_relationships(list(self._reach_whole_campaign)) @staticmethod @@ -224,7 +236,8 @@ class Report: which inconsistencies in cumulative measurements are - allowed. + allowed. This is for + TV measurements. """ def __init__( @@ -307,7 +320,8 @@ def get_corrected_report(self) -> "Report": solution = Solver(spec).solve_and_translate() return self.report_from_solution(solution, spec) - def report_from_solution(self, solution: Solution, spec: SetMeasurementsSpec) -> "Report": + def report_from_solution(self, solution: Solution, + spec: SetMeasurementsSpec) -> "Report": return Report( metric_reports={ metric: self._metric_report_from_solution(metric, solution) @@ -434,7 +448,8 @@ def _add_subset_relations_to_spec(self, spec: SetMeasurementsSpec): # TODO(@ple13):Use timestamp to check if the last cumulative measurement covers # the whole campaign. If yes, make sure that the two measurements are equal # instead of less than or equal. - def _add_cumulative_whole_campaign_relations_to_spec(self, spec: SetMeasurementsSpec): + def _add_cumulative_whole_campaign_relations_to_spec(self, + spec: SetMeasurementsSpec): # Adds relations between cumulative and whole campaign measurements. # For an edp combination, the last cumulative measurement is less than or # equal to the whole campaign measurement. 
@@ -567,7 +582,8 @@ def _get_whole_campaign_measurement_index(self, metric: str, edp_combination
         )
     )
 
-  def _metric_report_from_solution(self, metric: str, solution: Solution) -> "MetricReport":
+  def _metric_report_from_solution(self, metric: str,
+                                   solution: Solution) -> "MetricReport":
     solution_time_series = {}
     solution_whole_campaign = {}
     for edp_combination in self._metric_reports[
diff --git a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py
index 8bd01cc7b3f..251fd22879e 100644
--- a/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py
+++ b/src/test/python/wfa/measurement/reporting/postprocessing/report/test_report.py
@@ -733,6 +733,10 @@ def test_get_corrected_single_metric_report(self):
         cumulative_inconsistency_allowed_edp_combinations={},
     )
 
+    # The corrected report should be consistent:
+    # 1. reach[edp1][0] <= reach[edp1 U edp2][0]
+    # 2. reach[edp2][0] <= reach[edp1 U edp2][0]
+    # 3. reach[edp1 U edp2][0] <= reach[edp1][0] + reach[edp2][0].
    corrected = report.get_corrected_report()
 
     expected = Report(
@@ -779,6 +783,11 @@ def test_can_correct_time_series(self):
         cumulative_inconsistency_allowed_edp_combinations={},
     )
 
+    # The corrected report should be consistent:
+    # 1. All the time series reaches are monotonically increasing, e.g.
+    # reach[edp1][i] <= reach[edp1][i+1].
+    # 2. The reach of a child set is less than or equal to the reach of its
+    # parent set for all periods, e.g. reach[edp1][i] <= reach[edp1 U edp2][i].
     corrected = report.get_corrected_report()
 
     expected = Report(
@@ -857,6 +866,16 @@ def test_can_correct_time_series_for_three_edps(self):
         cumulative_inconsistency_allowed_edp_combinations={},
     )
 
+    # The corrected report should be consistent:
+    # 1. All the time series reaches are monotonically increasing, e.g.
+    # reach[edp1][i] <= reach[edp1][i+1].
+    # 2. The reach of a cover set is less than or equal to the sum of the
+    # reaches of the sets it covers. For example, for each period i,
+    # reach[edp1 U edp2][i] <= reach[edp1][i] + reach[edp2][i],
+    # or reach[edp1 U edp2 U edp3][i] <= reach[edp1 U edp2][i] + reach[edp3][i],
+    # etc.
+    # 3. The reach of a child set is less than or equal to the reach of its
+    # parent set for all periods, e.g. reach[edp1][i] <= reach[edp1 U edp2][i].
     corrected = report.get_corrected_report()
 
     expected = Report(
@@ -976,6 +995,18 @@ def test_correct_report_with_both_time_series_and_whole_campaign_measurements_th
         cumulative_inconsistency_allowed_edp_combinations={},
     )
 
+    # The corrected report should be consistent:
+    # 1. All the time series reaches are monotonically increasing, e.g.
+    # reach[edp1][i] <= reach[edp1][i+1].
+    # 2. The reach of a cover set is less than or equal to the sum of the
+    # reaches of the sets it covers. For example, for each period i,
+    # reach[edp1 U edp2][i] <= reach[edp1][i] + reach[edp2][i],
+    # or reach[edp1 U edp2 U edp3][i] <= reach[edp1 U edp2][i] + reach[edp3][i],
+    # etc.
+    # 3. The reach of a child set is less than or equal to the reach of its
+    # parent set for all periods, e.g. reach[edp1][i] <= reach[edp1 U edp2][i].
+    # 4. Time series reaches are less than or equal to the whole campaign reach,
+    # e.g. cumulative_reach[edp1][1] <= whole_campaign_reach[edp1].
     corrected = report.get_corrected_report()
 
     expected = Report(
@@ -1092,6 +1123,10 @@ def test_correct_report_with_whole_campaign_has_more_edp_combinations(self):
         cumulative_inconsistency_allowed_edp_combinations={},
     )
 
+    # The corrected report should be consistent between the time series reaches
+    # and the whole campaign reach: time series reaches are less than or equal
+    # to the whole campaign reach, e.g. cumulative_reach[edp1][1] <=
+    # whole_campaign_reach[edp1].
     corrected = report.get_corrected_report()
 
     expected = Report(
@@ -1171,6 +1206,9 @@ def test_allows_incorrect_time_series(self):
 
     corrected = report.get_corrected_report()
 
+    # The corrected report should be consistent: all the time series reaches
+    # are monotonically increasing, e.g. reach[edp1][i] <= reach[edp1][i+1],
+    # except for the one in the exception list, e.g. edp1.
     expected = Report(
         metric_reports={
             ami: MetricReport(
@@ -1225,6 +1263,9 @@ def test_can_correct_related_metrics(self):
         cumulative_inconsistency_allowed_edp_combinations={},
     )
 
+    # The corrected report should be consistent across metrics: MRC
+    # measurements are less than or equal to the corresponding AMI
+    # measurements, e.g. mrc_reach[edp1][0] <= ami_reach[edp1][0].
     corrected = report.get_corrected_report()
 
     expected = Report(
@@ -1284,6 +1325,9 @@ def test_get_corrected_multiple_metric_report_with_different_edp_combinations(
         cumulative_inconsistency_allowed_edp_combinations={},
    )
 
+    # The corrected report should be consistent across metrics: MRC
+    # measurements are less than or equal to the corresponding AMI
+    # measurements, e.g. mrc_reach[edp1][0] <= ami_reach[edp1][0].
     corrected = report.get_corrected_report()
 
     expected = Report(
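
The subset and cover helpers reformatted earlier in this patch (get_subset_relationships, is_cover, get_covers) reduce to two checks over EDP combinations: set inclusion for subset pairs, and union-equality for covers. The following is a minimal standalone sketch of that logic, using made-up EDP combinations and plain itertools rather than the module's own API.

from itertools import combinations

edp_combinations = [
    frozenset({"edp1"}),
    frozenset({"edp2"}),
    frozenset({"edp1", "edp2"}),
]

# Subset relationships: (parent, subset) pairs, mirroring the behavior
# documented for get_subset_relationships.
subset_relationships = [
    (parent, child)
    for parent in edp_combinations
    for child in edp_combinations
    if child != parent and child.issubset(parent)
]
# Two pairs: ({edp1, edp2}, {edp1}) and ({edp1, edp2}, {edp2}).

# Covers of a target set: combinations of the other sets whose union equals
# the target, mirroring the behavior documented for is_cover/get_covers.
target_set = frozenset({"edp1", "edp2"})
other_sets = [s for s in edp_combinations if s != target_set]
covers = [
    combo
    for r in range(1, len(other_sets) + 1)
    for combo in combinations(other_sets, r)
    if frozenset().union(*combo) == target_set
]
# One cover: ({edp1}, {edp2}).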
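
The comments added to test_report.py above describe the invariants a corrected report is expected to satisfy. The sketch below spells them out as plain assertions over hypothetical, already-consistent reach values (toy dicts, not the actual Report or MetricReport classes), assuming two EDPs and two periods.

cumulative_reach = {
    frozenset({"edp1"}): [40, 60],
    frozenset({"edp2"}): [30, 50],
    frozenset({"edp1", "edp2"}): [60, 100],
}
whole_campaign_reach = {
    frozenset({"edp1"}): 60,
    frozenset({"edp2"}): 50,
    frozenset({"edp1", "edp2"}): 100,
}

union = frozenset({"edp1", "edp2"})
for child in (frozenset({"edp1"}), frozenset({"edp2"})):
  series = cumulative_reach[child]
  # 1. Cumulative time series are non-decreasing across periods.
  assert all(series[i] <= series[i + 1] for i in range(len(series) - 1))
  # 2. A child set never exceeds its parent set in any period.
  assert all(c <= p for c, p in zip(series, cumulative_reach[union]))
  # 4. The cumulative series never exceeds the whole-campaign reach.
  assert all(r <= whole_campaign_reach[child] for r in series)

# 3. A cover set is bounded by the sum of the sets covering it, per period.
assert all(
    u <= r1 + r2
    for u, r1, r2 in zip(
        cumulative_reach[union],
        cumulative_reach[frozenset({"edp1"})],
        cumulative_reach[frozenset({"edp2"})],
    )
)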
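
The last two test comments cover the cross-metric relation: MRC reach may not exceed AMI reach for the same EDP combination and period. A sketch of that single check, again over hypothetical values:

ami_reach = {frozenset({"edp1"}): [50, 80]}
mrc_reach = {frozenset({"edp1"}): [45, 72]}

for edp_combination, mrc_series in mrc_reach.items():
  # MRC reach is bounded by AMI reach in every period.
  assert all(m <= a for m, a in zip(mrc_series, ami_reach[edp_combination]))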