Skip to content

Commit

Permalink
address reviewer comments
Browse files Browse the repository at this point in the history
Signed-off-by: Bangtian Liu <liubangtian@gmail.com>
  • Loading branch information
bangtianliu committed Jan 10, 2025
1 parent e29af21 commit f4be426
Show file tree
Hide file tree
Showing 2 changed files with 74 additions and 49 deletions.
53 changes: 32 additions & 21 deletions tuner/tuner/libtuner.py
Original file line number Diff line number Diff line change
Expand Up @@ -221,33 +221,42 @@ def validate_devices(user_devices: list[str]) -> None:
)


def validate_benchmark_results(
def get_valid_benchmark_results(
benchmark_results: list[BenchmarkResult],
) -> list[BenchmarkResult]:
"""
Filter the benchmark_results list to return list with finite `time` values.
"""
filtered_benchmark_results = [r for r in benchmark_results if math.isfinite(r.time)]
if len(filtered_benchmark_results) == 0:
logging.error("No successful candidate benchmarks.")

return filtered_benchmark_results


def map_baseline_by_device(baseline_results: list[BenchmarkResult]) -> dict[str, float]:
return {r.device_id: r.time for r in baseline_results}
def check_baseline_devices_uniqueness(baseline_results: list[BenchmarkResult]) -> bool:
seen = set()
for result in baseline_results:
if result.device_id in seen:
return False
seen.add(result.device_id)
return True


def validate_baselines_device_ids_match(
first_baseline_by_device: dict[str, float],
second_baseline_by_device: dict[str, float],
) -> bool:
return first_baseline_by_device.keys() == second_baseline_by_device.keys()
def map_baseline_by_device(baseline_results: list[BenchmarkResult]) -> dict[str, float]:
return {r.device_id: r.time for r in baseline_results}


def validate_baseline_regression(
first_baseline_by_device: dict[str, float],
second_baseline_by_device: dict[str, float],
def detect_baseline_regression(
first_baseline_results: list[BenchmarkResult],
second_baseline_results: list[BenchmarkResult],
) -> list[str]:
"""
Detects performance regressions between two sets of baseline results.
"""
regression_device_ids = []

first_baseline_by_device = map_baseline_by_device(first_baseline_results)
second_baseline_by_device = map_baseline_by_device(second_baseline_results)
for device_id in first_baseline_by_device:
if device_id not in second_baseline_by_device:
continue
Expand Down Expand Up @@ -899,7 +908,7 @@ def select_best_benchmark_results(
baseline_results: list[BenchmarkResult],
num_candidates: Optional[int],
) -> list[BenchmarkResult]:
filtered_candidate_results = validate_benchmark_results(candidate_results)
filtered_candidate_results = get_valid_benchmark_results(candidate_results)
if len(filtered_candidate_results) == 0:
logging.error("No successful candidate benchmarks.")
return []
Expand Down Expand Up @@ -972,8 +981,8 @@ def benchmark(
tuning_client=tuning_client,
candidate_trackers=candidate_trackers,
)

baseline_times_by_device = map_baseline_by_device(baseline_results)
if not check_baseline_devices_uniqueness(baseline_results):
logging.warning("Duplicate device IDs detected in the first baseline results.")

candidate_indices = [i for i in compiled_candidates if i != 0]
candidate_results = benchmark_candidates(
Expand All @@ -992,15 +1001,17 @@ def benchmark(
tuning_client=tuning_client,
candidate_trackers=candidate_trackers,
)
post_baseline_times_by_device = map_baseline_by_device(post_baseline_results)

if validate_baselines_device_ids_match(
baseline_times_by_device, post_baseline_times_by_device
):
if not check_baseline_devices_uniqueness:
logging.warning("Duplicate device IDs detected in the second baseline results.")

first_baseline_by_device = map_baseline_by_device(baseline_results)
second_baseline_by_device = map_baseline_by_device(post_baseline_results)
if first_baseline_by_device.keys() != second_baseline_by_device.keys():
logging.warning("Device ID mismatch between baseline runs.")

regression_devices = validate_baseline_regression(
baseline_times_by_device, post_baseline_results
regression_devices = detect_baseline_regression(
baseline_results, post_baseline_results
)
if regression_devices:
logging.warning(
Expand Down
70 changes: 42 additions & 28 deletions tuner/tuner/libtuner_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -238,57 +238,71 @@ def test_validate_benchmark_results():
libtuner.BenchmarkResult(0, math.inf, "hip://0"),
]

result = libtuner.validate_benchmark_results(benchmark_results)
result = libtuner.get_valid_benchmark_results(benchmark_results)
assert result == []

benchmark_results = [
libtuner.BenchmarkResult(0, math.inf, "hip://0"),
libtuner.BenchmarkResult(0, 0.1, "hip://1"),
]
result = libtuner.validate_benchmark_results(benchmark_results)
result = libtuner.get_valid_benchmark_results(benchmark_results)
assert len(result) == 1
assert result[0].candidate_id == 0
assert result[0].time == 0.1
assert result[0].device_id == "hip://1"


def test_validate_baselines_device_id_match():
first_baseline = {"hip://0": 1000.0, "hip://1": 2000.0}
second_baseline = {"hip://1": 1500.0, "hip://2": 2500.0}

result = libtuner.validate_baselines_device_ids_match(
first_baseline, second_baseline
)
assert result is False

first_baseline = {"hip://0": 1000.0, "hip://1": 2000.0}
second_baseline = {"hip://0": 1500.0, "hip://1": 2500.0}
def test_check_baseline_devices_uniqueness():
baseline_results = [
libtuner.BenchmarkResult(0, 1000.0, "hip://0"),
libtuner.BenchmarkResult(0, 2000.0, "hip://1"),
libtuner.BenchmarkResult(0, 3000.0, "hip://2"),
]
assert libtuner.check_baseline_devices_uniqueness(baseline_results)

result = libtuner.validate_baselines_device_ids_match(
first_baseline, second_baseline
)
assert result is True
baseline_results = [
libtuner.BenchmarkResult(0, 1000.0, "hip://0"),
libtuner.BenchmarkResult(0, 2000.0, "hip://0"),
libtuner.BenchmarkResult(0, 3000.0, "hip://2"),
]
assert not libtuner.check_baseline_devices_uniqueness(baseline_results)


def test_validate_baseline_regression():
first_baseline = {"hip://0": 1000.0, "hip://1": 2000.0}
second_baseline = {"hip://0": 1100.0, "hip://1": 1900.0}
regression_devices = libtuner.validate_baseline_regression(
def test_detect_baseline_regression():
first_baseline = [
libtuner.BenchmarkResult(0, 1000.0, "hip://0"),
libtuner.BenchmarkResult(0, 2000.0, "hip://1"),
]
second_baseline = [
libtuner.BenchmarkResult(0, 1100.0, "hip://0"),
libtuner.BenchmarkResult(0, 1900.0, "hip://1"),
]
regression_devices = libtuner.detect_baseline_regression(
first_baseline, second_baseline
)
assert regression_devices == ["hip://0"]

first_baseline = {"hip://0": 1000.0, "hip://1": 2000.0}
second_baseline = {"hip://0": 1000.0, "hip://1": 2000.0}

regression_devices = libtuner.validate_baseline_regression(
first_baseline = [
libtuner.BenchmarkResult(0, 1000.0, "hip://0"),
libtuner.BenchmarkResult(0, 2000.0, "hip://1"),
]
second_baseline = [
libtuner.BenchmarkResult(0, 1000.0, "hip://0"),
libtuner.BenchmarkResult(0, 1000.0, "hip://1"),
]
regression_devices = libtuner.detect_baseline_regression(
first_baseline, second_baseline
)
assert regression_devices == []

first_baseline = {"hip://0": 1000.0, "hip://1": 2000.0}
second_baseline = {"hip://0": 1100.0}
regression_devices = libtuner.validate_baseline_regression(
first_baseline = [
libtuner.BenchmarkResult(0, 1000.0, "hip://0"),
libtuner.BenchmarkResult(0, 2000.0, "hip://1"),
]
second_baseline = [
libtuner.BenchmarkResult(0, 1100.0, "hip://0"),
]
regression_devices = libtuner.detect_baseline_regression(
first_baseline, second_baseline
)
assert regression_devices == ["hip://0"]

0 comments on commit f4be426

Please sign in to comment.