fluids: Add smartsim regression testing
Requires that the fluids code be built with SMARTREDIS_DIR set, and in
an environment with SmartSim (Python library) installed as well.

Co-authored-by: Zach Atkins <zach.atkins@colorado.edu>
jrwrigh committed Oct 11, 2023
1 parent 0b9902b commit f67273f
Showing 6 changed files with 258 additions and 37 deletions.
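
For anyone reproducing this outside the CI runners configured below, a rough local setup might look like the following sketch. The virtualenv name, SmartRedis install path, and build directory are illustrative assumptions, not the runner-specific paths added to .gitlab-ci.yml.

# Sketch only: create an environment with SmartSim and the Python SmartRedis client
python3 -m venv smartsim-env && source smartsim-env/bin/activate
pip install smartsim smartredis numpy
smart build                                         # SmartSim's step to set up its Redis backend
# Point the fluids build at a SmartRedis C/C++ client installation
export SMARTREDIS_DIR=/path/to/smartredis/install
# Build the fluids example (assumes libCEED's default build directory, build/)
make -j build/fluids-navierstokes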
3 changes: 3 additions & 0 deletions .gitlab-ci.yml
@@ -42,6 +42,7 @@ noether-cpu:
# Libraries for examples
# -- PETSc with HIP (minimal)
- export PETSC_DIR=/projects/petsc PETSC_ARCH=mpich-hip && git -C $PETSC_DIR -c safe.directory=$PETSC_DIR describe
- source /home/jawr8143/SmartSimTestingSoftware/bin/activate && export SMARTREDIS_DIR=/home/jawr8143/SmartSimTestingSoftware/smartredis/install
- echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
- make -k -j$NPROC_CPU BACKENDS="$BACKENDS_CPU" JUNIT_BATCH="cpu" junit search="petsc fluids solids"
# -- MFEM v4.2
@@ -111,6 +112,7 @@ noether-rocm:
# Libraries for examples
# -- PETSc with HIP (minimal)
- export PETSC_DIR=/projects/petsc PETSC_ARCH=mpich-hip && git -C $PETSC_DIR -c safe.directory=$PETSC_DIR describe
- source /home/jawr8143/SmartSimTestingSoftware/bin/activate && export SMARTREDIS_DIR=/home/jawr8143/SmartSimTestingSoftware/smartredis/install
- echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
- make -k -j$NPROC_GPU BACKENDS="$BACKENDS_GPU" JUNIT_BATCH="hip" junit search="petsc fluids solids"
# -- MFEM v4.2
@@ -234,6 +236,7 @@ noether-cuda:
# Libraries for examples
# -- PETSc with CUDA (minimal)
- export PETSC_DIR=/projects/petsc PETSC_ARCH=mpich-cuda-O PETSC_OPTIONS='-use_gpu_aware_mpi 0' && git -C $PETSC_DIR -c safe.directory=$PETSC_DIR describe
- source /home/jawr8143/SmartSimTestingSoftware/bin/activate && export SMARTREDIS_DIR=/home/jawr8143/SmartSimTestingSoftware/smartredis/install
- echo "-------------- PETSc ---------------" && make -C $PETSC_DIR info
- make -k -j$NPROC_GPU JUNIT_BATCH="cuda" junit BACKENDS="$BACKENDS_GPU" search="petsc fluids solids"
# Report status
9 changes: 7 additions & 2 deletions Makefile
@@ -249,8 +249,10 @@ nekexamples := $(OBJDIR)/nek-bps
petscexamples.c := $(wildcard examples/petsc/*.c)
petscexamples := $(petscexamples.c:examples/petsc/%.c=$(OBJDIR)/petsc-%)
# Fluid Dynamics Examples
fluidsexamples.c := $(sort $(wildcard examples/fluids/*.c))
fluidsexamples := $(fluidsexamples.c:examples/fluids/%.c=$(OBJDIR)/fluids-%)
fluidsexamples.c := $(sort $(wildcard examples/fluids/*.c))
fluidsexamples.py := examples/fluids/smartsim_regression_framework.py
fluidsexamples := $(fluidsexamples.c:examples/fluids/%.c=$(OBJDIR)/fluids-%)
fluidsexamples += $(fluidsexamples.py:examples/fluids/%.py=$(OBJDIR)/fluids-py-%)
# Solid Mechanics Examples
solidsexamples.c := $(sort $(wildcard examples/solids/*.c))
solidsexamples := $(solidsexamples.c:examples/solids/%.c=$(OBJDIR)/solids-%)
@@ -614,6 +616,9 @@ $(OBJDIR)/fluids-% : examples/fluids/%.c examples/fluids/src/*.c examples/fluids
		PETSC_DIR="$(abspath $(PETSC_DIR))" OPT="$(OPT)" $*
	cp examples/fluids/$* $@

$(OBJDIR)/fluids-py-% : examples/fluids/%.py
	cp $< $@

$(OBJDIR)/solids-% : examples/solids/%.c examples/solids/%.h \
examples/solids/problems/*.c examples/solids/src/*.c \
examples/solids/include/*.h examples/solids/problems/*.h examples/solids/qfunctions/*.h \
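The new fluids-py-% pattern rule just copies the Python driver into the build directory so the test harness can discover it alongside the compiled examples. A hypothetical manual invocation, assuming the default build/ object directory:

make build/fluids-py-smartsim_regression_framework
ls -l build/fluids-py-smartsim_regression_framework   # verbatim copy of the .py script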
195 changes: 195 additions & 0 deletions examples/fluids/smartsim_regression_framework.py
@@ -0,0 +1,195 @@
#!/usr/bin/env python3
from junit_xml import TestCase
from smartsim import Experiment
from smartsim.settings import RunSettings
from smartredis import Client
import numpy as np
from pathlib import Path
import argparse
import traceback
import sys
import time
from typing import Tuple
import os
import shutil
import logging

# autopep8 off
sys.path.insert(0, (Path(__file__).parents[3] / "tests/junit-xml").as_posix())
# autopep8 on

logging.disable(logging.WARNING)

fluids_example_dir = Path(__file__).parent.absolute()


class NoError(Exception):
    pass


class SmartSimTest(object):

    def __init__(self, directory_path: Path):
        self.exp: Experiment
        self.database = None
        self.directory_path: Path = directory_path
        self.original_path: Path

    def setup(self):
        """To create the test directory and start SmartRedis database"""
        self.original_path = Path(os.getcwd())

        if self.directory_path.exists() and self.directory_path.is_dir():
            shutil.rmtree(self.directory_path)
        self.directory_path.mkdir()
        os.chdir(self.directory_path)

        PORT = 6780
        self.exp = Experiment("test", launcher="local")
        self.database = self.exp.create_database(port=PORT, batch=False, interface="lo")
        self.exp.generate(self.database)
        self.exp.start(self.database)

        # SmartRedis will complain if these aren't set
        os.environ['SR_LOG_FILE'] = 'R'
        os.environ['SR_LOG_LEVEL'] = 'INFO'

    def test(self, ceed_resource) -> Tuple[bool, Exception, str]:
        client = None
        arguments = []
        try:
            exe_path = "../../build/fluids-navierstokes"
            arguments = [
                '-ceed', ceed_resource,
                '-options_file', (fluids_example_dir / 'blasius.yaml').as_posix(),
                '-ts_max_steps', '2',
                '-diff_filter_grid_based_width',
                '-diff_filter_width_scaling', '1,0.7,1',
                '-ts_monitor', '-snes_monitor',
                '-diff_filter_ksp_max_it', '50', '-diff_filter_ksp_monitor',
                '-degree', '1',
                '-sgs_train_enable',
                '-sgs_train_put_tensor_interval', '2',
            ]

            run_settings = RunSettings(exe_path, exe_args=arguments)

            client_exp = self.exp.create_model(f"client_{ceed_resource.replace('/', '_')}", run_settings)

            # Start the client model
            self.exp.start(client_exp, summary=False, block=True)

            client = Client(cluster=False)

            assert client.poll_tensor("sizeInfo", 250, 5)
            assert np.all(client.get_tensor("sizeInfo") == np.array([5002, 12, 6, 1, 1, 0]))

            assert client.poll_tensor("check-run", 250, 5)
            assert client.get_tensor("check-run")[0] == 1

            assert client.poll_tensor("tensor-ow", 250, 5)
            assert client.get_tensor("tensor-ow")[0] == 1

            assert client.poll_tensor("step", 250, 5)
            assert client.get_tensor("step")[0] == 2

            assert client.poll_tensor("y.0", 250, 5)
            test_data_path = fluids_example_dir / "tests-output/y0_output.npy"
            assert test_data_path.is_file()

            y0_correct_value = np.load(test_data_path)
            y0_database_value = client.get_tensor("y.0")
            rtol = 1e-8
            atol = 1e-8
            if not np.allclose(y0_database_value, y0_correct_value, atol=atol, rtol=rtol):
                # Check whether the S-frame-oriented vorticity vector's second component is just flipped.
                # This can happen due to the eigenvector ordering changing based on whichever one is closest to the vorticity vector.
                # If two eigenvectors are very close to the vorticity vector, this can cause the ordering to flip.
                # This flipping of the vorticity vector is not incorrect, just a known sensitivity of the model.

                total_tolerances = atol + rtol * np.abs(y0_correct_value)  # mimic np.allclose tolerance calculation
                idx_notclose = np.where(np.abs(y0_database_value - y0_correct_value) > total_tolerances)
                if not np.all(idx_notclose[1] == 4):
                    # values other than vorticity are not close
                    test_fail = True
                else:
                    database_vorticity = y0_database_value[idx_notclose]
                    correct_vorticity = y0_correct_value[idx_notclose]
                    test_fail = False if np.allclose(-database_vorticity, correct_vorticity,
                                                     atol=atol, rtol=rtol) else True

                if test_fail:
                    database_output_path = Path(
                        f"./y0_database_values_{ceed_resource.replace('/', '_')}.npy").absolute()
                    np.save(database_output_path, y0_database_value)
                    raise AssertionError(f"Array values in database max difference: {np.max(np.abs(y0_correct_value - y0_database_value))}\n"
                                         f"Array saved to {database_output_path.as_posix()}")

            client.flush_db([os.environ["SSDB"]])
            output = (True, NoError(), ' '.join(arguments))
        except Exception as e:
            output = (False, e, ' '.join(arguments))

        finally:
            if client:
                client.flush_db([os.environ["SSDB"]])

        return output

    def test_junit(self, ceed_resource):
        start: float = time.time()

        passTest, exception, args = self.test(ceed_resource)

        output = "" if isinstance(exception, NoError) else ''.join(
            traceback.TracebackException.from_exception(exception).format())

        test_case = TestCase(f'SmartSim Test {ceed_resource}',
                             elapsed_sec=time.time() - start,
                             timestamp=time.strftime(
                                 '%Y-%m-%d %H:%M:%S %Z', time.localtime(start)),
                             stdout=output,
                             stderr=output,
                             allow_multiple_subelements=True)
        test_case.args = args
        if not passTest and 'occa' in ceed_resource:
            test_case.add_skipped_info("OCCA mode not supported")
        elif not passTest:
            test_case.add_failure_info("exception", output)

        return test_case

    def teardown(self):
        self.exp.stop(self.database)
        os.chdir(self.original_path)


if __name__ == "__main__":
    parser = argparse.ArgumentParser('Testing script for SmartSim integration')
    parser.add_argument(
        '-c',
        '--ceed-backends',
        type=str,
        nargs='*',
        default=['/cpu/self'],
        help='libCEED backend to use with convergence tests')
    args = parser.parse_args()

    test_dir = fluids_example_dir / "test_dir"
    print("Setting up database...", end='')
    test_framework = SmartSimTest(test_dir)
    test_framework.setup()
    print(" Done!")
    for ceed_resource in args.ceed_backends:
        print("working on " + ceed_resource + ' ...', end='')
        passTest, exception, _ = test_framework.test(ceed_resource)

        if passTest:
            print("Passed!")
        else:
            print("Failed!", file=sys.stderr)
            print('\t' + ''.join(traceback.TracebackException.from_exception(exception).format()), file=sys.stderr)

    print("Cleaning up database...", end='')
    test_framework.teardown()
    print(" Done!")
Binary file added examples/fluids/tests-output/y0_output.npy
16 changes: 15 additions & 1 deletion tests/junit.py
@@ -188,7 +188,21 @@ def check_allowed_stdout(self, test: str) -> bool:
args = create_argparser().parse_args()

# run tests
result: TestSuite = run_tests(args.test, args.ceed_backends, args.mode, args.nproc, CeedSuiteSpec())
if 'smartsim' in args.test:
    sys.path.insert(0, str(Path(__file__).parents[1] / "examples" / "fluids"))
    from smartsim_regression_framework import SmartSimTest

    test_framework = SmartSimTest(Path(__file__).parent / 'test_dir')
    test_framework.setup()
    results = []
    print(f'1..{len(args.ceed_backends)}')
    for i, backend in enumerate(args.ceed_backends):
        results.append(test_framework.test_junit(backend))
        print_test_case(results[i], TestSpec("SmartSim Tests"), args.mode, i)
    test_framework.teardown()
    result: TestSuite = TestSuite('SmartSim Tests', results)
else:
    result: TestSuite = run_tests(args.test, args.ceed_backends, args.mode, args.nproc, CeedSuiteSpec())

# write output and check for failures
if args.mode is RunMode.JUNIT:
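Presumably this branch is reached through the same junit entry point the CI uses; with the Makefile change above producing a fluids-py-smartsim_regression_framework target, something like the following should select the SmartSim path (exact junit.py flags beyond the test name are not shown in this diff):

make junit search="fluids"   # CI-style invocation; 'smartsim' in the copied test name triggers the new branch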
72 changes: 38 additions & 34 deletions tests/junit_common.py
@@ -327,7 +327,7 @@ def run_tests(test: str, ceed_backends: list[str], mode: RunMode, nproc: int, su
    test_specs: list[TestSpec] = get_test_args(source_path)

    if mode is RunMode.TAP:
        print('1..' + str(len(test_specs) * len(ceed_backends)))
        print(f'1..{len(test_specs) * len(ceed_backends)}')

    test_cases: list[TestCase] = []
    my_env: dict = os.environ.copy()
@@ -435,43 +435,47 @@ test_case.args = ' '.join(str(arg) for arg in run_args)
            test_case.args = ' '.join(str(arg) for arg in run_args)
            test_cases.append(test_case)

            if mode is RunMode.TAP:
                # print incremental output if TAP mode
                print(f'# Test: {spec.name}')
                if spec.only:
                    print('# Only: {}'.format(','.join(spec.only)))
                print(f'# $ {test_case.args}')
                if test_case.is_skipped():
                    print('ok {} - SKIP: {}'.format(index, (test_case.skipped[0]['message'] or 'NO MESSAGE').strip()))
                elif test_case.is_failure() or test_case.is_error():
                    print(f'not ok {index}')
                    if test_case.is_error():
                        print(f' ERROR: {test_case.errors[0]["message"]}')
                    if test_case.is_failure():
                        for i, failure in enumerate(test_case.failures):
                            print(f' FAILURE {i}: {failure["message"]}')
                            print(f' Output: \n{failure["output"]}')
                else:
                    print(f'ok {index} - PASS')
                sys.stdout.flush()
            else:
                # print error or failure information if JUNIT mode
                if test_case.is_error() or test_case.is_failure():
                    print(f'Test: {test} {spec.name}')
                    print(f' $ {test_case.args}')
                    if test_case.is_error():
                        print('ERROR: {}'.format((test_case.errors[0]['message'] or 'NO MESSAGE').strip()))
                        print('Output: \n{}'.format((test_case.errors[0]['output'] or 'NO MESSAGE').strip()))
                    if test_case.is_failure():
                        for failure in test_case.failures:
                            print('FAIL: {}'.format((failure['message'] or 'NO MESSAGE').strip()))
                            print('Output: \n{}'.format((failure['output'] or 'NO MESSAGE').strip()))
                sys.stdout.flush()
            print_test_case(test_case, spec, mode, index)
            index += 1

    return TestSuite(test, test_cases)


def print_test_case(test_case: TestCase, spec: TestSpec, mode: RunMode, index: int) -> None:
    if mode is RunMode.TAP:
        # print incremental output if TAP mode
        print(f'# Test: {spec.name}')
        if spec.only:
            print('# Only: {}'.format(','.join(spec.only)))
        if hasattr(test_case, 'args'):
            print(f'# $ {test_case.args}')
        if test_case.is_skipped():
            print('ok {} - SKIP: {}'.format(index, (test_case.skipped[0]['message'] or 'NO MESSAGE').strip()))
        elif test_case.is_failure() or test_case.is_error():
            print(f'not ok {index}')
            if test_case.is_error():
                print(f' ERROR: {test_case.errors[0]["message"]}')
            if test_case.is_failure():
                for i, failure in enumerate(test_case.failures):
                    print(f' FAILURE {i}: {failure["message"]}')
                    print(f' Output: \n{failure["output"]}')
        else:
            print(f'ok {index} - PASS')
        sys.stdout.flush()
    else:
        # print error or failure information if JUNIT mode
        if test_case.is_error() or test_case.is_failure():
            print(f'Test: {spec.name}')
            print(f' $ {test_case.args}')
            if test_case.is_error():
                print('ERROR: {}'.format((test_case.errors[0]['message'] or 'NO MESSAGE').strip()))
                print('Output: \n{}'.format((test_case.errors[0]['output'] or 'NO MESSAGE').strip()))
            if test_case.is_failure():
                for failure in test_case.failures:
                    print('FAIL: {}'.format((failure['message'] or 'NO MESSAGE').strip()))
                    print('Output: \n{}'.format((failure['output'] or 'NO MESSAGE').strip()))
        sys.stdout.flush()


def write_junit_xml(test_suite: TestSuite, output_file: Optional[Path], batch: str = '') -> None:
    """Write a JUnit XML file containing the results of a `TestSuite`
