diff --git a/.github/workflows/ci_linux_x64-libshortfin.yml b/.github/workflows/ci_linux_x64-libshortfin.yml index 793058570..fb838adfc 100644 --- a/.github/workflows/ci_linux_x64-libshortfin.yml +++ b/.github/workflows/ci_linux_x64-libshortfin.yml @@ -85,6 +85,7 @@ jobs: # TODO: Switch to `pip install -r requirements.txt -e libshortfin/`. run: | pip install -r ${{ env.LIBSHORTFIN_DIR }}/requirements-tests.txt + pip install -r ${{ env.LIBSHORTFIN_DIR }}/requirements-iree-compiler.txt pip freeze - name: Build libshortfin (full) @@ -107,7 +108,7 @@ jobs: cd ${{ env.LIBSHORTFIN_DIR }}/build ctest --timeout 30 --output-on-failure cd ${{ env.LIBSHORTFIN_DIR }} - pytest -s -v -m "not requires_amd_gpu" + pytest -s - name: Build libshortfin (host-only) run: | diff --git a/.github/workflows/ci_linux_x64_asan-libshortfin.yml b/.github/workflows/ci_linux_x64_asan-libshortfin.yml index c998fbc77..398a581b1 100644 --- a/.github/workflows/ci_linux_x64_asan-libshortfin.yml +++ b/.github/workflows/ci_linux_x64_asan-libshortfin.yml @@ -76,8 +76,10 @@ jobs: needs: [setup-python-asan] runs-on: ubuntu-24.04 env: - # TODO(#151): Don't ignore ODR violations - ASAN_OPTIONS: detect_odr_violation=0 + # We can't count on being leak free in general (i.e. pip, etc) so disable + # leak checker by default. Here we suppress any ASAN features needed to + # pass the build. Test configuration is done specially just for that step. + ASAN_OPTIONS: detect_leaks=0,detect_odr_violation=0 LSAN_OPTIONS: suppressions=${{ github.workspace }}/libshortfin/build_tools/python_lsan_suppressions.txt steps: - name: Install dependencies @@ -170,7 +172,10 @@ jobs: - name: Run pytest if: ${{ !cancelled() }} + env: + # TODO(#151): Don't ignore ODR violations + ASAN_OPTIONS: detect_odr_violation=0 run: | eval "$(pyenv init -)" cd ${{ env.LIBSHORTFIN_DIR }} - pytest -m "not requires_amd_gpu" -s + pytest -s diff --git a/libshortfin/README.md b/libshortfin/README.md index 435dcbb87..2805ca4ea 100644 --- a/libshortfin/README.md +++ b/libshortfin/README.md @@ -61,6 +61,20 @@ does mean that the C++ core of the library must always be built with the Python bindings to test the most behavior. Given the target of the project, this is not considered to be a significant issue. +### Python tests + +Run platform independent tests only: + +``` +pytest tests/ +``` + +Run tests including for a specific platform: + +``` +pytest tests/ --system amdgpu +``` + # Production Library Building In order to build a production library, additional build steps are typically diff --git a/libshortfin/build_tools/python_lsan_suppressions.txt b/libshortfin/build_tools/python_lsan_suppressions.txt index cc768d575..3a0ef8fe7 100644 --- a/libshortfin/build_tools/python_lsan_suppressions.txt +++ b/libshortfin/build_tools/python_lsan_suppressions.txt @@ -1,2 +1,8 @@ leak:PyUnicode_New +leak:_PyUnicodeWriter_PrepareInternal leak:_PyUnicodeWriter_Finish +leak:numpy +leak:_mlir_libs +leak:google/_upb +leak:import_find_and_load +leak:ufunc diff --git a/libshortfin/pyproject.toml b/libshortfin/pyproject.toml index f1b34c64a..eb54c835b 100644 --- a/libshortfin/pyproject.toml +++ b/libshortfin/pyproject.toml @@ -12,9 +12,6 @@ addopts = [ "-ra", "--import-mode=importlib", ] -markers = [ - "requires_amd_gpu: tests that require and AMD GPU (deselect with '-m \"not requires_amd_gpu\"')", -] testpaths = [ "tests", ] diff --git a/libshortfin/requirements-tests.txt b/libshortfin/requirements-tests.txt index 1049b0412..98fdc0085 100644 --- a/libshortfin/requirements-tests.txt +++ b/libshortfin/requirements-tests.txt @@ -1,4 +1,5 @@ pytest requests fastapi +onnx uvicorn diff --git a/libshortfin/src/shortfin/local/system.cc b/libshortfin/src/shortfin/local/system.cc index 23ecbc088..b0905a8cb 100644 --- a/libshortfin/src/shortfin/local/system.cc +++ b/libshortfin/src/shortfin/local/system.cc @@ -61,31 +61,28 @@ System::~System() { void System::Shutdown() { // Stop workers. - std::vector> local_workers; { iree::slim_mutex_lock_guard guard(lock_); if (!initialized_ || shutdown_) return; shutdown_ = true; - workers_by_name_.clear(); - local_workers.swap(workers_); } // Worker drain and shutdown. - for (auto &worker : local_workers) { + for (auto &worker : workers_) { worker->Kill(); } - for (auto &worker : local_workers) { + for (auto &worker : workers_) { if (worker->options().owned_thread) { worker->WaitForShutdown(); } } blocking_executor_.Kill(); - local_workers.clear(); } std::shared_ptr System::CreateScope(Worker &worker, std::span devices) { iree::slim_mutex_lock_guard guard(lock_); + AssertRunning(); return std::make_shared(shared_ptr(), worker, devices); } @@ -102,6 +99,7 @@ void System::InitializeNodes(int node_count) { Queue &System::CreateQueue(Queue::Options options) { iree::slim_mutex_lock_guard guard(lock_); + AssertRunning(); if (queues_by_name_.count(options.name) != 0) { throw std::invalid_argument(fmt::format( "Cannot create queue with duplicate name '{}'", options.name)); @@ -140,6 +138,7 @@ Worker &System::CreateWorker(Worker::Options options) { Worker *unowned_worker; { iree::slim_mutex_lock_guard guard(lock_); + AssertRunning(); if (options.name == std::string_view("__init__")) { throw std::invalid_argument( "Cannot create worker '__init__' (reserved name)"); @@ -161,6 +160,7 @@ Worker &System::CreateWorker(Worker::Options options) { Worker &System::init_worker() { iree::slim_mutex_lock_guard guard(lock_); + AssertRunning(); auto found_it = workers_by_name_.find("__init__"); if (found_it != workers_by_name_.end()) { return *found_it->second; @@ -207,6 +207,7 @@ void System::FinishInitialization() { int64_t System::AllocateProcess(detail::BaseProcess *p) { iree::slim_mutex_lock_guard guard(lock_); + AssertRunning(); int pid = next_pid_++; processes_by_pid_[pid] = p; return pid; diff --git a/libshortfin/src/shortfin/local/system.h b/libshortfin/src/shortfin/local/system.h index 82fd4b489..31be26b9a 100644 --- a/libshortfin/src/shortfin/local/system.h +++ b/libshortfin/src/shortfin/local/system.h @@ -148,6 +148,13 @@ class SHORTFIN_API System : public std::enable_shared_from_this { "initialization"); } } + void AssertRunning() { + if (!initialized_ || shutdown_) { + throw std::logic_error( + "System manipulation methods can only be called when initialized and " + "not shutdown"); + } + } // Allocates a process in the process table and returns its new pid. // This is done on process construction. Note that it acquires the diff --git a/libshortfin/tests/__init__.py b/libshortfin/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/libshortfin/tests/amdgpu_system_test.py b/libshortfin/tests/amdgpu_system_test.py index cf04a7caf..4f33ad379 100644 --- a/libshortfin/tests/amdgpu_system_test.py +++ b/libshortfin/tests/amdgpu_system_test.py @@ -7,7 +7,7 @@ import pytest -@pytest.mark.requires_amd_gpu +@pytest.mark.system("amdgpu") def test_create_amd_gpu_system(): from _shortfin import lib as sfl diff --git a/libshortfin/tests/conftest.py b/libshortfin/tests/conftest.py new file mode 100644 index 000000000..2a64e4007 --- /dev/null +++ b/libshortfin/tests/conftest.py @@ -0,0 +1,34 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import pytest + + +def pytest_addoption(parser): + parser.addoption( + "--system", + action="store", + metavar="NAME", + nargs="*", + help="Enable tests for system name ('amdgpu', ...)", + ) + + +def pytest_configure(config): + config.addinivalue_line( + "markers", "system(name): mark test to run only on a named system" + ) + + +def pytest_runtest_setup(item): + required_system_names = [mark.args[0] for mark in item.iter_markers("system")] + if required_system_names: + available_system_names = item.config.getoption("--system") or [] + if not all(name in available_system_names for name in required_system_names): + pytest.skip( + f"test requires system in {required_system_names!r} but has " + f"{available_system_names!r} (set with --system arg)" + ) diff --git a/libshortfin/tests/invocation/conftest.py b/libshortfin/tests/invocation/conftest.py new file mode 100644 index 000000000..c366c7f82 --- /dev/null +++ b/libshortfin/tests/invocation/conftest.py @@ -0,0 +1,55 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import pytest +import urllib.request + + +def upgrade_onnx(original_path, converted_path): + import onnx + + original_model = onnx.load_model(original_path) + converted_model = onnx.version_converter.convert_version(original_model, 17) + onnx.save(converted_model, converted_path) + + +@pytest.fixture(scope="session") +def mobilenet_onnx_path(tmp_path_factory): + try: + import onnx + except ModuleNotFoundError: + raise pytest.skip("onnx python package not available") + print("Downloading mobilenet.onnx") + parent_dir = tmp_path_factory.mktemp("mobilenet_onnx") + orig_onnx_path = parent_dir / "mobilenet_orig.onnx" + urllib.request.urlretrieve( + "https://github.com/onnx/models/raw/main/validated/vision/classification/mobilenet/model/mobilenetv2-12.onnx", + orig_onnx_path, + ) + upgraded_onnx_path = parent_dir / "mobilenet.onnx" + upgrade_onnx(orig_onnx_path, upgraded_onnx_path) + return upgraded_onnx_path + + +@pytest.fixture(scope="session") +def mobilenet_compiled_cpu_path(mobilenet_onnx_path): + try: + import iree.compiler.tools as tools + import iree.compiler.tools.import_onnx.__main__ as import_onnx + except ModuleNotFoundError: + raise pytest.skip("iree.compiler packages not available") + print("Compiling mobilenet") + mlir_path = mobilenet_onnx_path.parent / "mobilenet.mlir" + vmfb_path = mobilenet_onnx_path.parent / "mobilenet_cpu.vmfb" + args = import_onnx.parse_arguments(["-o", str(mlir_path), str(mobilenet_onnx_path)]) + import_onnx.main(args) + tools.compile_file( + str(mlir_path), + output_file=str(vmfb_path), + target_backends=["llvm-cpu"], + input_type="onnx", + ) + return vmfb_path diff --git a/libshortfin/tests/invocation/mobilenet_program_test.py b/libshortfin/tests/invocation/mobilenet_program_test.py new file mode 100644 index 000000000..a457b4f6f --- /dev/null +++ b/libshortfin/tests/invocation/mobilenet_program_test.py @@ -0,0 +1,58 @@ +# Copyright 2024 Advanced Micro Devices, Inc. +# +# Licensed under the Apache License v2.0 with LLVM Exceptions. +# See https://llvm.org/LICENSE.txt for license information. +# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception + +import array +import functools +import pytest + +import shortfin as sf +import shortfin.array as sfnp + + +@pytest.fixture +def lsys(): + sc = sf.host.CPUSystemBuilder() + lsys = sc.create_system() + yield lsys + lsys.shutdown() + + +@pytest.fixture +def scope(lsys): + return lsys.create_scope() + + +@pytest.fixture +def device(scope): + return scope.device(0) + + +def test_invoke_mobilenet(lsys, scope, mobilenet_compiled_cpu_path): + device = scope.device(0) + dummy_data = array.array( + "f", ([0.2] * (224 * 224)) + ([0.4] * (224 * 224)) + ([-0.2] * (224 * 224)) + ) + program_module = lsys.load_module(mobilenet_compiled_cpu_path) + program = sf.Program([program_module], scope=scope) + main_function = program["module.torch-jit-export"] + + async def main(): + device_input = sfnp.device_array(device, [1, 3, 224, 224], sfnp.float32) + staging_input = device_input.for_transfer() + staging_input.storage.data = dummy_data + device_input.copy_from(staging_input) + (device_output,) = await main_function(device_input) + host_output = device_output.for_transfer() + host_output.copy_from(device_output) + await device + flat_output = array.array("f") + flat_output.frombytes(host_output.storage.data) + absmean = functools.reduce( + lambda x, y: x + abs(y) / len(flat_output), flat_output, 0.0 + ) + assert absmean == pytest.approx(5.01964943873882) + + lsys.run(main())