diff --git a/.github/workflows/ci_linux_x64-libshortfin.yml b/.github/workflows/ci_linux_x64-libshortfin.yml
index 793058570..fb838adfc 100644
--- a/.github/workflows/ci_linux_x64-libshortfin.yml
+++ b/.github/workflows/ci_linux_x64-libshortfin.yml
@@ -85,6 +85,7 @@ jobs:
       # TODO: Switch to `pip install -r requirements.txt -e libshortfin/`.
       run: |
         pip install -r ${{ env.LIBSHORTFIN_DIR }}/requirements-tests.txt
+        pip install -r ${{ env.LIBSHORTFIN_DIR }}/requirements-iree-compiler.txt
         pip freeze
 
     - name: Build libshortfin (full)
@@ -107,7 +108,7 @@ jobs:
         cd ${{ env.LIBSHORTFIN_DIR }}/build
         ctest --timeout 30 --output-on-failure
         cd ${{ env.LIBSHORTFIN_DIR }}
-        pytest -s -v -m "not requires_amd_gpu"
+        pytest -s
 
     - name: Build libshortfin (host-only)
       run: |
diff --git a/.github/workflows/ci_linux_x64_asan-libshortfin.yml b/.github/workflows/ci_linux_x64_asan-libshortfin.yml
index c998fbc77..398a581b1 100644
--- a/.github/workflows/ci_linux_x64_asan-libshortfin.yml
+++ b/.github/workflows/ci_linux_x64_asan-libshortfin.yml
@@ -76,8 +76,10 @@ jobs:
     needs: [setup-python-asan]
     runs-on: ubuntu-24.04
     env:
-      # TODO(#151): Don't ignore ODR violations
-      ASAN_OPTIONS: detect_odr_violation=0
+      # We can't count on being leak free in general (i.e. pip, etc) so disable
+      # leak checker by default. Here we suppress any ASAN features needed to
+      # pass the build. Test configuration is done specially just for that step.
+      ASAN_OPTIONS: detect_leaks=0,detect_odr_violation=0
       LSAN_OPTIONS: suppressions=${{ github.workspace }}/libshortfin/build_tools/python_lsan_suppressions.txt
     steps:
     - name: Install dependencies
@@ -170,7 +172,10 @@ jobs:
 
     - name: Run pytest
       if: ${{ !cancelled() }}
+      env:
+        # TODO(#151): Don't ignore ODR violations
+        ASAN_OPTIONS: detect_odr_violation=0
       run: |
         eval "$(pyenv init -)"
         cd ${{ env.LIBSHORTFIN_DIR }}
-        pytest -m "not requires_amd_gpu" -s
+        pytest -s
diff --git a/libshortfin/README.md b/libshortfin/README.md
index 435dcbb87..2805ca4ea 100644
--- a/libshortfin/README.md
+++ b/libshortfin/README.md
@@ -61,6 +61,20 @@ does mean that the C++ core of the library must always be built with the
 Python bindings to test the most behavior. Given the target of the project,
 this is not considered to be a significant issue.
 
+### Python tests
+
+Run platform independent tests only:
+
+```
+pytest tests/
+```
+
+Run tests including for a specific platform:
+
+```
+pytest tests/ --system amdgpu
+```
+
 # Production Library Building
 
 In order to build a production library, additional build steps are typically
diff --git a/libshortfin/build_tools/python_lsan_suppressions.txt b/libshortfin/build_tools/python_lsan_suppressions.txt
index cc768d575..3a0ef8fe7 100644
--- a/libshortfin/build_tools/python_lsan_suppressions.txt
+++ b/libshortfin/build_tools/python_lsan_suppressions.txt
@@ -1,2 +1,8 @@
 leak:PyUnicode_New
+leak:_PyUnicodeWriter_PrepareInternal
 leak:_PyUnicodeWriter_Finish
+leak:numpy
+leak:_mlir_libs
+leak:google/_upb
+leak:import_find_and_load
+leak:ufunc
diff --git a/libshortfin/pyproject.toml b/libshortfin/pyproject.toml
index f1b34c64a..eb54c835b 100644
--- a/libshortfin/pyproject.toml
+++ b/libshortfin/pyproject.toml
@@ -12,9 +12,6 @@ addopts = [
     "-ra",
     "--import-mode=importlib",
 ]
-markers = [
-    "requires_amd_gpu: tests that require and AMD GPU (deselect with '-m \"not requires_amd_gpu\"')",
-]
 testpaths = [
     "tests",
 ]
diff --git a/libshortfin/requirements-tests.txt b/libshortfin/requirements-tests.txt
index 1049b0412..98fdc0085 100644
--- a/libshortfin/requirements-tests.txt
+++ b/libshortfin/requirements-tests.txt
@@ -1,4 +1,5 @@
 pytest
 requests
 fastapi
+onnx
 uvicorn
diff --git a/libshortfin/src/shortfin/local/system.cc b/libshortfin/src/shortfin/local/system.cc
index 23ecbc088..b0905a8cb 100644
--- a/libshortfin/src/shortfin/local/system.cc
+++ b/libshortfin/src/shortfin/local/system.cc
@@ -61,31 +61,28 @@ System::~System() {
 
 void System::Shutdown() {
   // Stop workers.
-  std::vector<std::unique_ptr<Worker>> local_workers;
   {
     iree::slim_mutex_lock_guard guard(lock_);
     if (!initialized_ || shutdown_) return;
     shutdown_ = true;
-    workers_by_name_.clear();
-    local_workers.swap(workers_);
   }
 
   // Worker drain and shutdown.
-  for (auto &worker : local_workers) {
+  for (auto &worker : workers_) {
     worker->Kill();
   }
-  for (auto &worker : local_workers) {
+  for (auto &worker : workers_) {
     if (worker->options().owned_thread) {
       worker->WaitForShutdown();
     }
   }
   blocking_executor_.Kill();
-  local_workers.clear();
 }
 
 std::shared_ptr<Scope> System::CreateScope(Worker &worker,
                                            std::span<Device *const> devices) {
   iree::slim_mutex_lock_guard guard(lock_);
+  AssertRunning();
   return std::make_shared<Scope>(shared_ptr(), worker, devices);
 }
 
@@ -102,6 +99,7 @@ void System::InitializeNodes(int node_count) {
 
 Queue &System::CreateQueue(Queue::Options options) {
   iree::slim_mutex_lock_guard guard(lock_);
+  AssertRunning();
   if (queues_by_name_.count(options.name) != 0) {
     throw std::invalid_argument(fmt::format(
         "Cannot create queue with duplicate name '{}'", options.name));
@@ -140,6 +138,7 @@ Worker &System::CreateWorker(Worker::Options options) {
   Worker *unowned_worker;
   {
     iree::slim_mutex_lock_guard guard(lock_);
+    AssertRunning();
     if (options.name == std::string_view("__init__")) {
       throw std::invalid_argument(
           "Cannot create worker '__init__' (reserved name)");
@@ -161,6 +160,7 @@ Worker &System::CreateWorker(Worker::Options options) {
 
 Worker &System::init_worker() {
   iree::slim_mutex_lock_guard guard(lock_);
+  AssertRunning();
   auto found_it = workers_by_name_.find("__init__");
   if (found_it != workers_by_name_.end()) {
     return *found_it->second;
@@ -207,6 +207,7 @@ void System::FinishInitialization() {
 
 int64_t System::AllocateProcess(detail::BaseProcess *p) {
   iree::slim_mutex_lock_guard guard(lock_);
+  AssertRunning();
   int pid = next_pid_++;
   processes_by_pid_[pid] = p;
   return pid;
diff --git a/libshortfin/src/shortfin/local/system.h b/libshortfin/src/shortfin/local/system.h
index 82fd4b489..31be26b9a 100644
--- a/libshortfin/src/shortfin/local/system.h
+++ b/libshortfin/src/shortfin/local/system.h
@@ -148,6 +148,13 @@ class SHORTFIN_API System : public std::enable_shared_from_this<System> {
           "initialization");
     }
   }
+  void AssertRunning() {
+    if (!initialized_ || shutdown_) {
+      throw std::logic_error(
+          "System manipulation methods can only be called when initialized and "
+          "not shutdown");
+    }
+  }
 
   // Allocates a process in the process table and returns its new pid.
   // This is done on process construction. Note that it acquires the
diff --git a/libshortfin/tests/__init__.py b/libshortfin/tests/__init__.py
new file mode 100644
index 000000000..e69de29bb
diff --git a/libshortfin/tests/amdgpu_system_test.py b/libshortfin/tests/amdgpu_system_test.py
index cf04a7caf..4f33ad379 100644
--- a/libshortfin/tests/amdgpu_system_test.py
+++ b/libshortfin/tests/amdgpu_system_test.py
@@ -7,7 +7,7 @@
 import pytest
 
 
-@pytest.mark.requires_amd_gpu
+@pytest.mark.system("amdgpu")
 def test_create_amd_gpu_system():
     from _shortfin import lib as sfl
 
diff --git a/libshortfin/tests/conftest.py b/libshortfin/tests/conftest.py
new file mode 100644
index 000000000..2a64e4007
--- /dev/null
+++ b/libshortfin/tests/conftest.py
@@ -0,0 +1,34 @@
+# Copyright 2024 Advanced Micro Devices, Inc.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import pytest
+
+
+def pytest_addoption(parser):
+    parser.addoption(
+        "--system",
+        action="store",
+        metavar="NAME",
+        nargs="*",
+        help="Enable tests for system name ('amdgpu', ...)",
+    )
+
+
+def pytest_configure(config):
+    config.addinivalue_line(
+        "markers", "system(name): mark test to run only on a named system"
+    )
+
+
+def pytest_runtest_setup(item):
+    required_system_names = [mark.args[0] for mark in item.iter_markers("system")]
+    if required_system_names:
+        available_system_names = item.config.getoption("--system") or []
+        if not all(name in available_system_names for name in required_system_names):
+            pytest.skip(
+                f"test requires system in {required_system_names!r} but has "
+                f"{available_system_names!r} (set with --system arg)"
+            )
diff --git a/libshortfin/tests/invocation/conftest.py b/libshortfin/tests/invocation/conftest.py
new file mode 100644
index 000000000..c366c7f82
--- /dev/null
+++ b/libshortfin/tests/invocation/conftest.py
@@ -0,0 +1,55 @@
+# Copyright 2024 Advanced Micro Devices, Inc.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import pytest
+import urllib.request
+
+
+def upgrade_onnx(original_path, converted_path):
+    import onnx
+
+    original_model = onnx.load_model(original_path)
+    converted_model = onnx.version_converter.convert_version(original_model, 17)
+    onnx.save(converted_model, converted_path)
+
+
+@pytest.fixture(scope="session")
+def mobilenet_onnx_path(tmp_path_factory):
+    try:
+        import onnx
+    except ModuleNotFoundError:
+        raise pytest.skip("onnx python package not available")
+    print("Downloading mobilenet.onnx")
+    parent_dir = tmp_path_factory.mktemp("mobilenet_onnx")
+    orig_onnx_path = parent_dir / "mobilenet_orig.onnx"
+    urllib.request.urlretrieve(
+        "https://github.com/onnx/models/raw/main/validated/vision/classification/mobilenet/model/mobilenetv2-12.onnx",
+        orig_onnx_path,
+    )
+    upgraded_onnx_path = parent_dir / "mobilenet.onnx"
+    upgrade_onnx(orig_onnx_path, upgraded_onnx_path)
+    return upgraded_onnx_path
+
+
+@pytest.fixture(scope="session")
+def mobilenet_compiled_cpu_path(mobilenet_onnx_path):
+    try:
+        import iree.compiler.tools as tools
+        import iree.compiler.tools.import_onnx.__main__ as import_onnx
+    except ModuleNotFoundError:
+        raise pytest.skip("iree.compiler packages not available")
+    print("Compiling mobilenet")
+    mlir_path = mobilenet_onnx_path.parent / "mobilenet.mlir"
+    vmfb_path = mobilenet_onnx_path.parent / "mobilenet_cpu.vmfb"
+    args = import_onnx.parse_arguments(["-o", str(mlir_path), str(mobilenet_onnx_path)])
+    import_onnx.main(args)
+    tools.compile_file(
+        str(mlir_path),
+        output_file=str(vmfb_path),
+        target_backends=["llvm-cpu"],
+        input_type="onnx",
+    )
+    return vmfb_path
diff --git a/libshortfin/tests/invocation/mobilenet_program_test.py b/libshortfin/tests/invocation/mobilenet_program_test.py
new file mode 100644
index 000000000..a457b4f6f
--- /dev/null
+++ b/libshortfin/tests/invocation/mobilenet_program_test.py
@@ -0,0 +1,58 @@
+# Copyright 2024 Advanced Micro Devices, Inc.
+#
+# Licensed under the Apache License v2.0 with LLVM Exceptions.
+# See https://llvm.org/LICENSE.txt for license information.
+# SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+import array
+import functools
+import pytest
+
+import shortfin as sf
+import shortfin.array as sfnp
+
+
+@pytest.fixture
+def lsys():
+    sc = sf.host.CPUSystemBuilder()
+    lsys = sc.create_system()
+    yield lsys
+    lsys.shutdown()
+
+
+@pytest.fixture
+def scope(lsys):
+    return lsys.create_scope()
+
+
+@pytest.fixture
+def device(scope):
+    return scope.device(0)
+
+
+def test_invoke_mobilenet(lsys, scope, mobilenet_compiled_cpu_path):
+    device = scope.device(0)
+    dummy_data = array.array(
+        "f", ([0.2] * (224 * 224)) + ([0.4] * (224 * 224)) + ([-0.2] * (224 * 224))
+    )
+    program_module = lsys.load_module(mobilenet_compiled_cpu_path)
+    program = sf.Program([program_module], scope=scope)
+    main_function = program["module.torch-jit-export"]
+
+    async def main():
+        device_input = sfnp.device_array(device, [1, 3, 224, 224], sfnp.float32)
+        staging_input = device_input.for_transfer()
+        staging_input.storage.data = dummy_data
+        device_input.copy_from(staging_input)
+        (device_output,) = await main_function(device_input)
+        host_output = device_output.for_transfer()
+        host_output.copy_from(device_output)
+        await device
+        flat_output = array.array("f")
+        flat_output.frombytes(host_output.storage.data)
+        absmean = functools.reduce(
+            lambda x, y: x + abs(y) / len(flat_output), flat_output, 0.0
+        )
+        assert absmean == pytest.approx(5.01964943873882)
+
+    lsys.run(main())