diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
index 7faddd90696..69bc29cc63b 100644
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -4,6 +4,8 @@ about: Create a report to help us improve
 
 ---
 
+⚠️ **Issues not using this template will be systematically closed.**
+
 **Describe the bug**
 A clear and concise description of what the bug is.
 
@@ -18,8 +20,8 @@ A clear and concise description of what you expected to happen.
 If applicable, add screenshots to help explain your problem.
 
 **Logs**
-If applicable, copy logs from your console here. Please *do not*
-use screenshots of logs, copy them as text.
+If applicable, copy **full** logs from your console here. Please *do not* use
+screenshots of logs: copy them as text, use a gist, or attach an *uncompressed* file.
 
 **Desktop (please complete the following information):**
  - OS: [e.g. iOS]
diff --git a/README.md b/README.md
index f1962fa0748..dc21159e4d2 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ repositories {
 }
 
 dependencies {
-    implementation 'com.google.android.filament:filament-android:1.40.4'
+    implementation 'com.google.android.filament:filament-android:1.40.5'
 }
 ```
 
@@ -50,7 +50,7 @@ Here are all the libraries available in the group `com.google.android.filament`:
 iOS projects can use CocoaPods to install the latest release:
 
 ```
-pod 'Filament', '~> 1.40.4'
+pod 'Filament', '~> 1.40.5'
 ```
 
 ### Snapshots
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index a7e0930c5ae..c1827db87a2 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -7,6 +7,18 @@ A new header is inserted each time a *tag* is created.
 Instead, if you are authoring a PR for the main branch, add your release note to
 [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md).
 
+## v1.40.5
+
+- backend: Disable timer queries on all Mali GPUs (fixes b/233754398)
+- engine: Add a way to query the validity of most filament objects (see `Engine::isValid`)
+- opengl: fix possible crash when shutting down the engine (b/290388359)
+- engine: Improve precision of frame time measurement when using emulated TimerQueries
+- backend: Improve frame pacing on Android and Vulkan
+- backend: work around b/291140208 (gltf_viewer crashes on Nexus 6P)
+- engine: support `setDepthFunc` for `MaterialInstance`
+- web: Added setDepthFunc()/getDepthFunc() to MaterialInstance
+- android: Added setDepthFunc()/getDepthFunc() to MaterialInstance
+
 ## v1.40.4
 
 - gltfio: fix crash when compute morph target without material
diff --git a/android/filament-android/src/main/cpp/Engine.cpp b/android/filament-android/src/main/cpp/Engine.cpp
index 3d720a88a3f..e530e7bb55b 100644
--- a/android/filament-android/src/main/cpp/Engine.cpp
+++ b/android/filament-android/src/main/cpp/Engine.cpp
@@ -278,6 +278,112 @@ Java_com_google_android_filament_Engine_nDestroyEntity(JNIEnv*, jclass,
     engine->destroy(entity);
 }
 
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidRenderer(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeRenderer) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<Renderer*>(nativeRenderer);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidView(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeView) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<View*>(nativeView);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidScene(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeScene) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<Scene*>(nativeScene);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidFence(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeFence) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<Fence*>(nativeFence);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidStream(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeStream) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<Stream*>(nativeStream);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidIndexBuffer(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeIndexBuffer) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<IndexBuffer*>(nativeIndexBuffer);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidVertexBuffer(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeVertexBuffer) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<VertexBuffer*>(nativeVertexBuffer);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidSkinningBuffer(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeSkinningBuffer) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<SkinningBuffer*>(nativeSkinningBuffer);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidIndirectLight(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeIndirectLight) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<IndirectLight*>(nativeIndirectLight);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidMaterial(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeMaterial) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<Material*>(nativeMaterial);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidSkybox(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeSkybox) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<Skybox*>(nativeSkybox);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidColorGrading(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeColorGrading) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<ColorGrading*>(nativeColorGrading);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidTexture(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeTexture) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<Texture*>(nativeTexture);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidRenderTarget(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeTarget) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<RenderTarget*>(nativeTarget);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
+extern "C" JNIEXPORT jboolean JNICALL
+Java_com_google_android_filament_Engine_nIsValidSwapChain(JNIEnv*, jclass,
+        jlong nativeEngine, jlong nativeSwapChain) { // forwards to Engine::isValid()
+    auto* const target = reinterpret_cast<SwapChain*>(nativeSwapChain);
+    return static_cast<jboolean>(reinterpret_cast<Engine*>(nativeEngine)->isValid(target));
+}
+
 extern "C" JNIEXPORT void JNICALL
 Java_com_google_android_filament_Engine_nFlushAndWait(JNIEnv*, jclass,
         jlong nativeEngine) {
diff --git a/android/filament-android/src/main/cpp/MaterialInstance.cpp b/android/filament-android/src/main/cpp/MaterialInstance.cpp
index 5273dc2c808..d692689e94c 100644
--- a/android/filament-android/src/main/cpp/MaterialInstance.cpp
+++ b/android/filament-android/src/main/cpp/MaterialInstance.cpp
@@ -357,6 +357,14 @@ Java_com_google_android_filament_MaterialInstance_nSetDepthCulling(JNIEnv*,
     instance->setDepthCulling(enable);
 }
 
+extern "C"
+JNIEXPORT void JNICALL
+Java_com_google_android_filament_MaterialInstance_nSetDepthFunc(JNIEnv*,
+        jclass, jlong nativeMaterialInstance, jlong function) { // forwards to setDepthFunc()
+    auto const depthFunc = static_cast<MaterialInstance::DepthFunc>(function);
+    reinterpret_cast<MaterialInstance*>(nativeMaterialInstance)->setDepthFunc(depthFunc);
+}
+
 extern "C"
 JNIEXPORT void JNICALL
 Java_com_google_android_filament_MaterialInstance_nSetStencilCompareFunction(JNIEnv*, jclass,
@@ -524,3 +532,11 @@ Java_com_google_android_filament_MaterialInstance_nIsDepthCullingEnabled(JNIEnv*
     MaterialInstance* instance = (MaterialInstance*)nativeMaterialInstance;
     return instance->isDepthCullingEnabled();
 }
+
+extern "C"
+JNIEXPORT jint JNICALL
+Java_com_google_android_filament_MaterialInstance_nGetDepthFunc(JNIEnv*, jclass,
+        jlong nativeMaterialInstance) { // returns getDepthFunc() converted to jint
+    auto* const mi = reinterpret_cast<MaterialInstance*>(nativeMaterialInstance);
+    return static_cast<jint>(mi->getDepthFunc());
+}
diff --git a/android/filament-android/src/main/java/com/google/android/filament/Engine.java b/android/filament-android/src/main/java/com/google/android/filament/Engine.java
index c047c96224a..e6bc4d92430 100644
--- a/android/filament-android/src/main/java/com/google/android/filament/Engine.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/Engine.java
@@ -449,6 +449,141 @@ public void destroySwapChain(@NonNull SwapChain swapChain) {
         swapChain.clearNativeObject();
     }
 
+    /**
+     * Asks this engine whether the given {@link Renderer} is valid.
+     * @param object the Renderer to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidRenderer(@NonNull Renderer object) {
+        return nIsValidRenderer(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link View} is valid.
+     * @param object the View to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidView(@NonNull View object) {
+        return nIsValidView(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link Scene} is valid.
+     * @param object the Scene to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidScene(@NonNull Scene object) {
+        return nIsValidScene(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link Fence} is valid.
+     * @param object the Fence to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidFence(@NonNull Fence object) {
+        return nIsValidFence(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link Stream} is valid.
+     * @param object the Stream to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidStream(@NonNull Stream object) {
+        return nIsValidStream(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link IndexBuffer} is valid.
+     * @param object the IndexBuffer to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidIndexBuffer(@NonNull IndexBuffer object) {
+        return nIsValidIndexBuffer(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link VertexBuffer} is valid.
+     * @param object the VertexBuffer to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidVertexBuffer(@NonNull VertexBuffer object) {
+        return nIsValidVertexBuffer(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link SkinningBuffer} is valid.
+     * @param object the SkinningBuffer to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidSkinningBuffer(@NonNull SkinningBuffer object) {
+        return nIsValidSkinningBuffer(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link IndirectLight} is valid.
+     * @param object the IndirectLight to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidIndirectLight(@NonNull IndirectLight object) {
+        return nIsValidIndirectLight(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link Material} is valid.
+     * @param object the Material to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidMaterial(@NonNull Material object) {
+        return nIsValidMaterial(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link Skybox} is valid.
+     * @param object the Skybox to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidSkybox(@NonNull Skybox object) {
+        return nIsValidSkybox(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link ColorGrading} is valid.
+     * @param object the ColorGrading to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidColorGrading(@NonNull ColorGrading object) {
+        return nIsValidColorGrading(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link Texture} is valid.
+     * @param object the Texture to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidTexture(@NonNull Texture object) {
+        return nIsValidTexture(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link RenderTarget} is valid.
+     * @param object the RenderTarget to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidRenderTarget(@NonNull RenderTarget object) {
+        return nIsValidRenderTarget(getNativeObject(), object.getNativeObject());
+    }
+
+    /**
+     * Asks this engine whether the given {@link SwapChain} is valid.
+     * @param object the SwapChain to check; must not be null
+     * @return true if the native engine reports <code>object</code> as valid
+     */
+    public boolean isValidSwapChain(@NonNull SwapChain object) {
+        return nIsValidSwapChain(getNativeObject(), object.getNativeObject());
+    }
+
     // View
 
     /**
@@ -785,17 +920,17 @@ private static void assertDestroy(boolean success) {
     private static native long nCreateSwapChain(long nativeEngine, Object nativeWindow, long flags);
     private static native long nCreateSwapChainHeadless(long nativeEngine, int width, int height, long flags);
     private static native long nCreateSwapChainFromRawPointer(long nativeEngine, long pointer, long flags);
-    private static native boolean nDestroySwapChain(long nativeEngine, long nativeSwapChain);
     private static native long nCreateView(long nativeEngine);
-    private static native boolean nDestroyView(long nativeEngine, long nativeView);
     private static native long nCreateRenderer(long nativeEngine);
-    private static native boolean nDestroyRenderer(long nativeEngine, long nativeRenderer);
     private static native long nCreateCamera(long nativeEngine, int entity);
     private static native long nGetCameraComponent(long nativeEngine, int entity);
     private static native void nDestroyCameraComponent(long nativeEngine, int entity);
     private static native long nCreateScene(long nativeEngine);
-    private static native boolean nDestroyScene(long nativeEngine, long nativeScene);
     private static native long nCreateFence(long nativeEngine);
+
+    private static native boolean nDestroyRenderer(long nativeEngine, long nativeRenderer);
+    private static native boolean nDestroyView(long nativeEngine, long nativeView);
+    private static native boolean nDestroyScene(long nativeEngine, long nativeScene);
     private static native boolean nDestroyFence(long nativeEngine, long nativeFence);
     private static native boolean nDestroyStream(long nativeEngine, long nativeStream);
     private static native boolean nDestroyIndexBuffer(long nativeEngine, long nativeIndexBuffer);
@@ -808,6 +943,22 @@ private static void assertDestroy(boolean success) {
     private static native boolean nDestroyColorGrading(long nativeEngine, long nativeColorGrading);
     private static native boolean nDestroyTexture(long nativeEngine, long nativeTexture);
     private static native boolean nDestroyRenderTarget(long nativeEngine, long nativeTarget);
+    private static native boolean nDestroySwapChain(long nativeEngine, long nativeSwapChain);
+    private static native boolean nIsValidRenderer(long nativeEngine, long nativeRenderer);
+    private static native boolean nIsValidView(long nativeEngine, long nativeView);
+    private static native boolean nIsValidScene(long nativeEngine, long nativeScene);
+    private static native boolean nIsValidFence(long nativeEngine, long nativeFence);
+    private static native boolean nIsValidStream(long nativeEngine, long nativeStream);
+    private static native boolean nIsValidIndexBuffer(long nativeEngine, long nativeIndexBuffer);
+    private static native boolean nIsValidVertexBuffer(long nativeEngine, long nativeVertexBuffer);
+    private static native boolean nIsValidSkinningBuffer(long nativeEngine, long nativeSkinningBuffer);
+    private static native boolean nIsValidIndirectLight(long nativeEngine, long nativeIndirectLight);
+    private static native boolean nIsValidMaterial(long nativeEngine, long nativeMaterial);
+    private static native boolean nIsValidSkybox(long nativeEngine, long nativeSkybox);
+    private static native boolean nIsValidColorGrading(long nativeEngine, long nativeColorGrading);
+    private static native boolean nIsValidTexture(long nativeEngine, long nativeTexture);
+    private static native boolean nIsValidRenderTarget(long nativeEngine, long nativeTarget);
+    private static native boolean nIsValidSwapChain(long nativeEngine, long nativeSwapChain);
     private static native void nDestroyEntity(long nativeEngine, int entity);
     private static native void nFlushAndWait(long nativeEngine);
     private static native long nGetTransformManager(long nativeEngine);
diff --git a/android/filament-android/src/main/java/com/google/android/filament/MaterialInstance.java b/android/filament-android/src/main/java/com/google/android/filament/MaterialInstance.java
index bde26df5ac1..9f30568f52b 100644
--- a/android/filament-android/src/main/java/com/google/android/filament/MaterialInstance.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/MaterialInstance.java
@@ -625,6 +625,15 @@ public void setDepthCulling(boolean enable) {
         nSetDepthCulling(getNativeObject(), enable);
     }
 
+    /**
+     * Sets the depth comparison function (default is greater-or-equal).
+     *
+     * @param func a {@link TextureSampler.CompareFunction}; its ordinal is passed to native code
+     */
+    public void setDepthFunc(TextureSampler.CompareFunction func) {
+        nSetDepthFunc(getNativeObject(), func.ordinal());
+    }
+
     /**
      * Returns whether depth culling is enabled.
      */
@@ -632,6 +641,13 @@ public boolean isDepthCullingEnabled() {
         return nIsDepthCullingEnabled(getNativeObject());
     }
 
+    /**
+     * Returns the depth comparison function, looked up by the index the native layer reports.
+     */
+    public TextureSampler.CompareFunction getDepthFunc() {
+        return TextureSampler.EnumCache.sCompareFunctionValues[nGetDepthFunc(getNativeObject())];
+    }
+
     /**
      * Sets the stencil comparison function (default is {@link TextureSampler.CompareFunction#ALWAYS}).
      *
@@ -908,6 +924,7 @@ private static native void nSetSpecularAntiAliasingThreshold(long nativeMaterial
     private static native void nSetDepthWrite(long nativeMaterialInstance, boolean enable);
     private static native void nSetStencilWrite(long nativeMaterialInstance, boolean enable);
     private static native void nSetDepthCulling(long nativeMaterialInstance, boolean enable);
+    private static native void nSetDepthFunc(long nativeMaterialInstance, long function);
 
     private static native void nSetStencilCompareFunction(long nativeMaterialInstance,
             long function, long face);
@@ -939,4 +956,5 @@ private static native void nSetStencilWriteMask(long nativeMaterialInstance, int
     private static native boolean nIsDepthWriteEnabled(long nativeMaterialInstance);
     private static native boolean nIsStencilWriteEnabled(long nativeMaterialInstance);
     private static native boolean nIsDepthCullingEnabled(long nativeMaterialInstance);
+    private static native int nGetDepthFunc(long nativeMaterialInstance);
 }
diff --git a/android/gradle.properties b/android/gradle.properties
index 879f309c2ca..d017be996c6 100644
--- a/android/gradle.properties
+++ b/android/gradle.properties
@@ -1,5 +1,5 @@
 GROUP=com.google.android.filament
-VERSION_NAME=1.40.4
+VERSION_NAME=1.40.5
 
 POM_DESCRIPTION=Real-time physically based rendering engine for Android.
 
diff --git a/android/samples/sample-gltf-viewer/src/main/java/com/google/android/filament/gltf/MainActivity.kt b/android/samples/sample-gltf-viewer/src/main/java/com/google/android/filament/gltf/MainActivity.kt
index 8291f020406..06c9f141cd1 100644
--- a/android/samples/sample-gltf-viewer/src/main/java/com/google/android/filament/gltf/MainActivity.kt
+++ b/android/samples/sample-gltf-viewer/src/main/java/com/google/android/filament/gltf/MainActivity.kt
@@ -337,6 +337,11 @@ class MainActivity : Activity() {
         remoteServer?.close()
     }
 
+    override fun onBackPressed() { // back button: superclass handling, then close the activity
+        super.onBackPressed() // let the Activity base class handle the press first
+        finish() // then explicitly finish this activity
+    }
+
     fun loadModelData(message: RemoteServer.ReceivedMessage) {
         Log.i(TAG, "Downloaded model ${message.label} (${message.buffer.capacity()} bytes)")
         clearStatusText()
diff --git a/filament/backend/include/backend/Handle.h b/filament/backend/include/backend/Handle.h
index 7317e2c59b1..04e83809800 100644
--- a/filament/backend/include/backend/Handle.h
+++ b/filament/backend/include/backend/Handle.h
@@ -39,7 +39,6 @@ struct HwRenderTarget;
 struct HwSamplerGroup;
 struct HwStream;
 struct HwSwapChain;
-struct HwSync;
 struct HwTexture;
 struct HwTimerQuery;
 struct HwVertexBuffer;
@@ -126,7 +125,6 @@ using RenderTargetHandle    = Handle<HwRenderTarget>;
 using SamplerGroupHandle    = Handle<HwSamplerGroup>;
 using StreamHandle          = Handle<HwStream>;
 using SwapChainHandle       = Handle<HwSwapChain>;
-using SyncHandle            = Handle<HwSync>;
 using TextureHandle         = Handle<HwTexture>;
 using TimerQueryHandle      = Handle<HwTimerQuery>;
 using VertexBufferHandle    = Handle<HwVertexBuffer>;
diff --git a/filament/backend/include/private/backend/DriverAPI.inc b/filament/backend/include/private/backend/DriverAPI.inc
index 84ac2f24049..150615767fb 100644
--- a/filament/backend/include/private/backend/DriverAPI.inc
+++ b/filament/backend/include/private/backend/DriverAPI.inc
@@ -245,8 +245,6 @@ DECL_DRIVER_API_R_N(backend::RenderTargetHandle, createRenderTarget,
 
 DECL_DRIVER_API_R_0(backend::FenceHandle, createFence)
 
-DECL_DRIVER_API_R_0(backend::SyncHandle, createSync)
-
 DECL_DRIVER_API_R_N(backend::SwapChainHandle, createSwapChain,
         void*, nativeWindow,
         uint64_t, flags)
@@ -275,7 +273,6 @@ DECL_DRIVER_API_N(destroyRenderTarget,    backend::RenderTargetHandle, rth)
 DECL_DRIVER_API_N(destroySwapChain,       backend::SwapChainHandle, sch)
 DECL_DRIVER_API_N(destroyStream,          backend::StreamHandle, sh)
 DECL_DRIVER_API_N(destroyTimerQuery,      backend::TimerQueryHandle, sh)
-DECL_DRIVER_API_N(destroySync,            backend::SyncHandle, sh)
 
 /*
  * Synchronous APIs
@@ -306,7 +303,6 @@ DECL_DRIVER_API_SYNCHRONOUS_0(math::float2, getClipSpaceParams)
 DECL_DRIVER_API_SYNCHRONOUS_0(bool, canGenerateMipmaps)
 DECL_DRIVER_API_SYNCHRONOUS_N(void, setupExternalImage, void*, image)
 DECL_DRIVER_API_SYNCHRONOUS_N(bool, getTimerQueryValue, backend::TimerQueryHandle, query, uint64_t*, elapsedTime)
-DECL_DRIVER_API_SYNCHRONOUS_N(backend::SyncStatus, getSyncStatus, backend::SyncHandle, sh)
 DECL_DRIVER_API_SYNCHRONOUS_N(bool, isWorkaroundNeeded, backend::Workaround, workaround)
 DECL_DRIVER_API_SYNCHRONOUS_0(backend::FeatureLevel, getFeatureLevel)
 
diff --git a/filament/backend/src/Driver.cpp b/filament/backend/src/Driver.cpp
index 7501ae48d26..b3bbab4a6ca 100644
--- a/filament/backend/src/Driver.cpp
+++ b/filament/backend/src/Driver.cpp
@@ -63,6 +63,8 @@ DriverBase::DriverBase() noexcept {
 }
 
 DriverBase::~DriverBase() noexcept {
+    assert_invariant(mCallbacks.empty());
+    assert_invariant(mServiceThreadCallbackQueue.empty());
     if constexpr (UTILS_HAS_THREADING) {
         // quit our service thread
         std::unique_lock<std::mutex> lock(mServiceThreadLock);
diff --git a/filament/backend/src/DriverBase.h b/filament/backend/src/DriverBase.h
index 3de365245b9..3e7f2647d2f 100644
--- a/filament/backend/src/DriverBase.h
+++ b/filament/backend/src/DriverBase.h
@@ -135,9 +135,6 @@ struct HwFence : public HwBase {
     Platform::Fence* fence = nullptr;
 };
 
-struct HwSync : public HwBase {
-};
-
 struct HwSwapChain : public HwBase {
     Platform::SwapChain* swapChain = nullptr;
 };
diff --git a/filament/backend/src/Handle.cpp b/filament/backend/src/Handle.cpp
index c715a5ee67e..a32b2252680 100644
--- a/filament/backend/src/Handle.cpp
+++ b/filament/backend/src/Handle.cpp
@@ -67,7 +67,6 @@ template io::ostream& operator<<(io::ostream& out, const Handle<HwFence>& h) noe
 template io::ostream& operator<<(io::ostream& out, const Handle<HwSwapChain>& h) noexcept;
 template io::ostream& operator<<(io::ostream& out, const Handle<HwStream>& h) noexcept;
 template io::ostream& operator<<(io::ostream& out, const Handle<HwTimerQuery>& h) noexcept;
-template io::ostream& operator<<(io::ostream& out, const Handle<HwSync>& h) noexcept;
 template io::ostream& operator<<(io::ostream& out, const Handle<HwBufferObject>& h) noexcept;
 
 #endif
diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm
index 61f42b1143a..a849180ea9b 100644
--- a/filament/backend/src/metal/MetalDriver.mm
+++ b/filament/backend/src/metal/MetalDriver.mm
@@ -380,11 +380,6 @@
     fence->encode();
 }
 
-void MetalDriver::createSyncR(Handle<HwSync> sh, int) {
-    auto* fence = handle_cast<MetalFence>(sh);
-    fence->encode();
-}
-
 void MetalDriver::createSwapChainR(Handle<HwSwapChain> sch, void* nativeWindow, uint64_t flags) {
     if (UTILS_UNLIKELY(flags & SWAP_CHAIN_CONFIG_APPLE_CVPIXELBUFFER)) {
         CVPixelBufferRef pixelBuffer = (CVPixelBufferRef) nativeWindow;
@@ -454,12 +449,6 @@
     return alloc_and_construct_handle<MetalFence, HwFence>(*mContext);
 }
 
-Handle<HwSync> MetalDriver::createSyncS() noexcept {
-    // The handle must be constructed here, as a synchronous call to getSyncStatus might happen
-    // before createSyncR is executed.
-    return alloc_and_construct_handle<MetalFence, HwSync>(*mContext);
-}
-
 Handle<HwSwapChain> MetalDriver::createSwapChainS() noexcept {
     return alloc_handle<MetalSwapChain>();
 }
@@ -567,13 +556,6 @@
     }
 }
 
-void MetalDriver::destroySync(Handle<HwSync> sh) {
-    if (sh) {
-        destruct_handle<MetalFence>(sh);
-    }
-}
-
-
 void MetalDriver::terminate() {
     // finish() will flush the pending command buffer and will ensure all GPU work has finished.
     // This must be done before calling bufferPool->reset() to ensure no buffers are in flight.
@@ -841,17 +823,6 @@
     return mContext->timerQueryImpl->getQueryResult(tq, elapsedTime);
 }
 
-SyncStatus MetalDriver::getSyncStatus(Handle<HwSync> sh) {
-    auto* fence = handle_cast<MetalFence>(sh);
-    FenceStatus status = fence->wait(0);
-    if (status == FenceStatus::TIMEOUT_EXPIRED) {
-        return SyncStatus::NOT_SIGNALED;
-    } else if (status == FenceStatus::CONDITION_SATISFIED) {
-        return SyncStatus::SIGNALED;
-    }
-    return SyncStatus::ERROR;
-}
-
 void MetalDriver::generateMipmaps(Handle<HwTexture> th) {
     ASSERT_PRECONDITION(!isInRenderPass(mContext),
                         "generateMipmaps must be called outside of a render pass.");
diff --git a/filament/backend/src/metal/MetalHandles.h b/filament/backend/src/metal/MetalHandles.h
index b1f59d59e83..9ffa1a0bda8 100644
--- a/filament/backend/src/metal/MetalHandles.h
+++ b/filament/backend/src/metal/MetalHandles.h
@@ -446,9 +446,7 @@ class MetalRenderTarget : public HwRenderTarget {
 };
 
-// MetalFence is used to implement both Fences and Syncs.
+// MetalFence is used to implement Fences.
-// There's no diamond problem, because HwBase (superclass of HwFence and HwSync) is empty.
-static_assert(std::is_empty_v<HwBase>);
-class MetalFence : public HwFence, public HwSync {
+class MetalFence : public HwFence {
 public:
 
     // MetalFence is special, as it gets constructed on the Filament thread. We must delay inserting
diff --git a/filament/backend/src/noop/NoopDriver.cpp b/filament/backend/src/noop/NoopDriver.cpp
index 61b80df465c..40a617f4a17 100644
--- a/filament/backend/src/noop/NoopDriver.cpp
+++ b/filament/backend/src/noop/NoopDriver.cpp
@@ -107,9 +107,6 @@ void NoopDriver::destroyStream(Handle<HwStream> sh) {
 void NoopDriver::destroyTimerQuery(Handle<HwTimerQuery> tqh) {
 }
 
-void NoopDriver::destroySync(Handle<HwSync> fh) {
-}
-
 Handle<HwStream> NoopDriver::createStreamNative(void* nativeStream) {
     return {};
 }
@@ -236,10 +233,6 @@ bool NoopDriver::getTimerQueryValue(Handle<HwTimerQuery> tqh, uint64_t* elapsedT
     return false;
 }
 
-SyncStatus NoopDriver::getSyncStatus(Handle<HwSync> sh) {
-    return SyncStatus::SIGNALED;
-}
-
 void NoopDriver::setExternalImage(Handle<HwTexture> th, void* image) {
 }
 
diff --git a/filament/backend/src/opengl/OpenGLContext.cpp b/filament/backend/src/opengl/OpenGLContext.cpp
index 07a6d7ce8cd..14d7c679c7f 100644
--- a/filament/backend/src/opengl/OpenGLContext.cpp
+++ b/filament/backend/src/opengl/OpenGLContext.cpp
@@ -268,25 +268,13 @@ OpenGLContext::OpenGLContext() noexcept {
                 bugs.dont_use_timer_query = true;
             }
             if (strstr(state.renderer, "Mali-G")) {
-                // assume we don't have working timer queries
+                // We have run into several problems with timer queries on Mali-Gxx:
+                // - timer queries seem to cause memory corruption in some cases on some devices
+                //   (see b/233754398)
+                //          - appeared at least in: "OpenGL ES 3.2 v1.r26p0-01eac0"
+                //          - wasn't present in: "OpenGL ES 3.2 v1.r32p1-00pxl1"
+                // - timer queries sometimes crash with an NPE (see b/273759031)
                 bugs.dont_use_timer_query = true;
-
-                int maj, min, driverVersion, driverRevision, driverPatch;
-                int const c = sscanf(state.version, "OpenGL ES %d.%d v%d.r%dp%d", // NOLINT(cert-err34-c)
-                        &maj, &min, &driverVersion, &driverRevision, &driverPatch);
-                if (c == 5) {
-                    // Workarounds based on version here.
-                    // notes:
-                    //  bugs.dont_use_timer_query : on some Mali-Gxx drivers timer query seems
-                    //  to cause memory corruptions in some cases on some devices (see b/233754398).
-                    //  - appeared at least in
-                    //      "OpenGL ES 3.2 v1.r26p0-01eac0"
-                    //  - wasn't present in
-                    //      "OpenGL ES 3.2 v1.r32p1-00pxl1"
-                    if (driverVersion >= 2 || (driverVersion == 1 && driverRevision >= 32)) {
-                        bugs.dont_use_timer_query = false;
-                    }
-                }
             }
             // Mali seems to have no problem with this (which is good for us)
             bugs.allow_read_only_ancillary_feedback_loop = true;
@@ -894,63 +882,4 @@ void OpenGLContext::resetState() noexcept {
     
 }
 
-OpenGLContext::FenceSync OpenGLContext::createFenceSync(
-        OpenGLPlatform& platform) noexcept {
-
-    if (UTILS_UNLIKELY(isES2())) {
-        assert_invariant(platform.canCreateFence());
-        return { .fence = platform.createFence() };
-    }
-
-#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-    auto sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
-    CHECK_GL_ERROR(utils::slog.e)
-    return { .sync = sync };
-#else
-    return {};
-#endif
-}
-
-void OpenGLContext::destroyFenceSync(
-        OpenGLPlatform& platform, FenceSync sync) noexcept {
-
-    if (UTILS_UNLIKELY(isES2())) {
-        platform.destroyFence(static_cast<Platform::Fence*>(sync.fence));
-        return;
-    }
-
-#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-    glDeleteSync(sync.sync);
-    CHECK_GL_ERROR(utils::slog.e)
-#endif
-}
-
-OpenGLContext::FenceSync::Status OpenGLContext::clientWaitSync(
-        OpenGLPlatform& platform, FenceSync sync) const noexcept {
-
-    if (UTILS_UNLIKELY(isES2())) {
-        using Status = OpenGLContext::FenceSync::Status;
-        auto const status = platform.waitFence(static_cast<Platform::Fence*>(sync.fence), 0u);
-        switch (status) {
-            case FenceStatus::ERROR:                return Status::FAILURE;
-            case FenceStatus::CONDITION_SATISFIED:  return Status::CONDITION_SATISFIED;
-            case FenceStatus::TIMEOUT_EXPIRED:      return Status ::TIMEOUT_EXPIRED;
-        }
-    }
-
-#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
-    GLenum const status = glClientWaitSync(sync.sync, 0, 0u);
-    CHECK_GL_ERROR(utils::slog.e)
-    using Status = OpenGLContext::FenceSync::Status;
-    switch (status) {
-        case GL_ALREADY_SIGNALED:       return Status::ALREADY_SIGNALED;
-        case GL_TIMEOUT_EXPIRED:        return Status::TIMEOUT_EXPIRED;
-        case GL_CONDITION_SATISFIED:    return Status::CONDITION_SATISFIED;
-        default:                        return Status::FAILURE;
-    }
-#else
-    return FenceSync::Status::FAILURE;
-#endif
-}
-
 } // namesapce filament
diff --git a/filament/backend/src/opengl/OpenGLContext.h b/filament/backend/src/opengl/OpenGLContext.h
index 60930213719..cdad65b8a7d 100644
--- a/filament/backend/src/opengl/OpenGLContext.h
+++ b/filament/backend/src/opengl/OpenGLContext.h
@@ -150,26 +150,6 @@ class OpenGLContext {
     void deleteBuffers(GLsizei n, const GLuint* buffers, GLenum target) noexcept;
     void deleteVertexArrays(GLsizei n, const GLuint* arrays) noexcept;
 
-    // we abstract GL's sync because it's not available in ES2, but we can use EGL's sync
-    // instead, if available.
-    struct FenceSync {
-        enum class Status {
-            ALREADY_SIGNALED,
-            TIMEOUT_EXPIRED,
-            CONDITION_SATISFIED,
-            FAILURE
-        };
-        union {
-            void* fence;
-            GLsync sync;
-        };
-    };
-
-    FenceSync createFenceSync(OpenGLPlatform& platform) noexcept;
-    void destroyFenceSync(OpenGLPlatform& platform, FenceSync sync) noexcept;
-    FenceSync::Status clientWaitSync(OpenGLPlatform& platform, FenceSync sync) const noexcept;
-
-
     // glGet*() values
     struct {
         GLfloat max_anisotropy;
diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp
index fac8bc7d4a9..98f50859e83 100644
--- a/filament/backend/src/opengl/OpenGLDriver.cpp
+++ b/filament/backend/src/opengl/OpenGLDriver.cpp
@@ -231,13 +231,15 @@ void OpenGLDriver::terminate() {
     // wait for the GPU to finish executing all commands
     glFinish();
 
+    mShaderCompilerService.terminate();
+
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
     // and make sure to execute all the GpuCommandCompleteOps callbacks
     executeGpuCommandsCompleteOps();
 
     // because we called glFinish(), all callbacks should have been executed
     assert_invariant(mGpuCommandCompleteOps.empty());
 
-#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
     if (!getContext().isES2()) {
         for (auto& item: mSamplerMap) {
             mContext.unbindSampler(item.second);
@@ -249,8 +251,6 @@ void OpenGLDriver::terminate() {
 
     delete mTimerQueryImpl;
 
-    mShaderCompilerService.terminate();
-
     mPlatform.terminate();
 }
 
@@ -436,11 +436,7 @@ Handle<HwRenderTarget> OpenGLDriver::createRenderTargetS() noexcept {
 }
 
 Handle<HwFence> OpenGLDriver::createFenceS() noexcept {
-    return initHandle<HwFence>();
-}
-
-Handle<HwSync> OpenGLDriver::createSyncS() noexcept {
-    return initHandle<GLSync>();
+    return initHandle<GLFence>();
 }
 
 Handle<HwSwapChain> OpenGLDriver::createSwapChainS() noexcept {
@@ -1352,28 +1348,45 @@ void OpenGLDriver::createRenderTargetR(Handle<HwRenderTarget> rth,
 void OpenGLDriver::createFenceR(Handle<HwFence> fh, int) {
     DEBUG_MARKER()
 
-    HwFence* f = handle_cast<HwFence*>(fh);
-    f->fence = mPlatform.createFence();
-}
+    GLFence* f = handle_cast<GLFence*>(fh);
 
-void OpenGLDriver::createSyncR(Handle<HwSync> fh, int) {
-    DEBUG_MARKER()
-
-    GLSync* f = handle_cast<GLSync *>(fh);
-    f->handle = mContext.createFenceSync(mPlatform);
-
-    // check the status of the sync once a frame, since we must do this from our thread
-    std::weak_ptr<GLSync::State> const weak = f->result;
-    runEveryNowAndThen(
-            [&platform = mPlatform, context = mContext, handle = f->handle, weak]() -> bool {
-        auto result = weak.lock();
-        if (result) {
-            auto const status = context.clientWaitSync(platform, handle);
-            result->status.store(status, std::memory_order_relaxed);
-            return (status != OpenGLContext::FenceSync::Status::TIMEOUT_EXPIRED);
-        }
-        return true;
-    });
+    if (mPlatform.canCreateFence() || mContext.isES2()) {
+        assert_invariant(mPlatform.canCreateFence());
+        f->fence = mPlatform.createFence();
+    }
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+    else {
+        f->sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+        std::weak_ptr<GLFence::State> const weak = f->state;
+        runEveryNowAndThen(
+                [sync = f->sync, weak]() -> bool {
+                    auto state = weak.lock();
+                    if (state) {
+                        FenceStatus fenceStatus;
+                        GLenum const syncStatus = glClientWaitSync(sync, 0, 0u);
+                        switch (syncStatus) {
+                            case GL_TIMEOUT_EXPIRED:
+                                fenceStatus = FenceStatus::TIMEOUT_EXPIRED;
+                                break;
+                            case GL_ALREADY_SIGNALED:
+                            case GL_CONDITION_SATISFIED:
+                                fenceStatus = FenceStatus::CONDITION_SATISFIED;
+                                break;
+                            default:
+                                fenceStatus = FenceStatus::ERROR;
+                                break;
+                        }
+                        if (fenceStatus != FenceStatus::TIMEOUT_EXPIRED) {
+                            std::lock_guard const lock(state->lock);
+                            state->status = fenceStatus;
+                            state->cond.notify_all();
+                        }
+                        return (fenceStatus != FenceStatus::TIMEOUT_EXPIRED);
+                    }
+                    return true;
+                });
+    }
+#endif
 }
 
 void OpenGLDriver::createSwapChainR(Handle<HwSwapChain> sch, void* nativeWindow, uint64_t flags) {
@@ -1572,15 +1585,6 @@ void OpenGLDriver::destroyTimerQuery(Handle<HwTimerQuery> tqh) {
     }
 }
 
-void OpenGLDriver::destroySync(Handle<HwSync> sh) {
-    DEBUG_MARKER()
-    if (sh) {
-        GLSync* s = handle_cast<GLSync*>(sh);
-        mContext.destroyFenceSync(mPlatform, s->handle);
-        destruct(sh, s);
-    }
-}
-
 // ------------------------------------------------------------------------------------------------
 // Synchronous APIs
 // These are called on the application's thread
@@ -1683,24 +1687,46 @@ int64_t OpenGLDriver::getStreamTimestamp(Handle<HwStream> sh) {
 
 void OpenGLDriver::destroyFence(Handle<HwFence> fh) {
     if (fh) {
-        HwFence* f = handle_cast<HwFence*>(fh);
-        mPlatform.destroyFence(f->fence);
+        GLFence* f = handle_cast<GLFence*>(fh);
+        if (mPlatform.canCreateFence() || mContext.isES2()) {
+            mPlatform.destroyFence(f->fence);
+        }
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+        else {
+            glDeleteSync(f->sync);
+            CHECK_GL_ERROR(utils::slog.e)
+        }
+#endif
         destruct(fh, f);
     }
 }
 
 FenceStatus OpenGLDriver::wait(Handle<HwFence> fh, uint64_t timeout) {
     if (fh) {
-        HwFence* f = handle_cast<HwFence*>(fh);
-        if (f->fence == nullptr) {
-            // we can end-up here if:
-            // - the platform doesn't support h/w fences
-            // - wait() was called before the fence was asynchronously created.
-            //   This case is not handled in OpenGLDriver but is handled by FFence.
-            //   TODO: move FFence logic into the backend.
-            return FenceStatus::ERROR;
+        GLFence* f = handle_cast<GLFence*>(fh);
+        if (mPlatform.canCreateFence() || mContext.isES2()) {
+            if (f->fence == nullptr) {
+                // we can end-up here if:
+                // - the platform doesn't support h/w fences
+                if (UTILS_UNLIKELY(!mPlatform.canCreateFence())) {
+                    return FenceStatus::ERROR;
+                }
+                // - wait() was called before the fence was asynchronously created.
+                return FenceStatus::TIMEOUT_EXPIRED;
+            }
+            return mPlatform.waitFence(f->fence, timeout);
         }
-        return mPlatform.waitFence(f->fence, timeout);
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+        else {
+            assert_invariant(f->state);
+            // clamp: nanoseconds is signed and wait_for adds it to now(); stay far below INT64_MAX
+            int64_t const ns = timeout > uint64_t(INT64_MAX / 2) ? INT64_MAX / 2 : int64_t(timeout);
+            std::unique_lock lock(f->state->lock);
+            f->state->cond.wait_for(lock, std::chrono::nanoseconds(ns),
+                    [&s = *f->state] { return s.status != FenceStatus::TIMEOUT_EXPIRED; });
+            return f->state->status;
+        }
+#endif
     }
     return FenceStatus::ERROR;
 }
@@ -2588,25 +2614,6 @@ bool OpenGLDriver::getTimerQueryValue(Handle<HwTimerQuery> tqh, uint64_t* elapse
     return true;
 }
 
-SyncStatus OpenGLDriver::getSyncStatus(Handle<HwSync> sh) {
-    GLSync* s = handle_cast<GLSync*>(sh);
-    if (!s->result) {
-        return SyncStatus::NOT_SIGNALED;
-    }
-    auto status = s->result->status.load(std::memory_order_relaxed);
-    using Status = OpenGLContext::FenceSync::Status;
-    switch (status) {
-        case Status::CONDITION_SATISFIED:
-        case Status::ALREADY_SIGNALED:
-            return SyncStatus::SIGNALED;
-        case Status::TIMEOUT_EXPIRED:
-            return SyncStatus::NOT_SIGNALED;
-        case Status::FAILURE:
-        default:
-            return SyncStatus::ERROR;
-    }
-}
-
 void OpenGLDriver::compilePrograms(CompilerPriorityQueue priority,
         CallbackHandler* handler, CallbackHandler::Callback callback, void* user) {
     if (callback) {
@@ -3180,30 +3187,16 @@ void OpenGLDriver::readBufferSubData(backend::BufferObjectHandle boh,
 #endif
 }
 
-void OpenGLDriver::whenGpuCommandsComplete(std::function<void()> fn) noexcept {
-    OpenGLContext::FenceSync sync = mContext.createFenceSync(mPlatform);
-    mGpuCommandCompleteOps.emplace_back(sync, std::move(fn));
-    CHECK_GL_ERROR(utils::slog.e)
-}
 
 void OpenGLDriver::runEveryNowAndThen(std::function<bool()> fn) noexcept {
     mEveryNowAndThenOps.push_back(std::move(fn));
 }
 
-void OpenGLDriver::executeGpuCommandsCompleteOps() noexcept {
-    auto& v = mGpuCommandCompleteOps;
+void OpenGLDriver::executeEveryNowAndThenOps() noexcept {
+    auto& v = mEveryNowAndThenOps;
     auto it = v.begin();
     while (it != v.end()) {
-        using Status = OpenGLContext::FenceSync::Status;
-        auto const status = mContext.clientWaitSync(mPlatform, it->first);
-        if (status == Status::ALREADY_SIGNALED || status == Status::CONDITION_SATISFIED) {
-            it->second();
-            mContext.destroyFenceSync(mPlatform, it->first);
-            it = v.erase(it);
-        } else if (UTILS_UNLIKELY(status == Status::FAILURE)) {
-            // This should never happen, but is very problematic if it does, as we might leak
-            // some data depending on what the callback does. However, we clean up our own state.
-            mContext.destroyFenceSync(mPlatform, it->first);
+        if ((*it)()) {
             it = v.erase(it);
         } else {
             ++it;
@@ -3211,17 +3204,41 @@ void OpenGLDriver::executeGpuCommandsCompleteOps() noexcept {
     }
 }
 
-void OpenGLDriver::executeEveryNowAndThenOps() noexcept {
-    auto& v = mEveryNowAndThenOps;
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
+void OpenGLDriver::whenGpuCommandsComplete(const std::function<void()>& fn) noexcept {
+    GLsync sync = glFenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+    mGpuCommandCompleteOps.emplace_back(sync, fn);
+    CHECK_GL_ERROR(utils::slog.e)
+}
+
+void OpenGLDriver::executeGpuCommandsCompleteOps() noexcept {
+    auto& v = mGpuCommandCompleteOps;
     auto it = v.begin();
     while (it != v.end()) {
-        if ((*it)()) {
-            it = v.erase(it);
-        } else {
-            ++it;
+        auto const& [sync, fn] = *it;
+        GLenum const syncStatus = glClientWaitSync(sync, 0, 0u);
+        switch (syncStatus) {
+            case GL_TIMEOUT_EXPIRED:
+                // not ready
+                ++it;
+                break;
+            case GL_ALREADY_SIGNALED:
+            case GL_CONDITION_SATISFIED:
+                // ready
+                it->second();
+                glDeleteSync(sync);
+                it = v.erase(it);
+                break;
+            default:
+                // This should never happen, but is very problematic if it does, as we might leak
+                // some data depending on what the callback does. However, we clean up our own state.
+                glDeleteSync(sync);
+                it = v.erase(it);
+                break;
         }
     }
 }
+#endif
 
 // ------------------------------------------------------------------------------------------------
 // Rendering ops
@@ -3296,19 +3313,19 @@ void OpenGLDriver::flush(int) {
     if (!gl.bugs.disable_glFlush) {
         glFlush();
     }
-    mTimerQueryImpl->flush();
 }
 
 void OpenGLDriver::finish(int) {
     DEBUG_MARKER()
     glFinish();
-    mTimerQueryImpl->flush();
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
     executeGpuCommandsCompleteOps();
+    assert_invariant(mGpuCommandCompleteOps.empty());
+#endif
     executeEveryNowAndThenOps();
     // Note: since we executed a glFinish(), all pending tasks should be done
-    assert_invariant(mGpuCommandCompleteOps.empty());
 
-    // however, some tasks rely on a separated thread to publish their result (e.g.
+    // However, some tasks rely on a separated thread to publish their result (e.g.
     // endTimerQuery), so the result could very well not be ready, and the task will
     // linger a bit longer, this is only true for mEveryNowAndThenOps tasks.
     // The fallout of this is that we can't assert that mEveryNowAndThenOps is empty.
diff --git a/filament/backend/src/opengl/OpenGLDriver.h b/filament/backend/src/opengl/OpenGLDriver.h
index efb512b5265..bc5830044e0 100644
--- a/filament/backend/src/opengl/OpenGLDriver.h
+++ b/filament/backend/src/opengl/OpenGLDriver.h
@@ -151,8 +151,7 @@ class OpenGLDriver final : public DriverBase {
 
     struct GLTimerQuery : public HwTimerQuery {
         struct State {
-            std::atomic<uint64_t> elapsed{};
-            std::atomic_bool available{};
+            std::atomic<int64_t> elapsed{};
         };
         struct {
             GLuint query = 0;
@@ -196,14 +195,15 @@ class OpenGLDriver final : public DriverBase {
         TargetBufferFlags targets = {};
     };
 
-    struct GLSync : public HwSync {
-        using HwSync::HwSync;
+    struct GLFence : public HwFence {
+        using HwFence::HwFence;
         struct State {
-            std::atomic<OpenGLContext::FenceSync::Status> status{
-                OpenGLContext::FenceSync::Status::TIMEOUT_EXPIRED };
+            std::mutex lock;                // guards `status`
+            std::condition_variable cond;   // notified when `status` leaves TIMEOUT_EXPIRED
+            FenceStatus status{ FenceStatus::TIMEOUT_EXPIRED };
         };
-        OpenGLContext::FenceSync handle{};
-        std::shared_ptr<State> result{ std::make_shared<GLSync::State>() };
+        GLsync sync{};  // null unless createFenceR() took the glFenceSync() path
+        std::shared_ptr<State> state{ std::make_shared<GLFence::State>() };
     };
 
     OpenGLDriver(OpenGLDriver const&) = delete;
@@ -383,10 +383,12 @@ class OpenGLDriver final : public DriverBase {
 
     void updateTextureLodRange(GLTexture* texture, int8_t targetLevel) noexcept;
 
+#ifndef FILAMENT_SILENCE_NOT_SUPPORTED_BY_ES2
     // tasks executed on the main thread after the fence signaled
-    void whenGpuCommandsComplete(std::function<void()> fn) noexcept;
+    void whenGpuCommandsComplete(const std::function<void()>& fn) noexcept;
     void executeGpuCommandsCompleteOps() noexcept;
-    std::vector<std::pair<OpenGLContext::FenceSync, std::function<void()>>> mGpuCommandCompleteOps;
+    std::vector<std::pair<GLsync, std::function<void()>>> mGpuCommandCompleteOps;
+#endif
 
     // tasks regularly executed on the main thread at until they return true
     void runEveryNowAndThen(std::function<bool()> fn) noexcept;
diff --git a/filament/backend/src/opengl/OpenGLTimerQuery.cpp b/filament/backend/src/opengl/OpenGLTimerQuery.cpp
index 9b7d836540e..f7e537383de 100644
--- a/filament/backend/src/opengl/OpenGLTimerQuery.cpp
+++ b/filament/backend/src/opengl/OpenGLTimerQuery.cpp
@@ -19,6 +19,7 @@
 #include <backend/platforms/OpenGLPlatform.h>
 
 #include <utils/compiler.h>
+#include <utils/JobSystem.h>
 #include <utils/Log.h>
 #include <utils/Systrace.h>
 #include <utils/debug.h>
@@ -41,9 +42,6 @@ TimerQueryNative::TimerQueryNative(OpenGLContext& context) : mContext(context) {
 
 TimerQueryNative::~TimerQueryNative() = default;
 
-void TimerQueryNative::flush() {
-}
-
 void TimerQueryNative::beginTimeElapsedQuery(GLTimerQuery* query) {
     mContext.procs.beginQuery(GL_TIME_ELAPSED, query->gl.query);
     CHECK_GL_ERROR(utils::slog.e)
@@ -77,6 +75,8 @@ OpenGLTimerQueryFence::OpenGLTimerQueryFence(OpenGLPlatform& platform)
         : mPlatform(platform) {
     mQueue.reserve(2);
     mThread = std::thread([this]() {
+        utils::JobSystem::setThreadName("OpenGLTimerQueryFence");
+        utils::JobSystem::setThreadPriority(utils::JobSystem::Priority::URGENT_DISPLAY);
         auto& queue = mQueue;
         bool exitRequested;
         do {
@@ -111,59 +111,55 @@ void OpenGLTimerQueryFence::enqueue(OpenGLTimerQueryFence::Job&& job) {
     mCondition.notify_one();
 }
 
-void OpenGLTimerQueryFence::flush() {
-    // Use calls to flush() as a proxy for when the GPU work started.
-    GLTimerQuery* query = mActiveQuery;
-    if (query) {
-        uint64_t const elapsed = query->gl.emulation->elapsed.load(std::memory_order_relaxed);
-        if (!elapsed) {
-            uint64_t const now = clock::now().time_since_epoch().count();
-            query->gl.emulation->elapsed.store(now, std::memory_order_relaxed);
-            //SYSTRACE_CONTEXT();
-            //SYSTRACE_ASYNC_BEGIN("gpu", query->gl.query);
-        }
-    }
-}
-
 void OpenGLTimerQueryFence::beginTimeElapsedQuery(GLTimerQuery* query) {
-    assert_invariant(!mActiveQuery);
-    // We can't use a fence to figure out when a GPU operation starts (only when it finishes)
-    // so instead, we use when glFlush() was issued as a proxy.
     if (UTILS_UNLIKELY(!query->gl.emulation)) {
         query->gl.emulation = std::make_shared<GLTimerQuery::State>();
     }
-    query->gl.emulation->elapsed.store(0, std::memory_order_relaxed);
-    query->gl.emulation->available.store(false);
-    mActiveQuery = query;
+    query->gl.emulation->elapsed.store(0);
+    Platform::Fence* fence = mPlatform.createFence();
+    std::weak_ptr<GLTimerQuery::State> const weak = query->gl.emulation;
+    uint32_t const cookie = query->gl.query;
+    push([&platform = mPlatform, fence, weak, cookie]() {
+        auto emulation = weak.lock();
+        if (emulation) {
+            platform.waitFence(fence, FENCE_WAIT_FOR_EVER);
+            int64_t const then = clock::now().time_since_epoch().count();
+            emulation->elapsed.store(-then, std::memory_order_relaxed);
+            SYSTRACE_CONTEXT();
+            SYSTRACE_ASYNC_BEGIN("OpenGLTimerQueryFence", cookie);
+            (void)cookie;
+        }
+        platform.destroyFence(fence);
+    });
 }
 
 void OpenGLTimerQueryFence::endTimeElapsedQuery(GLTimerQuery* query) {
-    assert_invariant(mActiveQuery);
     Platform::Fence* fence = mPlatform.createFence();
     std::weak_ptr<GLTimerQuery::State> const weak = query->gl.emulation;
-    mActiveQuery = nullptr;
-    //uint32_t cookie = cookie = query->gl.query;
-    push([&platform = mPlatform, fence, weak]() {
+    uint32_t const cookie = query->gl.query;
+    push([&platform = mPlatform, fence, weak, cookie]() {
         auto emulation = weak.lock();
         if (emulation) {
             platform.waitFence(fence, FENCE_WAIT_FOR_EVER);
-            auto now = clock::now().time_since_epoch().count();
-            auto then = emulation->elapsed.load(std::memory_order_relaxed);
-            emulation->elapsed.store(now - then, std::memory_order_relaxed);
-            emulation->available.store(true);
-            //SYSTRACE_CONTEXT();
-            //SYSTRACE_ASYNC_END("gpu", cookie);
+            int64_t const now = clock::now().time_since_epoch().count();
+            int64_t const then = emulation->elapsed.load(std::memory_order_relaxed);
+            assert_invariant(then < 0);
+            emulation->elapsed.store(now + then, std::memory_order_relaxed);
+            SYSTRACE_CONTEXT();
+            SYSTRACE_ASYNC_END("OpenGLTimerQueryFence", cookie);
+            (void)cookie;
         }
         platform.destroyFence(fence);
     });
 }
 
 bool OpenGLTimerQueryFence::queryResultAvailable(GLTimerQuery* query) {
-    return query->gl.emulation->available.load();
+    return query->gl.emulation->elapsed.load(std::memory_order_relaxed) > 0;
 }
 
 uint64_t OpenGLTimerQueryFence::queryResult(GLTimerQuery* query) {
-    return query->gl.emulation->elapsed;
+    int64_t const result = query->gl.emulation->elapsed.load(std::memory_order_relaxed);
+    return result > 0 ? result : 0;
 }
 
 // ------------------------------------------------------------------------------------------------
@@ -172,30 +168,30 @@ TimerQueryFallback::TimerQueryFallback() = default;
 
 TimerQueryFallback::~TimerQueryFallback() = default;
 
-void TimerQueryFallback::flush() {
-}
-
 void TimerQueryFallback::beginTimeElapsedQuery(OpenGLTimerQueryInterface::GLTimerQuery* query) {
     if (!query->gl.emulation) {
         query->gl.emulation = std::make_shared<GLTimerQuery::State>();
     }
     // this implementation clearly doesn't work at all, but we have no h/w support
-    query->gl.emulation->available.store(false, std::memory_order_relaxed);
-    query->gl.emulation->elapsed = clock::now().time_since_epoch().count();
+    int64_t const then = clock::now().time_since_epoch().count();
+    query->gl.emulation->elapsed.store(-then, std::memory_order_relaxed);
 }
 
 void TimerQueryFallback::endTimeElapsedQuery(OpenGLTimerQueryInterface::GLTimerQuery* query) {
     // this implementation clearly doesn't work at all, but we have no h/w support
-    query->gl.emulation->elapsed = clock::now().time_since_epoch().count() - query->gl.emulation->elapsed;
-    query->gl.emulation->available.store(true, std::memory_order_relaxed);
+    int64_t const now = clock::now().time_since_epoch().count();
+    int64_t const then = query->gl.emulation->elapsed.load(std::memory_order_relaxed);
+    assert_invariant(then < 0);
+    query->gl.emulation->elapsed.store(now + then, std::memory_order_relaxed);
 }
 
 bool TimerQueryFallback::queryResultAvailable(OpenGLTimerQueryInterface::GLTimerQuery* query) {
-    return query->gl.emulation->available.load(std::memory_order_relaxed);
+    return query->gl.emulation->elapsed.load(std::memory_order_relaxed) > 0;
 }
 
 uint64_t TimerQueryFallback::queryResult(OpenGLTimerQueryInterface::GLTimerQuery* query) {
-    return query->gl.emulation->elapsed;
+    int64_t const result = query->gl.emulation->elapsed.load(std::memory_order_relaxed);
+    return result > 0 ? result : 0;
 }
 
 } // namespace filament::backend
diff --git a/filament/backend/src/opengl/OpenGLTimerQuery.h b/filament/backend/src/opengl/OpenGLTimerQuery.h
index 7df3f937a8f..1edee992f8a 100644
--- a/filament/backend/src/opengl/OpenGLTimerQuery.h
+++ b/filament/backend/src/opengl/OpenGLTimerQuery.h
@@ -41,7 +41,6 @@ class OpenGLTimerQueryInterface {
 
 public:
     virtual ~OpenGLTimerQueryInterface();
-    virtual void flush() = 0;
     virtual void beginTimeElapsedQuery(GLTimerQuery* query) = 0;
     virtual void endTimeElapsedQuery(GLTimerQuery* query) = 0;
     virtual bool queryResultAvailable(GLTimerQuery* query) = 0;
@@ -55,7 +54,6 @@ class TimerQueryNative : public OpenGLTimerQueryInterface {
     explicit TimerQueryNative(OpenGLContext& context);
     ~TimerQueryNative() override;
 private:
-    void flush() override;
     void beginTimeElapsedQuery(GLTimerQuery* query) override;
     void endTimeElapsedQuery(GLTimerQuery* query) override;
     bool queryResultAvailable(GLTimerQuery* query) override;
@@ -71,7 +69,6 @@ class OpenGLTimerQueryFence : public OpenGLTimerQueryInterface {
     ~OpenGLTimerQueryFence() override;
 private:
     using Job = std::function<void()>;
-    void flush() override;
     void beginTimeElapsedQuery(GLTimerQuery* query) override;
     void endTimeElapsedQuery(GLTimerQuery* query) override;
     bool queryResultAvailable(GLTimerQuery* query) override;
@@ -89,7 +86,6 @@ class OpenGLTimerQueryFence : public OpenGLTimerQueryInterface {
     mutable utils::Condition mCondition;
     std::vector<Job> mQueue;
     bool mExitRequested = false;
-    GLTimerQuery* mActiveQuery = nullptr;
 };
 
 class TimerQueryFallback : public OpenGLTimerQueryInterface {
@@ -97,7 +93,6 @@ class TimerQueryFallback : public OpenGLTimerQueryInterface {
     explicit TimerQueryFallback();
     ~TimerQueryFallback() override;
 private:
-    void flush() override;
     void beginTimeElapsedQuery(GLTimerQuery* query) override;
     void endTimeElapsedQuery(GLTimerQuery* query) override;
     bool queryResultAvailable(GLTimerQuery* query) override;
diff --git a/filament/backend/src/opengl/ShaderCompilerService.cpp b/filament/backend/src/opengl/ShaderCompilerService.cpp
index cef01163b5f..59b85d134db 100644
--- a/filament/backend/src/opengl/ShaderCompilerService.cpp
+++ b/filament/backend/src/opengl/ShaderCompilerService.cpp
@@ -87,7 +87,6 @@ struct ShaderCompilerService::ProgramToken {
 
     BlobCacheKey key;
     std::future<ProgramBinary> binary;
-    CompilerPriorityQueue priorityQueue = CompilerPriorityQueue::HIGH;
     bool canceled = false;
 };
 
@@ -101,6 +100,14 @@ void* ShaderCompilerService::getUserData(const program_token_t& token) noexcept
 
 // ------------------------------------------------------------------------------------------------
 
+ShaderCompilerService::CompilerThreadPool::CompilerThreadPool() noexcept = default;
+
+ShaderCompilerService::CompilerThreadPool::~CompilerThreadPool() noexcept {
+    assert_invariant(mCompilerThreads.empty());
+    assert_invariant(mQueues[0].empty());
+    assert_invariant(mQueues[1].empty());
+}
+
 void ShaderCompilerService::CompilerThreadPool::init(
         bool useSharedContexts, uint32_t threadCount, OpenGLPlatform& platform) noexcept {
 
@@ -148,12 +155,23 @@ void ShaderCompilerService::CompilerThreadPool::init(
     }
 }
 
+auto ShaderCompilerService::CompilerThreadPool::find(
+        program_token_t const& token) -> std::pair<Queue&, Queue::iterator> {
+    for (auto&& q: mQueues) {
+        auto pos = std::find_if(q.begin(), q.end(), [&token](auto&& item) {
+            return item.first == token;
+        });
+        if (pos != q.end()) {
+            return { q, pos };
+        }
+    }
+    // this can happen if the program is being processed right now
+    return { mQueues[0], mQueues[0].end() };
+}
+
 auto ShaderCompilerService::CompilerThreadPool::dequeue(program_token_t const& token) -> Job {
-    auto& q = mQueues[size_t(token->priorityQueue)];
-    auto pos = std::find_if(q.begin(), q.end(), [&token](auto&& item) {
-        return item.first == token;
-    });
     Job job;
+    auto&& [q, pos] = find(token);
     if (pos != q.end()) {
         std::swap(job, pos->second);
         q.erase(pos);
@@ -169,22 +187,30 @@ void ShaderCompilerService::CompilerThreadPool::makeUrgent(program_token_t const
     mQueueCondition.notify_one();
 }
 
-void ShaderCompilerService::CompilerThreadPool::queue(program_token_t const& token, Job&& job) {
+void ShaderCompilerService::CompilerThreadPool::queue(CompilerPriorityQueue priorityQueue,
+        program_token_t const& token, Job&& job) {
     std::unique_lock const lock(mQueueLock);
-    mQueues[size_t(token->priorityQueue)].emplace_back(token, std::move(job));
+    mQueues[size_t(priorityQueue)].emplace_back(token, std::move(job));
     mQueueCondition.notify_one();
 }
 
-void ShaderCompilerService::CompilerThreadPool::exit() noexcept {
+void ShaderCompilerService::CompilerThreadPool::terminate() noexcept {
     std::unique_lock lock(mQueueLock);
     mExitRequested = true;
     mQueueCondition.notify_all();
     lock.unlock();
+
     for (auto& thread: mCompilerThreads) {
         if (thread.joinable()) {
             thread.join();
         }
     }
+    mCompilerThreads.clear();
+
+    // Clear all the queues, dropping the remaining jobs. This relies on the jobs being cancelable.
+    for (auto&& q : mQueues) {
+        q.clear();
+    }
 }
 
 // ------------------------------------------------------------------------------------------------
@@ -204,7 +230,9 @@ void ShaderCompilerService::init() noexcept {
         //   also glProgramBinary blocks if other threads are compiling.
         // - on Mali shader compilation can be multithreaded, but program linking happens on
         //   a single service thread, so we don't bother using more than one thread either.
-        // - on desktop we could use more threads, tbd.
+        // - on macOS (M1 MacBook Pro/Ventura) there is a global lock around all GL APIs when using
+        //   a shared context, so parallel shader compilation yields no benefit.
+        // - on windows/linux we could use more threads, tbd.
         if (mDriver.mPlatform.isExtraContextSupported()) {
             mShaderCompilerThreadCount = 1;
             mCompilerThreadPool.init(mUseSharedContext,
@@ -214,8 +242,16 @@ void ShaderCompilerService::init() noexcept {
 }
 
 void ShaderCompilerService::terminate() noexcept {
-    // FIXME: could we have some user callbacks pending here?
-    mCompilerThreadPool.exit();
+    mCompilerThreadPool.terminate();
+
+    // There may still be pending callbacks here; they must be scheduled so they are not lost.
+    for (auto&& op: mRunAtNextTickOps) {
+        Job const& job = std::get<2>(op);
+        if (job.callback) {
+            mDriver.scheduleCallback(job.handler, job.user, job.callback);
+        }
+    }
+    mRunAtNextTickOps.clear();
 }
 
 ShaderCompilerService::program_token_t ShaderCompilerService::createProgram(
@@ -230,13 +266,13 @@ ShaderCompilerService::program_token_t ShaderCompilerService::createProgram(
 
     token->gl.program = OpenGLBlobCache::retrieve(&token->key, mDriver.mPlatform, program);
     if (!token->gl.program) {
+        CompilerPriorityQueue const priorityQueue = program.getPriorityQueue();
         if (mShaderCompilerThreadCount) {
             // set the future in the token and pass the promise to the worker thread
             std::promise<ProgramToken::ProgramBinary> promise;
             token->binary = promise.get_future();
-            token->priorityQueue = program.getPriorityQueue();
             // queue a compile job
-            mCompilerThreadPool.queue(token,
+            mCompilerThreadPool.queue(priorityQueue, token,
                     [this, &gl, promise = std::move(promise),
                             program = std::move(program), token]() mutable {
 
@@ -304,7 +340,7 @@ ShaderCompilerService::program_token_t ShaderCompilerService::createProgram(
 
         }
 
-        runAtNextTick(token->priorityQueue, token, [this, token]() {
+        runAtNextTick(priorityQueue, token, [this, token](Job const&) {
             if (mShaderCompilerThreadCount) {
                 if (!token->gl.program) {
                     // TODO: see if we could completely eliminate this callback here
@@ -429,16 +465,19 @@ void ShaderCompilerService::notifyWhenAllProgramsAreReady(CompilerPriorityQueue
 
     if (KHR_parallel_shader_compile || mShaderCompilerThreadCount) {
         // list all programs up to this point, both low and high priority
-        utils::FixedCapacityVector<program_token_t, std::allocator<program_token_t>, false> tokens;
-        tokens.reserve(mRunAtNextTickOps.size());
-        for (auto& [priority_, token, fn_] : mRunAtNextTickOps) {
-            if (token) {
+
+        using TokenVector = utils::FixedCapacityVector<
+                program_token_t, std::allocator<program_token_t>, false>;
+        TokenVector tokens{ TokenVector::with_capacity(mRunAtNextTickOps.size()) };
+
+        for (auto& [itemPriority, token, job] : mRunAtNextTickOps) {
+            if (token && job.fn && itemPriority == priority) {
                 tokens.push_back(token);
             }
         }
 
-        runAtNextTick(priority, nullptr,
-                [this, tokens = std::move(tokens), handler, user, callback]() {
+        runAtNextTick(priority, nullptr, {
+                [this, tokens = std::move(tokens)](Job const& job) {
             for (auto const& token : tokens) {
                 assert_invariant(token);
                 if (!isProgramReady(token)) {
@@ -446,23 +485,23 @@ void ShaderCompilerService::notifyWhenAllProgramsAreReady(CompilerPriorityQueue
                     return false;
                 }
             }
-            if (callback) {
+            if (job.callback) {
                 // all programs are ready, we can call the callbacks
-                mDriver.scheduleCallback(handler, user, callback);
+                mDriver.scheduleCallback(job.handler, job.user, job.callback);
             }
             // and we're done
             return true;
-        });
+        }, handler, user, callback });
 
         return;
     }
 
     // we don't have KHR_parallel_shader_compile
 
-    runAtNextTick(priority, nullptr, [this, handler, user, callback]() {
-        mDriver.scheduleCallback(handler, user, callback);
+    runAtNextTick(priority, nullptr, {[this](Job const& job) {
+        mDriver.scheduleCallback(job.handler, job.user, job.callback);
         return true;
-    });
+    }, handler, user, callback });
 
     // TODO: we could spread the compiles over several frames, the tick() below then is not
     //       needed here. We keep it for now as to not change the current behavior too much.
@@ -661,8 +700,8 @@ float u16tofp32(highp uint v) {
     v <<= 16u;
     highp uint s = v & 0x80000000u;
     highp uint n = v & 0x7FFFFFFFu;
-    highp uint nz = n == 0u ? 0u : 0xFFFFFFFF;
-    return uintBitsToFloat(s | ((((n >> 3u) + (0x70u << 23))) & nz));
+    highp uint nz = (n == 0u) ? 0u : 0xFFFFFFFFu;
+    return uintBitsToFloat(s | ((((n >> 3u) + (0x70u << 23u))) & nz));
 }
 vec2 unpackHalf2x16(highp uint v) {
     return vec2(u16tofp32(v&0xFFFFu), u16tofp32(v>>16u));
@@ -670,11 +709,11 @@ vec2 unpackHalf2x16(highp uint v) {
 uint fp32tou16(float val) {
     uint f32 = floatBitsToUint(val);
     uint f16 = 0u;
-    uint sign = (f32 >> 16) & 0x8000u;
-    int exponent = int((f32 >> 23) & 0xFFu) - 127;
+    uint sign = (f32 >> 16u) & 0x8000u;
+    int exponent = int((f32 >> 23u) & 0xFFu) - 127;
     uint mantissa = f32 & 0x007FFFFFu;
     if (exponent > 15) {
-        f16 = sign | (0x1Fu << 10);
+        f16 = sign | (0x1Fu << 10u);
     } else if (exponent > -15) {
         exponent += 15;
         mantissa >>= 13;
@@ -687,7 +726,7 @@ uint fp32tou16(float val) {
 highp uint packHalf2x16(vec2 v) {
     highp uint x = fp32tou16(v.x);
     highp uint y = fp32tou16(v.y);
-    return (y << 16) | x;
+    return (y << 16u) | x;
 }
 )"sv;
     }
@@ -748,14 +787,14 @@ GLuint ShaderCompilerService::linkProgram(OpenGLContext& context,
 // ------------------------------------------------------------------------------------------------
 
 void ShaderCompilerService::runAtNextTick(CompilerPriorityQueue priority,
-        const program_token_t& token, std::function<bool()> fn) noexcept {
+        const program_token_t& token, Job job) noexcept {
     // insert items in order of priority and at the end of the range
     auto& ops = mRunAtNextTickOps;
     auto const pos = std::lower_bound(ops.begin(), ops.end(), priority,
             [](ContainerType const& lhs, CompilerPriorityQueue priorityQueue) {
                 return std::get<0>(lhs) < priorityQueue;
             });
-    ops.emplace(pos, priority, token, std::move(fn));
+    ops.emplace(pos, priority, token, std::move(job));
 
     SYSTRACE_CONTEXT();
     SYSTRACE_VALUE32("ShaderCompilerService Jobs", mRunAtNextTickOps.size());
@@ -779,8 +818,8 @@ void ShaderCompilerService::executeTickOps() noexcept {
     auto& ops = mRunAtNextTickOps;
     auto it = ops.begin();
     while (it != ops.end()) {
-        auto fn = std::get<2>(*it);
-        bool const remove = fn();
+        Job const& job = std::get<2>(*it);
+        bool const remove = job.fn(job);
         if (remove) {
             it = ops.erase(it);
         } else {
diff --git a/filament/backend/src/opengl/ShaderCompilerService.h b/filament/backend/src/opengl/ShaderCompilerService.h
index 833636965cc..8f42dfed6b2 100644
--- a/filament/backend/src/opengl/ShaderCompilerService.h
+++ b/filament/backend/src/opengl/ShaderCompilerService.h
@@ -33,6 +33,7 @@
 #include <memory>
 #include <mutex>
 #include <thread>
+#include <utility>
 #include <vector>
 
 namespace filament::backend {
@@ -93,20 +94,24 @@ class ShaderCompilerService {
 private:
     class CompilerThreadPool {
     public:
+        CompilerThreadPool() noexcept;
+        ~CompilerThreadPool() noexcept;
         using Job = utils::Invocable<void()>;
         void init(bool useSharedContexts, uint32_t threadCount, OpenGLPlatform& platform) noexcept;
-        void exit() noexcept;
-        void queue(program_token_t const& token, Job&& job);
+        void terminate() noexcept;
+        void queue(CompilerPriorityQueue priorityQueue, program_token_t const& token, Job&& job);
         void makeUrgent(program_token_t const& token);
 
     private:
+        using Queue = std::deque<std::pair<program_token_t, Job>>;
         std::vector<std::thread> mCompilerThreads;
         std::atomic_bool mExitRequested{ false };
         std::mutex mQueueLock;
         std::condition_variable mQueueCondition;
-        std::array<std::deque<std::pair<program_token_t, Job>>, 2> mQueues;
-        Job mUrgentJob;
+        std::array<Queue, 2> mQueues;
+        Job mUrgentJob; // needs mQueueLock as well
         Job dequeue(program_token_t const& token); // lock must be held
+        std::pair<Queue&, Queue::iterator> find(program_token_t const& token);
     };
 
     OpenGLDriver& mDriver;
@@ -143,12 +148,26 @@ class ShaderCompilerService {
 
     static bool checkProgramStatus(program_token_t const& token) noexcept;
 
+    struct Job {
+        template<typename FUNC>
+        Job(FUNC&& fn) : fn(std::forward<FUNC>(fn)) {}
+        Job(std::function<bool(Job const& job)> fn,
+                CallbackHandler* handler, void* user, CallbackHandler::Callback callback)
+                : fn(std::move(fn)), handler(handler), user(user), callback(callback) {
+        }
+        std::function<bool(Job const& job)> fn;
+        CallbackHandler* handler = nullptr;
+        void* user = nullptr;
+        CallbackHandler::Callback callback{};
+    };
+
     void runAtNextTick(CompilerPriorityQueue priority,
-            const program_token_t& token, std::function<bool()> fn) noexcept;
+            const program_token_t& token, Job job) noexcept;
     void executeTickOps() noexcept;
     void cancelTickOp(program_token_t token) noexcept;
     // order of insertion is important
-    using ContainerType = std::tuple<CompilerPriorityQueue, program_token_t, std::function<bool()>>;
+
+    using ContainerType = std::tuple<CompilerPriorityQueue, program_token_t, Job>;
     std::vector<ContainerType> mRunAtNextTickOps;
 };
 
diff --git a/filament/backend/src/opengl/platforms/PlatformCocoaGL.mm b/filament/backend/src/opengl/platforms/PlatformCocoaGL.mm
index 195d8d17e70..b19ec4cbbf4 100644
--- a/filament/backend/src/opengl/platforms/PlatformCocoaGL.mm
+++ b/filament/backend/src/opengl/platforms/PlatformCocoaGL.mm
@@ -173,6 +173,9 @@
 }
 
 bool PlatformCocoaGL::isExtraContextSupported() const noexcept {
+    // macOS supports shared contexts; however, it looks like the implementation uses a global
+    // lock around all GL APIs. It's a problem for API calls that take a long time to execute,
+    // such as glCompileShader or glLinkProgram.
     return true;
 }
 
diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp
index 0e2c2e6d1e9..14215434975 100644
--- a/filament/backend/src/vulkan/VulkanDriver.cpp
+++ b/filament/backend/src/vulkan/VulkanDriver.cpp
@@ -503,12 +503,7 @@ void VulkanDriver::destroyRenderTarget(Handle<HwRenderTarget> rth) {
 
 void VulkanDriver::createFenceR(Handle<HwFence> fh, int) {
     VulkanCommandBuffer const& commandBuffer = mCommands->get();
-    construct<VulkanFence>(fh, commandBuffer);
-}
-
-void VulkanDriver::createSyncR(Handle<HwSync> sh, int) {
-    VulkanCommandBuffer const& commandBuffer = mCommands->get();
-    construct<VulkanSync>(sh, commandBuffer);
+    construct<VulkanFence>(fh, commandBuffer.fence);
 }
 
 void VulkanDriver::createSwapChainR(Handle<HwSwapChain> sch, void* nativeWindow, uint64_t flags) {
@@ -584,13 +579,7 @@ Handle<HwRenderTarget> VulkanDriver::createRenderTargetS() noexcept {
 }
 
 Handle<HwFence> VulkanDriver::createFenceS() noexcept {
-    return allocHandle<VulkanFence>();
-}
-
-Handle<HwSync> VulkanDriver::createSyncS() noexcept {
-    Handle<HwSync> sh = allocHandle<VulkanSync>();
-    construct<VulkanSync>(sh);
-    return sh;
+    return initHandle<VulkanFence>();
 }
 
 Handle<HwSwapChain> VulkanDriver::createSwapChainS() noexcept {
@@ -647,11 +636,6 @@ void VulkanDriver::destroyTimerQuery(Handle<HwTimerQuery> tqh) {
     }
 }
 
-void VulkanDriver::destroySync(Handle<HwSync> sh) {
-    destruct<VulkanSync>(sh);
-}
-
-
 Handle<HwStream> VulkanDriver::createStreamNative(void* nativeStream) {
     return {};
 }
@@ -680,6 +664,12 @@ void VulkanDriver::destroyFence(Handle<HwFence> fh) {
 
 FenceStatus VulkanDriver::wait(Handle<HwFence> fh, uint64_t timeout) {
     auto& cmdfence = handle_cast<VulkanFence*>(fh)->fence;
+    if (!cmdfence) {
+        // If wait is called before a fence actually exists, we return timeout. This matches the
+        // current behavior in OpenGLDriver, but we should eventually consider a different error
+        // code.
+        return FenceStatus::TIMEOUT_EXPIRED;
+    }
 
     // Internally we use the VK_INCOMPLETE status to mean "not yet submitted".
     // When this fence gets submitted, its status changes to VK_NOT_READY.
@@ -931,23 +921,6 @@ bool VulkanDriver::getTimerQueryValue(Handle<HwTimerQuery> tqh, uint64_t* elapse
     return true;
 }
 
-SyncStatus VulkanDriver::getSyncStatus(Handle<HwSync> sh) {
-    VulkanSync* sync = handle_cast<VulkanSync*>(sh);
-    if (sync->fence == nullptr) {
-        return SyncStatus::NOT_SIGNALED;
-    }
-    VkResult status = sync->fence->status.load(std::memory_order_relaxed);
-    switch (status) {
-        case VK_SUCCESS: return SyncStatus::SIGNALED;
-        case VK_INCOMPLETE: return SyncStatus::NOT_SIGNALED;
-        case VK_NOT_READY: return SyncStatus::NOT_SIGNALED;
-        case VK_ERROR_DEVICE_LOST: return SyncStatus::ERROR;
-        default:
-            // NOTE: In theory, the fence status must be one of the above values.
-            return SyncStatus::ERROR;
-    }
-}
-
 void VulkanDriver::setExternalImage(Handle<HwTexture> th, void* image) {
 }
 
diff --git a/filament/backend/src/vulkan/VulkanHandles.h b/filament/backend/src/vulkan/VulkanHandles.h
index c234cf53944..fe5fc2227e0 100644
--- a/filament/backend/src/vulkan/VulkanHandles.h
+++ b/filament/backend/src/vulkan/VulkanHandles.h
@@ -129,13 +129,9 @@ struct VulkanRenderPrimitive : public HwRenderPrimitive {
 };
 
 struct VulkanFence : public HwFence {
-    explicit VulkanFence(const VulkanCommandBuffer& commands) : fence(commands.fence) {}
-    std::shared_ptr<VulkanCmdFence> fence;
-};
+    VulkanFence() = default;
+    explicit VulkanFence(std::shared_ptr<VulkanCmdFence> fence) : fence(fence) {}
 
-struct VulkanSync : public HwSync {
-    VulkanSync() = default;
-    explicit VulkanSync(const VulkanCommandBuffer& commands) : fence(commands.fence) {}
     std::shared_ptr<VulkanCmdFence> fence;
 };
 
diff --git a/filament/include/filament/Engine.h b/filament/include/filament/Engine.h
index 3795b89fb54..e4d601b3cde 100644
--- a/filament/include/filament/Engine.h
+++ b/filament/include/filament/Engine.h
@@ -676,6 +676,25 @@ class UTILS_PUBLIC Engine {
     bool destroy(const InstanceBuffer* p);      //!< Destroys an InstanceBuffer object.
     void destroy(utils::Entity e);              //!< Destroys all filament-known components from this entity
 
+    bool isValid(const BufferObject* p);        //!< Tells whether a BufferObject object is valid
+    bool isValid(const VertexBuffer* p);        //!< Tells whether a VertexBuffer object is valid
+    bool isValid(const Fence* p);               //!< Tells whether a Fence object is valid
+    bool isValid(const IndexBuffer* p);         //!< Tells whether an IndexBuffer object is valid
+    bool isValid(const SkinningBuffer* p);      //!< Tells whether a SkinningBuffer object is valid
+    bool isValid(const MorphTargetBuffer* p);   //!< Tells whether a MorphTargetBuffer object is valid
+    bool isValid(const IndirectLight* p);       //!< Tells whether an IndirectLight object is valid
+    bool isValid(const Material* p);            //!< Tells whether a Material object is valid
+    bool isValid(const Renderer* p);            //!< Tells whether a Renderer object is valid
+    bool isValid(const Scene* p);               //!< Tells whether a Scene object is valid
+    bool isValid(const Skybox* p);              //!< Tells whether a Skybox object is valid
+    bool isValid(const ColorGrading* p);        //!< Tells whether a ColorGrading object is valid
+    bool isValid(const SwapChain* p);           //!< Tells whether a SwapChain object is valid
+    bool isValid(const Stream* p);              //!< Tells whether a Stream object is valid
+    bool isValid(const Texture* p);             //!< Tells whether a Texture object is valid
+    bool isValid(const RenderTarget* p);        //!< Tells whether a RenderTarget object is valid
+    bool isValid(const View* p);                //!< Tells whether a View object is valid
+    bool isValid(const InstanceBuffer* p);      //!< Tells whether an InstanceBuffer object is valid
+
     /**
      * Kicks the hardware thread (e.g. the OpenGL, Vulkan or Metal thread) and blocks until
      * all commands to this point are executed. Note that does guarantee that the
diff --git a/filament/include/filament/Material.h b/filament/include/filament/Material.h
index 5b9314a042d..06988983c4d 100644
--- a/filament/include/filament/Material.h
+++ b/filament/include/filament/Material.h
@@ -166,6 +166,20 @@ class UTILS_PUBLIC Material : public FilamentAPI {
      * many previous frames are enqueued in the backend. This also varies by backend. Therefore,
      * it is recommended to only call this method once per material shortly after creation.
      *
+     * If the same variant is scheduled for compilation multiple times, the first scheduling
+     * takes precedence; later requests for that variant are ignored.
+     *
+     * caveat: A consequence is that if a variant is scheduled on the low priority queue and later
+     * scheduled again on the high priority queue, the later scheduling is ignored.
+     * Therefore, the second callback could be called before the variant is compiled.
+     * However, the first callback, if specified, will trigger as expected.
+     *
+     * The callback is guaranteed to be called. If the engine is destroyed while some material
+     * variants are still compiling or in the queue, these will be discarded and the corresponding
+     * callback will be called. In that case however the Material pointer passed to the callback
+     * is guaranteed to be invalid (either because it's been destroyed by the user already, or,
+     * because it's been cleaned-up by the Engine).
+     *
      * @param priority      Which priority queue to use, LOW or HIGH.
      * @param variants      Variants to include to the compile command.
      * @param handler       Handler to dispatch the callback or nullptr for the default handler
diff --git a/filament/include/filament/MaterialInstance.h b/filament/include/filament/MaterialInstance.h
index c2095e1c477..ee7a8e252ff 100644
--- a/filament/include/filament/MaterialInstance.h
+++ b/filament/include/filament/MaterialInstance.h
@@ -52,6 +52,7 @@ class UTILS_PUBLIC MaterialInstance : public FilamentAPI {
 public:
     using CullingMode = filament::backend::CullingMode;
     using TransparencyMode = filament::TransparencyMode;
+    using DepthFunc = filament::backend::SamplerCompareFunc;
     using StencilCompareFunc = filament::backend::SamplerCompareFunc;
     using StencilOperation = filament::backend::StencilOperation;
     using StencilFace = filament::backend::StencilFace;
@@ -367,6 +368,16 @@ class UTILS_PUBLIC MaterialInstance : public FilamentAPI {
      */
     void setDepthCulling(bool enable) noexcept;
 
+    /**
+     * Overrides the default depth function state that was set on the material.
+     */
+    void setDepthFunc(DepthFunc depthFunc) noexcept;
+
+    /**
+     * Returns the depth function state.
+     */
+    DepthFunc getDepthFunc() const noexcept;
+
     /**
      * Returns whether depth culling is enabled.
      */
diff --git a/filament/src/Engine.cpp b/filament/src/Engine.cpp
index ccb1748047e..ce97bdfd0ff 100644
--- a/filament/src/Engine.cpp
+++ b/filament/src/Engine.cpp
@@ -196,6 +196,61 @@ void Engine::destroy(Entity e) {
     downcast(this)->destroy(e);
 }
 
+bool Engine::isValid(const BufferObject* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const VertexBuffer* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const Fence* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const IndexBuffer* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const SkinningBuffer* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const MorphTargetBuffer* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const IndirectLight* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const Material* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const Renderer* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const Scene* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const Skybox* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const ColorGrading* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const SwapChain* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const Stream* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const Texture* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const RenderTarget* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const View* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+bool Engine::isValid(const InstanceBuffer* p) {
+    return downcast(this)->isValid(downcast(p));
+}
+
 void Engine::flushAndWait() {
     downcast(this)->flushAndWait();
 }
diff --git a/filament/src/FrameInfo.cpp b/filament/src/FrameInfo.cpp
index d6f9653ae28..12923e69d37 100644
--- a/filament/src/FrameInfo.cpp
+++ b/filament/src/FrameInfo.cpp
@@ -51,7 +51,7 @@ void FrameInfoManager::terminate(DriverApi& driver) noexcept {
     }
 }
 
-void FrameInfoManager::beginFrame(DriverApi& driver,Config const& config, uint32_t frameId) noexcept {
+void FrameInfoManager::beginFrame(DriverApi& driver,Config const& config, uint32_t) noexcept {
     driver.beginTimerQuery(mQueries[mIndex]);
     uint64_t elapsed = 0;
     if (driver.getTimerQueryValue(mQueries[mLast], &elapsed)) {
@@ -67,7 +67,8 @@ void FrameInfoManager::endFrame(DriverApi& driver) noexcept {
     mIndex = (mIndex + 1) % POOL_COUNT;
 }
 
-void FrameInfoManager::update(Config const& config, FrameInfoManager::duration lastFrameTime) noexcept {
+void FrameInfoManager::update(Config const& config,
+        FrameInfoManager::duration lastFrameTime) noexcept {
     // keep an history of frame times
     auto& history = mFrameTimeHistory;
 
@@ -85,12 +86,13 @@ void FrameInfoManager::update(Config const& config, FrameInfoManager::duration l
     // apply a median filter to get a good representation of the frame time of the last
     // N frames.
     std::array<duration, MAX_FRAMETIME_HISTORY> median; // NOLINT -- it's initialized below
-    size_t size = std::min(mFrameTimeHistorySize, std::min(config.historySize, (uint32_t)median.size()));
+    size_t const size = std::min(mFrameTimeHistorySize,
+            std::min(config.historySize, (uint32_t)median.size()));
     for (size_t i = 0; i < size; ++i) {
         median[i] = history[i].frameTime;
     }
     std::sort(median.begin(), median.begin() + size);
-    duration denoisedFrameTime = median[size / 2];
+    duration const denoisedFrameTime = median[size / 2];
 
     history[0].denoisedFrameTime = denoisedFrameTime;
     history[0].valid = true;
diff --git a/filament/src/FrameSkipper.cpp b/filament/src/FrameSkipper.cpp
index ff0a185f27a..acb27958cf5 100644
--- a/filament/src/FrameSkipper.cpp
+++ b/filament/src/FrameSkipper.cpp
@@ -16,8 +16,6 @@
 
 #include "FrameSkipper.h"
 
-#include "details/Engine.h"
-
 #include <utils/Log.h>
 #include <utils/debug.h>
 
@@ -27,46 +25,47 @@ using namespace utils;
 using namespace backend;
 
 FrameSkipper::FrameSkipper(size_t latency) noexcept
-        : mLast(latency) {
+        : mLast(latency - 1) {
     assert_invariant(latency <= MAX_FRAME_LATENCY);
 }
 
 FrameSkipper::~FrameSkipper() noexcept = default;
 
 void FrameSkipper::terminate(DriverApi& driver) noexcept {
-    for (auto sync : mDelayedSyncs) {
-        if (sync) {
-            driver.destroySync(sync);
+    for (auto fence : mDelayedFences) {
+        if (fence) {
+            driver.destroyFence(fence);
         }
     }
 }
 
 bool FrameSkipper::beginFrame(DriverApi& driver) noexcept {
-    auto& syncs = mDelayedSyncs;
-    auto sync = syncs.front();
-    if (sync) {
-        auto status = driver.getSyncStatus(sync);
-        if (status == SyncStatus::NOT_SIGNALED) {
+    auto& fences = mDelayedFences;
+    auto fence = fences.front();
+    if (fence) {
+        auto status = driver.wait(fence, 0);
+        if (status == FenceStatus::TIMEOUT_EXPIRED) {
             // Sync not ready, skip frame
             return false;
         }
-        driver.destroySync(sync);
+        assert_invariant(status == FenceStatus::CONDITION_SATISFIED);
+        driver.destroyFence(fence);
     }
     // shift all fences down by 1
-    std::move(syncs.begin() + 1, syncs.end(), syncs.begin());
-    syncs.back() = {};
+    std::move(fences.begin() + 1, fences.end(), fences.begin());
+    fences.back() = {};
     return true;
 }
 
 void FrameSkipper::endFrame(DriverApi& driver) noexcept {
-    // if the user produced a new frame despite the fact that the previous one wasn't finished
+    // If the user produced a new frame despite the fact that the previous one wasn't finished
     // (i.e. FrameSkipper::beginFrame() returned false), we need to make sure to replace
     // a fence that might be here already)
-    auto& sync = mDelayedSyncs[mLast];
-    if (sync) {
-        driver.destroySync(sync);
+    auto& fence = mDelayedFences[mLast];
+    if (fence) {
+        driver.destroyFence(fence);
     }
-    sync = driver.createSync();
+    fence = driver.createFence();
 }
 
 } // namespace filament
diff --git a/filament/src/FrameSkipper.h b/filament/src/FrameSkipper.h
index 434c61f6e26..3b7cedbc25c 100644
--- a/filament/src/FrameSkipper.h
+++ b/filament/src/FrameSkipper.h
@@ -24,24 +24,42 @@
 
 namespace filament {
 
+/*
+ * FrameSkipper is used to determine if the current frame needs to be skipped so that we don't
+ * outrun the GPU.
+ */
 class FrameSkipper {
-    static constexpr size_t MAX_FRAME_LATENCY = 4;
+    static constexpr size_t MAX_FRAME_LATENCY = 3;
 public:
+    /*
+     * The latency parameter defines how many unfinished frames we want to accept before we start
+     * dropping frames. This affects frame latency.
+     *
+     * A latency of 1 means that the GPU must be finished with the previous frame so that
+     * we don't drop the current frame. While this provides the best latency this doesn't allow
+     * much overlap between the main thread, the back thread and the GPU.
+     *
+     * A latency of 2 (default) allows full overlap between the CPU and GPU, but the main and driver
+     * thread can't fully overlap.
+     *
+     * A latency of 3 allows the main thread, driver thread and GPU to overlap, each being able to
+     * use up to 16ms (or whatever the refresh rate is).
+     */
     explicit FrameSkipper(size_t latency = 2) noexcept;
     ~FrameSkipper() noexcept;
 
     void terminate(backend::DriverApi& driver) noexcept;
 
-    // returns false if we need to skip this frame, because the gpu is running behind the cpu.
-    // in that case, don't call endFrame().
-    // returns true if rendering can proceed. Always call endFrame() when done.
+    // Returns false if we need to skip this frame, because the GPU is running behind the CPU;
+    // in that case, don't call endFrame().
+    // Returns true if rendering can proceed. Always call endFrame() when done.
     bool beginFrame(backend::DriverApi& driver) noexcept;
 
     void endFrame(backend::DriverApi& driver) noexcept;
 
 private:
-    using Container = std::array<backend::Handle<backend::HwSync>, MAX_FRAME_LATENCY>;
-    mutable Container mDelayedSyncs{};
+    using Container = std::array<backend::Handle<backend::HwFence>, MAX_FRAME_LATENCY>;
+    mutable Container mDelayedFences{};
     size_t mLast;
 };
 
diff --git a/filament/src/MaterialInstance.cpp b/filament/src/MaterialInstance.cpp
index ec3e3ea931c..33f3ab764b1 100644
--- a/filament/src/MaterialInstance.cpp
+++ b/filament/src/MaterialInstance.cpp
@@ -255,6 +255,14 @@ void MaterialInstance::setDepthCulling(bool enable) noexcept {
     downcast(this)->setDepthCulling(enable);
 }
 
+void MaterialInstance::setDepthFunc(DepthFunc depthFunc) noexcept {
+    downcast(this)->setDepthFunc(depthFunc);
+}
+
+MaterialInstance::DepthFunc MaterialInstance::getDepthFunc() const noexcept {
+    return downcast(this)->getDepthFunc();
+}
+
 void MaterialInstance::setStencilWrite(bool enable) noexcept {
     downcast(this)->setStencilWrite(enable);
 }
diff --git a/filament/src/ResourceList.cpp b/filament/src/ResourceList.cpp
index b8e8c8f6618..4579ed8a320 100644
--- a/filament/src/ResourceList.cpp
+++ b/filament/src/ResourceList.cpp
@@ -45,12 +45,15 @@ bool ResourceListBase::remove(void const* item) {
     return mList.erase(const_cast<void*>(item)) > 0;
 }
 
+auto ResourceListBase::find(void const* item) -> iterator {
+    return mList.find(const_cast<void*>(item));
+}
 
 void ResourceListBase::clear() noexcept {
     mList.clear();
 }
 
-// this is not inlined so we don't pay the code-size cost of iterating the list
+// this is not inlined, so we don't pay the code-size cost of iterating the list
 void ResourceListBase::forEach(void (* f)(void*, void*), void* user) const noexcept {
     std::for_each(mList.begin(), mList.end(), [=](void* p) {
         f(user, p);
diff --git a/filament/src/ResourceList.h b/filament/src/ResourceList.h
index 188a5925fe4..1bdf188a563 100644
--- a/filament/src/ResourceList.h
+++ b/filament/src/ResourceList.h
@@ -38,6 +38,8 @@ class ResourceListBase {
 
     bool remove(void const* item);
 
+    iterator find(void const* item);
+
     void clear() noexcept;
 
     bool empty() const noexcept {
@@ -76,9 +78,12 @@ class ResourceList : private ResourceListBase {
     using ResourceListBase::forEach;
     using ResourceListBase::insert;
     using ResourceListBase::remove;
+    using ResourceListBase::find;
     using ResourceListBase::empty;
     using ResourceListBase::size;
     using ResourceListBase::clear;
+    using ResourceListBase::begin;
+    using ResourceListBase::end;
 
     explicit ResourceList(const char* name) noexcept: ResourceListBase(name) {}
 
diff --git a/filament/src/details/Engine.cpp b/filament/src/details/Engine.cpp
index 3206d1ec702..6816396aeb1 100644
--- a/filament/src/details/Engine.cpp
+++ b/filament/src/details/Engine.cpp
@@ -465,7 +465,7 @@ void FEngine::shutdown() {
         getDriverApi().terminate();
     } else {
         mDriverThread.join();
-
+        // Driver::terminate() has been called here.
     }
 
     // Finally, call user callbacks that might have been scheduled.
@@ -852,6 +852,12 @@ void FEngine::cleanupResourceListLocked(Lock& lock, ResourceList<T>&& list) {
 
 // -----------------------------------------------------------------------------------------------
 
+template<typename T>
+UTILS_ALWAYS_INLINE
+inline bool FEngine::isValid(const T* ptr, ResourceList<T>& list) {
+    return list.find(ptr) != list.end();
+}
+
 template<typename T>
 UTILS_ALWAYS_INLINE
 inline bool FEngine::terminateAndDestroy(const T* ptr, ResourceList<T>& list) {
@@ -1019,6 +1025,79 @@ void FEngine::destroy(Entity e) {
     mCameraManager.destroy(e);
 }
 
+bool FEngine::isValid(const FBufferObject* p) { // each overload checks p against its own list
+    return isValid(p, mBufferObjects);
+}
+
+bool FEngine::isValid(const FVertexBuffer* p) {
+    return isValid(p, mVertexBuffers);
+}
+
+bool FEngine::isValid(const FFence* p) {
+    return isValid(p, mFences);
+}
+
+bool FEngine::isValid(const FIndexBuffer* p) {
+    return isValid(p, mIndexBuffers);
+}
+
+bool FEngine::isValid(const FSkinningBuffer* p) {
+    return isValid(p, mSkinningBuffers);
+}
+
+bool FEngine::isValid(const FMorphTargetBuffer* p) {
+    return isValid(p, mMorphTargetBuffers);
+}
+
+bool FEngine::isValid(const FIndirectLight* p) {
+    return isValid(p, mIndirectLights);
+}
+
+bool FEngine::isValid(const FMaterial* p) {
+    return isValid(p, mMaterials);
+}
+
+bool FEngine::isValid(const FRenderer* p) {
+    return isValid(p, mRenderers);
+}
+
+bool FEngine::isValid(const FScene* p) {
+    return isValid(p, mScenes);
+}
+
+bool FEngine::isValid(const FSkybox* p) {
+    return isValid(p, mSkyboxes);
+}
+
+bool FEngine::isValid(const FColorGrading* p) {
+    return isValid(p, mColorGradings);
+}
+
+bool FEngine::isValid(const FSwapChain* p) {
+    return isValid(p, mSwapChains);
+}
+
+bool FEngine::isValid(const FStream* p) {
+    return isValid(p, mStreams);
+}
+
+bool FEngine::isValid(const FTexture* p) {
+    return isValid(p, mTextures);
+}
+
+bool FEngine::isValid(const FRenderTarget* p) {
+    return isValid(p, mRenderTargets);
+}
+
+bool FEngine::isValid(const FView* p) {
+    return isValid(p, mViews);
+}
+
+bool FEngine::isValid(const FInstanceBuffer* p) {
+    return isValid(p, mInstanceBuffers);
+}
+
+
 void* FEngine::streamAlloc(size_t size, size_t alignment) noexcept {
     // we allow this only for small allocations
     if (size > 65536) {
@@ -1035,9 +1114,10 @@ bool FEngine::execute() {
     }
 
     // execute all command buffers
+    auto& driver = getDriverApi();
     for (auto& item : buffers) {
         if (UTILS_LIKELY(item.begin)) {
-            getDriverApi().execute(item.begin);
+            driver.execute(item.begin);
             mCommandBufferQueue.releaseBuffer(item);
         }
     }
diff --git a/filament/src/details/Engine.h b/filament/src/details/Engine.h
index fdb4d3a3a7c..c9c10471fa0 100644
--- a/filament/src/details/Engine.h
+++ b/filament/src/details/Engine.h
@@ -293,6 +293,26 @@ class FEngine : public Engine {
     bool destroy(const FView* p);
     bool destroy(const FInstanceBuffer* p);
 
+    bool isValid(const FBufferObject* p); // isValid(): is p in this engine's resource list?
+    bool isValid(const FVertexBuffer* p);
+    bool isValid(const FFence* p);
+    bool isValid(const FIndexBuffer* p);
+    bool isValid(const FSkinningBuffer* p);
+    bool isValid(const FMorphTargetBuffer* p);
+    bool isValid(const FIndirectLight* p);
+    bool isValid(const FMaterial* p);
+    bool isValid(const FMaterialInstance* p); // NOTE(review): no definition added here -- confirm
+    bool isValid(const FRenderer* p);
+    bool isValid(const FScene* p);
+    bool isValid(const FSkybox* p);
+    bool isValid(const FColorGrading* p);
+    bool isValid(const FSwapChain* p);
+    bool isValid(const FStream* p);
+    bool isValid(const FTexture* p);
+    bool isValid(const FRenderTarget* p);
+    bool isValid(const FView* p);
+    bool isValid(const FInstanceBuffer* p);
+
     void destroy(utils::Entity e);
 
     void flushAndWait();
@@ -392,6 +412,9 @@ class FEngine : public Engine {
 
     backend::Driver& getDriver() const noexcept { return *mDriver; }
 
+    template<typename T>
+    bool isValid(const T* ptr, ResourceList<T>& list); // true iff list.find(ptr) != list.end()
+
     template<typename T>
     bool terminateAndDestroy(const T* p, ResourceList<T>& list);
 
diff --git a/filament/src/details/MaterialInstance.h b/filament/src/details/MaterialInstance.h
index c98c8f3115e..6be23b7e06c 100644
--- a/filament/src/details/MaterialInstance.h
+++ b/filament/src/details/MaterialInstance.h
@@ -101,6 +101,10 @@ class FMaterialInstance : public MaterialInstance {
 
     backend::RasterState::DepthFunc getDepthFunc() const noexcept { return mDepthFunc; }
 
+    void setDepthFunc(backend::RasterState::DepthFunc depthFunc) noexcept {
+        mDepthFunc = depthFunc; // only records the value; getDepthFunc() returns it
+    }
+
     void setPolygonOffset(float scale, float constant) noexcept {
         // handle reversed Z
         mPolygonOffset = { -scale, -constant };
diff --git a/filament/src/details/Renderer.cpp b/filament/src/details/Renderer.cpp
index 3b01c13420d..b17a6104758 100644
--- a/filament/src/details/Renderer.cpp
+++ b/filament/src/details/Renderer.cpp
@@ -55,7 +55,7 @@ using namespace backend;
 
 FRenderer::FRenderer(FEngine& engine) :
         mEngine(engine),
-        mFrameSkipper(1u),
+        mFrameSkipper(),
         mRenderTargetHandle(engine.getDefaultRenderTarget()),
         mFrameInfoManager(engine.getDriverApi()),
         mHdrTranslucent(TextureFormat::RGBA16F),
@@ -287,14 +287,14 @@ void FRenderer::endFrame() {
         driver.debugThreading();
     }
 
-    mFrameInfoManager.endFrame(driver);
-    mFrameSkipper.endFrame(driver);
-
     if (mSwapChain) {
         mSwapChain->commit(driver);
         mSwapChain = nullptr;
     }
 
+    mFrameInfoManager.endFrame(driver);
+    mFrameSkipper.endFrame(driver);
+
     driver.endFrame(mFrameId);
 
     // gives the backend a chance to execute periodic tasks
diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec
index 86e39c076d6..8a31ccd6333 100644
--- a/ios/CocoaPods/Filament.podspec
+++ b/ios/CocoaPods/Filament.podspec
@@ -1,12 +1,12 @@
 Pod::Spec.new do |spec|
   spec.name = "Filament"
-  spec.version = "1.40.4"
+  spec.version = "1.40.5"
   spec.license = { :type => "Apache 2.0", :file => "LICENSE" }
   spec.homepage = "https://google.github.io/filament"
   spec.authors = "Google LLC."
   spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL."
   spec.platform = :ios, "11.0"
-  spec.source = { :http => "https://github.com/google/filament/releases/download/v1.40.4/filament-v1.40.4-ios.tgz" }
+  spec.source = { :http => "https://github.com/google/filament/releases/download/v1.40.5/filament-v1.40.5-ios.tgz" }
 
   # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon.
   spec.pod_target_xcconfig = {
diff --git a/libs/filamat/src/GLSLPostProcessor.cpp b/libs/filamat/src/GLSLPostProcessor.cpp
index d8cb02e11bb..0206aea23f6 100644
--- a/libs/filamat/src/GLSLPostProcessor.cpp
+++ b/libs/filamat/src/GLSLPostProcessor.cpp
@@ -628,36 +628,33 @@ void GLSLPostProcessor::fixupClipDistance(
     }
 }
 
-void GLSLPostProcessor::registerPerformancePasses(Optimizer& optimizer, Config const& config) {
-    optimizer
-            .RegisterPass(CreateWrapOpKillPass())
-            .RegisterPass(CreateDeadBranchElimPass());
-
-    if (config.shaderModel != ShaderModel::DESKTOP ||
-            config.targetApi != MaterialBuilder::TargetApi::OPENGL) {
-        // this triggers a segfault with AMD OpenGL drivers on MacOS
-        // note that Metal also requires this pass in order to correctly generate half-precision MSL
-        optimizer.RegisterPass(CreateMergeReturnPass());
-    }
+// CreateMergeReturnPass() causes these issues:
+// - triggers a segfault with AMD OpenGL drivers on macOS
+// - triggers a crash on some Adreno drivers (b/291140208, b/289401984, b/289393290)
+// However Metal requires this pass in order to correctly generate half-precision MSL
+//
+// CreateSimplificationPass() creates a lot of problems:
+// - Adreno GPUs show artifacts after running simplification passes (Vulkan)
+// - spirv-cross fails generating working glsl
+//      (https://github.com/KhronosGroup/SPIRV-Cross/issues/2162)
+// - generally it makes the code more complicated, e.g. replacing for loops with
+//   while-if-break; it is unclear whether it helps with anything.
+// However, the simplification passes below are necessary when targeting Metal, otherwise the
+// result is mismatched half / float assignments in MSL.
 
-    // CreateSimplificationPass() creates a lot of problems:
-    // - Adreno GPU show artifacts after running simplication passes (Vulkan)
-    // - spirv-cross fails generating working glsl
-    //      (https://github.com/KhronosGroup/SPIRV-Cross/issues/2162)
-    // - generally it makes the code more complicated, e.g.: replacing for loops with
-    //   while-if-break, unclear if it helps for anything.
-    // However, the simplification passes below are necessary when targeting Metal, otherwise the
-    // result is mismatched half / float assignments in MSL.
 
+void GLSLPostProcessor::registerPerformancePasses(Optimizer& optimizer, Config const& config) {
     auto RegisterPass = [&](spvtools::Optimizer::PassToken&& pass,
-                                MaterialBuilder::TargetApi apiFilter =
-                                        MaterialBuilder::TargetApi::ALL) {
+            MaterialBuilder::TargetApi apiFilter = MaterialBuilder::TargetApi::ALL) {
         if (!(config.targetApi & apiFilter)) {
             return;
         }
         optimizer.RegisterPass(std::move(pass));
     };
 
+    RegisterPass(CreateWrapOpKillPass());
+    RegisterPass(CreateDeadBranchElimPass());
+    RegisterPass(CreateMergeReturnPass(), MaterialBuilder::TargetApi::METAL);
     RegisterPass(CreateInlineExhaustivePass());
     RegisterPass(CreateAggressiveDCEPass());
     RegisterPass(CreatePrivateToLocalPass());
@@ -692,47 +689,48 @@ void GLSLPostProcessor::registerPerformancePasses(Optimizer& optimizer, Config c
 }
 
 void GLSLPostProcessor::registerSizePasses(Optimizer& optimizer, Config const& config) {
-    optimizer
-            .RegisterPass(CreateWrapOpKillPass())
-            .RegisterPass(CreateDeadBranchElimPass());
-
-    if (config.shaderModel != ShaderModel::DESKTOP) {
-        // this triggers a segfault with AMD drivers on MacOS
-        optimizer.RegisterPass(CreateMergeReturnPass());
-    }
+    auto RegisterPass = [&](spvtools::Optimizer::PassToken&& pass,
+            MaterialBuilder::TargetApi apiFilter = MaterialBuilder::TargetApi::ALL) {
+        if (!(config.targetApi & apiFilter)) { // pass is not enabled for the API being targeted
+            return;
+        }
+        optimizer.RegisterPass(std::move(pass)); // registration order is the call order
+    };
 
-    optimizer
-            .RegisterPass(CreateInlineExhaustivePass())
-            .RegisterPass(CreateEliminateDeadFunctionsPass())
-            .RegisterPass(CreatePrivateToLocalPass())
-            .RegisterPass(CreateScalarReplacementPass(0))
-            .RegisterPass(CreateLocalMultiStoreElimPass())
-            .RegisterPass(CreateCCPPass())
-            .RegisterPass(CreateLoopUnrollPass(true))
-            .RegisterPass(CreateDeadBranchElimPass())
-            .RegisterPass(CreateSimplificationPass())
-            .RegisterPass(CreateScalarReplacementPass(0))
-            .RegisterPass(CreateLocalSingleStoreElimPass())
-            .RegisterPass(CreateIfConversionPass())
-            .RegisterPass(CreateSimplificationPass())
-            .RegisterPass(CreateAggressiveDCEPass())
-            .RegisterPass(CreateDeadBranchElimPass())
-            .RegisterPass(CreateBlockMergePass())
-            .RegisterPass(CreateLocalAccessChainConvertPass())
-            .RegisterPass(CreateLocalSingleBlockLoadStoreElimPass())
-            .RegisterPass(CreateAggressiveDCEPass())
-            .RegisterPass(CreateCopyPropagateArraysPass())
-            .RegisterPass(CreateVectorDCEPass())
-            .RegisterPass(CreateDeadInsertElimPass())
-            // this breaks UBO layout
-            //.RegisterPass(CreateEliminateDeadMembersPass())
-            .RegisterPass(CreateLocalSingleStoreElimPass())
-            .RegisterPass(CreateBlockMergePass())
-            .RegisterPass(CreateLocalMultiStoreElimPass())
-            .RegisterPass(CreateRedundancyEliminationPass())
-            .RegisterPass(CreateSimplificationPass())
-            .RegisterPass(CreateAggressiveDCEPass())
-            .RegisterPass(CreateCFGCleanupPass());
+    RegisterPass(CreateWrapOpKillPass());
+    RegisterPass(CreateDeadBranchElimPass());
+    RegisterPass(CreateMergeReturnPass(), MaterialBuilder::TargetApi::METAL);
+    RegisterPass(CreateInlineExhaustivePass());
+    RegisterPass(CreateEliminateDeadFunctionsPass());
+    RegisterPass(CreatePrivateToLocalPass());
+    RegisterPass(CreateScalarReplacementPass(0));
+    RegisterPass(CreateLocalMultiStoreElimPass());
+    RegisterPass(CreateCCPPass());
+    RegisterPass(CreateLoopUnrollPass(true));
+    RegisterPass(CreateDeadBranchElimPass());
+    RegisterPass(CreateSimplificationPass(), MaterialBuilder::TargetApi::METAL);
+    RegisterPass(CreateScalarReplacementPass(0));
+    RegisterPass(CreateLocalSingleStoreElimPass());
+    RegisterPass(CreateIfConversionPass());
+    RegisterPass(CreateSimplificationPass(), MaterialBuilder::TargetApi::METAL);
+    RegisterPass(CreateAggressiveDCEPass());
+    RegisterPass(CreateDeadBranchElimPass());
+    RegisterPass(CreateBlockMergePass());
+    RegisterPass(CreateLocalAccessChainConvertPass());
+    RegisterPass(CreateLocalSingleBlockLoadStoreElimPass());
+    RegisterPass(CreateAggressiveDCEPass());
+    RegisterPass(CreateCopyPropagateArraysPass());
+    RegisterPass(CreateVectorDCEPass());
+    RegisterPass(CreateDeadInsertElimPass());
+    // this breaks UBO layout
+    //RegisterPass(CreateEliminateDeadMembersPass());
+    RegisterPass(CreateLocalSingleStoreElimPass());
+    RegisterPass(CreateBlockMergePass());
+    RegisterPass(CreateLocalMultiStoreElimPass());
+    RegisterPass(CreateRedundancyEliminationPass());
+    RegisterPass(CreateSimplificationPass(), MaterialBuilder::TargetApi::METAL);
+    RegisterPass(CreateAggressiveDCEPass());
+    RegisterPass(CreateCFGCleanupPass());
 }
 
 } // namespace filamat
diff --git a/libs/gltfio/CMakeLists.txt b/libs/gltfio/CMakeLists.txt
index d06676e699f..66b8db65de4 100644
--- a/libs/gltfio/CMakeLists.txt
+++ b/libs/gltfio/CMakeLists.txt
@@ -4,6 +4,8 @@ project(gltfio C ASM)
 set(TARGET gltfio)
 set(PUBLIC_HDR_DIR include)
 
+set(ROOT_DIR ${CMAKE_CURRENT_SOURCE_DIR}/../..)
+
 # ==================================================================================================
 # Sources and headers
 # ==================================================================================================
@@ -189,6 +191,40 @@ if (NOT WEBGL AND NOT ANDROID AND NOT IOS)
 
 endif()
 
+# ==================================================================================================
+# Tests
+# ==================================================================================================
+
+set(GLTF_TEST_FILES)
+function(add_test_gltf SOURCE TARGET)
+    # Adds a rule copying ROOT_DIR/SOURCE to the binary dir as TARGET; appends it to GLTF_TEST_FILES.
+    set(source_path "${ROOT_DIR}/${SOURCE}")
+    set(target_path "${CMAKE_CURRENT_BINARY_DIR}/${TARGET}")
+    add_custom_command(
+        OUTPUT "${target_path}"
+        DEPENDS "${source_path}"
+        COMMAND ${CMAKE_COMMAND} -E copy "${source_path}" "${target_path}"
+    )
+    set(GLTF_TEST_FILES ${GLTF_TEST_FILES} "${target_path}" PARENT_SCOPE)
+endfunction()
+
+add_test_gltf("third_party/models/AnimatedMorphCube/AnimatedMorphCube.glb" "AnimatedMorphCube.glb")
+
+add_custom_target(test_gltfio_files DEPENDS ${GLTF_TEST_FILES})
+
+# The following tests rely on private APIs that are stripped
+# away in Release builds
+if (TNT_DEV AND NOT WEBGL AND NOT ANDROID AND NOT IOS)
+    set(TEST_TARGET test_gltfio)
+
+    add_executable(${TEST_TARGET} test/gltfio_test.cpp)
+    add_dependencies(${TEST_TARGET} test_gltfio_files)
+
+    target_link_libraries(${TEST_TARGET} PRIVATE ${TARGET} filament filabridge gtest uberarchive)
+    target_compile_options(${TEST_TARGET} PRIVATE -Wno-deprecated-register)
+    set_target_properties(${TEST_TARGET} PROPERTIES FOLDER Tests)
+endif()
+
 # ==================================================================================================
 # Installation
 # ==================================================================================================
diff --git a/libs/gltfio/src/ArchiveCache.cpp b/libs/gltfio/src/ArchiveCache.cpp
index 118beec0fdc..47798741fa3 100644
--- a/libs/gltfio/src/ArchiveCache.cpp
+++ b/libs/gltfio/src/ArchiveCache.cpp
@@ -118,22 +118,22 @@ Material* ArchiveCache::getMaterial(const ArchiveRequirements& reqs) {
                 mMaterials[i] = Material::Builder()
                     .package(spec.package, spec.packageByteCount)
                     .build(mEngine);
-            }
 
-            // Don't attempt to precompile shaders on WebGL.
-            // Chrome already suffers from slow shader compilation:
-            // https://github.com/google/filament/issues/6615
-            // Precompiling shaders exacerbates the problem.
-#if !defined(__EMSCRIPTEN__)
-            // compile everything at low priority
-            mMaterials[i]->compile(Material::CompilerPriorityQueue::LOW);
-
-            // promote variants we care about to high priority
-            mMaterials[i]->compile(Material::CompilerPriorityQueue::HIGH,
-                    UserVariantFilterBit::DIRECTIONAL_LIGHTING |
-                    UserVariantFilterBit::DYNAMIC_LIGHTING |
-                    UserVariantFilterBit::SHADOW_RECEIVER);
-#endif
+                // Don't attempt to precompile shaders on WebGL.
+                // Chrome already suffers from slow shader compilation:
+                // https://github.com/google/filament/issues/6615
+                // Precompiling shaders exacerbates the problem.
+    #if !defined(__EMSCRIPTEN__)
+                // First compile high priority variants
+                mMaterials[i]->compile(Material::CompilerPriorityQueue::HIGH,
+                        UserVariantFilterBit::DIRECTIONAL_LIGHTING |
+                        UserVariantFilterBit::DYNAMIC_LIGHTING |
+                        UserVariantFilterBit::SHADOW_RECEIVER);
+
+                // and then, everything else at low priority
+                mMaterials[i]->compile(Material::CompilerPriorityQueue::LOW);
+    #endif
+            }
 
             return mMaterials[i];
         }
diff --git a/libs/gltfio/test/gltfio_test.cpp b/libs/gltfio/test/gltfio_test.cpp
new file mode 100644
index 00000000000..5f6fd2d3c0d
--- /dev/null
+++ b/libs/gltfio/test/gltfio_test.cpp
@@ -0,0 +1,232 @@
+/*
+ * Copyright (C) 2023 The Android Open Source Project
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <gtest/gtest.h>
+
+#include <backend/PixelBufferDescriptor.h>
+
+#include <filament/Engine.h>
+#include <filament/MaterialEnums.h>
+#include <filament/RenderableManager.h>
+#include <filament/TransformManager.h>
+
+#include <gltfio/AssetLoader.h>
+#include <gltfio/FilamentAsset.h>
+#include <gltfio/ResourceLoader.h>
+#include <gltfio/TextureProvider.h>
+#include <gltfio/math.h>
+#include <math/mathfwd.h>
+#include <utils/EntityManager.h>
+#include <utils/NameComponentManager.h>
+#include <utils/Path.h>
+
+#include "materials/uberarchive.h"
+
+#include <cstdint>
+#include <cstdlib>
+#include <fstream>
+#include <iostream>
+#include <memory>
+#include <string_view>
+#include <unordered_map>
+#include <vector>
+
+using namespace filament;
+using namespace backend;
+using namespace gltfio;
+using namespace utils;
+
+char const* ANIMATED_MORPH_CUBE_GLB = "AnimatedMorphCube.glb";
+
+static std::ifstream::pos_type getFileSize(const char* filename) {
+    // Open positioned at the end so tellg() reports the byte count; a failed open yields -1.
+    return std::ifstream(filename, std::ifstream::ate | std::ifstream::binary).tellg();
+}
+
+class glTFData { // test helper: loads one glTF file and owns the loader objects it creates
+public:
+    glTFData(Path filename, Engine* engine, MaterialProvider* materialProvider,
+            NameComponentManager* nameManager)
+        : mAssetLoader(AssetLoader::create({engine, materialProvider, nameManager})),
+          mResourceLoader(new ResourceLoader({
+                  engine, filename.getAbsolutePath().c_str(), false, /* normalizeSkinningWeights */
+          })),
+          mStbDecoder(createStbProvider(engine)), mKtxDecoder(createKtx2Provider(engine)) {
+        mResourceLoader->addTextureProvider("image/png", mStbDecoder);
+        mResourceLoader->addTextureProvider("image/ktx2", mKtxDecoder);
+
+        long contentSize = static_cast<long>(getFileSize(filename.c_str()));
+        if (contentSize <= 0) { // covers both a failed open (-1) and an empty file
+            std::cerr << "Unable to open " << filename.c_str() << std::endl;
+            exit(1); // every failure in this constructor terminates the whole test process
+        }
+
+        // Consume the glTF file.
+        std::ifstream in(filename.c_str(), std::ifstream::binary | std::ifstream::in);
+        std::vector<uint8_t> buffer(static_cast<unsigned long>(contentSize));
+        if (!in.read((char*) buffer.data(), contentSize)) {
+            std::cerr << "Unable to read " << filename.c_str() << std::endl;
+            exit(1);
+        }
+
+        // Parse the glTF file and create Filament entities.
+        mAsset = mAssetLoader->createAsset(buffer.data(), buffer.size());
+        buffer.clear(); // the raw file bytes are dropped as soon as createAsset() returns
+        buffer.shrink_to_fit();
+
+        if (!mAsset) {
+            std::cerr << "Unable to parse " << filename.c_str() << std::endl;
+            exit(1);
+        }
+
+        // Load resources
+        if (!mResourceLoader->asyncBeginLoad(mAsset)) {
+            std::cerr << "Unable to start loading resources for " << filename << std::endl;
+            exit(1);
+        }
+        mAsset->releaseSourceData();
+    }
+
+    ~glTFData() {
+        mAssetLoader->destroyAsset(mAsset); // the asset is destroyed through the loader that made it
+        delete mResourceLoader;
+        delete mStbDecoder;
+        delete mKtxDecoder;
+
+        AssetLoader::destroy(&mAssetLoader); // the loader goes last, after its asset
+    }
+
+    FilamentAsset* getAsset() const { return mAsset; } // non-owning; valid while *this is alive
+
+    AssetLoader* mAssetLoader; // NOTE(review): raw owning pointers; a copy would double-free
+    ResourceLoader* mResourceLoader = nullptr;
+    TextureProvider* mStbDecoder = nullptr;
+    TextureProvider* mKtxDecoder = nullptr;
+    FilamentAsset* mAsset = nullptr;
+};
+
+class glTFIOTest : public testing::Test { // fixture: fresh engine + assets for every test
+protected:
+    Engine* mEngine = nullptr;
+    NameComponentManager* mNameManager = nullptr;
+    MaterialProvider* mMaterialProvider = nullptr;
+
+    // Loaded assets, keyed by the file-name pointer itself (not by string contents).
+    std::unordered_map<char const*, std::unique_ptr<glTFData>> mData;
+
+    void SetUp() override {
+        mEngine = Engine::Builder().backend(Backend::NOOP).build();
+
+        mNameManager = new NameComponentManager(EntityManager::get());
+        mMaterialProvider = createUbershaderProvider(mEngine, UBERARCHIVE_DEFAULT_DATA,
+                UBERARCHIVE_DEFAULT_SIZE);
+
+        for (auto fname: {ANIMATED_MORPH_CUBE_GLB}) {
+            // test assets are looked up in the directory that contains the test executable
+            Path gltfFile = Path::getCurrentExecutable().getParent() + Path(fname);
+            mData[fname] =
+                    std::make_unique<glTFData>(gltfFile, mEngine, mMaterialProvider, mNameManager);
+        }
+    }
+
+    void TearDown() override {
+        mData.clear(); // destroy the assets before the provider and engine they were built with
+        mMaterialProvider->destroyMaterials();
+        Engine::destroy(&mEngine);
+
+        delete mMaterialProvider;
+        delete mNameManager;
+    }
+};
+
+TEST_F(glTFIOTest, AnimatedMorphCubeMaterials) { // checks the first renderable's material name
+    FilamentAsset const& morphCubeAsset = *mData[ANIMATED_MORPH_CUBE_GLB]->getAsset();
+    Entity const* renderables = morphCubeAsset.getRenderableEntities();
+    auto& renderableManager = mEngine->getRenderableManager();
+
+    auto inst = renderableManager.getInstance(renderables[0]); // assumes >= 1 renderable
+    auto materialInst = renderableManager.getMaterialInstanceAt(inst, 0);
+    std::string_view name{materialInst->getName()};
+
+    EXPECT_EQ(name, "Material"); // presumably the material name stored in the .glb -- confirm
+}
+
+TEST_F(glTFIOTest, AnimatedMorphCubeTransforms) { // checks the cube's transform component
+    FilamentAsset const& morphCubeAsset = *mData[ANIMATED_MORPH_CUBE_GLB]->getAsset();
+    auto const& transformManager = mEngine->getTransformManager();
+    auto const& renderableManager = mEngine->getRenderableManager(); // NOTE(review): unused here
+    Entity const* renderables = morphCubeAsset.getRenderableEntities();
+
+    EXPECT_EQ(morphCubeAsset.getRenderableEntityCount(), 1u);
+
+    EXPECT_TRUE(transformManager.hasComponent(renderables[0]));
+
+    auto const inst = transformManager.getInstance(renderables[0]);
+    math::mat4f const transform = transformManager.getTransform(inst);
+    math::mat4f const expectedTransform = composeMatrix(math::float3{0.0, 0.0, 0.0},
+            math::quatf{0.0, 0.0, 0.7071067, -0.7071068}, math::float3{100.0, 100.0, 100.0});
+
+    auto const result = inverse(transform) * expectedTransform;
+
+    // We expect the result to be identity. NOTE(review): compared with ==, i.e. no tolerance.
+    EXPECT_EQ(result, math::mat4f{});
+}
+
+TEST_F(glTFIOTest, AnimatedMorphCubeRenderables) { // checks attributes and morph targets
+    FilamentAsset const& morphCubeAsset = *mData[ANIMATED_MORPH_CUBE_GLB]->getAsset();
+    Entity const* renderables = morphCubeAsset.getRenderableEntities();
+    auto const& renderableManager = mEngine->getRenderableManager();
+
+    EXPECT_EQ(morphCubeAsset.getRenderableEntityCount(), 1u);
+
+    EXPECT_TRUE(renderableManager.hasComponent(renderables[0]));
+    auto const inst = renderableManager.getInstance(renderables[0]);
+    EXPECT_EQ(renderableManager.getPrimitiveCount(inst), 1u);
+    AttributeBitset const attribs = renderableManager.getEnabledAttributesAt(inst, 0);
+
+    EXPECT_TRUE(attribs[VertexAttribute::POSITION]);
+    EXPECT_TRUE(attribs[VertexAttribute::TANGENTS]);
+    // COLOR, UV0 and UV1 are expected to be enabled exactly when the provider needs dummy data
+    if (mMaterialProvider->needsDummyData(VertexAttribute::COLOR)) {
+        EXPECT_TRUE(attribs[VertexAttribute::COLOR]);
+    } else {
+        EXPECT_FALSE(attribs[VertexAttribute::COLOR]);
+    }
+    if (mMaterialProvider->needsDummyData(VertexAttribute::UV0)) {
+        EXPECT_TRUE(attribs[VertexAttribute::UV0]);
+    } else {
+        EXPECT_FALSE(attribs[VertexAttribute::UV0]);
+    }
+    if (mMaterialProvider->needsDummyData(VertexAttribute::UV1)) {
+        EXPECT_TRUE(attribs[VertexAttribute::UV1]);
+    } else {
+        EXPECT_FALSE(attribs[VertexAttribute::UV1]);
+    }
+
+    // The AnimatedMorphCube has two morph targets: "thin" and "angle"
+    EXPECT_EQ(renderableManager.getMorphTargetCount(inst), 2u);
+
+    // The 0-th MorphTargetBuffer holds both of the targets
+    auto const morphTargetBuffer = renderableManager.getMorphTargetBufferAt(inst, 0, 0);
+    EXPECT_EQ(morphTargetBuffer->getCount(), 2u);
+
+    // The number of vertices for the morph target should be the face vertices in a cube =>
+    // (6 faces * 4 vertices per face) = 24 vertices
+    EXPECT_EQ(morphTargetBuffer->getVertexCount(), 24u);
+}
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv); // hands the command line to googletest first
+    return RUN_ALL_TESTS(); // the test run's result becomes the process exit code
+}
diff --git a/samples/depthtesting.cpp b/samples/depthtesting.cpp
index 8b678466761..77fae2233f1 100644
--- a/samples/depthtesting.cpp
+++ b/samples/depthtesting.cpp
@@ -26,6 +26,8 @@
 #include <filament/VertexBuffer.h>
 #include <filament/View.h>
 
+#include <imgui.h>
+
 #include <utils/EntityManager.h>
 
 #include <filamentapp/Config.h>
@@ -48,6 +50,7 @@ struct App {
     Skybox* skybox;
     Entity whiteTriangle;
     Entity colorTriangle;
+    MaterialInstance::DepthFunc depthFunc;
 };
 
 struct Vertex {
@@ -115,8 +118,11 @@ int main(int argc, char** argv) {
                 .culling(false)
                 .receiveShadows(false)
                 .castShadows(false)
+                .priority(5)  // draw after whiteTriangle.
                 .build(*engine, app.colorTriangle);
         scene->addEntity(app.colorTriangle);
+
+        app.depthFunc = MaterialInstance::DepthFunc::GE;
     };
 
     auto cleanup = [&app](Engine* engine, View*, Scene*) {
@@ -131,6 +137,17 @@ int main(int argc, char** argv) {
         utils::EntityManager::get().destroy(app.camera);
     };
 
+    auto gui = [&app](Engine* engine, View* view) {
+        // Combo index <-> DepthFunc by plain cast: entries are assumed to follow the enum order.
+        int choice = static_cast<int>(app.depthFunc);
+        ImGui::Combo("Depth Function", &choice,
+                "Less or equal\0Greater or equal\0Strictly less than\0"
+                "Strictly greater than\0Equal\0Not equal\0Always\0Never\0\0");
+        if (choice == static_cast<int>(app.depthFunc)) { return; }  // selection unchanged
+        app.depthFunc = static_cast<MaterialInstance::DepthFunc>(choice);
+        app.mat->getDefaultInstance()->setDepthFunc(app.depthFunc);
+    };
+
     FilamentApp::get().animate([&app](Engine* engine, View* view, double now) {
         constexpr float ZOOM = 1.5f;
         const uint32_t w = view->getViewport().width;
@@ -144,7 +161,7 @@ int main(int argc, char** argv) {
                 filament::math::mat4f::rotation(now, filament::math::float3{ 0, 1, 0 }));
     });
 
-    FilamentApp::get().run(config, setup, cleanup);
+    FilamentApp::get().run(config, setup, cleanup, gui);
 
     return 0;
 }
diff --git a/samples/hellotriangle.cpp b/samples/hellotriangle.cpp
index a36df692201..c6469fc1b37 100644
--- a/samples/hellotriangle.cpp
+++ b/samples/hellotriangle.cpp
@@ -72,7 +72,7 @@ static void printUsage(char* name) {
     std::string usage(
             "HELLOTRIANGLE renders a spinning colored triangle\n"
             "Usage:\n"
-            "    SHOWCASE [options]\n"
+            "    HELLOTRIANGLE [options]\n"
             "Options:\n"
             "   --help, -h\n"
             "       Prints this message\n\n"
diff --git a/web/filament-js/filament.d.ts b/web/filament-js/filament.d.ts
index d0bfaa0372a..16d6e2f3831 100644
--- a/web/filament-js/filament.d.ts
+++ b/web/filament-js/filament.d.ts
@@ -183,6 +183,7 @@ export class MaterialInstance {
     public setDepthWrite(enable: boolean): void;
     public setStencilWrite(enable: boolean): void;
     public setDepthCulling(enable: boolean): void;
+    public setDepthFunc(func: CompareFunc): void;
     public setStencilCompareFunction(func: CompareFunc, face?: StencilFace): void;
     public setStencilOpStencilFail(op: StencilOperation, face?: StencilFace): void;
     public setStencilOpDepthFail(op: StencilOperation, face?: StencilFace): void;
diff --git a/web/filament-js/jsbindings.cpp b/web/filament-js/jsbindings.cpp
index 11634105458..201ca0c4f24 100644
--- a/web/filament-js/jsbindings.cpp
+++ b/web/filament-js/jsbindings.cpp
@@ -540,7 +540,53 @@ class_<Engine>("Engine")
     /// vb ::argument:: the [VertexBuffer] to destroy
     .function("destroyVertexBuffer", (void (*)(Engine*, VertexBuffer*)) []
             (Engine* engine, VertexBuffer* vb) { engine->destroy(vb); },
-            allow_raw_pointers());
+            allow_raw_pointers())
+
+    .function("isValidRenderer", EMBIND_LAMBDA(bool, (Engine* engine, Renderer* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidView", EMBIND_LAMBDA(bool, (Engine* engine, View* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidScene", EMBIND_LAMBDA(bool, (Engine* engine, Scene* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidFence", EMBIND_LAMBDA(bool, (Engine* engine, Fence* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidStream", EMBIND_LAMBDA(bool, (Engine* engine, Stream* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidIndexBuffer", EMBIND_LAMBDA(bool, (Engine* engine, IndexBuffer* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidVertexBuffer", EMBIND_LAMBDA(bool, (Engine* engine, VertexBuffer* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidSkinningBuffer", EMBIND_LAMBDA(bool, (Engine* engine, SkinningBuffer* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidIndirectLight", EMBIND_LAMBDA(bool, (Engine* engine, IndirectLight* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidMaterial", EMBIND_LAMBDA(bool, (Engine* engine, Material* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidSkybox", EMBIND_LAMBDA(bool, (Engine* engine, Skybox* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidColorGrading", EMBIND_LAMBDA(bool, (Engine* engine, ColorGrading* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidTexture", EMBIND_LAMBDA(bool, (Engine* engine, Texture* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidRenderTarget", EMBIND_LAMBDA(bool, (Engine* engine, RenderTarget* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers())
+    .function("isValidSwapChain", EMBIND_LAMBDA(bool, (Engine* engine, SwapChain* object), {
+                return engine->isValid(object);
+            }), allow_raw_pointers());
 
 /// SwapChain ::core class:: Represents the platform's native rendering surface.
 /// See also the [Engine] methods `createSwapChain` and `destroySwapChain`.
@@ -1325,6 +1371,8 @@ class_<MaterialInstance>("MaterialInstance")
     .function("setStencilWrite", &MaterialInstance::setStencilWrite)
     .function("setDepthCulling", &MaterialInstance::setDepthCulling)
     .function("isDepthCullingEnabled", &MaterialInstance::isDepthCullingEnabled)
+    .function("setDepthFunc", &MaterialInstance::setDepthFunc)
+    .function("getDepthFunc", &MaterialInstance::getDepthFunc)
     .function("setStencilCompareFunction", &MaterialInstance::setStencilCompareFunction)
     .function("setStencilCompareFunction", EMBIND_LAMBDA(void,
             (MaterialInstance* self, MaterialInstance::StencilCompareFunc func), {
diff --git a/web/filament-js/package.json b/web/filament-js/package.json
index 4745c2a343f..b08a19a8b53 100644
--- a/web/filament-js/package.json
+++ b/web/filament-js/package.json
@@ -1,6 +1,6 @@
 {
   "name": "filament",
-  "version": "1.40.4",
+  "version": "1.40.5",
   "description": "Real-time physically based rendering engine",
   "main": "filament.js",
   "module": "filament.js",