diff --git a/README.md b/README.md
index bb2c074eb15..66f0976391a 100644
--- a/README.md
+++ b/README.md
@@ -31,7 +31,7 @@ repositories {
}
dependencies {
- implementation 'com.google.android.filament:filament-android:1.50.3'
+ implementation 'com.google.android.filament:filament-android:1.50.4'
}
```
@@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`:
iOS projects can use CocoaPods to install the latest release:
```shell
-pod 'Filament', '~> 1.50.3'
+pod 'Filament', '~> 1.50.4'
```
### Snapshots
diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md
index 209a0afdec2..f7fa37e7f58 100644
--- a/RELEASE_NOTES.md
+++ b/RELEASE_NOTES.md
@@ -7,6 +7,9 @@ A new header is inserted each time a *tag* is created.
Instead, if you are authoring a PR for the main branch, add your release note to
[NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md).
+## v1.50.4
+
+
## v1.50.3
diff --git a/android/filament-android/src/main/cpp/Engine.cpp b/android/filament-android/src/main/cpp/Engine.cpp
index 05893cbd5bf..80409702c37 100644
--- a/android/filament-android/src/main/cpp/Engine.cpp
+++ b/android/filament-android/src/main/cpp/Engine.cpp
@@ -484,7 +484,9 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu
extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBuilderConfig(JNIEnv*,
jclass, jlong nativeBuilder, jlong commandBufferSizeMB, jlong perRenderPassArenaSizeMB,
jlong driverHandleArenaSizeMB, jlong minCommandBufferSizeMB, jlong perFrameCommandsSizeMB,
- jlong jobSystemThreadCount, jlong stereoscopicEyeCount,
+ jlong jobSystemThreadCount,
+ jlong textureUseAfterFreePoolSize, jboolean disableParallelShaderCompile,
+ jint stereoscopicType, jlong stereoscopicEyeCount,
jlong resourceAllocatorCacheSizeMB, jlong resourceAllocatorCacheMaxAge) {
Engine::Builder* builder = (Engine::Builder*) nativeBuilder;
Engine::Config config = {
@@ -494,6 +496,9 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu
.minCommandBufferSizeMB = (uint32_t) minCommandBufferSizeMB,
.perFrameCommandsSizeMB = (uint32_t) perFrameCommandsSizeMB,
.jobSystemThreadCount = (uint32_t) jobSystemThreadCount,
+ .textureUseAfterFreePoolSize = (uint32_t) textureUseAfterFreePoolSize,
+ .disableParallelShaderCompile = (bool) disableParallelShaderCompile,
+ .stereoscopicType = (Engine::StereoscopicType) stereoscopicType,
.stereoscopicEyeCount = (uint8_t) stereoscopicEyeCount,
.resourceAllocatorCacheSizeMB = (uint32_t) resourceAllocatorCacheSizeMB,
.resourceAllocatorCacheMaxAge = (uint8_t) resourceAllocatorCacheMaxAge,
diff --git a/android/filament-android/src/main/java/com/google/android/filament/Engine.java b/android/filament-android/src/main/java/com/google/android/filament/Engine.java
index 6b4647e2ac1..06f0a2b8ae9 100644
--- a/android/filament-android/src/main/java/com/google/android/filament/Engine.java
+++ b/android/filament-android/src/main/java/com/google/android/filament/Engine.java
@@ -158,6 +158,16 @@ public enum FeatureLevel {
FEATURE_LEVEL_3,
};
+ /**
+ * The type of technique for stereoscopic rendering
+ */
+ public enum StereoscopicType {
+ /** Stereoscopic rendering is performed using instanced rendering technique. */
+ INSTANCED,
+ /** Stereoscopic rendering is performed using the multiview feature from the graphics backend. */
+ MULTIVIEW,
+ };
+
/**
* Constructs Engine
objects using a builder pattern.
*/
@@ -211,7 +221,9 @@ public Builder config(Config config) {
nSetBuilderConfig(mNativeBuilder, config.commandBufferSizeMB,
config.perRenderPassArenaSizeMB, config.driverHandleArenaSizeMB,
config.minCommandBufferSizeMB, config.perFrameCommandsSizeMB,
- config.jobSystemThreadCount, config.stereoscopicEyeCount,
+ config.jobSystemThreadCount,
+ config.textureUseAfterFreePoolSize, config.disableParallelShaderCompile,
+ config.stereoscopicType.ordinal(), config.stereoscopicEyeCount,
config.resourceAllocatorCacheSizeMB, config.resourceAllocatorCacheMaxAge);
return this;
}
@@ -349,6 +361,35 @@ public static class Config {
*/
public long jobSystemThreadCount = 0;
+ /**
+ * Number of most-recently destroyed textures to track for use-after-free.
+ *
+ * This will cause the backend to throw an exception when a texture is freed but still bound
+ * to a SamplerGroup and used in a draw call. A value of 0 disables this check completely.
+ *
+ * Currently only respected by the Metal backend.
+ */
+ public long textureUseAfterFreePoolSize = 0;
+
+ /**
+ * Set to `true` to forcibly disable parallel shader compilation in the backend.
+ * Currently only honored by the GL backend.
+ */
+ public boolean disableParallelShaderCompile = false;
+
+ /**
+ * The type of technique for stereoscopic rendering.
+ *
+ * This setting determines the algorithm used when stereoscopic rendering is enabled. This
+ * decision applies to the entire Engine for the lifetime of the Engine. E.g., multiple
+ * Views created from the Engine must use the same stereoscopic type.
+ *
+ * Each view can enable stereoscopic rendering via the StereoscopicOptions::enable flag.
+ *
+ * @see View#setStereoscopicOptions
+ */
+ public StereoscopicType stereoscopicType = StereoscopicType.INSTANCED;
+
/**
* The number of eyes to render when stereoscopic rendering is enabled. Supported values are
* between 1 and Engine#getMaxStereoscopicEyes() (inclusive).
@@ -1240,7 +1281,8 @@ private static void assertDestroy(boolean success) {
private static native void nSetBuilderConfig(long nativeBuilder, long commandBufferSizeMB,
long perRenderPassArenaSizeMB, long driverHandleArenaSizeMB,
long minCommandBufferSizeMB, long perFrameCommandsSizeMB, long jobSystemThreadCount,
- long stereoscopicEyeCount,
+ long textureUseAfterFreePoolSize, boolean disableParallelShaderCompile,
+ int stereoscopicType, long stereoscopicEyeCount,
long resourceAllocatorCacheSizeMB, long resourceAllocatorCacheMaxAge);
private static native void nSetBuilderFeatureLevel(long nativeBuilder, int ordinal);
private static native void nSetBuilderSharedContext(long nativeBuilder, long sharedContext);
diff --git a/android/gradle.properties b/android/gradle.properties
index 53e64758c22..34340a20b8a 100644
--- a/android/gradle.properties
+++ b/android/gradle.properties
@@ -1,5 +1,5 @@
GROUP=com.google.android.filament
-VERSION_NAME=1.50.3
+VERSION_NAME=1.50.4
POM_DESCRIPTION=Real-time physically based rendering engine for Android.
diff --git a/filament/backend/include/backend/DriverEnums.h b/filament/backend/include/backend/DriverEnums.h
index aba2b404145..c41d1b83049 100644
--- a/filament/backend/include/backend/DriverEnums.h
+++ b/filament/backend/include/backend/DriverEnums.h
@@ -1212,6 +1212,14 @@ enum class Workaround : uint16_t {
DISABLE_THREAD_AFFINITY
};
+//! The type of technique for stereoscopic rendering
+enum class StereoscopicType : uint8_t {
+ // Stereoscopic rendering is performed using instanced rendering technique.
+ INSTANCED,
+ // Stereoscopic rendering is performed using the multiview feature from the graphics backend.
+ MULTIVIEW,
+};
+
} // namespace filament::backend
template<> struct utils::EnableBitMaskOperators
diff --git a/filament/backend/include/backend/Handle.h b/filament/backend/include/backend/Handle.h
index 7b8846ba7bc..ffc16133fd2 100644
--- a/filament/backend/include/backend/Handle.h
+++ b/filament/backend/include/backend/Handle.h
@@ -62,14 +62,6 @@ class HandleBase {
// clear the handle, this doesn't free associated resources
void clear() noexcept { object = nullid; }
- // compare handles
- bool operator==(const HandleBase& rhs) const noexcept { return object == rhs.object; }
- bool operator!=(const HandleBase& rhs) const noexcept { return object != rhs.object; }
- bool operator<(const HandleBase& rhs) const noexcept { return object < rhs.object; }
- bool operator<=(const HandleBase& rhs) const noexcept { return object <= rhs.object; }
- bool operator>(const HandleBase& rhs) const noexcept { return object > rhs.object; }
- bool operator>=(const HandleBase& rhs) const noexcept { return object >= rhs.object; }
-
// get this handle's handleId
HandleId getId() const noexcept { return object; }
@@ -101,6 +93,14 @@ struct Handle : public HandleBase {
explicit Handle(HandleId id) noexcept : HandleBase(id) { }
+ // compare handles of the same type
+ bool operator==(const Handle& rhs) const noexcept { return getId() == rhs.getId(); }
+ bool operator!=(const Handle& rhs) const noexcept { return getId() != rhs.getId(); }
+ bool operator<(const Handle& rhs) const noexcept { return getId() < rhs.getId(); }
+ bool operator<=(const Handle& rhs) const noexcept { return getId() <= rhs.getId(); }
+ bool operator>(const Handle& rhs) const noexcept { return getId() > rhs.getId(); }
+ bool operator>=(const Handle& rhs) const noexcept { return getId() >= rhs.getId(); }
+
// type-safe Handle cast
template::value> >
Handle(Handle const& base) noexcept : HandleBase(base) { } // NOLINT(hicpp-explicit-conversions,google-explicit-constructor)
diff --git a/filament/backend/include/private/backend/CircularBuffer.h b/filament/backend/include/private/backend/CircularBuffer.h
index aae6e69c03b..7d2de52b009 100644
--- a/filament/backend/include/private/backend/CircularBuffer.h
+++ b/filament/backend/include/private/backend/CircularBuffer.h
@@ -17,7 +17,10 @@
#ifndef TNT_FILAMENT_BACKEND_PRIVATE_CIRCULARBUFFER_H
#define TNT_FILAMENT_BACKEND_PRIVATE_CIRCULARBUFFER_H
+#include
+
#include
+#include
namespace filament::backend {
@@ -37,28 +40,36 @@ class CircularBuffer {
~CircularBuffer() noexcept;
- // allocates 'size' bytes in the circular buffer and returns a pointer to the memory
- // return the current head and moves it forward by size bytes
- inline void* allocate(size_t size) noexcept {
+ static size_t getBlockSize() noexcept { return sPageSize; }
+
+ // Total size of circular buffer. This is a constant.
+ size_t size() const noexcept { return mSize; }
+
+ // Allocates `s` bytes in the circular buffer and returns a pointer to the memory. All
+ // allocations must not exceed size() bytes.
+ inline void* allocate(size_t s) noexcept {
+ // We can never allocate more than size().
+ assert_invariant(getUsed() + s <= size());
char* const cur = static_cast(mHead);
- mHead = cur + size;
+ mHead = cur + s;
return cur;
}
- // Total size of circular buffer
- size_t size() const noexcept { return mSize; }
-
- // returns true if the buffer is empty (e.g. after calling flush)
+ // Returns true if the buffer is empty, i.e.: no allocations were made since
+ // calling getBuffer().
bool empty() const noexcept { return mTail == mHead; }
- void* getHead() const noexcept { return mHead; }
-
- void* getTail() const noexcept { return mTail; }
+ // Returns the size used since the last call to getBuffer()
+ size_t getUsed() const noexcept { return intptr_t(mHead) - intptr_t(mTail); }
- // call at least once every getRequiredSize() bytes allocated from the buffer
- void circularize() noexcept;
-
- static size_t getBlockSize() noexcept { return sPageSize; }
+ // Retrieves the current allocated range and frees it. It is the responsibility of the caller
+ // to make sure the returned range is no longer in use by the time allocate() allocates
+ // (size() - getUsed()) bytes.
+ struct Range {
+ void* tail;
+ void* head;
+ };
+ Range getBuffer() noexcept;
private:
void* alloc(size_t size) noexcept;
@@ -66,10 +77,10 @@ class CircularBuffer {
// pointer to the beginning of the circular buffer (constant)
void* mData = nullptr;
- int mUsesAshmem = -1;
+ int mAshmemFd = -1;
// size of the circular buffer (constant)
- size_t mSize = 0;
+ size_t const mSize;
// pointer to the beginning of recorded data
void* mTail = nullptr;
diff --git a/filament/backend/include/private/backend/CommandBufferQueue.h b/filament/backend/include/private/backend/CommandBufferQueue.h
index 6a434477789..28122452386 100644
--- a/filament/backend/include/private/backend/CommandBufferQueue.h
+++ b/filament/backend/include/private/backend/CommandBufferQueue.h
@@ -33,7 +33,7 @@ namespace filament::backend {
* A producer-consumer command queue that uses a CircularBuffer as main storage
*/
class CommandBufferQueue {
- struct Slice {
+ struct Range {
void* begin;
void* end;
};
@@ -46,7 +46,7 @@ class CommandBufferQueue {
mutable utils::Mutex mLock;
mutable utils::Condition mCondition;
- mutable std::vector mCommandBuffersToExecute;
+ mutable std::vector mCommandBuffersToExecute;
size_t mFreeSpace = 0;
size_t mHighWatermark = 0;
uint32_t mExitRequested = 0;
@@ -58,17 +58,20 @@ class CommandBufferQueue {
CommandBufferQueue(size_t requiredSize, size_t bufferSize);
~CommandBufferQueue();
- CircularBuffer& getCircularBuffer() { return mCircularBuffer; }
+ CircularBuffer& getCircularBuffer() noexcept { return mCircularBuffer; }
+ CircularBuffer const& getCircularBuffer() const noexcept { return mCircularBuffer; }
+
+ size_t getCapacity() const noexcept { return mRequiredSize; }
size_t getHighWatermark() const noexcept { return mHighWatermark; }
// wait for commands to be available and returns an array containing these commands
- std::vector waitForCommands() const;
+ std::vector waitForCommands() const;
// return the memory used by this command buffer to the circular buffer
// WARNING: releaseBuffer() must be called in sequence of the Slices returned by
// waitForCommands()
- void releaseBuffer(Slice const& buffer);
+ void releaseBuffer(Range const& buffer);
// all commands buffers (Slices) written to this point are returned by waitForCommand(). This
// call blocks until the CircularBuffer has at least mRequiredSize bytes available.
diff --git a/filament/backend/include/private/backend/CommandStream.h b/filament/backend/include/private/backend/CommandStream.h
index be84b323ad0..985fa5fcd6e 100644
--- a/filament/backend/include/private/backend/CommandStream.h
+++ b/filament/backend/include/private/backend/CommandStream.h
@@ -213,6 +213,8 @@ class CommandStream {
CommandStream(CommandStream const& rhs) noexcept = delete;
CommandStream& operator=(CommandStream const& rhs) noexcept = delete;
+ CircularBuffer const& getCircularBuffer() const noexcept { return mCurrentBuffer; }
+
public:
#define DECL_DRIVER_API(methodName, paramsDecl, params) \
inline void methodName(paramsDecl) { \
diff --git a/filament/backend/include/private/backend/HandleAllocator.h b/filament/backend/include/private/backend/HandleAllocator.h
index aa5f53be695..04e66d85774 100644
--- a/filament/backend/include/private/backend/HandleAllocator.h
+++ b/filament/backend/include/private/backend/HandleAllocator.h
@@ -24,35 +24,31 @@
#include
#include
#include
+#include
#include
+#include
#include
#include
#include
+#include
#include
#include
-#if !defined(NDEBUG) && UTILS_HAS_RTTI
-# define HANDLE_TYPE_SAFETY 1
-#else
-# define HANDLE_TYPE_SAFETY 0
-#endif
-
-#define HandleAllocatorGL HandleAllocator<16, 64, 208>
-#define HandleAllocatorVK HandleAllocator<16, 64, 880>
-#define HandleAllocatorMTL HandleAllocator<16, 64, 584>
+#define HandleAllocatorGL HandleAllocator<16, 64, 208> // ~3640 / pool / MiB
+#define HandleAllocatorVK HandleAllocator<80, 176, 320> // ~1820 / pool / MiB
+#define HandleAllocatorMTL HandleAllocator<48, 160, 592> // ~1310 / pool / MiB
namespace filament::backend {
/*
* A utility class to efficiently allocate and manage Handle<>
*/
-template
+template
class HandleAllocator {
public:
-
HandleAllocator(const char* name, size_t size) noexcept;
HandleAllocator(HandleAllocator const& rhs) = delete;
HandleAllocator& operator=(HandleAllocator const& rhs) = delete;
@@ -70,14 +66,9 @@ class HandleAllocator {
*/
template
Handle allocateAndConstruct(ARGS&& ... args) noexcept {
- Handle h{ allocateHandle() };
+ Handle h{ allocateHandle() };
D* addr = handle_cast(h);
new(addr) D(std::forward(args)...);
-#if HANDLE_TYPE_SAFETY
- mLock.lock();
- mHandleTypeId[addr] = typeid(D).name();
- mLock.unlock();
-#endif
return h;
}
@@ -93,13 +84,7 @@ class HandleAllocator {
*/
template
Handle allocate() noexcept {
- Handle h{ allocateHandle() };
-#if HANDLE_TYPE_SAFETY
- D* addr = handle_cast(h);
- mLock.lock();
- mHandleTypeId[addr] = typeid(D).name();
- mLock.unlock();
-#endif
+ Handle h{ allocateHandle() };
return h;
}
@@ -116,17 +101,10 @@ class HandleAllocator {
assert_invariant(handle);
D* addr = handle_cast(const_cast&>(handle));
assert_invariant(addr);
-
// currently we implement construct<> with dtor+ctor, we could use operator= also
// but all our dtors are trivial, ~D() is actually a noop.
addr->~D();
new(addr) D(std::forward(args)...);
-
-#if HANDLE_TYPE_SAFETY
- mLock.lock();
- mHandleTypeId[addr] = typeid(D).name();
- mLock.unlock();
-#endif
return addr;
}
@@ -143,12 +121,6 @@ class HandleAllocator {
D* addr = handle_cast(const_cast&>(handle));
assert_invariant(addr);
new(addr) D(std::forward(args)...);
-
-#if HANDLE_TYPE_SAFETY
- mLock.lock();
- mHandleTypeId[addr] = typeid(D).name();
- mLock.unlock();
-#endif
return addr;
}
@@ -164,19 +136,8 @@ class HandleAllocator {
void deallocate(Handle& handle, D const* p) noexcept {
// allow to destroy the nullptr, similarly to operator delete
if (p) {
-#if HANDLE_TYPE_SAFETY
- mLock.lock();
- auto typeId = mHandleTypeId[p];
- mHandleTypeId.erase(p);
- mLock.unlock();
- if (UTILS_UNLIKELY(typeId != typeid(D).name())) {
- utils::slog.e << "Destroying handle " << handle.getId() << ", type " << typeid(D).name()
- << ", but handle's actual type is " << typeId << utils::io::endl;
- std::terminate();
- }
-#endif
p->~D();
- deallocateHandle(handle.getId());
+ deallocateHandle(handle.getId());
}
}
@@ -204,7 +165,17 @@ class HandleAllocator {
std::is_base_of_v>, Dp>
handle_cast(Handle& handle) noexcept {
assert_invariant(handle);
- void* const p = handleToPointer(handle.getId());
+ auto [p, tag] = handleToPointer(handle.getId());
+
+ if (isPoolHandle(handle.getId())) {
+ // check for use after free
+ uint8_t const age = (tag & HANDLE_AGE_MASK) >> HANDLE_AGE_SHIFT;
+ auto const pNode = static_cast(p);
+ uint8_t const expectedAge = pNode[-1].age;
+ ASSERT_POSTCONDITION(expectedAge == age,
+ "use-after-free of Handle with id=%d", handle.getId());
+ }
+
return static_cast(p);
}
@@ -219,29 +190,57 @@ class HandleAllocator {
private:
- // template
+ template
+ static constexpr size_t getBucketSize() noexcept {
+ if constexpr (sizeof(D) <= P0) { return P0; }
+ if constexpr (sizeof(D) <= P1) { return P1; }
+ static_assert(sizeof(D) <= P2);
+ return P2;
+ }
+
class Allocator {
friend class HandleAllocator;
- utils::PoolAllocator mPool0;
- utils::PoolAllocator mPool1;
- utils::PoolAllocator mPool2;
+ static constexpr size_t MIN_ALIGNMENT = alignof(std::max_align_t);
+ struct Node { uint8_t age; };
+ // Note: using the `extra` parameter of PoolAllocator<>, even with a 1-byte structure,
+ // generally increases all pool allocations by 8-bytes because of alignment restrictions.
+ template
+ using Pool = utils::PoolAllocator;
+ Pool mPool0;
+ Pool mPool1;
+ Pool mPool2;
UTILS_UNUSED_IN_RELEASE const utils::AreaPolicy::HeapArea& mArea;
public:
- static constexpr size_t MIN_ALIGNMENT_SHIFT = 4;
explicit Allocator(const utils::AreaPolicy::HeapArea& area);
+ static constexpr size_t getAlignment() noexcept { return MIN_ALIGNMENT; }
+
// this is in fact always called with a constexpr size argument
- [[nodiscard]] inline void* alloc(size_t size, size_t, size_t extra) noexcept {
+ [[nodiscard]] inline void* alloc(size_t size, size_t, size_t, uint8_t* outAge) noexcept {
void* p = nullptr;
- if (size <= mPool0.getSize()) p = mPool0.alloc(size, 16, extra);
- else if (size <= mPool1.getSize()) p = mPool1.alloc(size, 16, extra);
- else if (size <= mPool2.getSize()) p = mPool2.alloc(size, 16, extra);
+ if (size <= mPool0.getSize()) p = mPool0.alloc(size);
+ else if (size <= mPool1.getSize()) p = mPool1.alloc(size);
+ else if (size <= mPool2.getSize()) p = mPool2.alloc(size);
+ if (UTILS_LIKELY(p)) {
+ Node const* const pNode = static_cast(p);
+ // we are guaranteed to have at least sizeof(Node) bytes of extra storage before
+ // the allocation address.
+ *outAge = pNode[-1].age;
+ }
return p;
}
// this is in fact always called with a constexpr size argument
- inline void free(void* p, size_t size) noexcept {
+ inline void free(void* p, size_t size, uint8_t age) noexcept {
assert_invariant(p >= mArea.begin() && (char*)p + size <= (char*)mArea.end());
+
+ // check for double-free
+ Node* const pNode = static_cast(p);
+ uint8_t& expectedAge = pNode[-1].age;
+ ASSERT_POSTCONDITION(expectedAge == age,
+ "double-free of Handle of size %d at %p", size, p);
+ expectedAge = (expectedAge + 1) & 0xF; // fixme
+
if (size <= mPool0.getSize()) { mPool0.free(p); return; }
if (size <= mPool1.getSize()) { mPool1.free(p); return; }
if (size <= mPool2.getSize()) { mPool2.free(p); return; }
@@ -263,24 +262,16 @@ class HandleAllocator {
// allocateHandle()/deallocateHandle() selects the pool to use at compile-time based on the
// allocation size this is always inlined, because all these do is to call
// allocateHandleInPool()/deallocateHandleFromPool() with the right pool size.
- template
+ template
HandleBase::HandleId allocateHandle() noexcept {
- if constexpr (SIZE <= P0) { return allocateHandleInPool(); }
- if constexpr (SIZE <= P1) { return allocateHandleInPool(); }
- static_assert(SIZE <= P2);
- return allocateHandleInPool();
+ constexpr size_t BUCKET_SIZE = getBucketSize();
+ return allocateHandleInPool();
}
- template
+ template
void deallocateHandle(HandleBase::HandleId id) noexcept {
- if constexpr (SIZE <= P0) {
- deallocateHandleFromPool(id);
- } else if constexpr (SIZE <= P1) {
- deallocateHandleFromPool(id);
- } else {
- static_assert(SIZE <= P2);
- deallocateHandleFromPool(id);
- }
+ constexpr size_t BUCKET_SIZE = getBucketSize();
+ deallocateHandleFromPool(id);
}
// allocateHandleInPool()/deallocateHandleFromPool() is NOT inlined, which will cause three
@@ -289,9 +280,11 @@ class HandleAllocator {
template
UTILS_NOINLINE
HandleBase::HandleId allocateHandleInPool() noexcept {
- void* p = mHandleArena.alloc(SIZE);
+ uint8_t age;
+ void* p = mHandleArena.alloc(SIZE, alignof(std::max_align_t), 0, &age);
if (UTILS_LIKELY(p)) {
- return pointerToHandle(p);
+ uint32_t const tag = (uint32_t(age) << HANDLE_AGE_SHIFT) & HANDLE_AGE_MASK;
+ return arenaPointerToHandle(p, tag);
} else {
return allocateHandleSlow(SIZE);
}
@@ -301,42 +294,51 @@ class HandleAllocator {
UTILS_NOINLINE
void deallocateHandleFromPool(HandleBase::HandleId id) noexcept {
if (UTILS_LIKELY(isPoolHandle(id))) {
- void* p = handleToPointer(id);
- mHandleArena.free(p, SIZE);
+ auto [p, tag] = handleToPointer(id);
+ uint8_t const age = (tag & HANDLE_AGE_MASK) >> HANDLE_AGE_SHIFT;
+ mHandleArena.free(p, SIZE, age);
} else {
deallocateHandleSlow(id, SIZE);
}
}
- static constexpr uint32_t HEAP_HANDLE_FLAG = 0x80000000u;
+ // we handle a 4 bits age per address
+ static constexpr uint32_t HANDLE_HEAP_FLAG = 0x80000000u; // pool vs heap handle
+ static constexpr uint32_t HANDLE_AGE_MASK = 0x78000000u; // handle's age
+ static constexpr uint32_t HANDLE_INDEX_MASK = 0x07FFFFFFu; // handle index
+ static constexpr uint32_t HANDLE_TAG_MASK = HANDLE_AGE_MASK;
+ static constexpr uint32_t HANDLE_AGE_SHIFT = 27;
static bool isPoolHandle(HandleBase::HandleId id) noexcept {
- return (id & HEAP_HANDLE_FLAG) == 0u;
+ return (id & HANDLE_HEAP_FLAG) == 0u;
}
HandleBase::HandleId allocateHandleSlow(size_t size) noexcept;
void deallocateHandleSlow(HandleBase::HandleId id, size_t size) noexcept;
// We inline this because it's just 4 instructions in the fast case
- inline void* handleToPointer(HandleBase::HandleId id) const noexcept {
+ inline std::pair handleToPointer(HandleBase::HandleId id) const noexcept {
// note: the null handle will end-up returning nullptr b/c it'll be handled as
// a non-pool handle.
if (UTILS_LIKELY(isPoolHandle(id))) {
char* const base = (char*)mHandleArena.getArea().begin();
- size_t offset = id << Allocator::MIN_ALIGNMENT_SHIFT;
- return static_cast(base + offset);
+ uint32_t const tag = id & HANDLE_TAG_MASK;
+ size_t const offset = (id & HANDLE_INDEX_MASK) * Allocator::getAlignment();
+ return { static_cast(base + offset), tag };
}
- return handleToPointerSlow(id);
+ return { handleToPointerSlow(id), 0 };
}
void* handleToPointerSlow(HandleBase::HandleId id) const noexcept;
// We inline this because it's just 3 instructions
- inline HandleBase::HandleId pointerToHandle(void* p) const noexcept {
+ inline HandleBase::HandleId arenaPointerToHandle(void* p, uint32_t tag) const noexcept {
char* const base = (char*)mHandleArena.getArea().begin();
- size_t offset = (char*)p - base;
- auto id = HandleBase::HandleId(offset >> Allocator::MIN_ALIGNMENT_SHIFT);
- assert_invariant((id & HEAP_HANDLE_FLAG) == 0);
+ size_t const offset = (char*)p - base;
+ assert_invariant((offset % Allocator::getAlignment()) == 0);
+ auto id = HandleBase::HandleId(offset / Allocator::getAlignment());
+ id |= tag & HANDLE_TAG_MASK;
+ assert_invariant((id & HANDLE_HEAP_FLAG) == 0);
return id;
}
@@ -346,9 +348,6 @@ class HandleAllocator {
mutable utils::Mutex mLock;
tsl::robin_map mOverflowMap;
HandleBase::HandleId mId = 0;
-#if HANDLE_TYPE_SAFETY
- mutable std::unordered_map mHandleTypeId;
-#endif
};
} // namespace filament::backend
diff --git a/filament/backend/src/CircularBuffer.cpp b/filament/backend/src/CircularBuffer.cpp
index d9a877d3f59..41dd4173008 100644
--- a/filament/backend/src/CircularBuffer.cpp
+++ b/filament/backend/src/CircularBuffer.cpp
@@ -16,6 +16,14 @@
#include "private/backend/CircularBuffer.h"
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
#if !defined(WIN32) && !defined(__EMSCRIPTEN__) && !defined(IOS)
# include
# include
@@ -24,23 +32,20 @@
# define HAS_MMAP 0
#endif
+#include
+#include
+#include
#include
-#include
-#include
-#include
-#include
-#include
-
using namespace utils;
namespace filament::backend {
size_t CircularBuffer::sPageSize = arch::getPageSize();
-CircularBuffer::CircularBuffer(size_t size) {
+CircularBuffer::CircularBuffer(size_t size)
+ : mSize(size) {
mData = alloc(size);
- mSize = size;
mTail = mData;
mHead = mData;
}
@@ -85,7 +90,7 @@ void* CircularBuffer::alloc(size_t size) noexcept {
MAP_PRIVATE, fd, (off_t)size);
if (vaddr_guard != MAP_FAILED && (vaddr_guard == (char*)vaddr_shadow + size)) {
// woo-hoo success!
- mUsesAshmem = fd;
+ mAshmemFd = fd;
data = vaddr;
}
}
@@ -93,7 +98,7 @@ void* CircularBuffer::alloc(size_t size) noexcept {
}
}
- if (UTILS_UNLIKELY(mUsesAshmem < 0)) {
+ if (UTILS_UNLIKELY(mAshmemFd < 0)) {
// ashmem failed
if (vaddr_guard != MAP_FAILED) {
munmap(vaddr_guard, size);
@@ -137,9 +142,9 @@ void CircularBuffer::dealloc() noexcept {
if (mData) {
size_t const BLOCK_SIZE = getBlockSize();
munmap(mData, mSize * 2 + BLOCK_SIZE);
- if (mUsesAshmem >= 0) {
- close(mUsesAshmem);
- mUsesAshmem = -1;
+ if (mAshmemFd >= 0) {
+ close(mAshmemFd);
+ mAshmemFd = -1;
}
}
#else
@@ -149,23 +154,37 @@ void CircularBuffer::dealloc() noexcept {
}
-void CircularBuffer::circularize() noexcept {
- if (mUsesAshmem > 0) {
- intptr_t const overflow = intptr_t(mHead) - (intptr_t(mData) + ssize_t(mSize));
- if (overflow >= 0) {
- assert_invariant(size_t(overflow) <= mSize);
- mHead = (void *) (intptr_t(mData) + overflow);
- #ifndef NDEBUG
- memset(mData, 0xA5, size_t(overflow));
- #endif
- }
- } else {
- // Only circularize if mHead if in the second buffer.
- if (intptr_t(mHead) - intptr_t(mData) > ssize_t(mSize)) {
+CircularBuffer::Range CircularBuffer::getBuffer() noexcept {
+ Range const range{ .tail = mTail, .head = mHead };
+
+ char* const pData = static_cast(mData);
+ char const* const pEnd = pData + mSize;
+ char const* const pHead = static_cast(mHead);
+ if (UTILS_UNLIKELY(pHead >= pEnd)) {
+ size_t const overflow = pHead - pEnd;
+ if (UTILS_LIKELY(mAshmemFd > 0)) {
+ assert_invariant(overflow <= mSize);
+ mHead = static_cast(pData + overflow);
+ // Data Tail End Head [virtual]
+ // v v v v
+ // +-------------:----+-----:--------------+
+ // | : | : |
+ // +-----:------------+--------------------+
+ // Head |<------ copy ------>| [physical]
+ } else {
+ // Data Tail End Head
+ // v v v v
+ // +-------------:----+-----:--------------+
+ // | : | : |
+ // +-----|------------+-----|--------------+
+ // |<---------------->|
+ // sliding window
mHead = mData;
}
}
mTail = mHead;
+
+ return range;
}
} // namespace filament::backend
diff --git a/filament/backend/src/CommandBufferQueue.cpp b/filament/backend/src/CommandBufferQueue.cpp
index ccf9d33a0d7..e3e5de045c8 100644
--- a/filament/backend/src/CommandBufferQueue.cpp
+++ b/filament/backend/src/CommandBufferQueue.cpp
@@ -15,14 +15,25 @@
*/
#include "private/backend/CommandBufferQueue.h"
+#include "private/backend/CircularBuffer.h"
+#include "private/backend/CommandStream.h"
+#include
#include
-#include
+#include
+#include
#include
+#include
#include
-#include "private/backend/BackendUtils.h"
-#include "private/backend/CommandStream.h"
+#include
+#include
+#include
+#include
+#include
+
+#include
+#include
using namespace utils;
@@ -65,50 +76,53 @@ void CommandBufferQueue::flush() noexcept {
// always guaranteed to have enough space for the NoopCommand
new(circularBuffer.allocate(sizeof(NoopCommand))) NoopCommand(nullptr);
- // end of this slice
- void* const head = circularBuffer.getHead();
+ const size_t requiredSize = mRequiredSize;
- // beginning of this slice
- void* const tail = circularBuffer.getTail();
+ // get the current buffer
+ auto const [begin, end] = circularBuffer.getBuffer();
- // size of this slice
- uint32_t const used = uint32_t(intptr_t(head) - intptr_t(tail));
+ assert_invariant(circularBuffer.empty());
- circularBuffer.circularize();
+ // size of the current buffer
+ size_t const used = std::distance(
+ static_cast(begin), static_cast(end));
std::unique_lock lock(mLock);
- mCommandBuffersToExecute.push_back({ tail, head });
+ mCommandBuffersToExecute.push_back({ begin, end });
+ mCondition.notify_one();
// circular buffer is too small, we corrupted the stream
ASSERT_POSTCONDITION(used <= mFreeSpace,
"Backend CommandStream overflow. Commands are corrupted and unrecoverable.\n"
"Please increase minCommandBufferSizeMB inside the Config passed to Engine::create.\n"
- "Space used at this time: %u bytes",
- (unsigned)used);
+ "Space used at this time: %u bytes, overflow: %u bytes",
+ (unsigned)used, unsigned(used - mFreeSpace));
// wait until there is enough space in the buffer
mFreeSpace -= used;
- const size_t requiredSize = mRequiredSize;
+ if (UTILS_UNLIKELY(mFreeSpace < requiredSize)) {
+
#ifndef NDEBUG
- size_t totalUsed = circularBuffer.size() - mFreeSpace;
- mHighWatermark = std::max(mHighWatermark, totalUsed);
- if (UTILS_UNLIKELY(totalUsed > requiredSize)) {
- slog.d << "CommandStream used too much space: " << totalUsed
- << ", out of " << requiredSize << " (will block)" << io::endl;
- }
+ size_t const totalUsed = circularBuffer.size() - mFreeSpace;
+ slog.d << "CommandStream used too much space (will block): "
+ << "needed space " << requiredSize << " out of " << mFreeSpace
+ << ", totalUsed=" << totalUsed << ", current=" << used
+ << ", queue size=" << mCommandBuffersToExecute.size() << " buffers"
+ << io::endl;
+
+ mHighWatermark = std::max(mHighWatermark, totalUsed);
#endif
- mCondition.notify_one();
- if (UTILS_LIKELY(mFreeSpace < requiredSize)) {
SYSTRACE_NAME("waiting: CircularBuffer::flush()");
mCondition.wait(lock, [this, requiredSize]() -> bool {
+ // TODO: on macOS, we need to call pumpEvents from time to time
return mFreeSpace >= requiredSize;
});
}
}
-std::vector CommandBufferQueue::waitForCommands() const {
+std::vector CommandBufferQueue::waitForCommands() const {
if (!UTILS_HAS_THREADING) {
return std::move(mCommandBuffersToExecute);
}
@@ -123,7 +137,7 @@ std::vector CommandBufferQueue::waitForCommands() con
return std::move(mCommandBuffersToExecute);
}
-void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Slice const& buffer) {
+void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Range const& buffer) {
std::lock_guard const lock(mLock);
mFreeSpace += uintptr_t(buffer.end) - uintptr_t(buffer.begin);
mCondition.notify_one();
diff --git a/filament/backend/src/HandleAllocator.cpp b/filament/backend/src/HandleAllocator.cpp
index 3257e4e2c94..bf8e779614c 100644
--- a/filament/backend/src/HandleAllocator.cpp
+++ b/filament/backend/src/HandleAllocator.cpp
@@ -16,9 +16,22 @@
#include "private/backend/HandleAllocator.h"
+#include
+
+#include
+#include
#include
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
#include
+#include
namespace filament::backend {
@@ -28,14 +41,34 @@ template
UTILS_NOINLINE
HandleAllocator::Allocator::Allocator(AreaPolicy::HeapArea const& area)
: mArea(area) {
- // TODO: we probably need a better way to set the size of these pools
- const size_t unit = area.size() / 32;
- const size_t offsetPool1 = unit;
- const size_t offsetPool2 = 16 * unit;
- char* const p = (char*)area.begin();
- mPool0 = PoolAllocator< P0, 16>(p, p + offsetPool1);
- mPool1 = PoolAllocator< P1, 16>(p + offsetPool1, p + offsetPool2);
- mPool2 = PoolAllocator< P2, 16>(p + offsetPool2, area.end());
+
+ // The largest handle this allocator can generate currently depends on the architecture's
+ // min alignment, typically 8 or 16 bytes.
+ // e.g. On Android armv8, the alignment is 16 bytes, so for a 1 MiB heap, the largest handle
+ // index will be 65536. Note that this is not the same as the number of handles (which
+ // will always be less).
+ // Because our maximum representable handle currently is 0x07FFFFFF, the maximum sensible
+ heap size is 2 GiB, which amounts to 7.6 million handles per pool (in the GL case).
+ size_t const maxHeapSize = std::min(area.size(), HANDLE_INDEX_MASK * getAlignment());
+
+ if (UTILS_UNLIKELY(maxHeapSize != area.size())) {
+ slog.w << "HandleAllocator heap size reduced to "
+ << maxHeapSize << " from " << area.size() << io::endl;
+ }
+
+ // make sure we start with a clean arena. This is needed to ensure that all blocks start
+ // with an age of 0.
+ memset(area.data(), 0, maxHeapSize);
+
+ // size the different pools so that they can all contain the same number of handles
+ size_t const count = maxHeapSize / (P0 + P1 + P2);
+ char* const p0 = static_cast(area.begin());
+ char* const p1 = p0 + count * P0;
+ char* const p2 = p1 + count * P1;
+
+ mPool0 = Pool(p0, count * P0);
+ mPool1 = Pool(p1, count * P1);
+ mPool2 = Pool(p2, count * P2);
}
// ------------------------------------------------------------------------------------------------
@@ -73,11 +106,17 @@ template
HandleBase::HandleId HandleAllocator::allocateHandleSlow(size_t size) noexcept {
void* p = ::malloc(size);
std::unique_lock lock(mLock);
- HandleBase::HandleId id = (++mId) | HEAP_HANDLE_FLAG;
+
+ HandleBase::HandleId id = (++mId) | HANDLE_HEAP_FLAG;
+
+ ASSERT_POSTCONDITION(mId < HANDLE_HEAP_FLAG,
+ "No more Handle ids available! This can happen if HandleAllocator arena has been full"
+ " for a while. Please increase FILAMENT_OPENGL_HANDLE_ARENA_SIZE_IN_MB");
+
mOverflowMap.emplace(id, p);
lock.unlock();
- if (UTILS_UNLIKELY(id == (HEAP_HANDLE_FLAG|1u))) { // meaning id was zero
+ if (UTILS_UNLIKELY(id == (HANDLE_HEAP_FLAG | 1u))) { // meaning id was zero
PANIC_LOG("HandleAllocator arena is full, using slower system heap. Please increase "
"the appropriate constant (e.g. FILAMENT_OPENGL_HANDLE_ARENA_SIZE_IN_MB).");
}
@@ -86,7 +125,7 @@ HandleBase::HandleId HandleAllocator::allocateHandleSlow(size_t size
template
void HandleAllocator::deallocateHandleSlow(HandleBase::HandleId id, size_t) noexcept {
- assert_invariant(id & HEAP_HANDLE_FLAG);
+ assert_invariant(id & HANDLE_HEAP_FLAG);
void* p = nullptr;
auto& overflowMap = mOverflowMap;
diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm
index ef5c35e1080..b1e3d7574f7 100644
--- a/filament/backend/src/metal/MetalDriver.mm
+++ b/filament/backend/src/metal/MetalDriver.mm
@@ -43,6 +43,40 @@
namespace backend {
Driver* MetalDriverFactory::create(MetalPlatform* const platform, const Platform::DriverConfig& driverConfig) {
+#if 0
+ // this is useful for development, but too verbose even for debug builds
+ // For reference on a 64-bits machine in Release mode:
+ // MetalTimerQuery : 16 few
+ // HwStream : 24 few
+ // MetalIndexBuffer : 40 moderate
+ // MetalFence : 48 few
+ // MetalBufferObject : 48 many
+ // -- less than or equal 48 bytes
+ // MetalSamplerGroup : 112 few
+ // MetalProgram : 144 moderate
+ // MetalTexture : 152 moderate
+ // MetalVertexBuffer : 152 moderate
+ // -- less than or equal 160 bytes
+ // MetalSwapChain : 184 few
+ // MetalRenderTarget : 272 few
+ // MetalRenderPrimitive : 584 many
+ // -- less than or equal to 592 bytes
+
+ utils::slog.d
+ << "\nMetalSwapChain: " << sizeof(MetalSwapChain)
+ << "\nMetalBufferObject: " << sizeof(MetalBufferObject)
+ << "\nMetalVertexBuffer: " << sizeof(MetalVertexBuffer)
+ << "\nMetalIndexBuffer: " << sizeof(MetalIndexBuffer)
+ << "\nMetalSamplerGroup: " << sizeof(MetalSamplerGroup)
+ << "\nMetalRenderPrimitive: " << sizeof(MetalRenderPrimitive)
+ << "\nMetalTexture: " << sizeof(MetalTexture)
+ << "\nMetalTimerQuery: " << sizeof(MetalTimerQuery)
+ << "\nHwStream: " << sizeof(HwStream)
+ << "\nMetalRenderTarget: " << sizeof(MetalRenderTarget)
+ << "\nMetalFence: " << sizeof(MetalFence)
+ << "\nMetalProgram: " << sizeof(MetalProgram)
+ << utils::io::endl;
+#endif
return MetalDriver::create(platform, driverConfig);
}
diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp
index 1d3e06282c4..047f28383db 100644
--- a/filament/backend/src/opengl/OpenGLDriver.cpp
+++ b/filament/backend/src/opengl/OpenGLDriver.cpp
@@ -90,24 +90,24 @@ Driver* OpenGLDriver::create(OpenGLPlatform* const platform,
#if 0
// this is useful for development, but too verbose even for debug builds
// For reference on a 64-bits machine in Release mode:
- // GLFence : 8 few
// GLIndexBuffer : 8 moderate
- // GLSamplerGroup : 8 few
+ // GLSamplerGroup : 16 few
+ // GLSwapChain : 16 few
+ // GLTimerQuery : 16 few
// -- less than or equal 16 bytes
- // GLBufferObject : 24 many
- // GLSync : 24 few
- // GLTimerQuery : 32 few
- // OpenGLProgram : 32 moderate
- // GLRenderPrimitive : 48 many
+ // GLFence : 24 few
+ // GLBufferObject : 32 many
+ // GLRenderPrimitive : 40 many
+ // OpenGLProgram : 56 moderate
+ // GLTexture : 64 moderate
// -- less than or equal 64 bytes
- // GLTexture : 72 moderate
+ // GLStream : 104 few
// GLRenderTarget : 112 few
- // GLStream : 184 few
// GLVertexBuffer : 200 moderate
// -- less than or equal to 208 bytes
slog.d
- << "HwFence: " << sizeof(HwFence)
+ << "\nGLSwapChain: " << sizeof(GLSwapChain)
<< "\nGLBufferObject: " << sizeof(GLBufferObject)
<< "\nGLVertexBuffer: " << sizeof(GLVertexBuffer)
<< "\nGLIndexBuffer: " << sizeof(GLIndexBuffer)
@@ -117,7 +117,7 @@ Driver* OpenGLDriver::create(OpenGLPlatform* const platform,
<< "\nGLTimerQuery: " << sizeof(GLTimerQuery)
<< "\nGLStream: " << sizeof(GLStream)
<< "\nGLRenderTarget: " << sizeof(GLRenderTarget)
- << "\nGLSync: " << sizeof(GLSync)
+ << "\nGLFence: " << sizeof(GLFence)
<< "\nOpenGLProgram: " << sizeof(OpenGLProgram)
<< io::endl;
#endif
diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp
index 9680320f929..c700ce9c6a3 100644
--- a/filament/backend/src/vulkan/VulkanDriver.cpp
+++ b/filament/backend/src/vulkan/VulkanDriver.cpp
@@ -213,6 +213,41 @@ VulkanDriver::~VulkanDriver() noexcept = default;
UTILS_NOINLINE
Driver* VulkanDriver::create(VulkanPlatform* platform, VulkanContext const& context,
Platform::DriverConfig const& driverConfig) noexcept {
+#if 0
+ // this is useful for development, but too verbose even for debug builds
+ // For reference on a 64-bits machine in Release mode:
+ // VulkanSamplerGroup : 24 few
+ // HwStream : 24 few
+ // VulkanFence : 40 few
+ // VulkanProgram : 40 moderate
+ // VulkanIndexBuffer : 72 moderate
+ // VulkanBufferObject : 72 many
+ // -- less than or equal 80 bytes
+ // VulkanRenderPrimitive : 104 many
+ // VulkanSwapChain : 112 few
+ // VulkanTimerQuery : 168 few
+ // -- less than or equal 176 bytes
+ // VulkanTexture : 232 moderate
+ // VulkanVertexBuffer : 312 moderate
+ // VulkanRenderTarget : 320 few
+ // -- less than or equal to 320 bytes
+
+ utils::slog.d
+ << "\nVulkanSwapChain: " << sizeof(VulkanSwapChain)
+ << "\nVulkanBufferObject: " << sizeof(VulkanBufferObject)
+ << "\nVulkanVertexBuffer: " << sizeof(VulkanVertexBuffer)
+ << "\nVulkanIndexBuffer: " << sizeof(VulkanIndexBuffer)
+ << "\nVulkanSamplerGroup: " << sizeof(VulkanSamplerGroup)
+ << "\nVulkanRenderPrimitive: " << sizeof(VulkanRenderPrimitive)
+ << "\nVulkanTexture: " << sizeof(VulkanTexture)
+ << "\nVulkanTimerQuery: " << sizeof(VulkanTimerQuery)
+ << "\nHwStream: " << sizeof(HwStream)
+ << "\nVulkanRenderTarget: " << sizeof(VulkanRenderTarget)
+ << "\nVulkanFence: " << sizeof(VulkanFence)
+ << "\nVulkanProgram: " << sizeof(VulkanProgram)
+ << utils::io::endl;
+#endif
+
assert_invariant(platform);
size_t defaultSize = FVK_HANDLE_ARENA_SIZE_IN_MB * 1024U * 1024U;
Platform::DriverConfig validConfig {driverConfig};
@@ -1641,26 +1676,26 @@ void VulkanDriver::draw(PipelineState pipelineState, Handle r
// Update the VK raster state.
const VulkanRenderTarget* rt = mCurrentRenderPass.renderTarget;
- auto vkraster = mPipelineCache.getCurrentRasterState();
- vkraster.cullMode = getCullMode(rasterState.culling);
- vkraster.frontFace = getFrontFace(rasterState.inverseFrontFaces);
- vkraster.depthBiasEnable = (depthOffset.constant || depthOffset.slope) ? true : false;
- vkraster.depthBiasConstantFactor = depthOffset.constant;
- vkraster.depthBiasSlopeFactor = depthOffset.slope;
- vkraster.blendEnable = rasterState.hasBlending();
- vkraster.srcColorBlendFactor = getBlendFactor(rasterState.blendFunctionSrcRGB);
- vkraster.dstColorBlendFactor = getBlendFactor(rasterState.blendFunctionDstRGB);
- vkraster.colorBlendOp = rasterState.blendEquationRGB;
- vkraster.srcAlphaBlendFactor = getBlendFactor(rasterState.blendFunctionSrcAlpha);
- vkraster.dstAlphaBlendFactor = getBlendFactor(rasterState.blendFunctionDstAlpha);
- vkraster.alphaBlendOp = rasterState.blendEquationAlpha;
- vkraster.colorWriteMask = (VkColorComponentFlags) (rasterState.colorWrite ? 0xf : 0x0);
- vkraster.depthWriteEnable = rasterState.depthWrite;
- vkraster.depthCompareOp = rasterState.depthFunc;
- vkraster.rasterizationSamples = rt->getSamples();
- vkraster.alphaToCoverageEnable = rasterState.alphaToCoverage;
- vkraster.colorTargetCount = rt->getColorTargetCount(mCurrentRenderPass);
- mPipelineCache.setCurrentRasterState(vkraster);
+ VulkanPipelineCache::RasterState const vulkanRasterState{
+ .cullMode = getCullMode(rasterState.culling),
+ .frontFace = getFrontFace(rasterState.inverseFrontFaces),
+ .depthBiasEnable = (depthOffset.constant || depthOffset.slope) ? true : false,
+ .blendEnable = rasterState.hasBlending(),
+ .depthWriteEnable = rasterState.depthWrite,
+ .alphaToCoverageEnable = rasterState.alphaToCoverage,
+ .srcColorBlendFactor = getBlendFactor(rasterState.blendFunctionSrcRGB),
+ .dstColorBlendFactor = getBlendFactor(rasterState.blendFunctionDstRGB),
+ .srcAlphaBlendFactor = getBlendFactor(rasterState.blendFunctionSrcAlpha),
+ .dstAlphaBlendFactor = getBlendFactor(rasterState.blendFunctionDstAlpha),
+ .colorWriteMask = (VkColorComponentFlags) (rasterState.colorWrite ? 0xf : 0x0),
+ .rasterizationSamples = rt->getSamples(),
+ .colorTargetCount = rt->getColorTargetCount(mCurrentRenderPass),
+ .colorBlendOp = rasterState.blendEquationRGB,
+ .alphaBlendOp = rasterState.blendEquationAlpha,
+ .depthCompareOp = rasterState.depthFunc,
+ .depthBiasConstantFactor = depthOffset.constant,
+ .depthBiasSlopeFactor = depthOffset.slope
+ };
// Declare fixed-size arrays that get passed to the pipeCache and to vkCmdBindVertexBuffers.
uint32_t const bufferCount = prim.vertexBuffer->attributes.size();
@@ -1671,7 +1706,7 @@ void VulkanDriver::draw(PipelineState pipelineState, Handle r
// Push state changes to the VulkanPipelineCache instance. This is fast and does not make VK calls.
mPipelineCache.bindProgram(program);
- mPipelineCache.bindRasterState(mPipelineCache.getCurrentRasterState());
+ mPipelineCache.bindRasterState(vulkanRasterState);
mPipelineCache.bindPrimitiveTopology(prim.primitiveTopology);
mPipelineCache.bindVertexArray(attribDesc, bufferDesc, bufferCount);
diff --git a/filament/backend/src/vulkan/VulkanPipelineCache.cpp b/filament/backend/src/vulkan/VulkanPipelineCache.cpp
index 889888cd083..2d976f66ff2 100644
--- a/filament/backend/src/vulkan/VulkanPipelineCache.cpp
+++ b/filament/backend/src/vulkan/VulkanPipelineCache.cpp
@@ -34,8 +34,6 @@ using namespace bluevk;
namespace filament::backend {
-static VulkanPipelineCache::RasterState createDefaultRasterState();
-
static VkShaderStageFlags getShaderStageFlags(VulkanPipelineCache::UsageFlags key, uint16_t binding) {
// NOTE: if you modify this function, you also need to modify getUsageFlags.
assert_invariant(binding < MAX_SAMPLER_COUNT);
@@ -73,8 +71,7 @@ VulkanPipelineCache::UsageFlags VulkanPipelineCache::disableUsageFlags(uint16_t
}
VulkanPipelineCache::VulkanPipelineCache(VulkanResourceAllocator* allocator)
- : mCurrentRasterState(createDefaultRasterState()),
- mResourceAllocator(allocator),
+ : mResourceAllocator(allocator),
mPipelineBoundResources(allocator) {
mDummyBufferWriteInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;
mDummyBufferWriteInfo.pNext = nullptr;
@@ -569,7 +566,7 @@ void VulkanPipelineCache::bindProgram(VulkanProgram* program) noexcept {
}
void VulkanPipelineCache::bindRasterState(const RasterState& rasterState) noexcept {
- mPipelineRequirements.rasterState = mCurrentRasterState = rasterState;
+ mPipelineRequirements.rasterState = rasterState;
}
void VulkanPipelineCache::bindRenderPass(VkRenderPass renderPass, int subpassIndex) noexcept {
@@ -917,23 +914,6 @@ bool VulkanPipelineCache::DescEqual::operator()(const DescriptorKey& k1,
return true;
}
-static VulkanPipelineCache::RasterState createDefaultRasterState() {
- return VulkanPipelineCache::RasterState {
- .cullMode = VK_CULL_MODE_NONE,
- .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
- .depthBiasEnable = VK_FALSE,
- .blendEnable = VK_FALSE,
- .depthWriteEnable = VK_TRUE,
- .alphaToCoverageEnable = true,
- .colorWriteMask = 0xf,
- .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
- .colorTargetCount = 1,
- .depthCompareOp = SamplerCompareFunc::LE,
- .depthBiasConstantFactor = 0.0f,
- .depthBiasSlopeFactor = 0.0f,
- };
-}
-
} // namespace filament::backend
#pragma clang diagnostic pop
diff --git a/filament/backend/src/vulkan/VulkanPipelineCache.h b/filament/backend/src/vulkan/VulkanPipelineCache.h
index 018fd00efec..a28327c5d9e 100644
--- a/filament/backend/src/vulkan/VulkanPipelineCache.h
+++ b/filament/backend/src/vulkan/VulkanPipelineCache.h
@@ -199,15 +199,6 @@ class VulkanPipelineCache : public CommandBufferObserver {
mPipelineBoundResources.acquire(resource);
}
- inline RasterState getCurrentRasterState() const noexcept {
- return mCurrentRasterState;
- }
-
- // We need to update this outside of bindRasterState due to VulkanDriver::draw.
- inline void setCurrentRasterState(RasterState const& rasterState) noexcept {
- mCurrentRasterState = rasterState;
- }
-
private:
// PIPELINE LAYOUT CACHE KEY
// -------------------------
@@ -413,7 +404,6 @@ class VulkanPipelineCache : public CommandBufferObserver {
VmaAllocator mAllocator = VK_NULL_HANDLE;
// Current requirements for the pipeline layout, pipeline, and descriptor sets.
- RasterState mCurrentRasterState;
PipelineKey mPipelineRequirements = {};
DescriptorKey mDescriptorRequirements = {};
diff --git a/filament/backend/src/vulkan/VulkanResources.h b/filament/backend/src/vulkan/VulkanResources.h
index 77b6498b860..9421e11a14d 100644
--- a/filament/backend/src/vulkan/VulkanResources.h
+++ b/filament/backend/src/vulkan/VulkanResources.h
@@ -63,7 +63,8 @@ struct VulkanResourceBase {
explicit VulkanResourceBase(VulkanResourceType type)
: mRefCount(IS_HEAP_ALLOC_TYPE(type) ? 1 : 0),
mType(type),
- mHandleId(0) {}
+ mHandleId(0) {
+ }
private:
inline VulkanResourceType getType() {
@@ -82,6 +83,7 @@ struct VulkanResourceBase {
if (IS_HEAP_ALLOC_TYPE(mType)) {
return;
}
+ assert_invariant(mRefCount < ((1<<24) - 1));
++mRefCount;
}
@@ -89,6 +91,7 @@ struct VulkanResourceBase {
if (IS_HEAP_ALLOC_TYPE(mType)) {
return;
}
+ assert_invariant(mRefCount > 0);
--mRefCount;
}
@@ -96,8 +99,8 @@ struct VulkanResourceBase {
return mRefCount;
}
- size_t mRefCount = 0;
- VulkanResourceType mType = VulkanResourceType::BUFFER_OBJECT;
+ uint32_t mRefCount : 24; // 16M is enough for the refcount
+ VulkanResourceType mType : 8;
HandleBase::HandleId mHandleId;
friend struct VulkanThreadSafeResource;
diff --git a/filament/include/filament/Engine.h b/filament/include/filament/Engine.h
index 904cbda4a3f..2f8c6d4af74 100644
--- a/filament/include/filament/Engine.h
+++ b/filament/include/filament/Engine.h
@@ -178,6 +178,7 @@ class UTILS_PUBLIC Engine {
using Backend = backend::Backend;
using DriverConfig = backend::Platform::DriverConfig;
using FeatureLevel = backend::FeatureLevel;
+ using StereoscopicType = backend::StereoscopicType;
/**
* Config is used to define the memory footprint used by the engine, such as the
@@ -297,6 +298,25 @@ class UTILS_PUBLIC Engine {
*/
size_t textureUseAfterFreePoolSize = 0;
+ /**
+ * Set to `true` to forcibly disable parallel shader compilation in the backend.
+ * Currently only honored by the GL backend.
+ */
+ bool disableParallelShaderCompile = false;
+
+ /*
+ * The type of technique for stereoscopic rendering.
+ *
+ * This setting determines the algorithm used when stereoscopic rendering is enabled. This
+ * decision applies to the entire Engine for the lifetime of the Engine. E.g., multiple
+ * Views created from the Engine must use the same stereoscopic type.
+ *
+ * Each view can enable stereoscopic rendering via the StereoscopicOptions::enable flag.
+ *
+ * @see View::setStereoscopicOptions
+ */
+ StereoscopicType stereoscopicType = StereoscopicType::INSTANCED;
+
/*
* The number of eyes to render when stereoscopic rendering is enabled. Supported values are
* between 1 and Engine::getMaxStereoscopicEyes() (inclusive).
diff --git a/filament/include/filament/View.h b/filament/include/filament/View.h
index e4ba827aad2..3cdd527fac7 100644
--- a/filament/include/filament/View.h
+++ b/filament/include/filament/View.h
@@ -719,7 +719,7 @@ class UTILS_PUBLIC View : public FilamentAPI {
void setDebugCamera(Camera* UTILS_NULLABLE camera) noexcept;
//! debugging: returns a Camera from the point of view of *the* dominant directional light used for shadowing.
- Camera const* UTILS_NULLABLE getDirectionalLightCamera() const noexcept;
+ Camera const* UTILS_NULLABLE getDirectionalShadowCamera() const noexcept;
/** Result of a picking query */
diff --git a/filament/src/Allocators.h b/filament/src/Allocators.h
index eb354b8d329..84962e30c0e 100644
--- a/filament/src/Allocators.h
+++ b/filament/src/Allocators.h
@@ -54,7 +54,7 @@ using LinearAllocatorArena = utils::Arena<
#endif
-using ArenaScope = utils::ArenaScope;
+using RootArenaScope = utils::ArenaScope;
} // namespace filament
diff --git a/filament/src/Froxelizer.cpp b/filament/src/Froxelizer.cpp
index c469932c251..47bd0d343dd 100644
--- a/filament/src/Froxelizer.cpp
+++ b/filament/src/Froxelizer.cpp
@@ -168,7 +168,8 @@ void Froxelizer::setProjection(const mat4f& projection,
}
bool Froxelizer::prepare(
- FEngine::DriverApi& driverApi, ArenaScope& arena, filament::Viewport const& viewport,
+ FEngine::DriverApi& driverApi, RootArenaScope& rootArenaScope,
+ filament::Viewport const& viewport,
const mat4f& projection, float projectionNear, float projectionFar) noexcept {
setViewport(viewport);
setProjection(projection, projectionNear, projectionFar);
@@ -199,12 +200,12 @@ bool Froxelizer::prepare(
// light records per froxel (~256 KiB)
mLightRecords = {
- arena.allocate(getFroxelBufferEntryCount(), CACHELINE_SIZE),
+ rootArenaScope.allocate(getFroxelBufferEntryCount(), CACHELINE_SIZE),
getFroxelBufferEntryCount() };
// froxel thread data (~256 KiB)
mFroxelShardedData = {
- arena.allocate(GROUP_COUNT, CACHELINE_SIZE),
+ rootArenaScope.allocate(GROUP_COUNT, CACHELINE_SIZE),
uint32_t(GROUP_COUNT)
};
diff --git a/filament/src/Froxelizer.h b/filament/src/Froxelizer.h
index 27885e24bc7..27ba3c57641 100644
--- a/filament/src/Froxelizer.h
+++ b/filament/src/Froxelizer.h
@@ -110,7 +110,7 @@ class Froxelizer {
*
* return true if updateUniforms() needs to be called
*/
- bool prepare(backend::DriverApi& driverApi, ArenaScope& arena, Viewport const& viewport,
+ bool prepare(backend::DriverApi& driverApi, RootArenaScope& rootArenaScope, Viewport const& viewport,
const math::mat4f& projection, float projectionNear, float projectionFar) noexcept;
Froxel getFroxelAt(size_t x, size_t y, size_t z) const noexcept;
diff --git a/filament/src/PostProcessManager.cpp b/filament/src/PostProcessManager.cpp
index 78814f74852..f186ee9cb6d 100644
--- a/filament/src/PostProcessManager.cpp
+++ b/filament/src/PostProcessManager.cpp
@@ -414,7 +414,7 @@ void PostProcessManager::commitAndRender(FrameGraphResources::RenderPassInfo con
// ------------------------------------------------------------------------------------------------
PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph& fg,
- RenderPass const& pass, uint8_t structureRenderFlags,
+ RenderPassBuilder const& passBuilder, uint8_t structureRenderFlags,
uint32_t width, uint32_t height,
StructurePassConfig const& config) noexcept {
@@ -466,17 +466,19 @@ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph
.clearFlags = TargetBufferFlags::COLOR0 | TargetBufferFlags::DEPTH
});
},
- [=, renderPass = pass](FrameGraphResources const& resources,
+ [=, passBuilder = passBuilder](FrameGraphResources const& resources,
auto const&, DriverApi&) mutable {
Variant structureVariant(Variant::DEPTH_VARIANT);
structureVariant.setPicking(config.picking);
auto out = resources.getRenderPassInfo();
- renderPass.setRenderFlags(structureRenderFlags);
- renderPass.setVariant(structureVariant);
- renderPass.appendCommands(mEngine, RenderPass::CommandTypeFlags::SSAO);
- renderPass.sortCommands(mEngine);
- renderPass.execute(mEngine, resources.getPassName(), out.target, out.params);
+
+ passBuilder.renderFlags(structureRenderFlags);
+ passBuilder.variant(structureVariant);
+ passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::SSAO);
+
+ RenderPass const pass{ passBuilder.build(mEngine) };
+ RenderPass::execute(pass, mEngine, resources.getPassName(), out.target, out.params);
});
auto depth = structurePass->depth;
@@ -523,7 +525,7 @@ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph
// ------------------------------------------------------------------------------------------------
FrameGraphId PostProcessManager::ssr(FrameGraph& fg,
- RenderPass const& pass,
+ RenderPassBuilder const& passBuilder,
FrameHistory const& frameHistory,
CameraInfo const& cameraInfo,
PerViewUniforms& uniforms,
@@ -586,7 +588,7 @@ FrameGraphId PostProcessManager::ssr(FrameGraph& fg,
},
[this, projection = cameraInfo.projection,
userViewMatrix = cameraInfo.getUserViewMatrix(), uvFromClipMatrix, historyProjection,
- options, &uniforms, renderPass = pass]
+ options, &uniforms, passBuilder = passBuilder]
(FrameGraphResources const& resources, auto const& data, DriverApi& driver) mutable {
// set structure sampler
uniforms.prepareStructure(data.structure ?
@@ -607,17 +609,17 @@ FrameGraphId PostProcessManager::ssr(FrameGraph& fg,
auto out = resources.getRenderPassInfo();
// Remove the HAS_SHADOWING RenderFlags, since it's irrelevant when rendering reflections
- RenderPass::RenderFlags flags = renderPass.getRenderFlags();
- flags &= ~RenderPass::HAS_SHADOWING;
- renderPass.setRenderFlags(flags);
+ passBuilder.renderFlags(~RenderPass::HAS_SHADOWING, 0);
// use our special SSR variant, it can only be applied to object that have
// the SCREEN_SPACE ReflectionMode.
- renderPass.setVariant(Variant{Variant::SPECIAL_SSR});
+ passBuilder.variant(Variant{ Variant::SPECIAL_SSR });
+
// generate all our drawing commands, except blended objects.
- renderPass.appendCommands(mEngine, RenderPass::CommandTypeFlags::SCREEN_SPACE_REFLECTIONS);
- renderPass.sortCommands(mEngine);
- renderPass.execute(mEngine, resources.getPassName(), out.target, out.params);
+ passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::SCREEN_SPACE_REFLECTIONS);
+
+ RenderPass const pass{ passBuilder.build(mEngine) };
+ RenderPass::execute(pass, mEngine, resources.getPassName(), out.target, out.params);
});
return ssrPass->reflections;
diff --git a/filament/src/PostProcessManager.h b/filament/src/PostProcessManager.h
index 081e795f061..12b211dc238 100644
--- a/filament/src/PostProcessManager.h
+++ b/filament/src/PostProcessManager.h
@@ -50,6 +50,7 @@ class FMaterialInstance;
class FrameGraph;
class PerViewUniforms;
class RenderPass;
+class RenderPassBuilder;
struct CameraInfo;
class PostProcessManager {
@@ -99,12 +100,12 @@ class PostProcessManager {
FrameGraphId picking;
};
StructurePassOutput structure(FrameGraph& fg,
- RenderPass const& pass, uint8_t structureRenderFlags,
+ RenderPassBuilder const& passBuilder, uint8_t structureRenderFlags,
uint32_t width, uint32_t height, StructurePassConfig const& config) noexcept;
// reflections pass
FrameGraphId ssr(FrameGraph& fg,
- RenderPass const& pass,
+ RenderPassBuilder const& passBuilder,
FrameHistory const& frameHistory,
CameraInfo const& cameraInfo,
PerViewUniforms& uniforms,
diff --git a/filament/src/RenderPass.cpp b/filament/src/RenderPass.cpp
index 2932fcf481b..d5063043f5a 100644
--- a/filament/src/RenderPass.cpp
+++ b/filament/src/RenderPass.cpp
@@ -19,17 +19,43 @@
#include "RenderPrimitive.h"
#include "ShadowMap.h"
+#include "details/Camera.h"
#include "details/Material.h"
#include "details/MaterialInstance.h"
#include "details/View.h"
+#include "components/RenderableManager.h"
+
+#include
#include
+#include
+
+#include
+
+#include
+#include
+#include
+#include
+
+#include "private/backend/CircularBuffer.h"
+#include
+#include
#include
+#include
+#include
#include
+#include
+#include
+#include
+#include
#include
+#include
+#include
+#include
+
using namespace utils;
using namespace filament::math;
@@ -37,64 +63,112 @@ namespace filament {
using namespace backend;
-RenderPass::RenderPass(FEngine& engine,
- RenderPass::Arena& arena) noexcept
- : mCommandArena(arena),
- mCustomCommands(engine.getPerRenderPassAllocator()) {
+RenderPassBuilder& RenderPassBuilder::customCommand(
+ FEngine& engine,
+ uint8_t channel,
+ RenderPass::Pass pass,
+ RenderPass::CustomCommand custom,
+ uint32_t order,
+ RenderPass::Executor::CustomCommandFn const& command) {
+ if (!mCustomCommands.has_value()) {
+ // construct the vector the first time
+ mCustomCommands.emplace(engine.getPerRenderPassArena());
+ }
+ mCustomCommands->emplace_back(channel, pass, custom, order, command);
+ return *this;
}
-RenderPass::RenderPass(RenderPass const& rhs) = default;
+RenderPass RenderPassBuilder::build(FEngine& engine) {
+ ASSERT_POSTCONDITION(mRenderableSoa, "RenderPassBuilder::geometry() hasn't been called");
+ assert_invariant(mScissorViewport.width <= std::numeric_limits::max());
+ assert_invariant(mScissorViewport.height <= std::numeric_limits::max());
+ return RenderPass{ engine, *this };
+}
-// this destructor is actually heavy because it inlines ~vector<>
-RenderPass::~RenderPass() noexcept = default;
+// ------------------------------------------------------------------------------------------------
+
+RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexcept
+ : mRenderableSoa(*builder.mRenderableSoa),
+ mVisibleRenderables(builder.mVisibleRenderables),
+ mUboHandle(builder.mUboHandle),
+ mCameraPosition(builder.mCameraPosition),
+ mCameraForwardVector(builder.mCameraForwardVector),
+ mFlags(builder.mFlags),
+ mVariant(builder.mVariant),
+ mVisibilityMask(builder.mVisibilityMask),
+ mScissorViewport(builder.mScissorViewport),
+ mCustomCommands(engine.getPerRenderPassArena()) {
+
+ // compute the number of commands we need
+ updateSummedPrimitiveCounts(
+ const_cast(mRenderableSoa), mVisibleRenderables);
+
+ uint32_t commandCount =
+ FScene::getPrimitiveCount(mRenderableSoa, mVisibleRenderables.last);
+ const bool colorPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::COLOR);
+ const bool depthPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::DEPTH);
+ commandCount *= uint32_t(colorPass * 2 + depthPass);
+ commandCount += 1; // for the sentinel
+
+ uint32_t const customCommandCount =
+ builder.mCustomCommands.has_value() ? builder.mCustomCommands->size() : 0;
-RenderPass::Command* RenderPass::append(size_t count) noexcept {
- // this is like an "in-place" realloc(). Works only with LinearAllocator.
- Command* const curr = mCommandArena.alloc(count);
+ Command* const curr = builder.mArena.alloc(commandCount + customCommandCount);
assert_invariant(curr);
- assert_invariant(mCommandBegin == nullptr || curr == mCommandEnd);
- if (mCommandBegin == nullptr) {
- mCommandBegin = mCommandEnd = curr;
+
+ if (UTILS_UNLIKELY(builder.mArena.getAllocator().isHeapAllocation(curr))) {
+ static bool sLogOnce = true;
+ if (UTILS_UNLIKELY(sLogOnce)) {
+ sLogOnce = false;
+ PANIC_LOG("RenderPass arena is full, using slower system heap. Please increase "
+ "the appropriate constant (e.g. FILAMENT_PER_RENDER_PASS_ARENA_SIZE_IN_MB).");
+ }
}
- mCommandEnd += count;
- return curr;
-}
-void RenderPass::resize(size_t count) noexcept {
- if (mCommandBegin) {
- mCommandEnd = mCommandBegin + count;
- mCommandArena.rewind(mCommandEnd);
+ mCommandBegin = curr;
+ mCommandEnd = curr + commandCount + customCommandCount;
+
+ appendCommands(engine, { curr, commandCount }, builder.mCommandTypeFlags);
+
+ if (builder.mCustomCommands.has_value()) {
+ Command* p = curr + commandCount;
+ for (auto [channel, passId, command, order, fn]: builder.mCustomCommands.value()) {
+ appendCustomCommand(p++, channel, passId, command, order, fn);
+ }
}
-}
-void RenderPass::setGeometry(FScene::RenderableSoa const& soa, Range vr,
- backend::Handle uboHandle) noexcept {
- mRenderableSoa = &soa;
- mVisibleRenderables = vr;
- mUboHandle = uboHandle;
-}
+ // sort commands once we're done adding commands
+ sortCommands(builder.mArena);
-void RenderPass::setCamera(const CameraInfo& camera) noexcept {
- mCameraPosition = camera.getPosition();
- mCameraForwardVector = camera.getForwardVector();
+ if (engine.isAutomaticInstancingEnabled()) {
+ instanceify(engine, builder.mArena);
+ }
}
-void RenderPass::setScissorViewport(backend::Viewport viewport) noexcept {
- assert_invariant(viewport.width <= std::numeric_limits::max());
- assert_invariant(viewport.height <= std::numeric_limits::max());
- mScissorViewport = viewport;
+// this destructor is actually heavy because it inlines ~vector<>
+RenderPass::~RenderPass() noexcept = default;
+
+void RenderPass::resize(Arena& arena, size_t count) noexcept {
+ if (mCommandBegin) {
+ mCommandEnd = mCommandBegin + count;
+ arena.rewind(mCommandEnd);
+ }
}
-void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandTypeFlags) noexcept {
+void RenderPass::appendCommands(FEngine& engine,
+ Slice commands, CommandTypeFlags const commandTypeFlags) noexcept {
SYSTRACE_CALL();
SYSTRACE_CONTEXT();
- assert_invariant(mRenderableSoa);
-
utils::Range const vr = mVisibleRenderables;
// trace the number of visible renderables
SYSTRACE_VALUE32("visibleRenderables", vr.size());
if (UTILS_UNLIKELY(vr.empty())) {
+ // no renderables, we still need the sentinel and the command buffer size should be
+ // exactly 1.
+ assert_invariant(commands.size() == 1);
+ Command* curr = commands.data();
+ curr->key = uint64_t(Pass::SENTINEL);
return;
}
@@ -104,17 +178,10 @@ void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandT
const FScene::VisibleMaskType visibilityMask = mVisibilityMask;
// up-to-date summed primitive counts needed for generateCommands()
- FScene::RenderableSoa const& soa = *mRenderableSoa;
- updateSummedPrimitiveCounts(const_cast(soa), vr);
+ FScene::RenderableSoa const& soa = mRenderableSoa;
- // compute how much maximum storage we need for this pass
- uint32_t commandCount = FScene::getPrimitiveCount(soa, vr.last);
- // double the color pass for transparent objects that need to render twice
- const bool colorPass = bool(commandTypeFlags & CommandTypeFlags::COLOR);
- const bool depthPass = bool(commandTypeFlags & CommandTypeFlags::DEPTH);
- commandCount *= uint32_t(colorPass * 2 + depthPass);
- commandCount += 1; // for the sentinel
- Command* const curr = append(commandCount);
+ Command* curr = commands.data();
+ size_t const commandCount = commands.size();
auto stereoscopicEyeCount =
renderFlags & IS_STEREOSCOPIC ? engine.getConfig().stereoscopicEyeCount : 1;
@@ -152,7 +219,8 @@ void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandT
}
}
-void RenderPass::appendCustomCommand(uint8_t channel, Pass pass, CustomCommand custom, uint32_t order,
+void RenderPass::appendCustomCommand(Command* commands,
+ uint8_t channel, Pass pass, CustomCommand custom, uint32_t order,
Executor::CustomCommandFn command) {
assert_invariant((uint64_t(order) << CUSTOM_ORDER_SHIFT) <= CUSTOM_ORDER_MASK);
@@ -168,11 +236,10 @@ void RenderPass::appendCustomCommand(uint8_t channel, Pass pass, CustomCommand c
cmd |= uint64_t(order) << CUSTOM_ORDER_SHIFT;
cmd |= uint64_t(index);
- Command* const curr = append(1);
- curr->key = cmd;
+ commands->key = cmd;
}
-void RenderPass::sortCommands(FEngine& engine) noexcept {
+void RenderPass::sortCommands(Arena& arena) noexcept {
SYSTRACE_NAME("sort and trim commands");
std::sort(mCommandBegin, mCommandEnd);
@@ -183,30 +250,20 @@ void RenderPass::sortCommands(FEngine& engine) noexcept {
return c.key != uint64_t(Pass::SENTINEL);
});
- resize(uint32_t(last - mCommandBegin));
-
- if (engine.isAutomaticInstancingEnabled()) {
- instanceify(engine);
- }
+ resize(arena, uint32_t(last - mCommandBegin));
}
-void RenderPass::execute(FEngine& engine, const char* name,
+void RenderPass::execute(RenderPass const& pass,
+ FEngine& engine, const char* name,
backend::Handle renderTarget,
- backend::RenderPassParams params) const noexcept {
-
+ backend::RenderPassParams params) noexcept {
DriverApi& driver = engine.getDriverApi();
-
- // this is a good time to flush the CommandStream, because we're about to potentially
- // output a lot of commands. This guarantees here that we have at least
- // FILAMENT_MIN_COMMAND_BUFFERS_SIZE_IN_MB bytes (1MiB by default).
- engine.flush();
-
driver.beginRenderPass(renderTarget, params);
- getExecutor().execute(engine, name);
+ pass.getExecutor().execute(engine, name);
driver.endRenderPass();
}
-void RenderPass::instanceify(FEngine& engine) noexcept {
+void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept {
SYSTRACE_NAME("instanceify");
// instanceify works by scanning the **sorted** command stream, looking for repeat draw
@@ -262,7 +319,8 @@ void RenderPass::instanceify(FEngine& engine) noexcept {
// buffer large enough for all instances data
stagingBufferSize = sizeof(PerRenderableData) * (last - curr);
stagingBuffer = (PerRenderableData*)::malloc(stagingBufferSize);
- uboData = mRenderableSoa->data();
+ uboData = mRenderableSoa.data();
+ assert_invariant(uboData);
}
// copy the ubo data to a staging buffer
@@ -315,7 +373,7 @@ void RenderPass::instanceify(FEngine& engine) noexcept {
return command.key == uint64_t(Pass::SENTINEL);
});
- resize(uint32_t(lastCommand - mCommandBegin));
+ resize(arena, uint32_t(lastCommand - mCommandBegin));
}
assert_invariant(stagingBuffer == nullptr);
@@ -323,7 +381,7 @@ void RenderPass::instanceify(FEngine& engine) noexcept {
/* static */
-UTILS_ALWAYS_INLINE // this function exists only to make the code more readable. we want it inlined.
+UTILS_ALWAYS_INLINE // This function exists only to make the code more readable. we want it inlined.
inline // and we don't need it in the compilation unit
void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant,
FMaterialInstance const* const UTILS_RESTRICT mi, bool inverseFrontFaces) noexcept {
@@ -374,7 +432,7 @@ void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant,
/* static */
UTILS_NOINLINE
-void RenderPass::generateCommands(uint32_t commandTypeFlags, Command* const commands,
+void RenderPass::generateCommands(CommandTypeFlags commandTypeFlags, Command* const commands,
FScene::RenderableSoa const& soa, Range range,
Variant variant, RenderFlags renderFlags,
FScene::VisibleMaskType visibilityMask, float3 cameraPosition, float3 cameraForward,
@@ -432,9 +490,9 @@ void RenderPass::generateCommands(uint32_t commandTypeFlags, Command* const comm
}
/* static */
-template
+template
UTILS_NOINLINE
-RenderPass::Command* RenderPass::generateCommandsImpl(uint32_t extraFlags,
+RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags,
Command* UTILS_RESTRICT curr,
FScene::RenderableSoa const& UTILS_RESTRICT soa, Range range,
Variant const variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask,
@@ -737,13 +795,13 @@ void RenderPass::updateSummedPrimitiveCounts(
// ------------------------------------------------------------------------------------------------
void RenderPass::Executor::overridePolygonOffset(backend::PolygonOffset const* polygonOffset) noexcept {
- if ((mPolygonOffsetOverride = (polygonOffset != nullptr))) {
+ if ((mPolygonOffsetOverride = (polygonOffset != nullptr))) { // NOLINT(*-assignment-in-if-condition)
mPolygonOffset = *polygonOffset;
}
}
void RenderPass::Executor::overrideScissor(backend::Viewport const* scissor) noexcept {
- if ((mScissorOverride = (scissor != nullptr))) {
+ if ((mScissorOverride = (scissor != nullptr))) { // NOLINT(*-assignment-in-if-condition)
mScissor = *scissor;
}
}
@@ -754,15 +812,20 @@ void RenderPass::Executor::overrideScissor(backend::Viewport const& scissor) noe
}
void RenderPass::Executor::execute(FEngine& engine, const char*) const noexcept {
- execute(engine.getDriverApi(), mCommands.begin(), mCommands.end());
+ execute(engine, mCommands.begin(), mCommands.end());
}
UTILS_NOINLINE // no need to be inlined
-void RenderPass::Executor::execute(backend::DriverApi& driver,
+void RenderPass::Executor::execute(FEngine& engine,
const Command* first, const Command* last) const noexcept {
+
SYSTRACE_CALL();
SYSTRACE_CONTEXT();
+ DriverApi& driver = engine.getDriverApi();
+ size_t const capacity = engine.getMinCommandBufferSize();
+ CircularBuffer const& circularBuffer = driver.getCircularBuffer();
+
if (first != last) {
SYSTRACE_VALUE32("commandCount", last - first);
@@ -781,126 +844,163 @@ void RenderPass::Executor::execute(backend::DriverApi& driver,
FMaterial const* UTILS_RESTRICT ma = nullptr;
auto const* UTILS_RESTRICT pCustomCommands = mCustomCommands.data();
- first--;
- while (++first != last) {
- assert_invariant(first->key != uint64_t(Pass::SENTINEL));
-
- /*
- * Be careful when changing code below, this is the hot inner-loop
- */
-
- if (UTILS_UNLIKELY((first->key & CUSTOM_MASK) != uint64_t(CustomCommand::PASS))) {
- mi = nullptr; // custom command could change the currently bound MaterialInstance
- uint32_t const index = (first->key & CUSTOM_INDEX_MASK) >> CUSTOM_INDEX_SHIFT;
- assert_invariant(index < mCustomCommands.size());
- pCustomCommands[index]();
- continue;
+ // Maximum space occupied in the CircularBuffer by a single `Command`. This must be
+ // reevaluated when the inner loop below adds DriverApi commands or when we change the
+ // CommandStream protocol. Currently, the maximum is 240 bytes, and we use 256 to be on
+ // the safer side.
+ size_t const maxCommandSizeInBytes = 256;
+
+ // Number of Commands that can be issued and guaranteed to fit in the current
+ // CircularBuffer allocation. In practice, we'll have tons of headroom especially if
+ // skinning and morphing aren't used. With a 2 MiB buffer (the default) a batch is
+ // 8192 commands (i.e. draw calls).
+ size_t const batchCommandCount = capacity / maxCommandSizeInBytes;
+ while(first != last) {
+ Command const* const batchLast = std::min(first + batchCommandCount, last);
+
+ // actual number of commands we need to write (can be smaller than batchCommandCount)
+ size_t const commandCount = batchLast - first;
+ size_t const commandSizeInBytes = commandCount * maxCommandSizeInBytes;
+
+ // check we have enough capacity to write these commandCount commands, if not,
+ // request a new CircularBuffer allocation of `capacity` bytes.
+ if (UTILS_UNLIKELY(circularBuffer.getUsed() > capacity - commandSizeInBytes)) {
+ engine.flush(); // TODO: we should use a "fast" flush if possible
}
- // primitiveHandle may be invalid if no geometry was set on the renderable.
- if (UTILS_UNLIKELY(!first->primitive.primitiveHandle)) {
- continue;
- }
+ first--;
+ while (++first != batchLast) {
+ assert_invariant(first->key != uint64_t(Pass::SENTINEL));
- // per-renderable uniform
- const PrimitiveInfo info = first->primitive;
- pipeline.rasterState = info.rasterState;
-
- if (UTILS_UNLIKELY(mi != info.mi)) {
- // this is always taken the first time
- mi = info.mi;
- ma = mi->getMaterial();
-
- auto const& scissor = mi->getScissor();
- if (UTILS_UNLIKELY(mi->hasScissor())) {
- // scissor is set, we need to apply the offset/clip
- // clang vectorizes this!
- constexpr int32_t maxvali = std::numeric_limits::max();
- const backend::Viewport scissorViewport = mScissorViewport;
- // compute new left/bottom, assume no overflow
- int32_t l = scissor.left + scissorViewport.left;
- int32_t b = scissor.bottom + scissorViewport.bottom;
- // compute right/top without overflowing, scissor.width/height guaranteed
- // to convert to int32
- int32_t r = (l > maxvali - int32_t(scissor.width)) ?
- maxvali : l + int32_t(scissor.width);
- int32_t t = (b > maxvali - int32_t(scissor.height)) ?
- maxvali : b + int32_t(scissor.height);
- // clip to the viewport
- l = std::max(l, scissorViewport.left);
- b = std::max(b, scissorViewport.bottom);
- r = std::min(r, scissorViewport.left + int32_t(scissorViewport.width));
- t = std::min(t, scissorViewport.bottom + int32_t(scissorViewport.height));
- assert_invariant(r >= l && t >= b);
- *pScissor = { l, b, uint32_t(r - l), uint32_t(t - b) };
- } else {
- // no scissor set (common case), 'scissor' has its default value, use that.
- *pScissor = scissor;
+ /*
+ * Be careful when changing code below, this is the hot inner-loop
+ */
+
+ if (UTILS_UNLIKELY((first->key & CUSTOM_MASK) != uint64_t(CustomCommand::PASS))) {
+ mi = nullptr; // custom command could change the currently bound MaterialInstance
+ uint32_t const index = (first->key & CUSTOM_INDEX_MASK) >> CUSTOM_INDEX_SHIFT;
+ assert_invariant(index < mCustomCommands.size());
+ pCustomCommands[index]();
+ continue;
}
- *pPipelinePolygonOffset = mi->getPolygonOffset();
- pipeline.stencilState = mi->getStencilState();
- mi->use(driver);
- }
+ // primitiveHandle may be invalid if no geometry was set on the renderable.
+ if (UTILS_UNLIKELY(!first->primitive.primitiveHandle)) {
+ continue;
+ }
- pipeline.program = ma->getProgram(info.materialVariant);
+ // per-renderable uniform
+ const PrimitiveInfo info = first->primitive;
+ pipeline.rasterState = info.rasterState;
+
+ if (UTILS_UNLIKELY(mi != info.mi)) {
+ // this is always taken the first time
+ mi = info.mi;
+ assert_invariant(mi);
+
+ ma = mi->getMaterial();
+
+ auto const& scissor = mi->getScissor();
+ if (UTILS_UNLIKELY(mi->hasScissor())) {
+ // scissor is set, we need to apply the offset/clip
+ // clang vectorizes this!
+ constexpr int32_t maxvali = std::numeric_limits::max();
+ const backend::Viewport scissorViewport = mScissorViewport;
+ // compute new left/bottom, assume no overflow
+ int32_t l = scissor.left + scissorViewport.left;
+ int32_t b = scissor.bottom + scissorViewport.bottom;
+ // compute right/top without overflowing, scissor.width/height guaranteed
+ // to convert to int32
+ int32_t r = (l > maxvali - int32_t(scissor.width)) ?
+ maxvali : l + int32_t(scissor.width);
+ int32_t t = (b > maxvali - int32_t(scissor.height)) ?
+ maxvali : b + int32_t(scissor.height);
+ // clip to the viewport
+ l = std::max(l, scissorViewport.left);
+ b = std::max(b, scissorViewport.bottom);
+ r = std::min(r, scissorViewport.left + int32_t(scissorViewport.width));
+ t = std::min(t, scissorViewport.bottom + int32_t(scissorViewport.height));
+ assert_invariant(r >= l && t >= b);
+ *pScissor = { l, b, uint32_t(r - l), uint32_t(t - b) };
+ } else {
+ // no scissor set (common case), 'scissor' has its default value, use that.
+ *pScissor = scissor;
+ }
+
+ *pPipelinePolygonOffset = mi->getPolygonOffset();
+ pipeline.stencilState = mi->getStencilState();
+ mi->use(driver);
+ }
- uint16_t const instanceCount = info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK;
- auto getPerObjectUboHandle =
- [this, &info, &instanceCount]() -> std::pair, uint32_t> {
- if (info.instanceBufferHandle) {
- // "hybrid" instancing -- instanceBufferHandle takes the place of the UBO
- return { info.instanceBufferHandle, 0 };
+ assert_invariant(ma);
+ pipeline.program = ma->getProgram(info.materialVariant);
+
+ uint16_t const instanceCount =
+ info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK;
+ auto getPerObjectUboHandle =
+ [this, &info, &instanceCount]() -> std::pair, uint32_t> {
+ if (info.instanceBufferHandle) {
+ // "hybrid" instancing -- instanceBufferHandle takes the place of the UBO
+ return { info.instanceBufferHandle, 0 };
+ }
+ bool const userInstancing =
+ (info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u;
+ if (!userInstancing && instanceCount > 1) {
+ // automatic instancing
+ return {
+ mInstancedUboHandle,
+ info.index * sizeof(PerRenderableData) };
+ } else {
+ // manual instancing
+ return { mUboHandle, info.index * sizeof(PerRenderableData) };
+ }
+ };
+
+ // Bind per-renderable uniform block. There is no need to attempt to skip this command
+ // because the backends already do this.
+ auto const [perObjectUboHandle, offset] = getPerObjectUboHandle();
+ assert_invariant(perObjectUboHandle);
+ driver.bindBufferRange(BufferObjectBinding::UNIFORM,
+ +UniformBindingPoints::PER_RENDERABLE,
+ perObjectUboHandle,
+ offset,
+ sizeof(PerRenderableUib));
+
+ if (UTILS_UNLIKELY(info.skinningHandle)) {
+ // note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations
+ driver.bindBufferRange(BufferObjectBinding::UNIFORM,
+ +UniformBindingPoints::PER_RENDERABLE_BONES,
+ info.skinningHandle,
+ info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData),
+ sizeof(PerRenderableBoneUib));
+ // note: always bind the skinningTexture because the shader needs it.
+ driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
+ info.skinningTexture);
+ // note: even if only skinning is enabled, binding morphTargetBuffer is needed.
+ driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
+ info.morphTargetBuffer);
}
- bool const userInstancing =
- (info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u;
- if (!userInstancing && instanceCount > 1) {
- // automatic instancing
- return { mInstancedUboHandle, info.index * sizeof(PerRenderableData) };
- } else {
- // manual instancing
- return { mUboHandle, info.index * sizeof(PerRenderableData) };
+
+ if (UTILS_UNLIKELY(info.morphWeightBuffer)) {
+ // Instead of using a UBO per primitive, we could also have a single UBO for all
+ // primitives and use bindUniformBufferRange which might be more efficient.
+ driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING,
+ info.morphWeightBuffer);
+ driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
+ info.morphTargetBuffer);
+ // note: even if only morphing is enabled, binding skinningTexture is needed.
+ driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
+ info.skinningTexture);
}
- };
-
- // bind per-renderable uniform block. there is no need to attempt to skip this command
- // because the backends already do this.
- auto const [perObjectUboHandle, offset] = getPerObjectUboHandle();
- assert_invariant(perObjectUboHandle);
- driver.bindBufferRange(BufferObjectBinding::UNIFORM,
- +UniformBindingPoints::PER_RENDERABLE,
- perObjectUboHandle,
- offset,
- sizeof(PerRenderableUib));
-
- if (UTILS_UNLIKELY(info.skinningHandle)) {
- // note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations
- driver.bindBufferRange(BufferObjectBinding::UNIFORM,
- +UniformBindingPoints::PER_RENDERABLE_BONES,
- info.skinningHandle,
- info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData),
- sizeof(PerRenderableBoneUib));
- // note: always bind the skinningTexture because the shader needs it.
- driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
- info.skinningTexture);
- // note: even if only skinning is enabled, binding morphTargetBuffer is needed.
- driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
- info.morphTargetBuffer);
- }
-
- if (UTILS_UNLIKELY(info.morphWeightBuffer)) {
- // Instead of using a UBO per primitive, we could also have a single UBO for all
- // primitives and use bindUniformBufferRange which might be more efficient.
- driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING,
- info.morphWeightBuffer);
- driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING,
- info.morphTargetBuffer);
- // note: even if only morphing is enabled, binding skinningTexture is needed.
- driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING,
- info.skinningTexture);
+
+ driver.draw(pipeline, info.primitiveHandle, instanceCount);
}
+ }
- driver.draw(pipeline, info.primitiveHandle, instanceCount);
+ // If the remaining space is less than half the capacity, we flush right away to
+ // allow some headroom for commands that might come later.
+ if (UTILS_UNLIKELY(circularBuffer.getUsed() > capacity / 2)) {
+ engine.flush();
}
}
diff --git a/filament/src/RenderPass.h b/filament/src/RenderPass.h
index 4474079594f..646171efd58 100644
--- a/filament/src/RenderPass.h
+++ b/filament/src/RenderPass.h
@@ -22,26 +22,38 @@
#include "details/Camera.h"
#include "details/Scene.h"
-#include "backend/DriverApiForward.h"
-
-#include
+#include "private/filament/Variant.h"
+#include "utils/BitmaskEnum.h"
#include
#include
#include
#include
+#include
#include
-#include
#include
+#include