diff --git a/README.md b/README.md index bb2c074eb15..66f0976391a 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,7 @@ repositories { } dependencies { - implementation 'com.google.android.filament:filament-android:1.50.3' + implementation 'com.google.android.filament:filament-android:1.50.4' } ``` @@ -51,7 +51,7 @@ Here are all the libraries available in the group `com.google.android.filament`: iOS projects can use CocoaPods to install the latest release: ```shell -pod 'Filament', '~> 1.50.3' +pod 'Filament', '~> 1.50.4' ``` ### Snapshots diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md index 209a0afdec2..f7fa37e7f58 100644 --- a/RELEASE_NOTES.md +++ b/RELEASE_NOTES.md @@ -7,6 +7,9 @@ A new header is inserted each time a *tag* is created. Instead, if you are authoring a PR for the main branch, add your release note to [NEW_RELEASE_NOTES.md](./NEW_RELEASE_NOTES.md). +## v1.50.4 + + ## v1.50.3 diff --git a/android/filament-android/src/main/cpp/Engine.cpp b/android/filament-android/src/main/cpp/Engine.cpp index 05893cbd5bf..80409702c37 100644 --- a/android/filament-android/src/main/cpp/Engine.cpp +++ b/android/filament-android/src/main/cpp/Engine.cpp @@ -484,7 +484,9 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBuilderConfig(JNIEnv*, jclass, jlong nativeBuilder, jlong commandBufferSizeMB, jlong perRenderPassArenaSizeMB, jlong driverHandleArenaSizeMB, jlong minCommandBufferSizeMB, jlong perFrameCommandsSizeMB, - jlong jobSystemThreadCount, jlong stereoscopicEyeCount, + jlong jobSystemThreadCount, + jlong textureUseAfterFreePoolSize, jboolean disableParallelShaderCompile, + jint stereoscopicType, jlong stereoscopicEyeCount, jlong resourceAllocatorCacheSizeMB, jlong resourceAllocatorCacheMaxAge) { Engine::Builder* builder = (Engine::Builder*) nativeBuilder; Engine::Config config = { @@ -494,6 +496,9 @@ extern "C" JNIEXPORT void JNICALL Java_com_google_android_filament_Engine_nSetBu .minCommandBufferSizeMB = (uint32_t) minCommandBufferSizeMB, .perFrameCommandsSizeMB = (uint32_t) perFrameCommandsSizeMB, .jobSystemThreadCount = (uint32_t) jobSystemThreadCount, + .textureUseAfterFreePoolSize = (uint32_t) textureUseAfterFreePoolSize, + .disableParallelShaderCompile = (bool) disableParallelShaderCompile, + .stereoscopicType = (Engine::StereoscopicType) stereoscopicType, .stereoscopicEyeCount = (uint8_t) stereoscopicEyeCount, .resourceAllocatorCacheSizeMB = (uint32_t) resourceAllocatorCacheSizeMB, .resourceAllocatorCacheMaxAge = (uint8_t) resourceAllocatorCacheMaxAge, diff --git a/android/filament-android/src/main/java/com/google/android/filament/Engine.java b/android/filament-android/src/main/java/com/google/android/filament/Engine.java index 6b4647e2ac1..06f0a2b8ae9 100644 --- a/android/filament-android/src/main/java/com/google/android/filament/Engine.java +++ b/android/filament-android/src/main/java/com/google/android/filament/Engine.java @@ -158,6 +158,16 @@ public enum FeatureLevel { FEATURE_LEVEL_3, }; + /** + * The type of technique for stereoscopic rendering + */ + public enum StereoscopicType { + /** Stereoscopic rendering is performed using instanced rendering technique. */ + INSTANCED, + /** Stereoscopic rendering is performed using the multiview feature from the graphics backend. */ + MULTIVIEW, + }; + /** * Constructs Engine objects using a builder pattern. 
*/ @@ -211,7 +221,9 @@ public Builder config(Config config) { nSetBuilderConfig(mNativeBuilder, config.commandBufferSizeMB, config.perRenderPassArenaSizeMB, config.driverHandleArenaSizeMB, config.minCommandBufferSizeMB, config.perFrameCommandsSizeMB, - config.jobSystemThreadCount, config.stereoscopicEyeCount, + config.jobSystemThreadCount, + config.textureUseAfterFreePoolSize, config.disableParallelShaderCompile, + config.stereoscopicType.ordinal(), config.stereoscopicEyeCount, config.resourceAllocatorCacheSizeMB, config.resourceAllocatorCacheMaxAge); return this; } @@ -349,6 +361,35 @@ public static class Config { */ public long jobSystemThreadCount = 0; + /** + * Number of most-recently destroyed textures to track for use-after-free. + * + * This will cause the backend to throw an exception when a texture is freed but still bound + * to a SamplerGroup and used in a draw call. 0 disables completely. + * + * Currently only respected by the Metal backend. + */ + public long textureUseAfterFreePoolSize = 0; + + /** + * Set to `true` to forcibly disable parallel shader compilation in the backend. + * Currently only honored by the GL backend. + */ + public boolean disableParallelShaderCompile = false; + + /** + * The type of technique for stereoscopic rendering. + * + * This setting determines the algorithm used when stereoscopic rendering is enabled. This + * decision applies to the entire Engine for the lifetime of the Engine. E.g., multiple + * Views created from the Engine must use the same stereoscopic type. + * + * Each view can enable stereoscopic rendering via the StereoscopicOptions::enable flag. + * + * @see View#setStereoscopicOptions + */ + public StereoscopicType stereoscopicType = StereoscopicType.INSTANCED; + /** * The number of eyes to render when stereoscopic rendering is enabled. Supported values are * between 1 and Engine#getMaxStereoscopicEyes() (inclusive). @@ -1240,7 +1281,8 @@ private static void assertDestroy(boolean success) { private static native void nSetBuilderConfig(long nativeBuilder, long commandBufferSizeMB, long perRenderPassArenaSizeMB, long driverHandleArenaSizeMB, long minCommandBufferSizeMB, long perFrameCommandsSizeMB, long jobSystemThreadCount, - long stereoscopicEyeCount, + long textureUseAfterFreePoolSize, boolean disableParallelShaderCompile, + int stereoscopicType, long stereoscopicEyeCount, long resourceAllocatorCacheSizeMB, long resourceAllocatorCacheMaxAge); private static native void nSetBuilderFeatureLevel(long nativeBuilder, int ordinal); private static native void nSetBuilderSharedContext(long nativeBuilder, long sharedContext); diff --git a/android/gradle.properties b/android/gradle.properties index 53e64758c22..34340a20b8a 100644 --- a/android/gradle.properties +++ b/android/gradle.properties @@ -1,5 +1,5 @@ GROUP=com.google.android.filament -VERSION_NAME=1.50.3 +VERSION_NAME=1.50.4 POM_DESCRIPTION=Real-time physically based rendering engine for Android. diff --git a/filament/backend/include/backend/DriverEnums.h b/filament/backend/include/backend/DriverEnums.h index aba2b404145..c41d1b83049 100644 --- a/filament/backend/include/backend/DriverEnums.h +++ b/filament/backend/include/backend/DriverEnums.h @@ -1212,6 +1212,14 @@ enum class Workaround : uint16_t { DISABLE_THREAD_AFFINITY }; +//! The type of technique for stereoscopic rendering +enum class StereoscopicType : uint8_t { + // Stereoscopic rendering is performed using instanced rendering technique. 
+ INSTANCED, + // Stereoscopic rendering is performed using the multiview feature from the graphics backend. + MULTIVIEW, +}; + } // namespace filament::backend template<> struct utils::EnableBitMaskOperators diff --git a/filament/backend/include/backend/Handle.h b/filament/backend/include/backend/Handle.h index 7b8846ba7bc..ffc16133fd2 100644 --- a/filament/backend/include/backend/Handle.h +++ b/filament/backend/include/backend/Handle.h @@ -62,14 +62,6 @@ class HandleBase { // clear the handle, this doesn't free associated resources void clear() noexcept { object = nullid; } - // compare handles - bool operator==(const HandleBase& rhs) const noexcept { return object == rhs.object; } - bool operator!=(const HandleBase& rhs) const noexcept { return object != rhs.object; } - bool operator<(const HandleBase& rhs) const noexcept { return object < rhs.object; } - bool operator<=(const HandleBase& rhs) const noexcept { return object <= rhs.object; } - bool operator>(const HandleBase& rhs) const noexcept { return object > rhs.object; } - bool operator>=(const HandleBase& rhs) const noexcept { return object >= rhs.object; } - // get this handle's handleId HandleId getId() const noexcept { return object; } @@ -101,6 +93,14 @@ struct Handle : public HandleBase { explicit Handle(HandleId id) noexcept : HandleBase(id) { } + // compare handles of the same type + bool operator==(const Handle& rhs) const noexcept { return getId() == rhs.getId(); } + bool operator!=(const Handle& rhs) const noexcept { return getId() != rhs.getId(); } + bool operator<(const Handle& rhs) const noexcept { return getId() < rhs.getId(); } + bool operator<=(const Handle& rhs) const noexcept { return getId() <= rhs.getId(); } + bool operator>(const Handle& rhs) const noexcept { return getId() > rhs.getId(); } + bool operator>=(const Handle& rhs) const noexcept { return getId() >= rhs.getId(); } + // type-safe Handle cast template::value> > Handle(Handle const& base) noexcept : HandleBase(base) { } // NOLINT(hicpp-explicit-conversions,google-explicit-constructor) diff --git a/filament/backend/include/private/backend/CircularBuffer.h b/filament/backend/include/private/backend/CircularBuffer.h index aae6e69c03b..7d2de52b009 100644 --- a/filament/backend/include/private/backend/CircularBuffer.h +++ b/filament/backend/include/private/backend/CircularBuffer.h @@ -17,7 +17,10 @@ #ifndef TNT_FILAMENT_BACKEND_PRIVATE_CIRCULARBUFFER_H #define TNT_FILAMENT_BACKEND_PRIVATE_CIRCULARBUFFER_H +#include + #include +#include namespace filament::backend { @@ -37,28 +40,36 @@ class CircularBuffer { ~CircularBuffer() noexcept; - // allocates 'size' bytes in the circular buffer and returns a pointer to the memory - // return the current head and moves it forward by size bytes - inline void* allocate(size_t size) noexcept { + static size_t getBlockSize() noexcept { return sPageSize; } + + // Total size of circular buffer. This is a constant. + size_t size() const noexcept { return mSize; } + + // Allocates `s` bytes in the circular buffer and returns a pointer to the memory. All + // allocations must not exceed size() bytes. + inline void* allocate(size_t s) noexcept { + // We can never allocate more that size(). + assert_invariant(getUsed() + s <= size()); char* const cur = static_cast(mHead); - mHead = cur + size; + mHead = cur + s; return cur; } - // Total size of circular buffer - size_t size() const noexcept { return mSize; } - - // returns true if the buffer is empty (e.g. 
after calling flush) + // Returns true if the buffer is empty, i.e.: no allocations were made since + // calling getBuffer(); bool empty() const noexcept { return mTail == mHead; } - void* getHead() const noexcept { return mHead; } - - void* getTail() const noexcept { return mTail; } + // Returns the size used since the last call to getBuffer() + size_t getUsed() const noexcept { return intptr_t(mHead) - intptr_t(mTail); } - // call at least once every getRequiredSize() bytes allocated from the buffer - void circularize() noexcept; - - static size_t getBlockSize() noexcept { return sPageSize; } + // Retrieves the current allocated range and frees it. It is the responsibility of the caller + // to make sure the returned range is no longer in use by the time allocate() allocates + // (size() - getUsed()) bytes. + struct Range { + void* tail; + void* head; + }; + Range getBuffer() noexcept; private: void* alloc(size_t size) noexcept; @@ -66,10 +77,10 @@ class CircularBuffer { // pointer to the beginning of the circular buffer (constant) void* mData = nullptr; - int mUsesAshmem = -1; + int mAshmemFd = -1; // size of the circular buffer (constant) - size_t mSize = 0; + size_t const mSize; // pointer to the beginning of recorded data void* mTail = nullptr; diff --git a/filament/backend/include/private/backend/CommandBufferQueue.h b/filament/backend/include/private/backend/CommandBufferQueue.h index 6a434477789..28122452386 100644 --- a/filament/backend/include/private/backend/CommandBufferQueue.h +++ b/filament/backend/include/private/backend/CommandBufferQueue.h @@ -33,7 +33,7 @@ namespace filament::backend { * A producer-consumer command queue that uses a CircularBuffer as main storage */ class CommandBufferQueue { - struct Slice { + struct Range { void* begin; void* end; }; @@ -46,7 +46,7 @@ class CommandBufferQueue { mutable utils::Mutex mLock; mutable utils::Condition mCondition; - mutable std::vector mCommandBuffersToExecute; + mutable std::vector mCommandBuffersToExecute; size_t mFreeSpace = 0; size_t mHighWatermark = 0; uint32_t mExitRequested = 0; @@ -58,17 +58,20 @@ class CommandBufferQueue { CommandBufferQueue(size_t requiredSize, size_t bufferSize); ~CommandBufferQueue(); - CircularBuffer& getCircularBuffer() { return mCircularBuffer; } + CircularBuffer& getCircularBuffer() noexcept { return mCircularBuffer; } + CircularBuffer const& getCircularBuffer() const noexcept { return mCircularBuffer; } + + size_t getCapacity() const noexcept { return mRequiredSize; } size_t getHighWatermark() const noexcept { return mHighWatermark; } // wait for commands to be available and returns an array containing these commands - std::vector waitForCommands() const; + std::vector waitForCommands() const; // return the memory used by this command buffer to the circular buffer // WARNING: releaseBuffer() must be called in sequence of the Slices returned by // waitForCommands() - void releaseBuffer(Slice const& buffer); + void releaseBuffer(Range const& buffer); // all commands buffers (Slices) written to this point are returned by waitForCommand(). This // call blocks until the CircularBuffer has at least mRequiredSize bytes available. 
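
As a reading aid (not part of the patch): the hunks above replace CircularBuffer's getHead()/getTail()/circularize() with a single getBuffer() call and rename CommandBufferQueue::Slice to Range. Below is a minimal sketch of the producer/consumer hand-off this API implies. The helper pumpOnce() is hypothetical and, unlike real usage, runs both halves on one thread; it compiles only inside the Filament backend tree.

```cpp
#include "private/backend/CommandBufferQueue.h"

using filament::backend::CommandBufferQueue;

// Hypothetical helper, single-threaded for clarity; Filament drives the two
// halves from separate threads.
void pumpOnce(CommandBufferQueue& queue) {
    // Producer side: commands have already been recorded into
    // queue.getCircularBuffer() (normally through a CommandStream). flush()
    // snapshots the written range via CircularBuffer::getBuffer() and
    // publishes it, blocking only if free space drops below getCapacity().
    queue.flush();

    // Consumer side: drain every pending Range, execute its commands, then
    // return the space. releaseBuffer() must be called in the same order the
    // Ranges were returned by waitForCommands().
    auto buffers = queue.waitForCommands();
    for (auto const& range : buffers) {
        // ... execute the commands in [range.begin, range.end) ...
        queue.releaseBuffer(range);
    }
}
```
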
diff --git a/filament/backend/include/private/backend/CommandStream.h b/filament/backend/include/private/backend/CommandStream.h index be84b323ad0..985fa5fcd6e 100644 --- a/filament/backend/include/private/backend/CommandStream.h +++ b/filament/backend/include/private/backend/CommandStream.h @@ -213,6 +213,8 @@ class CommandStream { CommandStream(CommandStream const& rhs) noexcept = delete; CommandStream& operator=(CommandStream const& rhs) noexcept = delete; + CircularBuffer const& getCircularBuffer() const noexcept { return mCurrentBuffer; } + public: #define DECL_DRIVER_API(methodName, paramsDecl, params) \ inline void methodName(paramsDecl) { \ diff --git a/filament/backend/include/private/backend/HandleAllocator.h b/filament/backend/include/private/backend/HandleAllocator.h index aa5f53be695..04e66d85774 100644 --- a/filament/backend/include/private/backend/HandleAllocator.h +++ b/filament/backend/include/private/backend/HandleAllocator.h @@ -24,35 +24,31 @@ #include #include #include +#include #include +#include #include #include #include +#include #include #include -#if !defined(NDEBUG) && UTILS_HAS_RTTI -# define HANDLE_TYPE_SAFETY 1 -#else -# define HANDLE_TYPE_SAFETY 0 -#endif - -#define HandleAllocatorGL HandleAllocator<16, 64, 208> -#define HandleAllocatorVK HandleAllocator<16, 64, 880> -#define HandleAllocatorMTL HandleAllocator<16, 64, 584> +#define HandleAllocatorGL HandleAllocator<16, 64, 208> // ~3640 / pool / MiB +#define HandleAllocatorVK HandleAllocator<80, 176, 320> // ~1820 / pool / MiB +#define HandleAllocatorMTL HandleAllocator<48, 160, 592> // ~1310 / pool / MiB namespace filament::backend { /* * A utility class to efficiently allocate and manage Handle<> */ -template +template class HandleAllocator { public: - HandleAllocator(const char* name, size_t size) noexcept; HandleAllocator(HandleAllocator const& rhs) = delete; HandleAllocator& operator=(HandleAllocator const& rhs) = delete; @@ -70,14 +66,9 @@ class HandleAllocator { */ template Handle allocateAndConstruct(ARGS&& ... args) noexcept { - Handle h{ allocateHandle() }; + Handle h{ allocateHandle() }; D* addr = handle_cast(h); new(addr) D(std::forward(args)...); -#if HANDLE_TYPE_SAFETY - mLock.lock(); - mHandleTypeId[addr] = typeid(D).name(); - mLock.unlock(); -#endif return h; } @@ -93,13 +84,7 @@ class HandleAllocator { */ template Handle allocate() noexcept { - Handle h{ allocateHandle() }; -#if HANDLE_TYPE_SAFETY - D* addr = handle_cast(h); - mLock.lock(); - mHandleTypeId[addr] = typeid(D).name(); - mLock.unlock(); -#endif + Handle h{ allocateHandle() }; return h; } @@ -116,17 +101,10 @@ class HandleAllocator { assert_invariant(handle); D* addr = handle_cast(const_cast&>(handle)); assert_invariant(addr); - // currently we implement construct<> with dtor+ctor, we could use operator= also // but all our dtors are trivial, ~D() is actually a noop. 
addr->~D(); new(addr) D(std::forward(args)...); - -#if HANDLE_TYPE_SAFETY - mLock.lock(); - mHandleTypeId[addr] = typeid(D).name(); - mLock.unlock(); -#endif return addr; } @@ -143,12 +121,6 @@ class HandleAllocator { D* addr = handle_cast(const_cast&>(handle)); assert_invariant(addr); new(addr) D(std::forward(args)...); - -#if HANDLE_TYPE_SAFETY - mLock.lock(); - mHandleTypeId[addr] = typeid(D).name(); - mLock.unlock(); -#endif return addr; } @@ -164,19 +136,8 @@ class HandleAllocator { void deallocate(Handle& handle, D const* p) noexcept { // allow to destroy the nullptr, similarly to operator delete if (p) { -#if HANDLE_TYPE_SAFETY - mLock.lock(); - auto typeId = mHandleTypeId[p]; - mHandleTypeId.erase(p); - mLock.unlock(); - if (UTILS_UNLIKELY(typeId != typeid(D).name())) { - utils::slog.e << "Destroying handle " << handle.getId() << ", type " << typeid(D).name() - << ", but handle's actual type is " << typeId << utils::io::endl; - std::terminate(); - } -#endif p->~D(); - deallocateHandle(handle.getId()); + deallocateHandle(handle.getId()); } } @@ -204,7 +165,17 @@ class HandleAllocator { std::is_base_of_v>, Dp> handle_cast(Handle& handle) noexcept { assert_invariant(handle); - void* const p = handleToPointer(handle.getId()); + auto [p, tag] = handleToPointer(handle.getId()); + + if (isPoolHandle(handle.getId())) { + // check for use after free + uint8_t const age = (tag & HANDLE_AGE_MASK) >> HANDLE_AGE_SHIFT; + auto const pNode = static_cast(p); + uint8_t const expectedAge = pNode[-1].age; + ASSERT_POSTCONDITION(expectedAge == age, + "use-after-free of Handle with id=%d", handle.getId()); + } + return static_cast(p); } @@ -219,29 +190,57 @@ class HandleAllocator { private: - // template + template + static constexpr size_t getBucketSize() noexcept { + if constexpr (sizeof(D) <= P0) { return P0; } + if constexpr (sizeof(D) <= P1) { return P1; } + static_assert(sizeof(D) <= P2); + return P2; + } + class Allocator { friend class HandleAllocator; - utils::PoolAllocator mPool0; - utils::PoolAllocator mPool1; - utils::PoolAllocator mPool2; + static constexpr size_t MIN_ALIGNMENT = alignof(std::max_align_t); + struct Node { uint8_t age; }; + // Note: using the `extra` parameter of PoolAllocator<>, even with a 1-byte structure, + // generally increases all pool allocations by 8-bytes because of alignment restrictions. + template + using Pool = utils::PoolAllocator; + Pool mPool0; + Pool mPool1; + Pool mPool2; UTILS_UNUSED_IN_RELEASE const utils::AreaPolicy::HeapArea& mArea; public: - static constexpr size_t MIN_ALIGNMENT_SHIFT = 4; explicit Allocator(const utils::AreaPolicy::HeapArea& area); + static constexpr size_t getAlignment() noexcept { return MIN_ALIGNMENT; } + // this is in fact always called with a constexpr size argument - [[nodiscard]] inline void* alloc(size_t size, size_t, size_t extra) noexcept { + [[nodiscard]] inline void* alloc(size_t size, size_t, size_t, uint8_t* outAge) noexcept { void* p = nullptr; - if (size <= mPool0.getSize()) p = mPool0.alloc(size, 16, extra); - else if (size <= mPool1.getSize()) p = mPool1.alloc(size, 16, extra); - else if (size <= mPool2.getSize()) p = mPool2.alloc(size, 16, extra); + if (size <= mPool0.getSize()) p = mPool0.alloc(size); + else if (size <= mPool1.getSize()) p = mPool1.alloc(size); + else if (size <= mPool2.getSize()) p = mPool2.alloc(size); + if (UTILS_LIKELY(p)) { + Node const* const pNode = static_cast(p); + // we are guaranteed to have at least sizeof bytes of extra storage before + // the allocation address. 
+ *outAge = pNode[-1].age; + } return p; } // this is in fact always called with a constexpr size argument - inline void free(void* p, size_t size) noexcept { + inline void free(void* p, size_t size, uint8_t age) noexcept { assert_invariant(p >= mArea.begin() && (char*)p + size <= (char*)mArea.end()); + + // check for double-free + Node* const pNode = static_cast(p); + uint8_t& expectedAge = pNode[-1].age; + ASSERT_POSTCONDITION(expectedAge == age, + "double-free of Handle of size %d at %p", size, p); + expectedAge = (expectedAge + 1) & 0xF; // fixme + if (size <= mPool0.getSize()) { mPool0.free(p); return; } if (size <= mPool1.getSize()) { mPool1.free(p); return; } if (size <= mPool2.getSize()) { mPool2.free(p); return; } @@ -263,24 +262,16 @@ class HandleAllocator { // allocateHandle()/deallocateHandle() selects the pool to use at compile-time based on the // allocation size this is always inlined, because all these do is to call // allocateHandleInPool()/deallocateHandleFromPool() with the right pool size. - template + template HandleBase::HandleId allocateHandle() noexcept { - if constexpr (SIZE <= P0) { return allocateHandleInPool(); } - if constexpr (SIZE <= P1) { return allocateHandleInPool(); } - static_assert(SIZE <= P2); - return allocateHandleInPool(); + constexpr size_t BUCKET_SIZE = getBucketSize(); + return allocateHandleInPool(); } - template + template void deallocateHandle(HandleBase::HandleId id) noexcept { - if constexpr (SIZE <= P0) { - deallocateHandleFromPool(id); - } else if constexpr (SIZE <= P1) { - deallocateHandleFromPool(id); - } else { - static_assert(SIZE <= P2); - deallocateHandleFromPool(id); - } + constexpr size_t BUCKET_SIZE = getBucketSize(); + deallocateHandleFromPool(id); } // allocateHandleInPool()/deallocateHandleFromPool() is NOT inlined, which will cause three @@ -289,9 +280,11 @@ class HandleAllocator { template UTILS_NOINLINE HandleBase::HandleId allocateHandleInPool() noexcept { - void* p = mHandleArena.alloc(SIZE); + uint8_t age; + void* p = mHandleArena.alloc(SIZE, alignof(std::max_align_t), 0, &age); if (UTILS_LIKELY(p)) { - return pointerToHandle(p); + uint32_t const tag = (uint32_t(age) << HANDLE_AGE_SHIFT) & HANDLE_AGE_MASK; + return arenaPointerToHandle(p, tag); } else { return allocateHandleSlow(SIZE); } @@ -301,42 +294,51 @@ class HandleAllocator { UTILS_NOINLINE void deallocateHandleFromPool(HandleBase::HandleId id) noexcept { if (UTILS_LIKELY(isPoolHandle(id))) { - void* p = handleToPointer(id); - mHandleArena.free(p, SIZE); + auto [p, tag] = handleToPointer(id); + uint8_t const age = (tag & HANDLE_AGE_MASK) >> HANDLE_AGE_SHIFT; + mHandleArena.free(p, SIZE, age); } else { deallocateHandleSlow(id, SIZE); } } - static constexpr uint32_t HEAP_HANDLE_FLAG = 0x80000000u; + // we handle a 4 bits age per address + static constexpr uint32_t HANDLE_HEAP_FLAG = 0x80000000u; // pool vs heap handle + static constexpr uint32_t HANDLE_AGE_MASK = 0x78000000u; // handle's age + static constexpr uint32_t HANDLE_INDEX_MASK = 0x07FFFFFFu; // handle index + static constexpr uint32_t HANDLE_TAG_MASK = HANDLE_AGE_MASK; + static constexpr uint32_t HANDLE_AGE_SHIFT = 27; static bool isPoolHandle(HandleBase::HandleId id) noexcept { - return (id & HEAP_HANDLE_FLAG) == 0u; + return (id & HANDLE_HEAP_FLAG) == 0u; } HandleBase::HandleId allocateHandleSlow(size_t size) noexcept; void deallocateHandleSlow(HandleBase::HandleId id, size_t size) noexcept; // We inline this because it's just 4 instructions in the fast case - inline void* 
handleToPointer(HandleBase::HandleId id) const noexcept { + inline std::pair handleToPointer(HandleBase::HandleId id) const noexcept { // note: the null handle will end-up returning nullptr b/c it'll be handled as // a non-pool handle. if (UTILS_LIKELY(isPoolHandle(id))) { char* const base = (char*)mHandleArena.getArea().begin(); - size_t offset = id << Allocator::MIN_ALIGNMENT_SHIFT; - return static_cast(base + offset); + uint32_t const tag = id & HANDLE_TAG_MASK; + size_t const offset = (id & HANDLE_INDEX_MASK) * Allocator::getAlignment(); + return { static_cast(base + offset), tag }; } - return handleToPointerSlow(id); + return { handleToPointerSlow(id), 0 }; } void* handleToPointerSlow(HandleBase::HandleId id) const noexcept; // We inline this because it's just 3 instructions - inline HandleBase::HandleId pointerToHandle(void* p) const noexcept { + inline HandleBase::HandleId arenaPointerToHandle(void* p, uint32_t tag) const noexcept { char* const base = (char*)mHandleArena.getArea().begin(); - size_t offset = (char*)p - base; - auto id = HandleBase::HandleId(offset >> Allocator::MIN_ALIGNMENT_SHIFT); - assert_invariant((id & HEAP_HANDLE_FLAG) == 0); + size_t const offset = (char*)p - base; + assert_invariant((offset % Allocator::getAlignment()) == 0); + auto id = HandleBase::HandleId(offset / Allocator::getAlignment()); + id |= tag & HANDLE_TAG_MASK; + assert_invariant((id & HANDLE_HEAP_FLAG) == 0); return id; } @@ -346,9 +348,6 @@ class HandleAllocator { mutable utils::Mutex mLock; tsl::robin_map mOverflowMap; HandleBase::HandleId mId = 0; -#if HANDLE_TYPE_SAFETY - mutable std::unordered_map mHandleTypeId; -#endif }; } // namespace filament::backend diff --git a/filament/backend/src/CircularBuffer.cpp b/filament/backend/src/CircularBuffer.cpp index d9a877d3f59..41dd4173008 100644 --- a/filament/backend/src/CircularBuffer.cpp +++ b/filament/backend/src/CircularBuffer.cpp @@ -16,6 +16,14 @@ #include "private/backend/CircularBuffer.h" +#include +#include +#include +#include +#include +#include +#include + #if !defined(WIN32) && !defined(__EMSCRIPTEN__) && !defined(IOS) # include # include @@ -24,23 +32,20 @@ # define HAS_MMAP 0 #endif +#include +#include +#include #include -#include -#include -#include -#include -#include - using namespace utils; namespace filament::backend { size_t CircularBuffer::sPageSize = arch::getPageSize(); -CircularBuffer::CircularBuffer(size_t size) { +CircularBuffer::CircularBuffer(size_t size) + : mSize(size) { mData = alloc(size); - mSize = size; mTail = mData; mHead = mData; } @@ -85,7 +90,7 @@ void* CircularBuffer::alloc(size_t size) noexcept { MAP_PRIVATE, fd, (off_t)size); if (vaddr_guard != MAP_FAILED && (vaddr_guard == (char*)vaddr_shadow + size)) { // woo-hoo success! 
- mUsesAshmem = fd; + mAshmemFd = fd; data = vaddr; } } @@ -93,7 +98,7 @@ void* CircularBuffer::alloc(size_t size) noexcept { } } - if (UTILS_UNLIKELY(mUsesAshmem < 0)) { + if (UTILS_UNLIKELY(mAshmemFd < 0)) { // ashmem failed if (vaddr_guard != MAP_FAILED) { munmap(vaddr_guard, size); @@ -137,9 +142,9 @@ void CircularBuffer::dealloc() noexcept { if (mData) { size_t const BLOCK_SIZE = getBlockSize(); munmap(mData, mSize * 2 + BLOCK_SIZE); - if (mUsesAshmem >= 0) { - close(mUsesAshmem); - mUsesAshmem = -1; + if (mAshmemFd >= 0) { + close(mAshmemFd); + mAshmemFd = -1; } } #else @@ -149,23 +154,37 @@ void CircularBuffer::dealloc() noexcept { } -void CircularBuffer::circularize() noexcept { - if (mUsesAshmem > 0) { - intptr_t const overflow = intptr_t(mHead) - (intptr_t(mData) + ssize_t(mSize)); - if (overflow >= 0) { - assert_invariant(size_t(overflow) <= mSize); - mHead = (void *) (intptr_t(mData) + overflow); - #ifndef NDEBUG - memset(mData, 0xA5, size_t(overflow)); - #endif - } - } else { - // Only circularize if mHead if in the second buffer. - if (intptr_t(mHead) - intptr_t(mData) > ssize_t(mSize)) { +CircularBuffer::Range CircularBuffer::getBuffer() noexcept { + Range const range{ .tail = mTail, .head = mHead }; + + char* const pData = static_cast(mData); + char const* const pEnd = pData + mSize; + char const* const pHead = static_cast(mHead); + if (UTILS_UNLIKELY(pHead >= pEnd)) { + size_t const overflow = pHead - pEnd; + if (UTILS_LIKELY(mAshmemFd > 0)) { + assert_invariant(overflow <= mSize); + mHead = static_cast(pData + overflow); + // Data Tail End Head [virtual] + // v v v v + // +-------------:----+-----:--------------+ + // | : | : | + // +-----:------------+--------------------+ + // Head |<------ copy ------>| [physical] + } else { + // Data Tail End Head + // v v v v + // +-------------:----+-----:--------------+ + // | : | : | + // +-----|------------+-----|--------------+ + // |<---------------->| + // sliding window mHead = mData; } } mTail = mHead; + + return range; } } // namespace filament::backend diff --git a/filament/backend/src/CommandBufferQueue.cpp b/filament/backend/src/CommandBufferQueue.cpp index ccf9d33a0d7..e3e5de045c8 100644 --- a/filament/backend/src/CommandBufferQueue.cpp +++ b/filament/backend/src/CommandBufferQueue.cpp @@ -15,14 +15,25 @@ */ #include "private/backend/CommandBufferQueue.h" +#include "private/backend/CircularBuffer.h" +#include "private/backend/CommandStream.h" +#include #include -#include +#include +#include #include +#include #include -#include "private/backend/BackendUtils.h" -#include "private/backend/CommandStream.h" +#include +#include +#include +#include +#include + +#include +#include using namespace utils; @@ -65,50 +76,53 @@ void CommandBufferQueue::flush() noexcept { // always guaranteed to have enough space for the NoopCommand new(circularBuffer.allocate(sizeof(NoopCommand))) NoopCommand(nullptr); - // end of this slice - void* const head = circularBuffer.getHead(); + const size_t requiredSize = mRequiredSize; - // beginning of this slice - void* const tail = circularBuffer.getTail(); + // get the current buffer + auto const [begin, end] = circularBuffer.getBuffer(); - // size of this slice - uint32_t const used = uint32_t(intptr_t(head) - intptr_t(tail)); + assert_invariant(circularBuffer.empty()); - circularBuffer.circularize(); + // size of the current buffer + size_t const used = std::distance( + static_cast(begin), static_cast(end)); std::unique_lock lock(mLock); - mCommandBuffersToExecute.push_back({ tail, head }); + 
mCommandBuffersToExecute.push_back({ begin, end }); + mCondition.notify_one(); // circular buffer is too small, we corrupted the stream ASSERT_POSTCONDITION(used <= mFreeSpace, "Backend CommandStream overflow. Commands are corrupted and unrecoverable.\n" "Please increase minCommandBufferSizeMB inside the Config passed to Engine::create.\n" - "Space used at this time: %u bytes", - (unsigned)used); + "Space used at this time: %u bytes, overflow: %u bytes", + (unsigned)used, unsigned(used - mFreeSpace)); // wait until there is enough space in the buffer mFreeSpace -= used; - const size_t requiredSize = mRequiredSize; + if (UTILS_UNLIKELY(mFreeSpace < requiredSize)) { + #ifndef NDEBUG - size_t totalUsed = circularBuffer.size() - mFreeSpace; - mHighWatermark = std::max(mHighWatermark, totalUsed); - if (UTILS_UNLIKELY(totalUsed > requiredSize)) { - slog.d << "CommandStream used too much space: " << totalUsed - << ", out of " << requiredSize << " (will block)" << io::endl; - } + size_t const totalUsed = circularBuffer.size() - mFreeSpace; + slog.d << "CommandStream used too much space (will block): " + << "needed space " << requiredSize << " out of " << mFreeSpace + << ", totalUsed=" << totalUsed << ", current=" << used + << ", queue size=" << mCommandBuffersToExecute.size() << " buffers" + << io::endl; + + mHighWatermark = std::max(mHighWatermark, totalUsed); #endif - mCondition.notify_one(); - if (UTILS_LIKELY(mFreeSpace < requiredSize)) { SYSTRACE_NAME("waiting: CircularBuffer::flush()"); mCondition.wait(lock, [this, requiredSize]() -> bool { + // TODO: on macOS, we need to call pumpEvents from time to time return mFreeSpace >= requiredSize; }); } } -std::vector CommandBufferQueue::waitForCommands() const { +std::vector CommandBufferQueue::waitForCommands() const { if (!UTILS_HAS_THREADING) { return std::move(mCommandBuffersToExecute); } @@ -123,7 +137,7 @@ std::vector CommandBufferQueue::waitForCommands() con return std::move(mCommandBuffersToExecute); } -void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Slice const& buffer) { +void CommandBufferQueue::releaseBuffer(CommandBufferQueue::Range const& buffer) { std::lock_guard const lock(mLock); mFreeSpace += uintptr_t(buffer.end) - uintptr_t(buffer.begin); mCondition.notify_one(); diff --git a/filament/backend/src/HandleAllocator.cpp b/filament/backend/src/HandleAllocator.cpp index 3257e4e2c94..bf8e779614c 100644 --- a/filament/backend/src/HandleAllocator.cpp +++ b/filament/backend/src/HandleAllocator.cpp @@ -16,9 +16,22 @@ #include "private/backend/HandleAllocator.h" +#include + +#include +#include #include +#include +#include +#include + +#include +#include +#include +#include #include +#include namespace filament::backend { @@ -28,14 +41,34 @@ template UTILS_NOINLINE HandleAllocator::Allocator::Allocator(AreaPolicy::HeapArea const& area) : mArea(area) { - // TODO: we probably need a better way to set the size of these pools - const size_t unit = area.size() / 32; - const size_t offsetPool1 = unit; - const size_t offsetPool2 = 16 * unit; - char* const p = (char*)area.begin(); - mPool0 = PoolAllocator< P0, 16>(p, p + offsetPool1); - mPool1 = PoolAllocator< P1, 16>(p + offsetPool1, p + offsetPool2); - mPool2 = PoolAllocator< P2, 16>(p + offsetPool2, area.end()); + + // The largest handle this allocator can generate currently depends on the architecture's + // min alignment, typically 8 or 16 bytes. + // e.g. On Android armv8, the alignment is 16 bytes, so for a 1 MiB heap, the largest handle + // index will be 65536. 
Note that this is not the same as the number of handles (which + // will always be less). + // Because our maximum representable handle currently is 0x07FFFFFF, the maximum no-nonsensical + // heap size is 2 GiB, which amounts to 7.6 millions handles per pool (in the GL case). + size_t const maxHeapSize = std::min(area.size(), HANDLE_INDEX_MASK * getAlignment()); + + if (UTILS_UNLIKELY(maxHeapSize != area.size())) { + slog.w << "HandleAllocator heap size reduced to " + << maxHeapSize << " from " << area.size() << io::endl; + } + + // make sure we start with a clean arena. This is needed to ensure that all blocks start + // with an age of 0. + memset(area.data(), 0, maxHeapSize); + + // size the different pools so that they can all contain the same number of handles + size_t const count = maxHeapSize / (P0 + P1 + P2); + char* const p0 = static_cast(area.begin()); + char* const p1 = p0 + count * P0; + char* const p2 = p1 + count * P1; + + mPool0 = Pool(p0, count * P0); + mPool1 = Pool(p1, count * P1); + mPool2 = Pool(p2, count * P2); } // ------------------------------------------------------------------------------------------------ @@ -73,11 +106,17 @@ template HandleBase::HandleId HandleAllocator::allocateHandleSlow(size_t size) noexcept { void* p = ::malloc(size); std::unique_lock lock(mLock); - HandleBase::HandleId id = (++mId) | HEAP_HANDLE_FLAG; + + HandleBase::HandleId id = (++mId) | HANDLE_HEAP_FLAG; + + ASSERT_POSTCONDITION(mId < HANDLE_HEAP_FLAG, + "No more Handle ids available! This can happen if HandleAllocator arena has been full" + " for a while. Please increase FILAMENT_OPENGL_HANDLE_ARENA_SIZE_IN_MB"); + mOverflowMap.emplace(id, p); lock.unlock(); - if (UTILS_UNLIKELY(id == (HEAP_HANDLE_FLAG|1u))) { // meaning id was zero + if (UTILS_UNLIKELY(id == (HANDLE_HEAP_FLAG | 1u))) { // meaning id was zero PANIC_LOG("HandleAllocator arena is full, using slower system heap. Please increase " "the appropriate constant (e.g. 
FILAMENT_OPENGL_HANDLE_ARENA_SIZE_IN_MB)."); } @@ -86,7 +125,7 @@ HandleBase::HandleId HandleAllocator::allocateHandleSlow(size_t size template void HandleAllocator::deallocateHandleSlow(HandleBase::HandleId id, size_t) noexcept { - assert_invariant(id & HEAP_HANDLE_FLAG); + assert_invariant(id & HANDLE_HEAP_FLAG); void* p = nullptr; auto& overflowMap = mOverflowMap; diff --git a/filament/backend/src/metal/MetalDriver.mm b/filament/backend/src/metal/MetalDriver.mm index ef5c35e1080..b1e3d7574f7 100644 --- a/filament/backend/src/metal/MetalDriver.mm +++ b/filament/backend/src/metal/MetalDriver.mm @@ -43,6 +43,40 @@ namespace backend { Driver* MetalDriverFactory::create(MetalPlatform* const platform, const Platform::DriverConfig& driverConfig) { +#if 0 + // this is useful for development, but too verbose even for debug builds + // For reference on a 64-bits machine in Release mode: + // MetalTimerQuery : 16 few + // HwStream : 24 few + // MetalIndexBuffer : 40 moderate + // MetalFence : 48 few + // MetalBufferObject : 48 many + // -- less than or equal 48 bytes + // MetalSamplerGroup : 112 few + // MetalProgram : 144 moderate + // MetalTexture : 152 moderate + // MetalVertexBuffer : 152 moderate + // -- less than or equal 160 bytes + // MetalSwapChain : 184 few + // MetalRenderTarget : 272 few + // MetalRenderPrimitive : 584 many + // -- less than or equal to 592 bytes + + utils::slog.d + << "\nMetalSwapChain: " << sizeof(MetalSwapChain) + << "\nMetalBufferObject: " << sizeof(MetalBufferObject) + << "\nMetalVertexBuffer: " << sizeof(MetalVertexBuffer) + << "\nMetalIndexBuffer: " << sizeof(MetalIndexBuffer) + << "\nMetalSamplerGroup: " << sizeof(MetalSamplerGroup) + << "\nMetalRenderPrimitive: " << sizeof(MetalRenderPrimitive) + << "\nMetalTexture: " << sizeof(MetalTexture) + << "\nMetalTimerQuery: " << sizeof(MetalTimerQuery) + << "\nHwStream: " << sizeof(HwStream) + << "\nMetalRenderTarget: " << sizeof(MetalRenderTarget) + << "\nMetalFence: " << sizeof(MetalFence) + << "\nMetalProgram: " << sizeof(MetalProgram) + << utils::io::endl; +#endif return MetalDriver::create(platform, driverConfig); } diff --git a/filament/backend/src/opengl/OpenGLDriver.cpp b/filament/backend/src/opengl/OpenGLDriver.cpp index 1d3e06282c4..047f28383db 100644 --- a/filament/backend/src/opengl/OpenGLDriver.cpp +++ b/filament/backend/src/opengl/OpenGLDriver.cpp @@ -90,24 +90,24 @@ Driver* OpenGLDriver::create(OpenGLPlatform* const platform, #if 0 // this is useful for development, but too verbose even for debug builds // For reference on a 64-bits machine in Release mode: - // GLFence : 8 few // GLIndexBuffer : 8 moderate - // GLSamplerGroup : 8 few + // GLSamplerGroup : 16 few + // GLSwapChain : 16 few + // GLTimerQuery : 16 few // -- less than or equal 16 bytes - // GLBufferObject : 24 many - // GLSync : 24 few - // GLTimerQuery : 32 few - // OpenGLProgram : 32 moderate - // GLRenderPrimitive : 48 many + // GLFence : 24 few + // GLBufferObject : 32 many + // GLRenderPrimitive : 40 many + // OpenGLProgram : 56 moderate + // GLTexture : 64 moderate // -- less than or equal 64 bytes - // GLTexture : 72 moderate + // GLStream : 104 few // GLRenderTarget : 112 few - // GLStream : 184 few // GLVertexBuffer : 200 moderate // -- less than or equal to 208 bytes slog.d - << "HwFence: " << sizeof(HwFence) + << "\nGLSwapChain: " << sizeof(GLSwapChain) << "\nGLBufferObject: " << sizeof(GLBufferObject) << "\nGLVertexBuffer: " << sizeof(GLVertexBuffer) << "\nGLIndexBuffer: " << sizeof(GLIndexBuffer) @@ -117,7 +117,7 @@ Driver* 
OpenGLDriver::create(OpenGLPlatform* const platform, << "\nGLTimerQuery: " << sizeof(GLTimerQuery) << "\nGLStream: " << sizeof(GLStream) << "\nGLRenderTarget: " << sizeof(GLRenderTarget) - << "\nGLSync: " << sizeof(GLSync) + << "\nGLFence: " << sizeof(GLFence) << "\nOpenGLProgram: " << sizeof(OpenGLProgram) << io::endl; #endif diff --git a/filament/backend/src/vulkan/VulkanDriver.cpp b/filament/backend/src/vulkan/VulkanDriver.cpp index 9680320f929..c700ce9c6a3 100644 --- a/filament/backend/src/vulkan/VulkanDriver.cpp +++ b/filament/backend/src/vulkan/VulkanDriver.cpp @@ -213,6 +213,41 @@ VulkanDriver::~VulkanDriver() noexcept = default; UTILS_NOINLINE Driver* VulkanDriver::create(VulkanPlatform* platform, VulkanContext const& context, Platform::DriverConfig const& driverConfig) noexcept { +#if 0 + // this is useful for development, but too verbose even for debug builds + // For reference on a 64-bits machine in Release mode: + // VulkanSamplerGroup : 24 few + // HwStream : 24 few + // VulkanFence : 40 few + // VulkanProgram : 40 moderate + // VulkanIndexBuffer : 72 moderate + // VulkanBufferObject : 72 many + // -- less than or equal 80 bytes + // VulkanRenderPrimitive : 104 many + // VulkanSwapChain : 112 few + // VulkanTimerQuery : 168 few + // -- less than or equal 176 bytes + // VulkanTexture : 232 moderate + // VulkanVertexBuffer : 312 moderate + // VulkanRenderTarget : 320 few + // -- less than or equal to 320 bytes + + utils::slog.d + << "\nVulkanSwapChain: " << sizeof(VulkanSwapChain) + << "\nVulkanBufferObject: " << sizeof(VulkanBufferObject) + << "\nVulkanVertexBuffer: " << sizeof(VulkanVertexBuffer) + << "\nVulkanIndexBuffer: " << sizeof(VulkanIndexBuffer) + << "\nVulkanSamplerGroup: " << sizeof(VulkanSamplerGroup) + << "\nVulkanRenderPrimitive: " << sizeof(VulkanRenderPrimitive) + << "\nVulkanTexture: " << sizeof(VulkanTexture) + << "\nVulkanTimerQuery: " << sizeof(VulkanTimerQuery) + << "\nHwStream: " << sizeof(HwStream) + << "\nVulkanRenderTarget: " << sizeof(VulkanRenderTarget) + << "\nVulkanFence: " << sizeof(VulkanFence) + << "\nVulkanProgram: " << sizeof(VulkanProgram) + << utils::io::endl; +#endif + assert_invariant(platform); size_t defaultSize = FVK_HANDLE_ARENA_SIZE_IN_MB * 1024U * 1024U; Platform::DriverConfig validConfig {driverConfig}; @@ -1641,26 +1676,26 @@ void VulkanDriver::draw(PipelineState pipelineState, Handle r // Update the VK raster state. const VulkanRenderTarget* rt = mCurrentRenderPass.renderTarget; - auto vkraster = mPipelineCache.getCurrentRasterState(); - vkraster.cullMode = getCullMode(rasterState.culling); - vkraster.frontFace = getFrontFace(rasterState.inverseFrontFaces); - vkraster.depthBiasEnable = (depthOffset.constant || depthOffset.slope) ? true : false; - vkraster.depthBiasConstantFactor = depthOffset.constant; - vkraster.depthBiasSlopeFactor = depthOffset.slope; - vkraster.blendEnable = rasterState.hasBlending(); - vkraster.srcColorBlendFactor = getBlendFactor(rasterState.blendFunctionSrcRGB); - vkraster.dstColorBlendFactor = getBlendFactor(rasterState.blendFunctionDstRGB); - vkraster.colorBlendOp = rasterState.blendEquationRGB; - vkraster.srcAlphaBlendFactor = getBlendFactor(rasterState.blendFunctionSrcAlpha); - vkraster.dstAlphaBlendFactor = getBlendFactor(rasterState.blendFunctionDstAlpha); - vkraster.alphaBlendOp = rasterState.blendEquationAlpha; - vkraster.colorWriteMask = (VkColorComponentFlags) (rasterState.colorWrite ? 
0xf : 0x0); - vkraster.depthWriteEnable = rasterState.depthWrite; - vkraster.depthCompareOp = rasterState.depthFunc; - vkraster.rasterizationSamples = rt->getSamples(); - vkraster.alphaToCoverageEnable = rasterState.alphaToCoverage; - vkraster.colorTargetCount = rt->getColorTargetCount(mCurrentRenderPass); - mPipelineCache.setCurrentRasterState(vkraster); + VulkanPipelineCache::RasterState const vulkanRasterState{ + .cullMode = getCullMode(rasterState.culling), + .frontFace = getFrontFace(rasterState.inverseFrontFaces), + .depthBiasEnable = (depthOffset.constant || depthOffset.slope) ? true : false, + .blendEnable = rasterState.hasBlending(), + .depthWriteEnable = rasterState.depthWrite, + .alphaToCoverageEnable = rasterState.alphaToCoverage, + .srcColorBlendFactor = getBlendFactor(rasterState.blendFunctionSrcRGB), + .dstColorBlendFactor = getBlendFactor(rasterState.blendFunctionDstRGB), + .srcAlphaBlendFactor = getBlendFactor(rasterState.blendFunctionSrcAlpha), + .dstAlphaBlendFactor = getBlendFactor(rasterState.blendFunctionDstAlpha), + .colorWriteMask = (VkColorComponentFlags) (rasterState.colorWrite ? 0xf : 0x0), + .rasterizationSamples = rt->getSamples(), + .colorTargetCount = rt->getColorTargetCount(mCurrentRenderPass), + .colorBlendOp = rasterState.blendEquationRGB, + .alphaBlendOp = rasterState.blendEquationAlpha, + .depthCompareOp = rasterState.depthFunc, + .depthBiasConstantFactor = depthOffset.constant, + .depthBiasSlopeFactor = depthOffset.slope + }; // Declare fixed-size arrays that get passed to the pipeCache and to vkCmdBindVertexBuffers. uint32_t const bufferCount = prim.vertexBuffer->attributes.size(); @@ -1671,7 +1706,7 @@ void VulkanDriver::draw(PipelineState pipelineState, Handle r // Push state changes to the VulkanPipelineCache instance. This is fast and does not make VK calls. mPipelineCache.bindProgram(program); - mPipelineCache.bindRasterState(mPipelineCache.getCurrentRasterState()); + mPipelineCache.bindRasterState(vulkanRasterState); mPipelineCache.bindPrimitiveTopology(prim.primitiveTopology); mPipelineCache.bindVertexArray(attribDesc, bufferDesc, bufferCount); diff --git a/filament/backend/src/vulkan/VulkanPipelineCache.cpp b/filament/backend/src/vulkan/VulkanPipelineCache.cpp index 889888cd083..2d976f66ff2 100644 --- a/filament/backend/src/vulkan/VulkanPipelineCache.cpp +++ b/filament/backend/src/vulkan/VulkanPipelineCache.cpp @@ -34,8 +34,6 @@ using namespace bluevk; namespace filament::backend { -static VulkanPipelineCache::RasterState createDefaultRasterState(); - static VkShaderStageFlags getShaderStageFlags(VulkanPipelineCache::UsageFlags key, uint16_t binding) { // NOTE: if you modify this function, you also need to modify getUsageFlags. 
assert_invariant(binding < MAX_SAMPLER_COUNT); @@ -73,8 +71,7 @@ VulkanPipelineCache::UsageFlags VulkanPipelineCache::disableUsageFlags(uint16_t } VulkanPipelineCache::VulkanPipelineCache(VulkanResourceAllocator* allocator) - : mCurrentRasterState(createDefaultRasterState()), - mResourceAllocator(allocator), + : mResourceAllocator(allocator), mPipelineBoundResources(allocator) { mDummyBufferWriteInfo.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET; mDummyBufferWriteInfo.pNext = nullptr; @@ -569,7 +566,7 @@ void VulkanPipelineCache::bindProgram(VulkanProgram* program) noexcept { } void VulkanPipelineCache::bindRasterState(const RasterState& rasterState) noexcept { - mPipelineRequirements.rasterState = mCurrentRasterState = rasterState; + mPipelineRequirements.rasterState = rasterState; } void VulkanPipelineCache::bindRenderPass(VkRenderPass renderPass, int subpassIndex) noexcept { @@ -917,23 +914,6 @@ bool VulkanPipelineCache::DescEqual::operator()(const DescriptorKey& k1, return true; } -static VulkanPipelineCache::RasterState createDefaultRasterState() { - return VulkanPipelineCache::RasterState { - .cullMode = VK_CULL_MODE_NONE, - .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE, - .depthBiasEnable = VK_FALSE, - .blendEnable = VK_FALSE, - .depthWriteEnable = VK_TRUE, - .alphaToCoverageEnable = true, - .colorWriteMask = 0xf, - .rasterizationSamples = VK_SAMPLE_COUNT_1_BIT, - .colorTargetCount = 1, - .depthCompareOp = SamplerCompareFunc::LE, - .depthBiasConstantFactor = 0.0f, - .depthBiasSlopeFactor = 0.0f, - }; -} - } // namespace filament::backend #pragma clang diagnostic pop diff --git a/filament/backend/src/vulkan/VulkanPipelineCache.h b/filament/backend/src/vulkan/VulkanPipelineCache.h index 018fd00efec..a28327c5d9e 100644 --- a/filament/backend/src/vulkan/VulkanPipelineCache.h +++ b/filament/backend/src/vulkan/VulkanPipelineCache.h @@ -199,15 +199,6 @@ class VulkanPipelineCache : public CommandBufferObserver { mPipelineBoundResources.acquire(resource); } - inline RasterState getCurrentRasterState() const noexcept { - return mCurrentRasterState; - } - - // We need to update this outside of bindRasterState due to VulkanDriver::draw. - inline void setCurrentRasterState(RasterState const& rasterState) noexcept { - mCurrentRasterState = rasterState; - } - private: // PIPELINE LAYOUT CACHE KEY // ------------------------- @@ -413,7 +404,6 @@ class VulkanPipelineCache : public CommandBufferObserver { VmaAllocator mAllocator = VK_NULL_HANDLE; // Current requirements for the pipeline layout, pipeline, and descriptor sets. - RasterState mCurrentRasterState; PipelineKey mPipelineRequirements = {}; DescriptorKey mDescriptorRequirements = {}; diff --git a/filament/backend/src/vulkan/VulkanResources.h b/filament/backend/src/vulkan/VulkanResources.h index 77b6498b860..9421e11a14d 100644 --- a/filament/backend/src/vulkan/VulkanResources.h +++ b/filament/backend/src/vulkan/VulkanResources.h @@ -63,7 +63,8 @@ struct VulkanResourceBase { explicit VulkanResourceBase(VulkanResourceType type) : mRefCount(IS_HEAP_ALLOC_TYPE(type) ? 
1 : 0), mType(type), - mHandleId(0) {} + mHandleId(0) { + } private: inline VulkanResourceType getType() { @@ -82,6 +83,7 @@ struct VulkanResourceBase { if (IS_HEAP_ALLOC_TYPE(mType)) { return; } + assert_invariant(mRefCount < ((1<<24) - 1)); ++mRefCount; } @@ -89,6 +91,7 @@ struct VulkanResourceBase { if (IS_HEAP_ALLOC_TYPE(mType)) { return; } + assert_invariant(mRefCount > 0); --mRefCount; } @@ -96,8 +99,8 @@ struct VulkanResourceBase { return mRefCount; } - size_t mRefCount = 0; - VulkanResourceType mType = VulkanResourceType::BUFFER_OBJECT; + uint32_t mRefCount : 24; // 16M is enough for the refcount + VulkanResourceType mType : 8; HandleBase::HandleId mHandleId; friend struct VulkanThreadSafeResource; diff --git a/filament/include/filament/Engine.h b/filament/include/filament/Engine.h index 904cbda4a3f..2f8c6d4af74 100644 --- a/filament/include/filament/Engine.h +++ b/filament/include/filament/Engine.h @@ -178,6 +178,7 @@ class UTILS_PUBLIC Engine { using Backend = backend::Backend; using DriverConfig = backend::Platform::DriverConfig; using FeatureLevel = backend::FeatureLevel; + using StereoscopicType = backend::StereoscopicType; /** * Config is used to define the memory footprint used by the engine, such as the @@ -297,6 +298,25 @@ class UTILS_PUBLIC Engine { */ size_t textureUseAfterFreePoolSize = 0; + /** + * Set to `true` to forcibly disable parallel shader compilation in the backend. + * Currently only honored by the GL backend. + */ + bool disableParallelShaderCompile = false; + + /* + * The type of technique for stereoscopic rendering. + * + * This setting determines the algorithm used when stereoscopic rendering is enabled. This + * decision applies to the entire Engine for the lifetime of the Engine. E.g., multiple + * Views created from the Engine must use the same stereoscopic type. + * + * Each view can enable stereoscopic rendering via the StereoscopicOptions::enable flag. + * + * @see View::setStereoscopicOptions + */ + StereoscopicType stereoscopicType = StereoscopicType::INSTANCED; + /* * The number of eyes to render when stereoscopic rendering is enabled. Supported values are * between 1 and Engine::getMaxStereoscopicEyes() (inclusive). diff --git a/filament/include/filament/View.h b/filament/include/filament/View.h index e4ba827aad2..3cdd527fac7 100644 --- a/filament/include/filament/View.h +++ b/filament/include/filament/View.h @@ -719,7 +719,7 @@ class UTILS_PUBLIC View : public FilamentAPI { void setDebugCamera(Camera* UTILS_NULLABLE camera) noexcept; //! debugging: returns a Camera from the point of view of *the* dominant directional light used for shadowing. 
- Camera const* UTILS_NULLABLE getDirectionalLightCamera() const noexcept; + Camera const* UTILS_NULLABLE getDirectionalShadowCamera() const noexcept; /** Result of a picking query */ diff --git a/filament/src/Allocators.h b/filament/src/Allocators.h index eb354b8d329..84962e30c0e 100644 --- a/filament/src/Allocators.h +++ b/filament/src/Allocators.h @@ -54,7 +54,7 @@ using LinearAllocatorArena = utils::Arena< #endif -using ArenaScope = utils::ArenaScope; +using RootArenaScope = utils::ArenaScope; } // namespace filament diff --git a/filament/src/Froxelizer.cpp b/filament/src/Froxelizer.cpp index c469932c251..47bd0d343dd 100644 --- a/filament/src/Froxelizer.cpp +++ b/filament/src/Froxelizer.cpp @@ -168,7 +168,8 @@ void Froxelizer::setProjection(const mat4f& projection, } bool Froxelizer::prepare( - FEngine::DriverApi& driverApi, ArenaScope& arena, filament::Viewport const& viewport, + FEngine::DriverApi& driverApi, RootArenaScope& rootArenaScope, + filament::Viewport const& viewport, const mat4f& projection, float projectionNear, float projectionFar) noexcept { setViewport(viewport); setProjection(projection, projectionNear, projectionFar); @@ -199,12 +200,12 @@ bool Froxelizer::prepare( // light records per froxel (~256 KiB) mLightRecords = { - arena.allocate(getFroxelBufferEntryCount(), CACHELINE_SIZE), + rootArenaScope.allocate(getFroxelBufferEntryCount(), CACHELINE_SIZE), getFroxelBufferEntryCount() }; // froxel thread data (~256 KiB) mFroxelShardedData = { - arena.allocate(GROUP_COUNT, CACHELINE_SIZE), + rootArenaScope.allocate(GROUP_COUNT, CACHELINE_SIZE), uint32_t(GROUP_COUNT) }; diff --git a/filament/src/Froxelizer.h b/filament/src/Froxelizer.h index 27885e24bc7..27ba3c57641 100644 --- a/filament/src/Froxelizer.h +++ b/filament/src/Froxelizer.h @@ -110,7 +110,7 @@ class Froxelizer { * * return true if updateUniforms() needs to be called */ - bool prepare(backend::DriverApi& driverApi, ArenaScope& arena, Viewport const& viewport, + bool prepare(backend::DriverApi& driverApi, RootArenaScope& rootArenaScope, Viewport const& viewport, const math::mat4f& projection, float projectionNear, float projectionFar) noexcept; Froxel getFroxelAt(size_t x, size_t y, size_t z) const noexcept; diff --git a/filament/src/PostProcessManager.cpp b/filament/src/PostProcessManager.cpp index 78814f74852..f186ee9cb6d 100644 --- a/filament/src/PostProcessManager.cpp +++ b/filament/src/PostProcessManager.cpp @@ -414,7 +414,7 @@ void PostProcessManager::commitAndRender(FrameGraphResources::RenderPassInfo con // ------------------------------------------------------------------------------------------------ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph& fg, - RenderPass const& pass, uint8_t structureRenderFlags, + RenderPassBuilder const& passBuilder, uint8_t structureRenderFlags, uint32_t width, uint32_t height, StructurePassConfig const& config) noexcept { @@ -466,17 +466,19 @@ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph .clearFlags = TargetBufferFlags::COLOR0 | TargetBufferFlags::DEPTH }); }, - [=, renderPass = pass](FrameGraphResources const& resources, + [=, passBuilder = passBuilder](FrameGraphResources const& resources, auto const&, DriverApi&) mutable { Variant structureVariant(Variant::DEPTH_VARIANT); structureVariant.setPicking(config.picking); auto out = resources.getRenderPassInfo(); - renderPass.setRenderFlags(structureRenderFlags); - renderPass.setVariant(structureVariant); - renderPass.appendCommands(mEngine, 
RenderPass::CommandTypeFlags::SSAO); - renderPass.sortCommands(mEngine); - renderPass.execute(mEngine, resources.getPassName(), out.target, out.params); + + passBuilder.renderFlags(structureRenderFlags); + passBuilder.variant(structureVariant); + passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::SSAO); + + RenderPass const pass{ passBuilder.build(mEngine) }; + RenderPass::execute(pass, mEngine, resources.getPassName(), out.target, out.params); }); auto depth = structurePass->depth; @@ -523,7 +525,7 @@ PostProcessManager::StructurePassOutput PostProcessManager::structure(FrameGraph // ------------------------------------------------------------------------------------------------ FrameGraphId PostProcessManager::ssr(FrameGraph& fg, - RenderPass const& pass, + RenderPassBuilder const& passBuilder, FrameHistory const& frameHistory, CameraInfo const& cameraInfo, PerViewUniforms& uniforms, @@ -586,7 +588,7 @@ FrameGraphId PostProcessManager::ssr(FrameGraph& fg, }, [this, projection = cameraInfo.projection, userViewMatrix = cameraInfo.getUserViewMatrix(), uvFromClipMatrix, historyProjection, - options, &uniforms, renderPass = pass] + options, &uniforms, passBuilder = passBuilder] (FrameGraphResources const& resources, auto const& data, DriverApi& driver) mutable { // set structure sampler uniforms.prepareStructure(data.structure ? @@ -607,17 +609,17 @@ FrameGraphId PostProcessManager::ssr(FrameGraph& fg, auto out = resources.getRenderPassInfo(); // Remove the HAS_SHADOWING RenderFlags, since it's irrelevant when rendering reflections - RenderPass::RenderFlags flags = renderPass.getRenderFlags(); - flags &= ~RenderPass::HAS_SHADOWING; - renderPass.setRenderFlags(flags); + passBuilder.renderFlags(~RenderPass::HAS_SHADOWING, 0); // use our special SSR variant, it can only be applied to object that have // the SCREEN_SPACE ReflectionMode. - renderPass.setVariant(Variant{Variant::SPECIAL_SSR}); + passBuilder.variant(Variant{ Variant::SPECIAL_SSR }); + // generate all our drawing commands, except blended objects. 
- renderPass.appendCommands(mEngine, RenderPass::CommandTypeFlags::SCREEN_SPACE_REFLECTIONS); - renderPass.sortCommands(mEngine); - renderPass.execute(mEngine, resources.getPassName(), out.target, out.params); + passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::SCREEN_SPACE_REFLECTIONS); + + RenderPass const pass{ passBuilder.build(mEngine) }; + RenderPass::execute(pass, mEngine, resources.getPassName(), out.target, out.params); }); return ssrPass->reflections; diff --git a/filament/src/PostProcessManager.h b/filament/src/PostProcessManager.h index 081e795f061..12b211dc238 100644 --- a/filament/src/PostProcessManager.h +++ b/filament/src/PostProcessManager.h @@ -50,6 +50,7 @@ class FMaterialInstance; class FrameGraph; class PerViewUniforms; class RenderPass; +class RenderPassBuilder; struct CameraInfo; class PostProcessManager { @@ -99,12 +100,12 @@ class PostProcessManager { FrameGraphId picking; }; StructurePassOutput structure(FrameGraph& fg, - RenderPass const& pass, uint8_t structureRenderFlags, + RenderPassBuilder const& passBuilder, uint8_t structureRenderFlags, uint32_t width, uint32_t height, StructurePassConfig const& config) noexcept; // reflections pass FrameGraphId ssr(FrameGraph& fg, - RenderPass const& pass, + RenderPassBuilder const& passBuilder, FrameHistory const& frameHistory, CameraInfo const& cameraInfo, PerViewUniforms& uniforms, diff --git a/filament/src/RenderPass.cpp b/filament/src/RenderPass.cpp index 2932fcf481b..d5063043f5a 100644 --- a/filament/src/RenderPass.cpp +++ b/filament/src/RenderPass.cpp @@ -19,17 +19,43 @@ #include "RenderPrimitive.h" #include "ShadowMap.h" +#include "details/Camera.h" #include "details/Material.h" #include "details/MaterialInstance.h" #include "details/View.h" +#include "components/RenderableManager.h" + +#include #include +#include + +#include + +#include +#include +#include +#include + +#include "private/backend/CircularBuffer.h" +#include +#include #include +#include +#include #include +#include +#include +#include +#include #include +#include +#include +#include + using namespace utils; using namespace filament::math; @@ -37,64 +63,112 @@ namespace filament { using namespace backend; -RenderPass::RenderPass(FEngine& engine, - RenderPass::Arena& arena) noexcept - : mCommandArena(arena), - mCustomCommands(engine.getPerRenderPassAllocator()) { +RenderPassBuilder& RenderPassBuilder::customCommand( + FEngine& engine, + uint8_t channel, + RenderPass::Pass pass, + RenderPass::CustomCommand custom, + uint32_t order, + RenderPass::Executor::CustomCommandFn const& command) { + if (!mCustomCommands.has_value()) { + // construct the vector the first time + mCustomCommands.emplace(engine.getPerRenderPassArena()); + } + mCustomCommands->emplace_back(channel, pass, custom, order, command); + return *this; } -RenderPass::RenderPass(RenderPass const& rhs) = default; +RenderPass RenderPassBuilder::build(FEngine& engine) { + ASSERT_POSTCONDITION(mRenderableSoa, "RenderPassBuilder::geometry() hasn't been called"); + assert_invariant(mScissorViewport.width <= std::numeric_limits::max()); + assert_invariant(mScissorViewport.height <= std::numeric_limits::max()); + return RenderPass{ engine, *this }; +} -// this destructor is actually heavy because it inlines ~vector<> -RenderPass::~RenderPass() noexcept = default; +// ------------------------------------------------------------------------------------------------ + +RenderPass::RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexcept + : mRenderableSoa(*builder.mRenderableSoa), 
+ mVisibleRenderables(builder.mVisibleRenderables), + mUboHandle(builder.mUboHandle), + mCameraPosition(builder.mCameraPosition), + mCameraForwardVector(builder.mCameraForwardVector), + mFlags(builder.mFlags), + mVariant(builder.mVariant), + mVisibilityMask(builder.mVisibilityMask), + mScissorViewport(builder.mScissorViewport), + mCustomCommands(engine.getPerRenderPassArena()) { + + // compute the number of commands we need + updateSummedPrimitiveCounts( + const_cast(mRenderableSoa), mVisibleRenderables); + + uint32_t commandCount = + FScene::getPrimitiveCount(mRenderableSoa, mVisibleRenderables.last); + const bool colorPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::COLOR); + const bool depthPass = bool(builder.mCommandTypeFlags & CommandTypeFlags::DEPTH); + commandCount *= uint32_t(colorPass * 2 + depthPass); + commandCount += 1; // for the sentinel + + uint32_t const customCommandCount = + builder.mCustomCommands.has_value() ? builder.mCustomCommands->size() : 0; -RenderPass::Command* RenderPass::append(size_t count) noexcept { - // this is like an "in-place" realloc(). Works only with LinearAllocator. - Command* const curr = mCommandArena.alloc(count); + Command* const curr = builder.mArena.alloc(commandCount + customCommandCount); assert_invariant(curr); - assert_invariant(mCommandBegin == nullptr || curr == mCommandEnd); - if (mCommandBegin == nullptr) { - mCommandBegin = mCommandEnd = curr; + + if (UTILS_UNLIKELY(builder.mArena.getAllocator().isHeapAllocation(curr))) { + static bool sLogOnce = true; + if (UTILS_UNLIKELY(sLogOnce)) { + sLogOnce = false; + PANIC_LOG("RenderPass arena is full, using slower system heap. Please increase " + "the appropriate constant (e.g. FILAMENT_PER_RENDER_PASS_ARENA_SIZE_IN_MB)."); + } } - mCommandEnd += count; - return curr; -} -void RenderPass::resize(size_t count) noexcept { - if (mCommandBegin) { - mCommandEnd = mCommandBegin + count; - mCommandArena.rewind(mCommandEnd); + mCommandBegin = curr; + mCommandEnd = curr + commandCount + customCommandCount; + + appendCommands(engine, { curr, commandCount }, builder.mCommandTypeFlags); + + if (builder.mCustomCommands.has_value()) { + Command* p = curr + commandCount; + for (auto [channel, passId, command, order, fn]: builder.mCustomCommands.value()) { + appendCustomCommand(p++, channel, passId, command, order, fn); + } } -} -void RenderPass::setGeometry(FScene::RenderableSoa const& soa, Range vr, - backend::Handle uboHandle) noexcept { - mRenderableSoa = &soa; - mVisibleRenderables = vr; - mUboHandle = uboHandle; -} + // sort commands once we're done adding commands + sortCommands(builder.mArena); -void RenderPass::setCamera(const CameraInfo& camera) noexcept { - mCameraPosition = camera.getPosition(); - mCameraForwardVector = camera.getForwardVector(); + if (engine.isAutomaticInstancingEnabled()) { + instanceify(engine, builder.mArena); + } } -void RenderPass::setScissorViewport(backend::Viewport viewport) noexcept { - assert_invariant(viewport.width <= std::numeric_limits::max()); - assert_invariant(viewport.height <= std::numeric_limits::max()); - mScissorViewport = viewport; +// this destructor is actually heavy because it inlines ~vector<> +RenderPass::~RenderPass() noexcept = default; + +void RenderPass::resize(Arena& arena, size_t count) noexcept { + if (mCommandBegin) { + mCommandEnd = mCommandBegin + count; + arena.rewind(mCommandEnd); + } } -void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandTypeFlags) noexcept { +void RenderPass::appendCommands(FEngine& 
engine, + Slice commands, CommandTypeFlags const commandTypeFlags) noexcept { SYSTRACE_CALL(); SYSTRACE_CONTEXT(); - assert_invariant(mRenderableSoa); - utils::Range const vr = mVisibleRenderables; // trace the number of visible renderables SYSTRACE_VALUE32("visibleRenderables", vr.size()); if (UTILS_UNLIKELY(vr.empty())) { + // no renderables, we still need the sentinel and the command buffer size should be + // exactly 1. + assert_invariant(commands.size() == 1); + Command* curr = commands.data(); + curr->key = uint64_t(Pass::SENTINEL); return; } @@ -104,17 +178,10 @@ void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandT const FScene::VisibleMaskType visibilityMask = mVisibilityMask; // up-to-date summed primitive counts needed for generateCommands() - FScene::RenderableSoa const& soa = *mRenderableSoa; - updateSummedPrimitiveCounts(const_cast(soa), vr); + FScene::RenderableSoa const& soa = mRenderableSoa; - // compute how much maximum storage we need for this pass - uint32_t commandCount = FScene::getPrimitiveCount(soa, vr.last); - // double the color pass for transparent objects that need to render twice - const bool colorPass = bool(commandTypeFlags & CommandTypeFlags::COLOR); - const bool depthPass = bool(commandTypeFlags & CommandTypeFlags::DEPTH); - commandCount *= uint32_t(colorPass * 2 + depthPass); - commandCount += 1; // for the sentinel - Command* const curr = append(commandCount); + Command* curr = commands.data(); + size_t const commandCount = commands.size(); auto stereoscopicEyeCount = renderFlags & IS_STEREOSCOPIC ? engine.getConfig().stereoscopicEyeCount : 1; @@ -152,7 +219,8 @@ void RenderPass::appendCommands(FEngine& engine, CommandTypeFlags const commandT } } -void RenderPass::appendCustomCommand(uint8_t channel, Pass pass, CustomCommand custom, uint32_t order, +void RenderPass::appendCustomCommand(Command* commands, + uint8_t channel, Pass pass, CustomCommand custom, uint32_t order, Executor::CustomCommandFn command) { assert_invariant((uint64_t(order) << CUSTOM_ORDER_SHIFT) <= CUSTOM_ORDER_MASK); @@ -168,11 +236,10 @@ void RenderPass::appendCustomCommand(uint8_t channel, Pass pass, CustomCommand c cmd |= uint64_t(order) << CUSTOM_ORDER_SHIFT; cmd |= uint64_t(index); - Command* const curr = append(1); - curr->key = cmd; + commands->key = cmd; } -void RenderPass::sortCommands(FEngine& engine) noexcept { +void RenderPass::sortCommands(Arena& arena) noexcept { SYSTRACE_NAME("sort and trim commands"); std::sort(mCommandBegin, mCommandEnd); @@ -183,30 +250,20 @@ void RenderPass::sortCommands(FEngine& engine) noexcept { return c.key != uint64_t(Pass::SENTINEL); }); - resize(uint32_t(last - mCommandBegin)); - - if (engine.isAutomaticInstancingEnabled()) { - instanceify(engine); - } + resize(arena, uint32_t(last - mCommandBegin)); } -void RenderPass::execute(FEngine& engine, const char* name, +void RenderPass::execute(RenderPass const& pass, + FEngine& engine, const char* name, backend::Handle renderTarget, - backend::RenderPassParams params) const noexcept { - + backend::RenderPassParams params) noexcept { DriverApi& driver = engine.getDriverApi(); - - // this is a good time to flush the CommandStream, because we're about to potentially - // output a lot of commands. This guarantees here that we have at least - // FILAMENT_MIN_COMMAND_BUFFERS_SIZE_IN_MB bytes (1MiB by default). 
- engine.flush(); - driver.beginRenderPass(renderTarget, params); - getExecutor().execute(engine, name); + pass.getExecutor().execute(engine, name); driver.endRenderPass(); } -void RenderPass::instanceify(FEngine& engine) noexcept { +void RenderPass::instanceify(FEngine& engine, Arena& arena) noexcept { SYSTRACE_NAME("instanceify"); // instanceify works by scanning the **sorted** command stream, looking for repeat draw @@ -262,7 +319,8 @@ void RenderPass::instanceify(FEngine& engine) noexcept { // buffer large enough for all instances data stagingBufferSize = sizeof(PerRenderableData) * (last - curr); stagingBuffer = (PerRenderableData*)::malloc(stagingBufferSize); - uboData = mRenderableSoa->data(); + uboData = mRenderableSoa.data(); + assert_invariant(uboData); } // copy the ubo data to a staging buffer @@ -315,7 +373,7 @@ void RenderPass::instanceify(FEngine& engine) noexcept { return command.key == uint64_t(Pass::SENTINEL); }); - resize(uint32_t(lastCommand - mCommandBegin)); + resize(arena, uint32_t(lastCommand - mCommandBegin)); } assert_invariant(stagingBuffer == nullptr); @@ -323,7 +381,7 @@ void RenderPass::instanceify(FEngine& engine) noexcept { /* static */ -UTILS_ALWAYS_INLINE // this function exists only to make the code more readable. we want it inlined. +UTILS_ALWAYS_INLINE // This function exists only to make the code more readable. we want it inlined. inline // and we don't need it in the compilation unit void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant, FMaterialInstance const* const UTILS_RESTRICT mi, bool inverseFrontFaces) noexcept { @@ -374,7 +432,7 @@ void RenderPass::setupColorCommand(Command& cmdDraw, Variant variant, /* static */ UTILS_NOINLINE -void RenderPass::generateCommands(uint32_t commandTypeFlags, Command* const commands, +void RenderPass::generateCommands(CommandTypeFlags commandTypeFlags, Command* const commands, FScene::RenderableSoa const& soa, Range range, Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, float3 cameraPosition, float3 cameraForward, @@ -432,9 +490,9 @@ void RenderPass::generateCommands(uint32_t commandTypeFlags, Command* const comm } /* static */ -template +template UTILS_NOINLINE -RenderPass::Command* RenderPass::generateCommandsImpl(uint32_t extraFlags, +RenderPass::Command* RenderPass::generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags, Command* UTILS_RESTRICT curr, FScene::RenderableSoa const& UTILS_RESTRICT soa, Range range, Variant const variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, @@ -737,13 +795,13 @@ void RenderPass::updateSummedPrimitiveCounts( // ------------------------------------------------------------------------------------------------ void RenderPass::Executor::overridePolygonOffset(backend::PolygonOffset const* polygonOffset) noexcept { - if ((mPolygonOffsetOverride = (polygonOffset != nullptr))) { + if ((mPolygonOffsetOverride = (polygonOffset != nullptr))) { // NOLINT(*-assignment-in-if-condition) mPolygonOffset = *polygonOffset; } } void RenderPass::Executor::overrideScissor(backend::Viewport const* scissor) noexcept { - if ((mScissorOverride = (scissor != nullptr))) { + if ((mScissorOverride = (scissor != nullptr))) { // NOLINT(*-assignment-in-if-condition) mScissor = *scissor; } } @@ -754,15 +812,20 @@ void RenderPass::Executor::overrideScissor(backend::Viewport const& scissor) noe } void RenderPass::Executor::execute(FEngine& engine, const char*) const noexcept { - execute(engine.getDriverApi(), mCommands.begin(), 
mCommands.end()); + execute(engine, mCommands.begin(), mCommands.end()); } UTILS_NOINLINE // no need to be inlined -void RenderPass::Executor::execute(backend::DriverApi& driver, +void RenderPass::Executor::execute(FEngine& engine, const Command* first, const Command* last) const noexcept { + SYSTRACE_CALL(); SYSTRACE_CONTEXT(); + DriverApi& driver = engine.getDriverApi(); + size_t const capacity = engine.getMinCommandBufferSize(); + CircularBuffer const& circularBuffer = driver.getCircularBuffer(); + if (first != last) { SYSTRACE_VALUE32("commandCount", last - first); @@ -781,126 +844,163 @@ void RenderPass::Executor::execute(backend::DriverApi& driver, FMaterial const* UTILS_RESTRICT ma = nullptr; auto const* UTILS_RESTRICT pCustomCommands = mCustomCommands.data(); - first--; - while (++first != last) { - assert_invariant(first->key != uint64_t(Pass::SENTINEL)); - - /* - * Be careful when changing code below, this is the hot inner-loop - */ - - if (UTILS_UNLIKELY((first->key & CUSTOM_MASK) != uint64_t(CustomCommand::PASS))) { - mi = nullptr; // custom command could change the currently bound MaterialInstance - uint32_t const index = (first->key & CUSTOM_INDEX_MASK) >> CUSTOM_INDEX_SHIFT; - assert_invariant(index < mCustomCommands.size()); - pCustomCommands[index](); - continue; + // Maximum space occupied in the CircularBuffer by a single `Command`. This must be + // reevaluated when the inner loop below adds DriverApi commands or when we change the + // CommandStream protocol. Currently, the maximum is 240 bytes, and we use 256 to be on + // the safer side. + size_t const maxCommandSizeInBytes = 256; + + // Number of Commands that can be issued and guaranteed to fit in the current + // CircularBuffer allocation. In practice, we'll have tons of headroom especially if + // skinning and morphing aren't used. With a 2 MiB buffer (the default) a batch is + // 8192 commands (i.e. draw calls). + size_t const batchCommandCount = capacity / maxCommandSizeInBytes; + while(first != last) { + Command const* const batchLast = std::min(first + batchCommandCount, last); + + // actual number of commands we need to write (can be smaller than batchCommandCount) + size_t const commandCount = batchLast - first; + size_t const commandSizeInBytes = commandCount * maxCommandSizeInBytes; + + // check we have enough capacity to write these commandCount commands, if not, + // request a new CircularBuffer allocation of `capacity` bytes. + if (UTILS_UNLIKELY(circularBuffer.getUsed() > capacity - commandSizeInBytes)) { + engine.flush(); // TODO: we should use a "fast" flush if possible } - // primitiveHandle may be invalid if no geometry was set on the renderable. - if (UTILS_UNLIKELY(!first->primitive.primitiveHandle)) { - continue; - } + first--; + while (++first != batchLast) { + assert_invariant(first->key != uint64_t(Pass::SENTINEL)); - // per-renderable uniform - const PrimitiveInfo info = first->primitive; - pipeline.rasterState = info.rasterState; - - if (UTILS_UNLIKELY(mi != info.mi)) { - // this is always taken the first time - mi = info.mi; - ma = mi->getMaterial(); - - auto const& scissor = mi->getScissor(); - if (UTILS_UNLIKELY(mi->hasScissor())) { - // scissor is set, we need to apply the offset/clip - // clang vectorizes this! 
- constexpr int32_t maxvali = std::numeric_limits::max(); - const backend::Viewport scissorViewport = mScissorViewport; - // compute new left/bottom, assume no overflow - int32_t l = scissor.left + scissorViewport.left; - int32_t b = scissor.bottom + scissorViewport.bottom; - // compute right/top without overflowing, scissor.width/height guaranteed - // to convert to int32 - int32_t r = (l > maxvali - int32_t(scissor.width)) ? - maxvali : l + int32_t(scissor.width); - int32_t t = (b > maxvali - int32_t(scissor.height)) ? - maxvali : b + int32_t(scissor.height); - // clip to the viewport - l = std::max(l, scissorViewport.left); - b = std::max(b, scissorViewport.bottom); - r = std::min(r, scissorViewport.left + int32_t(scissorViewport.width)); - t = std::min(t, scissorViewport.bottom + int32_t(scissorViewport.height)); - assert_invariant(r >= l && t >= b); - *pScissor = { l, b, uint32_t(r - l), uint32_t(t - b) }; - } else { - // no scissor set (common case), 'scissor' has its default value, use that. - *pScissor = scissor; + /* + * Be careful when changing code below, this is the hot inner-loop + */ + + if (UTILS_UNLIKELY((first->key & CUSTOM_MASK) != uint64_t(CustomCommand::PASS))) { + mi = nullptr; // custom command could change the currently bound MaterialInstance + uint32_t const index = (first->key & CUSTOM_INDEX_MASK) >> CUSTOM_INDEX_SHIFT; + assert_invariant(index < mCustomCommands.size()); + pCustomCommands[index](); + continue; } - *pPipelinePolygonOffset = mi->getPolygonOffset(); - pipeline.stencilState = mi->getStencilState(); - mi->use(driver); - } + // primitiveHandle may be invalid if no geometry was set on the renderable. + if (UTILS_UNLIKELY(!first->primitive.primitiveHandle)) { + continue; + } - pipeline.program = ma->getProgram(info.materialVariant); + // per-renderable uniform + const PrimitiveInfo info = first->primitive; + pipeline.rasterState = info.rasterState; + + if (UTILS_UNLIKELY(mi != info.mi)) { + // this is always taken the first time + mi = info.mi; + assert_invariant(mi); + + ma = mi->getMaterial(); + + auto const& scissor = mi->getScissor(); + if (UTILS_UNLIKELY(mi->hasScissor())) { + // scissor is set, we need to apply the offset/clip + // clang vectorizes this! + constexpr int32_t maxvali = std::numeric_limits::max(); + const backend::Viewport scissorViewport = mScissorViewport; + // compute new left/bottom, assume no overflow + int32_t l = scissor.left + scissorViewport.left; + int32_t b = scissor.bottom + scissorViewport.bottom; + // compute right/top without overflowing, scissor.width/height guaranteed + // to convert to int32 + int32_t r = (l > maxvali - int32_t(scissor.width)) ? + maxvali : l + int32_t(scissor.width); + int32_t t = (b > maxvali - int32_t(scissor.height)) ? + maxvali : b + int32_t(scissor.height); + // clip to the viewport + l = std::max(l, scissorViewport.left); + b = std::max(b, scissorViewport.bottom); + r = std::min(r, scissorViewport.left + int32_t(scissorViewport.width)); + t = std::min(t, scissorViewport.bottom + int32_t(scissorViewport.height)); + assert_invariant(r >= l && t >= b); + *pScissor = { l, b, uint32_t(r - l), uint32_t(t - b) }; + } else { + // no scissor set (common case), 'scissor' has its default value, use that. 
+ *pScissor = scissor; + } + + *pPipelinePolygonOffset = mi->getPolygonOffset(); + pipeline.stencilState = mi->getStencilState(); + mi->use(driver); + } - uint16_t const instanceCount = info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK; - auto getPerObjectUboHandle = - [this, &info, &instanceCount]() -> std::pair, uint32_t> { - if (info.instanceBufferHandle) { - // "hybrid" instancing -- instanceBufferHandle takes the place of the UBO - return { info.instanceBufferHandle, 0 }; + assert_invariant(ma); + pipeline.program = ma->getProgram(info.materialVariant); + + uint16_t const instanceCount = + info.instanceCount & PrimitiveInfo::INSTANCE_COUNT_MASK; + auto getPerObjectUboHandle = + [this, &info, &instanceCount]() -> std::pair, uint32_t> { + if (info.instanceBufferHandle) { + // "hybrid" instancing -- instanceBufferHandle takes the place of the UBO + return { info.instanceBufferHandle, 0 }; + } + bool const userInstancing = + (info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u; + if (!userInstancing && instanceCount > 1) { + // automatic instancing + return { + mInstancedUboHandle, + info.index * sizeof(PerRenderableData) }; + } else { + // manual instancing + return { mUboHandle, info.index * sizeof(PerRenderableData) }; + } + }; + + // Bind per-renderable uniform block. There is no need to attempt to skip this command + // because the backends already do this. + auto const [perObjectUboHandle, offset] = getPerObjectUboHandle(); + assert_invariant(perObjectUboHandle); + driver.bindBufferRange(BufferObjectBinding::UNIFORM, + +UniformBindingPoints::PER_RENDERABLE, + perObjectUboHandle, + offset, + sizeof(PerRenderableUib)); + + if (UTILS_UNLIKELY(info.skinningHandle)) { + // note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations + driver.bindBufferRange(BufferObjectBinding::UNIFORM, + +UniformBindingPoints::PER_RENDERABLE_BONES, + info.skinningHandle, + info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData), + sizeof(PerRenderableBoneUib)); + // note: always bind the skinningTexture because the shader needs it. + driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING, + info.skinningTexture); + // note: even if only skinning is enabled, binding morphTargetBuffer is needed. + driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING, + info.morphTargetBuffer); } - bool const userInstancing = - (info.instanceCount & PrimitiveInfo::USER_INSTANCE_MASK) != 0u; - if (!userInstancing && instanceCount > 1) { - // automatic instancing - return { mInstancedUboHandle, info.index * sizeof(PerRenderableData) }; - } else { - // manual instancing - return { mUboHandle, info.index * sizeof(PerRenderableData) }; + + if (UTILS_UNLIKELY(info.morphWeightBuffer)) { + // Instead of using a UBO per primitive, we could also have a single UBO for all + // primitives and use bindUniformBufferRange which might be more efficient. + driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING, + info.morphWeightBuffer); + driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING, + info.morphTargetBuffer); + // note: even if only morphing is enabled, binding skinningTexture is needed. + driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING, + info.skinningTexture); } - }; - - // bind per-renderable uniform block. there is no need to attempt to skip this command - // because the backends already do this. 
- auto const [perObjectUboHandle, offset] = getPerObjectUboHandle(); - assert_invariant(perObjectUboHandle); - driver.bindBufferRange(BufferObjectBinding::UNIFORM, - +UniformBindingPoints::PER_RENDERABLE, - perObjectUboHandle, - offset, - sizeof(PerRenderableUib)); - - if (UTILS_UNLIKELY(info.skinningHandle)) { - // note: we can't bind less than sizeof(PerRenderableBoneUib) due to glsl limitations - driver.bindBufferRange(BufferObjectBinding::UNIFORM, - +UniformBindingPoints::PER_RENDERABLE_BONES, - info.skinningHandle, - info.skinningOffset * sizeof(PerRenderableBoneUib::BoneData), - sizeof(PerRenderableBoneUib)); - // note: always bind the skinningTexture because the shader needs it. - driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING, - info.skinningTexture); - // note: even if only skinning is enabled, binding morphTargetBuffer is needed. - driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING, - info.morphTargetBuffer); - } - - if (UTILS_UNLIKELY(info.morphWeightBuffer)) { - // Instead of using a UBO per primitive, we could also have a single UBO for all - // primitives and use bindUniformBufferRange which might be more efficient. - driver.bindUniformBuffer(+UniformBindingPoints::PER_RENDERABLE_MORPHING, - info.morphWeightBuffer); - driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_MORPHING, - info.morphTargetBuffer); - // note: even if only morphing is enabled, binding skinningTexture is needed. - driver.bindSamplers(+SamplerBindingPoints::PER_RENDERABLE_SKINNING, - info.skinningTexture); + + driver.draw(pipeline, info.primitiveHandle, instanceCount); } + } - driver.draw(pipeline, info.primitiveHandle, instanceCount); + // If the remaining space is less than half the capacity, we flush right away to + // allow some headroom for commands that might come later. + if (UTILS_UNLIKELY(circularBuffer.getUsed() > capacity / 2)) { + engine.flush(); } } diff --git a/filament/src/RenderPass.h b/filament/src/RenderPass.h index 4474079594f..646171efd58 100644 --- a/filament/src/RenderPass.h +++ b/filament/src/RenderPass.h @@ -22,26 +22,38 @@ #include "details/Camera.h" #include "details/Scene.h" -#include "backend/DriverApiForward.h" - -#include +#include "private/filament/Variant.h" +#include "utils/BitmaskEnum.h" #include #include #include #include +#include #include -#include #include +#include + #include #include +#include +#include +#include #include +#include +#include + namespace filament { +namespace backend { +class CommandBufferQueue; +} + class FMaterialInstance; +class RenderPassBuilder; class RenderPass { public: @@ -171,7 +183,7 @@ class RenderPass { EPILOG = uint64_t(0x2) << CUSTOM_SHIFT }; - enum CommandTypeFlags : uint8_t { + enum class CommandTypeFlags : uint32_t { COLOR = 0x1, // generate the color pass only DEPTH = 0x2, // generate the depth pass only ( e.g. 
shadowmap) @@ -191,7 +203,6 @@ class RenderPass { SCREEN_SPACE_REFLECTIONS = COLOR | FILTER_TRANSLUCENT_OBJECTS }; - /* * The sorting material key is 32 bits and encoded as: * @@ -240,7 +251,6 @@ class RenderPass { uint32_t skinningOffset = 0; // 4 bytes uint16_t instanceCount; // 2 bytes [MSb: user] Variant materialVariant; // 1 byte -// uint8_t reserved[0] = {}; // 0 bytes static const uint16_t USER_INSTANCE_MASK = 0x8000u; static const uint16_t INSTANCE_COUNT_MASK = 0x7fffu; @@ -253,7 +263,7 @@ class RenderPass { uint64_t reserved[1] = {}; // 8 bytes bool operator < (Command const& rhs) const noexcept { return key < rhs.key; } // placement new declared as "throw" to avoid the compiler's null-check - inline void* operator new (std::size_t, void* ptr) { + inline void* operator new (size_t, void* ptr) { assert_invariant(ptr); return ptr; } @@ -269,61 +279,31 @@ class RenderPass { // Arena used for commands using Arena = utils::Arena< - utils::LinearAllocator, // note: can't change this allocator + utils::LinearAllocatorWithFallback, utils::LockingPolicy::NoLock, utils::TrackingPolicy::HighWatermark, utils::AreaPolicy::StaticArea>; - /* - * Create a RenderPass. - * The Arena is used to allocate commands which are then owned by the Arena. - */ - RenderPass(FEngine& engine, Arena& arena) noexcept; + // RenderPass can only be moved + RenderPass(RenderPass&& rhs) = default; - // Copy the RenderPass as is. This can be used to create a RenderPass from a "template" - // by copying from an "empty" RenderPass. - RenderPass(RenderPass const& rhs); + // RenderPass can't be copied + RenderPass(RenderPass const& rhs) = delete; + RenderPass& operator=(RenderPass const& rhs) = delete; + RenderPass& operator=(RenderPass&& rhs) = delete; // allocated commands ARE NOT freed, they're owned by the Arena ~RenderPass() noexcept; - // a box that both offsets the viewport and clips it - void setScissorViewport(backend::Viewport viewport) noexcept; - - // specifies the geometry to generate commands for - void setGeometry(FScene::RenderableSoa const& soa, utils::Range vr, - backend::Handle uboHandle) noexcept; - - // specifies camera information (e.g. used for sorting commands) - void setCamera(const CameraInfo& camera) noexcept; - - // flags controlling how commands are generated - void setRenderFlags(RenderFlags flags) noexcept { mFlags = flags; } - RenderFlags getRenderFlags() const noexcept { return mFlags; } - - // variant to use - void setVariant(Variant variant) noexcept { mVariant = variant; } - - // Sets the visibility mask, which is AND-ed against each Renderable's VISIBLE_MASK to determine - // if the renderable is visible for this pass. - // Defaults to all 1's, which means all renderables in this render pass will be rendered. - void setVisibilityMask(FScene::VisibleMaskType mask) noexcept { mVisibilityMask = mask; } - Command const* begin() const noexcept { return mCommandBegin; } Command const* end() const noexcept { return mCommandEnd; } bool empty() const noexcept { return begin() == end(); } - // This is the main function of this class, this appends commands to the pass using - // the current camera, geometry and flags set. This can be called multiple times if needed. 
- void appendCommands(FEngine& engine, CommandTypeFlags commandTypeFlags) noexcept; - - // sorts and instanceify commands then trims sentinels - void sortCommands(FEngine& engine) noexcept; - // Helper to execute all the commands generated by this RenderPass - void execute(FEngine& engine, const char* name, + static void execute(RenderPass const& pass, + FEngine& engine, const char* name, backend::Handle renderTarget, - backend::RenderPassParams params) const noexcept; + backend::RenderPassParams params) noexcept; /* * Executor holds the range of commands to execute for a given pass @@ -331,6 +311,7 @@ class RenderPass { class Executor { using CustomCommandFn = std::function; friend class RenderPass; + friend class RenderPassBuilder; // these fields are constant after creation utils::Slice mCommands; @@ -346,8 +327,7 @@ class RenderPass { Executor(RenderPass const* pass, Command const* b, Command const* e) noexcept; - void execute(backend::DriverApi& driver, - const Command* first, const Command* last) const noexcept; + void execute(FEngine& engine, const Command* first, const Command* last) const noexcept; public: Executor() = default; @@ -366,37 +346,39 @@ class RenderPass { }; // returns a new executor for this pass - Executor getExecutor() { - return { this, mCommandBegin, mCommandEnd }; - } - Executor getExecutor() const { return { this, mCommandBegin, mCommandEnd }; } - // returns a new executor for this pass with a custom range - Executor getExecutor(Command const* b, Command const* e) { - return { this, b, e }; - } - Executor getExecutor(Command const* b, Command const* e) const { return { this, b, e }; } +private: + friend class FRenderer; + friend class RenderPassBuilder; + RenderPass(FEngine& engine, RenderPassBuilder const& builder) noexcept; + + // This is the main function of this class, this appends commands to the pass using + // the current camera, geometry and flags set. This can be called multiple times if needed. + void appendCommands(FEngine& engine, + utils::Slice commands, CommandTypeFlags commandTypeFlags) noexcept; + // Appends a custom command. - void appendCustomCommand(uint8_t channel, Pass pass, CustomCommand custom, uint32_t order, + void appendCustomCommand(Command* commands, + uint8_t channel, Pass pass, CustomCommand custom, uint32_t order, Executor::CustomCommandFn command); + void resize(Arena& arena, size_t count) noexcept; -private: - friend class FRenderer; + // sorts commands then trims sentinels + void sortCommands(Arena& arena) noexcept; - Command* append(size_t count) noexcept; - void resize(size_t count) noexcept; - void instanceify(FEngine& engine) noexcept; + // instanceify commands then trims sentinels + void instanceify(FEngine& engine, Arena& arena) noexcept; - // we choose the command count per job to minimize JobSystem overhead. - // on a Pixel 4, 2048 commands is about half a millisecond of processing. + // We choose the command count per job to minimize JobSystem overhead. + // On a Pixel 4, 2048 commands is about half a millisecond of processing. 
static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_COUNT = 2048; static constexpr size_t JOBS_PARALLEL_FOR_COMMANDS_SIZE = sizeof(Command) * JOBS_PARALLEL_FOR_COMMANDS_COUNT; @@ -404,15 +386,15 @@ class RenderPass { static_assert(JOBS_PARALLEL_FOR_COMMANDS_SIZE % utils::CACHELINE_SIZE == 0, "Size of Commands jobs must be multiple of a cache-line size"); - static inline void generateCommands(uint32_t commandTypeFlags, Command* commands, + static inline void generateCommands(CommandTypeFlags commandTypeFlags, Command* commands, FScene::RenderableSoa const& soa, utils::Range range, Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, math::float3 cameraPosition, math::float3 cameraForward, uint8_t instancedStereoEyeCount) noexcept; - template - static inline Command* generateCommandsImpl(uint32_t extraFlags, Command* curr, + template + static inline Command* generateCommandsImpl(RenderPass::CommandTypeFlags extraFlags, Command* curr, FScene::RenderableSoa const& soa, utils::Range range, Variant variant, RenderFlags renderFlags, FScene::VisibleMaskType visibilityMask, math::float3 cameraPosition, math::float3 cameraForward, @@ -424,50 +406,128 @@ class RenderPass { static void updateSummedPrimitiveCounts( FScene::RenderableSoa& renderableData, utils::Range vr) noexcept; - // a reference to the Engine, mostly to get to things like JobSystem - // Arena where all Commands are allocated. The Arena owns the commands. - Arena& mCommandArena; + FScene::RenderableSoa const& mRenderableSoa; + utils::Range const mVisibleRenderables; + backend::Handle const mUboHandle; + math::float3 const mCameraPosition; + math::float3 const mCameraForwardVector; + RenderFlags const mFlags; + Variant const mVariant; + FScene::VisibleMaskType const mVisibilityMask; + backend::Viewport const mScissorViewport; // Pointer to the first command Command* mCommandBegin = nullptr; - // Pointer to one past the last command Command* mCommandEnd = nullptr; + // a UBO for instanced primitives + backend::Handle mInstancedUboHandle; + // a vector for our custom commands + using CustomCommandVector = std::vector>; + mutable CustomCommandVector mCustomCommands; +}; - // the SOA containing the renderables we're interested in - FScene::RenderableSoa const* mRenderableSoa = nullptr; +class RenderPassBuilder { + friend class RenderPass; - // The range of visible renderables in the SOA above + RenderPass::Arena& mArena; + RenderPass::CommandTypeFlags mCommandTypeFlags{}; + backend::Viewport mScissorViewport{ 0, 0, INT32_MAX, INT32_MAX }; + FScene::RenderableSoa const* mRenderableSoa = nullptr; utils::Range mVisibleRenderables{}; - - // the UBO containing the data for the renderables backend::Handle mUboHandle; - backend::Handle mInstancedUboHandle; - - // info about the camera math::float3 mCameraPosition{}; math::float3 mCameraForwardVector{}; + RenderPass::RenderFlags mFlags{}; + Variant mVariant{}; + FScene::VisibleMaskType mVisibilityMask = std::numeric_limits::max(); - // info about the scene features (e.g.: has shadows, lighting, etc...) 
- RenderFlags mFlags{}; + using CustomCommandRecord = std::tuple< + uint8_t, + RenderPass::Pass, + RenderPass::CustomCommand, + uint32_t, + RenderPass::Executor::CustomCommandFn>; - // Variant to use - Variant mVariant{}; + using CustomCommandContainer = std::vector>; - // Additional visibility mask - FScene::VisibleMaskType mVisibilityMask = std::numeric_limits::max(); + // we make this optional because it's not used often, and we don't want to have + // to construct it by default. + std::optional mCustomCommands; - backend::Viewport mScissorViewport{ 0, 0, - std::numeric_limits::max(), - std::numeric_limits::max() }; +public: + explicit RenderPassBuilder(RenderPass::Arena& arena) : mArena(arena) { } - // a vector for our custom commands - using CustomCommandVector = std::vector>; - mutable CustomCommandVector mCustomCommands; + RenderPassBuilder& commandTypeFlags(RenderPass::CommandTypeFlags commandTypeFlags) noexcept { + mCommandTypeFlags = commandTypeFlags; + return *this; + } + + RenderPassBuilder& scissorViewport(backend::Viewport viewport) noexcept { + mScissorViewport = viewport; + return *this; + } + + // specifies the geometry to generate commands for + RenderPassBuilder& geometry(FScene::RenderableSoa const& soa, utils::Range vr, + backend::Handle uboHandle) noexcept { + mRenderableSoa = &soa; + mVisibleRenderables = vr; + mUboHandle = uboHandle; + return *this; + } + + // Specifies camera information (e.g. used for sorting commands) + RenderPassBuilder& camera(const CameraInfo& camera) noexcept { + mCameraPosition = camera.getPosition(); + mCameraForwardVector = camera.getForwardVector(); + return *this; + } + + // flags controlling how commands are generated + RenderPassBuilder& renderFlags(RenderPass::RenderFlags flags) noexcept { + mFlags = flags; + return *this; + } + + // like above but allows to set specific flags + RenderPassBuilder& renderFlags( + RenderPass::RenderFlags mask, RenderPass::RenderFlags value) noexcept { + mFlags = (mFlags & mask) | (value & mask); + return *this; + } + + // variant to use + RenderPassBuilder& variant(Variant variant) noexcept { + mVariant = variant; + return *this; + } + + // Sets the visibility mask, which is AND-ed against each Renderable's VISIBLE_MASK to + // determine if the renderable is visible for this pass. + // Defaults to all 1's, which means all renderables in this render pass will be rendered. + RenderPassBuilder& visibilityMask(FScene::VisibleMaskType mask) noexcept { + mVisibilityMask = mask; + return *this; + } + + RenderPassBuilder& customCommand(FEngine& engine, + uint8_t channel, + RenderPass::Pass pass, + RenderPass::CustomCommand custom, + uint32_t order, + const RenderPass::Executor::CustomCommandFn& command); + + RenderPass build(FEngine& engine); }; + } // namespace filament +template<> struct utils::EnableBitMaskOperators + : public std::true_type {}; + #endif // TNT_FILAMENT_RENDERPASS_H diff --git a/filament/src/RendererUtils.cpp b/filament/src/RendererUtils.cpp index 2707b9201be..a26b9b7b53e 100644 --- a/filament/src/RendererUtils.cpp +++ b/filament/src/RendererUtils.cpp @@ -228,10 +228,6 @@ FrameGraphId RendererUtils::colorPass( out.params.subpassMask = 1; } - // this is a good time to flush the CommandStream, because we're about to potentially - // output a lot of commands. This guarantees here that we have at least - // FILAMENT_MIN_COMMAND_BUFFERS_SIZE_IN_MB bytes (1MiB by default). 
- engine.flush(); driver.beginRenderPass(out.target, out.params); passExecutor.execute(engine, resources.getPassName()); driver.endRenderPass(); diff --git a/filament/src/ShadowMap.cpp b/filament/src/ShadowMap.cpp index e4d7f67be01..7015e14466f 100644 --- a/filament/src/ShadowMap.cpp +++ b/filament/src/ShadowMap.cpp @@ -16,19 +16,39 @@ #include "ShadowMap.h" -#include "RenderPass.h" +#include +#include +#include #include "components/LightManager.h" +#include "details/DebugRegistry.h" #include "details/Engine.h" #include "details/Scene.h" +#include #include +#include #include +#include +#include #include +#include +#include +#include +#include +#include + +#include +#include +#include #include +#include + +#include +#include using namespace utils; @@ -239,7 +259,8 @@ ShadowMap::ShaderParameters ShadowMap::updateDirectional(FEngine& engine, ShadowMap::ShaderParameters ShadowMap::updatePunctual( mat4f const& Mv, float outerConeAngle, float nearPlane, float farPlane, const ShadowMapInfo& shadowMapInfo, const FLightManager::ShadowParams& params) noexcept { - const mat4f Mp = mat4f::perspective(outerConeAngle * f::RAD_TO_DEG * 2.0f, 1.0f, nearPlane, farPlane); + const mat4f Mp = mat4f::perspective( + outerConeAngle * f::RAD_TO_DEG * 2.0f, 1.0f, nearPlane, farPlane); assert_invariant(shadowMapInfo.textureDimension == mOptions->mapSize); diff --git a/filament/src/ShadowMap.h b/filament/src/ShadowMap.h index ce1ee860622..d0ca26945f9 100644 --- a/filament/src/ShadowMap.h +++ b/filament/src/ShadowMap.h @@ -17,20 +17,30 @@ #ifndef TNT_FILAMENT_DETAILS_SHADOWMAP_H #define TNT_FILAMENT_DETAILS_SHADOWMAP_H -#include "components/LightManager.h" +#include +#include "Culler.h" #include "PerShadowMapUniforms.h" #include "details/Camera.h" #include "details/Scene.h" +#include "components/LightManager.h" + #include +#include -#include -#include +#include -#include +#include +#include #include +#include + +#include + +#include +#include namespace filament { diff --git a/filament/src/ShadowMapManager.cpp b/filament/src/ShadowMapManager.cpp index 9fa72d54b8b..f2c7c603190 100644 --- a/filament/src/ShadowMapManager.cpp +++ b/filament/src/ShadowMapManager.cpp @@ -15,17 +15,48 @@ */ #include "ShadowMapManager.h" - #include "RenderPass.h" #include "ShadowMap.h" +#include +#include +#include + +#include + +#include "components/RenderableManager.h" + +#include "details/Camera.h" +#include "details/DebugRegistry.h" #include "details/Texture.h" #include "details/View.h" -#include +#include "fg/FrameGraph.h" +#include "fg/FrameGraphId.h" +#include "fg/FrameGraphRenderPass.h" +#include "fg/FrameGraphTexture.h" + +#include +#include -#include #include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include namespace filament { @@ -34,15 +65,6 @@ using namespace math; ShadowMapManager::ShadowMapManager(FEngine& engine) : mEngine(engine) { - // initialize our ShadowMap array in-place - UTILS_NOUNROLL - for (auto& entry : mShadowMapCache) { - new (&entry) ShadowMap(engine); - } - - mShadowUbh = engine.getDriverApi().createBufferObject(mShadowUb.getSize(), - BufferObjectBinding::UNIFORM, BufferUsage::DYNAMIC); - FDebugRegistry& debugRegistry = engine.getDebugRegistry(); debugRegistry.registerProperty("d.shadowmap.visualize_cascades", &engine.debug.shadowmap.visualize_cascades); @@ -52,25 +74,75 @@ ShadowMapManager::ShadowMapManager(FEngine& engine) ShadowMapManager::~ShadowMapManager() { // destroy the ShadowMap array in-place - 
UTILS_NOUNROLL
-    for (auto& entry : mShadowMapCache) {
-        std::destroy_at(std::launder(reinterpret_cast<ShadowMap*>(&entry)));
+    if (UTILS_UNLIKELY(mInitialized)) {
+        UTILS_NOUNROLL
+        for (auto& entry: mShadowMapCache) {
+            std::destroy_at(std::launder(reinterpret_cast<ShadowMap*>(&entry)));
+        }
     }
 }
 
-void ShadowMapManager::terminate(FEngine& engine) {
-    DriverApi& driver = engine.getDriverApi();
-    driver.destroyBufferObject(mShadowUbh);
-    UTILS_NOUNROLL
-    for (auto& entry : mShadowMapCache) {
-        std::launder(reinterpret_cast<ShadowMap*>(&entry))->terminate(engine);
+void ShadowMapManager::createIfNeeded(FEngine& engine,
+        std::unique_ptr<ShadowMapManager>& inOutShadowMapManager) {
+    if (UTILS_UNLIKELY(!inOutShadowMapManager)) {
+        inOutShadowMapManager.reset(new ShadowMapManager(engine));
+    }
+}
+
+void ShadowMapManager::terminate(FEngine& engine,
+        std::unique_ptr<ShadowMapManager>& shadowMapManager) {
+    if (shadowMapManager) {
+        shadowMapManager->terminate(engine);
     }
 }
 
+void ShadowMapManager::terminate(FEngine& engine) {
+    if (UTILS_UNLIKELY(mInitialized)) {
+        DriverApi& driver = engine.getDriverApi();
+        driver.destroyBufferObject(mShadowUbh);
+        UTILS_NOUNROLL
+        for (auto& entry: mShadowMapCache) {
+            std::launder(reinterpret_cast<ShadowMap*>(&entry))->terminate(engine);
+        }
+    }
+}
 
-ShadowMapManager::ShadowTechnique ShadowMapManager::update(FEngine& engine, FView& view,
+ShadowMapManager::ShadowTechnique ShadowMapManager::update(
+        Builder const& builder,
+        FEngine& engine, FView& view,
         CameraInfo const& cameraInfo, FScene::RenderableSoa& renderableData,
         FScene::LightSoa const& lightData) noexcept {
+
+    if (!builder.mDirectionalShadowMapCount && !builder.mSpotShadowMapCount) {
+        // no shadows were recorded
+        return ShadowTechnique::NONE;
+    }
+
+    // initialize the shadowmap array the first time
+    if (UTILS_UNLIKELY(!mInitialized)) {
+        mInitialized = true;
+        // initialize our ShadowMap array in-place
+        mShadowUbh = engine.getDriverApi().createBufferObject(mShadowUb.getSize(),
+                BufferObjectBinding::UNIFORM, BufferUsage::DYNAMIC);
+        UTILS_NOUNROLL
+        for (auto& entry: mShadowMapCache) {
+            new(&entry) ShadowMap(engine);
+        }
+    }
+
+    mDirectionalShadowMapCount = builder.mDirectionalShadowMapCount;
+    mSpotShadowMapCount = builder.mSpotShadowMapCount;
+
+    for (auto const& entry : builder.mShadowMaps) {
+        auto& shadowMap = getShadowMap(entry.shadowIndex);
+        shadowMap.initialize(
+                entry.lightIndex,
+                entry.shadowType,
+                entry.shadowIndex,
+                entry.face,
+                entry.options);
+    }
+
     ShadowTechnique shadowTechnique = {};
 
     calculateTextureRequirements(engine, view, lightData);
@@ -89,46 +161,54 @@ ShadowMapManager::ShadowTechnique ShadowMapManager::update(FEngine& engine, FVie
     return shadowTechnique;
 }
 
-void ShadowMapManager::reset() noexcept {
-    mDirectionalShadowMapCount = 0;
-    mSpotShadowMapCount = 0;
-}
-
-void ShadowMapManager::setDirectionalShadowMap(size_t lightIndex,
+ShadowMapManager::Builder& ShadowMapManager::Builder::directionalShadowMap(size_t lightIndex,
         LightManager::ShadowOptions const* options) noexcept {
     assert_invariant(options->shadowCascades <= CONFIG_MAX_SHADOW_CASCADES);
-    // this updates getCascadedShadowMap()
     mDirectionalShadowMapCount = options->shadowCascades;
-    utils::Slice<ShadowMap> cascadedShadowMap = getCascadedShadowMap();
    for (size_t c = 0; c < options->shadowCascades; c++) {
-        ShadowMap& shadowMap = cascadedShadowMap[c];
-        shadowMap.initialize(lightIndex, ShadowType::DIRECTIONAL, c, 0, options);
+        mShadowMaps.push_back({
+                .lightIndex = lightIndex,
+                .shadowType = ShadowType::DIRECTIONAL,
+                .shadowIndex = uint8_t(c),
+                .face = 0,
+                .options = options });
     }
+    return
*this; } -void ShadowMapManager::addShadowMap(size_t lightIndex, bool spotlight, +ShadowMapManager::Builder& ShadowMapManager::Builder::shadowMap(size_t lightIndex, bool spotlight, LightManager::ShadowOptions const* options) noexcept { if (spotlight) { const size_t c = mSpotShadowMapCount++; const size_t i = c + CONFIG_MAX_SHADOW_CASCADES; assert_invariant(i < CONFIG_MAX_SHADOWMAPS); - auto& shadowMap = getShadowMap(i); - shadowMap.initialize(lightIndex, ShadowType::SPOT, i, 0, options); + mShadowMaps.push_back({ + .lightIndex = lightIndex, + .shadowType = ShadowType::SPOT, + .shadowIndex = uint8_t(i), + .face = 0, + .options = options }); } else { // point-light, generate 6 independent shadowmaps for (size_t face = 0; face < 6; face++) { const size_t c = mSpotShadowMapCount++; const size_t i = c + CONFIG_MAX_SHADOW_CASCADES; assert_invariant(i < CONFIG_MAX_SHADOWMAPS); - auto& shadowMap = getShadowMap(i); - shadowMap.initialize(lightIndex, ShadowType::POINT, i, face, options); + mShadowMaps.push_back({ + .lightIndex = lightIndex, + .shadowType = ShadowType::POINT, + .shadowIndex = uint8_t(i), + .face = uint8_t(face), + .options = options }); } } + return *this; } FrameGraphId ShadowMapManager::render(FEngine& engine, FrameGraph& fg, - RenderPass const& pass, FView& view, CameraInfo const& mainCameraInfo, + RenderPassBuilder const& passBuilder, + FView& view, CameraInfo const& mainCameraInfo, float4 const& userTime) noexcept { const float moment2 = std::numeric_limits::max(); @@ -206,8 +286,8 @@ FrameGraphId ShadowMapManager::render(FEngine& engine, FrameG builder.sideEffect(); }, [this, &engine, &view, vsmShadowOptions, - scene, mainCameraInfo, userTime, passTemplate = pass]( - FrameGraphResources const&, auto const& data, DriverApi& driver) { + scene, mainCameraInfo, userTime, passBuilder = passBuilder]( + FrameGraphResources const&, auto const& data, DriverApi& driver) mutable { // Note: we could almost parallel_for the loop below, the problem currently is // that updatePrimitivesLod() updates temporary global state. 
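// A minimal sketch of a possible call site for the declarative ShadowMapManager::Builder API
// introduced above, replacing the removed reset()/setDirectionalShadowMap()/addShadowMap()
// sequence. The function name and every parameter below are placeholders; only the Builder,
// hasShadowMaps(), createIfNeeded() and update() calls come from this change.
// (A real caller would also need <memory>, <utility> and <vector> in scope.)
static ShadowMapManager::ShadowTechnique prepareShadowMapsSketch(FEngine& engine, FView& view,
        CameraInfo const& cameraInfo, FScene::RenderableSoa& renderableData,
        FScene::LightSoa const& lightData,
        LightManager::ShadowOptions const* directionalOptions,          // nullptr: no directional shadows
        std::vector<std::pair<size_t, bool>> const& shadowCastingLights, // { light index, is spotlight }
        LightManager::ShadowOptions const* punctualOptions,
        std::unique_ptr<ShadowMapManager>& inOutManager) {
    ShadowMapManager::Builder builder;
    if (directionalOptions) {
        // one cascaded shadow map for the directional light (light index 0 in this sketch)
        builder.directionalShadowMap(0, directionalOptions);
    }
    for (auto const& [lightIndex, isSpotlight] : shadowCastingLights) {
        // spot lights record a single shadow map; point lights expand to 6 faces inside the Builder
        builder.shadowMap(lightIndex, isSpotlight, punctualOptions);
    }
    if (!builder.hasShadowMaps()) {
        return ShadowMapManager::ShadowTechnique::NONE;                 // manager stays unallocated
    }
    // the manager itself is now created lazily, only once shadows are actually recorded
    ShadowMapManager::createIfNeeded(engine, inOutManager);
    return inOutManager->update(builder, engine, view, cameraInfo, renderableData, lightData);
}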
@@ -262,19 +342,20 @@ FrameGraphId ShadowMapManager::render(FEngine& engine, FrameG cameraInfo, scene->getRenderableData(), entry.range); // generate and sort the commands for rendering the shadow map - RenderPass pass(passTemplate); - pass.setCamera(cameraInfo); - pass.setVisibilityMask(entry.visibilityMask); - pass.setGeometry(scene->getRenderableData(), - entry.range, scene->getRenderableUBO()); - pass.appendCommands(engine, RenderPass::SHADOW); - pass.sortCommands(engine); + + RenderPass const pass = passBuilder + .camera(cameraInfo) + .visibilityMask(entry.visibilityMask) + .geometry(scene->getRenderableData(), + entry.range, scene->getRenderableUBO()) + .commandTypeFlags(RenderPass::CommandTypeFlags::SHADOW) + .build(engine); entry.executor = pass.getExecutor(); if (!view.hasVSM()) { auto const* options = shadowMap.getShadowOptions(); - const PolygonOffset polygonOffset = { // handle reversed Z + PolygonOffset const polygonOffset = { // handle reversed Z .slope = -options->polygonOffsetSlope, .constant = -options->polygonOffsetConstant }; @@ -395,7 +476,6 @@ FrameGraphId ShadowMapManager::render(FEngine& engine, FrameG auto rt = resources.getRenderPassInfo(data.rt); - engine.flush(); driver.beginRenderPass(rt.target, rt.params); entry.shadowMap->bind(driver); entry.executor.overrideScissor(entry.shadowMap->getScissor()); @@ -765,7 +845,7 @@ void ShadowMapManager::preparePointShadowMap(ShadowMap& shadowMap, FEngine& engine, FView& view, CameraInfo const& mainCameraInfo, FScene::RenderableSoa& renderableData, utils::Range range, FScene::LightSoa& lightData, - ShadowMap::SceneInfo const& sceneInfo) noexcept { + ShadowMap::SceneInfo const&) noexcept { const uint8_t face = shadowMap.getFace(); const size_t lightIndex = shadowMap.getLightIndex(); diff --git a/filament/src/ShadowMapManager.h b/filament/src/ShadowMapManager.h index 5c38048ebed..8f154a4d3b4 100644 --- a/filament/src/ShadowMapManager.h +++ b/filament/src/ShadowMapManager.h @@ -17,32 +17,52 @@ #ifndef TNT_FILAMENT_DETAILS_SHADOWMAPMANAGER_H #define TNT_FILAMENT_DETAILS_SHADOWMAPMANAGER_H -#include - +#include "Culler.h" #include "ShadowMap.h" #include "TypedUniformBuffer.h" +#include +#include + +#include +#include + +#include "components/RenderableManager.h" + #include "details/Engine.h" #include "details/Scene.h" -#include +#include "fg/FrameGraphId.h" +#include "fg/FrameGraphTexture.h" -#include #include #include +#include +#include +#include +#include #include -#include +#include +#include #include #include +#include +#include +#include + +#include +#include namespace filament { +class FCamera; class FView; class FrameGraph; class RenderPass; +class RenderPassBuilder; struct ShadowMappingUniforms { math::float4 cascadeSplits; @@ -54,7 +74,7 @@ struct ShadowMappingUniforms { class ShadowMapManager { public: - using ShadowMappingUniforms = ShadowMappingUniforms; + using ShadowMappingUniforms = filament::ShadowMappingUniforms; using ShadowType = ShadowMap::ShadowType; @@ -64,29 +84,48 @@ class ShadowMapManager { SCREEN_SPACE = 0x2u, }; + class Builder { + friend class ShadowMapManager; + uint32_t mDirectionalShadowMapCount = 0; + uint32_t mSpotShadowMapCount = 0; + struct ShadowMap { + size_t lightIndex; + ShadowType shadowType; + uint16_t shadowIndex; + uint8_t face; + LightManager::ShadowOptions const* options; + }; + std::vector mShadowMaps; + public: + Builder& directionalShadowMap(size_t lightIndex, + LightManager::ShadowOptions const* options) noexcept; - explicit ShadowMapManager(FEngine& engine); - ~ShadowMapManager(); 
+ Builder& shadowMap(size_t lightIndex, bool spotlight, + LightManager::ShadowOptions const* options) noexcept; - void terminate(FEngine& engine); + bool hasShadowMaps() const noexcept { + return mDirectionalShadowMapCount || mSpotShadowMapCount; + } + }; - // Reset shadow map layout. - void reset() noexcept; + ~ShadowMapManager(); - void setDirectionalShadowMap(size_t lightIndex, - LightManager::ShadowOptions const* options) noexcept; + static void createIfNeeded(FEngine& engine, + std::unique_ptr& inOutShadowMapManager); - void addShadowMap(size_t lightIndex, bool spotlight, - LightManager::ShadowOptions const* options) noexcept; + static void terminate(FEngine& engine, + std::unique_ptr& shadowMapManager); // Updates all the shadow maps and performs culling. // Returns true if any of the shadow maps have visible shadows. - ShadowMapManager::ShadowTechnique update(FEngine& engine, FView& view, + ShadowMapManager::ShadowTechnique update(Builder const& builder, + FEngine& engine, FView& view, CameraInfo const& cameraInfo, FScene::RenderableSoa& renderableData, FScene::LightSoa const& lightData) noexcept; // Renders all the shadow maps. - FrameGraphId render(FEngine& engine, FrameGraph& fg, RenderPass const& pass, + FrameGraphId render(FEngine& engine, FrameGraph& fg, + RenderPassBuilder const& passBuilder, FView& view, CameraInfo const& mainCameraInfo, math::float4 const& userTime) noexcept; // valid after calling update() above @@ -99,11 +138,16 @@ class ShadowMapManager { bool hasSpotShadows() const { return !mSpotShadowMapCount; } // for debugging only - FCamera const* getDirectionalLightCamera() const noexcept { + FCamera const* getDirectionalShadowCamera() const noexcept { + if (!mInitialized) return nullptr; return getShadowMap(0).getDebugCamera(); } private: + explicit ShadowMapManager(FEngine& engine); + + void terminate(FEngine& engine); + ShadowMapManager::ShadowTechnique updateCascadeShadowMaps(FEngine& engine, FView& view, CameraInfo cameraInfo, FScene::RenderableSoa& renderableData, FScene::LightSoa const& lightData, ShadowMap::SceneInfo sceneInfo) noexcept; @@ -188,6 +232,7 @@ class ShadowMapManager { ShadowMapCacheContainer mShadowMapCache; uint32_t mDirectionalShadowMapCount = 0; uint32_t mSpotShadowMapCount = 0; + bool mInitialized = false; ShadowMap& getShadowMap(size_t index) noexcept { assert_invariant(index < CONFIG_MAX_SHADOWMAPS); diff --git a/filament/src/View.cpp b/filament/src/View.cpp index bc5da818290..2de966ea0c9 100644 --- a/filament/src/View.cpp +++ b/filament/src/View.cpp @@ -67,8 +67,8 @@ const char* View::getName() const noexcept { return downcast(this)->getName(); } -Camera const* View::getDirectionalLightCamera() const noexcept { - return downcast(this)->getDirectionalLightCamera(); +Camera const* View::getDirectionalShadowCamera() const noexcept { + return downcast(this)->getDirectionalShadowCamera(); } void View::setShadowingEnabled(bool enabled) noexcept { diff --git a/filament/src/details/DebugRegistry.cpp b/filament/src/details/DebugRegistry.cpp index decd59610a9..ad1a54df74a 100644 --- a/filament/src/details/DebugRegistry.cpp +++ b/filament/src/details/DebugRegistry.cpp @@ -16,12 +16,18 @@ #include "details/DebugRegistry.h" +#include +#include #include #include #include #include +#include +#include +#include + #ifndef NDEBUG # define DEBUG_PROPERTIES_WRITABLE true #else @@ -120,12 +126,25 @@ void FDebugRegistry::registerDataSource(std::string_view name, } } +void FDebugRegistry::registerDataSource(std::string_view name, + utils::Invocable&& 
creator) noexcept { + mDataSourceCreatorMap[name] = std::move(creator); +} + DebugRegistry::DataSource FDebugRegistry::getDataSource(const char* name) const noexcept { std::string_view const key{ name }; auto& dataSourceMap = mDataSourceMap; auto const& it = dataSourceMap.find(key); - if (it == dataSourceMap.end()) { - return { nullptr, 0u }; + if (UTILS_UNLIKELY(it == dataSourceMap.end())) { + auto& dataSourceCreatorMap = mDataSourceCreatorMap; + auto const& pos = dataSourceCreatorMap.find(key); + if (pos == dataSourceCreatorMap.end()) { + return { nullptr, 0u }; + } + DataSource dataSource{ pos->second() }; + dataSourceMap[key] = dataSource; + dataSourceCreatorMap.erase(pos); + return dataSource; } return it->second; } diff --git a/filament/src/details/DebugRegistry.h b/filament/src/details/DebugRegistry.h index 94dfec19414..b60a1c69949 100644 --- a/filament/src/details/DebugRegistry.h +++ b/filament/src/details/DebugRegistry.h @@ -22,12 +22,17 @@ #include #include +#include + +#include #include #include #include #include +#include + namespace filament { class FEngine; @@ -95,8 +100,13 @@ class FDebugRegistry : public DebugRegistry { registerProperty(name, p, FLOAT4, std::move(fn)); } + // registers a DataSource directly void registerDataSource(std::string_view name, void const* data, size_t count) noexcept; + // registers a DataSource lazily + void registerDataSource(std::string_view name, + utils::Invocable&& creator) noexcept; + #if !defined(_MSC_VER) private: #endif @@ -113,7 +123,8 @@ class FDebugRegistry : public DebugRegistry { void const* getPropertyAddress(const char* name) const noexcept; DataSource getDataSource(const char* name) const noexcept; std::unordered_map mPropertyMap; - std::unordered_map mDataSourceMap; + mutable std::unordered_map mDataSourceMap; + mutable std::unordered_map> mDataSourceCreatorMap; }; FILAMENT_DOWNCAST(DebugRegistry) diff --git a/filament/src/details/Engine.cpp b/filament/src/details/Engine.cpp index c09711afa19..2fce2349c39 100644 --- a/filament/src/details/Engine.cpp +++ b/filament/src/details/Engine.cpp @@ -98,7 +98,10 @@ Engine* FEngine::create(Engine::Builder const& builder) { return nullptr; } DriverConfig const driverConfig{ - .handleArenaSize = instance->getRequestedDriverHandleArenaSize() }; + .handleArenaSize = instance->getRequestedDriverHandleArenaSize(), + .textureUseAfterFreePoolSize = instance->getConfig().textureUseAfterFreePoolSize, + .disableParallelShaderCompile = instance->getConfig().disableParallelShaderCompile + }; instance->mDriver = platform->createDriver(sharedContext, driverConfig); } else { @@ -198,7 +201,7 @@ FEngine::FEngine(Engine::Builder const& builder) : mCommandBufferQueue( builder->mConfig.minCommandBufferSizeMB * MiB, builder->mConfig.commandBufferSizeMB * MiB), - mPerRenderPassAllocator( + mPerRenderPassArena( "FEngine::mPerRenderPassAllocator", builder->mConfig.perRenderPassArenaSizeMB * MiB), mHeapAllocator("FEngine::mHeapAllocator", AreaPolicy::NullArea{}), @@ -651,7 +654,8 @@ int FEngine::loop() { DriverConfig const driverConfig { .handleArenaSize = getRequestedDriverHandleArenaSize(), - .textureUseAfterFreePoolSize = mConfig.textureUseAfterFreePoolSize + .textureUseAfterFreePoolSize = mConfig.textureUseAfterFreePoolSize, + .disableParallelShaderCompile = mConfig.disableParallelShaderCompile }; mDriver = mPlatform->createDriver(mSharedGLContext, driverConfig); diff --git a/filament/src/details/Engine.h b/filament/src/details/Engine.h index 03889f13936..03de2ce8414 100644 --- a/filament/src/details/Engine.h 
+++ b/filament/src/details/Engine.h @@ -58,17 +58,6 @@ #include #include -#if FILAMENT_ENABLE_MATDBG -#include -#else -namespace filament { -namespace matdbg { -class DebugServer; -using MaterialKey = uint32_t; -} // namespace matdbg -} // namespace filament -#endif - #include #include #include @@ -78,8 +67,19 @@ using MaterialKey = uint32_t; #include #include #include +#include +#include #include +#if FILAMENT_ENABLE_MATDBG +#include +#else +namespace filament::matdbg { +class DebugServer; +using MaterialKey = uint32_t; +} // namespace filament::matdbg +#endif + namespace filament { class Renderer; @@ -142,7 +142,7 @@ class FEngine : public Engine { // the per-frame Area is used by all Renderer, so they must run in sequence and // have freed all allocated memory when done. If this needs to change in the future, // we'll simply have to use separate Areas (for instance). - LinearAllocatorArena& getPerRenderPassAllocator() noexcept { return mPerRenderPassAllocator; } + LinearAllocatorArena& getPerRenderPassArena() noexcept { return mPerRenderPassArena; } // Material IDs... uint32_t getMaterialId() const noexcept { return mMaterialId++; } @@ -508,7 +508,7 @@ class FEngine : public Engine { uint32_t mFlushCounter = 0; - LinearAllocatorArena mPerRenderPassAllocator; + RootArenaScope::Arena mPerRenderPassArena; HeapAllocatorArena mHeapAllocator; utils::JobSystem mJobSystem; diff --git a/filament/src/details/Renderer.cpp b/filament/src/details/Renderer.cpp index 6611a48a832..c014be6eb66 100644 --- a/filament/src/details/Renderer.cpp +++ b/filament/src/details/Renderer.cpp @@ -16,6 +16,9 @@ #include "details/Renderer.h" +#include "Allocators.h" +#include "DebugRegistry.h" +#include "FrameHistory.h" #include "PostProcessManager.h" #include "RendererUtils.h" #include "RenderPass.h" @@ -28,21 +31,40 @@ #include "details/Texture.h" #include "details/View.h" +#include +#include +#include #include +#include +#include +#include #include #include "fg/FrameGraph.h" #include "fg/FrameGraphId.h" #include "fg/FrameGraphResources.h" +#include "fg/FrameGraphTexture.h" + +#include +#include +#include #include #include +#include +#include #include #include -#include #include +#include +#include +#include + +#include +#include + // this helps visualize what dynamic-scaling is doing #define DEBUG_DYNAMIC_SCALING false @@ -62,8 +84,7 @@ FRenderer::FRenderer(FEngine& engine) : mHdrQualityMedium(TextureFormat::R11F_G11F_B10F), mHdrQualityHigh(TextureFormat::RGB16F), mIsRGB8Supported(false), - mUserEpoch(engine.getEngineEpoch()), - mPerRenderPassArena(engine.getPerRenderPassAllocator()) + mUserEpoch(engine.getEngineEpoch()) { FDebugRegistry& debugRegistry = engine.getDebugRegistry(); debugRegistry.registerProperty("d.renderer.doFrameCapture", @@ -442,7 +463,7 @@ void FRenderer::render(FView const* view) { if (UTILS_LIKELY(view && view->getScene())) { if (mViewRenderedCount) { - // this is a good place to kick the GPU, since we've rendered a View before, + // This is a good place to kick the GPU, since we've rendered a View before, // and we're about to render another one. 
mEngine.getDriverApi().flush(); } @@ -452,17 +473,17 @@ void FRenderer::render(FView const* view) { } void FRenderer::renderInternal(FView const* view) { - // per-renderpass data - ArenaScope rootArena(mPerRenderPassArena); - FEngine& engine = mEngine; - JobSystem& js = engine.getJobSystem(); + + // per-renderpass data + RootArenaScope rootArenaScope(engine.getPerRenderPassArena()); // create a root job so no other job can escape + JobSystem& js = engine.getJobSystem(); auto *rootJob = js.setRootJob(js.createJob()); // execute the render pass - renderJob(rootArena, const_cast(*view)); + renderJob(rootArenaScope, const_cast(*view)); // make sure to flush the command buffer engine.flush(); @@ -471,7 +492,7 @@ void FRenderer::renderInternal(FView const* view) { js.runAndWait(rootJob); } -void FRenderer::renderJob(ArenaScope& arena, FView& view) { +void FRenderer::renderJob(RootArenaScope& rootArenaScope, FView& view) { FEngine& engine = mEngine; JobSystem& js = engine.getJobSystem(); FEngine::DriverApi& driver = engine.getDriverApi(); @@ -636,7 +657,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { xvp.bottom = int32_t(guardBand); } - view.prepare(engine, driver, arena, svp, cameraInfo, getShaderUserTime(), needsAlphaChannel); + view.prepare(engine, driver, rootArenaScope, svp, cameraInfo, getShaderUserTime(), needsAlphaChannel); view.prepareUpscaler(scale, taaOptions, dsrOptions); @@ -649,8 +670,10 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { // Allocate some space for our commands in the per-frame Arena, and use that space as // an Arena for commands. All this space is released when we exit this method. size_t const perFrameCommandsSize = engine.getPerFrameCommandsSize(); - void* const arenaBegin = arena.allocate(perFrameCommandsSize, CACHELINE_SIZE); + void* const arenaBegin = rootArenaScope.allocate(perFrameCommandsSize, CACHELINE_SIZE); void* const arenaEnd = pointermath::add(arenaBegin, perFrameCommandsSize); + + // This arena *must* stay valid until all commands have been processed RenderPass::Arena commandArena("Command Arena", { arenaBegin, arenaEnd }); RenderPass::RenderFlags renderFlags = 0; @@ -658,8 +681,8 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { if (view.isFrontFaceWindingInverted()) renderFlags |= RenderPass::HAS_INVERSE_FRONT_FACES; if (view.hasInstancedStereo()) renderFlags |= RenderPass::IS_STEREOSCOPIC; - RenderPass pass(engine, commandArena); - pass.setRenderFlags(renderFlags); + RenderPassBuilder passBuilder(commandArena); + passBuilder.renderFlags(renderFlags); Variant variant; variant.setDirectionalLighting(view.hasDirectionalLight()); @@ -682,10 +705,10 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { if (view.needsShadowMap()) { Variant shadowVariant(Variant::DEPTH_VARIANT); shadowVariant.setVsm(view.getShadowType() == ShadowType::VSM); - - RenderPass shadowPass(pass); - shadowPass.setVariant(shadowVariant); - auto shadows = view.renderShadowMaps(engine, fg, cameraInfo, mShaderUserTime, shadowPass); + auto shadows = view.renderShadowMaps(engine, fg, cameraInfo, mShaderUserTime, + RenderPassBuilder{ commandArena } + .renderFlags(renderFlags) + .variant(shadowVariant)); blackboard["shadows"] = shadows; } @@ -771,8 +794,9 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { view.updatePrimitivesLod(engine, cameraInfo, scene.getRenderableData(), view.getVisibleRenderables()); - pass.setCamera(cameraInfo); - pass.setGeometry(scene.getRenderableData(), view.getVisibleRenderables(), 
scene.getRenderableUBO()); + passBuilder.camera(cameraInfo); + passBuilder.geometry(scene.getRenderableData(), + view.getVisibleRenderables(), scene.getRenderableUBO()); // view set-ups that need to happen before rendering fg.addTrivialSideEffectPass("Prepare View Uniforms", @@ -818,7 +842,8 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { // This is normally used by SSAO and contact-shadows // TODO: the scaling should depends on all passes that need the structure pass - const auto [structure, picking_] = ppm.structure(fg, pass, renderFlags, svp.width, svp.height, { + const auto [structure, picking_] = ppm.structure(fg, + passBuilder, renderFlags, svp.width, svp.height, { .scale = aoOptions.resolution, .picking = view.hasPicking() }); @@ -876,7 +901,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { // screen-space reflections pass if (ssReflectionsOptions.enabled) { - auto reflections = ppm.ssr(fg, pass, + auto reflections = ppm.ssr(fg, passBuilder, view.getFrameHistory(), cameraInfo, view.getPerViewUniforms(), structure, @@ -894,10 +919,15 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { // -------------------------------------------------------------------------------------------- // Color passes + // this makes the viewport relative to xvp + // FIXME: we should use 'vp' when rendering directly into the swapchain, but that's hard to + // know at this point. This will usually be the case when post-process is disabled. + // FIXME: we probably should take the dynamic scaling into account too + passBuilder.scissorViewport(hasPostProcess ? xvp : vp); + // This one doesn't need to be a FrameGraph pass because it always happens by construction // (i.e. it won't be culled, unless everything is culled), so no need to complexify things. - pass.setVariant(variant); - pass.appendCommands(engine, RenderPass::COLOR); + passBuilder.variant(variant); // color-grading as subpass is done either by the color pass or the TAA pass if any auto colorGradingConfigForColor = colorGradingConfig; @@ -905,7 +935,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { if (colorGradingConfigForColor.asSubpass) { // append color grading subpass after all other passes - pass.appendCustomCommand(3, + passBuilder.customCommand(engine, 3, RenderPass::Pass::BLENDED, RenderPass::CustomCommand::EPILOG, 0, [&ppm, &driver, colorGradingConfigForColor]() { @@ -913,7 +943,7 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { }); } else if (colorGradingConfig.customResolve) { // append custom resolve subpass after all other passes - pass.appendCustomCommand(3, + passBuilder.customCommand(engine, 3, RenderPass::Pass::BLENDED, RenderPass::CustomCommand::EPILOG, 0, [&ppm, &driver]() { @@ -921,16 +951,9 @@ void FRenderer::renderJob(ArenaScope& arena, FView& view) { }); } - // sort commands once we're done adding commands - pass.sortCommands(engine); - - - // this makes the viewport relative to xvp - // FIXME: we should use 'vp' when rendering directly into the swapchain, but that's hard to - // know at this point. This will usually be the case when post-process is disabled. - // FIXME: we probably should take the dynamic scaling into account too - pass.setScissorViewport(hasPostProcess ? 
xvp : vp); + passBuilder.commandTypeFlags(RenderPass::CommandTypeFlags::COLOR); + RenderPass const pass{ passBuilder.build(engine) }; FrameGraphTexture::Descriptor const desc = { .width = config.physicalViewport.width, diff --git a/filament/src/details/Renderer.h b/filament/src/details/Renderer.h index 2d08b6cfe0d..056d5599770 100644 --- a/filament/src/details/Renderer.h +++ b/filament/src/details/Renderer.h @@ -163,7 +163,7 @@ class FRenderer : public Renderer { } void renderInternal(FView const* view); - void renderJob(ArenaScope& arena, FView& view); + void renderJob(RootArenaScope& rootArenaScope, FView& view); // keep a reference to our engine FEngine& mEngine; @@ -187,9 +187,6 @@ class FRenderer : public Renderer { backend::TargetBufferFlags mClearFlags{}; tsl::robin_set mPreviousRenderTargets; std::function mBeginFrameInternal; - - // per-frame arena for this Renderer - LinearAllocatorArena& mPerRenderPassArena; }; FILAMENT_DOWNCAST(Renderer) diff --git a/filament/src/details/Scene.cpp b/filament/src/details/Scene.cpp index 942fb1e24fc..ff6af6293a5 100644 --- a/filament/src/details/Scene.cpp +++ b/filament/src/details/Scene.cpp @@ -53,7 +53,7 @@ FScene::~FScene() noexcept = default; void FScene::prepare(utils::JobSystem& js, - LinearAllocatorArena& allocator, + RootArenaScope& rootArenaScope, mat4 const& worldTransform, bool shadowReceiversAreCasters) noexcept { // TODO: can we skip this in most cases? Since we rely on indices staying the same, @@ -64,7 +64,7 @@ void FScene::prepare(utils::JobSystem& js, SYSTRACE_CONTEXT(); // This will reset the allocator upon exiting - ArenaScope const arena(allocator); + ArenaScope localArenaScope(rootArenaScope.getArena()); FEngine& engine = mEngine; EntityManager const& em = engine.getEntityManager(); @@ -85,10 +85,10 @@ void FScene::prepare(utils::JobSystem& js, utils::STLAllocator< LightContainerData, LinearAllocatorArena >, false>; RenderableInstanceContainer renderableInstances{ - RenderableInstanceContainer::with_capacity(entities.size(), allocator) }; + RenderableInstanceContainer::with_capacity(entities.size(), localArenaScope.getArena()) }; LightInstanceContainer lightInstances{ - LightInstanceContainer::with_capacity(entities.size(), allocator) }; + LightInstanceContainer::with_capacity(entities.size(), localArenaScope.getArena()) }; SYSTRACE_NAME_BEGIN("InstanceLoop"); @@ -148,7 +148,7 @@ void FScene::prepare(utils::JobSystem& js, // TODO: the resize below could happen in a job - if (sceneData.size() != renderableInstances.size()) { + if (!sceneData.capacity() || sceneData.size() != renderableInstances.size()) { sceneData.clear(); if (sceneData.capacity() < renderableDataCapacity) { sceneData.setCapacity(renderableDataCapacity); @@ -454,7 +454,7 @@ void FScene::terminate(FEngine&) { mRenderableViewUbh.clear(); } -void FScene::prepareDynamicLights(const CameraInfo& camera, ArenaScope&, +void FScene::prepareDynamicLights(const CameraInfo& camera, Handle lightUbh) noexcept { FEngine::DriverApi& driver = mEngine.getDriverApi(); FLightManager const& lcm = mEngine.getLightManager(); diff --git a/filament/src/details/Scene.h b/filament/src/details/Scene.h index 1882bb4dc30..490d115af3c 100644 --- a/filament/src/details/Scene.h +++ b/filament/src/details/Scene.h @@ -31,6 +31,8 @@ #include #include +#include + #include #include #include @@ -70,12 +72,12 @@ class FScene : public Scene { ~FScene() noexcept; void terminate(FEngine& engine); - void prepare(utils::JobSystem& js, LinearAllocatorArena& allocator, + void prepare(utils::JobSystem& js, 
RootArenaScope& rootArenaScope, math::mat4 const& worldTransform, bool shadowReceiversAreCasters) noexcept; void prepareVisibleRenderables(utils::Range visibleRenderables) noexcept; - void prepareDynamicLights(const CameraInfo& camera, ArenaScope& arena, + void prepareDynamicLights(const CameraInfo& camera, backend::Handle lightUbh) noexcept; backend::Handle getRenderableUBO() const noexcept { diff --git a/filament/src/details/View.cpp b/filament/src/details/View.cpp index 7cc641de093..8188390f7b5 100644 --- a/filament/src/details/View.cpp +++ b/filament/src/details/View.cpp @@ -20,6 +20,7 @@ #include "Froxelizer.h" #include "RenderPrimitive.h" #include "ResourceAllocator.h" +#include "ShadowMapManager.h" #include "details/Engine.h" #include "details/IndirectLight.h" @@ -43,6 +44,7 @@ #include #include +#include #include using namespace utils; @@ -59,8 +61,8 @@ FView::FView(FEngine& engine) : mFroxelizer(engine), mFogEntity(engine.getEntityManager().create()), mIsStereoSupported(engine.getDriverApi().isStereoSupported()), - mPerViewUniforms(engine), - mShadowMapManager(engine) { + mPerViewUniforms(engine) { + DriverApi& driver = engine.getDriverApi(); FDebugRegistry& debugRegistry = engine.getDebugRegistry(); @@ -76,7 +78,11 @@ FView::FView(FEngine& engine) #ifndef NDEBUG debugRegistry.registerDataSource("d.view.frame_info", - mDebugFrameHistory.data(), mDebugFrameHistory.size()); + [this]() -> DebugRegistry::DataSource { + assert_invariant(!mDebugFrameHistory); + mDebugFrameHistory = std::make_unique>(); + return { mDebugFrameHistory->data(), mDebugFrameHistory->size() }; + }); debugRegistry.registerProperty("d.view.pid.kp", &engine.debug.view.pid.kp); debugRegistry.registerProperty("d.view.pid.ki", &engine.debug.view.pid.ki); debugRegistry.registerProperty("d.view.pid.kd", &engine.debug.view.pid.kd); @@ -113,7 +119,8 @@ void FView::terminate(FEngine& engine) { driver.destroyBufferObject(mLightUbh); driver.destroyBufferObject(mRenderableUbh); drainFrameHistory(engine); - mShadowMapManager.terminate(engine); + + ShadowMapManager::terminate(engine, mShadowMapManager); mPerViewUniforms.terminate(driver); mFroxelizer.terminate(driver); @@ -242,21 +249,24 @@ float2 FView::updateScale(FEngine& engine, #ifndef NDEBUG // only for debugging... 
-    using duration_ms = std::chrono::duration;
-    const float target = (1000.0f * float(frameRateOptions.interval)) / displayInfo.refreshRate;
-    const float targetWithHeadroom = target * (1.0f - frameRateOptions.headRoomRatio);
-    std::move(mDebugFrameHistory.begin() + 1,
-            mDebugFrameHistory.end(), mDebugFrameHistory.begin());
-    mDebugFrameHistory.back() = {
-            .target = target,
-            .targetWithHeadroom = targetWithHeadroom,
-            .frameTime = std::chrono::duration_cast(info.frameTime).count(),
-            .frameTimeDenoised = std::chrono::duration_cast(info.denoisedFrameTime).count(),
-            .scale = mScale.x * mScale.y,
-            .pid_e = mPidController.getError(),
-            .pid_i = mPidController.getIntegral(),
-            .pid_d = mPidController.getDerivative()
-    };
+    if (mDebugFrameHistory) {
+        using namespace std::chrono;
+        using duration_ms = duration;
+        const float target = (1000.0f * float(frameRateOptions.interval)) / displayInfo.refreshRate;
+        const float targetWithHeadroom = target * (1.0f - frameRateOptions.headRoomRatio);
+        std::move(mDebugFrameHistory->begin() + 1,
+                mDebugFrameHistory->end(), mDebugFrameHistory->begin());
+        mDebugFrameHistory->back() = {
+                .target = target,
+                .targetWithHeadroom = targetWithHeadroom,
+                .frameTime = duration_cast(info.frameTime).count(),
+                .frameTimeDenoised = duration_cast(info.denoisedFrameTime).count(),
+                .scale = mScale.x * mScale.y,
+                .pid_e = mPidController.getError(),
+                .pid_i = mPidController.getIntegral(),
+                .pid_d = mPidController.getDerivative()
+        };
+    }
 #endif
 
     return mScale;
@@ -281,10 +291,10 @@ void FView::prepareShadowing(FEngine& engine, FScene::RenderableSoa& renderableD
         return;
     }
 
-    mShadowMapManager.reset();
-
     auto& lcm = engine.getLightManager();
 
+    ShadowMapManager::Builder builder;
+    // dominant directional light is always at index 0
     FLightManager::Instance const directionalLight = lightData.elementAt(0);
     const bool hasDirectionalShadows = directionalLight && lcm.isShadowCaster(directionalLight);
@@ -292,7 +302,7 @@
         const auto& shadowOptions = lcm.getShadowOptions(directionalLight);
         assert_invariant(shadowOptions.shadowCascades >= 1 && shadowOptions.shadowCascades <= CONFIG_MAX_SHADOW_CASCADES);
-        mShadowMapManager.setDirectionalShadowMap(0, &shadowOptions);
+        builder.directionalShadowMap(0, &shadowOptions);
     }
 
     // Find all shadow-casting spotlights.
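The lazy data-source path added to `FDebugRegistry` above (and used by `FView` for `d.view.frame_info`) comes down to two maps: one holding creator callbacks, one caching materialized results. A minimal, self-contained sketch of that pattern follows; the `LazyRegistry` and `DataSource` names are invented for the example and `std::function` stands in for `utils::Invocable`, so this is an illustration rather than the Filament API itself.

```cpp
// Sketch of lazy data-source registration: a creator callback is stored first, and the
// actual DataSource is only built and cached on the first lookup, then the creator is dropped.
#include <cstddef>
#include <functional>
#include <string>
#include <unordered_map>

struct DataSource {
    void const* data = nullptr;
    size_t count = 0;
};

class LazyRegistry {
public:
    void registerCreator(std::string name, std::function<DataSource()> creator) {
        mCreators[std::move(name)] = std::move(creator);
    }

    DataSource get(std::string const& name) {
        auto it = mCached.find(name);
        if (it != mCached.end()) {
            return it->second;           // already materialized
        }
        auto pos = mCreators.find(name);
        if (pos == mCreators.end()) {
            return {};                   // unknown name
        }
        DataSource ds = pos->second();   // materialize lazily, exactly once
        mCached[name] = ds;              // cache for subsequent lookups
        mCreators.erase(pos);            // creator is no longer needed
        return ds;
    }

private:
    std::unordered_map<std::string, std::function<DataSource()>> mCreators;
    std::unordered_map<std::string, DataSource> mCached;
};
```

The payoff, as in the `FView` change above, is that the backing storage is only allocated the first time the data source is actually queried.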
@@ -326,7 +336,7 @@ void FView::prepareShadowing(FEngine& engine, FScene::RenderableSoa& renderableD if (shadowMapCount + shadowMapCountNeeded <= CONFIG_MAX_SHADOWMAPS) { shadowMapCount += shadowMapCountNeeded; const auto& shadowOptions = lcm.getShadowOptions(li); - mShadowMapManager.addShadowMap(l, spotLight, &shadowOptions); + builder.shadowMap(l, spotLight, &shadowOptions); } if (shadowMapCount >= CONFIG_MAX_SHADOWMAPS) { @@ -334,15 +344,17 @@ void FView::prepareShadowing(FEngine& engine, FScene::RenderableSoa& renderableD } } - auto shadowTechnique = mShadowMapManager.update(engine, *this, cameraInfo, - renderableData, lightData); + if (builder.hasShadowMaps()) { + ShadowMapManager::createIfNeeded(engine, mShadowMapManager); + auto shadowTechnique = mShadowMapManager->update(builder, engine, *this, + cameraInfo, renderableData, lightData); - mHasShadowing = any(shadowTechnique); - mNeedsShadowMap = any(shadowTechnique & ShadowMapManager::ShadowTechnique::SHADOW_MAP); + mHasShadowing = any(shadowTechnique); + mNeedsShadowMap = any(shadowTechnique & ShadowMapManager::ShadowTechnique::SHADOW_MAP); + } } -void FView::prepareLighting(FEngine& engine, ArenaScope& arena, - CameraInfo const& cameraInfo) noexcept { +void FView::prepareLighting(FEngine& engine, CameraInfo const& cameraInfo) noexcept { SYSTRACE_CALL(); SYSTRACE_CONTEXT(); @@ -354,7 +366,7 @@ void FView::prepareLighting(FEngine& engine, ArenaScope& arena, */ if (hasDynamicLighting()) { - scene->prepareDynamicLights(cameraInfo, arena, mLightUbh); + scene->prepareDynamicLights(cameraInfo, mLightUbh); } // here the array of visible lights has been shrunk to CONFIG_MAX_LIGHT_COUNT @@ -427,7 +439,7 @@ CameraInfo FView::computeCameraInfo(FEngine& engine) const noexcept { return { *camera, mat4{ rotation } * mat4::translation(translation) }; } -void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, +void FView::prepare(FEngine& engine, DriverApi& driver, RootArenaScope& rootArenaScope, filament::Viewport viewport, CameraInfo cameraInfo, float4 const& userTime, bool needsAlphaChannel) noexcept { @@ -465,7 +477,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, * Gather all information needed to render this scene. Apply the world origin to all * objects in the scene. */ - scene->prepare(js, arena.getAllocator(), + scene->prepare(js, rootArenaScope, cameraInfo.worldTransform, hasVSM()); @@ -475,14 +487,22 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, JobSystem::Job* froxelizeLightsJob = nullptr; JobSystem::Job* prepareVisibleLightsJob = nullptr; - if (scene->getLightData().size() > FScene::DIRECTIONAL_LIGHTS_COUNT) { + size_t const lightCount = scene->getLightData().size(); + if (lightCount > FScene::DIRECTIONAL_LIGHTS_COUNT) { // create and start the prepareVisibleLights job // note: this job updates LightData (non const) + // allocate a scratch buffer for distances outside the job below, so we don't need + // to use a locked allocator; the downside is that we need to account for the worst case. 
+ size_t const positionalLightCount = lightCount - FScene::DIRECTIONAL_LIGHTS_COUNT; + float* const distances = rootArenaScope.allocate( + (positionalLightCount + 3u) & ~3u, CACHELINE_SIZE); + prepareVisibleLightsJob = js.runAndRetain(js.createJob(nullptr, - [&engine, &arena, &viewMatrix = cameraInfo.view, &cullingFrustum, + [&engine, distances, positionalLightCount, &viewMatrix = cameraInfo.view, &cullingFrustum, &lightData = scene->getLightData()] (JobSystem&, JobSystem::Job*) { - FView::prepareVisibleLights(engine.getLightManager(), arena, + FView::prepareVisibleLights(engine.getLightManager(), + { distances, distances + positionalLightCount }, viewMatrix, cullingFrustum, lightData); })); } @@ -530,7 +550,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, // As soon as prepareVisibleLight finishes, we can kick-off the froxelization if (hasDynamicLighting()) { auto& froxelizer = mFroxelizer; - if (froxelizer.prepare(driver, arena, viewport, + if (froxelizer.prepare(driver, rootArenaScope, viewport, cameraInfo.projection, cameraInfo.zn, cameraInfo.zf)) { // TODO: might be more consistent to do this in prepareLighting(), but it's not // strictly necessary @@ -606,7 +626,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, uint32_t(endDirCastersOnly - beginRenderables)}; merged = { 0, uint32_t(endPotentialSpotCastersOnly - beginRenderables) }; - if (!mShadowMapManager.hasSpotShadows()) { + if (!needsShadowMap() || !mShadowMapManager->hasSpotShadows()) { // we know we don't have spot shadows, we can reduce the range to not even include // the potential spot casters merged = { 0, uint32_t(endDirCastersOnly - beginRenderables) }; @@ -645,7 +665,7 @@ void FView::prepare(FEngine& engine, DriverApi& driver, ArenaScope& arena, * Relies on FScene::prepare() and prepareVisibleLights() */ - prepareLighting(engine, arena, cameraInfo); + prepareLighting(engine, cameraInfo); /* * Update driver state @@ -672,8 +692,11 @@ void FView::bindPerViewUniformsAndSamplers(FEngine::DriverApi& driver) const noe driver.bindUniformBuffer(+UniformBindingPoints::LIGHTS, mLightUbh); - driver.bindUniformBuffer(+UniformBindingPoints::SHADOW, - mShadowMapManager.getShadowUniformsHandle()); + if (needsShadowMap()) { + assert_invariant(mShadowMapManager->getShadowUniformsHandle()); + driver.bindUniformBuffer(+UniformBindingPoints::SHADOW, + mShadowMapManager->getShadowUniformsHandle()); + } driver.bindUniformBuffer(+UniformBindingPoints::FROXEL_RECORDS, mFroxelizer.getRecordBuffer()); @@ -774,7 +797,12 @@ void FView::prepareStructure(Handle structure) const noexcept { } void FView::prepareShadow(Handle texture) const noexcept { - const auto& uniforms = mShadowMapManager.getShadowMappingUniforms(); + // when needsShadowMap() is not set, this method only just sets a dummy texture + // in the needed samplers (in that case `texture` is actually a dummy texture). 
+ ShadowMapManager::ShadowMappingUniforms uniforms; + if (needsShadowMap()) { + uniforms = mShadowMapManager->getShadowMappingUniforms(); + } switch (mShadowType) { case filament::ShadowType::PCF: mPerViewUniforms.prepareShadowPCF(texture, uniforms); @@ -850,7 +878,8 @@ void FView::cullRenderables(JobSystem&, functor(0, renderableData.size()); } -void FView::prepareVisibleLights(FLightManager const& lcm, ArenaScope& rootArena, +void FView::prepareVisibleLights(FLightManager const& lcm, + utils::Slice scratch, mat4f const& viewMatrix, Frustum const& frustum, FScene::LightSoa& lightData) noexcept { SYSTRACE_CALL(); @@ -918,28 +947,25 @@ void FView::prepareVisibleLights(FLightManager const& lcm, ArenaScope& rootArena * - This helps our limited numbers of spot-shadow as well. */ - ArenaScope arena(rootArena.getAllocator()); - size_t const size = visibleLightCount; // number of point/spotlights - size_t const positionalLightCount = size - FScene::DIRECTIONAL_LIGHTS_COUNT; + size_t const positionalLightCount = visibleLightCount - FScene::DIRECTIONAL_LIGHTS_COUNT; if (positionalLightCount) { - // always allocate at least 4 entries, because the vectorized loops below rely on that - float* const UTILS_RESTRICT distances = - arena.allocate((size + 3u) & ~3u, CACHELINE_SIZE); - + assert_invariant(positionalLightCount <= scratch.size()); // pre-compute the lights' distance to the camera, for sorting below // - we don't skip the directional light, because we don't care, it's ignored during sorting + float* const UTILS_RESTRICT distances = scratch.data(); float4 const* const UTILS_RESTRICT spheres = lightData.data(); - computeLightCameraDistances(distances, viewMatrix, spheres, size); + computeLightCameraDistances(distances, viewMatrix, spheres, visibleLightCount); // skip directional light Zip2Iterator b = { lightData.begin(), distances }; - std::sort(b + FScene::DIRECTIONAL_LIGHTS_COUNT, b + size, + std::sort(b + FScene::DIRECTIONAL_LIGHTS_COUNT, b + visibleLightCount, [](auto const& lhs, auto const& rhs) { return lhs.second < rhs.second; }); } // drop excess lights - lightData.resize(std::min(size, CONFIG_MAX_LIGHT_COUNT + FScene::DIRECTIONAL_LIGHTS_COUNT)); + lightData.resize(std::min(visibleLightCount, + CONFIG_MAX_LIGHT_COUNT + FScene::DIRECTIONAL_LIGHTS_COUNT)); } // These methods need to exist so clang honors the __restrict__ keyword, which in turn @@ -972,8 +998,10 @@ void FView::updatePrimitivesLod(FEngine& engine, const CameraInfo&, } FrameGraphId FView::renderShadowMaps(FEngine& engine, FrameGraph& fg, - CameraInfo const& cameraInfo, float4 const& userTime, RenderPass const& pass) noexcept { - return mShadowMapManager.render(engine, fg, pass, *this, cameraInfo, userTime); + CameraInfo const& cameraInfo, float4 const& userTime, + RenderPassBuilder const& passBuilder) noexcept { + assert_invariant(needsShadowMap()); + return mShadowMapManager->render(engine, fg, passBuilder, *this, cameraInfo, userTime); } void FView::commitFrameHistory(FEngine& engine) noexcept { diff --git a/filament/src/details/View.h b/filament/src/details/View.h index 204f11b9d56..6680306296d 100644 --- a/filament/src/details/View.h +++ b/filament/src/details/View.h @@ -53,6 +53,9 @@ #include #include +#include +#include + namespace utils { class JobSystem; } // namespace utils; @@ -88,7 +91,7 @@ class FView : public View { // note: viewport/cameraInfo are passed by value to make it clear that prepare cannot // keep references on them that would outlive the scope of prepare() (e.g. with JobSystem). 
- void prepare(FEngine& engine, backend::DriverApi& driver, ArenaScope& arena, + void prepare(FEngine& engine, backend::DriverApi& driver, RootArenaScope& rootArenaScope, filament::Viewport viewport, CameraInfo cameraInfo, math::float4 const& userTime, bool needsAlphaChannel) noexcept; @@ -144,7 +147,7 @@ class FView : public View { void prepareShadowing(FEngine& engine, FScene::RenderableSoa& renderableData, FScene::LightSoa const& lightData, CameraInfo const& cameraInfo) noexcept; - void prepareLighting(FEngine& engine, ArenaScope& arena, CameraInfo const& cameraInfo) noexcept; + void prepareLighting(FEngine& engine, CameraInfo const& cameraInfo) noexcept; void prepareSSAO(backend::Handle ssao) const noexcept; void prepareSSR(backend::Handle ssr, bool disableSSR, @@ -176,7 +179,7 @@ class FView : public View { FrameGraphId renderShadowMaps(FEngine& engine, FrameGraph& fg, CameraInfo const& cameraInfo, math::float4 const& userTime, - RenderPass const& pass) noexcept; + RenderPassBuilder const& passBuilder) noexcept; void updatePrimitivesLod( FEngine& engine, const CameraInfo& camera, @@ -198,8 +201,9 @@ class FView : public View { void setStereoscopicOptions(StereoscopicOptions const& options) noexcept; - FCamera const* getDirectionalLightCamera() const noexcept { - return mShadowMapManager.getDirectionalLightCamera(); + FCamera const* getDirectionalShadowCamera() const noexcept { + if (!mShadowMapManager) return nullptr; + return mShadowMapManager->getDirectionalShadowCamera(); } void setRenderTarget(FRenderTarget* renderTarget) noexcept { @@ -460,7 +464,8 @@ class FView : public View { void prepareVisibleRenderables(utils::JobSystem& js, Frustum const& frustum, FScene::RenderableSoa& renderableData) const noexcept; - static void prepareVisibleLights(FLightManager const& lcm, ArenaScope& rootArena, + static void prepareVisibleLights(FLightManager const& lcm, + utils::Slice scratch, math::mat4f const& viewMatrix, Frustum const& frustum, FScene::LightSoa& lightData) noexcept; @@ -554,7 +559,7 @@ class FView : public View { mutable bool mHasShadowing = false; mutable bool mNeedsShadowMap = false; - ShadowMapManager mShadowMapManager; + std::unique_ptr mShadowMapManager; std::array mMaterialGlobals = {{ { 0, 0, 0, 1 }, @@ -564,7 +569,7 @@ class FView : public View { }}; #ifndef NDEBUG - std::array mDebugFrameHistory; + std::unique_ptr> mDebugFrameHistory; #endif }; diff --git a/ios/CocoaPods/Filament.podspec b/ios/CocoaPods/Filament.podspec index 5b53805fa63..df0911beb7d 100644 --- a/ios/CocoaPods/Filament.podspec +++ b/ios/CocoaPods/Filament.podspec @@ -1,12 +1,12 @@ Pod::Spec.new do |spec| spec.name = "Filament" - spec.version = "1.50.3" + spec.version = "1.50.4" spec.license = { :type => "Apache 2.0", :file => "LICENSE" } spec.homepage = "https://google.github.io/filament" spec.authors = "Google LLC." spec.summary = "Filament is a real-time physically based rendering engine for Android, iOS, Windows, Linux, macOS, and WASM/WebGL." spec.platform = :ios, "11.0" - spec.source = { :http => "https://github.com/google/filament/releases/download/v1.50.3/filament-v1.50.3-ios.tgz" } + spec.source = { :http => "https://github.com/google/filament/releases/download/v1.50.4/filament-v1.50.4-ios.tgz" } # Fix linking error with Xcode 12; we do not yet support the simulator on Apple silicon. 
 spec.pod_target_xcconfig = {
diff --git a/libs/camutils/src/FreeFlightManipulator.h b/libs/camutils/src/FreeFlightManipulator.h
index 1df4dd56f73..c20b2578295 100644
--- a/libs/camutils/src/FreeFlightManipulator.h
+++ b/libs/camutils/src/FreeFlightManipulator.h
@@ -24,6 +24,7 @@
 #include
 #include
+#include
 #include
 
 namespace filament {
@@ -121,50 +122,90 @@ class FreeFlightManipulator : public Manipulator {
     }
 
     void update(FLOAT deltaTime) override {
-        vec3 forceLocal { 0.0, 0.0, 0.0 };
-        if (mKeyDown[(int) Base::Key::FORWARD]) {
-            forceLocal += vec3{ 0.0, 0.0, -1.0 };
-        }
-        if (mKeyDown[(int) Base::Key::LEFT]) {
-            forceLocal += vec3{ -1.0, 0.0, 0.0 };
-        }
-        if (mKeyDown[(int) Base::Key::BACKWARD]) {
-            forceLocal += vec3{ 0.0, 0.0, 1.0 };
-        }
-        if (mKeyDown[(int) Base::Key::RIGHT]) {
-            forceLocal += vec3{ 1.0, 0.0, 0.0 };
-        }
-
-        const mat4 orientation = mat4::lookAt(Base::mEye, Base::mTarget, Base::mProps.upVector);
-        vec3 forceWorld = (orientation * vec4{ forceLocal, 0.0f }).xyz;
-
-        if (mKeyDown[(int) Base::Key::UP]) {
-            forceWorld += vec3{ 0.0, 1.0, 0.0 };
-        }
-        if (mKeyDown[(int) Base::Key::DOWN]) {
-            forceWorld += vec3{ 0.0, -1.0, 0.0 };
-        }
-
-        forceWorld *= mMoveSpeed;
-
-        const auto dampingFactor = Base::mProps.flightMoveDamping;
+        auto getLocalDirection = [this]() -> vec3 {
+            vec3 directionLocal{ 0.0, 0.0, 0.0 };
+            if (mKeyDown[(int)Base::Key::FORWARD]) {
+                directionLocal += vec3{ 0.0, 0.0, -1.0 };
+            }
+            if (mKeyDown[(int)Base::Key::LEFT]) {
+                directionLocal += vec3{ -1.0, 0.0, 0.0 };
+            }
+            if (mKeyDown[(int)Base::Key::BACKWARD]) {
+                directionLocal += vec3{ 0.0, 0.0, 1.0 };
+            }
+            if (mKeyDown[(int)Base::Key::RIGHT]) {
+                directionLocal += vec3{ 1.0, 0.0, 0.0 };
+            }
+            return directionLocal;
+        };
+
+        auto getWorldDirection = [this](vec3 directionLocal) -> vec3 {
+            const mat4 orientation = mat4::lookAt(Base::mEye, Base::mTarget, Base::mProps.upVector);
+            vec3 directionWorld = (orientation * vec4{ directionLocal, 0.0f }).xyz;
+            if (mKeyDown[(int)Base::Key::UP]) {
+                directionWorld += vec3{ 0.0, 1.0, 0.0 };
+            }
+            if (mKeyDown[(int)Base::Key::DOWN]) {
+                directionWorld += vec3{ 0.0, -1.0, 0.0 };
+            }
+            return directionWorld;
+        };
+
+        vec3 const localDirection = getLocalDirection();
+        vec3 const worldDirection = getWorldDirection(localDirection);
+
+        // unit of dampingFactor is [1/s]
+        FLOAT const dampingFactor = Base::mProps.flightMoveDamping;
         if (dampingFactor == 0.0) {
             // Without damping, we simply treat the force as our velocity.
-            mEyeVelocity = forceWorld;
+            vec3 const speed = worldDirection * mMoveSpeed;
+            mEyeVelocity = speed;
+            vec3 const positionDelta = mEyeVelocity * deltaTime;
+            Base::mEye += positionDelta;
+            Base::mTarget += positionDelta;
         } else {
-            // The dampingFactor acts as "friction", which acts upon the camera in the direction
-            // opposite its velocity.
-            // Force is also multiplied by the dampingFactor, to "make up" for the friction.
-            // This ensures that the max velocity still approaches mMoveSpeed;
-            vec3 velocityDelta = (forceWorld - mEyeVelocity) * dampingFactor;
-            mEyeVelocity += velocityDelta * deltaTime;
+            auto dt = deltaTime / 16.0;
+            for (size_t i = 0; i < 16; i++) {
+                // Note: the algorithm below doesn't work well for large time steps because
+                // we're not using a closed form for updating the position, so we need
+                // to loop a few times. We could make this better by having a dynamic
+                // loop count. What we're really doing is evaluating the solution to
+                // a differential equation numerically.
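The note above can be illustrated in isolation. The sketch below is a simplified 1D version of that sub-stepped Euler update, not the manipulator code itself; `State`, `integrate`, and the parameter names are invented for the example.

```cpp
// Sketch of the sub-stepped update described above: velocity decays by a damping factor,
// is pushed toward a target speed, and the position advances with the updated velocity.
#include <algorithm>
#include <cmath>

struct State {
    float position = 0.0f;   // 1D stand-in for the camera eye/target
    float velocity = 0.0f;   // current speed along the move direction
};

inline void integrate(State& s, float direction /* -1, 0 or +1 */, float targetSpeed,
        float damping /* [1/s] */, float deltaTime, int subSteps = 16) {
    const float dt = deltaTime / float(subSteps);
    for (int i = 0; i < subSteps; i++) {
        // friction: v' = -damping * v  ==>  v += -damping * v * dt
        s.velocity *= std::max(0.0f, 1.0f - damping * dt);
        // "gas": accelerate in proportion to how far we are below the target speed
        const float accel = damping * std::max(targetSpeed - std::abs(s.velocity), 0.0f);
        s.velocity += direction * accel * dt;
        // advance the position with the freshly updated velocity
        s.position += s.velocity * dt;
    }
}
```

For the pure damping term, a closed form would be `v *= std::exp(-damping * dt)`; the sub-stepping approximates that decay while also folding in the acceleration toward the target speed at each step.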
+
+                // Kinetic friction is a force opposing velocity and proportional to it:
+                // F = -kv
+                // F = ma
+                // ==> ma = -kv
+                // a = -vk/m [m.s^-2] = [m/s] * [Kg/s] / [Kg]
+                // ==> dampingFactor = k/m [1/s] = [Kg/s] / [Kg]
+                //
+                // The velocity update for dt due to friction is then:
+                // v = v + a.dt
+                //   = v - v * dampingFactor * dt
+                //   = v * (1.0 - dampingFactor * dt)
+                mEyeVelocity = mEyeVelocity * saturate(1.0 - dampingFactor * dt);
+
+                // We also undergo an acceleration proportional to the distance to the target speed
+                // (the closer we are the less we accelerate, similar to a car).
+                // F = k * (target_v - v)
+                // F = ma
+                // ==> ma = k * (target_v - v)
+                // a = k/m * (target_v - v) [m.s^-2] = [Kg/s] / [Kg] * [m/s]
+                //
+                // The velocity update for dt due to the acceleration (the gas basically) is then:
+                // v = v + a.dt
+                //   = v + k/m * (target_v - v).dt
+                // We're using the same dampingFactor here, but we don't have to.
+                auto const accelerationFactor = dampingFactor;
+                vec3 const acceleration = worldDirection *
+                        (accelerationFactor * std::max(mMoveSpeed - length(mEyeVelocity), FLOAT(0)));
+                mEyeVelocity += acceleration * dt;
+                vec3 const positionDelta = mEyeVelocity * dt;
+                Base::mEye += positionDelta;
+                Base::mTarget += positionDelta;
+            }
         }
-
-        const vec3 positionDelta = mEyeVelocity * deltaTime;
-
-        Base::mEye += positionDelta;
-        Base::mTarget += positionDelta;
     }
 
     Bookmark getCurrentBookmark() const override {
diff --git a/libs/filamentapp/src/FilamentApp.cpp b/libs/filamentapp/src/FilamentApp.cpp
index 108bb0ac535..7e315b0da47 100644
--- a/libs/filamentapp/src/FilamentApp.cpp
+++ b/libs/filamentapp/src/FilamentApp.cpp
@@ -430,8 +430,10 @@ void FilamentApp::run(const Config& config, SetupCallback setupCallback,
         window->mDebugCamera->lookAt(eye, center, up);
 
         // Update the cube distortion matrix used for frustum visualization.
- const Camera* lightmapCamera = window->mMainView->getView()->getDirectionalLightCamera(); - lightmapCube->mapFrustum(*mEngine, lightmapCamera); + const Camera* lightmapCamera = window->mMainView->getView()->getDirectionalShadowCamera(); + if (lightmapCamera) { + lightmapCube->mapFrustum(*mEngine, lightmapCamera); + } cameraCube->mapFrustum(*mEngine, window->mMainCamera); // Delay rendering for roughly one monitor refresh interval @@ -713,7 +715,10 @@ FilamentApp::Window::Window(FilamentApp* filamentApp, mGodView->setCameraManipulator(mDebugCameraMan); // Ortho view obviously uses an ortho camera - mOrthoView->setCamera( (Camera *)mMainView->getView()->getDirectionalLightCamera() ); + Camera const* debugDirectionalShadowCamera = mMainView->getView()->getDirectionalShadowCamera(); + if (debugDirectionalShadowCamera) { + mOrthoView->setCamera(const_cast(debugDirectionalShadowCamera)); + } } // configure the cameras diff --git a/libs/geometry/src/MikktspaceImpl.cpp b/libs/geometry/src/MikktspaceImpl.cpp index f9f54c332cb..19942ada3bd 100644 --- a/libs/geometry/src/MikktspaceImpl.cpp +++ b/libs/geometry/src/MikktspaceImpl.cpp @@ -19,12 +19,13 @@ #include #include - #include #include #include +#include // memcpy + namespace filament::geometry { using namespace filament::math; @@ -98,7 +99,7 @@ void MikktspaceImpl::setTSpaceBasic(SMikkTSpaceContext const* context, float con cursor += 36; for (auto [attribArray, attribStride, attribSize]: wrapper->mInputAttribArrays) { uint8_t const* input = pointerAdd(attribArray, vertInd, attribStride); - std::memcpy(cursor, input, attribSize); + memcpy(cursor, input, attribSize); cursor += attribSize; } } diff --git a/libs/math/include/math/mat3.h b/libs/math/include/math/mat3.h index 5ad06bdf4bc..035865fe2bb 100644 --- a/libs/math/include/math/mat3.h +++ b/libs/math/include/math/mat3.h @@ -17,15 +17,21 @@ #ifndef TNT_MATH_MAT3_H #define TNT_MATH_MAT3_H -#include #include #include #include +#include +#include #include #include #include +#include + +#include +#include + namespace filament { namespace math { // ------------------------------------------------------------------------------------- diff --git a/libs/utils/include/utils/Allocator.h b/libs/utils/include/utils/Allocator.h index c726ac5ffe6..073206f48ba 100644 --- a/libs/utils/include/utils/Allocator.h +++ b/libs/utils/include/utils/Allocator.h @@ -30,6 +30,7 @@ #include #include #include +#include namespace utils { @@ -43,14 +44,14 @@ static inline P* add(P* a, T b) noexcept { template static inline P* align(P* p, size_t alignment) noexcept { // alignment must be a power-of-two - assert(alignment && !(alignment & alignment-1)); + assert_invariant(alignment && !(alignment & alignment-1)); return (P*)((uintptr_t(p) + alignment - 1) & ~(alignment - 1)); } template static inline P* align(P* p, size_t alignment, size_t offset) noexcept { P* const r = align(add(p, offset), alignment); - assert(r >= add(p, offset)); + assert_invariant(r >= add(p, offset)); return r; } @@ -89,20 +90,19 @@ class LinearAllocator { // branch-less allocation void* const p = pointermath::align(current(), alignment, extra); void* const c = pointermath::add(p, size); - bool success = c <= end(); + bool const success = c <= end(); set_current(success ? c : current()); return success ? 
p : nullptr; } // API specific to this allocator - void *getCurrent() UTILS_RESTRICT noexcept { return current(); } // free memory back to the specified point void rewind(void* p) UTILS_RESTRICT noexcept { - assert(p>=mBegin && p= mBegin && p < end()); set_current(p); } @@ -122,16 +122,21 @@ class LinearAllocator { void swap(LinearAllocator& rhs) noexcept; void *base() noexcept { return mBegin; } + void const *base() const noexcept { return mBegin; } void free(void*, size_t) UTILS_RESTRICT noexcept { } -private: +protected: void* end() UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mSize); } + void const* end() const UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mSize); } + void* current() UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mCur); } + void const* current() const UTILS_RESTRICT noexcept { return pointermath::add(mBegin, mCur); } + +private: void set_current(void* p) UTILS_RESTRICT noexcept { mCur = uint32_t(uintptr_t(p) - uintptr_t(mBegin)); } - void* mBegin = nullptr; uint32_t mSize = 0; uint32_t mCur = 0; @@ -152,9 +157,7 @@ class HeapAllocator { explicit HeapAllocator(const AREA&) { } // our allocator concept - void* alloc(size_t size, size_t alignment = alignof(std::max_align_t), size_t extra = 0) { - // this allocator doesn't support 'extra' - assert(extra == 0); + void* alloc(size_t size, size_t alignment = alignof(std::max_align_t)) { return aligned_alloc(size, alignment); } @@ -171,6 +174,50 @@ class HeapAllocator { void swap(HeapAllocator&) noexcept { } }; +/* ------------------------------------------------------------------------------------------------ + * LinearAllocatorWithFallback + * + * This is a LinearAllocator that falls back to a HeapAllocator when allocation fail. The Heap + * allocator memory is freed only when the LinearAllocator is reset or destroyed. + * ------------------------------------------------------------------------------------------------ + */ +class LinearAllocatorWithFallback : private LinearAllocator, private HeapAllocator { + std::vector mHeapAllocations; +public: + LinearAllocatorWithFallback(void* begin, void* end) noexcept + : LinearAllocator(begin, end) { + } + + template + explicit LinearAllocatorWithFallback(const AREA& area) + : LinearAllocatorWithFallback(area.begin(), area.end()) { + } + + ~LinearAllocatorWithFallback() noexcept { + LinearAllocatorWithFallback::reset(); + } + + void* alloc(size_t size, size_t alignment = alignof(std::max_align_t)); + + void *getCurrent() noexcept { + return LinearAllocator::getCurrent(); + } + + void rewind(void* p) noexcept { + if (p >= LinearAllocator::base() && p < LinearAllocator::end()) { + LinearAllocator::rewind(p); + } + } + + void reset() noexcept; + + void free(void*, size_t) noexcept { } + + bool isHeapAllocation(void* p) const noexcept { + return p < LinearAllocator::base() || p >= LinearAllocator::end(); + } +}; + // ------------------------------------------------------------------------------------------------ class FreeList { @@ -186,13 +233,13 @@ class FreeList { Node* const head = mHead; mHead = head ? 
head->next : nullptr; // this could indicate a use after free - assert(!mHead || mHead >= mBegin && mHead < mEnd); + assert_invariant(!mHead || mHead >= mBegin && mHead < mEnd); return head; } void push(void* p) noexcept { - assert(p); - assert(p >= mBegin && p < mEnd); + assert_invariant(p); + assert_invariant(p >= mBegin && p < mEnd); // TODO: assert this is one of our pointer (i.e.: it's address match one of ours) Node* const head = static_cast(p); head->next = mHead; @@ -203,11 +250,11 @@ class FreeList { return mHead; } -private: struct Node { Node* next; }; +private: static Node* init(void* begin, void* end, size_t elementSize, size_t alignment, size_t extra) noexcept; @@ -225,20 +272,20 @@ class AtomicFreeList { AtomicFreeList() noexcept = default; AtomicFreeList(void* begin, void* end, size_t elementSize, size_t alignment, size_t extra) noexcept; - AtomicFreeList(const FreeList& rhs) = delete; - AtomicFreeList& operator=(const FreeList& rhs) = delete; + AtomicFreeList(const AtomicFreeList& rhs) = delete; + AtomicFreeList& operator=(const AtomicFreeList& rhs) = delete; void* pop() noexcept { - Node* const storage = mStorage; + Node* const pStorage = mStorage; HeadPtr currentHead = mHead.load(); while (currentHead.offset >= 0) { - // The value of "next" we load here might already contain application data if another + // The value of "pNext" we load here might already contain application data if another // thread raced ahead of us. But in that case, the computed "newHead" will be discarded // since compare_exchange_weak fails. Then this thread will loop with the updated // value of currentHead, and try again. - Node* const next = storage[currentHead.offset].next.load(std::memory_order_relaxed); - const HeadPtr newHead{ next ? int32_t(next - storage) : -1, currentHead.tag + 1 }; + Node* const pNext = pStorage[currentHead.offset].next.load(std::memory_order_relaxed); + const HeadPtr newHead{ pNext ? int32_t(pNext - pStorage) : -1, currentHead.tag + 1 }; // In the rare case that the other thread that raced ahead of us already returned the // same mHead we just loaded, but it now has a different "next" value, the tag field will not // match, and compare_exchange_weak will fail and prevent that particular race condition. @@ -246,18 +293,18 @@ class AtomicFreeList { // This assert needs to occur after we have validated that there was no race condition // Otherwise, next might already contain application data, if another thread // raced ahead of us after we loaded mHead, but before we loaded mHead->next. - assert(!next || next >= storage); + assert_invariant(!pNext || pNext >= pStorage); break; } } - void* p = (currentHead.offset >= 0) ? (storage + currentHead.offset) : nullptr; - assert(!p || p >= storage); + void* p = (currentHead.offset >= 0) ? (pStorage + currentHead.offset) : nullptr; + assert_invariant(!p || p >= pStorage); return p; } void push(void* p) noexcept { Node* const storage = mStorage; - assert(p && p >= storage); + assert_invariant(p && p >= storage); Node* const node = static_cast(p); HeadPtr currentHead = mHead.load(); HeadPtr newHead = { int32_t(node - storage), currentHead.tag + 1 }; @@ -272,7 +319,6 @@ class AtomicFreeList { return mStorage + mHead.load(std::memory_order_relaxed).offset; } -private: struct Node { // This should be a regular (non-atomic) pointer, but this causes TSAN to complain // about a data-race that exists but is benin. 
We always use this atomic<> in @@ -303,6 +349,7 @@ class AtomicFreeList { std::atomic next; }; +private: // This struct is using a 32-bit offset into the arena rather than // a direct pointer, because together with the 32-bit tag, it needs to // fit into 8 bytes. If it was any larger, it would not be possible to @@ -325,14 +372,15 @@ template < size_t OFFSET = 0, typename FREELIST = FreeList> class PoolAllocator { - static_assert(ELEMENT_SIZE >= sizeof(void*), "ELEMENT_SIZE must accommodate at least a pointer"); + static_assert(ELEMENT_SIZE >= sizeof(typename FREELIST::Node), + "ELEMENT_SIZE must accommodate at least a FreeList::Node"); public: // our allocator concept void* alloc(size_t size = ELEMENT_SIZE, size_t alignment = ALIGNMENT, size_t offset = OFFSET) noexcept { - assert(size <= ELEMENT_SIZE); - assert(alignment <= ALIGNMENT); - assert(offset == OFFSET); + assert_invariant(size <= ELEMENT_SIZE); + assert_invariant(alignment <= ALIGNMENT); + assert_invariant(offset == OFFSET); return mFreeList.pop(); } @@ -346,6 +394,10 @@ class PoolAllocator { : mFreeList(begin, end, ELEMENT_SIZE, ALIGNMENT, OFFSET) { } + PoolAllocator(void* begin, size_t size) noexcept + : mFreeList(begin, static_cast(begin) + size, ELEMENT_SIZE, ALIGNMENT, OFFSET) { + } + template explicit PoolAllocator(const AREA& area) noexcept : PoolAllocator(area.begin(), area.end()) { @@ -585,32 +637,54 @@ class Arena { mListener(name, mArea.data(), mArea.size()) { } + template + void* alloc(size_t size, size_t alignment, size_t extra, ARGS&& ... args) noexcept { + std::lock_guard guard(mLock); + void* p = mAllocator.alloc(size, alignment, extra, std::forward(args) ...); + mListener.onAlloc(p, size, alignment, extra); + return p; + } + + // allocate memory from arena with given size and alignment // (acceptable size/alignment may depend on the allocator provided) - void* alloc(size_t size, size_t alignment = alignof(std::max_align_t), size_t extra = 0) noexcept { + void* alloc(size_t size, size_t alignment, size_t extra) noexcept { std::lock_guard guard(mLock); void* p = mAllocator.alloc(size, alignment, extra); mListener.onAlloc(p, size, alignment, extra); return p; } + void* alloc(size_t size, size_t alignment = alignof(std::max_align_t)) noexcept { + std::lock_guard guard(mLock); + void* p = mAllocator.alloc(size, alignment); + mListener.onAlloc(p, size, alignment, 0); + return p; + } + // Allocate an array of trivially destructible objects // for safety, we disable the object-based alloc method if the object type is not // trivially destructible, since free() won't call the destructor and this is allocating // an array. template ::value>::type> - T* alloc(size_t count, size_t alignment = alignof(T), size_t extra = 0) noexcept { + T* alloc(size_t count, size_t alignment, size_t extra) noexcept { return (T*)alloc(count * sizeof(T), alignment, extra); } - // return memory pointed by p to the arena - // (actual behaviour may depend on allocator provided) - void free(void* p) noexcept { + template ::value>::type> + T* alloc(size_t count, size_t alignment = alignof(T)) noexcept { + return (T*)alloc(count * sizeof(T), alignment); + } + + // some allocators require more parameters + template + void free(void* p, size_t size, ARGS&& ... 
args) noexcept { if (p) { std::lock_guard guard(mLock); - mListener.onFree(p); - mAllocator.free(p); + mListener.onFree(p, size); + mAllocator.free(p, size, std::forward(args) ...); } } @@ -623,6 +697,16 @@ class Arena { } } + // return memory pointed by p to the arena + // (actual behaviour may depend on allocator provided) + void free(void* p) noexcept { + if (p) { + std::lock_guard guard(mLock); + mListener.onFree(p); + mAllocator.free(p); + } + } + // some allocators don't have a free() call, but a single reset() or rewind() instead void reset() noexcept { std::lock_guard guard(mLock); @@ -720,6 +804,8 @@ class ArenaScope { } public: + using Arena = ARENA; + explicit ArenaScope(ARENA& allocator) : mArena(allocator), mRewind(allocator.getCurrent()) { } @@ -771,7 +857,7 @@ class ArenaScope { } // use with caution - ARENA& getAllocator() noexcept { return mArena; } + ARENA& getArena() noexcept { return mArena; } private: ARENA& mArena; diff --git a/libs/utils/src/Allocator.cpp b/libs/utils/src/Allocator.cpp index 2d7a8fcbe92..fd6e5945691 100644 --- a/libs/utils/src/Allocator.cpp +++ b/libs/utils/src/Allocator.cpp @@ -16,6 +16,8 @@ #include +#include +#include #include #include @@ -52,6 +54,29 @@ void LinearAllocator::swap(LinearAllocator& rhs) noexcept { std::swap(mCur, rhs.mCur); } + +// ------------------------------------------------------------------------------------------------ +// LinearAllocatorWithFallback +// ------------------------------------------------------------------------------------------------ + +void* LinearAllocatorWithFallback::alloc(size_t size, size_t alignment) { + void* p = LinearAllocator::alloc(size, alignment); + if (UTILS_UNLIKELY(!p)) { + p = HeapAllocator::alloc(size, alignment); + mHeapAllocations.push_back(p); + } + assert_invariant(p); + return p; +} + +void LinearAllocatorWithFallback::reset() noexcept { + LinearAllocator::reset(); + for (auto* p : mHeapAllocations) { + HeapAllocator::free(p); + } + mHeapAllocations.clear(); +} + // ------------------------------------------------------------------------------------------------ // FreeList // ------------------------------------------------------------------------------------------------ @@ -61,8 +86,8 @@ FreeList::Node* FreeList::init(void* begin, void* end, { void* const p = pointermath::align(begin, alignment, extra); void* const n = pointermath::align(pointermath::add(p, elementSize), alignment, extra); - assert(p >= begin && p < end); - assert(n >= begin && n < end && n > p); + assert_invariant(p >= begin && p < end); + assert_invariant(n >= begin && n < end && n > p); const size_t d = uintptr_t(n) - uintptr_t(p); const size_t num = (uintptr_t(end) - uintptr_t(p)) / d; @@ -77,8 +102,8 @@ FreeList::Node* FreeList::init(void* begin, void* end, cur->next = next; cur = next; } - assert(cur < end); - assert(pointermath::add(cur, d) <= end); + assert_invariant(cur < end); + assert_invariant(pointermath::add(cur, d) <= end); cur->next = nullptr; return head; } @@ -97,13 +122,13 @@ AtomicFreeList::AtomicFreeList(void* begin, void* end, { #ifdef __ANDROID__ // on some platform (e.g. web) this returns false. we really only care about mobile though. 
- assert(mHead.is_lock_free()); + assert_invariant(mHead.is_lock_free()); #endif void* const p = pointermath::align(begin, alignment, extra); void* const n = pointermath::align(pointermath::add(p, elementSize), alignment, extra); - assert(p >= begin && p < end); - assert(n >= begin && n < end && n > p); + assert_invariant(p >= begin && p < end); + assert_invariant(n >= begin && n < end && n > p); const size_t d = uintptr_t(n) - uintptr_t(p); const size_t num = (uintptr_t(end) - uintptr_t(p)) / d; @@ -119,8 +144,8 @@ AtomicFreeList::AtomicFreeList(void* begin, void* end, cur->next = next; cur = next; } - assert(cur < end); - assert(pointermath::add(cur, d) <= end); + assert_invariant(cur < end); + assert_invariant(pointermath::add(cur, d) <= end); cur->next = nullptr; mHead.store({ int32_t(head - mStorage), 0 }); @@ -148,22 +173,25 @@ TrackingPolicy::HighWatermark::~HighWatermark() noexcept { } void TrackingPolicy::HighWatermark::onFree(void* p, size_t size) noexcept { - assert(mCurrent >= size); + // FIXME: this code is incorrect with LinearAllocators because free() is a no-op for them + assert_invariant(mCurrent >= size); mCurrent -= uint32_t(size); } void TrackingPolicy::HighWatermark::onReset() noexcept { // we should never be here if mBase is nullptr because compilation would have failed when // Arena::onReset() tries to call the underlying allocator's onReset() - assert(mBase); + assert_invariant(mBase); mCurrent = 0; } void TrackingPolicy::HighWatermark::onRewind(void const* addr) noexcept { // we should never be here if mBase is nullptr because compilation would have failed when // Arena::onRewind() tries to call the underlying allocator's onReset() - assert(mBase); - assert(addr >= mBase); - mCurrent = uint32_t(uintptr_t(addr) - uintptr_t(mBase)); + assert_invariant(mBase); + // for LinearAllocatorWithFallback we could get pointers outside the range + if (addr >= mBase && addr < pointermath::add(mBase, mSize)) { + mCurrent = uint32_t(uintptr_t(addr) - uintptr_t(mBase)); + } } // ------------------------------------------------------------------------------------------------ @@ -183,7 +211,7 @@ void TrackingPolicy::Debug::onFree(void* p, size_t size) noexcept { void TrackingPolicy::Debug::onReset() noexcept { // we should never be here if mBase is nullptr because compilation would have failed when // Arena::onReset() tries to call the underlying allocator's onReset() - assert(mBase); + assert_invariant(mBase); memset(mBase, 0xec, mSize); } diff --git a/web/filament-js/package.json b/web/filament-js/package.json index 7ebc0a0ff4b..52b661bd10e 100644 --- a/web/filament-js/package.json +++ b/web/filament-js/package.json @@ -1,6 +1,6 @@ { "name": "filament", - "version": "1.50.3", + "version": "1.50.4", "description": "Real-time physically based rendering engine", "main": "filament.js", "module": "filament.js",
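Two of the utility changes above lend themselves to small standalone sketches. First, the heap-fallback idea behind `LinearAllocatorWithFallback`: bump-allocate from a fixed region, spill to the heap when that fails, and release the spilled blocks only on `reset()`. The `BumpWithHeapFallback` class below is an invented stand-in, not the Filament implementation, and it glosses over aligned heap allocation.

```cpp
// Sketch of a linear (bump) allocator with a heap fallback; heap blocks are retained
// until reset() so individual frees stay no-ops, matching the linear-allocator contract.
#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <vector>

class BumpWithHeapFallback {
public:
    BumpWithHeapFallback(void* begin, void* end)
            : mBegin(static_cast<uint8_t*>(begin)), mEnd(static_cast<uint8_t*>(end)), mCur(mBegin) {}

    ~BumpWithHeapFallback() { reset(); }

    void* alloc(size_t size, size_t alignment = alignof(std::max_align_t)) {
        // align the bump pointer up to the requested power-of-two alignment
        uintptr_t p = (uintptr_t(mCur) + alignment - 1) & ~uintptr_t(alignment - 1);
        if (p + size <= uintptr_t(mEnd)) {
            mCur = reinterpret_cast<uint8_t*>(p + size);
            return reinterpret_cast<void*>(p);
        }
        // linear storage exhausted: fall back to the heap and remember the block
        void* heap = ::malloc(size);            // alignment handling elided for brevity
        mHeapBlocks.push_back(heap);
        return heap;
    }

    void reset() {
        mCur = mBegin;                          // rewind the linear part
        for (void* p : mHeapBlocks) ::free(p);  // release the fallback blocks
        mHeapBlocks.clear();
    }

private:
    uint8_t* mBegin;
    uint8_t* mEnd;
    uint8_t* mCur;
    std::vector<void*> mHeapBlocks;
};
```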
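Second, the ABA discussion in the `AtomicFreeList` comments above: packing a 32-bit index together with a 32-bit tag into the atomic head is what lets `compare_exchange_weak` reject a stale head even when the same index reappears. The `TaggedIndexStack` below is an invented, simplified illustration of that tagged-head idea, not the backend allocator; it keeps the next-links in a separate array of atomics rather than overlaying them on user data.

```cpp
// Sketch of a tagged head for a lock-free free list of indices: the tag changes on every
// head update, so a racing thread holding a stale head fails its CAS even if the same
// index has since been popped and pushed back (the ABA case).
#include <atomic>
#include <cstdint>
#include <vector>

class TaggedIndexStack {
public:
    explicit TaggedIndexStack(size_t count) : mNext(count) {
        // chain all indices: 0 -> 1 -> ... -> count-1 -> -1 (end of list)
        for (size_t i = 0; i < count; i++) {
            mNext[i].store(i + 1 < count ? int32_t(i + 1) : -1, std::memory_order_relaxed);
        }
        mHead.store(Head{ count ? 0 : -1, 0 });
    }

    int32_t pop() {
        Head cur = mHead.load();
        while (cur.index >= 0) {
            Head next{ mNext[size_t(cur.index)].load(std::memory_order_relaxed), cur.tag + 1 };
            if (mHead.compare_exchange_weak(cur, next)) {
                return cur.index;   // successfully detached this index
            }
            // cur was reloaded by compare_exchange_weak; retry with the fresh head
        }
        return -1;                  // empty
    }

    void push(int32_t index) {
        Head cur = mHead.load();
        Head next{ index, 0 };
        do {
            mNext[size_t(index)].store(cur.index, std::memory_order_relaxed);  // relink the node
            next.tag = cur.tag + 1;                                            // bump the tag
        } while (!mHead.compare_exchange_weak(cur, next));
    }

private:
    struct Head {
        int32_t index;   // index of the top node, -1 when empty
        uint32_t tag;    // changes on every head update, defeats ABA
    };
    std::atomic<Head> mHead{};
    std::vector<std::atomic<int32_t>> mNext;
};
```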