From 54cae5dff84cd8bd07d310354e9b64bcbf9dc4c7 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle Date: Sat, 26 Oct 2024 15:30:13 +0300 Subject: [PATCH 1/3] cmake: Enable userfaultfd --- CMakeLists.txt | 7 +++++++ src/video_core/page_manager.cpp | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 04bd6a33193..e73062c5d58 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -15,6 +15,8 @@ if (NOT CMAKE_BUILD_TYPE) set(CMAKE_BUILD_TYPE Release) endif() +include(CMakeDependentOption) + project(shadPS4) # Forcing PIE makes sure that the base address is high enough so that it doesn't clash with the PS4 memory. @@ -31,6 +33,7 @@ endif() option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF) option(ENABLE_DISCORD_RPC "Enable the Discord RPC integration" ON) +CMAKE_DEPENDENT_OPTION(ENABLE_USERFAULTFD "Enable write tracking using userfaultfd on unix" ON "NOT LINUX" OFF) # First, determine whether to use CMAKE_OSX_ARCHITECTURES or CMAKE_SYSTEM_PROCESSOR. if (APPLE AND CMAKE_OSX_ARCHITECTURES) @@ -833,6 +836,10 @@ if (ENABLE_QT_GUI) add_definitions(-DENABLE_QT_GUI) endif() +if (ENABLE_USERFAULTFD) + add_definitions(-DENABLE_USERFAULTFD) +endif() + if (WIN32) target_link_libraries(shadps4 PRIVATE mincore winpthreads) diff --git a/src/video_core/page_manager.cpp b/src/video_core/page_manager.cpp index a49fff43a2f..e3a18f11ddb 100644 --- a/src/video_core/page_manager.cpp +++ b/src/video_core/page_manager.cpp @@ -28,7 +28,7 @@ namespace VideoCore { constexpr size_t PAGESIZE = 4_KB; constexpr size_t PAGEBITS = 12; -#if ENABLE_USERFAULTFD +#ifdef ENABLE_USERFAULTFD struct PageManager::Impl { Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} { uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK); From 7702ceb8d18efc00d411f43c5ea58cc0bef870a4 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle Date: Sat, 26 Oct 2024 18:34:12 +0300 Subject: [PATCH 2/3] texture_cache: Subresource uploads --- CMakeLists.txt | 2 +- src/video_core/amdgpu/liverpool.cpp | 2 +- src/video_core/texture_cache/image.cpp | 3 + src/video_core/texture_cache/image.h | 16 +++++ .../texture_cache/texture_cache.cpp | 67 ++++++++++++++----- 5 files changed, 70 insertions(+), 20 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index e73062c5d58..c24a89b75eb 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,7 +33,7 @@ endif() option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF) option(ENABLE_DISCORD_RPC "Enable the Discord RPC integration" ON) -CMAKE_DEPENDENT_OPTION(ENABLE_USERFAULTFD "Enable write tracking using userfaultfd on unix" ON "NOT LINUX" OFF) +CMAKE_DEPENDENT_OPTION(ENABLE_USERFAULTFD "Enable write tracking using userfaultfd on unix" ON "NOT LINUX OR APPLE" OFF) # First, determine whether to use CMAKE_OSX_ARCHITECTURES or CMAKE_SYSTEM_PROCESSOR. if (APPLE AND CMAKE_OSX_ARCHITECTURES) diff --git a/src/video_core/amdgpu/liverpool.cpp b/src/video_core/amdgpu/liverpool.cpp index 53aab630ec7..a34c5db5e22 100644 --- a/src/video_core/amdgpu/liverpool.cpp +++ b/src/video_core/amdgpu/liverpool.cpp @@ -550,7 +550,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span dcb, std::spansrc_sel == DmaDataSrc::Gds && dma_data->dst_sel == DmaDataDst::Memory) { - LOG_WARNING(Render_Vulkan, "GDS memory read"); + LOG_DEBUG(Render_Vulkan, "GDS memory read"); } else if (dma_data->src_sel == DmaDataSrc::Memory && dma_data->dst_sel == DmaDataDst::Memory) { rasterizer->InlineData(dma_data->DstAddress(), diff --git a/src/video_core/texture_cache/image.cpp b/src/video_core/texture_cache/image.cpp index bea2ce4ff1d..258e6d53501 100644 --- a/src/video_core/texture_cache/image.cpp +++ b/src/video_core/texture_cache/image.cpp @@ -137,6 +137,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_, : instance{&instance_}, scheduler{&scheduler_}, info{info_}, image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address}, cpu_addr_end{cpu_addr + info.guest_size_bytes} { + ASSERT(info.resources.layers * info.resources.levels <= 64); + subres_state = + std::numeric_limits::max() >> (64 - info.resources.levels * info.resources.layers); mip_hashes.resize(info.resources.levels); ASSERT(info.pixel_format != vk::Format::eUndefined); // Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case diff --git a/src/video_core/texture_cache/image.h b/src/video_core/texture_cache/image.h index 312ff97e835..d0d94dc45b2 100644 --- a/src/video_core/texture_cache/image.h +++ b/src/video_core/texture_cache/image.h @@ -91,9 +91,24 @@ struct Image { return image_view_ids[std::distance(image_view_infos.begin(), it)]; } + void ForEachSubresource(VAddr addr, size_t size, auto&& func) { + const u32 num_layers = info.resources.layers; + for (u32 m = 0; const auto& mip : info.mips_layout) { + for (u32 l = 0; l < num_layers; l++) { + const VAddr mip_addr = info.guest_address + mip.offset * num_layers + mip.size * l; + const VAddr mip_addr_end = mip_addr + mip.size; + if (mip_addr < addr + size && addr < mip_addr_end) { + func(m * num_layers + l); + } + } + m++; + } + } + boost::container::small_vector GetBarriers( vk::ImageLayout dst_layout, vk::Flags dst_mask, vk::PipelineStageFlags2 dst_stage, std::optional subres_range); + void Transit(vk::ImageLayout dst_layout, vk::Flags dst_mask, std::optional range, vk::CommandBuffer cmdbuf = {}); void Upload(vk::Buffer buffer, u64 offset); @@ -111,6 +126,7 @@ struct Image { VAddr cpu_addr_end = 0; std::vector image_view_infos; std::vector image_view_ids; + u64 subres_state{}; // Resource state tracking vk::ImageUsageFlags usage; diff --git a/src/video_core/texture_cache/texture_cache.cpp b/src/video_core/texture_cache/texture_cache.cpp index 279e0d82b91..9a3dad607fe 100644 --- a/src/video_core/texture_cache/texture_cache.cpp +++ b/src/video_core/texture_cache/texture_cache.cpp @@ -46,8 +46,10 @@ TextureCache::~TextureCache() = default; void TextureCache::InvalidateMemory(VAddr address, size_t size) { std::scoped_lock lock{mutex}; ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) { - // Ensure image is reuploaded when accessed again. + // Mark any subresources as dirty. image.flags |= ImageFlagBits::CpuDirty; + image.ForEachSubresource(address, size, + [&](u32 index) { image.subres_state |= 1ULL << index; }); // Untrack image, so the range is unprotected and the guest can write freely. UntrackImage(image_id); }); @@ -57,12 +59,13 @@ void TextureCache::InvalidateMemoryFromGPU(VAddr address, size_t max_size) { std::scoped_lock lock{mutex}; ForEachImageInRegion(address, max_size, [&](ImageId image_id, Image& image) { // Only consider images that match base address. - // TODO: Maybe also consider subresources if (image.info.guest_address != address) { return; } - // Ensure image is reuploaded when accessed again. + // Mark any subresources as dirty. image.flags |= ImageFlagBits::GpuDirty; + image.ForEachSubresource(address, max_size, + [&](u32 index) { image.subres_state |= 1ULL << index; }); }); } @@ -375,12 +378,18 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule return; } - const auto& num_layers = image.info.resources.layers; - const auto& num_mips = image.info.resources.levels; + const u32 num_layers = image.info.resources.layers; + const u32 num_mips = image.info.resources.levels; ASSERT(num_mips == image.info.mips_layout.size()); boost::container::small_vector image_copy{}; for (u32 m = 0; m < num_mips; m++) { + const u32 mask = (1 << num_layers) - 1; + const u64 subres_state = (image.subres_state >> (m * num_layers)) & mask; + if (subres_state == 0) { + continue; + } + const u32 width = std::max(image.info.size.width >> m, 1u); const u32 height = std::max(image.info.size.height >> m, 1u); const u32 depth = @@ -399,19 +408,40 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule image.mip_hashes[m] = hash; } - image_copy.push_back({ - .bufferOffset = mip_ofs * num_layers, - .bufferRowLength = static_cast(mip_pitch), - .bufferImageHeight = static_cast(mip_height), - .imageSubresource{ - .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, - .mipLevel = m, - .baseArrayLayer = 0, - .layerCount = num_layers, - }, - .imageOffset = {0, 0, 0}, - .imageExtent = {width, height, depth}, - }); + if (subres_state == mask) { + image_copy.push_back({ + .bufferOffset = mip_ofs * num_layers, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), + .imageSubresource{ + .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, + .mipLevel = m, + .baseArrayLayer = 0, + .layerCount = num_layers, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {width, height, depth}, + }); + } else { + for (u32 l = 0; l < num_layers; l++) { + if (!(subres_state & (1 << l))) { + continue; + } + image_copy.push_back({ + .bufferOffset = mip_ofs * num_layers + mip_size * l, + .bufferRowLength = static_cast(mip_pitch), + .bufferImageHeight = static_cast(mip_height), + .imageSubresource{ + .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil, + .mipLevel = m, + .baseArrayLayer = l, + .layerCount = 1, + }, + .imageOffset = {0, 0, 0}, + .imageExtent = {width, height, depth}, + }); + } + } } if (image_copy.empty()) { @@ -447,6 +477,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy); image.flags &= ~ImageFlagBits::Dirty; + image.subres_state = 0; } vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) { From dc5b8564348270c9e3621e77d09f97adde5ffff1 Mon Sep 17 00:00:00 2001 From: IndecisiveTurtle Date: Sat, 26 Oct 2024 18:57:39 +0300 Subject: [PATCH 3/3] vk_compute_pipeline: Add missed meta check --- CMakeLists.txt | 2 +- .../renderer_vulkan/vk_compute_pipeline.cpp | 19 +++++++++++++++++-- 2 files changed, 18 insertions(+), 3 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index c24a89b75eb..4bba4e3cdf5 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -33,7 +33,7 @@ endif() option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF) option(ENABLE_DISCORD_RPC "Enable the Discord RPC integration" ON) -CMAKE_DEPENDENT_OPTION(ENABLE_USERFAULTFD "Enable write tracking using userfaultfd on unix" ON "NOT LINUX OR APPLE" OFF) +option(ENABLE_USERFAULTFD "Enable write tracking using userfaultfd on unix" OFF) # First, determine whether to use CMAKE_OSX_ARCHITECTURES or CMAKE_SYSTEM_PROCESSOR. if (APPLE AND CMAKE_OSX_ARCHITECTURES) diff --git a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp index 7122ca134fb..0c3570ab502 100644 --- a/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp +++ b/src/video_core/renderer_vulkan/vk_compute_pipeline.cpp @@ -127,18 +127,33 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache, // we can skip the whole dispatch and update the tracked state instead. Also, it is not // intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we // will need its full emulation anyways. For cases of metadata read a warning will be logged. - for (const auto& desc : info->texture_buffers) { + const auto IsMetaUpdate = [&](const auto& desc) { const VAddr address = desc.GetSharp(*info).base_address; if (desc.is_written) { if (texture_cache.TouchMeta(address, true)) { LOG_TRACE(Render_Vulkan, "Metadata update skipped"); - return false; + return true; } } else { if (texture_cache.IsMeta(address)) { LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)"); } } + return false; + }; + + for (const auto& desc : info->buffers) { + if (desc.is_gds_buffer) { + continue; + } + if (IsMetaUpdate(desc)) { + return false; + } + } + for (const auto& desc : info->texture_buffers) { + if (IsMetaUpdate(desc)) { + return false; + } } BindBuffers(buffer_cache, texture_cache, *info, binding, push_data, set_writes,