texture_cache: Implement subresource specific uploads #1451

Open · wants to merge 3 commits into base: main
7 changes: 7 additions & 0 deletions CMakeLists.txt
@@ -15,6 +15,8 @@ if (NOT CMAKE_BUILD_TYPE)
set(CMAKE_BUILD_TYPE Release)
endif()

+include(CMakeDependentOption)
+
project(shadPS4)

# Forcing PIE makes sure that the base address is high enough so that it doesn't clash with the PS4 memory.
@@ -31,6 +33,7 @@ endif()

option(ENABLE_QT_GUI "Enable the Qt GUI. If not selected then the emulator uses a minimal SDL-based UI instead" OFF)
option(ENABLE_DISCORD_RPC "Enable the Discord RPC integration" ON)
+option(ENABLE_USERFAULTFD "Enable write tracking using userfaultfd on unix" OFF)

# First, determine whether to use CMAKE_OSX_ARCHITECTURES or CMAKE_SYSTEM_PROCESSOR.
if (APPLE AND CMAKE_OSX_ARCHITECTURES)
@@ -833,6 +836,10 @@ if (ENABLE_QT_GUI)
add_definitions(-DENABLE_QT_GUI)
endif()

+if (ENABLE_USERFAULTFD)
+    add_definitions(-DENABLE_USERFAULTFD)
+endif()
+
if (WIN32)
target_link_libraries(shadps4 PRIVATE mincore winpthreads)

2 changes: 1 addition & 1 deletion src/video_core/amdgpu/liverpool.cpp
@@ -550,7 +550,7 @@ Liverpool::Task Liverpool::ProcessGraphics(std::span<const u32> dcb, std::span<c
sizeof(u32), false);
} else if (dma_data->src_sel == DmaDataSrc::Gds &&
dma_data->dst_sel == DmaDataDst::Memory) {
-            LOG_WARNING(Render_Vulkan, "GDS memory read");
+            LOG_DEBUG(Render_Vulkan, "GDS memory read");
} else if (dma_data->src_sel == DmaDataSrc::Memory &&
dma_data->dst_sel == DmaDataDst::Memory) {
rasterizer->InlineData(dma_data->DstAddress<VAddr>(),
2 changes: 1 addition & 1 deletion src/video_core/page_manager.cpp
@@ -28,7 +28,7 @@ namespace VideoCore {
constexpr size_t PAGESIZE = 4_KB;
constexpr size_t PAGEBITS = 12;

-#if ENABLE_USERFAULTFD
+#ifdef ENABLE_USERFAULTFD
struct PageManager::Impl {
Impl(Vulkan::Rasterizer* rasterizer_) : rasterizer{rasterizer_} {
uffd = syscall(__NR_userfaultfd, O_CLOEXEC | O_NONBLOCK);
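Worth a line on why the `#ifdef` fix matters: the macro comes from `add_definitions(-DENABLE_USERFAULTFD)` above, so it is only ever defined (as 1) when the CMake option is ON. A minimal sketch of the preprocessor behavior at play, not taken from the PR:

```cpp
// Minimal sketch, not from the PR: -DENABLE_USERFAULTFD on the compile line
// is equivalent to `#define ENABLE_USERFAULTFD 1`, so `#if` happens to pass
// when the option is ON; with the option OFF the identifier is undefined,
// which `#if` silently evaluates as 0 (and -Wundef flags), while `#ifdef`
// expresses the intended definedness check directly.
#ifdef ENABLE_USERFAULTFD
constexpr bool write_tracking_via_uffd = true;
#else
constexpr bool write_tracking_via_uffd = false;
#endif
```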
19 changes: 17 additions & 2 deletions src/video_core/renderer_vulkan/vk_compute_pipeline.cpp
@@ -127,18 +127,33 @@ bool ComputePipeline::BindResources(VideoCore::BufferCache& buffer_cache,
// we can skip the whole dispatch and update the tracked state instead. Also, it is not
// intended to be consumed and in such rare cases (e.g. HTile introspection, CRAA) we
// will need its full emulation anyways. For cases of metadata read a warning will be logged.
-    for (const auto& desc : info->texture_buffers) {
+    const auto IsMetaUpdate = [&](const auto& desc) {
const VAddr address = desc.GetSharp(*info).base_address;
if (desc.is_written) {
if (texture_cache.TouchMeta(address, true)) {
LOG_TRACE(Render_Vulkan, "Metadata update skipped");
-                return false;
+                return true;
}
} else {
if (texture_cache.IsMeta(address)) {
LOG_WARNING(Render_Vulkan, "Unexpected metadata read by a CS shader (buffer)");
}
}
+        return false;
+    };

+    for (const auto& desc : info->buffers) {
+        if (desc.is_gds_buffer) {
+            continue;
+        }
+        if (IsMetaUpdate(desc)) {
+            return false;
+        }
+    }
+    for (const auto& desc : info->texture_buffers) {
+        if (IsMetaUpdate(desc)) {
+            return false;
+        }
+    }

BindBuffers(buffer_cache, texture_cache, *info, binding, push_data, set_writes,
3 changes: 3 additions & 0 deletions src/video_core/texture_cache/image.cpp
@@ -137,6 +137,9 @@ Image::Image(const Vulkan::Instance& instance_, Vulkan::Scheduler& scheduler_,
: instance{&instance_}, scheduler{&scheduler_}, info{info_},
image{instance->GetDevice(), instance->GetAllocator()}, cpu_addr{info.guest_address},
cpu_addr_end{cpu_addr + info.guest_size_bytes} {
+    ASSERT(info.resources.layers * info.resources.levels <= 64);
+    subres_state =
+        std::numeric_limits<u64>::max() >> (64 - info.resources.levels * info.resources.layers);
mip_hashes.resize(info.resources.levels);
ASSERT(info.pixel_format != vk::Format::eUndefined);
// Here we force `eExtendedUsage` as don't know all image usage cases beforehand. In normal case
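As a sanity check of the initializer just added to the constructor: for a hypothetical image with 3 mip levels and 2 array layers there are 6 subresources, so the shift leaves the low 6 bits set, i.e. every subresource starts out dirty. A standalone sketch with made-up values, mirroring the expression above:

```cpp
#include <cstdint>
#include <limits>

int main() {
    const uint32_t levels = 3, layers = 2; // hypothetical image
    // Same expression as the constructor: one bit per (mip, layer) pair,
    // bit index m * layers + l, all set on creation.
    const uint64_t subres_state =
        std::numeric_limits<uint64_t>::max() >> (64 - levels * layers);
    return subres_state == 0x3F ? 0 : 1; // 0b111111: six dirty subresources
}
```

The `ASSERT` guards the edge case: with exactly 64 subresources the shift count is 0, which is still well defined, while anything above 64 would overflow the `u64` state.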
16 changes: 16 additions & 0 deletions src/video_core/texture_cache/image.h
@@ -91,9 +91,24 @@ struct Image {
return image_view_ids[std::distance(image_view_infos.begin(), it)];
}

+    void ForEachSubresource(VAddr addr, size_t size, auto&& func) {
+        const u32 num_layers = info.resources.layers;
+        for (u32 m = 0; const auto& mip : info.mips_layout) {
+            for (u32 l = 0; l < num_layers; l++) {
+                const VAddr mip_addr = info.guest_address + mip.offset * num_layers + mip.size * l;
+                const VAddr mip_addr_end = mip_addr + mip.size;
+                if (mip_addr < addr + size && addr < mip_addr_end) {
+                    func(m * num_layers + l);
+                }
+            }
+            m++;
+        }
+    }

boost::container::small_vector<vk::ImageMemoryBarrier2, 32> GetBarriers(
vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
vk::PipelineStageFlags2 dst_stage, std::optional<SubresourceRange> subres_range);

void Transit(vk::ImageLayout dst_layout, vk::Flags<vk::AccessFlagBits2> dst_mask,
std::optional<SubresourceRange> range, vk::CommandBuffer cmdbuf = {});
void Upload(vk::Buffer buffer, u64 offset);
@@ -111,6 +126,7 @@
VAddr cpu_addr_end = 0;
std::vector<ImageViewInfo> image_view_infos;
std::vector<ImageViewId> image_view_ids;
+    u64 subres_state{};

// Resource state tracking
vk::ImageUsageFlags usage;
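To make the new `ForEachSubresource` indexing concrete, here is a self-contained walk-through with a made-up mip layout (the real offsets and sizes come from `info.mips_layout`); note the half-open interval overlap test and the mip-major bit index `m * num_layers + l`:

```cpp
#include <cstdint>
#include <cstdio>
#include <vector>

struct MipLayout { uint64_t offset, size; }; // stand-in for the PR's mips_layout entries

int main() {
    const uint64_t guest_address = 0x1000; // hypothetical
    const uint32_t num_layers = 2;
    const std::vector<MipLayout> mips = {{0x000, 0x100}, {0x100, 0x40}};

    const uint64_t addr = 0x1100, size = 0x10; // invalidated CPU range
    for (uint32_t m = 0; m < mips.size(); m++) {
        for (uint32_t l = 0; l < num_layers; l++) {
            // Same addressing as ForEachSubresource: each mip's layers are
            // contiguous, so layer l of mip m starts at offset*layers + size*l.
            const uint64_t sub_addr =
                guest_address + mips[m].offset * num_layers + mips[m].size * l;
            const uint64_t sub_end = sub_addr + mips[m].size;
            if (sub_addr < addr + size && addr < sub_end) { // half-open overlap
                std::printf("dirty subresource %u\n", m * num_layers + l);
            }
        }
    }
}
```

With these values the write at `0x1100` falls inside layer 1 of mip 0 only, so the sketch prints `dirty subresource 1`: exactly the bit the texture cache sets in `subres_state`.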
67 changes: 49 additions & 18 deletions src/video_core/texture_cache/texture_cache.cpp
@@ -46,8 +46,10 @@ TextureCache::~TextureCache() = default;
void TextureCache::InvalidateMemory(VAddr address, size_t size) {
std::scoped_lock lock{mutex};
ForEachImageInRegion(address, size, [&](ImageId image_id, Image& image) {
-        // Ensure image is reuploaded when accessed again.
+        // Mark any subresources as dirty.
image.flags |= ImageFlagBits::CpuDirty;
+        image.ForEachSubresource(address, size,
+                                 [&](u32 index) { image.subres_state |= 1ULL << index; });
// Untrack image, so the range is unprotected and the guest can write freely.
UntrackImage(image_id);
});
@@ -57,12 +59,13 @@ void TextureCache::InvalidateMemoryFromGPU(VAddr address, size_t max_size) {
std::scoped_lock lock{mutex};
ForEachImageInRegion(address, max_size, [&](ImageId image_id, Image& image) {
// Only consider images that match base address.
-        // TODO: Maybe also consider subresources
if (image.info.guest_address != address) {
return;
}
-        // Ensure image is reuploaded when accessed again.
+        // Mark any subresources as dirty.
image.flags |= ImageFlagBits::GpuDirty;
+        image.ForEachSubresource(address, max_size,
+                                 [&](u32 index) { image.subres_state |= 1ULL << index; });
});
}

@@ -375,12 +378,18 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
return;
}

-    const auto& num_layers = image.info.resources.layers;
-    const auto& num_mips = image.info.resources.levels;
+    const u32 num_layers = image.info.resources.layers;
+    const u32 num_mips = image.info.resources.levels;
+    ASSERT(num_mips == image.info.mips_layout.size());

boost::container::small_vector<vk::BufferImageCopy, 14> image_copy{};
for (u32 m = 0; m < num_mips; m++) {
+        const u32 mask = (1 << num_layers) - 1;
+        const u64 subres_state = (image.subres_state >> (m * num_layers)) & mask;
+        if (subres_state == 0) {
+            continue;
+        }

const u32 width = std::max(image.info.size.width >> m, 1u);
const u32 height = std::max(image.info.size.height >> m, 1u);
const u32 depth =
@@ -399,19 +408,40 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule
image.mip_hashes[m] = hash;
}

-        image_copy.push_back({
-            .bufferOffset = mip_ofs * num_layers,
-            .bufferRowLength = static_cast<u32>(mip_pitch),
-            .bufferImageHeight = static_cast<u32>(mip_height),
-            .imageSubresource{
-                .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
-                .mipLevel = m,
-                .baseArrayLayer = 0,
-                .layerCount = num_layers,
-            },
-            .imageOffset = {0, 0, 0},
-            .imageExtent = {width, height, depth},
-        });
+        if (subres_state == mask) {
+            image_copy.push_back({
+                .bufferOffset = mip_ofs * num_layers,
+                .bufferRowLength = static_cast<u32>(mip_pitch),
+                .bufferImageHeight = static_cast<u32>(mip_height),
+                .imageSubresource{
+                    .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
+                    .mipLevel = m,
+                    .baseArrayLayer = 0,
+                    .layerCount = num_layers,
+                },
+                .imageOffset = {0, 0, 0},
+                .imageExtent = {width, height, depth},
+            });
+        } else {
+            for (u32 l = 0; l < num_layers; l++) {
+                if (!(subres_state & (1 << l))) {
+                    continue;
+                }
+                image_copy.push_back({
+                    .bufferOffset = mip_ofs * num_layers + mip_size * l,
+                    .bufferRowLength = static_cast<u32>(mip_pitch),
+                    .bufferImageHeight = static_cast<u32>(mip_height),
+                    .imageSubresource{
+                        .aspectMask = image.aspect_mask & ~vk::ImageAspectFlagBits::eStencil,
+                        .mipLevel = m,
+                        .baseArrayLayer = l,
+                        .layerCount = 1,
+                    },
+                    .imageOffset = {0, 0, 0},
+                    .imageExtent = {width, height, depth},
+                });
+            }
+        }
}

if (image_copy.empty()) {
@@ -447,6 +477,7 @@ void TextureCache::RefreshImage(Image& image, Vulkan::Scheduler* custom_schedule

cmdbuf.copyBufferToImage(buffer, image.image, vk::ImageLayout::eTransferDstOptimal, image_copy);
image.flags &= ~ImageFlagBits::Dirty;
+    image.subres_state = 0;
}

vk::Sampler TextureCache::GetSampler(const AmdGpu::Sampler& sampler) {
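Finally, a worked check of the per-mip mask arithmetic in `RefreshImage`, with a hypothetical two-layer dirty state (not from the PR): a partially dirty mip falls through to the per-layer copies, while a value equal to `mask` takes the single whole-mip `vk::BufferImageCopy`:

```cpp
#include <cstdint>

int main() {
    const uint32_t num_layers = 2;
    const uint64_t subres_state = 0b0110; // hypothetical: mip0/layer1 and mip1/layer0 dirty
    const uint32_t mask = (1u << num_layers) - 1; // 0b11

    const uint64_t mip0 = (subres_state >> (0 * num_layers)) & mask; // 0b10 -> per-layer copies
    const uint64_t mip1 = (subres_state >> (1 * num_layers)) & mask; // 0b01 -> per-layer copies
    // A result equal to `mask` would instead emit one copy covering all layers,
    // and a result of 0 skips the mip entirely via the early `continue`.
    return (mip0 == 0b10 && mip1 == 0b01) ? 0 : 1;
}
```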