Skip to content

Commit

Permalink
GS/HW: Avoid barriers on second alpha pass when only writing to Z
Browse files Browse the repository at this point in the history
Those barriers are completely redundant. We also don't need to use the drawlist.
  • Loading branch information
stenzek committed May 12, 2024
1 parent ba7096c commit 20dbcfd
Show file tree
Hide file tree
Showing 4 changed files with 74 additions and 47 deletions.
6 changes: 4 additions & 2 deletions pcsx2/GS/Renderers/Common/GSDevice.h
Original file line number Diff line number Diff line change
Expand Up @@ -387,7 +387,7 @@ struct alignas(16) GSHWDrawConfig
// Returns true when at least one of the following holds:
//  - tex_is_fb is set,
//  - fbmask is set,
//  - date is greater than 0 and not equal to 3,
//  - the software-blend check below (sw_blend_needs_rt) passes.
// NOTE(review): judging by the names, this presumably means the shader has to
// read the render target it is drawing to - confirm against the shader code.
__fi bool IsFeedbackLoop() const
{
// OR of the A, B and D blend selectors; non-zero if any of them is set.
const u32 sw_blend_bits = blend_a | blend_b | blend_d;
// NOTE(review): this listing is a diff rendered without +/- markers. The two
// declarations of sw_blend_needs_rt below are the pre-change and post-change
// forms of the same line (hunk -387,7 +387,7), not two declarations in one scope.
// Pre-change form: some selector is set and bit 0 of (A|B|D|C) is set.
const bool sw_blend_needs_rt = sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u);
// Post-change form: same test, or a_masked and blend_c share a set bit.
const bool sw_blend_needs_rt = (sw_blend_bits != 0 && ((sw_blend_bits | blend_c) & 1u)) || ((a_masked & blend_c) != 0);
return tex_is_fb || fbmask || (date > 0 && date != 3) || sw_blend_needs_rt;
}

Expand Down Expand Up @@ -688,7 +688,9 @@ struct alignas(16) GSHWDrawConfig
struct AlphaPass
{
alignas(8) PSSelector ps;
bool enable;
bool enable : 1;
bool require_one_barrier : 1;
bool require_full_barrier : 1;
ColorMaskSelector colormask;
DepthStencilSelector depth;
float ps_aref;
Expand Down
12 changes: 10 additions & 2 deletions pcsx2/GS/Renderers/HW/GSRendererHW.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5917,11 +5917,10 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta

SetupIA(rtscale, sx, sy);

m_conf.alpha_second_pass.enable = ate_second_pass;

if (ate_second_pass)
{
pxAssert(!env.PABE.PABE);

std::memcpy(&m_conf.alpha_second_pass.ps, &m_conf.ps, sizeof(m_conf.ps));
std::memcpy(&m_conf.alpha_second_pass.colormask, &m_conf.colormask, sizeof(m_conf.colormask));
std::memcpy(&m_conf.alpha_second_pass.depth, &m_conf.depth, sizeof(m_conf.depth));
Expand Down Expand Up @@ -5965,6 +5964,8 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
r = g = b = a = false;
}

m_conf.alpha_second_pass.enable = true;

if (z || r || g || b || a)
{
m_conf.alpha_second_pass.depth.zwe = z;
Expand All @@ -5973,7 +5974,14 @@ __ri void GSRendererHW::DrawPrims(GSTextureCache::Target* rt, GSTextureCache::Ta
m_conf.alpha_second_pass.colormask.wb = b;
m_conf.alpha_second_pass.colormask.wa = a;
if (m_conf.alpha_second_pass.colormask.wrgba == 0)
{
m_conf.alpha_second_pass.ps.DisableColorOutput();
}
if (m_conf.alpha_second_pass.ps.IsFeedbackLoop())
{
m_conf.alpha_second_pass.require_one_barrier = m_conf.require_one_barrier;
m_conf.alpha_second_pass.require_full_barrier = m_conf.require_full_barrier;
}
}
else
{
Expand Down
100 changes: 58 additions & 42 deletions pcsx2/GS/Renderers/Vulkan/GSDeviceVK.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -5839,7 +5839,7 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)

// now we can do the actual draw
if (BindDrawPipeline(pipe))
SendHWDraw(config, draw_rt, skip_first_barrier);
SendHWDraw(config, draw_rt, config.require_one_barrier, config.require_full_barrier, skip_first_barrier);

// blend second pass
if (config.blend_second_pass.enable)
Expand All @@ -5851,7 +5851,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
pipe.ps.blend_hw = config.blend_second_pass.blend_hw;
pipe.ps.dither = config.blend_second_pass.dither;
if (BindDrawPipeline(pipe))
{
// TODO: This probably should have barriers, in case we want to use it conditionally.
DrawIndexedPrimitive();
}
}

// and the alpha pass
Expand All @@ -5869,7 +5872,10 @@ void GSDeviceVK::RenderHW(GSHWDrawConfig& config)
pipe.dss = config.alpha_second_pass.depth;
pipe.bs = config.blend;
if (BindDrawPipeline(pipe))
SendHWDraw(config, draw_rt, false);
{
SendHWDraw(config, draw_rt, config.alpha_second_pass.require_one_barrier,
config.alpha_second_pass.require_full_barrier, false);
}
}

if (draw_rt_clone)
Expand Down Expand Up @@ -5972,49 +5978,57 @@ VkImageMemoryBarrier GSDeviceVK::GetColorBufferBarrier(GSTextureVK* rt) const
VK_QUEUE_FAMILY_IGNORED, VK_QUEUE_FAMILY_IGNORED, rt->GetImage(), {VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u}};
}

void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier)
void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
bool one_barrier, bool full_barrier, bool skip_first_barrier)
{
if (config.drawlist)
if (!m_features.texture_barrier) [[unlikely]]
{
GL_PUSH("Split the draw (SPRITE)");
g_perfmon.Put(
GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()) - static_cast<u32>(skip_first_barrier));

const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
u32 p = 0;
u32 n = 0;

if (skip_first_barrier)
{
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
++n;
}

for (; n < draw_list_size; n++)
{
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);

const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
}

DrawIndexedPrimitive();
return;
}

if (m_features.texture_barrier && m_pipeline_selector.ps.IsFeedbackLoop())
#ifdef PCSX2_DEVBUILD
if ((one_barrier || full_barrier) && !m_pipeline_selector.ps.IsFeedbackLoop()) [[unlikely]]
Console.Warning("GS: Possible unnecessary barrier detected.");
#endif

if (full_barrier)
{
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
const u32 indices_per_prim = config.indices_per_prim;

if (config.require_full_barrier)
if (config.drawlist)
{
GL_PUSH("Split the draw (SPRITE)");
g_perfmon.Put(
GSPerfMon::Barriers, static_cast<u32>(config.drawlist->size()) - static_cast<u32>(skip_first_barrier));

const u32 indices_per_prim = config.indices_per_prim;
const u32 draw_list_size = static_cast<u32>(config.drawlist->size());
const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
u32 p = 0;
u32 n = 0;

if (skip_first_barrier)
{
const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
++n;
}

for (; n < draw_list_size; n++)
{
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);

const u32 count = (*config.drawlist)[n] * indices_per_prim;
DrawIndexedPrimitive(p, count);
p += count;
}
}
else
{
GL_PUSH("Split single draw in %d draw", config.nindices / indices_per_prim);
g_perfmon.Put(
GSPerfMon::Barriers, (config.nindices / indices_per_prim) - static_cast<u32>(skip_first_barrier));
Expand All @@ -6033,16 +6047,18 @@ void GSDeviceVK::SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,

DrawIndexedPrimitive(p, indices_per_prim);
}

return;
}

if (config.require_one_barrier && !skip_first_barrier)
{
g_perfmon.Put(GSPerfMon::Barriers, 1);
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
}
return;
}

if (one_barrier && !skip_first_barrier)
{
g_perfmon.Put(GSPerfMon::Barriers, 1);

const VkImageMemoryBarrier barrier = GetColorBufferBarrier(draw_rt);
vkCmdPipelineBarrier(GetCurrentCommandBuffer(), VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 0, nullptr, 1, &barrier);
}

DrawIndexedPrimitive();
Expand Down
3 changes: 2 additions & 1 deletion pcsx2/GS/Renderers/Vulkan/GSDeviceVK.h
Original file line number Diff line number Diff line change
Expand Up @@ -588,7 +588,8 @@ class GSDeviceVK final : public GSDevice
void UpdateHWPipelineSelector(GSHWDrawConfig& config, PipelineSelector& pipe);
void UploadHWDrawVerticesAndIndices(const GSHWDrawConfig& config);
VkImageMemoryBarrier GetColorBufferBarrier(GSTextureVK* rt) const;
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt, bool skip_first_barrier);
void SendHWDraw(const GSHWDrawConfig& config, GSTextureVK* draw_rt,
bool one_barrier, bool full_barrier, bool skip_first_barrier);

//////////////////////////////////////////////////////////////////////////
// Vulkan State
Expand Down

0 comments on commit 20dbcfd

Please sign in to comment.