Skip to content

Commit

Permalink
do vertex buffer restride in a void vertex function
Browse files Browse the repository at this point in the history
  • Loading branch information
SamoZ256 committed Aug 13, 2024
1 parent 5e9537c commit 27925a4
Show file tree
Hide file tree
Showing 5 changed files with 96 additions and 23 deletions.
61 changes: 47 additions & 14 deletions src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.cpp
Original file line number Diff line number Diff line change
@@ -1,7 +1,10 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalCommon.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h"
#include "Metal/MTLResource.hpp"
#include "Cafe/HW/Latte/Renderer/Metal/MetalHybridComputePipeline.h"
#include "Common/precompiled.h"
#include "Foundation/NSRange.hpp"
#include "Metal/MTLRenderCommandEncoder.hpp"

const size_t BUFFER_ALLOCATION_SIZE = 8 * 1024 * 1024;

Expand Down Expand Up @@ -93,21 +96,51 @@ MetalRestridedBufferRange MetalVertexBufferCache::RestrideBufferIfNeeded(MTL::Bu

if (restrideInfo.memoryInvalidated || stride != restrideInfo.lastStride)
{
// TODO: use compute/void vertex function instead
size_t newStride = Align(stride, 4);
size_t newSize = vertexBufferRange.size / stride * newStride;
// TODO: use one big buffer for all restrided buffers
restrideInfo.buffer = m_mtlr->GetDevice()->newBuffer(newSize, MTL::StorageModeShared);

uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
uint8* newPtr = (uint8*)restrideInfo.buffer->contents();

for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
{
memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
}
// TODO: remove
debug_printf("Restrided vertex buffer (old stride: %zu, new stride: %zu, old size: %zu, new size: %zu)\n", stride, newStride, vertexBufferRange.size, newSize);
if (!restrideInfo.buffer || newSize != restrideInfo.buffer->length())
{
if (restrideInfo.buffer)
restrideInfo.buffer->release();
// TODO: use one big buffer for all restrided buffers
restrideInfo.buffer = m_mtlr->GetDevice()->newBuffer(newSize, MTL::StorageModeShared);
}

//uint8* oldPtr = (uint8*)bufferCache->contents() + vertexBufferRange.offset;
//uint8* newPtr = (uint8*)restrideInfo.buffer->contents();

//for (size_t elem = 0; elem < vertexBufferRange.size / stride; elem++)
//{
// memcpy(newPtr + elem * newStride, oldPtr + elem * stride, stride);
//}
//debug_printf("Restrided vertex buffer (old stride: %zu, new stride: %zu, old size: %zu, new size: %zu)\n", stride, newStride, vertexBufferRange.size, newSize);

if (m_mtlr->GetEncoderType() == MetalEncoderType::Render)
{
auto renderCommandEncoder = static_cast<MTL::RenderCommandEncoder*>(m_mtlr->GetCommandEncoder());

renderCommandEncoder->setRenderPipelineState(m_restrideBufferPipeline->GetRenderPipelineState());
MTL::Buffer* buffers[] = {bufferCache, restrideInfo.buffer};
size_t offsets[] = {vertexBufferRange.offset, 0};
renderCommandEncoder->setVertexBuffers(buffers, offsets, NS::Range(0, 2));

struct
{
uint32 oldStride;
uint32 newStride;
} strideData = {static_cast<uint32>(stride), static_cast<uint32>(newStride)};
renderCommandEncoder->setVertexBytes(&strideData, sizeof(strideData), 2);

renderCommandEncoder->drawPrimitives(MTL::PrimitiveTypePoint, NS::UInteger(0), vertexBufferRange.size / stride);

MTL::Resource* barrierBuffers[] = {restrideInfo.buffer};
renderCommandEncoder->memoryBarrier(barrierBuffers, 1, MTL::RenderStageVertex, MTL::RenderStageVertex);
}
else
{
debug_printf("vertex buffer restride needs an active render encoder\n");
cemu_assert_suspicious();
}

restrideInfo.memoryInvalidated = false;
restrideInfo.lastStride = newStride;
Expand Down
14 changes: 13 additions & 1 deletion src/Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,11 @@ class MetalVertexBufferCache
MetalVertexBufferCache(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer} {}
~MetalVertexBufferCache();

// Vertex buffer cache
// Stores the hybrid compute pipeline used for vertex buffer restriding.
// Only the pointer is assigned to m_restrideBufferPipeline; nothing is copied here.
// NOTE(review): m_restrideBufferPipeline defaults to nullptr and the restride path
// dereferences it without a null check, so this presumably has to be called before
// the first restride - confirm against the renderer's initialization order.
void SetRestrideBufferPipeline(class MetalHybridComputePipeline* restrideBufferPipeline)
{
m_restrideBufferPipeline = restrideBufferPipeline;
}

void TrackVertexBuffer(uint32 bufferIndex, size_t offset, size_t size, MetalRestrideInfo* restrideInfo)
{
m_bufferRanges[bufferIndex] = MetalVertexBufferRange{offset, size, restrideInfo};
Expand All @@ -101,6 +105,8 @@ class MetalVertexBufferCache
private:
class MetalRenderer* m_mtlr;

class MetalHybridComputePipeline* m_restrideBufferPipeline = nullptr;

MetalVertexBufferRange m_bufferRanges[LATTE_MAX_VERTEX_BUFFERS] = {};

void MemoryRangeChanged(size_t offset, size_t size);
Expand All @@ -112,6 +118,12 @@ class MetalMemoryManager
MetalMemoryManager(class MetalRenderer* metalRenderer) : m_mtlr{metalRenderer}, m_bufferAllocator(metalRenderer), m_vertexBufferCache(metalRenderer) {}
~MetalMemoryManager();

// Pipelines
// Forwards the restride pipeline to the m_vertexBufferCache member.
// This function does not store the pointer in the memory manager itself.
void SetRestrideBufferPipeline(class MetalHybridComputePipeline* restrideBufferPipeline)
{
m_vertexBufferCache.SetRestrideBufferPipeline(restrideBufferPipeline);
}

void ResetTemporaryBuffers()
{
m_bufferAllocator/*s[m_bufferAllocatorIndex]*/.ResetTemporaryBuffers();
Expand Down
18 changes: 11 additions & 7 deletions src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,8 +62,9 @@ MetalRenderer::MetalRenderer()
MTL::Library* utilityLibrary = m_device->newLibrary(NS::String::string(utilityShaderSource, NS::ASCIIStringEncoding), nullptr, &error);
if (error)
{
debug_printf("failed to create present library (error: %s)\n", error->localizedDescription()->utf8String());
debug_printf("failed to create utility library (error: %s)\n", error->localizedDescription()->utf8String());
error->release();
throw;
return;
}

Expand Down Expand Up @@ -98,12 +99,16 @@ MetalRenderer::MetalRenderer()

// Hybrid pipelines
m_copyTextureToTexturePipeline = new MetalHybridComputePipeline(this, utilityLibrary, "vertexCopyTextureToTexture", "kernelCopyTextureToTexture");
m_restrideBufferPipeline = new MetalHybridComputePipeline(this, utilityLibrary, "vertexRestrideBuffer", "kernelRestrideBuffer");
utilityLibrary->release();

m_memoryManager->SetRestrideBufferPipeline(m_restrideBufferPipeline);
}

MetalRenderer::~MetalRenderer()
{
delete m_copyTextureToTexturePipeline;
delete m_restrideBufferPipeline;

m_presentPipelineLinear->release();
m_presentPipelineSRGB->release();
Expand Down Expand Up @@ -688,11 +693,6 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
}
const auto fetchShader = LatteSHRC_GetActiveFetchShader();

// Render pipeline state
// TODO: use `m_lastUsedFBO` instead of `m_activeFBO`
MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_activeFBO, LatteGPUState.contextNew);
renderCommandEncoder->setRenderPipelineState(renderPipelineState);

// Depth stencil state
MTL::DepthStencilState* depthStencilState = m_depthStencilCache->GetDepthStencilState(LatteGPUState.contextNew);
renderCommandEncoder->setDepthStencilState(depthStencilState);
Expand Down Expand Up @@ -794,6 +794,10 @@ void MetalRenderer::draw_execute(uint32 baseVertex, uint32 baseInstance, uint32
}
}

// Render pipeline state
MTL::RenderPipelineState* renderPipelineState = m_pipelineCache->GetPipelineState(fetchShader, vertexShader, pixelShader, m_state.m_activeFBO, LatteGPUState.contextNew);
renderCommandEncoder->setRenderPipelineState(renderPipelineState);

// Uniform buffers, textures and samplers
BindStageResources(renderCommandEncoder, vertexShader);
BindStageResources(renderCommandEncoder, pixelShader);
Expand Down Expand Up @@ -1308,7 +1312,7 @@ void MetalRenderer::BindStageResources(MTL::RenderCommandEncoder* renderCommandE
{
LatteMRT::GetCurrentFragCoordScale(GET_UNIFORM_DATA_PTR(shader->uniform.loc_fragCoordScale));
}
// TODO: uncomment?
// TODO: uncomment
/*
if (shader->uniform.loc_verticesPerInstance >= 0)
{
Expand Down
12 changes: 12 additions & 0 deletions src/Cafe/HW/Latte/Renderer/Metal/MetalRenderer.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
#include "Cafe/HW/Latte/Renderer/Metal/MetalMemoryManager.h"
#include "Common/precompiled.h"
#include "Metal/MTLCommandBuffer.hpp"
#include "Metal/MTLCommandEncoder.hpp"
#include "Metal/MTLRenderPass.hpp"

#define MAX_MTL_BUFFERS 31
Expand Down Expand Up @@ -244,6 +245,16 @@ class MetalRenderer : public Renderer
return m_commandBuffers[m_commandBuffers.size() - 1].m_commandBuffer;
}

// Returns m_commandEncoder typed as the base MTL::CommandEncoder.
// Callers that need a concrete encoder type should check GetEncoderType() before
// downcasting the returned pointer.
// NOTE(review): presumably null when no encoder is active - confirm.
MTL::CommandEncoder* GetCommandEncoder()
{
return m_commandEncoder;
}

// Returns m_encoderType.
// Callers compare the result against values such as MetalEncoderType::Render to decide
// how the pointer returned by GetCommandEncoder() may be used.
MetalEncoderType GetEncoderType()
{
return m_encoderType;
}

MTL::CommandBuffer* GetCommandBuffer();
bool CommandBufferCompleted(MTL::CommandBuffer* commandBuffer);
void WaitForCommandBufferCompletion(MTL::CommandBuffer* commandBuffer);
Expand Down Expand Up @@ -284,6 +295,7 @@ class MetalRenderer : public Renderer

// Hybrid pipelines
class MetalHybridComputePipeline* m_copyTextureToTexturePipeline;
class MetalHybridComputePipeline* m_restrideBufferPipeline;

// Basic
MTL::SamplerState* m_nearestSampler;
Expand Down
14 changes: 13 additions & 1 deletion src/Cafe/HW/Latte/Renderer/Metal/UtilityShaderSource.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,20 @@ inline const char* utilityShaderSource = \
" uint dstSlice;\n" \
"};\n" \
"\n" \
"vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(0)]], texture2d_array<float, access::write> dst [[texture(1)]], constant CopyParams& params) {\n" \
"vertex void vertexCopyTextureToTexture(uint vid [[vertex_id]], texture2d_array<float, access::read> src [[texture(0)]], texture2d_array<float, access::write> dst [[texture(1)]], constant CopyParams& params [[buffer(0)]]) {\n" \
" uint2 coord = uint2(vid % params.width, vid / params.width);\n" \
" return dst.write(float4(src.read(coord, params.srcSlice, params.srcMip).r, 0.0, 0.0, 0.0), coord, params.dstSlice, params.dstMip);\n" \
"}\n" \
"\n" \
"struct RestrideParams {\n" \
" uint oldStride;\n" \
" uint newStride;\n" \
"};\n" \
"\n" \
/* TODO: use uint32? That would require fewer iterations */ \
"vertex void vertexRestrideBuffer(uint vid [[vertex_id]], device uint8_t* src [[buffer(0)]], device uint8_t* dst [[buffer(1)]], constant RestrideParams& params [[buffer(2)]]) {\n" \
" for (uint32_t i = 0; i < params.oldStride; i++) {\n" \
" dst[vid * params.newStride + i] = src[vid * params.oldStride + i];\n" \
" }\n" \
"}\n" \
"\n";

0 comments on commit 27925a4

Please sign in to comment.