Skip to content

Commit

Permalink
Revert "recompiler: fixed fragment shader built-in attribute access (s…
Browse files Browse the repository at this point in the history
…hadps4-emu#1676)"

This reverts commit 8eacb88.

Revert "semaphore: Add GCD semaphore implementation. (shadps4-emu#1677)"

This reverts commit e1ecfb8.

Revert "hot-fix: Silence depth macrotiled warning"

This reverts commit 357b782.

Revert "buffer_cache: Bump usable address space to 40bits"

This reverts commit 6acfdd5.
  • Loading branch information
Xcedf committed Dec 31, 2024
1 parent 5f105db commit c860489
Show file tree
Hide file tree
Showing 8 changed files with 13 additions and 140 deletions.
36 changes: 1 addition & 35 deletions src/core/libraries/kernel/sync/semaphore.h
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,6 @@

#ifdef _WIN64
#include <windows.h>
#elif defined(__APPLE__)
#include <dispatch/dispatch.h>
#else
#include <semaphore>
#endif
Expand All @@ -23,32 +21,25 @@ template <s64 max>
class Semaphore {
public:
Semaphore(s32 initialCount)
#if !defined(_WIN64) && !defined(__APPLE__)
#ifndef _WIN64
: sem{initialCount}
#endif
{
#ifdef _WIN64
sem = CreateSemaphore(nullptr, initialCount, max, nullptr);
ASSERT(sem);
#elif defined(__APPLE__)
sem = dispatch_semaphore_create(initialCount);
ASSERT(sem);
#endif
}

~Semaphore() {
#ifdef _WIN64
CloseHandle(sem);
#elif defined(__APPLE__)
dispatch_release(sem);
#endif
}

void release() {
#ifdef _WIN64
ReleaseSemaphore(sem, 1, nullptr);
#elif defined(__APPLE__)
dispatch_semaphore_signal(sem);
#else
sem.release();
#endif
Expand All @@ -62,13 +53,6 @@ class Semaphore {
return;
}
}
#elif defined(__APPLE__)
for (;;) {
const auto res = dispatch_semaphore_wait(sem, DISPATCH_TIME_FOREVER);
if (res == 0) {
return;
}
}
#else
sem.acquire();
#endif
Expand All @@ -77,8 +61,6 @@ class Semaphore {
bool try_acquire() {
#ifdef _WIN64
return WaitForSingleObjectEx(sem, 0, true) == WAIT_OBJECT_0;
#elif defined(__APPLE__)
return dispatch_semaphore_wait(sem, DISPATCH_TIME_NOW) == 0;
#else
return sem.try_acquire();
#endif
Expand All @@ -95,10 +77,6 @@ class Semaphore {
}

return WaitForSingleObjectEx(sem, timeout_ms, true) == WAIT_OBJECT_0;
#elif defined(__APPLE__)
const auto rel_time_ns = std::chrono::ceil<std::chrono::nanoseconds>(rel_time).count();
const auto timeout = dispatch_time(DISPATCH_TIME_NOW, rel_time_ns);
return dispatch_semaphore_wait(sem, timeout) == 0;
#else
return sem.try_acquire_for(rel_time);
#endif
Expand All @@ -120,16 +98,6 @@ class Semaphore {

u64 res = WaitForSingleObjectEx(sem, static_cast<u64>(timeout_ms), true);
return res == WAIT_OBJECT_0;
#elif defined(__APPLE__)
auto abs_s = std::chrono::time_point_cast<std::chrono::seconds>(abs_time);
auto abs_ns = std::chrono::time_point_cast<std::chrono::nanoseconds>(abs_time) -
std::chrono::time_point_cast<std::chrono::nanoseconds>(abs_s);
const timespec abs_timespec = {
.tv_sec = abs_s.time_since_epoch().count(),
.tv_nsec = abs_ns.count(),
};
const auto timeout = dispatch_walltime(&abs_timespec, 0);
return dispatch_semaphore_wait(sem, timeout) == 0;
#else
return sem.try_acquire_until(abs_time);
#endif
Expand All @@ -138,8 +106,6 @@ class Semaphore {
private:
#ifdef _WIN64
HANDLE sem;
#elif defined(__APPLE__)
dispatch_semaphore_t sem;
#else
std::counting_semaphore<max> sem;
#endif
Expand Down
75 changes: 8 additions & 67 deletions src/shader_recompiler/frontend/translate/translate.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -53,74 +53,15 @@ void Translator::EmitPrologue() {
}
break;
case Stage::Fragment:
dst_vreg = IR::VectorReg::V0;
if (runtime_info.fs_info.addr_flags.persp_sample_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.persp_center_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.persp_centroid_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.persp_pull_model_ena) {
++dst_vreg; // I/W
++dst_vreg; // J/W
++dst_vreg; // 1/W
}
if (runtime_info.fs_info.addr_flags.linear_sample_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.linear_center_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.linear_centroid_ena) {
++dst_vreg; // I
++dst_vreg; // J
}
if (runtime_info.fs_info.addr_flags.line_stipple_tex_ena) {
++dst_vreg;
}
if (runtime_info.fs_info.addr_flags.pos_x_float_ena) {
if (runtime_info.fs_info.en_flags.pos_x_float_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 0));
} else {
ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
}
}
if (runtime_info.fs_info.addr_flags.pos_y_float_ena) {
if (runtime_info.fs_info.en_flags.pos_y_float_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 1));
} else {
ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
}
}
if (runtime_info.fs_info.addr_flags.pos_z_float_ena) {
if (runtime_info.fs_info.en_flags.pos_z_float_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 2));
} else {
ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
}
}
if (runtime_info.fs_info.addr_flags.pos_w_float_ena) {
if (runtime_info.fs_info.en_flags.pos_w_float_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, 3));
} else {
ir.SetVectorReg(dst_vreg++, ir.Imm32(0.0f));
}
}
if (runtime_info.fs_info.addr_flags.front_face_ena) {
if (runtime_info.fs_info.en_flags.front_face_ena) {
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::IsFrontFace));
} else {
ir.SetVectorReg(dst_vreg++, ir.Imm32(0));
}
// https://github.com/chaotic-cx/mesa-mirror/blob/72326e15/src/amd/vulkan/radv_shader_args.c#L258
// The first two VGPRs are used for i/j barycentric coordinates. In the vast majority of
// cases it will be only those two, but if shader is using both e.g linear and perspective
// inputs it can be more For now assume that this isn't the case.
dst_vreg = IR::VectorReg::V2;
for (u32 i = 0; i < 4; i++) {
ir.SetVectorReg(dst_vreg++, ir.GetAttribute(IR::Attribute::FragCoord, i));
}
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::IsFrontFace));
break;
case Stage::Compute:
ir.SetVectorReg(dst_vreg++, ir.GetAttributeU32(IR::Attribute::LocalInvocationId, 0));
Expand Down
4 changes: 0 additions & 4 deletions src/shader_recompiler/runtime_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
#include <span>
#include <boost/container/static_vector.hpp>
#include "common/types.h"
#include "video_core/amdgpu/liverpool.h"
#include "video_core/amdgpu/types.h"

namespace Shader {
Expand Down Expand Up @@ -106,8 +105,6 @@ struct FragmentRuntimeInfo {

auto operator<=>(const PsInput&) const noexcept = default;
};
AmdGpu::Liverpool::PsInput en_flags;
AmdGpu::Liverpool::PsInput addr_flags;
u32 num_inputs;
std::array<PsInput, 32> inputs;
struct PsColorBuffer {
Expand All @@ -120,7 +117,6 @@ struct FragmentRuntimeInfo {

bool operator==(const FragmentRuntimeInfo& other) const noexcept {
return std::ranges::equal(color_buffers, other.color_buffers) &&
en_flags.raw == other.en_flags.raw && addr_flags.raw == other.addr_flags.raw &&
num_inputs == other.num_inputs &&
std::ranges::equal(inputs.begin(), inputs.begin() + num_inputs, other.inputs.begin(),
other.inputs.begin() + num_inputs);
Expand Down
29 changes: 1 addition & 28 deletions src/video_core/amdgpu/liverpool.h
Original file line number Diff line number Diff line change
Expand Up @@ -1071,28 +1071,6 @@ struct Liverpool {
BitField<27, 1, u32> enable_postz_overrasterization;
};

union PsInput {
u32 raw;
struct {
u32 persp_sample_ena : 1;
u32 persp_center_ena : 1;
u32 persp_centroid_ena : 1;
u32 persp_pull_model_ena : 1;
u32 linear_sample_ena : 1;
u32 linear_center_ena : 1;
u32 linear_centroid_ena : 1;
u32 line_stipple_tex_ena : 1;
u32 pos_x_float_ena : 1;
u32 pos_y_float_ena : 1;
u32 pos_z_float_ena : 1;
u32 pos_w_float_ena : 1;
u32 front_face_ena : 1;
u32 ancillary_ena : 1;
u32 sample_coverage_ena : 1;
u32 pos_fixed_pt_ena : 1;
};
};

union Regs {
struct {
INSERT_PADDING_WORDS(0x2C08);
Expand Down Expand Up @@ -1148,10 +1126,7 @@ struct Liverpool {
INSERT_PADDING_WORDS(0xA191 - 0xA187);
std::array<PsInputControl, 32> ps_inputs;
VsOutputConfig vs_output_config;
INSERT_PADDING_WORDS(1);
PsInput ps_input_ena;
PsInput ps_input_addr;
INSERT_PADDING_WORDS(1);
INSERT_PADDING_WORDS(4);
BitField<0, 6, u32> num_interp;
INSERT_PADDING_WORDS(0xA1C3 - 0xA1B6 - 1);
ShaderPosFormat shader_pos_format;
Expand Down Expand Up @@ -1413,8 +1388,6 @@ static_assert(GFX6_3D_REG_INDEX(viewports) == 0xA10F);
static_assert(GFX6_3D_REG_INDEX(clip_user_data) == 0xA16F);
static_assert(GFX6_3D_REG_INDEX(ps_inputs) == 0xA191);
static_assert(GFX6_3D_REG_INDEX(vs_output_config) == 0xA1B1);
static_assert(GFX6_3D_REG_INDEX(ps_input_ena) == 0xA1B3);
static_assert(GFX6_3D_REG_INDEX(ps_input_addr) == 0xA1B4);
static_assert(GFX6_3D_REG_INDEX(num_interp) == 0xA1B6);
static_assert(GFX6_3D_REG_INDEX(shader_pos_format) == 0xA1C3);
static_assert(GFX6_3D_REG_INDEX(z_export_format) == 0xA1C4);
Expand Down
2 changes: 1 addition & 1 deletion src/video_core/buffer_cache/buffer_cache.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class BufferCache {

struct Traits {
using Entry = BufferId;
static constexpr size_t AddressSpaceBits = 40;
static constexpr size_t AddressSpaceBits = 39;
static constexpr size_t FirstLevelBits = 14;
static constexpr size_t PageBits = CACHING_PAGEBITS;
};
Expand Down
2 changes: 1 addition & 1 deletion src/video_core/buffer_cache/memory_tracker_base.h
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ namespace VideoCore {

class MemoryTracker {
public:
static constexpr size_t MAX_CPU_PAGE_BITS = 40;
static constexpr size_t MAX_CPU_PAGE_BITS = 39;
static constexpr size_t HIGHER_PAGE_BITS = 22;
static constexpr size_t HIGHER_PAGE_SIZE = 1ULL << HIGHER_PAGE_BITS;
static constexpr size_t HIGHER_PAGE_MASK = HIGHER_PAGE_SIZE - 1ULL;
Expand Down
2 changes: 0 additions & 2 deletions src/video_core/renderer_vulkan/vk_pipeline_cache.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -123,8 +123,6 @@ Shader::RuntimeInfo PipelineCache::BuildRuntimeInfo(Shader::Stage stage) {
}
case Shader::Stage::Fragment: {
BuildCommon(regs.ps_program);
info.fs_info.en_flags = regs.ps_input_ena;
info.fs_info.addr_flags = regs.ps_input_addr;
const auto& ps_inputs = regs.ps_inputs;
info.fs_info.num_inputs = regs.num_interp;
for (u32 i = 0; i < regs.num_interp; i++) {
Expand Down
3 changes: 1 addition & 2 deletions src/video_core/texture_cache/tile_manager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -392,8 +392,7 @@ std::pair<vk::Buffer, u32> TileManager::TryDetile(vk::Buffer in_buffer, u32 in_o
const auto* detiler = GetDetiler(image);
if (!detiler) {
if (image.info.tiling_mode != AmdGpu::TilingMode::Texture_MacroTiled &&
image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled &&
image.info.tiling_mode != AmdGpu::TilingMode::Depth_MacroTiled) {
image.info.tiling_mode != AmdGpu::TilingMode::Display_MacroTiled) {
LOG_ERROR(Render_Vulkan, "Unsupported tiled image: {} ({})",
vk::to_string(image.info.pixel_format), NameOf(image.info.tiling_mode));
}
Expand Down

0 comments on commit c860489

Please sign in to comment.