Skip to content

Commit

Permalink
Update pal from commit acd9848c
Browse files Browse the repository at this point in the history
Add an explicit log file for GpuProfiler
Add ETW event collection to QueueTimings
Add IsGfx9Hwl to cleanup device logic
Add missing overrides and fix more build warnings
Add PalToHResult() and HResultToPal()
Add PC Perfcounter support for Gfx11
Add PHOENIX2 support
Add support for special/UTF8 characters in cache/debug file paths
Add TdrController for PAL-Trace
Add zwp_linux_dmabuf_v1 support to Wayland WS
Allow std::bit_cast
Always issue required async cache flush/inv event for gfx layout blt
Change experiments setting to enable only
Correct programming of depth for poly offset
Deterministic CmdBuffer Log Names
Expose file headers for archives
Factor out SlowColorClear1-7
Make CP DMA blt buffer only and optimize BLT flags for image only barrier
Make GpuProfiler aware of PerformanceData when fastlink
Make GpuProfiler aware of PerformanceData when fastlink
Null check for active controller
Offscreen GPU Profiling frame support
Optimize CoherClear to CoherColorTarget or CoherDepthStencilTarget transition
Optimize misaligned metadata WA to reduce GL2 sync for BLT access flags
Prevent DbgLogMgr destructor use of loggers
Remove assert in ~DbgLogMgr
Remove dead legacy sdma perfctr handling
Remove IsGfx9 and IsGfx10Plus
Remove more gfx9 stuff
Remove the default disabled legacy barrier path for pre/post clear sync
RemoveOldestFilesOfDirUntilSize() shouldn't return an error on an empty dir
Reorder Perfctr Init after queue info is known
Revise the UcodeVersion check to enable task shaders
Set up PBB dirty tracking so that PBB is written
Sqtt Data: Add Trace Buffer Size and split chunk into Header and Data
Submodule address-lib update
Submodule devdriver update to v24.04.03
Submodule SWD update
Update and create DevDriver Logger
Update CacheLayerBase
Update GpuPerfExp Trace Source
Use pSymbolName under PAL major 827
Use std::chrono
Fix 32Bit UMDs errors when using the callback mechanism
Fix a typo for "streamout_vertex_strides"
Fix dbgLogger file path gen
Fix one typo about PAL version check
Fix pMsaaState warning
Fix trace markers being missing from UberTrace SQTT
  • Loading branch information
qiaojbao committed May 14, 2024
1 parent 2682a91 commit c789abc
Show file tree
Hide file tree
Showing 313 changed files with 143,686 additions and 281,262 deletions.
8 changes: 8 additions & 0 deletions cmake/PalBuildParameters.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -77,6 +77,8 @@ pal_bp(PAL_BUILD_NULL_DEVICE ON)
# Build PAL with Graphics support?
pal_bp(PAL_BUILD_GFX ON)

pal_bp(PAL_BUILD_BRANCH "2410")

### Specify GPU build options ##########################################################################################

if (PAL_BUILD_GFX)
Expand Down Expand Up @@ -113,6 +115,12 @@ if (PAL_BUILD_GFX9)
PAL_BUILD_GFX11
)
endif()

pal_bp( PAL_BUILD_PHOENIX2 ON MODE "AUTHOR_WARNING"
ASIC_CONFIG
PAL_BUILD_GFX11
CHIP_HDR_PHOENIX2
)
#endif

endif() # PAL_BUILD_GFX9
Expand Down
5 changes: 4 additions & 1 deletion cmake/PalCompileDefinitions.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ function(pal_compile_definitions_gpu TARGET)
# PAL GFXx BUILD Defines
target_compile_definitions(${TARGET} PUBLIC PAL_BUILD_GFX9=$<BOOL:${PAL_BUILD_GFX9}>)
#if PAL_BUILD_GFX11
target_compile_definitions(${TARGET} PUBLIC PAL_BUILD_GFX11=$<BOOL:${PAL_BUILD_GFX11}>)
target_compile_definitions(${TARGET} INTERFACE PAL_BUILD_GFX11=$<BOOL:${PAL_BUILD_GFX11}>)
#endif

# PAL no longer references these defines and our clients must remove their references when upgrading.
Expand Down Expand Up @@ -86,6 +86,9 @@ function(pal_compile_definitions_gpu TARGET)
target_compile_definitions(${TARGET} PRIVATE CHIP_HDR_NAVI33=$<BOOL:${PAL_BUILD_GFX9}>)
target_compile_definitions(${TARGET} PRIVATE CHIP_HDR_PHOENIX1=$<BOOL:${PAL_BUILD_GFX9}>)

target_compile_definitions(${TARGET} PUBLIC PAL_BUILD_PHOENIX2=$<BOOL:${PAL_BUILD_PHOENIX2}>)
target_compile_definitions(${TARGET} PRIVATE CHIP_HDR_PHOENIX2=$<BOOL:${CHIP_HDR_PHOENIX2}>)

endif()
endfunction()

Expand Down
3 changes: 3 additions & 0 deletions cmake/PalOverrides.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -59,5 +59,8 @@ if(PAL_BUILD_GFX9)
set(PAL_SWD_BUILD_PHX ON)
set(PAL_SWD_BUILD_PHX1 ON)

set(PAL_SWD_BUILD_PHX2 ${PAL_BUILD_PHOENIX2})
set(ADDR_PHOENIX2_BUILD ${PAL_BUILD_PHOENIX2})

endif() # PAL_BUILD_GFX9

9 changes: 6 additions & 3 deletions doc/process/palCodingStandards.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,9 +2,6 @@
Copyright (c) 2019-2024 Advanced Micro Devices, Inc. All rights reserved.
```

<!--
-->

- [Introduction](#introduction)
- [Fundamental Underlying Principle:](#fundamental-underlying-principle)
- [Why Coding Standards](#why-coding-standards)
Expand Down Expand Up @@ -169,6 +166,8 @@ General
* In VS Code you could also enable trailing whitespace trimming by pressing Ctrl + Shift + P and then searching for Trim Trailing Whitespace.
* For Visual Studio check out Trailing Whitespace Visualizer extension (found on the Visual Studio marketplace).
- AI-generated code ***must not*** be added to the PAL code base.
General Language Restrictions
-----------------------------
Expand Down Expand Up @@ -199,6 +198,10 @@ General Language Restrictions
- The std `atomic` header ***should*** be used to implement atomic
values and thread barriers.
- The std `bit` header **may** be used for bit-casts and bit fiddling.
- The std `chrono` header ***should*** be used when dealing with time.
- goto statements ***must not*** be used.
- Operator overloading is ***strongly discouraged***; however, it may
Expand Down
18 changes: 18 additions & 0 deletions inc/core/g_palPipelineAbiMetadataImpl.h
Original file line number Diff line number Diff line change
Expand Up @@ -432,6 +432,9 @@ inline Result DeserializeEnum(
case HashLiteralString("_amdgpu_cs_shdr_intrl_tbl"):
*pValue = Abi::PipelineSymbolType::CsShdrIntrlTblPtr;
break;
case HashLiteralString("_amdgpu_ps_dual_source_shdr_intrl_tbl"):
*pValue = Abi::PipelineSymbolType::PsDualSourceShdrIntrlTblPtr;
break;
case HashLiteralString("_amdgpu_ls_disasm"):
*pValue = Abi::PipelineSymbolType::LsDisassembly;
break;
Expand All @@ -453,6 +456,9 @@ inline Result DeserializeEnum(
case HashLiteralString("_amdgpu_cs_disasm"):
*pValue = Abi::PipelineSymbolType::CsDisassembly;
break;
case HashLiteralString("_amdgpu_ps_dual_source_disasm"):
*pValue = Abi::PipelineSymbolType::PsDualSourceDisassembly;
break;
case HashLiteralString("_amdgpu_ls_shdr_intrl_data"):
*pValue = Abi::PipelineSymbolType::LsShdrIntrlData;
break;
Expand Down Expand Up @@ -480,6 +486,9 @@ inline Result DeserializeEnum(
case HashLiteralString("color_export_shader"):
*pValue = Abi::PipelineSymbolType::PsColorExportEntry;
break;
case HashLiteralString("color_export_shader_dual_source"):
*pValue = Abi::PipelineSymbolType::PsColorExportDualSourceEntry;
break;
default:
result = Result::NotFound;
break;
Expand Down Expand Up @@ -541,6 +550,9 @@ inline Result SerializeEnum(
case Abi::PipelineSymbolType::CsShdrIntrlTblPtr:
pWriter->Pack("_amdgpu_cs_shdr_intrl_tbl");
break;
case Abi::PipelineSymbolType::PsDualSourceShdrIntrlTblPtr:
pWriter->Pack("_amdgpu_ps_dual_source_shdr_intrl_tbl");
break;
case Abi::PipelineSymbolType::LsDisassembly:
pWriter->Pack("_amdgpu_ls_disasm");
break;
Expand All @@ -562,6 +574,9 @@ inline Result SerializeEnum(
case Abi::PipelineSymbolType::CsDisassembly:
pWriter->Pack("_amdgpu_cs_disasm");
break;
case Abi::PipelineSymbolType::PsDualSourceDisassembly:
pWriter->Pack("_amdgpu_ps_dual_source_disasm");
break;
case Abi::PipelineSymbolType::LsShdrIntrlData:
pWriter->Pack("_amdgpu_ls_shdr_intrl_data");
break;
Expand Down Expand Up @@ -589,6 +604,9 @@ inline Result SerializeEnum(
case Abi::PipelineSymbolType::PsColorExportEntry:
pWriter->Pack("color_export_shader");
break;
case Abi::PipelineSymbolType::PsColorExportDualSourceEntry:
pWriter->Pack("color_export_shader_dual_source");
break;
default:
break;
}
Expand Down
80 changes: 68 additions & 12 deletions inc/core/palCmdBuffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -273,6 +273,7 @@ enum PipelineStageFlag : uint32
PipelineStageCs = 0x00004000,
PipelineStageBlt = 0x00008000,
PipelineStageBottomOfPipe = 0x00010000,
PipelineStageDsTarget = PipelineStageEarlyDsTarget | PipelineStageLateDsTarget,
PipelineStageAllStages = 0x0001FFFF
#else
PipelineStageFetchIndices = 0x00000004,
Expand All @@ -288,6 +289,7 @@ enum PipelineStageFlag : uint32
PipelineStageCs = 0x00001000,
PipelineStageBlt = 0x00002000,
PipelineStageBottomOfPipe = 0x00004000,
PipelineStageDsTarget = PipelineStageEarlyDsTarget | PipelineStageLateDsTarget,
PipelineStageAllStages = 0x00007FFF
#endif
};
Expand Down Expand Up @@ -980,8 +982,13 @@ struct BarrierInfo
/// If they are provided PAL may detect cases where future read operations use the same caches as the prior read
/// operations and thus can skip the usual visibility operations.
///
/// Note that if the client does provide read operation flags in a source mask they *must* guarantee that the same
/// flags were provided to a prior barrier's destination mask(s). Incorrect behavior may occur otherwise.
/// Note that,
/// 1. If the client does provide read operation flags in a source mask they *must* guarantee that the same flags
/// were provided to a prior barrier's destination mask(s). Incorrect behavior may occur otherwise.
/// 2. One @ref MemBarrier or @ref ImgBarrier object can only be applied to a single resource; otherwise PAL's
/// internal optimization may be incorrect. Don't OR multiple resource transitions' stage or access masks into one
/// @ref MemBarrier or @ref ImgBarrier when making a PAL barrier call. However, you are allowed to OR multiple
/// resource transitions' stage or access masks into the global transition mask.
///
/// This struct is used by @ref AcquireReleaseInfo.
struct MemBarrier
Expand Down Expand Up @@ -2066,6 +2073,9 @@ struct CmdBufInfo
/// a valid vidPnSourceId when privateFlip flag is set and pDirectCapMemory
/// is nullptr.
#endif
#if PAL_CLIENT_INTERFACE_MAJOR_VERSION >= 865
uint64 frameId; ///< Present frame index, incremented at each present
#endif
};

/// Specifies rotation angle between two images. Used as input to ICmdBuffer::CmdScaledCopyImage.
Expand Down Expand Up @@ -2712,24 +2722,70 @@ class ICmdBuffer : public IDestroyable

/// Perform source pipeline stage and cache access optimization based on the acquire/release interface.
///
/// @param [in/out] barrierType Barrier transition type @ref BarrierType.
/// @param [in/out] pStageMask A mask of ORed @ref PipelineStageFlag to optimize.
/// @param [in/out] pAccessMask A mask of ORed @ref CacheCoherencyUsageFlags to optimize.
/// @param [in] barrierType Barrier transition type @ref BarrierType.
/// @param [in] pImage Image pointer for image transition, required when @ref BarrierType is
/// BarrierType::Image.
/// @param [in/out] pSrcStageMask A source mask of ORed @ref PipelineStageFlag to optimize, can't be null.
/// @param [in/out] pSrcAccessMask A source mask of ORed @ref CacheCoherencyUsageFlags to optimize, can't be null.
/// @param [in/out] pDstStageMask A destination mask of ORed @ref PipelineStageFlag to optimize, can't be null.
/// @param [in/out] pDstAccessMask A destination mask of ORed @ref CacheCoherencyUsageFlags to optimize.
///
/// @returns True if the GL2 cache needs to be flushed and invalidated.
///
/// @note PipelineStageBlt will be converted to more accurate stage(s) based on the underlying implementation of
/// outstanding BLTs, but will be left as PipelineStageBlt if the internal outstanding BLTs can't be expressed
/// as a client-facing PipelineStage (e.g., if there are CP DMA BLTs in flight).
virtual void OptimizeAcqRelReleaseInfo(
BarrierType barrierType,
uint32* pStageMask,
uint32* pAccessMask) const = 0;
virtual bool OptimizeAcqRelReleaseInfo(
BarrierType barrierType,
const IImage* pImage,
uint32* pSrcStageMask,
uint32* pSrcAccessMask,
uint32* pDstStageMask,
uint32* pDstAccessMask) const = 0;

#if PAL_CLIENT_INTERFACE_MAJOR_VERSION < 858
/// @param [in/out] pSrcStageMask A mask of ORed @ref PipelineStageFlag to optimize, can't be null.
/// @param [in/out] pSrcAccessMask A mask of ORed @ref CacheCoherencyUsageFlags to optimize, can't be null.
void OptimizeAcqRelReleaseInfo(
uint32* pStageMask,
uint32* pAccessMask) const
uint32* pSrcStageMask,
uint32* pSrcAccessMask) const
{
uint32 unusedStageMask = 0;
uint32 unusedAccessMask = 0;
OptimizeAcqRelReleaseInfo(BarrierType::Global, nullptr,
pSrcStageMask, pSrcAccessMask, &unusedStageMask, &unusedAccessMask);
}
#elif PAL_CLIENT_INTERFACE_MAJOR_VERSION < 864
/// @param [in] barrierType Barrier transition type @ref BarrierType.
/// @param [in/out] pSrcStageMask A mask of ORed @ref PipelineStageFlag to optimize, can't be null.
/// @param [in/out] pSrcAccessMask A mask of ORed @ref CacheCoherencyUsageFlags to optimize, can't be null.
void OptimizeAcqRelReleaseInfo(
BarrierType barrierType,
uint32* pSrcStageMask,
uint32* pSrcAccessMask) const
{
uint32 unusedStageMask = 0;
uint32 unusedAccessMask = 0;
OptimizeAcqRelReleaseInfo(barrierType, nullptr,
pSrcStageMask, pSrcAccessMask, &unusedStageMask, &unusedAccessMask);
}
#elif PAL_CLIENT_INTERFACE_MAJOR_VERSION < 867
/// @param [in] barrierType Barrier transition type @ref BarrierType.
/// @param [in/out] pSrcStageMask A source mask of ORed @ref PipelineStageFlag to optimize, can't be null.
/// @param [in/out] pSrcAccessMask A source mask of ORed @ref CacheCoherencyUsageFlags to optimize, can't be null.
/// @param [in/out] pDstStageMask A destination mask of ORed @ref PipelineStageFlag to optimize, can't be null.
/// @param [in/out] pDstAccessMask A destination mask of ORed @ref CacheCoherencyUsageFlags to optimize.
void OptimizeAcqRelReleaseInfo(
BarrierType barrierType,
uint32* pSrcStageMask,
uint32* pSrcAccessMask,
uint32* pDstStageMask,
uint32* pDstAccessMask) const
{
OptimizeAcqRelReleaseInfo(BarrierType::Global, pStageMask, pAccessMask);
uint32 unusedStageMask = 0;
uint32 unusedAccessMask = 0;
OptimizeAcqRelReleaseInfo(barrierType, nullptr,
pSrcStageMask, pSrcAccessMask, &unusedStageMask, &unusedAccessMask);
}
#endif

Expand Down
Loading

0 comments on commit c789abc

Please sign in to comment.