Taken from : https://github.com/jdoerfert/llvm-project/tree/rr-with-jit

koparasy · Jun 27, 2023 · 266b003 · 266b003
1 parent c4f28bf
commit 266b003
Show file tree

Hide file tree

Showing 589 changed files with 3,630,939 additions and 1 deletion.
diff --git a/.gitignore b/.gitignore
@@ -70,3 +70,6 @@ pythonenv*
 /clang/utils/analyzer/projects/*/RefScanBuildResults
 # automodapi puts generated documentation files here.
 /lldb/docs/python_api/
+
+ __pycache__/
+  *.pyc
diff --git a/README.md b/README.md
@@ -1,4 +1,48 @@
-# The LLVM Compiler Infrastructure
+# OpenMP Record Replay Reproducibility
+
+This repository contains all the source code required to reproduce the experiments
+of our work. It contains the LLVM extensions, the OpenMP offload runtime record-replay mechanism,
+and all the scripts required to perform the analysis of our work.
+
+## Requirements
+
+* Ninja
+* CMake @3.20
+* cuda@11.6.1 (For NVIDIA systems)
+* ROCm@5.4.3 (For AMD systems)
+* GCC@8.3.1 (For NVIDIA)
+* GCC@10.3.1 (For AMD)
+
+## Installation
+
+We provide an installation script, `setup.sh`, that installs the remaining requirements of our experiments
+using spack. The script clones `spack` into the root directory, creates an environment, and installs
+all necessary packages. Once everything is installed, it proceeds to install the provided Clang/LLVM.
+Below is a snippet that invokes the `setup.sh` script.
+
+On our [Power 9 system](https://hpc.llnl.gov/hardware/compute-platforms/lassen), the setup script takes a little less than 3 hours.
+
+```bash
+module load gcc/8.3.1
+module load ninja
+module load cmake/3.20.1
+module load cuda/11.6.1
+./setup.sh nvidia 40
+...
+```
+
+The first argument of `setup.sh` selects the GPU vendor and must be either `nvidia` or `amd` (lower case). The second argument is the number of threads to use for the installation. Once the installation finishes, please source the generated environment file `env_${SUFFIX}.sh`, where `SUFFIX` is either `nvidia` or `amd`. This script must be sourced every time you start the record-replay mechanism.
+
+## Record-Replay optimizations
+
+To continue reproducing our results, follow the instructions in this [README](./record-replay/README.md).
+
+## Contributors
+
+The source code extensions on top of [Clang/LLVM](https://github.com/llvm/llvm-project) were written by
+[Konstantinos Parasyris](mailto:parasyris1@llnl.gov), [Giorgis Georgakoudis](mailto:georgakoudis1@llnl.gov), and [Johannes Doerfert](mailto:doerfert1@llnl.gov).
+
+## The LLVM Compiler Infrastructure
 
 This directory and its sub-directories contain the source code for LLVM,
 a toolkit for the construction of highly optimized compilers,

diff --git a/record-replay/Passes/AMD/AMDGPUAttributePass.cpp b/record-replay/Passes/AMD/AMDGPUAttributePass.cpp
@@ -0,0 +1,219 @@
+//=============================================================================
+// FILE:
+//    AMDGPUAttributePass.cpp
+//
+// DESCRIPTION:
+//    Find device functions targeting the AMD GPU architecture and set different
+//    values for AMD GPU specific attributes.
+//
+//=============================================================================
+
+#include "llvm/Frontend/OpenMP/OMPConstants.h"
+#include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
+#include "llvm/IR/DebugInfo.h"
+#include "llvm/IR/InstIterator.h"
+#include "llvm/IR/LegacyPassManager.h"
+#include "llvm/IR/Verifier.h"
+#include "llvm/Pass.h"
+#include "llvm/Passes/PassBuilder.h"
+#include "llvm/Passes/PassPlugin.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/OpenMPOpt.h"
+
+using namespace llvm;
+using namespace omp;
+
+// Hidden command-line options of this plugin. All of them are string-valued and
+// default to the empty string, which visitor() treats as "not specified".
+
+// Name of the kernel entry function to restrict attribute setting to, as stated
+// by the option's description.
+static cl::opt<std::string> KernelEntryFunctionNameOpt(
+    "amdgpu-kernel-entry-function-name",
+    cl::desc("Set AMDGPU attributes for this kernel entry function."),
+    cl::Hidden, cl::init(""));
+
+// Value for the "amdgpu-flat-work-group-size" function attribute. The option
+// name (ArgStr) is reused as the attribute kind when the value is applied.
+static cl::opt<std::string> FlatWorkGroupSizeOpt(
+    "amdgpu-flat-work-group-size",
+    cl::desc("Set AMDGPU flat work group size attribute."), cl::Hidden,
+    cl::init(""));
+
+// Value for the "amdgpu-num-sgpr" function attribute.
+static cl::opt<std::string> NumSGPROpt(
+    "amdgpu-num-sgpr", cl::desc("Set AMDGPU Num SGPR attribute."), cl::Hidden,
+    cl::init(""));
+
+// Value for the "amdgpu-num-vgpr" function attribute.
+static cl::opt<std::string> NumVGPROpt(
+    "amdgpu-num-vgpr", cl::desc("Set AMDGPU Num VGPR attribute."), cl::Hidden,
+    cl::init(""));
+
+// Value for the "amdgpu-waves-per-eu" function attribute.
+static cl::opt<std::string> WavesPerEUOpt(
+    "amdgpu-waves-per-eu", cl::desc("Set AMDGPU waves per EU attribute."),
+    cl::Hidden, cl::init(""));
+
+//-----------------------------------------------------------------------------
+// AMDGPUAttributePass implementation
+//-----------------------------------------------------------------------------
+// No need to expose the internals of the pass to the outside world - keep
+// everything in an anonymous namespace.
+namespace {
+
+// Pairs the value of an environment variable with the attribute kind string it
+// should be applied as.
+struct EnvVarOpt {
+  // Contents of the environment variable; stays empty when it is not set.
+  std::string OptVal;
+  // Attribute kind, stored exactly as passed to the constructor.
+  std::string OptKind;
+
+  // Reads the environment variable named EnvVarName once, at construction.
+  EnvVarOpt(const StringRef EnvVarName, StringRef OptKindIn)
+      : OptKind(OptKindIn) {
+    // getenv needs a NUL-terminated name, so materialize the StringRef first.
+    const std::string VarName = EnvVarName.str();
+    if (const char *RawValue = std::getenv(VarName.c_str()))
+      OptVal = RawValue;
+  }
+};
+
+// Implements what the pass does: sets AMDGPU-specific string function
+// attributes on the device kernel entry functions of module M.
+//
+// Attribute values come from two sources, applied in this order:
+//   1. the hidden command-line options declared at the top of this file;
+//   2. the environment variables AMDGPU_FLAT_WORK_GROUP_SIZE, AMDGPU_NUM_SGPR,
+//      AMDGPU_NUM_VGPR and AMDGPU_WAVES_PER_EU.
+// Empty values are skipped. Environment values are applied last so that they
+// take precedence over the command-line ones.
+//
+// The kernels can be narrowed down to a single function by name, either with
+// AMDGPU_KERNEL_ENTRY_FUNCTION_NAME or, when that variable is unset or empty,
+// with -amdgpu-kernel-entry-function-name. With neither given, every kernel
+// returned by getDeviceKernels() is processed.
+void visitor(Module &M) {
+  // NOTE(review): the builder is only referenced by the disabled kernel
+  // discovery below. It is still constructed and initialized so that the
+  // effect of this function on the module stays exactly as before.
+  OpenMPIRBuilder OMPBuilder(M);
+  OMPBuilder.initialize();
+
+  KernelSet KernelEntryFunctions = getDeviceKernels(M);
+
+  // Disabled alternative: treat every function that calls both
+  // __kmpc_target_init and __kmpc_target_deinit as a kernel entry function.
+#if 0
+  auto CollectKernelEntryFunctions = [&](Module &M) {
+    FunctionCallee TargetInit =
+        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_target_init);
+    FunctionCallee TargetDeinit =
+        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_target_deinit);
+
+    for (Function &F : M) {
+      bool CallsTargetInit = false;
+      bool CallsTargetDeinit = false;
+
+      for (Use &U : TargetInit.getCallee()->uses())
+        if (auto *I = dyn_cast<Instruction>(U.getUser()))
+          if (I->getFunction() == &F) {
+            CallsTargetInit = true;
+            break;
+          }
+
+      for (Use &U : TargetDeinit.getCallee()->uses())
+        if (Instruction *I = dyn_cast<Instruction>(U.getUser()))
+          if (I->getFunction() == &F) {
+            CallsTargetDeinit = true;
+            break;
+          }
+
+      if (!CallsTargetInit || !CallsTargetDeinit)
+        continue;
+
+      KernelEntryFunctions.insert(&F);
+    }
+  };
+
+  CollectKernelEntryFunctions(M);
+#endif
+
+  const EnvVarOpt KernelEntryFunctionName("AMDGPU_KERNEL_ENTRY_FUNCTION_NAME",
+                                          "");
+  const EnvVarOpt FlatWorkGroupSize("AMDGPU_FLAT_WORK_GROUP_SIZE",
+                                    "amdgpu-flat-work-group-size");
+  const EnvVarOpt NumSGPR("AMDGPU_NUM_SGPR", "amdgpu-num-sgpr");
+  const EnvVarOpt NumVGPR("AMDGPU_NUM_VGPR", "amdgpu-num-vgpr");
+  const EnvVarOpt WavesPerEU("AMDGPU_WAVES_PER_EU", "amdgpu-waves-per-eu");
+
+  // Kernel name filter. The environment variable wins, consistent with the
+  // attribute values below; the command-line option is the fallback so that
+  // passing it is no longer silently ignored. Both referenced strings outlive
+  // the loop, so holding a StringRef to them is safe.
+  StringRef KernelNameFilter = KernelEntryFunctionName.OptVal;
+  if (KernelNameFilter.empty())
+    KernelNameFilter = KernelEntryFunctionNameOpt.getValue();
+
+  // Adds AttrKind=AttrVal as a string attribute on F; empty values mean "not
+  // specified" and leave F untouched.
+  auto CheckAndSetAttribute = [](Function &F, StringRef AttrKind,
+                                 StringRef AttrVal) {
+    if (AttrVal.empty())
+      return;
+
+    outs() << "Set Attribute " << AttrKind << " => " << AttrVal << "\n";
+    F.addFnAttr(AttrKind, AttrVal);
+  };
+
+  for (Function *F : KernelEntryFunctions) {
+    outs() << "Found Function " << F->getName() << "\n";
+    if (!KernelNameFilter.empty() && F->getName() != KernelNameFilter) {
+      outs() << "Skip " << F->getName() << "\n";
+      continue;
+    }
+
+    outs() << "Kernel entry function " << F->getName() << "\n";
+    // Use command-line options.
+    CheckAndSetAttribute(*F, FlatWorkGroupSizeOpt.ArgStr, FlatWorkGroupSizeOpt);
+    CheckAndSetAttribute(*F, NumSGPROpt.ArgStr, NumSGPROpt);
+    CheckAndSetAttribute(*F, NumVGPROpt.ArgStr, NumVGPROpt);
+    CheckAndSetAttribute(*F, WavesPerEUOpt.ArgStr, WavesPerEUOpt);
+    // Use env vars, they take precedence over command-line options.
+    CheckAndSetAttribute(*F, FlatWorkGroupSize.OptKind,
+                         FlatWorkGroupSize.OptVal);
+    CheckAndSetAttribute(*F, NumSGPR.OptKind, NumSGPR.OptVal);
+    CheckAndSetAttribute(*F, NumVGPR.OptKind, NumVGPR.OptVal);
+    CheckAndSetAttribute(*F, WavesPerEU.OptKind, WavesPerEU.OptVal);
+  }
+}
+
+// New PM implementation
+struct AMDGPUAttributePass : PassInfoMixin<AMDGPUAttributePass> {
+  // Entry point for the new pass manager. Receives the module to process (M)
+  // and the module analysis manager (AM), which is not queried here.
+  PreservedAnalyses run(Module &M, ModuleAnalysisManager &AM) {
+    visitor(M);
+
+    // Conservative answer: report that no analysis is preserved.
+    // TODO: is anything preserved?
+    return PreservedAnalyses::none();
+  }
+
+  // Returning true marks the pass as required; otherwise it would be skipped
+  // for functions carrying the optnone attribute, which clang -O0 puts on all
+  // functions.
+  static bool isRequired() { return true; }
+};
+
+// Legacy PM implementation
+struct LegacyAMDGPUAttributePass : public ModulePass {
+  // Identifies the pass; defined at file scope after the anonymous namespace.
+  static char ID;
+
+  LegacyAMDGPUAttributePass() : ModulePass(ID) {}
+
+  // Entry point for the legacy pass manager; the unit of IR is a whole module.
+  bool runOnModule(Module &M) override {
+    visitor(M);
+
+    // Always reports the module as modified, whether or not visitor() actually
+    // added an attribute.
+    // TODO: what is preserved?
+    return true;
+  }
+};
+} // namespace
+
+//-----------------------------------------------------------------------------
+// New PM Registration
+//-----------------------------------------------------------------------------
+// Builds the plugin descriptor for the new pass manager. The descriptor's
+// registration callback hooks AMDGPUAttributePass into the pipeline-start
+// extension point of the PassBuilder it is given.
+llvm::PassPluginLibraryInfo getAMDGPUAttributePassPluginInfo() {
+  const auto RegisterCallbacks = [](PassBuilder &PB) {
+    // The extension-point callback is stored by the PassBuilder, so it must
+    // not capture anything from this scope by reference; it needs no captures
+    // at all. It returns nothing, matching the void callback signature.
+    // (registerPipelineEarlySimplificationEPCallback was left disabled here as
+    // an alternative hook.)
+    PB.registerPipelineStartEPCallback([](ModulePassManager &MPM, auto) {
+      MPM.addPass(AMDGPUAttributePass());
+    });
+  };
+
+  return {LLVM_PLUGIN_API_VERSION, "AMDGPUAttributePass", LLVM_VERSION_STRING,
+          RegisterCallbacks};
+}
+
+// This is the core interface for pass plugins. NOTE(review): the callback above
+// only registers a pipeline-start hook and no pipeline-parsing callback, so
+// selecting the pass by name via '-passes=...' presumably does not work.
+extern "C" LLVM_ATTRIBUTE_WEAK ::llvm::PassPluginLibraryInfo
+llvmGetPassPluginInfo() {
+  return getAMDGPUAttributePassPluginInfo();
+}
+
+//-----------------------------------------------------------------------------
+// Legacy PM Registration
+//-----------------------------------------------------------------------------
+// The pass is identified by the address of this variable; the value stored in
+// it is irrelevant.
+char LegacyAMDGPUAttributePass::ID = 0;
+
+// Registers LegacyAMDGPUAttributePass with the legacy pass manager so that
+// 'opt' recognizes it on the command line as '--legacy-amdgpuattribute-pass'.
+static RegisterPass<LegacyAMDGPUAttributePass>
+    LegacyPassRegistration("legacy-amdgpuattribute-pass",
+                           "AMDGPU Attribute Pass",
+                           /*CFGOnly=*/false,
+                           /*is_analysis=*/false);
diff --git a/record-replay/Passes/CMakeLists.txt b/record-replay/Passes/CMakeLists.txt
@@ -0,0 +1,44 @@
+cmake_minimum_required(VERSION 3.13.4)
+project(llvm-amdattributes-pass)
+
+#===============================================================================
+# 1. LOAD LLVM CONFIGURATION
+#===============================================================================
+# Set this to a valid LLVM installation dir
+set(LT_LLVM_INSTALL_DIR "" CACHE PATH "LLVM installation directory")
+
+# Add the location of LLVMConfig.cmake to CMake search paths (so that
+# find_package can locate it)
+list(APPEND CMAKE_PREFIX_PATH "${LT_LLVM_INSTALL_DIR}/lib/cmake/llvm/")
+
+# FIXME: This is a workaround for #25. Remove once resolved.
+find_package(LLVM 16 REQUIRED CONFIG)
+
+# Pass includes headers from LLVM - update the include paths accordingly
+include_directories(SYSTEM ${LLVM_INCLUDE_DIRS})
+
+#===============================================================================
+# 2. BUILD CONFIGURATION
+#===============================================================================
+# Use the same C++ standard as LLVM does
+set(CMAKE_CXX_STANDARD 17 CACHE STRING "")
+
+# LLVM is normally built without RTTI. Be consistent with that.
+if(NOT LLVM_ENABLE_RTTI)
+  set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fno-rtti")
+endif()
+
+#===============================================================================
+# 3. ADD THE TARGET
+#===============================================================================
+add_library(TuneDevice SHARED AMD/AMDGPUAttributePass.cpp NVPTX/NVPTXTuningDirectives.cpp NVPTX/GlobalSymbolRename.cpp)
+
+# Allow undefined symbols in shared objects on Darwin (this is the default
+# behaviour on Linux)
+target_link_libraries(TuneDevice
+  "$<$<PLATFORM_ID:Darwin>:-undefined dynamic_lookup>")
+
+install(TARGETS TuneDevice
+        EXPORT TuneDevice
+        DESTINATION lib)
+