From 35f29ae954ec50ba240f4c9b426baa2cfd711b30 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Sat, 2 Mar 2024 12:06:58 -0500 Subject: [PATCH 1/7] [CilkSanitizer][CSI] Make sure that instrumentation for allocas appears after those allocas in entry blocks that contain unusual instructions. --- .../Instrumentation/CilkSanitizer.cpp | 13 +++++++--- .../ComprehensiveStaticInstrumentation.cpp | 14 ++++++---- .../cilksan-instrument-alloca.ll | 26 +++++++++++++++++++ 3 files changed, 44 insertions(+), 9 deletions(-) create mode 100644 llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-instrument-alloca.ll diff --git a/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp index 55489ac59695..ed6833dda581 100644 --- a/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp @@ -4476,9 +4476,14 @@ bool CilkSanitizerImpl::instrumentAlloca(Instruction *I, TaskInfo &TI) { return true; IRBuilder<> IRB(I); - bool AllocaInEntryBlock = isEntryBlock(*I->getParent(), TI); - if (AllocaInEntryBlock) - IRB.SetInsertPoint(getEntryBBInsertPt(*I->getParent())); + bool InsertingAtAlloca = true; + if (isEntryBlock(*I->getParent(), TI)) { + Instruction *EntryBBInsertPt = getEntryBBInsertPt(*I->getParent()); + if (I->comesBefore(EntryBBInsertPt)) { + IRB.SetInsertPoint(EntryBBInsertPt); + InsertingAtAlloca = false; + } + } AllocaInst *AI = cast<AllocaInst>(I); uint64_t LocalId = AllocaFED.add(*I); @@ -4500,7 +4505,7 @@ bool CilkSanitizerImpl::instrumentAlloca(Instruction *I, TaskInfo &TI) { IRB.getInt64Ty())); BasicBlock::iterator Iter(I); - if (!AllocaInEntryBlock) { + if (InsertingAtAlloca) { Iter++; IRB.SetInsertPoint(&*Iter); } else { diff --git a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp index dab90267f9af..41749ae9a0b7 100644 --- 
a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp @@ -13,7 +13,6 @@ #include "llvm/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.h" #include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/CallGraph.h" @@ -1575,9 +1574,14 @@ void CSIImpl::instrumentSync(SyncInst *SI, unsigned SyncRegNum) { void CSIImpl::instrumentAlloca(Instruction *I, TaskInfo &TI) { IRBuilder<> IRB(I); - bool AllocaInEntryBlock = isEntryBlock(*I->getParent(), TI); - if (AllocaInEntryBlock) - IRB.SetInsertPoint(getEntryBBInsertPt(*I->getParent())); + bool InsertingAtAlloca = true; + if (isEntryBlock(*I->getParent(), TI)) { + Instruction *EntryBBInsertPt = getEntryBBInsertPt(*I->getParent()); + if (I->comesBefore(EntryBBInsertPt)) { + IRB.SetInsertPoint(EntryBBInsertPt); + InsertingAtAlloca = false; + } + } AllocaInst *AI = cast<AllocaInst>(I); uint64_t LocalId = AllocaFED.add(*I); @@ -1596,7 +1600,7 @@ void CSIImpl::instrumentAlloca(Instruction *I, TaskInfo &TI) { IRB.getInt64Ty())); BasicBlock::iterator Iter(I); - if (!AllocaInEntryBlock) { + if (InsertingAtAlloca) { Iter++; IRB.SetInsertPoint(&*Iter); } else { diff --git a/llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-instrument-alloca.ll b/llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-instrument-alloca.ll new file mode 100644 index 000000000000..76306ced90f1 --- /dev/null +++ b/llvm/test/Transforms/Tapir/CilkSanitizer/cilksan-instrument-alloca.ll @@ -0,0 +1,26 @@ +; Check that Cilksan and CSI instrument allocas properly in a function's entry block. 
+; +; RUN: opt < %s -passes="cilksan" -S | FileCheck %s +; RUN: opt < %s -passes="csi" -S | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx14.0.0" + +; Function Attrs: sanitize_cilk +define i32 @ggml_graph_compute() #0 { +entry: + %0 = call i64 @llvm.bswap.i64(i64 0) + %MyAlloca = alloca i8, i64 0, align 32 + %1 = ptrtoint ptr %MyAlloca to i64 + unreachable +} + +; CHECK: define i32 @ggml_graph_compute() +; CHECK-NOT: call void @__csi_after_alloca( +; CHECK: %MyAlloca = alloca i8, i64 0 +; CHECK: call void @__csi_after_alloca( + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.bswap.i64(i64) #1 + +attributes #0 = { sanitize_cilk } +attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } From 8ec2c5c32844bad8c3c502128898fe06d8584996 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Sat, 2 Mar 2024 12:08:38 -0500 Subject: [PATCH 2/7] [SemaExpr] Fix type checking for default-lvalue conversions that involve hyperobjects. --- clang/lib/Sema/SemaExpr.cpp | 4 ++-- clang/test/Cilk/hyper-type-delete.cpp | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) create mode 100644 clang/test/Cilk/hyper-type-delete.cpp diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index af5acea59f29..ca0458be99e6 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -652,7 +652,7 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { // converted to a prvalue. if (!E->isGLValue()) return E; - QualType T = E->getType(); + QualType T = E->getType().stripHyperobject(); assert(!T.isNull() && "r-value conversion on typeless expression?"); // lvalue-to-rvalue conversion cannot be applied to function or array types. 
@@ -687,7 +687,7 @@ ExprResult Sema::DefaultLvalueConversion(Expr *E) { CheckForNullPointerDereference(*this, E); E = BuildHyperobjectLookup(E); - T = E->getType(); + assert(T == E->getType() && "Unexpected Type from hyperobject lookup."); if (const ObjCIsaExpr *OISA = dyn_cast<ObjCIsaExpr>(E->IgnoreParenCasts())) { NamedDecl *ObjectGetClass = LookupSingleName(TUScope, diff --git a/clang/test/Cilk/hyper-type-delete.cpp b/clang/test/Cilk/hyper-type-delete.cpp new file mode 100644 index 000000000000..48d6af8f3437 --- /dev/null +++ b/clang/test/Cilk/hyper-type-delete.cpp @@ -0,0 +1,15 @@ +// Check that calling delete on a hyperobject produces a useful error message. +// +// RUN: %clang_cc1 %s -xc++ -fopencilk -verify -fsyntax-only +struct S { + int x, y; +}; +void identity(void *v); +void reduce(void *l, void *r); +using S_r = S _Hyperobject(identity, reduce); + +class Foo { + S_r r; // expected-warning{{reducer callbacks not implemented for structure members}} +public: + ~Foo() { delete r; }; // expected-error{{cannot delete expression of type 'S_r' (aka 'S _Hyperobject(identity, reduce)')}} +}; From 682c3ab526040436c9f1713bc6e57c3b5ecda74f Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Sat, 2 Mar 2024 12:16:44 -0500 Subject: [PATCH 3/7] [LoopSpawning][TaskSimplify] Properly enclose the body of a Tapir loop within a taskframe when transforming the Tapir loop to implement parallel recursive divide-and-conquer spawning. Fix optimization in task-simplify to prevent erroneous deletion of taskframes. Cleanup code and improve naming of split basic blocks. 
--- .../llvm/Transforms/Tapir/LoweringUtils.h | 5 +- llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp | 145 +- llvm/lib/Transforms/Utils/TaskSimplify.cpp | 2 +- llvm/test/Transforms/Tapir/loop-remark-iv.ll | 2 +- ...oop-spawning-iv-tripcount-type-mismatch.ll | 6 +- ...loop-spawning-nested-spawn-alloc-unwind.ll | 1763 +++++++++++++++++ .../Tapir/loop-spawning-nested-spawn-alloc.ll | 322 +++ .../nested-loop-spawning-with-exceptions.ll | 8 +- .../Transforms/Tapir/nested-loop-spawning.ll | 8 +- .../Tapir/nested-outline-vector-width.ll | 2 +- llvm/test/Transforms/Tapir/sret-param.ll | 2 +- 11 files changed, 2217 insertions(+), 48 deletions(-) create mode 100644 llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc-unwind.ll create mode 100644 llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc.ll diff --git a/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h b/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h index 27fa8f2e465c..c600a04d4582 100644 --- a/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h +++ b/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h @@ -432,7 +432,10 @@ class LoopOutlineProcessor { /// a common post-processing step for outlined helper functions. void addSyncToOutlineReturns(TapirLoopInfo &TL, TaskOutlineInfo &Out, ValueToValueMapTy &VMap); - + /// Enclose the task of the Tapir loop in a taskframe, if the Tapir loop + /// contains nested spawns. + void maybeEncloseInTaskFrame(TapirLoopInfo &TL, TaskOutlineInfo &Out, + ValueToValueMapTy &VMap); /// Move Cilksan instrumentation out of cloned loop. 
void moveCilksanInstrumentation(TapirLoopInfo &TL, TaskOutlineInfo &Out, ValueToValueMapTy &VMap); diff --git a/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp b/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp index 5a3fbb8eb083..8c25f142444b 100644 --- a/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp +++ b/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp @@ -14,8 +14,8 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TapirTaskInfo.h" @@ -25,15 +25,13 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/Instruction.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/LLVMContext.h" -#include "llvm/IR/PatternMatch.h" #include "llvm/IR/Type.h" -#include "llvm/IR/ValueMap.h" #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" @@ -43,7 +41,6 @@ #include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/IndVarSimplify.h" -#include "llvm/Transforms/Scalar/SimplifyCFG.h" #include "llvm/Transforms/Scalar/LoopDeletion.h" #include "llvm/Transforms/Tapir.h" #include "llvm/Transforms/Tapir/LoweringUtils.h" @@ -53,14 +50,14 @@ #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" -#include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" #include "llvm/Transforms/Utils/TapirUtils.h" #include "llvm/Transforms/Utils/ValueMapper.h" + #include using namespace llvm; -#define LS_NAME "loop-spawning-ti" +#define LS_NAME 
"loop-spawning" #define DEBUG_TYPE LS_NAME STATISTIC(TapirLoopsFound, @@ -91,6 +88,7 @@ class DACSpawning : public LoopOutlineProcessor { void postProcessOutline(TapirLoopInfo &TL, TaskOutlineInfo &Out, ValueToValueMapTy &VMap) override final { LoopOutlineProcessor::postProcessOutline(TL, Out, VMap); + maybeEncloseInTaskFrame(TL, Out, VMap); implementDACIterSpawnOnHelper(TL, Out, VMap); ++LoopsConvertedToDAC; @@ -110,7 +108,7 @@ static bool isSRetInput(const Value *V, const Function &F) { if (!isa(V)) return false; - auto ArgIter = F.arg_begin(); + const auto *ArgIter = F.arg_begin(); if (F.hasParamAttribute(0, Attribute::StructRet) && V == &*ArgIter) return true; ++ArgIter; @@ -186,7 +184,9 @@ void LoopOutlineProcessor::addSyncToOutlineReturns(TapirLoopInfo &TL, continue; BasicBlock *Exit = AtExit->GetInsertBlock(); - BasicBlock *NewExit = SplitBlock(Exit, Exit->getTerminator()); + BasicBlock *NewExit = + SplitBlock(Exit, Exit->getTerminator(), (DomTreeUpdater *)nullptr, + nullptr, nullptr, Exit->getName() + ".synced"); SyncInst *NewSync = SyncInst::Create(NewExit, SyncRegion); ReplaceInstWithInst(Exit->getTerminator(), NewSync); @@ -206,6 +206,77 @@ void LoopOutlineProcessor::addSyncToOutlineReturns(TapirLoopInfo &TL, } } +void LoopOutlineProcessor::maybeEncloseInTaskFrame(TapirLoopInfo &TL, + TaskOutlineInfo &Out, + ValueToValueMapTy &VMap) { + Task *T = TL.getTask(); + if (T->subtasks().empty()) + return; + + BasicBlock &Entry = Out.Outline->getEntryBlock(); + + // Get the taskframe intrinsics. + Function *TFCreateFn = + Intrinsic::getDeclaration(&M, Intrinsic::taskframe_create); + Function *TFEndFn = + Intrinsic::getDeclaration(&M, Intrinsic::taskframe_end); + + // Insert the taskframe.create. 
+ Instruction *TFCreate = + IRBuilder<>(&Entry, Entry.begin()).CreateCall(TFCreateFn, {}, "ls.tf"); + TFCreate->setDebugLoc(Entry.getTerminator()->getDebugLoc()); + BasicBlock *UnreachableBlk = nullptr; + BasicBlock *NewResume = nullptr; + EscapeEnumerator EE(*Out.Outline, "ls.tfend", false); + SmallVector<ResumeInst *> Resumes; + while (IRBuilder<> *AtExit = EE.Next()) { + if (isa<ReturnInst>(*AtExit->GetInsertPoint())) { + AtExit->CreateCall(TFEndFn, TFCreate); + continue; + } + + BasicBlock *Exit = AtExit->GetInsertBlock(); + if (TL.getUnwindDest() && + Exit == cast<BasicBlock>(VMap[TL.getUnwindDest()])) + continue; + if (Exit == NewResume) + continue; + + if (!UnreachableBlk) { + // Create the placeholder unreachable block, now that it's needed. + UnreachableBlk = BasicBlock::Create( + M.getContext(), Exit->getName() + ".unreachable", Out.Outline); + { // Add an unreachable instruction to the end of UnreachableBlk. + IRBuilder<> Builder(UnreachableBlk); + Builder.CreateUnreachable(); + } + } + + // Create a new resume block. 
+ if (!NewResume) { + NewResume = BasicBlock::Create( + M.getContext(), Exit->getName() + ".tfunwind", Out.Outline); + IRBuilder<> Builder(NewResume); + Builder.SetCurrentDebugLocation(Exit->getTerminator()->getDebugLoc()); + LandingPadInst *LPad = Builder.CreateLandingPad( + cast<ResumeInst>(Exit->getTerminator())->getValue()->getType(), 0); + LPad->setCleanup(true); + Builder.CreateResume(LPad); + } + + Resumes.push_back(cast<ResumeInst>(Exit->getTerminator())); + } + + for (ResumeInst *R : Resumes) { + Value *Exn = R->getValue(); + Function *TFResumeFn = Intrinsic::getDeclaration( + &M, Intrinsic::taskframe_resume, {Exn->getType()}); + InvokeInst *TFResume = InvokeInst::Create(TFResumeFn, UnreachableBlk, + NewResume, {TFCreate, Exn}); + ReplaceInstWithInst(R, TFResume); + } +} + static void getDependenciesInSameBlock(Instruction *I, SmallPtrSetImpl<Instruction *> &Deps) { const BasicBlock *Block = I->getParent(); @@ -303,18 +374,15 @@ void LoopOutlineProcessor::moveCilksanInstrumentation(TapirLoopInfo &TL, } // Move __csan_detach and __csan_task to the Preheader. - moveInstrumentation("__csan_detach", *Header, *Preheader, - Preheader->getTerminator()); - moveInstrumentation("__csan_task", *TaskEntry, *Preheader, - Preheader->getTerminator()); - - // Move __csan_detach_continue and __csan_task_exit on the normal exit path to - // LatchExit. - moveInstrumentation("__csan_detach_continue", *Latch, *LatchExit); + moveInstrumentation("__csan_task", *TaskEntry, *Preheader); + moveInstrumentation("__csan_detach", *Header, *Preheader); + + // Move __csan_task_exit on the normal exit path to LatchExit. if (TaskExit) // There's only one block with __csan_task_exit instrumentation to move, so // move it from that block. - moveInstrumentation("__csan_task_exit", *TaskExit, *LatchExit); + moveInstrumentation("__csan_task_exit", *TaskExit, *LatchExit, + LatchExit->getTerminator()); else { // We need to create PHI nodes for the arguments of a new instrumentation // call in LatchExit. 
@@ -364,12 +432,15 @@ void LoopOutlineProcessor::moveCilksanInstrumentation(TapirLoopInfo &TL, // Insert new instrumentation call at the start of LatchExit. CallInst::Create(InstrFunc->getFunctionType(), InstrFunc, InstrArgs, "", - &*LatchExit->getFirstInsertionPt()); + LatchExit->getTerminator()); // Remove old instrumentation calls from predecessors for (BasicBlock *Pred : predecessors(Latch)) Instrumentation[Pred]->eraseFromParent(); } + // Move __csan_detach_continue on the normal exit path to LatchExit. + moveInstrumentation("__csan_detach_continue", *Latch, *LatchExit, + LatchExit->getTerminator()); } namespace { @@ -598,7 +669,7 @@ void DACSpawning::implementDACIterSpawnOnHelper( // Get end and grainsize arguments Argument *End, *Grainsize; { - auto OutlineArgsIter = Helper->arg_begin(); + auto *OutlineArgsIter = Helper->arg_begin(); if (Helper->hasParamAttribute(0, Attribute::StructRet)) ++OutlineArgsIter; // End argument is second LC input. @@ -611,7 +682,9 @@ void DACSpawning::implementDACIterSpawnOnHelper( if (&(Helper->getEntryBlock()) == Preheader) { // Split the entry block. We'll want to create a backedge into // the split block later. - DACHead = SplitBlock(Preheader, &Preheader->front()); + DACHead = + SplitBlock(Preheader, &Preheader->front(), (DomTreeUpdater *)nullptr, + nullptr, nullptr, Preheader->getName() + ".dac.head"); // Move any syncregion_start's in DACHead into Preheader. BasicBlock::iterator InsertPoint = Preheader->begin(); @@ -690,8 +763,12 @@ void DACSpawning::implementDACIterSpawnOnHelper( /*BranchWeights=*/nullptr); RecurHead = RecurTerm->getParent(); // Create RecurHead, RecurDet, and RecurCont, with appropriate branches. 
- RecurDet = SplitBlock(RecurHead, RecurHead->getTerminator()); - RecurCont = SplitBlock(RecurDet, RecurDet->getTerminator()); + RecurDet = SplitBlock(RecurHead, RecurHead->getTerminator(), + (DomTreeUpdater *)nullptr, nullptr, nullptr, + Preheader->getName() + ".dac.detach"); + RecurCont = SplitBlock(RecurDet, RecurDet->getTerminator(), + (DomTreeUpdater *)nullptr, nullptr, nullptr, + Preheader->getName() + ".dac.cont"); RecurCont->getTerminator()->replaceUsesOfWith(RecurTerm->getSuccessor(0), DACHead); } @@ -758,7 +835,9 @@ void DACSpawning::implementDACIterSpawnOnHelper( RecurCall->setDoesNotThrow(); } else { InvokeInst *RecurCall; - BasicBlock *CallDest = SplitBlock(RecurDet, RecurDet->getTerminator()); + BasicBlock *CallDest = SplitBlock(RecurDet, RecurDet->getTerminator(), + (DomTreeUpdater *)nullptr, nullptr, + nullptr, RecurDet->getName() + ".noexc"); BasicBlock *CallUnwind = createTaskUnwind(Helper, UnwindDest, SyncRegion, RecurDet->getName()+".unwind"); @@ -866,7 +945,9 @@ Task *LoopSpawningImpl::getTaskIfTapirLoop(const Loop *L) { emitMissedWarning(L, Hints, &ORE); } return nullptr; - } else if (!isa<BranchInst>(Preheader->getTerminator())) { + } + + if (!isa<BranchInst>(Preheader->getTerminator())) { LLVM_DEBUG(dbgs() << "Loop preheader is not terminated by a branch.\n"); if (hintsDemandOutlining(Hints)) { ORE.emit(TapirLoopInfo::createMissedAnalysis(LS_NAME, "ComplexPreheader", @@ -1251,7 +1332,7 @@ class ArgEndMaterializer final : public OutlineMaterializer { return OutlineMaterializer::materialize(V); } }; -} +} // namespace /// Outline Tapir loop \p TL into a helper function. The \p Args set specified /// the arguments to that helper function. 
The map \p VMap will store the @@ -1428,9 +1509,9 @@ TaskOutlineMapTy LoopSpawningImpl::outlineAllTapirLoops() { for (Task *T : post_order(TI.getRootTask())) { if (TapirLoopInfo *TL = getTapirLoop(T)) { PredicatedScalarEvolution PSE(SE, *TL->getLoop()); - bool canOutline = TL->prepareForOutlining(DT, LI, TI, PSE, AC, LS_NAME, + bool CanOutline = TL->prepareForOutlining(DT, LI, TI, PSE, AC, LS_NAME, ORE, TTI); - if (!canOutline) { + if (!CanOutline) { const Loop *L = TL->getLoop(); TapirLoopHints Hints(L); emitMissedWarning(L, Hints, &ORE); @@ -1743,10 +1824,10 @@ struct LoopSpawningTI : public FunctionPass { AU.addRequired(); } }; -} +} // namespace char LoopSpawningTI::ID = 0; -static const char ls_name[] = "Loop Spawning with Task Info"; +static const char LsName[] = "Loop Spawning with Task Info"; INITIALIZE_PASS_BEGIN(LoopSpawningTI, LS_NAME, ls_name, false, false) INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker) INITIALIZE_PASS_DEPENDENCY(LoopSimplify) @@ -1758,10 +1839,10 @@ INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(TaskInfoWrapperPass) INITIALIZE_PASS_DEPENDENCY(OptimizationRemarkEmitterWrapperPass) -INITIALIZE_PASS_END(LoopSpawningTI, LS_NAME, ls_name, false, false) +INITIALIZE_PASS_END(LoopSpawningTI, LS_NAME, LsName, false, false) namespace llvm { Pass *createLoopSpawningTIPass() { return new LoopSpawningTI(); } -} +} // namespace llvm diff --git a/llvm/lib/Transforms/Utils/TaskSimplify.cpp b/llvm/lib/Transforms/Utils/TaskSimplify.cpp index 795780acad63..e3b5105da664 100644 --- a/llvm/lib/Transforms/Utils/TaskSimplify.cpp +++ b/llvm/lib/Transforms/Utils/TaskSimplify.cpp @@ -307,7 +307,7 @@ static bool canRemoveTaskFrame(const Spindle *TF, MaybeParallelTasks &MPTasks, continue; // Skip spindles in nested taskframes. 
- if (S != TF && S->getTaskFrameParent() != TF) + if (S != TF && S->getTaskFrameParent() && S->getTaskFrameParent() != TF) continue; // Filter the task list of S to exclude tasks in parallel with the entry. diff --git a/llvm/test/Transforms/Tapir/loop-remark-iv.ll b/llvm/test/Transforms/Tapir/loop-remark-iv.ll index 4baa840eb2e7..e2253ac00771 100644 --- a/llvm/test/Transforms/Tapir/loop-remark-iv.ll +++ b/llvm/test/Transforms/Tapir/loop-remark-iv.ll @@ -1,4 +1,4 @@ -; RUN: opt < %s -passes='loop-spawning' -pass-remarks-analysis=loop-spawning-ti -disable-output 2>&1 | FileCheck %s +; RUN: opt < %s -passes='loop-spawning' -pass-remarks-analysis=loop-spawning -disable-output 2>&1 | FileCheck %s target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-unknown-linux-gnu" diff --git a/llvm/test/Transforms/Tapir/loop-spawning-iv-tripcount-type-mismatch.ll b/llvm/test/Transforms/Tapir/loop-spawning-iv-tripcount-type-mismatch.ll index 5540d6d556a8..6b26bbe37714 100644 --- a/llvm/test/Transforms/Tapir/loop-spawning-iv-tripcount-type-mismatch.ll +++ b/llvm/test/Transforms/Tapir/loop-spawning-iv-tripcount-type-mismatch.ll @@ -212,8 +212,8 @@ pfor.cond.cleanup: ; preds = %pfor.inc ; CHECK: _Z10initializePdS_S_S_S_mm.exit.ls1: ; CHECK: %[[SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() -; CHECK: _Z10initializePdS_S_S_S_mm.exit.ls1.split: -; CHECK-NEXT: %[[DACIV:.+]] = phi i64 [ %indvars.iv376.start.ls1, %_Z10initializePdS_S_S_S_mm.exit.ls1 ], [ %[[DACIVINC:.+]], %.split.split ] +; CHECK: _Z10initializePdS_S_S_S_mm.exit.ls1.dac.head: +; CHECK-NEXT: %[[DACIV:.+]] = phi i64 [ %indvars.iv376.start.ls1, %_Z10initializePdS_S_S_S_mm.exit.ls1 ], [ %[[DACIVINC:.+]], %_Z10initializePdS_S_S_S_mm.exit.ls1.dac.cont ] ; CHECK: %[[DACIVSTART:.+]] = trunc i64 %[[DACIV]] to i32 ; CHECK: %[[ITERCOUNT:.+]] = sub i32 %[[END]], %[[DACIVSTART]] ; CHECK: %[[CMP:.+]] = icmp ugt i32 %[[ITERCOUNT]], %[[GRAINSIZE]] @@ -230,7 +230,7 @@ pfor.cond.cleanup: ; preds = 
%pfor.inc ; CHECK: [[CONTIN]]: ; CHECK: %[[DACIVINC]] = zext i32 %[[MIDITER]] to i64 -; CHECK: br label %_Z10initializePdS_S_S_S_mm.exit.ls1.split +; CHECK: br label %_Z10initializePdS_S_S_S_mm.exit.ls1.dac.head declare dso_local double @sqrt(double) local_unnamed_addr #0 diff --git a/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc-unwind.ll b/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc-unwind.ll new file mode 100644 index 000000000000..34d69200b16a --- /dev/null +++ b/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc-unwind.ll @@ -0,0 +1,1763 @@ +; Check that loop-spawning and task-simplify correctly handle a static memory allocation and +; nested spawn within a parallel loop body with a landingpad. +; +; RUN: opt < %s -passes="loop-spawning,task-simplify" -S | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx14.0.0" + +%struct.ggml_type_traits_t = type { ptr, i32, i64, i8, ptr, ptr, ptr, ptr, i32, i64 } +%struct.ggml_tensor = type { i32, i32, ptr, [4 x i64], [4 x i64], i32, [16 x i32], i32, ptr, [10 x ptr], i32, i64, i64, ptr, i64, ptr, [64 x i8], ptr, [8 x i8] } +%struct.ggml_compute_params = type { i32, i32, i32, i64, ptr } + +@type_traits = external local_unnamed_addr global [24 x %struct.ggml_type_traits_t], align 8 +@__csi_unit_func_base_id = internal global i64 0 +@__csi_unit_func_exit_base_id = internal global i64 0 +@__csi_unit_loop_base_id = internal global i64 0 +@__csi_unit_loop_exit_base_id = internal global i64 0 +@__csi_unit_bb_base_id = internal global i64 0 +@__csi_unit_callsite_base_id = internal global i64 0 +@__csi_unit_load_base_id = internal global i64 0 +@__csi_unit_store_base_id = internal global i64 0 +@__csi_unit_alloca_base_id = internal global i64 0 +@__csi_unit_detach_base_id = internal global i64 0 +@__csi_unit_task_base_id = internal global i64 0 +@__csi_unit_task_exit_base_id = internal global i64 0 +@__csi_unit_detach_continue_base_id = 
internal global i64 0 +@__csi_unit_sync_base_id = internal global i64 0 +@__csi_unit_allocfn_base_id = internal global i64 0 +@__csi_unit_free_base_id = internal global i64 0 +@__csi_func_id_ggml_is_contiguous = weak local_unnamed_addr global i64 -1 +@__csi_func_id_ggml_row_size = weak local_unnamed_addr global i64 -1 +@"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" = private unnamed_addr constant [66 x i8] c"/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp\00" +@__csi_unit_function_name_ggml_compute_forward_mul_mat = private unnamed_addr constant [29 x i8] c"ggml_compute_forward_mul_mat\00" +@__csi_unit_fed_table__csi_unit_func_base_id = internal global [1 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 33, i32 -1, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_func_exit_base_id = internal global [2 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 170, i32 1, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 124, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_loop_base_id = internal global [2 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 124, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_loop_exit_base_id = internal global 
[0 x { ptr, i32, i32, ptr }] zeroinitializer +@__csi_unit_fed_table__csi_unit_bb_base_id = internal global [0 x { ptr, i32, i32, ptr }] zeroinitializer +@__csi_unit_fed_table__csi_unit_callsite_base_id = internal global [5 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 50, i32 28, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 81, i32 29, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 161, i32 21, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 161, i32 21, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 161, i32 21, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_load_base_id = internal global [40 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr 
@__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr 
@__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 48, i32 39, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 52, i32 71, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 53, i32 71, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 55, i32 71, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 80, i32 36, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 80, i32 29, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 122, i32 55, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 144, i32 62, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 151, i32 41, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr 
@__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 144, i32 62, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 151, i32 41, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 144, i32 62, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 151, i32 41, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 37, i32 39, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 38, i32 39, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr 
@__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 43, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 154, i32 60, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 154, i32 60, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 154, i32 60, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 165, i32 21, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 165, i32 21, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 165, i32 21, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_store_base_id = internal global [3 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 165, i32 21, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 165, i32 21, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 165, i32 21, ptr 
@"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_detach_base_id = internal global [3 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 124, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_task_base_id = internal global [3 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 0, i32 0, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_task_exit_base_id = internal global [6 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 169, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 124, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr 
@__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_detach_continue_base_id = internal global [6 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 124, i32 56, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 124, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 124, i32 5, ptr 
@"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_sync_base_id = internal global [3 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 127, i32 9, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 124, i32 5, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_alloca_base_id = internal global [2 x { ptr, i32, i32, ptr }] [{ ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 33, i32 -1, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, i32, ptr } { ptr @__csi_unit_function_name_ggml_compute_forward_mul_mat, i32 33, i32 -1, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_fed_table__csi_unit_allocfn_base_id = internal global [0 x { ptr, i32, i32, ptr }] zeroinitializer +@__csi_unit_fed_table__csi_unit_free_base_id = internal global [0 x { ptr, i32, i32, ptr }] zeroinitializer +@__csi_unit_object_name_src0 = private unnamed_addr constant [5 x i8] c"src0\00" +@__csi_unit_object_name_src1 = private unnamed_addr constant [5 x i8] c"src1\00" +@__csi_unit_object_name_type_traits = private unnamed_addr constant [12 x i8] c"type_traits\00" +@__csi_unit_object_name_cond.in = private unnamed_addr constant [8 x i8] c"cond.in\00" +@__csi_unit_object_name_dst = private unnamed_addr constant [4 x i8] c"dst\00" +@__csi_unit_object_name_tmp = private unnamed_addr constant [4 x i8] c"tmp\00" 
+@__csi_unit_obj_table = internal global [40 x { ptr, i32, ptr }] [{ ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr 
@__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_type_traits, i32 -1, ptr null }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_type_traits, i32 -1, ptr null }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_type_traits, i32 -1, ptr null }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_cond.in, i32 -1, ptr null }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src0, i32 37, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_src1, i32 38, ptr 
@"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_dst, i32 35, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_dst, i32 35, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_dst, i32 35, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_dst, i32 35, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_dst, i32 35, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_dst, i32 35, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_dst, i32 35, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_dst, i32 35, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_dst, i32 35, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_dst, i32 35, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_tmp, i32 130, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_tmp, i32 130, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { 
ptr @__csi_unit_object_name_tmp, i32 130, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_obj_table.1 = internal global [3 x { ptr, i32, ptr }] [{ ptr, i32, ptr } { ptr null, i32 -1, ptr null }, { ptr, i32, ptr } { ptr null, i32 -1, ptr null }, { ptr, i32, ptr } { ptr null, i32 -1, ptr null }] +@__csi_unit_obj_table.2 = internal global [2 x { ptr, i32, ptr }] [{ ptr, i32, ptr } { ptr @__csi_unit_object_name_tmp, i32 130, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }, { ptr, i32, ptr } { ptr @__csi_unit_object_name_tmp, i32 130, ptr @"__csi_unit_filename_/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp" }] +@__csi_unit_obj_table.3 = internal global [0 x { ptr, i32, ptr }] zeroinitializer +@__csi_func_id__Z28ggml_compute_forward_mul_matPK19ggml_compute_paramsP11ggml_tensor = weak local_unnamed_addr global i64 -1 +@__csi_unit_fed_tables = internal global [16 x { i64, ptr, ptr }] [{ i64, ptr, ptr } { i64 1, ptr @__csi_unit_func_base_id, ptr @__csi_unit_fed_table__csi_unit_func_base_id }, { i64, ptr, ptr } { i64 2, ptr @__csi_unit_func_exit_base_id, ptr @__csi_unit_fed_table__csi_unit_func_exit_base_id }, { i64, ptr, ptr } { i64 2, ptr @__csi_unit_loop_base_id, ptr @__csi_unit_fed_table__csi_unit_loop_base_id }, { i64, ptr, ptr } { i64 0, ptr @__csi_unit_loop_exit_base_id, ptr @__csi_unit_fed_table__csi_unit_loop_exit_base_id }, { i64, ptr, ptr } { i64 0, ptr @__csi_unit_bb_base_id, ptr @__csi_unit_fed_table__csi_unit_bb_base_id }, { i64, ptr, ptr } { i64 5, ptr @__csi_unit_callsite_base_id, ptr @__csi_unit_fed_table__csi_unit_callsite_base_id }, { i64, ptr, ptr } { i64 40, ptr @__csi_unit_load_base_id, ptr @__csi_unit_fed_table__csi_unit_load_base_id }, { i64, ptr, ptr } { i64 3, ptr @__csi_unit_store_base_id, ptr @__csi_unit_fed_table__csi_unit_store_base_id }, { i64, ptr, ptr } { i64 3, ptr @__csi_unit_detach_base_id, ptr 
@__csi_unit_fed_table__csi_unit_detach_base_id }, { i64, ptr, ptr } { i64 3, ptr @__csi_unit_task_base_id, ptr @__csi_unit_fed_table__csi_unit_task_base_id }, { i64, ptr, ptr } { i64 6, ptr @__csi_unit_task_exit_base_id, ptr @__csi_unit_fed_table__csi_unit_task_exit_base_id }, { i64, ptr, ptr } { i64 6, ptr @__csi_unit_detach_continue_base_id, ptr @__csi_unit_fed_table__csi_unit_detach_continue_base_id }, { i64, ptr, ptr } { i64 3, ptr @__csi_unit_sync_base_id, ptr @__csi_unit_fed_table__csi_unit_sync_base_id }, { i64, ptr, ptr } { i64 2, ptr @__csi_unit_alloca_base_id, ptr @__csi_unit_fed_table__csi_unit_alloca_base_id }, { i64, ptr, ptr } { i64 0, ptr @__csi_unit_allocfn_base_id, ptr @__csi_unit_fed_table__csi_unit_allocfn_base_id }, { i64, ptr, ptr } { i64 0, ptr @__csi_unit_free_base_id, ptr @__csi_unit_fed_table__csi_unit_free_base_id }] +@__csi_unit_obj_tables = internal global [4 x { i64, ptr }] [{ i64, ptr } { i64 40, ptr @__csi_unit_obj_table }, { i64, ptr } { i64 3, ptr @__csi_unit_obj_table.1 }, { i64, ptr } { i64 2, ptr @__csi_unit_obj_table.2 }, { i64, ptr } { i64 0, ptr @__csi_unit_obj_table.3 }] +@0 = private unnamed_addr constant [66 x i8] c"/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp\00", align 1 +@llvm.global_ctors = appending global [1 x { i32, ptr, ptr }] [{ i32, ptr, ptr } { i32 0, ptr @csirt.unit_ctor, ptr null }] + +; Function Attrs: mustprogress ssp uwtable(sync) +define void @_Z28ggml_compute_forward_mul_matPK19ggml_compute_paramsP11ggml_tensor(ptr nocapture noundef readonly %params, ptr nocapture noundef readonly %dst) local_unnamed_addr #0 personality ptr @__gcc_personality_v0 !dbg !228 { +entry: + %syncreg = tail call token @llvm.syncregion.start(), !dbg !400 + call void @llvm.dbg.value(metadata ptr %params, metadata !283, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata ptr %dst, metadata !284, metadata !DIExpression()), !dbg !400 + %0 = load i64, ptr @__csi_unit_func_base_id, align 8, !dbg 
!401, !invariant.load !402 + %1 = call ptr @llvm.frameaddress.p0(i32 0), !dbg !401 + %2 = call ptr @llvm.stacksave(), !dbg !401 + call void @__csan_func_entry(i64 %0, ptr %1, ptr %2, i64 257), !dbg !400 + %3 = alloca i8, align 1, !dbg !401 + call void @__csan_get_MAAP(ptr nonnull %3, i64 %0, i8 0), !dbg !401 + %4 = load i8, ptr %3, align 1, !dbg !401 + %5 = alloca i8, align 1, !dbg !401 + call void @__csan_get_MAAP(ptr nonnull %5, i64 %0, i8 1), !dbg !401 + %6 = load i8, ptr %5, align 1, !dbg !401 + %7 = load i64, ptr @__csi_unit_detach_base_id, align 8, !dbg !401, !invariant.load !402 + %8 = load i64, ptr @__csi_unit_task_base_id, align 8, !dbg !401, !invariant.load !402 + %9 = load i64, ptr @__csi_unit_task_exit_base_id, align 8, !dbg !401, !invariant.load !402 + %10 = add i64 %9, 1, !dbg !401 + %11 = load i64, ptr @__csi_unit_detach_continue_base_id, align 8, !dbg !401, !invariant.load !402 + %12 = add i64 %11, 1, !dbg !401 + %src = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 9, !dbg !401 + %13 = and i8 %6, 1, !dbg !403 + %14 = icmp eq i8 %13, 0, !dbg !403 + %15 = and i8 %4, 1, !dbg !403 + %16 = icmp eq i8 %15, 0, !dbg !403 + %17 = or i8 %6, %4, !dbg !403 + %18 = and i8 %17, 4, !dbg !403 + %19 = icmp ne i8 %18, 0, !dbg !403 + %20 = or i1 %16, %19, !dbg !403 + %21 = and i1 %14, %20, !dbg !403 + br i1 %21, label %22, label %24, !dbg !403 + +22: ; preds = %entry + %23 = load ptr, ptr %src, align 8, !dbg !403, !tbaa !404 + call void @llvm.dbg.value(metadata ptr %23, metadata !285, metadata !DIExpression()), !dbg !400 + %arrayidx3 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 9, i64 1, !dbg !408 + br label %29, !dbg !408 + +24: ; preds = %entry + %25 = load i64, ptr @__csi_unit_load_base_id, align 8, !dbg !403, !invariant.load !402 + %26 = add i64 %25, 27, !dbg !403 + call void @__csan_load(i64 %26, ptr nonnull %src, i32 8, i64 8), !dbg !403 + %27 = load ptr, ptr %src, align 8, !dbg !403, !tbaa !404 + call void 
@llvm.dbg.value(metadata ptr %27, metadata !285, metadata !DIExpression()), !dbg !400 + %arrayidx3408 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 9, i64 1, !dbg !408 + %28 = add i64 %25, 28, !dbg !408 + call void @__csan_load(i64 %28, ptr nonnull %arrayidx3408, i32 8, i64 8), !dbg !408 + br label %29, !dbg !408 + +29: ; preds = %22, %24 + %arrayidx3409 = phi ptr [ %arrayidx3, %22 ], [ %arrayidx3408, %24 ] + %30 = phi ptr [ %23, %22 ], [ %27, %24 ] + %31 = load ptr, ptr %arrayidx3409, align 8, !dbg !408, !tbaa !404 + call void @llvm.dbg.value(metadata ptr %31, metadata !288, metadata !DIExpression()), !dbg !400 + %ne = getelementptr inbounds %struct.ggml_tensor, ptr %30, i64 0, i32 3, !dbg !409 + %32 = load i64, ptr @__csi_unit_load_base_id, align 8, !dbg !409, !invariant.load !402 + call void @__csan_load(i64 %32, ptr nonnull %ne, i32 8, i64 8), !dbg !409 + %33 = load i64, ptr %ne, align 8, !dbg !409, !tbaa !410 + call void @llvm.dbg.value(metadata i64 %33, metadata !289, metadata !DIExpression()), !dbg !400 + %arrayidx6 = getelementptr inbounds %struct.ggml_tensor, ptr %30, i64 0, i32 3, i64 1, !dbg !409 + %34 = add i64 %32, 1, !dbg !409 + call void @__csan_load(i64 %34, ptr nonnull %arrayidx6, i32 8, i64 8), !dbg !409 + %35 = load i64, ptr %arrayidx6, align 8, !dbg !409, !tbaa !410 + %.fr = freeze i64 %35 + call void @llvm.dbg.value(metadata i64 %35, metadata !291, metadata !DIExpression()), !dbg !400 + %arrayidx8 = getelementptr inbounds %struct.ggml_tensor, ptr %30, i64 0, i32 3, i64 2, !dbg !409 + %36 = add i64 %32, 2, !dbg !409 + call void @__csan_load(i64 %36, ptr nonnull %arrayidx8, i32 8, i64 8), !dbg !409 + %37 = load i64, ptr %arrayidx8, align 8, !dbg !409, !tbaa !410 + call void @llvm.dbg.value(metadata i64 %37, metadata !292, metadata !DIExpression()), !dbg !400 + %arrayidx10 = getelementptr inbounds %struct.ggml_tensor, ptr %30, i64 0, i32 3, i64 3, !dbg !409 + %38 = add i64 %32, 3, !dbg !409 + call void @__csan_load(i64 %38, 
ptr nonnull %arrayidx10, i32 8, i64 8), !dbg !409 + %39 = load i64, ptr %arrayidx10, align 8, !dbg !409, !tbaa !410 + call void @llvm.dbg.value(metadata i64 %39, metadata !293, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 poison, metadata !294, metadata !DIExpression()), !dbg !400 + %arrayidx13 = getelementptr inbounds %struct.ggml_tensor, ptr %30, i64 0, i32 4, i64 1, !dbg !409 + %40 = add i64 %32, 4, !dbg !409 + call void @__csan_load(i64 %40, ptr nonnull %arrayidx13, i32 8, i64 8), !dbg !409 + %41 = load i64, ptr %arrayidx13, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %41, metadata !296, metadata !DIExpression()), !dbg !400 + %arrayidx15 = getelementptr inbounds %struct.ggml_tensor, ptr %30, i64 0, i32 4, i64 2, !dbg !409 + %42 = add i64 %32, 5, !dbg !409 + call void @__csan_load(i64 %42, ptr nonnull %arrayidx15, i32 8, i64 8), !dbg !409 + %43 = load i64, ptr %arrayidx15, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %43, metadata !297, metadata !DIExpression()), !dbg !400 + %arrayidx17 = getelementptr inbounds %struct.ggml_tensor, ptr %30, i64 0, i32 4, i64 3, !dbg !409 + %44 = add i64 %32, 6, !dbg !409 + call void @__csan_load(i64 %44, ptr nonnull %arrayidx17, i32 8, i64 8), !dbg !409 + %45 = load i64, ptr %arrayidx17, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %45, metadata !298, metadata !DIExpression()), !dbg !400 + %ne18 = getelementptr inbounds %struct.ggml_tensor, ptr %31, i64 0, i32 3, !dbg !409 + %46 = add i64 %32, 7, !dbg !409 + call void @__csan_load(i64 %46, ptr nonnull %ne18, i32 8, i64 8), !dbg !409 + %47 = load i64, ptr %ne18, align 8, !dbg !409, !tbaa !410 + call void @llvm.dbg.value(metadata i64 %47, metadata !299, metadata !DIExpression()), !dbg !400 + %arrayidx21 = getelementptr inbounds %struct.ggml_tensor, ptr %31, i64 0, i32 3, i64 1, !dbg !409 + %48 = add i64 %32, 8, !dbg !409 + call void @__csan_load(i64 %48, ptr nonnull 
%arrayidx21, i32 8, i64 8), !dbg !409 + %49 = load i64, ptr %arrayidx21, align 8, !dbg !409, !tbaa !410 + call void @llvm.dbg.value(metadata i64 %49, metadata !300, metadata !DIExpression()), !dbg !400 + %arrayidx23 = getelementptr inbounds %struct.ggml_tensor, ptr %31, i64 0, i32 3, i64 2, !dbg !409 + %50 = add i64 %32, 9, !dbg !409 + call void @__csan_load(i64 %50, ptr nonnull %arrayidx23, i32 8, i64 8), !dbg !409 + %51 = load i64, ptr %arrayidx23, align 8, !dbg !409, !tbaa !410 + call void @llvm.dbg.value(metadata i64 %51, metadata !301, metadata !DIExpression()), !dbg !400 + %arrayidx25 = getelementptr inbounds %struct.ggml_tensor, ptr %31, i64 0, i32 3, i64 3, !dbg !409 + %52 = add i64 %32, 10, !dbg !409 + call void @__csan_load(i64 %52, ptr nonnull %arrayidx25, i32 8, i64 8), !dbg !409 + %53 = load i64, ptr %arrayidx25, align 8, !dbg !409, !tbaa !410 + call void @llvm.dbg.value(metadata i64 %53, metadata !302, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 poison, metadata !303, metadata !DIExpression()), !dbg !400 + %arrayidx29 = getelementptr inbounds %struct.ggml_tensor, ptr %31, i64 0, i32 4, i64 1, !dbg !409 + %54 = add i64 %32, 11, !dbg !409 + call void @__csan_load(i64 %54, ptr nonnull %arrayidx29, i32 8, i64 8), !dbg !409 + %55 = load i64, ptr %arrayidx29, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %55, metadata !304, metadata !DIExpression()), !dbg !400 + %arrayidx32 = getelementptr inbounds %struct.ggml_tensor, ptr %31, i64 0, i32 4, i64 2, !dbg !409 + %56 = add i64 %32, 12, !dbg !409 + call void @__csan_load(i64 %56, ptr nonnull %arrayidx32, i32 8, i64 8), !dbg !409 + %57 = load i64, ptr %arrayidx32, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %57, metadata !305, metadata !DIExpression()), !dbg !400 + %arrayidx34 = getelementptr inbounds %struct.ggml_tensor, ptr %31, i64 0, i32 4, i64 3, !dbg !409 + %58 = add i64 %32, 13, !dbg !409 + call void @__csan_load(i64 
%58, ptr nonnull %arrayidx34, i32 8, i64 8), !dbg !409 + %59 = load i64, ptr %arrayidx34, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %59, metadata !306, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 poison, metadata !307, metadata !DIExpression()), !dbg !400 + %arrayidx38 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 3, i64 1, !dbg !409 + br i1 %21, label %60, label %65, !dbg !409 + +60: ; preds = %29 + %61 = load i64, ptr %arrayidx38, align 8, !dbg !409, !tbaa !410 + call void @llvm.dbg.value(metadata i64 %61, metadata !308, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 poison, metadata !309, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 poison, metadata !310, metadata !DIExpression()), !dbg !400 + %nb43 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, !dbg !409 + %62 = load i64, ptr %nb43, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %62, metadata !311, metadata !DIExpression()), !dbg !400 + %arrayidx46 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, i64 1, !dbg !409 + %63 = load i64, ptr %arrayidx46, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %63, metadata !312, metadata !DIExpression()), !dbg !400 + %arrayidx48 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, i64 2, !dbg !409 + %64 = load i64, ptr %arrayidx48, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %64, metadata !313, metadata !DIExpression()), !dbg !400 + %arrayidx50 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, i64 3, !dbg !409 + br label %75, !dbg !409 + +65: ; preds = %29 + %66 = add i64 %32, 29, !dbg !409 + call void @__csan_load(i64 %66, ptr nonnull %arrayidx38, i32 8, i64 8), !dbg !409 + %67 = load i64, ptr %arrayidx38, align 8, !dbg !409, !tbaa !410 + call void @llvm.dbg.value(metadata 
i64 %67, metadata !308, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 poison, metadata !309, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 poison, metadata !310, metadata !DIExpression()), !dbg !400 + %nb43412 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, !dbg !409 + %68 = add i64 %32, 30, !dbg !409 + call void @__csan_load(i64 %68, ptr nonnull %nb43412, i32 8, i64 8), !dbg !409 + %69 = load i64, ptr %nb43412, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %69, metadata !311, metadata !DIExpression()), !dbg !400 + %arrayidx46417 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, i64 1, !dbg !409 + %70 = add i64 %32, 31, !dbg !409 + call void @__csan_load(i64 %70, ptr nonnull %arrayidx46417, i32 8, i64 8), !dbg !409 + %71 = load i64, ptr %arrayidx46417, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %71, metadata !312, metadata !DIExpression()), !dbg !400 + %arrayidx48422 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, i64 2, !dbg !409 + %72 = add i64 %32, 32, !dbg !409 + call void @__csan_load(i64 %72, ptr nonnull %arrayidx48422, i32 8, i64 8), !dbg !409 + %73 = load i64, ptr %arrayidx48422, align 8, !dbg !409, !tbaa !412 + call void @llvm.dbg.value(metadata i64 %73, metadata !313, metadata !DIExpression()), !dbg !400 + %arrayidx50427 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, i64 3, !dbg !409 + %74 = add i64 %32, 33, !dbg !409 + call void @__csan_load(i64 %74, ptr nonnull %arrayidx50427, i32 8, i64 8), !dbg !409 + br label %75, !dbg !409 + +75: ; preds = %60, %65 + %arrayidx50428 = phi ptr [ %arrayidx50, %60 ], [ %arrayidx50427, %65 ] + %76 = phi i64 [ %64, %60 ], [ %73, %65 ] + %77 = phi i64 [ %62, %60 ], [ %69, %65 ] + %78 = phi i64 [ %61, %60 ], [ %67, %65 ] + %79 = phi i64 [ %63, %60 ], [ %71, %65 ] + %80 = load i64, ptr %arrayidx50428, align 8, !dbg !409, 
!tbaa !412 + call void @llvm.dbg.value(metadata i64 %80, metadata !314, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i32 0, metadata !315, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i32 1, metadata !317, metadata !DIExpression()), !dbg !400 + %81 = add i64 %32, 14, !dbg !414 + call void @__csan_load(i64 %81, ptr nonnull %30, i32 4, i64 8), !dbg !414 + %82 = load i32, ptr %30, align 8, !dbg !414, !tbaa !415 + call void @llvm.dbg.value(metadata i32 %82, metadata !318, metadata !DIExpression()), !dbg !400 + %83 = load i64, ptr @__csi_func_id_ggml_is_contiguous, align 8, !dbg !421 + call void @__csan_set_MAAP(i8 3, i64 %83), !dbg !421 + %84 = load i64, ptr @__csi_unit_callsite_base_id, align 8, !dbg !421, !invariant.load !402 + call void @__csan_before_call(i64 %84, i64 %83, i8 1, i64 0), !dbg !421 + %call387 = invoke zeroext i1 @ggml_is_contiguous(ptr noundef nonnull %31) + to label %call.noexc unwind label %csi.cleanup.loopexit.split-lp.csi-split, !dbg !421 + +call.noexc: ; preds = %75 + call void @__csan_after_call(i64 %84, i64 %83, i8 1, i64 0), !dbg !400 + call void @llvm.dbg.value(metadata i1 %call387, metadata !320, metadata !DIExpression(DW_OP_LLVM_convert, 1, DW_ATE_unsigned, DW_OP_LLVM_convert, 8, DW_ATE_unsigned, DW_OP_stack_value)), !dbg !400 + %idxprom = zext i32 %82 to i64, !dbg !422 + %vec_dot53 = getelementptr inbounds [24 x %struct.ggml_type_traits_t], ptr @type_traits, i64 0, i64 %idxprom, i32 7, !dbg !423 + %85 = add i64 %32, 15, !dbg !423 + call void @__csan_load(i64 %85, ptr nonnull %vec_dot53, i32 8, i64 8), !dbg !423 + %86 = load ptr, ptr %vec_dot53, align 8, !dbg !423, !tbaa !424 + call void @llvm.dbg.value(metadata ptr %86, metadata !323, metadata !DIExpression()), !dbg !400 + %vec_dot_type56 = getelementptr inbounds [24 x %struct.ggml_type_traits_t], ptr @type_traits, i64 0, i64 %idxprom, i32 8, !dbg !427 + %87 = add i64 %32, 16, !dbg !427 + call void @__csan_load(i64 %87, ptr nonnull 
%vec_dot_type56, i32 4, i64 8), !dbg !427 + %88 = load i32, ptr %vec_dot_type56, align 8, !dbg !427, !tbaa !428 + call void @llvm.dbg.value(metadata i32 %88, metadata !331, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata ptr poison, metadata !332, metadata !DIExpression()), !dbg !400 + %nrows = getelementptr inbounds [24 x %struct.ggml_type_traits_t], ptr @type_traits, i64 0, i64 %idxprom, i32 9, !dbg !429 + %89 = add i64 %32, 17, !dbg !429 + call void @__csan_load(i64 %89, ptr nonnull %nrows, i32 8, i64 8), !dbg !429 + %90 = load i64, ptr %nrows, align 8, !dbg !429, !tbaa !430 + call void @llvm.dbg.value(metadata i64 %90, metadata !340, metadata !DIExpression()), !dbg !400 + %div = sdiv i64 %51, %37, !dbg !431 + call void @llvm.dbg.value(metadata i64 %div, metadata !341, metadata !DIExpression()), !dbg !400 + %div61 = sdiv i64 %53, %39, !dbg !432 + call void @llvm.dbg.value(metadata i64 %div61, metadata !342, metadata !DIExpression()), !dbg !400 + %91 = add i64 %32, 18, !dbg !433 + call void @__csan_load(i64 %91, ptr nonnull %31, i32 4, i64 8), !dbg !433 + %92 = load i32, ptr %31, align 8, !dbg !433, !tbaa !415 + %cmp = icmp eq i32 %92, %88, !dbg !434 + %data = getelementptr inbounds %struct.ggml_tensor, ptr %31, i64 0, i32 15, !dbg !435 + %wdata63 = getelementptr inbounds %struct.ggml_compute_params, ptr %params, i64 0, i32 4, !dbg !435 + %cond.in = select i1 %cmp, ptr %data, ptr %wdata63, !dbg !435 + %93 = add i64 %32, 19, !dbg !435 + call void @__csan_load(i64 %93, ptr nonnull %cond.in, i32 8, i64 8), !dbg !435 + %cond = load ptr, ptr %cond.in, align 8, !dbg !435, !tbaa !404 + call void @llvm.dbg.value(metadata ptr %cond, metadata !343, metadata !DIExpression()), !dbg !400 + %94 = add i64 %84, 1, !dbg !436 + %95 = load i64, ptr @__csi_func_id_ggml_row_size, align 8, !dbg !436 + call void @__csan_before_call(i64 %94, i64 %95, i8 0, i64 0), !dbg !436 + %call64388 = invoke i64 @ggml_row_size(i32 noundef %88, i64 noundef %47) + to label 
%call64.noexc unwind label %csi.cleanup.loopexit.split-lp.csi-split, !dbg !436 + +call64.noexc: ; preds = %call.noexc + call void @__csan_after_call(i64 %94, i64 %95, i8 0, i64 0), !dbg !400 + call void @llvm.dbg.value(metadata i64 %call64388, metadata !344, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 %35, metadata !345, metadata !DIExpression()), !dbg !400 + %mul = mul nsw i64 %78, %51, !dbg !437 + %mul65 = mul nsw i64 %mul, %53, !dbg !438 + %mul65.fr = freeze i64 %mul65 + call void @llvm.dbg.value(metadata i64 %mul65, metadata !346, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 1, metadata !347, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 1, metadata !348, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 0, metadata !349, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 0, metadata !350, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 %35, metadata !351, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 %mul65, metadata !352, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 0, metadata !353, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 %35, metadata !354, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 0, metadata !355, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 %mul65, metadata !356, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 16, metadata !357, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 16, metadata !358, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 %90, metadata !359, metadata !DIExpression()), !dbg !400 + %96 = and i64 %.fr, 1, !dbg !439 + %cmp93.not = icmp eq i64 %96, 0, !dbg !439 + %97 = and i64 %49, 1 + %cmp95.not = 
icmp eq i64 %97, 0 + %or.cond = select i1 %cmp93.not, i1 %cmp95.not, i1 false, !dbg !441 + %nrc.0 = select i1 %or.cond, i64 %90, i64 1, !dbg !441 + call void @llvm.dbg.value(metadata i64 %nrc.0, metadata !359, metadata !DIExpression()), !dbg !400 + br i1 %call387, label %cond.end101, label %lor.lhs.false96, !dbg !442 + +lor.lhs.false96: ; preds = %call64.noexc + %98 = add i64 %32, 20, !dbg !443 + call void @__csan_load(i64 %98, ptr nonnull %31, i32 4, i64 8), !dbg !443 + %99 = load i32, ptr %31, align 8, !dbg !443, !tbaa !415 + %cmp98.not = icmp eq i32 %99, %88, !dbg !444 + %spec.select = select i1 %cmp98.not, i64 %55, i64 %call64388, !dbg !445 + br label %cond.end101, !dbg !445 + +cond.end101: ; preds = %lor.lhs.false96, %call64.noexc + %cond102 = phi i64 [ %call64388, %call64.noexc ], [ %spec.select, %lor.lhs.false96 ], !dbg !445 + call void @llvm.dbg.value(metadata i64 %cond102, metadata !360, metadata !DIExpression()), !dbg !400 + call void @llvm.dbg.value(metadata i64 0, metadata !361, metadata !DIExpression()), !dbg !446 + call void @llvm.dbg.value(metadata i64 %35, metadata !363, metadata !DIExpression()), !dbg !446 + %cmp103 = icmp sgt i64 %.fr, 0, !dbg !447 + br i1 %cmp103, label %pfor.ph, label %cleanup244, !dbg !448 + +pfor.ph: ; preds = %cond.end101 + call void @llvm.dbg.value(metadata i64 0, metadata !364, metadata !DIExpression()), !dbg !446 + call void @llvm.dbg.value(metadata i64 %.fr, metadata !365, metadata !DIExpression(DW_OP_constu, 1, DW_OP_minus, DW_OP_constu, 4, DW_OP_shr, DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !446 + %cmp113 = icmp sgt i64 %mul65.fr, 0 + %data146 = getelementptr inbounds %struct.ggml_tensor, ptr %30, i64 0, i32 15 + %data171 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 15 + %conv186 = trunc i64 %33 to i32 + %cmp189 = icmp sgt i64 %nrc.0, 1 + %conv191 = select i1 %cmp189, i64 16, i64 0 + %cond198 = select i1 %cmp189, i64 %41, i64 0 + %cond203 = select i1 %cmp189, i64 %cond102, i64 0 + %conv204 = 
trunc i64 %nrc.0 to i32 + %cmp208376 = icmp sgt i64 %nrc.0, 0 + br i1 %cmp113, label %pfor.cond.us.preheader.split.split, label %pfor.cond.cleanup239, !dbg !449 + +pfor.cond.us.preheader.split.split: ; preds = %pfor.ph + %sub120 = add nsw i64 %mul65.fr, -1 + %div121374 = and i64 %sub120, -32, !dbg !450 + %sub105 = add nsw i64 %.fr, -1, !dbg !447 + call void @llvm.dbg.value(metadata i64 %sub105, metadata !365, metadata !DIExpression(DW_OP_constu, 4, DW_OP_shr, DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !446 + %div106373 = lshr i64 %sub105, 4 + call void @llvm.dbg.value(metadata i64 %div106373, metadata !365, metadata !DIExpression(DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !446 + %100 = icmp ult i64 %mul65.fr, 33 + %stripiter386 = lshr i64 %sub120, 5 + %add129.us.epil = or i64 %div121374, 16 + %invariant.smin379.us.epil = tail call i64 @llvm.smin.i64(i64 %add129.us.epil, i64 %mul65.fr) + %101 = icmp ult i64 %div121374, %invariant.smin379.us.epil + %102 = and i64 %sub120, 16 + %epil.iter.cmp.not = icmp eq i64 %102, 0 + %add129.us.epil.1 = add nuw i64 %div121374, 32 + %invariant.smin379.us.epil.1 = tail call i64 @llvm.smin.i64(i64 %add129.us.epil.1, i64 %mul65.fr) + %103 = icmp slt i64 %add129.us.epil, %invariant.smin379.us.epil.1 + %104 = load i64, ptr @__csi_unit_loop_base_id, align 8, !dbg !451, !invariant.load !402 + call void @__csan_before_loop(i64 %104, i64 -1, i64 1), !dbg !451 + %105 = load i64, ptr @__csi_unit_alloca_base_id, align 8 + %106 = add i64 %7, 2 + %107 = add i64 %8, 2 + %108 = add i64 %9, 4 + %109 = add i64 %9, 5 + %110 = add i64 %11, 4 + %111 = add i64 %11, 5 + %112 = add i64 %7, 1 + %113 = add i64 %8, 1 + %114 = add i64 %9, 2 + %115 = add i64 %9, 3 + %116 = add i64 %11, 2 + %117 = add i64 %11, 3 + %118 = add i64 %104, 1 + %119 = add i64 %105, 1 + %120 = add i64 %32, 25 + %121 = add i64 %32, 26 + %122 = add i64 %32, 36 + %123 = add i64 %84, 4 + %124 = add i64 %32, 39 + %125 = load i64, ptr @__csi_unit_sync_base_id, align 8 + %126 = add 
i64 %32, 21 + %127 = add i64 %32, 22 + %128 = add i64 %32, 34 + %129 = add i64 %84, 2 + %130 = add i64 %32, 37 + %131 = add i64 %32, 23 + %132 = add i64 %32, 24 + %133 = add i64 %32, 35 + %134 = add i64 %84, 3 + %135 = add i64 %32, 38 + %136 = add i64 %125, 1 + br label %pfor.cond.us, !dbg !451 + +pfor.cond.us: ; preds = %pfor.cond.us.preheader.split.split, %pfor.inc236.us + %__begin.0.us = phi i64 [ %inc237.us, %pfor.inc236.us ], [ 0, %pfor.cond.us.preheader.split.split ], !dbg !452 + call void @llvm.dbg.value(metadata i64 %__begin.0.us, metadata !364, metadata !DIExpression()), !dbg !446 + %mul108.us = shl nsw i64 %__begin.0.us, 4, !dbg !453 + call void @__csan_detach(i64 %7, i32 0, i64 1), !dbg !451 + detach within %syncreg, label %pfor.ph116.us.split, label %pfor.inc236.us unwind label %csi.cleanup.loopexit, !dbg !451 + +pfor.ph116.us.split: ; preds = %pfor.cond.us + %137 = call ptr @llvm.task.frameaddress(i32 0) + %138 = call ptr @llvm.stacksave() + call void @__csan_task(i64 %8, i64 %7, ptr %137, ptr %138, i64 3) + %tmp.us.epil = alloca [32 x float], align 4 + %syncreg110.us = call token @llvm.syncregion.start(), !dbg !454 + call void @llvm.dbg.value(metadata i64 %mul108.us, metadata !366, metadata !DIExpression()), !dbg !455 + call void @llvm.dbg.value(metadata i64 0, metadata !368, metadata !DIExpression()), !dbg !456 + call void @llvm.dbg.value(metadata i64 %mul65, metadata !371, metadata !DIExpression()), !dbg !456 + call void @llvm.dbg.value(metadata i64 0, metadata !372, metadata !DIExpression()), !dbg !456 + call void @llvm.dbg.value(metadata i64 %sub120, metadata !373, metadata !DIExpression(DW_OP_constu, 4, DW_OP_shr, DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !456 + call void @__csi_after_alloca(i64 %105, ptr nonnull %tmp.us.epil, i64 128, i64 0), !dbg !400 + %add179.us = add nuw nsw i64 %mul108.us, 16 + %invariant.smin.us = call i64 @llvm.smin.i64(i64 %add179.us, i64 %.fr) + %139 = icmp slt i64 %mul108.us, %invariant.smin.us + %sub225.us = sub 
nsw i64 %invariant.smin.us, %mul108.us + %mul226.us = shl i64 %sub225.us, 2 + br i1 %100, label %pfor.cond123.us.epil, label %pfor.ph116.us.new, !dbg !457 + +pfor.ph116.us.new: ; preds = %pfor.ph116.us.split + call void @__csan_detach(i64 %106, i32 0, i64 0), !dbg !458 + detach within %syncreg110.us, label %pfor.cond123.us.strpm.detachloop.entry, label %pfor.ph116.us.new.pfor.cond123.us.epil_crit_edge unwind label %csi.cleanup389.loopexit.split-lp.loopexit.split-lp.csi-split, !dbg !458 + +pfor.ph116.us.new.pfor.cond123.us.epil_crit_edge: ; preds = %pfor.cond123.us.strpm.detachloop.reattach.split, %pfor.ph116.us.new + call void @__csan_detach_continue(i64 %110, i64 %106, i32 0, i64 0), !dbg !458 + br label %pfor.cond123.us.epil, !dbg !458 + +pfor.cond123.us.strpm.detachloop.entry: ; preds = %pfor.ph116.us.new + %140 = call ptr @llvm.task.frameaddress(i32 0) + %141 = call ptr @llvm.stacksave() + call void @__csan_task(i64 %107, i64 %106, ptr %140, ptr %141, i64 2) + %syncreg110.us.strpm.detachloop = call token @llvm.syncregion.start() + call void @__csan_before_loop(i64 %118, i64 -1, i64 1), !dbg !458 + br label %pfor.cond123.us.strpm.outer, !dbg !458 + +pfor.cond123.us.strpm.outer: ; preds = %pfor.inc.us.strpm.outer, %pfor.cond123.us.strpm.detachloop.entry + %niter = phi i64 [ 0, %pfor.cond123.us.strpm.detachloop.entry ], [ %niter.nadd, %pfor.inc.us.strpm.outer ] + call void @__csan_detach(i64 %112, i32 0, i64 1), !dbg !458 + detach within %syncreg110.us.strpm.detachloop, label %pfor.body128.us.strpm.outer, label %pfor.inc.us.strpm.outer unwind label %csi.cleanup389391, !dbg !458 + +pfor.body128.us.strpm.outer: ; preds = %pfor.cond123.us.strpm.outer + %142 = call ptr @llvm.task.frameaddress(i32 0) + %143 = call ptr @llvm.stacksave() + call void @__csan_task(i64 %113, i64 %112, ptr %142, ptr %143, i64 1) + %tmp.us = alloca [32 x float], align 4 + call void @__csi_after_alloca(i64 %119, ptr nonnull %tmp.us, i64 128, i64 0), !dbg !458 + %144 = shl nuw i64 %niter, 1, 
!dbg !458 + br label %pfor.cond123.us.split, !dbg !458 + +pfor.cond123.us.split: ; preds = %for.cond.cleanup.us, %pfor.body128.us.strpm.outer + %__begin117.0.us = phi i64 [ %inc232.us, %for.cond.cleanup.us ], [ %144, %pfor.body128.us.strpm.outer ], !dbg !459 + %inneriter = phi i64 [ %inneriter.nsub, %for.cond.cleanup.us ], [ 2, %pfor.body128.us.strpm.outer ] + call void @llvm.dbg.value(metadata i64 %__begin117.0.us, metadata !372, metadata !DIExpression()), !dbg !456 + %mul125.us = shl nsw i64 %__begin117.0.us, 4, !dbg !450 + call void @llvm.dbg.value(metadata i64 %mul125.us, metadata !374, metadata !DIExpression()), !dbg !460 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %tmp.us) #14, !dbg !461 + call void @llvm.dbg.declare(metadata ptr %tmp.us, metadata !376, metadata !DIExpression()), !dbg !462 + call void @llvm.dbg.value(metadata i64 %mul125.us, metadata !381, metadata !DIExpression()), !dbg !463 + %add129.us = add nuw nsw i64 %mul125.us, 16 + %invariant.smin379.us = call i64 @llvm.smin.i64(i64 %add129.us, i64 %mul65.fr), !dbg !464 + %145 = icmp slt i64 %mul125.us, %invariant.smin379.us, !dbg !465 + br i1 %145, label %for.body.us.preheader, label %for.cond.cleanup.us, !dbg !466 + +for.body.us.preheader: ; preds = %pfor.cond123.us.split + %146 = load i64, ptr @__csi_unit_store_base_id, align 8 + %147 = add i64 %146, 2 + br label %for.body.us, !dbg !466 + +for.cond.cleanup.us: ; preds = %for.cond.cleanup209.us, %pfor.cond123.us.split + call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %tmp.us) #14, !dbg !467 + %inc232.us = add nuw nsw i64 %__begin117.0.us, 1, !dbg !468 + call void @llvm.dbg.value(metadata i64 %inc232.us, metadata !372, metadata !DIExpression()), !dbg !456 + %inneriter.nsub = add nsw i64 %inneriter, -1, !dbg !469 + %inneriter.ncmp = icmp eq i64 %inneriter.nsub, 0, !dbg !469 + br i1 %inneriter.ncmp, label %pfor.inc.us.reattach, label %pfor.cond123.us.split, !dbg !469, !llvm.loop !470 + +pfor.inc.us.reattach: ; preds = 
%for.cond.cleanup.us + call void @__csan_task_exit(i64 %114, i64 %113, i64 %112, i32 0, i64 1), !dbg !458 + reattach within %syncreg110.us.strpm.detachloop, label %pfor.inc.us.strpm.outer, !dbg !458 + +pfor.inc.us.strpm.outer: ; preds = %pfor.cond123.us.strpm.outer, %pfor.inc.us.reattach + call void @__csan_detach_continue(i64 %116, i64 %112, i32 0, i64 2), !dbg !458 + %niter.nadd = add nuw nsw i64 %niter, 1, !dbg !458 + %niter.ncmp = icmp eq i64 %niter.nadd, %stripiter386, !dbg !458 + br i1 %niter.ncmp, label %pfor.cond123.us.strpm.detachloop.sync, label %pfor.cond123.us.strpm.outer, !dbg !458, !llvm.loop !475 + +pfor.cond123.us.strpm.detachloop.sync: ; preds = %pfor.inc.us.strpm.outer + call void @__csan_after_loop(i64 %118, i8 0, i64 1), !dbg !458 + call void @__csan_sync(i64 %125, i32 0), !dbg !458 + sync within %syncreg110.us.strpm.detachloop, label %pfor.cond123.us.strpm.detachloop.reattach.split, !dbg !458 + +pfor.cond123.us.strpm.detachloop.reattach.split: ; preds = %pfor.cond123.us.strpm.detachloop.sync + call void @__csan_task_exit(i64 %108, i64 %107, i64 %106, i32 0, i64 0), !dbg !458 + reattach within %syncreg110.us, label %pfor.ph116.us.new.pfor.cond123.us.epil_crit_edge, !dbg !458 + +pfor.cond123.us.epil: ; preds = %pfor.ph116.us.new.pfor.cond123.us.epil_crit_edge, %pfor.ph116.us.split + call void @llvm.dbg.value(metadata i64 %sub120, metadata !372, metadata !DIExpression(DW_OP_constu, 4, DW_OP_shr, DW_OP_constu, 1152921504606846974, DW_OP_and, DW_OP_stack_value)), !dbg !456 + call void @llvm.dbg.value(metadata i64 %div121374, metadata !374, metadata !DIExpression()), !dbg !460 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %tmp.us.epil) #14, !dbg !461 + call void @llvm.dbg.declare(metadata ptr %tmp.us.epil, metadata !376, metadata !DIExpression()), !dbg !462 + call void @llvm.dbg.value(metadata i64 %div121374, metadata !381, metadata !DIExpression()), !dbg !463 + br i1 %101, label %for.body.us.epil.preheader, label 
%for.cond.cleanup.us.epil, !dbg !466 + +for.body.us.epil.preheader: ; preds = %pfor.cond123.us.epil + %148 = load i64, ptr @__csi_unit_store_base_id, align 8 + br label %for.body.us.epil, !dbg !466 + +for.body.us.epil: ; preds = %for.body.us.epil.preheader, %for.cond.cleanup209.us.epil + %ir1.0380.us.epil = phi i64 [ %add230.us.epil, %for.cond.cleanup209.us.epil ], [ %div121374, %for.body.us.epil.preheader ] + call void @llvm.dbg.value(metadata i64 %ir1.0380.us.epil, metadata !381, metadata !DIExpression()), !dbg !463 + %div134.us.epil = sdiv i64 %ir1.0380.us.epil, %mul, !dbg !478 + call void @llvm.dbg.value(metadata i64 %div134.us.epil, metadata !383, metadata !DIExpression()), !dbg !479 + %mul135.us.epil = mul nsw i64 %div134.us.epil, %51, !dbg !480 + %mul136.us.epil = mul nsw i64 %mul135.us.epil, %78, !dbg !481 + %sub137.us.epil = sub nsw i64 %ir1.0380.us.epil, %mul136.us.epil, !dbg !482 + %div138.us.epil = sdiv i64 %sub137.us.epil, %78, !dbg !483 + call void @llvm.dbg.value(metadata i64 %div138.us.epil, metadata !386, metadata !DIExpression()), !dbg !479 + %mul142.us.epil = mul nsw i64 %div138.us.epil, %78, !dbg !484 + %sub143.us.epil = sub nsw i64 %sub137.us.epil, %mul142.us.epil, !dbg !485 + call void @llvm.dbg.value(metadata i64 %sub143.us.epil, metadata !387, metadata !DIExpression()), !dbg !479 + %div144.us.epil = sdiv i64 %div134.us.epil, %div61, !dbg !486 + call void @llvm.dbg.value(metadata i64 %div144.us.epil, metadata !388, metadata !DIExpression()), !dbg !479 + %div145.us.epil = sdiv i64 %div138.us.epil, %div, !dbg !487 + call void @llvm.dbg.value(metadata i64 %div145.us.epil, metadata !389, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %sub143.us.epil, metadata !390, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %div138.us.epil, metadata !391, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %div134.us.epil, metadata !392, metadata !DIExpression()), !dbg 
!479 + call void @__csan_load(i64 %126, ptr nonnull %data146, i32 8, i64 8), !dbg !488 + %149 = load ptr, ptr %data146, align 8, !dbg !488, !tbaa !489 + %mul147.us.epil = mul i64 %div145.us.epil, %43, !dbg !490 + %mul149.us.epil = mul i64 %div144.us.epil, %45, !dbg !491 + %add150.us.epil = add i64 %mul147.us.epil, %mul149.us.epil, !dbg !492 + %add.ptr.us.epil = getelementptr inbounds i8, ptr %149, i64 %add150.us.epil, !dbg !493 + call void @llvm.dbg.value(metadata ptr %add.ptr.us.epil, metadata !393, metadata !DIExpression()), !dbg !479 + br i1 %call387, label %cond.true155.us.epil, label %lor.lhs.false152.us.epil, !dbg !494 + +lor.lhs.false152.us.epil: ; preds = %for.body.us.epil + call void @__csan_load(i64 %127, ptr nonnull %31, i32 4, i64 8), !dbg !495 + %150 = load i32, ptr %31, align 8, !dbg !495, !tbaa !415 + %cmp154.not.us.epil = icmp eq i32 %150, %88, !dbg !496 + br i1 %cmp154.not.us.epil, label %cond.false162.us.epil, label %cond.true155.us.epil, !dbg !497 + +cond.false162.us.epil: ; preds = %lor.lhs.false152.us.epil + %mul163.us.epil = mul i64 %sub143.us.epil, %55, !dbg !498 + %mul164.us.epil = mul i64 %div138.us.epil, %57, !dbg !499 + %mul166.us.epil = mul i64 %div134.us.epil, %59, !dbg !500 + %add165.us.epil = add i64 %mul164.us.epil, %mul166.us.epil, !dbg !501 + %add167.us.epil = add i64 %add165.us.epil, %mul163.us.epil, !dbg !502 + br label %cond.end168.us.epil, !dbg !497 + +cond.true155.us.epil: ; preds = %lor.lhs.false152.us.epil, %for.body.us.epil + %reass.add.us.epil = add i64 %mul135.us.epil, %div138.us.epil + %reass.mul.us.epil = mul i64 %reass.add.us.epil, %49 + %add160.us.epil = add i64 %sub143.us.epil, %reass.mul.us.epil, !dbg !503 + %mul161.us.epil = mul i64 %add160.us.epil, %call64388, !dbg !504 + br label %cond.end168.us.epil, !dbg !497 + +cond.end168.us.epil: ; preds = %cond.true155.us.epil, %cond.false162.us.epil + %cond169.us.epil = phi i64 [ %mul161.us.epil, %cond.true155.us.epil ], [ %add167.us.epil, %cond.false162.us.epil ], !dbg 
!497 + %add.ptr170.us.epil = getelementptr inbounds i8, ptr %cond, i64 %cond169.us.epil, !dbg !505 + call void @llvm.dbg.value(metadata ptr %add.ptr170.us.epil, metadata !394, metadata !DIExpression()), !dbg !479 + call void @__csan_load(i64 %128, ptr nonnull %data171, i32 8, i64 8), !dbg !506 + %151 = load ptr, ptr %data171, align 8, !dbg !506, !tbaa !489 + %mul172.us.epil = mul i64 %sub143.us.epil, %79, !dbg !507 + %mul173.us.epil = mul i64 %div138.us.epil, %76, !dbg !508 + %mul175.us.epil = mul i64 %div134.us.epil, %80, !dbg !509 + %add174.us.epil = add i64 %mul173.us.epil, %mul175.us.epil, !dbg !510 + %add176.us.epil = add i64 %add174.us.epil, %mul172.us.epil, !dbg !511 + %add.ptr177.us.epil = getelementptr inbounds i8, ptr %151, i64 %add176.us.epil, !dbg !512 + call void @llvm.dbg.value(metadata ptr %add.ptr177.us.epil, metadata !395, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %mul108.us, metadata !396, metadata !DIExpression()), !dbg !513 + br i1 %139, label %for.body185.us.epil, label %for.cond206.preheader.us.epil, !dbg !514 + +for.body185.us.epil: ; preds = %cond.end168.us.epil, %.noexc398 + %ir0.0375.us.epil = phi i64 [ %add205.us.epil, %.noexc398 ], [ %mul108.us, %cond.end168.us.epil ] + call void @llvm.dbg.value(metadata i64 %ir0.0375.us.epil, metadata !396, metadata !DIExpression()), !dbg !513 + %sub187.us.epil = sub nsw i64 %ir0.0375.us.epil, %mul108.us, !dbg !515 + %arrayidx188.us.epil = getelementptr inbounds [32 x float], ptr %tmp.us.epil, i64 0, i64 %sub187.us.epil, !dbg !518 + %mul192.us.epil = mul i64 %ir0.0375.us.epil, %41, !dbg !519 + %add.ptr193.us.epil = getelementptr inbounds i8, ptr %add.ptr.us.epil, i64 %mul192.us.epil, !dbg !520 + call void @__csan_set_MAAP(i8 3, i64 -1), !dbg !521 + call void @__csan_set_MAAP(i8 3, i64 -1), !dbg !521 + call void @__csan_set_MAAP(i8 3, i64 -1), !dbg !521 + call void @__csan_before_call(i64 %129, i64 -1, i8 3, i64 1), !dbg !521 + invoke void %86(i32 noundef %conv186, ptr 
noundef nonnull %arrayidx188.us.epil, i64 noundef %conv191, ptr noundef %add.ptr193.us.epil, i64 noundef %cond198, ptr noundef %add.ptr170.us.epil, i64 noundef %cond203, i32 noundef %conv204) + to label %.noexc398 unwind label %csi.cleanup389.loopexit.split-lp.loopexit, !dbg !521 + +.noexc398: ; preds = %for.body185.us.epil + call void @__csan_after_call(i64 %129, i64 -1, i8 3, i64 1), !dbg !522 + %add205.us.epil = add nsw i64 %ir0.0375.us.epil, %nrc.0, !dbg !522 + call void @llvm.dbg.value(metadata i64 %add205.us.epil, metadata !396, metadata !DIExpression()), !dbg !513 + %152 = icmp slt i64 %add205.us.epil, %invariant.smin.us, !dbg !523 + br i1 %152, label %for.body185.us.epil, label %for.cond206.preheader.us.epil, !dbg !514, !llvm.loop !524 + +for.cond206.preheader.us.epil: ; preds = %.noexc398, %cond.end168.us.epil + call void @llvm.dbg.value(metadata i32 0, metadata !398, metadata !DIExpression()), !dbg !526 + br i1 %cmp208376, label %for.body210.us.epil.split, label %for.cond.cleanup209.us.epil, !dbg !527 + +for.body210.us.epil.split: ; preds = %for.cond206.preheader.us.epil, %for.body210.us.epil.split + %indvars.iv.epil = phi i64 [ %indvars.iv.next.epil, %for.body210.us.epil.split ], [ 0, %for.cond206.preheader.us.epil ] + call void @llvm.dbg.value(metadata i64 %indvars.iv.epil, metadata !398, metadata !DIExpression()), !dbg !526 + %mul212.us.epil = mul i64 %indvars.iv.epil, %79, !dbg !528 + %div213.us.epil = udiv i64 %mul212.us.epil, %77, !dbg !531 + %add214.us.epil = add i64 %div213.us.epil, %mul108.us, !dbg !532 + %arrayidx215.us.epil = getelementptr inbounds float, ptr %add.ptr177.us.epil, i64 %add214.us.epil, !dbg !533 + %mul216.us.epil = shl i64 %indvars.iv.epil, 4, !dbg !534 + %idx.ext.us.epil = and i64 %mul216.us.epil, 4294967280, !dbg !535 + %add.ptr217.us.epil = getelementptr inbounds float, ptr %tmp.us.epil, i64 %idx.ext.us.epil, !dbg !535 + call void @__csan_large_store(i64 %148, ptr %arrayidx215.us.epil, i64 %mul226.us, i64 4), !dbg !536 + call 
void @__csan_large_load(i64 %130, ptr nonnull %add.ptr217.us.epil, i64 %mul226.us, i64 4), !dbg !536 + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %arrayidx215.us.epil, ptr nonnull align 4 %add.ptr217.us.epil, i64 %mul226.us, i1 false), !dbg !536 + %indvars.iv.next.epil = add nuw nsw i64 %indvars.iv.epil, 1, !dbg !537 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next.epil, metadata !398, metadata !DIExpression()), !dbg !526 + %exitcond.not.epil = icmp eq i64 %indvars.iv.next.epil, %nrc.0, !dbg !538 + br i1 %exitcond.not.epil, label %for.cond.cleanup209.us.epil, label %for.body210.us.epil.split, !dbg !527, !llvm.loop !539 + +for.cond.cleanup209.us.epil: ; preds = %for.body210.us.epil.split, %for.cond206.preheader.us.epil + %add230.us.epil = add nsw i64 %ir1.0380.us.epil, %nrc.0, !dbg !541 + call void @llvm.dbg.value(metadata i64 %add230.us.epil, metadata !381, metadata !DIExpression()), !dbg !463 + %153 = icmp slt i64 %add230.us.epil, %invariant.smin379.us.epil, !dbg !465 + br i1 %153, label %for.body.us.epil, label %for.cond.cleanup.us.epil, !dbg !466, !llvm.loop !542 + +for.cond.cleanup.us.epil: ; preds = %for.cond.cleanup209.us.epil, %pfor.cond123.us.epil + call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %tmp.us.epil) #14, !dbg !467 + call void @llvm.dbg.value(metadata i64 %sub120, metadata !372, metadata !DIExpression(DW_OP_constu, 4, DW_OP_shr, DW_OP_constu, 1152921504606846974, DW_OP_and, DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !456 + br i1 %epil.iter.cmp.not, label %pfor.cond.cleanup.us, label %pfor.cond123.us.epil.1, !dbg !469 + +pfor.cond123.us.epil.1: ; preds = %for.cond.cleanup.us.epil + call void @llvm.dbg.value(metadata i64 %sub120, metadata !372, metadata !DIExpression(DW_OP_constu, 4, DW_OP_shr, DW_OP_constu, 1152921504606846974, DW_OP_and, DW_OP_plus_uconst, 1, DW_OP_stack_value)), !dbg !456 + call void @llvm.dbg.value(metadata i64 %add129.us.epil, metadata !374, metadata !DIExpression()), !dbg !460 + call void 
@llvm.lifetime.start.p0(i64 128, ptr nonnull %tmp.us.epil) #14, !dbg !461 + call void @llvm.dbg.declare(metadata ptr %tmp.us.epil, metadata !376, metadata !DIExpression()), !dbg !462 + call void @llvm.dbg.value(metadata i64 %add129.us.epil, metadata !381, metadata !DIExpression()), !dbg !463 + br i1 %103, label %for.body.us.epil.1.preheader, label %for.cond.cleanup.us.epil.1, !dbg !466 + +for.body.us.epil.1.preheader: ; preds = %pfor.cond123.us.epil.1 + %154 = load i64, ptr @__csi_unit_store_base_id, align 8 + %155 = add i64 %154, 1 + br label %for.body.us.epil.1, !dbg !466 + +for.body.us.epil.1: ; preds = %for.body.us.epil.1.preheader, %for.cond.cleanup209.us.epil.1 + %ir1.0380.us.epil.1 = phi i64 [ %add230.us.epil.1, %for.cond.cleanup209.us.epil.1 ], [ %add129.us.epil, %for.body.us.epil.1.preheader ] + call void @llvm.dbg.value(metadata i64 %ir1.0380.us.epil.1, metadata !381, metadata !DIExpression()), !dbg !463 + %div134.us.epil.1 = sdiv i64 %ir1.0380.us.epil.1, %mul, !dbg !478 + call void @llvm.dbg.value(metadata i64 %div134.us.epil.1, metadata !383, metadata !DIExpression()), !dbg !479 + %mul135.us.epil.1 = mul nsw i64 %div134.us.epil.1, %51, !dbg !480 + %mul136.us.epil.1 = mul nsw i64 %mul135.us.epil.1, %78, !dbg !481 + %sub137.us.epil.1 = sub nsw i64 %ir1.0380.us.epil.1, %mul136.us.epil.1, !dbg !482 + %div138.us.epil.1 = sdiv i64 %sub137.us.epil.1, %78, !dbg !483 + call void @llvm.dbg.value(metadata i64 %div138.us.epil.1, metadata !386, metadata !DIExpression()), !dbg !479 + %mul142.us.epil.1 = mul nsw i64 %div138.us.epil.1, %78, !dbg !484 + %sub143.us.epil.1 = sub nsw i64 %sub137.us.epil.1, %mul142.us.epil.1, !dbg !485 + call void @llvm.dbg.value(metadata i64 %sub143.us.epil.1, metadata !387, metadata !DIExpression()), !dbg !479 + %div144.us.epil.1 = sdiv i64 %div134.us.epil.1, %div61, !dbg !486 + call void @llvm.dbg.value(metadata i64 %div144.us.epil.1, metadata !388, metadata !DIExpression()), !dbg !479 + %div145.us.epil.1 = sdiv i64 %div138.us.epil.1, 
%div, !dbg !487 + call void @llvm.dbg.value(metadata i64 %div145.us.epil.1, metadata !389, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %sub143.us.epil.1, metadata !390, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %div138.us.epil.1, metadata !391, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %div134.us.epil.1, metadata !392, metadata !DIExpression()), !dbg !479 + call void @__csan_load(i64 %131, ptr nonnull %data146, i32 8, i64 8), !dbg !488 + %156 = load ptr, ptr %data146, align 8, !dbg !488, !tbaa !489 + %mul147.us.epil.1 = mul i64 %div145.us.epil.1, %43, !dbg !490 + %mul149.us.epil.1 = mul i64 %div144.us.epil.1, %45, !dbg !491 + %add150.us.epil.1 = add i64 %mul147.us.epil.1, %mul149.us.epil.1, !dbg !492 + %add.ptr.us.epil.1 = getelementptr inbounds i8, ptr %156, i64 %add150.us.epil.1, !dbg !493 + call void @llvm.dbg.value(metadata ptr %add.ptr.us.epil.1, metadata !393, metadata !DIExpression()), !dbg !479 + br i1 %call387, label %cond.true155.us.epil.1, label %lor.lhs.false152.us.epil.1, !dbg !494 + +lor.lhs.false152.us.epil.1: ; preds = %for.body.us.epil.1 + call void @__csan_load(i64 %132, ptr nonnull %31, i32 4, i64 8), !dbg !495 + %157 = load i32, ptr %31, align 8, !dbg !495, !tbaa !415 + %cmp154.not.us.epil.1 = icmp eq i32 %157, %88, !dbg !496 + br i1 %cmp154.not.us.epil.1, label %cond.false162.us.epil.1, label %cond.true155.us.epil.1, !dbg !497 + +cond.false162.us.epil.1: ; preds = %lor.lhs.false152.us.epil.1 + %mul163.us.epil.1 = mul i64 %sub143.us.epil.1, %55, !dbg !498 + %mul164.us.epil.1 = mul i64 %div138.us.epil.1, %57, !dbg !499 + %mul166.us.epil.1 = mul i64 %div134.us.epil.1, %59, !dbg !500 + %add165.us.epil.1 = add i64 %mul164.us.epil.1, %mul166.us.epil.1, !dbg !501 + %add167.us.epil.1 = add i64 %add165.us.epil.1, %mul163.us.epil.1, !dbg !502 + br label %cond.end168.us.epil.1, !dbg !497 + +cond.true155.us.epil.1: ; preds = 
%lor.lhs.false152.us.epil.1, %for.body.us.epil.1 + %reass.add.us.epil.1 = add i64 %mul135.us.epil.1, %div138.us.epil.1 + %reass.mul.us.epil.1 = mul i64 %reass.add.us.epil.1, %49 + %add160.us.epil.1 = add i64 %sub143.us.epil.1, %reass.mul.us.epil.1, !dbg !503 + %mul161.us.epil.1 = mul i64 %add160.us.epil.1, %call64388, !dbg !504 + br label %cond.end168.us.epil.1, !dbg !497 + +cond.end168.us.epil.1: ; preds = %cond.true155.us.epil.1, %cond.false162.us.epil.1 + %cond169.us.epil.1 = phi i64 [ %mul161.us.epil.1, %cond.true155.us.epil.1 ], [ %add167.us.epil.1, %cond.false162.us.epil.1 ], !dbg !497 + %add.ptr170.us.epil.1 = getelementptr inbounds i8, ptr %cond, i64 %cond169.us.epil.1, !dbg !505 + call void @llvm.dbg.value(metadata ptr %add.ptr170.us.epil.1, metadata !394, metadata !DIExpression()), !dbg !479 + call void @__csan_load(i64 %133, ptr nonnull %data171, i32 8, i64 8), !dbg !506 + %158 = load ptr, ptr %data171, align 8, !dbg !506, !tbaa !489 + %mul172.us.epil.1 = mul i64 %sub143.us.epil.1, %79, !dbg !507 + %mul173.us.epil.1 = mul i64 %div138.us.epil.1, %76, !dbg !508 + %mul175.us.epil.1 = mul i64 %div134.us.epil.1, %80, !dbg !509 + %add174.us.epil.1 = add i64 %mul173.us.epil.1, %mul175.us.epil.1, !dbg !510 + %add176.us.epil.1 = add i64 %add174.us.epil.1, %mul172.us.epil.1, !dbg !511 + %add.ptr177.us.epil.1 = getelementptr inbounds i8, ptr %158, i64 %add176.us.epil.1, !dbg !512 + call void @llvm.dbg.value(metadata ptr %add.ptr177.us.epil.1, metadata !395, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %mul108.us, metadata !396, metadata !DIExpression()), !dbg !513 + br i1 %139, label %for.body185.us.epil.1, label %for.cond206.preheader.us.epil.1, !dbg !514 + +for.body185.us.epil.1: ; preds = %cond.end168.us.epil.1, %.noexc397 + %ir0.0375.us.epil.1 = phi i64 [ %add205.us.epil.1, %.noexc397 ], [ %mul108.us, %cond.end168.us.epil.1 ] + call void @llvm.dbg.value(metadata i64 %ir0.0375.us.epil.1, metadata !396, metadata !DIExpression()), 
!dbg !513 + %sub187.us.epil.1 = sub nsw i64 %ir0.0375.us.epil.1, %mul108.us, !dbg !515 + %arrayidx188.us.epil.1 = getelementptr inbounds [32 x float], ptr %tmp.us.epil, i64 0, i64 %sub187.us.epil.1, !dbg !518 + %mul192.us.epil.1 = mul i64 %ir0.0375.us.epil.1, %41, !dbg !519 + %add.ptr193.us.epil.1 = getelementptr inbounds i8, ptr %add.ptr.us.epil.1, i64 %mul192.us.epil.1, !dbg !520 + call void @__csan_set_MAAP(i8 3, i64 -1), !dbg !521 + call void @__csan_set_MAAP(i8 3, i64 -1), !dbg !521 + call void @__csan_set_MAAP(i8 3, i64 -1), !dbg !521 + call void @__csan_before_call(i64 %134, i64 -1, i8 3, i64 1), !dbg !521 + invoke void %86(i32 noundef %conv186, ptr noundef nonnull %arrayidx188.us.epil.1, i64 noundef %conv191, ptr noundef %add.ptr193.us.epil.1, i64 noundef %cond198, ptr noundef %add.ptr170.us.epil.1, i64 noundef %cond203, i32 noundef %conv204) + to label %.noexc397 unwind label %csi.cleanup389.loopexit, !dbg !521 + +.noexc397: ; preds = %for.body185.us.epil.1 + call void @__csan_after_call(i64 %134, i64 -1, i8 3, i64 1), !dbg !522 + %add205.us.epil.1 = add nsw i64 %ir0.0375.us.epil.1, %nrc.0, !dbg !522 + call void @llvm.dbg.value(metadata i64 %add205.us.epil.1, metadata !396, metadata !DIExpression()), !dbg !513 + %159 = icmp slt i64 %add205.us.epil.1, %invariant.smin.us, !dbg !523 + br i1 %159, label %for.body185.us.epil.1, label %for.cond206.preheader.us.epil.1, !dbg !514, !llvm.loop !524 + +for.cond206.preheader.us.epil.1: ; preds = %.noexc397, %cond.end168.us.epil.1 + call void @llvm.dbg.value(metadata i32 0, metadata !398, metadata !DIExpression()), !dbg !526 + br i1 %cmp208376, label %for.body210.us.epil.1.split, label %for.cond.cleanup209.us.epil.1, !dbg !527 + +for.body210.us.epil.1.split: ; preds = %for.cond206.preheader.us.epil.1, %for.body210.us.epil.1.split + %indvars.iv.epil.1 = phi i64 [ %indvars.iv.next.epil.1, %for.body210.us.epil.1.split ], [ 0, %for.cond206.preheader.us.epil.1 ] + call void @llvm.dbg.value(metadata i64 %indvars.iv.epil.1, 
metadata !398, metadata !DIExpression()), !dbg !526 + %mul212.us.epil.1 = mul i64 %indvars.iv.epil.1, %79, !dbg !528 + %div213.us.epil.1 = udiv i64 %mul212.us.epil.1, %77, !dbg !531 + %add214.us.epil.1 = add i64 %div213.us.epil.1, %mul108.us, !dbg !532 + %arrayidx215.us.epil.1 = getelementptr inbounds float, ptr %add.ptr177.us.epil.1, i64 %add214.us.epil.1, !dbg !533 + %mul216.us.epil.1 = shl i64 %indvars.iv.epil.1, 4, !dbg !534 + %idx.ext.us.epil.1 = and i64 %mul216.us.epil.1, 4294967280, !dbg !535 + %add.ptr217.us.epil.1 = getelementptr inbounds float, ptr %tmp.us.epil, i64 %idx.ext.us.epil.1, !dbg !535 + call void @__csan_large_store(i64 %155, ptr %arrayidx215.us.epil.1, i64 %mul226.us, i64 4), !dbg !536 + call void @__csan_large_load(i64 %135, ptr nonnull %add.ptr217.us.epil.1, i64 %mul226.us, i64 4), !dbg !536 + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %arrayidx215.us.epil.1, ptr nonnull align 4 %add.ptr217.us.epil.1, i64 %mul226.us, i1 false), !dbg !536 + %indvars.iv.next.epil.1 = add nuw nsw i64 %indvars.iv.epil.1, 1, !dbg !537 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next.epil.1, metadata !398, metadata !DIExpression()), !dbg !526 + %exitcond.not.epil.1 = icmp eq i64 %indvars.iv.next.epil.1, %nrc.0, !dbg !538 + br i1 %exitcond.not.epil.1, label %for.cond.cleanup209.us.epil.1, label %for.body210.us.epil.1.split, !dbg !527, !llvm.loop !539 + +for.cond.cleanup209.us.epil.1: ; preds = %for.body210.us.epil.1.split, %for.cond206.preheader.us.epil.1 + %add230.us.epil.1 = add nsw i64 %ir1.0380.us.epil.1, %nrc.0, !dbg !541 + call void @llvm.dbg.value(metadata i64 %add230.us.epil.1, metadata !381, metadata !DIExpression()), !dbg !463 + %160 = icmp slt i64 %add230.us.epil.1, %invariant.smin379.us.epil.1, !dbg !465 + br i1 %160, label %for.body.us.epil.1, label %for.cond.cleanup.us.epil.1, !dbg !466, !llvm.loop !542 + +for.cond.cleanup.us.epil.1: ; preds = %for.cond.cleanup209.us.epil.1, %pfor.cond123.us.epil.1 + call void @llvm.lifetime.end.p0(i64 128, 
ptr nonnull %tmp.us.epil) #14, !dbg !467 + call void @llvm.dbg.value(metadata i64 %sub120, metadata !372, metadata !DIExpression(DW_OP_constu, 4, DW_OP_shr, DW_OP_constu, 1152921504606846974, DW_OP_and, DW_OP_plus_uconst, 2, DW_OP_stack_value)), !dbg !456 + br label %pfor.cond.cleanup.us + +pfor.cond.cleanup.us: ; preds = %for.cond.cleanup.us.epil.1, %for.cond.cleanup.us.epil + call void @__csan_sync(i64 %136, i32 0), !dbg !469 + sync within %syncreg110.us, label %sync.continue.us, !dbg !469 + +sync.continue.us: ; preds = %pfor.cond.cleanup.us + invoke void @llvm.sync.unwind(token %syncreg110.us) + to label %.noexc396 unwind label %csi.cleanup389.loopexit.split-lp.loopexit.split-lp.csi-split-lp, !dbg !469 + +.noexc396: ; preds = %sync.continue.us + call void @__csan_task_exit(i64 %9, i64 %8, i64 %7, i32 0, i64 1), !dbg !543 + reattach within %syncreg, label %pfor.inc236.us, !dbg !543 + +pfor.inc236.us: ; preds = %pfor.cond.us, %.noexc396 + call void @__csan_detach_continue(i64 %11, i64 %7, i32 0, i64 2), !dbg !544 + %inc237.us = add nuw nsw i64 %__begin.0.us, 1, !dbg !544 + call void @llvm.dbg.value(metadata i64 %inc237.us, metadata !364, metadata !DIExpression()), !dbg !446 + %exitcond384.not = icmp eq i64 %__begin.0.us, %div106373, !dbg !545 + br i1 %exitcond384.not, label %pfor.cond.cleanup239.loopexit, label %pfor.cond.us, !dbg !546, !llvm.loop !547 + +for.body.us: ; preds = %for.body.us.preheader, %for.cond.cleanup209.us + %ir1.0380.us = phi i64 [ %add230.us, %for.cond.cleanup209.us ], [ %mul125.us, %for.body.us.preheader ] + call void @llvm.dbg.value(metadata i64 %ir1.0380.us, metadata !381, metadata !DIExpression()), !dbg !463 + %div134.us = sdiv i64 %ir1.0380.us, %mul, !dbg !478 + call void @llvm.dbg.value(metadata i64 %div134.us, metadata !383, metadata !DIExpression()), !dbg !479 + %mul135.us = mul nsw i64 %div134.us, %51, !dbg !480 + %mul136.us = mul nsw i64 %mul135.us, %78, !dbg !481 + %sub137.us = sub nsw i64 %ir1.0380.us, %mul136.us, !dbg !482 + 
%div138.us = sdiv i64 %sub137.us, %78, !dbg !483 + call void @llvm.dbg.value(metadata i64 %div138.us, metadata !386, metadata !DIExpression()), !dbg !479 + %mul142.us = mul nsw i64 %div138.us, %78, !dbg !484 + %sub143.us = sub nsw i64 %sub137.us, %mul142.us, !dbg !485 + call void @llvm.dbg.value(metadata i64 %sub143.us, metadata !387, metadata !DIExpression()), !dbg !479 + %div144.us = sdiv i64 %div134.us, %div61, !dbg !486 + call void @llvm.dbg.value(metadata i64 %div144.us, metadata !388, metadata !DIExpression()), !dbg !479 + %div145.us = sdiv i64 %div138.us, %div, !dbg !487 + call void @llvm.dbg.value(metadata i64 %div145.us, metadata !389, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %sub143.us, metadata !390, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %div138.us, metadata !391, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %div134.us, metadata !392, metadata !DIExpression()), !dbg !479 + call void @__csan_load(i64 %120, ptr nonnull %data146, i32 8, i64 8), !dbg !488 + %161 = load ptr, ptr %data146, align 8, !dbg !488, !tbaa !489 + %mul147.us = mul i64 %div145.us, %43, !dbg !490 + %mul149.us = mul i64 %div144.us, %45, !dbg !491 + %add150.us = add i64 %mul147.us, %mul149.us, !dbg !492 + %add.ptr.us = getelementptr inbounds i8, ptr %161, i64 %add150.us, !dbg !493 + call void @llvm.dbg.value(metadata ptr %add.ptr.us, metadata !393, metadata !DIExpression()), !dbg !479 + br i1 %call387, label %cond.true155.us, label %lor.lhs.false152.us, !dbg !494 + +lor.lhs.false152.us: ; preds = %for.body.us + call void @__csan_load(i64 %121, ptr nonnull %31, i32 4, i64 8), !dbg !495 + %162 = load i32, ptr %31, align 8, !dbg !495, !tbaa !415 + %cmp154.not.us = icmp eq i32 %162, %88, !dbg !496 + br i1 %cmp154.not.us, label %cond.false162.us, label %cond.true155.us, !dbg !497 + +cond.false162.us: ; preds = %lor.lhs.false152.us + %mul163.us = mul i64 %sub143.us, %55, !dbg !498 + 
%mul164.us = mul i64 %div138.us, %57, !dbg !499 + %mul166.us = mul i64 %div134.us, %59, !dbg !500 + %add165.us = add i64 %mul164.us, %mul166.us, !dbg !501 + %add167.us = add i64 %add165.us, %mul163.us, !dbg !502 + br label %cond.end168.us, !dbg !497 + +cond.true155.us: ; preds = %lor.lhs.false152.us, %for.body.us + %reass.add.us = add i64 %mul135.us, %div138.us + %reass.mul.us = mul i64 %reass.add.us, %49 + %add160.us = add i64 %sub143.us, %reass.mul.us, !dbg !503 + %mul161.us = mul i64 %add160.us, %call64388, !dbg !504 + br label %cond.end168.us, !dbg !497 + +cond.end168.us: ; preds = %cond.true155.us, %cond.false162.us + %cond169.us = phi i64 [ %mul161.us, %cond.true155.us ], [ %add167.us, %cond.false162.us ], !dbg !497 + %add.ptr170.us = getelementptr inbounds i8, ptr %cond, i64 %cond169.us, !dbg !505 + call void @llvm.dbg.value(metadata ptr %add.ptr170.us, metadata !394, metadata !DIExpression()), !dbg !479 + call void @__csan_load(i64 %122, ptr nonnull %data171, i32 8, i64 8), !dbg !506 + %163 = load ptr, ptr %data171, align 8, !dbg !506, !tbaa !489 + %mul172.us = mul i64 %sub143.us, %79, !dbg !507 + %mul173.us = mul i64 %div138.us, %76, !dbg !508 + %mul175.us = mul i64 %div134.us, %80, !dbg !509 + %add174.us = add i64 %mul173.us, %mul175.us, !dbg !510 + %add176.us = add i64 %add174.us, %mul172.us, !dbg !511 + %add.ptr177.us = getelementptr inbounds i8, ptr %163, i64 %add176.us, !dbg !512 + call void @llvm.dbg.value(metadata ptr %add.ptr177.us, metadata !395, metadata !DIExpression()), !dbg !479 + call void @llvm.dbg.value(metadata i64 %mul108.us, metadata !396, metadata !DIExpression()), !dbg !513 + br i1 %139, label %for.body185.us, label %for.cond206.preheader.us, !dbg !514 + +for.cond.cleanup209.us: ; preds = %for.body210.us.split, %for.cond206.preheader.us + %add230.us = add nsw i64 %ir1.0380.us, %nrc.0, !dbg !541 + call void @llvm.dbg.value(metadata i64 %add230.us, metadata !381, metadata !DIExpression()), !dbg !463 + %164 = icmp slt i64 %add230.us, 
%invariant.smin379.us, !dbg !465 + br i1 %164, label %for.body.us, label %for.cond.cleanup.us, !dbg !466, !llvm.loop !542 + +for.body210.us.split: ; preds = %for.cond206.preheader.us, %for.body210.us.split + %indvars.iv = phi i64 [ %indvars.iv.next, %for.body210.us.split ], [ 0, %for.cond206.preheader.us ] + call void @llvm.dbg.value(metadata i64 %indvars.iv, metadata !398, metadata !DIExpression()), !dbg !526 + %mul212.us = mul i64 %indvars.iv, %79, !dbg !528 + %div213.us = udiv i64 %mul212.us, %77, !dbg !531 + %add214.us = add i64 %div213.us, %mul108.us, !dbg !532 + %arrayidx215.us = getelementptr inbounds float, ptr %add.ptr177.us, i64 %add214.us, !dbg !533 + %mul216.us = shl i64 %indvars.iv, 4, !dbg !534 + %idx.ext.us = and i64 %mul216.us, 4294967280, !dbg !535 + %add.ptr217.us = getelementptr inbounds float, ptr %tmp.us, i64 %idx.ext.us, !dbg !535 + call void @__csan_large_store(i64 %147, ptr %arrayidx215.us, i64 %mul226.us, i64 4), !dbg !536 + call void @__csan_large_load(i64 %124, ptr nonnull %add.ptr217.us, i64 %mul226.us, i64 4), !dbg !536 + call void @llvm.memcpy.p0.p0.i64(ptr align 4 %arrayidx215.us, ptr nonnull align 4 %add.ptr217.us, i64 %mul226.us, i1 false), !dbg !536 + %indvars.iv.next = add nuw nsw i64 %indvars.iv, 1, !dbg !537 + call void @llvm.dbg.value(metadata i64 %indvars.iv.next, metadata !398, metadata !DIExpression()), !dbg !526 + %exitcond.not = icmp eq i64 %indvars.iv.next, %nrc.0, !dbg !538 + br i1 %exitcond.not, label %for.cond.cleanup209.us, label %for.body210.us.split, !dbg !527, !llvm.loop !539 + +for.body185.us: ; preds = %cond.end168.us, %.noexc395 + %ir0.0375.us = phi i64 [ %add205.us, %.noexc395 ], [ %mul108.us, %cond.end168.us ] + call void @llvm.dbg.value(metadata i64 %ir0.0375.us, metadata !396, metadata !DIExpression()), !dbg !513 + %sub187.us = sub nsw i64 %ir0.0375.us, %mul108.us, !dbg !515 + %arrayidx188.us = getelementptr inbounds [32 x float], ptr %tmp.us, i64 0, i64 %sub187.us, !dbg !518 + %mul192.us = mul i64 
%ir0.0375.us, %41, !dbg !519 + %add.ptr193.us = getelementptr inbounds i8, ptr %add.ptr.us, i64 %mul192.us, !dbg !520 + call void @__csan_set_MAAP(i8 3, i64 -1), !dbg !521 + call void @__csan_set_MAAP(i8 3, i64 -1), !dbg !521 + call void @__csan_set_MAAP(i8 3, i64 -1), !dbg !521 + call void @__csan_before_call(i64 %123, i64 -1, i8 3, i64 1), !dbg !521 + invoke void %86(i32 noundef %conv186, ptr noundef nonnull %arrayidx188.us, i64 noundef %conv191, ptr noundef %add.ptr193.us, i64 noundef %cond198, ptr noundef %add.ptr170.us, i64 noundef %cond203, i32 noundef %conv204) + to label %.noexc395 unwind label %csi.cleanup389391393, !dbg !521 + +.noexc395: ; preds = %for.body185.us + call void @__csan_after_call(i64 %123, i64 -1, i8 3, i64 1), !dbg !522 + %add205.us = add nsw i64 %ir0.0375.us, %nrc.0, !dbg !522 + call void @llvm.dbg.value(metadata i64 %add205.us, metadata !396, metadata !DIExpression()), !dbg !513 + %165 = icmp slt i64 %add205.us, %invariant.smin.us, !dbg !523 + br i1 %165, label %for.body185.us, label %for.cond206.preheader.us, !dbg !514, !llvm.loop !524 + +for.cond206.preheader.us: ; preds = %.noexc395, %cond.end168.us + call void @llvm.dbg.value(metadata i32 0, metadata !398, metadata !DIExpression()), !dbg !526 + br i1 %cmp208376, label %for.body210.us.split, label %for.cond.cleanup209.us, !dbg !527 + +pfor.cond.cleanup239.loopexit: ; preds = %pfor.inc236.us + call void @__csan_after_loop(i64 %104, i8 0, i64 1), !dbg !546 + br label %pfor.cond.cleanup239, !dbg !546 + +pfor.cond.cleanup239: ; preds = %pfor.cond.cleanup239.loopexit, %pfor.ph + %166 = load i64, ptr @__csi_unit_sync_base_id, align 8, !dbg !546, !invariant.load !402 + %167 = add i64 %166, 2, !dbg !546 + call void @__csan_sync(i64 %167, i32 0), !dbg !546 + sync within %syncreg, label %sync.continue241, !dbg !546 + +sync.continue241: ; preds = %pfor.cond.cleanup239 + invoke void @llvm.sync.unwind(token %syncreg) + to label %cleanup244 unwind label %csi.cleanup.loopexit.split-lp.csi-split-lp, 
!dbg !546 + +cleanup244: ; preds = %sync.continue241, %cond.end101 + %168 = load i64, ptr @__csi_unit_func_exit_base_id, align 8, !dbg !549, !invariant.load !402 + call void @__csan_func_exit(i64 %168, i64 %0, i64 1), !dbg !400 + ret void, !dbg !549 + +csi.cleanup.loopexit: ; preds = %csi.cleanup389, %pfor.cond.us + %lpad.loopexit402 = landingpad { ptr, i32 } + cleanup, !dbg !451 + call void @__csan_after_loop(i64 %104, i8 0, i64 1), !dbg !451 + call void @__csan_detach_continue(i64 %12, i64 %7, i32 0, i64 3), !dbg !451 + br label %csi.cleanup, !dbg !451 + +csi.cleanup.loopexit.split-lp.csi-split-lp: ; preds = %sync.continue241 + %lpad.csi-split-lp405 = landingpad { ptr, i32 } + cleanup, !dbg !451 + br label %csi.cleanup, !dbg !451 + +csi.cleanup.loopexit.split-lp.csi-split: ; preds = %call.noexc, %75 + %169 = phi i64 [ %94, %call.noexc ], [ %84, %75 ], !dbg !451 + %170 = phi i64 [ %95, %call.noexc ], [ %83, %75 ], !dbg !451 + %171 = phi i8 [ 0, %call.noexc ], [ 1, %75 ], !dbg !451 + %lpad.csi-split406 = landingpad { ptr, i32 } + cleanup, !dbg !451 + call void @__csan_after_call(i64 %169, i64 %170, i8 %171, i64 2), !dbg !451 + br label %csi.cleanup, !dbg !451 + +csi.cleanup: ; preds = %csi.cleanup.loopexit.split-lp.csi-split-lp, %csi.cleanup.loopexit.split-lp.csi-split, %csi.cleanup.loopexit + %lpad.phi403 = phi { ptr, i32 } [ %lpad.loopexit402, %csi.cleanup.loopexit ], [ %lpad.csi-split-lp405, %csi.cleanup.loopexit.split-lp.csi-split-lp ], [ %lpad.csi-split406, %csi.cleanup.loopexit.split-lp.csi-split ] + %172 = load i64, ptr @__csi_unit_func_exit_base_id, align 8, !dbg !451, !invariant.load !402 + %173 = add i64 %172, 1, !dbg !451 + call void @__csan_func_exit(i64 %173, i64 %0, i64 3), !dbg !400 + resume { ptr, i32 } %lpad.phi403, !dbg !451 + +csi.cleanup.unreachable: ; preds = %csi.cleanup389391393, %csi.cleanup389391, %csi.cleanup389 + unreachable + +csi.cleanup389.loopexit: ; preds = %for.body185.us.epil.1 + %lpad.loopexit = landingpad { ptr, i32 } + cleanup, 
!dbg !451 + call void @__csan_after_call(i64 %134, i64 -1, i8 3, i64 3), !dbg !451 + br label %csi.cleanup389, !dbg !451 + +csi.cleanup389.loopexit.split-lp.loopexit: ; preds = %for.body185.us.epil + %lpad.loopexit399 = landingpad { ptr, i32 } + cleanup, !dbg !451 + call void @__csan_after_call(i64 %129, i64 -1, i8 3, i64 3), !dbg !451 + br label %csi.cleanup389, !dbg !451 + +csi.cleanup389.loopexit.split-lp.loopexit.split-lp.csi-split-lp: ; preds = %sync.continue.us + %lpad.csi-split-lp = landingpad { ptr, i32 } + cleanup, !dbg !451 + br label %csi.cleanup389, !dbg !451 + +csi.cleanup389.loopexit.split-lp.loopexit.split-lp.csi-split: ; preds = %csi.cleanup389391, %pfor.ph116.us.new + %lpad.csi-split = landingpad { ptr, i32 } + cleanup, !dbg !451 + call void @__csan_detach_continue(i64 %111, i64 %106, i32 0, i64 1), !dbg !451 + br label %csi.cleanup389, !dbg !451 + +csi.cleanup389: ; preds = %csi.cleanup389.loopexit.split-lp.loopexit, %csi.cleanup389.loopexit.split-lp.loopexit.split-lp.csi-split, %csi.cleanup389.loopexit.split-lp.loopexit.split-lp.csi-split-lp, %csi.cleanup389.loopexit + %lpad.phi = phi { ptr, i32 } [ %lpad.loopexit, %csi.cleanup389.loopexit ], [ %lpad.loopexit399, %csi.cleanup389.loopexit.split-lp.loopexit ], [ %lpad.csi-split-lp, %csi.cleanup389.loopexit.split-lp.loopexit.split-lp.csi-split-lp ], [ %lpad.csi-split, %csi.cleanup389.loopexit.split-lp.loopexit.split-lp.csi-split ] + call void @__csan_task_exit(i64 %10, i64 %8, i64 %7, i32 0, i64 1), !dbg !451 + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg, { ptr, i32 } %lpad.phi) + to label %csi.cleanup.unreachable unwind label %csi.cleanup.loopexit, !dbg !451 + +csi.cleanup389391: ; preds = %pfor.cond123.us.strpm.outer, %csi.cleanup389391393 + %csi.cleanup.lpad390392 = landingpad { ptr, i32 } + cleanup, !dbg !458 + call void @__csan_after_loop(i64 %118, i8 0, i64 1), !dbg !458 + call void @__csan_detach_continue(i64 %117, i64 %112, i32 0, i64 3), !dbg !458 + call void 
@__csan_task_exit(i64 %109, i64 %107, i64 %106, i32 0, i64 0), !dbg !458 + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg110.us, { ptr, i32 } %csi.cleanup.lpad390392) + to label %csi.cleanup.unreachable unwind label %csi.cleanup389.loopexit.split-lp.loopexit.split-lp.csi-split, !dbg !458 + +csi.cleanup389391393: ; preds = %for.body185.us + %csi.cleanup.lpad390392394 = landingpad { ptr, i32 } + cleanup, !dbg !458 + call void @__csan_after_call(i64 %123, i64 -1, i8 3, i64 3), !dbg !458 + call void @__csan_task_exit(i64 %115, i64 %113, i64 %112, i32 0, i64 1), !dbg !458 + invoke void @llvm.detached.rethrow.sl_p0i32s(token %syncreg110.us.strpm.detachloop, { ptr, i32 } %csi.cleanup.lpad390392394) + to label %csi.cleanup.unreachable unwind label %csi.cleanup389391, !dbg !458 +} + +; CHECK: define internal fastcc void @_Z28ggml_compute_forward_mul_matPK19ggml_compute_paramsP11ggml_tensor.outline_pfor.cond.us.ls1( +; CHECK: pfor.cond.us.preheader.split.split.ls1: +; CHECK-NEXT: %[[NESTED_SPAWN_SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +; CHECK-NEXT: %[[LOOP_DAC_SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +; CHECK: br label %[[LOOP_DAC_HEADER:.+]], !dbg + +; CHECK: [[LOOP_DAC_HEADER]]: + +; The task-simplify pass should optimize away the unwind edge from the DAC spawn, +; which should not depend on the inserted taskframe for the loop body. +; CHECK: detach within %[[LOOP_DAC_SYNCREG]], label %[[DAC_SPAWN:.+]], label %[[DAC_SPAWN_CONT:.+]], !dbg + +; CHECK: [[DAC_SPAWN]]: +; CHECK-NEXT: call {{.*}}void @_Z28ggml_compute_forward_mul_matPK19ggml_compute_paramsP11ggml_tensor.outline_pfor.cond.us.ls1( +; CHECK-NEXT: reattach within %[[LOOP_DAC_SYNCREG]], label %[[DAC_SPAWN_CONT]] + +; CHECK: [[DAC_SPAWN_CONT]]: +; CHECK: br label %[[LOOP_DAC_HEADER]] + +; Check for a newly introduced taskframe that contains the static alloca. 
+; CHECK: call void @__csan_detach( +; CHECK: call void @__csan_task( +; CHECK: %[[NEW_TF:.+]] = call token @llvm.taskframe.create(), !dbg +; CHECK-NEXT: %[[TMP:.+]] = alloca [32 x float] +; CHECK: br label %[[NESTED_LOOP_HEADER:.+]], !dbg + +; CHECK: [[NESTED_LOOP_HEADER]]: +; CHECK: br i1 %{{.*}}, label %[[NESTED_LOOP_EPIL:.+]], label %[[NESTED_LOOP_SPAWN:.+]], !dbg + +; Check for the nested spawn +; CHECK: [[NESTED_LOOP_SPAWN]]: +; CHECK-NEXT: call void @__csan_detach( +; CHECK-NEXT: detach within %[[NESTED_SPAWN_SYNCREG]], label %[[NESTED_SPAWN:.+]], label %[[NESTED_LOOP_EPIL_CRIT_EDGE:.+]] unwind label %[[NESTED_CSI_CLEANUP:.+]], !dbg + +; CHECK: [[NESTED_LOOP_EPIL]]: +; CHECK: sync within %[[NESTED_SPAWN_SYNCREG]], label %[[NESTED_LOOP_SYNC_CONT:.+]], !dbg + +; CHECK: [[NESTED_LOOP_SYNC_CONT]]: +; CHECK-NEXT: invoke void @llvm.sync.unwind(token %[[NESTED_SPAWN_SYNCREG]]) +; CHECK-NEXT: to label %[[NESTED_LOOP_INC:.+]] unwind label %[[NESTED_CSI_CLEANUP_SPLIT:.+]], !dbg + +; CHECK: [[NESTED_CSI_CLEANUP_SPLIT]]: +; CHECK-NEXT: landingpad +; CHECK-NEXT: cleanup +; CHECK-NEXT: br label %csi.cleanup389.ls1 + +; CHECK: csi.cleanup389.ls1: +; CHECK-NEXT: %[[CSI_CLEANUP_LPAD:.+]] = phi +; CHECK-NEXT: call void @__csan_task_exit( +; CHECK-NEXT: invoke void @llvm.taskframe.resume.sl_p0i32s(token %[[NEW_TF]], { ptr, i32 } %[[CSI_CLEANUP_LPAD]]) +; CHECK-NEXT: to label %{{.*}} unwind label %[[NEW_TF_UNWIND:.+]], !dbg + +; CHECK: [[NESTED_LOOP_INC]]: +; CHECK: br i1 %{{.*}}, label %[[NESTED_LOOP_EXIT:.+]], label %[[NESTED_LOOP_HEADER]] + +; CHECK: [[NESTED_LOOP_EXIT]]: +; CHECK-NEXT: call void @llvm.taskframe.end(token %[[NEW_TF]]) +; CHECK-NEXT: call void @__csan_task_exit( +; CHECK-NEXT: call void @__csan_detach_continue( +; CHECK-NEXT: sync within %[[LOOP_DAC_SYNCREG]], label %{{.*}} + +; CHECK: [[NEW_TF_UNWIND]]: +; CHECK-NEXT: %[[NEW_TF_LPAD:.+]] = landingpad { ptr, i32 } +; CHECK-NEXT: cleanup +; CHECK-NEXT: resume { ptr, i32 } %[[NEW_TF_LPAD]] + +; Function Attrs: 
mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.declare(metadata, metadata, metadata) #1 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2 + +declare !dbg !550 zeroext i1 @ggml_is_contiguous(ptr noundef) local_unnamed_addr #3 + +declare !dbg !553 i64 @ggml_row_size(i32 noundef, i64 noundef) local_unnamed_addr #3 + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #4 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2 + +; Function Attrs: mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #5 + +; Function Attrs: mustprogress willreturn memory(argmem: readwrite) +declare void @llvm.sync.unwind(token) #6 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare void @llvm.dbg.value(metadata, metadata, metadata) #7 + +; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.smin.i64(i64, i64) #7 + +; Function Attrs: willreturn memory(argmem: readwrite) +declare void @llvm.detached.rethrow.sl_p0i32s(token, { ptr, i32 }) #8 + +declare i32 @__gcc_personality_v0(...) 
+ +; Function Attrs: nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) +declare void @__csi_after_alloca(i64, ptr nocapture readnone, i64, i64) local_unnamed_addr #9 + +define internal void @__csi_init_callsite_to_function() { + %1 = load i64, ptr @__csi_unit_func_base_id, align 8 + store i64 %1, ptr @__csi_func_id__Z28ggml_compute_forward_mul_matPK19ggml_compute_paramsP11ggml_tensor, align 8 + ret void +} + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_func_entry(i64, ptr nocapture readnone, ptr nocapture readnone, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_func_exit(i64, i64, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_load(i64, ptr nocapture readnone, i32, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_large_load(i64, ptr nocapture readnone, i64, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_large_store(i64, ptr nocapture readnone, i64, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_before_call(i64, i64, i8, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_after_call(i64, i64, i8, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_detach(i64, i32, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_task(i64, i64, ptr nocapture readnone, ptr nocapture readnone, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: 
readwrite) +declare void @__csan_task_exit(i64, i64, i64, i32, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_detach_continue(i64, i64, i32, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_sync(i64, i32) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) +declare void @__csan_get_MAAP(ptr nocapture, i64, i8) local_unnamed_addr #9 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_set_MAAP(i8, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_before_loop(i64, i64, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind memory(argmem: read, inaccessiblemem: readwrite) +declare void @__csan_after_loop(i64, i8, i64) local_unnamed_addr #10 + +; Function Attrs: nounwind willreturn +declare ptr @llvm.task.frameaddress(i32) #11 + +; Function Attrs: nocallback nofree nosync nounwind willreturn +declare ptr @llvm.stacksave() #12 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(none) +declare ptr @llvm.frameaddress.p0(i32 immarg) #13 + +define internal void @csirt.unit_ctor() { + call void @__csanrt_unit_init(ptr nonnull @0, ptr nonnull @__csi_unit_fed_tables, ptr nonnull @__csi_unit_obj_tables, ptr nonnull @__csi_init_callsite_to_function) + ret void +} + +declare void @__csanrt_unit_init(ptr, ptr, ptr, ptr) local_unnamed_addr + +attributes #0 = { mustprogress ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind 
speculatable willreturn memory(none) } +attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #3 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #4 = { mustprogress nounwind willreturn memory(argmem: readwrite) } +attributes #5 = { mustprogress nocallback nofree nounwind willreturn memory(argmem: readwrite) } +attributes #6 = { mustprogress willreturn memory(argmem: readwrite) } +attributes #7 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #8 = { willreturn memory(argmem: readwrite) } +attributes #9 = { nounwind memory(argmem: readwrite, inaccessiblemem: readwrite) } +attributes #10 = { nounwind memory(argmem: read, inaccessiblemem: readwrite) } +attributes #11 = { nounwind willreturn } +attributes #12 = { nocallback nofree nosync nounwind willreturn } +attributes #13 = { nocallback nofree nosync nounwind willreturn memory(none) } +attributes #14 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4, !5, !6} +!llvm.dbg.cu = !{!7} +!llvm.ident = !{!227} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 14, i32 2]} +!1 = !{i32 7, !"Dwarf Version", i32 4} +!2 = !{i32 2, !"Debug Info Version", i32 3} +!3 = !{i32 1, !"wchar_size", i32 4} +!4 = !{i32 8, !"PIC Level", i32 2} +!5 = !{i32 7, !"uwtable", i32 1} +!6 = !{i32 7, !"frame-pointer", i32 1} +!7 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_11, file: !8, producer: "clang version 17.0.6 (git@github.com:OpenCilk/opencilk-project.git c85f242a46d579145a8538338c78acd94c43c5f4)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, enums: !9, retainedTypes: !122, imports: !129, splitDebugInlining: false, nameTableKind: Apple, sysroot: 
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk", sdk: "MacOSX.sdk") +!8 = !DIFile(filename: "/Users/neboat/Software/llama.cpp/ggml_compute_forward_mul_mat.cpp", directory: "/Users/neboat/Software/llama.cpp") +!9 = !{!10, !17, !42, !47} +!10 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "ggml_task_type", file: !11, line: 642, baseType: !12, size: 32, elements: !13, identifier: "_ZTS14ggml_task_type") +!11 = !DIFile(filename: "ggml.h", directory: "/Users/neboat/Software/llama.cpp") +!12 = !DIBasicType(name: "unsigned int", size: 32, encoding: DW_ATE_unsigned) +!13 = !{!14, !15, !16} +!14 = !DIEnumerator(name: "GGML_TASK_INIT", value: 0, isUnsigned: true) +!15 = !DIEnumerator(name: "GGML_TASK_COMPUTE", value: 1, isUnsigned: true) +!16 = !DIEnumerator(name: "GGML_TASK_FINALIZE", value: 2, isUnsigned: true) +!17 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "ggml_type", file: !11, line: 330, baseType: !12, size: 32, elements: !18, identifier: "_ZTS9ggml_type") +!18 = !{!19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41} +!19 = !DIEnumerator(name: "GGML_TYPE_F32", value: 0, isUnsigned: true) +!20 = !DIEnumerator(name: "GGML_TYPE_F16", value: 1, isUnsigned: true) +!21 = !DIEnumerator(name: "GGML_TYPE_Q4_0", value: 2, isUnsigned: true) +!22 = !DIEnumerator(name: "GGML_TYPE_Q4_1", value: 3, isUnsigned: true) +!23 = !DIEnumerator(name: "GGML_TYPE_Q5_0", value: 6, isUnsigned: true) +!24 = !DIEnumerator(name: "GGML_TYPE_Q5_1", value: 7, isUnsigned: true) +!25 = !DIEnumerator(name: "GGML_TYPE_Q8_0", value: 8, isUnsigned: true) +!26 = !DIEnumerator(name: "GGML_TYPE_Q8_1", value: 9, isUnsigned: true) +!27 = !DIEnumerator(name: "GGML_TYPE_Q2_K", value: 10, isUnsigned: true) +!28 = !DIEnumerator(name: "GGML_TYPE_Q3_K", value: 11, isUnsigned: true) +!29 = !DIEnumerator(name: "GGML_TYPE_Q4_K", value: 12, isUnsigned: true) +!30 = !DIEnumerator(name: 
"GGML_TYPE_Q5_K", value: 13, isUnsigned: true) +!31 = !DIEnumerator(name: "GGML_TYPE_Q6_K", value: 14, isUnsigned: true) +!32 = !DIEnumerator(name: "GGML_TYPE_Q8_K", value: 15, isUnsigned: true) +!33 = !DIEnumerator(name: "GGML_TYPE_IQ2_XXS", value: 16, isUnsigned: true) +!34 = !DIEnumerator(name: "GGML_TYPE_IQ2_XS", value: 17, isUnsigned: true) +!35 = !DIEnumerator(name: "GGML_TYPE_IQ3_XXS", value: 18, isUnsigned: true) +!36 = !DIEnumerator(name: "GGML_TYPE_IQ1_S", value: 19, isUnsigned: true) +!37 = !DIEnumerator(name: "GGML_TYPE_IQ4_NL", value: 20, isUnsigned: true) +!38 = !DIEnumerator(name: "GGML_TYPE_I8", value: 21, isUnsigned: true) +!39 = !DIEnumerator(name: "GGML_TYPE_I16", value: 22, isUnsigned: true) +!40 = !DIEnumerator(name: "GGML_TYPE_I32", value: 23, isUnsigned: true) +!41 = !DIEnumerator(name: "GGML_TYPE_COUNT", value: 24, isUnsigned: true) +!42 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "ggml_backend_type", file: !11, line: 365, baseType: !12, size: 32, elements: !43, identifier: "_ZTS17ggml_backend_type") +!43 = !{!44, !45, !46} +!44 = !DIEnumerator(name: "GGML_BACKEND_CPU", value: 0, isUnsigned: true) +!45 = !DIEnumerator(name: "GGML_BACKEND_GPU", value: 10, isUnsigned: true) +!46 = !DIEnumerator(name: "GGML_BACKEND_GPU_SPLIT", value: 20, isUnsigned: true) +!47 = !DICompositeType(tag: DW_TAG_enumeration_type, name: "ggml_op", file: !11, line: 395, baseType: !12, size: 32, elements: !48, identifier: "_ZTS7ggml_op") +!48 = !{!49, !50, !51, !52, !53, !54, !55, !56, !57, !58, !59, !60, !61, !62, !63, !64, !65, !66, !67, !68, !69, !70, !71, !72, !73, !74, !75, !76, !77, !78, !79, !80, !81, !82, !83, !84, !85, !86, !87, !88, !89, !90, !91, !92, !93, !94, !95, !96, !97, !98, !99, !100, !101, !102, !103, !104, !105, !106, !107, !108, !109, !110, !111, !112, !113, !114, !115, !116, !117, !118, !119, !120, !121} +!49 = !DIEnumerator(name: "GGML_OP_NONE", value: 0, isUnsigned: true) +!50 = !DIEnumerator(name: "GGML_OP_DUP", value: 1, isUnsigned: 
true) +!51 = !DIEnumerator(name: "GGML_OP_ADD", value: 2, isUnsigned: true) +!52 = !DIEnumerator(name: "GGML_OP_ADD1", value: 3, isUnsigned: true) +!53 = !DIEnumerator(name: "GGML_OP_ACC", value: 4, isUnsigned: true) +!54 = !DIEnumerator(name: "GGML_OP_SUB", value: 5, isUnsigned: true) +!55 = !DIEnumerator(name: "GGML_OP_MUL", value: 6, isUnsigned: true) +!56 = !DIEnumerator(name: "GGML_OP_DIV", value: 7, isUnsigned: true) +!57 = !DIEnumerator(name: "GGML_OP_SQR", value: 8, isUnsigned: true) +!58 = !DIEnumerator(name: "GGML_OP_SQRT", value: 9, isUnsigned: true) +!59 = !DIEnumerator(name: "GGML_OP_LOG", value: 10, isUnsigned: true) +!60 = !DIEnumerator(name: "GGML_OP_SUM", value: 11, isUnsigned: true) +!61 = !DIEnumerator(name: "GGML_OP_SUM_ROWS", value: 12, isUnsigned: true) +!62 = !DIEnumerator(name: "GGML_OP_MEAN", value: 13, isUnsigned: true) +!63 = !DIEnumerator(name: "GGML_OP_ARGMAX", value: 14, isUnsigned: true) +!64 = !DIEnumerator(name: "GGML_OP_REPEAT", value: 15, isUnsigned: true) +!65 = !DIEnumerator(name: "GGML_OP_REPEAT_BACK", value: 16, isUnsigned: true) +!66 = !DIEnumerator(name: "GGML_OP_CONCAT", value: 17, isUnsigned: true) +!67 = !DIEnumerator(name: "GGML_OP_SILU_BACK", value: 18, isUnsigned: true) +!68 = !DIEnumerator(name: "GGML_OP_NORM", value: 19, isUnsigned: true) +!69 = !DIEnumerator(name: "GGML_OP_RMS_NORM", value: 20, isUnsigned: true) +!70 = !DIEnumerator(name: "GGML_OP_RMS_NORM_BACK", value: 21, isUnsigned: true) +!71 = !DIEnumerator(name: "GGML_OP_GROUP_NORM", value: 22, isUnsigned: true) +!72 = !DIEnumerator(name: "GGML_OP_MUL_MAT", value: 23, isUnsigned: true) +!73 = !DIEnumerator(name: "GGML_OP_MUL_MAT_ID", value: 24, isUnsigned: true) +!74 = !DIEnumerator(name: "GGML_OP_OUT_PROD", value: 25, isUnsigned: true) +!75 = !DIEnumerator(name: "GGML_OP_SCALE", value: 26, isUnsigned: true) +!76 = !DIEnumerator(name: "GGML_OP_SET", value: 27, isUnsigned: true) +!77 = !DIEnumerator(name: "GGML_OP_CPY", value: 28, isUnsigned: true) +!78 = 
!DIEnumerator(name: "GGML_OP_CONT", value: 29, isUnsigned: true) +!79 = !DIEnumerator(name: "GGML_OP_RESHAPE", value: 30, isUnsigned: true) +!80 = !DIEnumerator(name: "GGML_OP_VIEW", value: 31, isUnsigned: true) +!81 = !DIEnumerator(name: "GGML_OP_PERMUTE", value: 32, isUnsigned: true) +!82 = !DIEnumerator(name: "GGML_OP_TRANSPOSE", value: 33, isUnsigned: true) +!83 = !DIEnumerator(name: "GGML_OP_GET_ROWS", value: 34, isUnsigned: true) +!84 = !DIEnumerator(name: "GGML_OP_GET_ROWS_BACK", value: 35, isUnsigned: true) +!85 = !DIEnumerator(name: "GGML_OP_DIAG", value: 36, isUnsigned: true) +!86 = !DIEnumerator(name: "GGML_OP_DIAG_MASK_INF", value: 37, isUnsigned: true) +!87 = !DIEnumerator(name: "GGML_OP_DIAG_MASK_ZERO", value: 38, isUnsigned: true) +!88 = !DIEnumerator(name: "GGML_OP_SOFT_MAX", value: 39, isUnsigned: true) +!89 = !DIEnumerator(name: "GGML_OP_SOFT_MAX_BACK", value: 40, isUnsigned: true) +!90 = !DIEnumerator(name: "GGML_OP_ROPE", value: 41, isUnsigned: true) +!91 = !DIEnumerator(name: "GGML_OP_ROPE_BACK", value: 42, isUnsigned: true) +!92 = !DIEnumerator(name: "GGML_OP_ALIBI", value: 43, isUnsigned: true) +!93 = !DIEnumerator(name: "GGML_OP_CLAMP", value: 44, isUnsigned: true) +!94 = !DIEnumerator(name: "GGML_OP_CONV_TRANSPOSE_1D", value: 45, isUnsigned: true) +!95 = !DIEnumerator(name: "GGML_OP_IM2COL", value: 46, isUnsigned: true) +!96 = !DIEnumerator(name: "GGML_OP_CONV_TRANSPOSE_2D", value: 47, isUnsigned: true) +!97 = !DIEnumerator(name: "GGML_OP_POOL_1D", value: 48, isUnsigned: true) +!98 = !DIEnumerator(name: "GGML_OP_POOL_2D", value: 49, isUnsigned: true) +!99 = !DIEnumerator(name: "GGML_OP_UPSCALE", value: 50, isUnsigned: true) +!100 = !DIEnumerator(name: "GGML_OP_PAD", value: 51, isUnsigned: true) +!101 = !DIEnumerator(name: "GGML_OP_ARGSORT", value: 52, isUnsigned: true) +!102 = !DIEnumerator(name: "GGML_OP_LEAKY_RELU", value: 53, isUnsigned: true) +!103 = !DIEnumerator(name: "GGML_OP_FLASH_ATTN", value: 54, isUnsigned: true) +!104 = 
!DIEnumerator(name: "GGML_OP_FLASH_FF", value: 55, isUnsigned: true) +!105 = !DIEnumerator(name: "GGML_OP_FLASH_ATTN_BACK", value: 56, isUnsigned: true) +!106 = !DIEnumerator(name: "GGML_OP_WIN_PART", value: 57, isUnsigned: true) +!107 = !DIEnumerator(name: "GGML_OP_WIN_UNPART", value: 58, isUnsigned: true) +!108 = !DIEnumerator(name: "GGML_OP_GET_REL_POS", value: 59, isUnsigned: true) +!109 = !DIEnumerator(name: "GGML_OP_ADD_REL_POS", value: 60, isUnsigned: true) +!110 = !DIEnumerator(name: "GGML_OP_UNARY", value: 61, isUnsigned: true) +!111 = !DIEnumerator(name: "GGML_OP_MAP_UNARY", value: 62, isUnsigned: true) +!112 = !DIEnumerator(name: "GGML_OP_MAP_BINARY", value: 63, isUnsigned: true) +!113 = !DIEnumerator(name: "GGML_OP_MAP_CUSTOM1_F32", value: 64, isUnsigned: true) +!114 = !DIEnumerator(name: "GGML_OP_MAP_CUSTOM2_F32", value: 65, isUnsigned: true) +!115 = !DIEnumerator(name: "GGML_OP_MAP_CUSTOM3_F32", value: 66, isUnsigned: true) +!116 = !DIEnumerator(name: "GGML_OP_MAP_CUSTOM1", value: 67, isUnsigned: true) +!117 = !DIEnumerator(name: "GGML_OP_MAP_CUSTOM2", value: 68, isUnsigned: true) +!118 = !DIEnumerator(name: "GGML_OP_MAP_CUSTOM3", value: 69, isUnsigned: true) +!119 = !DIEnumerator(name: "GGML_OP_CROSS_ENTROPY_LOSS", value: 70, isUnsigned: true) +!120 = !DIEnumerator(name: "GGML_OP_CROSS_ENTROPY_LOSS_BACK", value: 71, isUnsigned: true) +!121 = !DIEnumerator(name: "GGML_OP_COUNT", value: 72, isUnsigned: true) +!122 = !{!123, !126, !128} +!123 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !124, size: 64) +!124 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !125) +!125 = !DIBasicType(name: "char", size: 8, encoding: DW_ATE_signed_char) +!126 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !127, size: 64) +!127 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!128 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !125, size: 64) +!129 = !{!130, !137, !141, !144, !148, !153, !157, !161, !165, !169, !173, !176, !180, !183, 
!185, !187, !189, !191, !193, !195, !197, !199, !201, !203, !205, !207, !209, !211, !213, !218, !221, !224} +!130 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !133, file: !136, line: 59) +!131 = !DINamespace(name: "__1", scope: !132, exportSymbols: true) +!132 = !DINamespace(name: "std", scope: null) +!133 = !DIDerivedType(tag: DW_TAG_typedef, name: "nullptr_t", file: !134, line: 50, baseType: !135) +!134 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1/stddef.h", directory: "") +!135 = !DIBasicType(tag: DW_TAG_unspecified_type, name: "decltype(nullptr)") +!136 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1/cstddef", directory: "") +!137 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !138, file: !136, line: 60) +!138 = !DIDerivedType(tag: DW_TAG_typedef, name: "ptrdiff_t", file: !139, line: 35, baseType: !140) +!139 = !DIFile(filename: "opencilk-project/build-17/lib/clang/17/include/stddef.h", directory: "/Users/neboat/Software") +!140 = !DIBasicType(name: "long", size: 64, encoding: DW_ATE_signed) +!141 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !142, file: !136, line: 61) +!142 = !DIDerivedType(tag: DW_TAG_typedef, name: "size_t", file: !139, line: 46, baseType: !143) +!143 = !DIBasicType(name: "unsigned long", size: 64, encoding: DW_ATE_unsigned) +!144 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !145, file: !136, line: 64) +!145 = !DIDerivedType(tag: DW_TAG_typedef, name: "max_align_t", file: !146, line: 16, baseType: !147) +!146 = !DIFile(filename: "opencilk-project/build-17/lib/clang/17/include/__stddef_max_align_t.h", directory: "/Users/neboat/Software") +!147 = !DIBasicType(name: "long double", size: 64, encoding: DW_ATE_float) +!148 = !DIImportedEntity(tag: 
DW_TAG_imported_declaration, scope: !131, entity: !149, file: !152, line: 162) +!149 = !DIDerivedType(tag: DW_TAG_typedef, name: "int8_t", file: !150, line: 30, baseType: !151) +!150 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/sys/_types/_int8_t.h", directory: "") +!151 = !DIBasicType(name: "signed char", size: 8, encoding: DW_ATE_signed_char) +!152 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/c++/v1/cstdint", directory: "") +!153 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !154, file: !152, line: 163) +!154 = !DIDerivedType(tag: DW_TAG_typedef, name: "int16_t", file: !155, line: 30, baseType: !156) +!155 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/sys/_types/_int16_t.h", directory: "") +!156 = !DIBasicType(name: "short", size: 16, encoding: DW_ATE_signed) +!157 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !158, file: !152, line: 164) +!158 = !DIDerivedType(tag: DW_TAG_typedef, name: "int32_t", file: !159, line: 30, baseType: !160) +!159 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/sys/_types/_int32_t.h", directory: "") +!160 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!161 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !162, file: !152, line: 165) +!162 = !DIDerivedType(tag: DW_TAG_typedef, name: "int64_t", file: !163, line: 30, baseType: !164) +!163 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/sys/_types/_int64_t.h", directory: "") +!164 = !DIBasicType(name: "long long", size: 64, encoding: DW_ATE_signed) +!165 = !DIImportedEntity(tag: 
DW_TAG_imported_declaration, scope: !131, entity: !166, file: !152, line: 167) +!166 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint8_t", file: !167, line: 31, baseType: !168) +!167 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/_types/_uint8_t.h", directory: "") +!168 = !DIBasicType(name: "unsigned char", size: 8, encoding: DW_ATE_unsigned_char) +!169 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !170, file: !152, line: 168) +!170 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint16_t", file: !171, line: 31, baseType: !172) +!171 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/_types/_uint16_t.h", directory: "") +!172 = !DIBasicType(name: "unsigned short", size: 16, encoding: DW_ATE_unsigned) +!173 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !174, file: !152, line: 169) +!174 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint32_t", file: !175, line: 31, baseType: !12) +!175 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/_types/_uint32_t.h", directory: "") +!176 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !177, file: !152, line: 170) +!177 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint64_t", file: !178, line: 31, baseType: !179) +!178 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/_types/_uint64_t.h", directory: "") +!179 = !DIBasicType(name: "unsigned long long", size: 64, encoding: DW_ATE_unsigned) +!180 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !181, file: !152, line: 172) +!181 = !DIDerivedType(tag: DW_TAG_typedef, name: "int_least8_t", file: !182, line: 29, baseType: !149) +!182 = !DIFile(filename: 
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/stdint.h", directory: "") +!183 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !184, file: !152, line: 173) +!184 = !DIDerivedType(tag: DW_TAG_typedef, name: "int_least16_t", file: !182, line: 30, baseType: !154) +!185 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !186, file: !152, line: 174) +!186 = !DIDerivedType(tag: DW_TAG_typedef, name: "int_least32_t", file: !182, line: 31, baseType: !158) +!187 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !188, file: !152, line: 175) +!188 = !DIDerivedType(tag: DW_TAG_typedef, name: "int_least64_t", file: !182, line: 32, baseType: !162) +!189 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !190, file: !152, line: 177) +!190 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint_least8_t", file: !182, line: 33, baseType: !166) +!191 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !192, file: !152, line: 178) +!192 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint_least16_t", file: !182, line: 34, baseType: !170) +!193 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !194, file: !152, line: 179) +!194 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint_least32_t", file: !182, line: 35, baseType: !174) +!195 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !196, file: !152, line: 180) +!196 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint_least64_t", file: !182, line: 36, baseType: !177) +!197 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !198, file: !152, line: 182) +!198 = !DIDerivedType(tag: DW_TAG_typedef, name: "int_fast8_t", file: !182, line: 40, baseType: !149) +!199 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !200, file: !152, line: 183) +!200 = !DIDerivedType(tag: 
DW_TAG_typedef, name: "int_fast16_t", file: !182, line: 41, baseType: !154) +!201 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !202, file: !152, line: 184) +!202 = !DIDerivedType(tag: DW_TAG_typedef, name: "int_fast32_t", file: !182, line: 42, baseType: !158) +!203 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !204, file: !152, line: 185) +!204 = !DIDerivedType(tag: DW_TAG_typedef, name: "int_fast64_t", file: !182, line: 43, baseType: !162) +!205 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !206, file: !152, line: 187) +!206 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint_fast8_t", file: !182, line: 44, baseType: !166) +!207 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !208, file: !152, line: 188) +!208 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint_fast16_t", file: !182, line: 45, baseType: !170) +!209 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !210, file: !152, line: 189) +!210 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint_fast32_t", file: !182, line: 46, baseType: !174) +!211 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !212, file: !152, line: 190) +!212 = !DIDerivedType(tag: DW_TAG_typedef, name: "uint_fast64_t", file: !182, line: 47, baseType: !177) +!213 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !214, file: !152, line: 192) +!214 = !DIDerivedType(tag: DW_TAG_typedef, name: "intptr_t", file: !215, line: 32, baseType: !216) +!215 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/sys/_types/_intptr_t.h", directory: "") +!216 = !DIDerivedType(tag: DW_TAG_typedef, name: "__darwin_intptr_t", file: !217, line: 27, baseType: !140) +!217 = !DIFile(filename: 
"/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/arm/_types.h", directory: "") +!218 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !219, file: !152, line: 193) +!219 = !DIDerivedType(tag: DW_TAG_typedef, name: "uintptr_t", file: !220, line: 34, baseType: !143) +!220 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/sys/_types/_uintptr_t.h", directory: "") +!221 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !222, file: !152, line: 195) +!222 = !DIDerivedType(tag: DW_TAG_typedef, name: "intmax_t", file: !223, line: 32, baseType: !140) +!223 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/_types/_intmax_t.h", directory: "") +!224 = !DIImportedEntity(tag: DW_TAG_imported_declaration, scope: !131, entity: !225, file: !152, line: 196) +!225 = !DIDerivedType(tag: DW_TAG_typedef, name: "uintmax_t", file: !226, line: 32, baseType: !143) +!226 = !DIFile(filename: "/Applications/Xcode.app/Contents/Developer/Platforms/MacOSX.platform/Developer/SDKs/MacOSX.sdk/usr/include/_types/_uintmax_t.h", directory: "") +!227 = !{!"clang version 17.0.6 (git@github.com:OpenCilk/opencilk-project.git c85f242a46d579145a8538338c78acd94c43c5f4)"} +!228 = distinct !DISubprogram(name: "ggml_compute_forward_mul_mat", linkageName: "_Z28ggml_compute_forward_mul_matPK19ggml_compute_paramsP11ggml_tensor", scope: !229, file: !229, line: 33, type: !230, scopeLine: 35, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !282) +!229 = !DIFile(filename: "ggml_compute_forward_mul_mat.cpp", directory: "/Users/neboat/Software/llama.cpp") +!230 = !DISubroutineType(types: !231) +!231 = !{null, !232, !242} +!232 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !233, 
size: 64) +!233 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !234) +!234 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ggml_compute_params", file: !11, line: 648, size: 256, flags: DIFlagTypePassByValue, elements: !235, identifier: "_ZTS19ggml_compute_params") +!235 = !{!236, !237, !238, !239, !240} +!236 = !DIDerivedType(tag: DW_TAG_member, name: "type", scope: !234, file: !11, line: 649, baseType: !10, size: 32) +!237 = !DIDerivedType(tag: DW_TAG_member, name: "ith", scope: !234, file: !11, line: 652, baseType: !160, size: 32, offset: 32) +!238 = !DIDerivedType(tag: DW_TAG_member, name: "nth", scope: !234, file: !11, line: 652, baseType: !160, size: 32, offset: 64) +!239 = !DIDerivedType(tag: DW_TAG_member, name: "wsize", scope: !234, file: !11, line: 655, baseType: !142, size: 64, offset: 128) +!240 = !DIDerivedType(tag: DW_TAG_member, name: "wdata", scope: !234, file: !11, line: 656, baseType: !241, size: 64, offset: 192) +!241 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: null, size: 64) +!242 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !243, size: 64) +!243 = distinct !DICompositeType(tag: DW_TAG_structure_type, name: "ggml_tensor", file: !11, line: 532, size: 2944, flags: DIFlagTypePassByValue, elements: !244, identifier: "_ZTS11ggml_tensor") +!244 = !{!245, !246, !247, !250, !254, !256, !257, !261, !262, !263, !267, !268, !269, !270, !271, !272, !273, !277, !278} +!245 = !DIDerivedType(tag: DW_TAG_member, name: "type", scope: !243, file: !11, line: 533, baseType: !17, size: 32) +!246 = !DIDerivedType(tag: DW_TAG_member, name: "backend", scope: !243, file: !11, line: 534, baseType: !42, size: 32, offset: 32) +!247 = !DIDerivedType(tag: DW_TAG_member, name: "buffer", scope: !243, file: !11, line: 536, baseType: !248, size: 64, offset: 64) +!248 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !249, size: 64) +!249 = !DICompositeType(tag: DW_TAG_structure_type, name: "ggml_backend_buffer", file: !11, line: 536, flags: 
DIFlagFwdDecl | DIFlagNonTrivial, identifier: "_ZTS19ggml_backend_buffer") +!250 = !DIDerivedType(tag: DW_TAG_member, name: "ne", scope: !243, file: !11, line: 538, baseType: !251, size: 256, offset: 128) +!251 = !DICompositeType(tag: DW_TAG_array_type, baseType: !162, size: 256, elements: !252) +!252 = !{!253} +!253 = !DISubrange(count: 4) +!254 = !DIDerivedType(tag: DW_TAG_member, name: "nb", scope: !243, file: !11, line: 539, baseType: !255, size: 256, offset: 384) +!255 = !DICompositeType(tag: DW_TAG_array_type, baseType: !142, size: 256, elements: !252) +!256 = !DIDerivedType(tag: DW_TAG_member, name: "op", scope: !243, file: !11, line: 545, baseType: !47, size: 32, offset: 640) +!257 = !DIDerivedType(tag: DW_TAG_member, name: "op_params", scope: !243, file: !11, line: 548, baseType: !258, size: 512, offset: 672) +!258 = !DICompositeType(tag: DW_TAG_array_type, baseType: !158, size: 512, elements: !259) +!259 = !{!260} +!260 = !DISubrange(count: 16) +!261 = !DIDerivedType(tag: DW_TAG_member, name: "flags", scope: !243, file: !11, line: 550, baseType: !158, size: 32, offset: 1184) +!262 = !DIDerivedType(tag: DW_TAG_member, name: "grad", scope: !243, file: !11, line: 552, baseType: !242, size: 64, offset: 1216) +!263 = !DIDerivedType(tag: DW_TAG_member, name: "src", scope: !243, file: !11, line: 553, baseType: !264, size: 640, offset: 1280) +!264 = !DICompositeType(tag: DW_TAG_array_type, baseType: !242, size: 640, elements: !265) +!265 = !{!266} +!266 = !DISubrange(count: 10) +!267 = !DIDerivedType(tag: DW_TAG_member, name: "perf_runs", scope: !243, file: !11, line: 556, baseType: !160, size: 32, offset: 1920) +!268 = !DIDerivedType(tag: DW_TAG_member, name: "perf_cycles", scope: !243, file: !11, line: 557, baseType: !162, size: 64, offset: 1984) +!269 = !DIDerivedType(tag: DW_TAG_member, name: "perf_time_us", scope: !243, file: !11, line: 558, baseType: !162, size: 64, offset: 2048) +!270 = !DIDerivedType(tag: DW_TAG_member, name: "view_src", scope: !243, 
file: !11, line: 560, baseType: !242, size: 64, offset: 2112) +!271 = !DIDerivedType(tag: DW_TAG_member, name: "view_offs", scope: !243, file: !11, line: 561, baseType: !142, size: 64, offset: 2176) +!272 = !DIDerivedType(tag: DW_TAG_member, name: "data", scope: !243, file: !11, line: 563, baseType: !241, size: 64, offset: 2240) +!273 = !DIDerivedType(tag: DW_TAG_member, name: "name", scope: !243, file: !11, line: 565, baseType: !274, size: 512, offset: 2304) +!274 = !DICompositeType(tag: DW_TAG_array_type, baseType: !125, size: 512, elements: !275) +!275 = !{!276} +!276 = !DISubrange(count: 64) +!277 = !DIDerivedType(tag: DW_TAG_member, name: "extra", scope: !243, file: !11, line: 567, baseType: !241, size: 64, offset: 2816) +!278 = !DIDerivedType(tag: DW_TAG_member, name: "padding", scope: !243, file: !11, line: 569, baseType: !279, size: 64, offset: 2880) +!279 = !DICompositeType(tag: DW_TAG_array_type, baseType: !125, size: 64, elements: !280) +!280 = !{!281} +!281 = !DISubrange(count: 8) +!282 = !{!283, !284, !285, !288, !289, !291, !292, !293, !294, !296, !297, !298, !299, !300, !301, !302, !303, !304, !305, !306, !307, !308, !309, !310, !311, !312, !313, !314, !315, !317, !318, !320, !323, !331, !332, !340, !341, !342, !343, !344, !345, !346, !347, !348, !349, !350, !351, !352, !353, !354, !355, !356, !357, !358, !359, !360, !361, !363, !364, !365, !366, !368, !371, !372, !373, !374, !376, !381, !383, !386, !387, !388, !389, !390, !391, !392, !393, !394, !395, !396, !398} +!283 = !DILocalVariable(name: "params", arg: 1, scope: !228, file: !229, line: 34, type: !232) +!284 = !DILocalVariable(name: "dst", arg: 2, scope: !228, file: !229, line: 35, type: !242) +!285 = !DILocalVariable(name: "src0", scope: !228, file: !229, line: 37, type: !286) +!286 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !287, size: 64) +!287 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !243) +!288 = !DILocalVariable(name: "src1", scope: !228, file: !229, line: 38, type: 
!286) +!289 = !DILocalVariable(name: "ne00", scope: !228, file: !229, line: 43, type: !290) +!290 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !162) +!291 = !DILocalVariable(name: "ne01", scope: !228, file: !229, line: 43, type: !290) +!292 = !DILocalVariable(name: "ne02", scope: !228, file: !229, line: 43, type: !290) +!293 = !DILocalVariable(name: "ne03", scope: !228, file: !229, line: 43, type: !290) +!294 = !DILocalVariable(name: "nb00", scope: !228, file: !229, line: 43, type: !295) +!295 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !142) +!296 = !DILocalVariable(name: "nb01", scope: !228, file: !229, line: 43, type: !295) +!297 = !DILocalVariable(name: "nb02", scope: !228, file: !229, line: 43, type: !295) +!298 = !DILocalVariable(name: "nb03", scope: !228, file: !229, line: 43, type: !295) +!299 = !DILocalVariable(name: "ne10", scope: !228, file: !229, line: 43, type: !290) +!300 = !DILocalVariable(name: "ne11", scope: !228, file: !229, line: 43, type: !290) +!301 = !DILocalVariable(name: "ne12", scope: !228, file: !229, line: 43, type: !290) +!302 = !DILocalVariable(name: "ne13", scope: !228, file: !229, line: 43, type: !290) +!303 = !DILocalVariable(name: "nb10", scope: !228, file: !229, line: 43, type: !295) +!304 = !DILocalVariable(name: "nb11", scope: !228, file: !229, line: 43, type: !295) +!305 = !DILocalVariable(name: "nb12", scope: !228, file: !229, line: 43, type: !295) +!306 = !DILocalVariable(name: "nb13", scope: !228, file: !229, line: 43, type: !295) +!307 = !DILocalVariable(name: "ne0", scope: !228, file: !229, line: 43, type: !290) +!308 = !DILocalVariable(name: "ne1", scope: !228, file: !229, line: 43, type: !290) +!309 = !DILocalVariable(name: "ne2", scope: !228, file: !229, line: 43, type: !290) +!310 = !DILocalVariable(name: "ne3", scope: !228, file: !229, line: 43, type: !290) +!311 = !DILocalVariable(name: "nb0", scope: !228, file: !229, line: 43, type: !295) +!312 = !DILocalVariable(name: "nb1", scope: !228, file: !229, 
line: 43, type: !295) +!313 = !DILocalVariable(name: "nb2", scope: !228, file: !229, line: 43, type: !295) +!314 = !DILocalVariable(name: "nb3", scope: !228, file: !229, line: 43, type: !295) +!315 = !DILocalVariable(name: "ith", scope: !228, file: !229, line: 45, type: !316) +!316 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !160) +!317 = !DILocalVariable(name: "nth", scope: !228, file: !229, line: 46, type: !316) +!318 = !DILocalVariable(name: "type", scope: !228, file: !229, line: 48, type: !319) +!319 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !17) +!320 = !DILocalVariable(name: "src1_cont", scope: !228, file: !229, line: 50, type: !321) +!321 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !322) +!322 = !DIBasicType(name: "bool", size: 8, encoding: DW_ATE_boolean) +!323 = !DILocalVariable(name: "vec_dot", scope: !228, file: !229, line: 52, type: !324) +!324 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !325) +!325 = !DIDerivedType(tag: DW_TAG_typedef, name: "ggml_vec_dot_t", file: !11, line: 2320, baseType: !326) +!326 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !327, size: 64) +!327 = !DISubroutineType(types: !328) +!328 = !{null, !160, !126, !142, !329, !142, !329, !142, !160} +!329 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !330, size: 64) +!330 = !DIDerivedType(tag: DW_TAG_const_type, baseType: null) +!331 = !DILocalVariable(name: "vec_dot_type", scope: !228, file: !229, line: 53, type: !319) +!332 = !DILocalVariable(name: "from_float_to_vec_dot", scope: !228, file: !229, line: 54, type: !333) +!333 = !DIDerivedType(tag: DW_TAG_const_type, baseType: !334) +!334 = !DIDerivedType(tag: DW_TAG_typedef, name: "ggml_from_float_t", file: !11, line: 2319, baseType: !335) +!335 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !336, size: 64) +!336 = !DISubroutineType(types: !337) +!337 = !{null, !338, !241, !160} +!338 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !339, size: 64) +!339 = !DIDerivedType(tag: 
DW_TAG_const_type, baseType: !127) +!340 = !DILocalVariable(name: "vec_dot_num_rows", scope: !228, file: !229, line: 55, type: !290) +!341 = !DILocalVariable(name: "r2", scope: !228, file: !229, line: 73, type: !290) +!342 = !DILocalVariable(name: "r3", scope: !228, file: !229, line: 74, type: !290) +!343 = !DILocalVariable(name: "wdata", scope: !228, file: !229, line: 80, type: !329) +!344 = !DILocalVariable(name: "row_size", scope: !228, file: !229, line: 81, type: !295) +!345 = !DILocalVariable(name: "nr0", scope: !228, file: !229, line: 83, type: !290) +!346 = !DILocalVariable(name: "nr1", scope: !228, file: !229, line: 84, type: !290) +!347 = !DILocalVariable(name: "nth0", scope: !228, file: !229, line: 90, type: !290) +!348 = !DILocalVariable(name: "nth1", scope: !228, file: !229, line: 91, type: !290) +!349 = !DILocalVariable(name: "ith0", scope: !228, file: !229, line: 93, type: !290) +!350 = !DILocalVariable(name: "ith1", scope: !228, file: !229, line: 94, type: !290) +!351 = !DILocalVariable(name: "dr0", scope: !228, file: !229, line: 96, type: !290) +!352 = !DILocalVariable(name: "dr1", scope: !228, file: !229, line: 97, type: !290) +!353 = !DILocalVariable(name: "ir010", scope: !228, file: !229, line: 99, type: !290) +!354 = !DILocalVariable(name: "ir011", scope: !228, file: !229, line: 100, type: !290) +!355 = !DILocalVariable(name: "ir110", scope: !228, file: !229, line: 102, type: !290) +!356 = !DILocalVariable(name: "ir111", scope: !228, file: !229, line: 103, type: !290) +!357 = !DILocalVariable(name: "blck_0", scope: !228, file: !229, line: 111, type: !290) +!358 = !DILocalVariable(name: "blck_1", scope: !228, file: !229, line: 112, type: !290) +!359 = !DILocalVariable(name: "nrc", scope: !228, file: !229, line: 115, type: !162) +!360 = !DILocalVariable(name: "src1_col_stride", scope: !228, file: !229, line: 122, type: !295) +!361 = !DILocalVariable(name: "__init", scope: !362, type: !162, flags: DIFlagArtificial) +!362 = distinct 
!DILexicalBlock(scope: !228, file: !229, line: 124, column: 5) +!363 = !DILocalVariable(name: "__limit", scope: !362, type: !162, flags: DIFlagArtificial) +!364 = !DILocalVariable(name: "__begin", scope: !362, type: !162, flags: DIFlagArtificial) +!365 = !DILocalVariable(name: "__end", scope: !362, type: !162, flags: DIFlagArtificial) +!366 = !DILocalVariable(name: "iir0", scope: !367, file: !229, line: 124, type: !162) +!367 = distinct !DILexicalBlock(scope: !362, file: !229, line: 124, column: 5) +!368 = !DILocalVariable(name: "__init", scope: !369, type: !162, flags: DIFlagArtificial) +!369 = distinct !DILexicalBlock(scope: !370, file: !229, line: 127, column: 9) +!370 = distinct !DILexicalBlock(scope: !367, file: !229, line: 124, column: 67) +!371 = !DILocalVariable(name: "__limit", scope: !369, type: !162, flags: DIFlagArtificial) +!372 = !DILocalVariable(name: "__begin", scope: !369, type: !162, flags: DIFlagArtificial) +!373 = !DILocalVariable(name: "__end", scope: !369, type: !162, flags: DIFlagArtificial) +!374 = !DILocalVariable(name: "iir1", scope: !375, file: !229, line: 127, type: !162) +!375 = distinct !DILexicalBlock(scope: !369, file: !229, line: 127, column: 9) +!376 = !DILocalVariable(name: "tmp", scope: !377, file: !229, line: 130, type: !378) +!377 = distinct !DILexicalBlock(scope: !375, file: !229, line: 127, column: 71) +!378 = !DICompositeType(tag: DW_TAG_array_type, baseType: !127, size: 1024, elements: !379) +!379 = !{!380} +!380 = !DISubrange(count: 32) +!381 = !DILocalVariable(name: "ir1", scope: !382, file: !229, line: 131, type: !162) +!382 = distinct !DILexicalBlock(scope: !377, file: !229, line: 131, column: 13) +!383 = !DILocalVariable(name: "i13", scope: !384, file: !229, line: 132, type: !290) +!384 = distinct !DILexicalBlock(scope: !385, file: !229, line: 131, column: 86) +!385 = distinct !DILexicalBlock(scope: !382, file: !229, line: 131, column: 13) +!386 = !DILocalVariable(name: "i12", scope: !384, file: !229, line: 133, type: 
!290) +!387 = !DILocalVariable(name: "i11", scope: !384, file: !229, line: 134, type: !290) +!388 = !DILocalVariable(name: "i03", scope: !384, file: !229, line: 137, type: !290) +!389 = !DILocalVariable(name: "i02", scope: !384, file: !229, line: 138, type: !290) +!390 = !DILocalVariable(name: "i1", scope: !384, file: !229, line: 140, type: !290) +!391 = !DILocalVariable(name: "i2", scope: !384, file: !229, line: 141, type: !290) +!392 = !DILocalVariable(name: "i3", scope: !384, file: !229, line: 142, type: !290) +!393 = !DILocalVariable(name: "src0_row", scope: !384, file: !229, line: 144, type: !123) +!394 = !DILocalVariable(name: "src1_col", scope: !384, file: !229, line: 150, type: !123) +!395 = !DILocalVariable(name: "dst_col", scope: !384, file: !229, line: 154, type: !126) +!396 = !DILocalVariable(name: "ir0", scope: !397, file: !229, line: 160, type: !162) +!397 = distinct !DILexicalBlock(scope: !384, file: !229, line: 160, column: 17) +!398 = !DILocalVariable(name: "cn", scope: !399, file: !229, line: 164, type: !160) +!399 = distinct !DILexicalBlock(scope: !384, file: !229, line: 164, column: 17) +!400 = !DILocation(line: 0, scope: !228) +!401 = !DILocation(line: 37, column: 44, scope: !228) +!402 = !{} +!403 = !DILocation(line: 37, column: 39, scope: !228) +!404 = !{!405, !405, i64 0} +!405 = !{!"any pointer", !406, i64 0} +!406 = !{!"omnipotent char", !407, i64 0} +!407 = !{!"Simple C++ TBAA"} +!408 = !DILocation(line: 38, column: 39, scope: !228) +!409 = !DILocation(line: 43, column: 5, scope: !228) +!410 = !{!411, !411, i64 0} +!411 = !{!"long long", !406, i64 0} +!412 = !{!413, !413, i64 0} +!413 = !{!"long", !406, i64 0} +!414 = !DILocation(line: 48, column: 39, scope: !228) +!415 = !{!416, !417, i64 0} +!416 = !{!"_ZTS11ggml_tensor", !417, i64 0, !418, i64 4, !405, i64 8, !406, i64 16, !406, i64 48, !419, i64 80, !406, i64 84, !420, i64 148, !405, i64 152, !406, i64 160, !420, i64 240, !411, i64 248, !411, i64 256, !405, i64 264, !413, i64 272, 
!405, i64 280, !406, i64 288, !405, i64 352, !406, i64 360} +!417 = !{!"_ZTS9ggml_type", !406, i64 0} +!418 = !{!"_ZTS17ggml_backend_type", !406, i64 0} +!419 = !{!"_ZTS7ggml_op", !406, i64 0} +!420 = !{!"int", !406, i64 0} +!421 = !DILocation(line: 50, column: 28, scope: !228) +!422 = !DILocation(line: 52, column: 53, scope: !228) +!423 = !DILocation(line: 52, column: 71, scope: !228) +!424 = !{!425, !405, i64 56} +!425 = !{!"_ZTS18ggml_type_traits_t", !405, i64 0, !420, i64 8, !413, i64 16, !426, i64 24, !405, i64 32, !405, i64 40, !405, i64 48, !405, i64 56, !417, i64 64, !411, i64 72} +!426 = !{!"bool", !406, i64 0} +!427 = !DILocation(line: 53, column: 71, scope: !228) +!428 = !{!425, !417, i64 64} +!429 = !DILocation(line: 55, column: 71, scope: !228) +!430 = !{!425, !411, i64 72} +!431 = !DILocation(line: 73, column: 28, scope: !228) +!432 = !DILocation(line: 74, column: 28, scope: !228) +!433 = !DILocation(line: 80, column: 36, scope: !228) +!434 = !DILocation(line: 80, column: 41, scope: !228) +!435 = !DILocation(line: 80, column: 29, scope: !228) +!436 = !DILocation(line: 81, column: 29, scope: !228) +!437 = !DILocation(line: 84, column: 28, scope: !228) +!438 = !DILocation(line: 84, column: 33, scope: !228) +!439 = !DILocation(line: 118, column: 18, scope: !440) +!440 = distinct !DILexicalBlock(scope: !228, file: !229, line: 118, column: 9) +!441 = !DILocation(line: 118, column: 24, scope: !440) +!442 = !DILocation(line: 122, column: 46, scope: !228) +!443 = !DILocation(line: 122, column: 55, scope: !228) +!444 = !DILocation(line: 122, column: 60, scope: !228) +!445 = !DILocation(line: 122, column: 36, scope: !228) +!446 = !DILocation(line: 0, scope: !362) +!447 = !DILocation(line: 124, column: 42, scope: !362) +!448 = !DILocation(line: 124, column: 44, scope: !362) +!449 = !DILocation(line: 127, column: 48, scope: !369) +!450 = !DILocation(line: 127, column: 34, scope: !375) +!451 = !DILocation(line: 124, column: 5, scope: !362) +!452 = 
!DILocation(line: 124, scope: !362) +!453 = !DILocation(line: 124, column: 30, scope: !367) +!454 = !DILocation(line: 0, scope: !370) +!455 = !DILocation(line: 0, scope: !367) +!456 = !DILocation(line: 0, scope: !369) +!457 = !DILocation(line: 127, column: 46, scope: !369) +!458 = !DILocation(line: 127, column: 9, scope: !369) +!459 = !DILocation(line: 127, scope: !369) +!460 = !DILocation(line: 0, scope: !375) +!461 = !DILocation(line: 130, column: 13, scope: !377) +!462 = !DILocation(line: 130, column: 19, scope: !377) +!463 = !DILocation(line: 0, scope: !382) +!464 = !DILocation(line: 131, column: 18, scope: !382) +!465 = !DILocation(line: 131, column: 58, scope: !385) +!466 = !DILocation(line: 131, column: 13, scope: !382) +!467 = !DILocation(line: 167, column: 13, scope: !382) +!468 = !DILocation(line: 127, column: 60, scope: !375) +!469 = !DILocation(line: 127, column: 9, scope: !375) +!470 = distinct !{!470, !458, !471, !472, !473, !474} +!471 = !DILocation(line: 168, column: 9, scope: !369) +!472 = !{!"llvm.loop.mustprogress"} +!473 = !{!"llvm.loop.unroll.disable"} +!474 = !{!"llvm.loop.fromtapirloop"} +!475 = distinct !{!475, !458, !471, !472, !476, !473, !477} +!476 = !{!"tapir.loop.spawn.strategy", i32 1} +!477 = !{!"tapir.loop.grainsize", i32 1} +!478 = !DILocation(line: 132, column: 41, scope: !384) +!479 = !DILocation(line: 0, scope: !384) +!480 = !DILocation(line: 133, column: 47, scope: !384) +!481 = !DILocation(line: 133, column: 52, scope: !384) +!482 = !DILocation(line: 133, column: 42, scope: !384) +!483 = !DILocation(line: 133, column: 57, scope: !384) +!484 = !DILocation(line: 134, column: 62, scope: !384) +!485 = !DILocation(line: 134, column: 57, scope: !384) +!486 = !DILocation(line: 137, column: 40, scope: !384) +!487 = !DILocation(line: 138, column: 40, scope: !384) +!488 = !DILocation(line: 144, column: 62, scope: !384) +!489 = !{!416, !405, i64 280} +!490 = !DILocation(line: 144, column: 77, scope: !384) +!491 = !DILocation(line: 144, 
column: 88, scope: !384) +!492 = !DILocation(line: 144, column: 83, scope: !384) +!493 = !DILocation(line: 144, column: 67, scope: !384) +!494 = !DILocation(line: 151, column: 32, scope: !384) +!495 = !DILocation(line: 151, column: 41, scope: !384) +!496 = !DILocation(line: 151, column: 46, scope: !384) +!497 = !DILocation(line: 151, column: 22, scope: !384) +!498 = !DILocation(line: 153, column: 28, scope: !384) +!499 = !DILocation(line: 153, column: 39, scope: !384) +!500 = !DILocation(line: 153, column: 50, scope: !384) +!501 = !DILocation(line: 153, column: 34, scope: !384) +!502 = !DILocation(line: 153, column: 45, scope: !384) +!503 = !DILocation(line: 152, column: 45, scope: !384) +!504 = !DILocation(line: 152, column: 61, scope: !384) +!505 = !DILocation(line: 150, column: 62, scope: !384) +!506 = !DILocation(line: 154, column: 60, scope: !384) +!507 = !DILocation(line: 154, column: 70, scope: !384) +!508 = !DILocation(line: 154, column: 79, scope: !384) +!509 = !DILocation(line: 154, column: 88, scope: !384) +!510 = !DILocation(line: 154, column: 75, scope: !384) +!511 = !DILocation(line: 154, column: 84, scope: !384) +!512 = !DILocation(line: 154, column: 65, scope: !384) +!513 = !DILocation(line: 0, scope: !397) +!514 = !DILocation(line: 160, column: 17, scope: !397) +!515 = !DILocation(line: 161, column: 44, scope: !516) +!516 = distinct !DILexicalBlock(scope: !517, file: !229, line: 160, column: 90) +!517 = distinct !DILexicalBlock(scope: !397, file: !229, line: 160, column: 17) +!518 = !DILocation(line: 161, column: 36, scope: !516) +!519 = !DILocation(line: 161, column: 85, scope: !516) +!520 = !DILocation(line: 161, column: 80, scope: !516) +!521 = !DILocation(line: 161, column: 21, scope: !516) +!522 = !DILocation(line: 160, column: 82, scope: !517) +!523 = !DILocation(line: 160, column: 62, scope: !517) +!524 = distinct !{!524, !514, !525, !472} +!525 = !DILocation(line: 162, column: 17, scope: !397) +!526 = !DILocation(line: 0, scope: !399) +!527 
= !DILocation(line: 164, column: 17, scope: !399) +!528 = !DILocation(line: 165, column: 46, scope: !529) +!529 = distinct !DILexicalBlock(scope: !530, file: !229, line: 164, column: 50) +!530 = distinct !DILexicalBlock(scope: !399, file: !229, line: 164, column: 17) +!531 = !DILocation(line: 165, column: 50, scope: !529) +!532 = !DILocation(line: 165, column: 42, scope: !529) +!533 = !DILocation(line: 165, column: 29, scope: !529) +!534 = !DILocation(line: 165, column: 66, scope: !529) +!535 = !DILocation(line: 165, column: 61, scope: !529) +!536 = !DILocation(line: 165, column: 21, scope: !529) +!537 = !DILocation(line: 164, column: 44, scope: !530) +!538 = !DILocation(line: 164, column: 37, scope: !530) +!539 = distinct !{!539, !527, !540, !472} +!540 = !DILocation(line: 166, column: 17, scope: !399) +!541 = !DILocation(line: 131, column: 78, scope: !385) +!542 = distinct !{!542, !466, !467, !472} +!543 = !DILocation(line: 169, column: 5, scope: !370) +!544 = !DILocation(line: 124, column: 56, scope: !367) +!545 = !DILocation(line: 124, column: 42, scope: !367) +!546 = !DILocation(line: 124, column: 5, scope: !367) +!547 = distinct !{!547, !451, !548, !472, !476, !477} +!548 = !DILocation(line: 169, column: 5, scope: !362) +!549 = !DILocation(line: 170, column: 1, scope: !228) +!550 = !DISubprogram(name: "ggml_is_contiguous", scope: !11, file: !11, line: 713, type: !551, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) +!551 = !DISubroutineType(types: !552) +!552 = !{!322, !286} +!553 = !DISubprogram(name: "ggml_row_size", scope: !11, file: !11, line: 692, type: !554, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized) +!554 = !DISubroutineType(types: !555) +!555 = !{!142, !17, !162} diff --git a/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc.ll b/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc.ll new file mode 100644 index 000000000000..ecfe17bb1a07 --- /dev/null +++ b/llvm/test/Transforms/Tapir/loop-spawning-nested-spawn-alloc.ll 
@@ -0,0 +1,322 @@ +; Check that loop-spawning and task-simplify correctly handle a static memory allocation and +; nested spawn within a parallel loop body. +; +; RUN: opt < %s -passes="loop-spawning,task-simplify" -S | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx14.0.0" + +%struct.ggml_type_traits_t = type { ptr, i32, i64, i8, ptr, ptr, ptr, ptr, i32, i64 } +%struct.ggml_tensor = type { i32, i32, ptr, [4 x i64], [4 x i64], i32, [16 x i32], i32, ptr, [10 x ptr], i32, i64, i64, ptr, i64, ptr, [64 x i8], ptr, [8 x i8] } + +@type_traits = local_unnamed_addr constant [24 x %struct.ggml_type_traits_t] zeroinitializer, align 8 + +; Function Attrs: nounwind ssp uwtable(sync) +define void @ggml_compute_forward_mul_mat(ptr nocapture noundef readnone %params, ptr nocapture noundef readonly %dst) local_unnamed_addr #0 { +entry: + %syncreg = tail call token @llvm.syncregion.start() + %src = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 9 + %0 = load ptr, ptr %src, align 8, !tbaa !6 + %arrayidx3 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 9, i64 1 + %1 = load ptr, ptr %arrayidx3, align 8, !tbaa !6 + %arrayidx6 = getelementptr inbounds %struct.ggml_tensor, ptr %0, i64 0, i32 3, i64 1 + %2 = load i64, ptr %arrayidx6, align 8, !tbaa !10 + %ne18 = getelementptr inbounds %struct.ggml_tensor, ptr %1, i64 0, i32 3 + %3 = load i64, ptr %ne18, align 8, !tbaa !10 + %arrayidx21 = getelementptr inbounds %struct.ggml_tensor, ptr %1, i64 0, i32 3, i64 1 + %4 = load i64, ptr %arrayidx21, align 8, !tbaa !10 + %arrayidx23 = getelementptr inbounds %struct.ggml_tensor, ptr %1, i64 0, i32 3, i64 2 + %5 = load i64, ptr %arrayidx23, align 8, !tbaa !10 + %arrayidx25 = getelementptr inbounds %struct.ggml_tensor, ptr %1, i64 0, i32 3, i64 3 + %6 = load i64, ptr %arrayidx25, align 8, !tbaa !10 + %arrayidx38 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 3, i64 1 + %7 = load i64, 
ptr %arrayidx38, align 8, !tbaa !10 + %arrayidx46 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, i64 1 + %8 = load i64, ptr %arrayidx46, align 8, !tbaa !12 + %arrayidx48 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, i64 2 + %9 = load i64, ptr %arrayidx48, align 8, !tbaa !12 + %arrayidx50 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 4, i64 3 + %10 = load i64, ptr %arrayidx50, align 8, !tbaa !12 + %call = tail call zeroext i1 @ggml_is_contiguous(ptr noundef %1) #7 + %call64 = tail call i64 @ggml_row_size(i32 noundef 0, i64 noundef %3) #7 + %mul = mul i64 %7, %5 + %mul65 = mul nsw i64 %mul, %6 + %11 = and i64 %2, 1 + %cmp96.not = icmp ne i64 %11, 0 + %12 = and i64 %4, 1 + %cmp99.not = icmp ne i64 %12, 0 + %or.cond.not = select i1 %cmp96.not, i1 true, i1 %cmp99.not + %or.cond.not.fr = freeze i1 %or.cond.not + %nrc.0 = zext i1 %or.cond.not.fr to i64 + %cmp110 = icmp sgt i64 %2, 0 + br i1 %cmp110, label %pfor.ph, label %cleanup272 + +pfor.ph: ; preds = %entry + %sub113 = add nsw i64 %2, -1 + %div114425 = lshr i64 %sub113, 4 + %cmp121 = icmp sgt i64 %mul65, 0 + %data184 = getelementptr inbounds %struct.ggml_tensor, ptr %dst, i64 0, i32 15 + %brmerge.not = and i1 %cmp121, %or.cond.not.fr + br i1 %brmerge.not, label %pfor.cond.us.us.preheader, label %pfor.cond.cleanup267 + +pfor.cond.us.us.preheader: ; preds = %pfor.ph + %sub129 = add nsw i64 %mul65, -1 + %div130426 = lshr i64 %sub129, 4 + %xtraiter = and i64 %div130426, 1 + %13 = add nuw nsw i64 %xtraiter, 1 + %14 = icmp ult i64 %mul65, 33 + %stripiter453 = lshr i64 %sub129, 5 + %15 = and i64 %div130426, 1152921504606846974 + br label %pfor.cond.us.us + +pfor.cond.us.us: ; preds = %pfor.cond.us.us.preheader, %pfor.inc263.us.us + %__begin.0.us.us = phi i64 [ %inc264.us.us, %pfor.inc263.us.us ], [ 0, %pfor.cond.us.us.preheader ] + %mul116.us.us = shl nsw i64 %__begin.0.us.us, 4 + detach within %syncreg, label %pfor.ph125.us.us, label %pfor.inc263.us.us + 
+pfor.ph125.us.us: ; preds = %pfor.cond.us.us + %tmp.us.us.us.epil = alloca [32 x float], align 4 + %syncreg118.us.us = tail call token @llvm.syncregion.start() + %add192.us.us = add nuw nsw i64 %mul116.us.us, 16 + %cmp196.us.us = icmp sge i64 %mul116.us.us, %2 + %cond244.us.us = tail call i64 @llvm.smin.i64(i64 %add192.us.us, i64 %2) + %sub245.us.us = sub nsw i64 %cond244.us.us, %mul116.us.us + %mul246.us.us = shl i64 %sub245.us.us, 2 + br i1 %14, label %pfor.cond132.us.us.us.epil.preheader, label %pfor.ph125.us.us.new + +pfor.ph125.us.us.new: ; preds = %pfor.ph125.us.us + detach within %syncreg118.us.us, label %pfor.cond132.us.us.us.strpm.detachloop.entry, label %pfor.cond132.us.us.us.epil.preheader + +pfor.cond132.us.us.us.epil.preheader: ; preds = %pfor.ph125.us.us, %pfor.cond132.us.us.us.strpm.detachloop.reattach.split, %pfor.ph125.us.us.new + br label %pfor.cond132.us.us.us.epil + +cleanup.us.us: ; preds = %pfor.cond.cleanup.split.us.us.us + reattach within %syncreg, label %pfor.inc263.us.us + +pfor.inc263.us.us: ; preds = %cleanup.us.us, %pfor.cond.us.us + %inc264.us.us = add nuw nsw i64 %__begin.0.us.us, 1 + %exitcond449.not = icmp eq i64 %__begin.0.us.us, %div114425 + br i1 %exitcond449.not, label %pfor.cond.cleanup267, label %pfor.cond.us.us, !llvm.loop !14 + +pfor.cond132.us.us.us.strpm.detachloop.entry: ; preds = %pfor.ph125.us.us.new + %syncreg118.us.us.strpm.detachloop = tail call token @llvm.syncregion.start() + br label %pfor.cond132.us.us.us.strpm.outer + +pfor.cond132.us.us.us.strpm.outer: ; preds = %pfor.inc.us.us.us.strpm.outer, %pfor.cond132.us.us.us.strpm.detachloop.entry + %niter = phi i64 [ 0, %pfor.cond132.us.us.us.strpm.detachloop.entry ], [ %niter.nadd, %pfor.inc.us.us.us.strpm.outer ] + detach within %syncreg118.us.us.strpm.detachloop, label %pfor.body137.us.us.us.strpm.outer, label %pfor.inc.us.us.us.strpm.outer + +pfor.body137.us.us.us.strpm.outer: ; preds = %pfor.cond132.us.us.us.strpm.outer + %tmp.us.us.us = alloca [32 x float], 
align 4 + %16 = shl nuw i64 %niter, 1 + br label %pfor.cond132.us.us.us + +pfor.cond132.us.us.us: ; preds = %pfor.body137.us.us.us.strpm.outer, %for.cond.cleanup.us.us.us + %__begin126.0.us.us.us = phi i64 [ %inc258.us.us.us, %for.cond.cleanup.us.us.us ], [ %16, %pfor.body137.us.us.us.strpm.outer ] + %inneriter = phi i64 [ %inneriter.nsub, %for.cond.cleanup.us.us.us ], [ 2, %pfor.body137.us.us.us.strpm.outer ] + %mul134.us.us.us = shl nsw i64 %__begin126.0.us.us.us, 4 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %tmp.us.us.us) + %add138.us.us.us = add nuw nsw i64 %mul134.us.us.us, 16 + %cmp141429.us.us.us = icmp slt i64 %mul134.us.us.us, %mul65 + br i1 %cmp141429.us.us.us, label %for.body.lr.ph.us.us.us, label %for.cond.cleanup.us.us.us + +for.cond.cleanup.us.us.us: ; preds = %for.body.us.us.us.us, %pfor.cond132.us.us.us + call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %tmp.us.us.us) + %inc258.us.us.us = add nuw nsw i64 %__begin126.0.us.us.us, 1 + %inneriter.nsub = add nsw i64 %inneriter, -1 + %inneriter.ncmp = icmp eq i64 %inneriter.nsub, 0 + br i1 %inneriter.ncmp, label %pfor.inc.us.us.us.reattach, label %pfor.cond132.us.us.us, !llvm.loop !18 + +pfor.inc.us.us.us.reattach: ; preds = %for.cond.cleanup.us.us.us + reattach within %syncreg118.us.us.strpm.detachloop, label %pfor.inc.us.us.us.strpm.outer + +pfor.inc.us.us.us.strpm.outer: ; preds = %pfor.inc.us.us.us.reattach, %pfor.cond132.us.us.us.strpm.outer + %niter.nadd = add nuw nsw i64 %niter, 1 + %niter.ncmp = icmp eq i64 %niter.nadd, %stripiter453 + br i1 %niter.ncmp, label %pfor.cond132.us.us.us.strpm.detachloop.sync, label %pfor.cond132.us.us.us.strpm.outer, !llvm.loop !21 + +pfor.cond132.us.us.us.strpm.detachloop.sync: ; preds = %pfor.inc.us.us.us.strpm.outer + sync within %syncreg118.us.us.strpm.detachloop, label %pfor.cond132.us.us.us.strpm.detachloop.reattach.split + +pfor.cond132.us.us.us.strpm.detachloop.reattach.split: ; preds = %pfor.cond132.us.us.us.strpm.detachloop.sync + reattach 
within %syncreg118.us.us, label %pfor.cond132.us.us.us.epil.preheader + +for.body.lr.ph.us.us.us: ; preds = %pfor.cond132.us.us.us + tail call void @llvm.assume(i1 %cmp196.us.us) + br label %for.body.us.us.us.us + +for.body.us.us.us.us: ; preds = %for.body.us.us.us.us, %for.body.lr.ph.us.us.us + %ir1.0430.us.us.us.us = phi i64 [ %mul134.us.us.us, %for.body.lr.ph.us.us.us ], [ %add256.us.us.us.us, %for.body.us.us.us.us ] + %div145.us.us.us.us = sdiv i64 %ir1.0430.us.us.us.us, %mul + %mul147.us.us.us.us = mul i64 %mul, %div145.us.us.us.us + %sub148.us.us.us.us = sub nsw i64 %ir1.0430.us.us.us.us, %mul147.us.us.us.us + %div149.us.us.us.us = sdiv i64 %sub148.us.us.us.us, %7 + %mul153.us.us.us.us = mul nsw i64 %div149.us.us.us.us, %7 + %sub154.us.us.us.us = sub nsw i64 %sub148.us.us.us.us, %mul153.us.us.us.us + %17 = load ptr, ptr %data184, align 8, !tbaa !22 + %mul185.us.us.us.us = mul i64 %sub154.us.us.us.us, %8 + %mul186.us.us.us.us = mul i64 %div149.us.us.us.us, %9 + %mul188.us.us.us.us = mul i64 %div145.us.us.us.us, %10 + %add187.us.us.us.us = add i64 %mul186.us.us.us.us, %mul188.us.us.us.us + %add189.us.us.us.us = add i64 %add187.us.us.us.us, %mul185.us.us.us.us + %add.ptr190.us.us.us.us = getelementptr inbounds i8, ptr %17, i64 %add189.us.us.us.us + %arrayidx234.us.us.us.us = getelementptr inbounds float, ptr %add.ptr190.us.us.us.us, i64 %mul116.us.us + call void @llvm.memcpy.p0.p0.i64(ptr noundef align 1 %arrayidx234.us.us.us.us, ptr noundef nonnull align 4 %tmp.us.us.us, i64 noundef %mul246.us.us, i1 noundef false) #7 + %add256.us.us.us.us = add nuw nsw i64 %ir1.0430.us.us.us.us, %nrc.0 + %cmp139.us.us.us.us = icmp ult i64 %add256.us.us.us.us, %add138.us.us.us + %cmp141.us.us.us.us = icmp slt i64 %add256.us.us.us.us, %mul65 + %18 = select i1 %cmp139.us.us.us.us, i1 %cmp141.us.us.us.us, i1 false + br i1 %18, label %for.body.us.us.us.us, label %for.cond.cleanup.us.us.us, !llvm.loop !25 + +pfor.cond132.us.us.us.epil: ; preds = 
%pfor.cond132.us.us.us.epil.preheader, %for.cond.cleanup.us.us.us.epil + %__begin126.0.us.us.us.epil = phi i64 [ %inc258.us.us.us.epil, %for.cond.cleanup.us.us.us.epil ], [ %15, %pfor.cond132.us.us.us.epil.preheader ] + %epil.iter = phi i64 [ %epil.iter.sub, %for.cond.cleanup.us.us.us.epil ], [ %13, %pfor.cond132.us.us.us.epil.preheader ] + %mul134.us.us.us.epil = shl nsw i64 %__begin126.0.us.us.us.epil, 4 + call void @llvm.lifetime.start.p0(i64 128, ptr nonnull %tmp.us.us.us.epil) + %add138.us.us.us.epil = add nuw nsw i64 %mul134.us.us.us.epil, 16 + %cmp141429.us.us.us.epil = icmp slt i64 %mul134.us.us.us.epil, %mul65 + br i1 %cmp141429.us.us.us.epil, label %for.body.lr.ph.us.us.us.epil, label %for.cond.cleanup.us.us.us.epil + +for.body.lr.ph.us.us.us.epil: ; preds = %pfor.cond132.us.us.us.epil + tail call void @llvm.assume(i1 %cmp196.us.us) + br label %for.body.us.us.us.us.epil + +for.body.us.us.us.us.epil: ; preds = %for.body.us.us.us.us.epil, %for.body.lr.ph.us.us.us.epil + %ir1.0430.us.us.us.us.epil = phi i64 [ %mul134.us.us.us.epil, %for.body.lr.ph.us.us.us.epil ], [ %add256.us.us.us.us.epil, %for.body.us.us.us.us.epil ] + %div145.us.us.us.us.epil = sdiv i64 %ir1.0430.us.us.us.us.epil, %mul + %mul147.us.us.us.us.epil = mul i64 %mul, %div145.us.us.us.us.epil + %sub148.us.us.us.us.epil = sub nsw i64 %ir1.0430.us.us.us.us.epil, %mul147.us.us.us.us.epil + %div149.us.us.us.us.epil = sdiv i64 %sub148.us.us.us.us.epil, %7 + %mul153.us.us.us.us.epil = mul nsw i64 %div149.us.us.us.us.epil, %7 + %sub154.us.us.us.us.epil = sub nsw i64 %sub148.us.us.us.us.epil, %mul153.us.us.us.us.epil + %19 = load ptr, ptr %data184, align 8, !tbaa !22 + %mul185.us.us.us.us.epil = mul i64 %sub154.us.us.us.us.epil, %8 + %mul186.us.us.us.us.epil = mul i64 %div149.us.us.us.us.epil, %9 + %mul188.us.us.us.us.epil = mul i64 %div145.us.us.us.us.epil, %10 + %add187.us.us.us.us.epil = add i64 %mul186.us.us.us.us.epil, %mul188.us.us.us.us.epil + %add189.us.us.us.us.epil = add i64 
%add187.us.us.us.us.epil, %mul185.us.us.us.us.epil + %add.ptr190.us.us.us.us.epil = getelementptr inbounds i8, ptr %19, i64 %add189.us.us.us.us.epil + %arrayidx234.us.us.us.us.epil = getelementptr inbounds float, ptr %add.ptr190.us.us.us.us.epil, i64 %mul116.us.us + call void @llvm.memcpy.p0.p0.i64(ptr noundef align 1 %arrayidx234.us.us.us.us.epil, ptr noundef nonnull align 4 %tmp.us.us.us.epil, i64 noundef %mul246.us.us, i1 noundef false) #7 + %add256.us.us.us.us.epil = add nuw nsw i64 %ir1.0430.us.us.us.us.epil, %nrc.0 + %cmp139.us.us.us.us.epil = icmp ult i64 %add256.us.us.us.us.epil, %add138.us.us.us.epil + %cmp141.us.us.us.us.epil = icmp slt i64 %add256.us.us.us.us.epil, %mul65 + %20 = select i1 %cmp139.us.us.us.us.epil, i1 %cmp141.us.us.us.us.epil, i1 false + br i1 %20, label %for.body.us.us.us.us.epil, label %for.cond.cleanup.us.us.us.epil, !llvm.loop !25 + +for.cond.cleanup.us.us.us.epil: ; preds = %for.body.us.us.us.us.epil, %pfor.cond132.us.us.us.epil + call void @llvm.lifetime.end.p0(i64 128, ptr nonnull %tmp.us.us.us.epil) + %inc258.us.us.us.epil = add nuw nsw i64 %__begin126.0.us.us.us.epil, 1 + %epil.iter.sub = add nsw i64 %epil.iter, -1 + %epil.iter.cmp.not = icmp eq i64 %epil.iter.sub, 0 + br i1 %epil.iter.cmp.not, label %pfor.cond.cleanup.split.us.us.us, label %pfor.cond132.us.us.us.epil, !llvm.loop !26 + +pfor.cond.cleanup.split.us.us.us: ; preds = %for.cond.cleanup.us.us.us.epil + sync within %syncreg118.us.us, label %cleanup.us.us + +pfor.cond.cleanup267: ; preds = %pfor.inc263.us.us, %pfor.ph + sync within %syncreg, label %cleanup272 + +cleanup272: ; preds = %pfor.cond.cleanup267, %entry + ret void +} + +; CHECK: define internal fastcc void @ggml_compute_forward_mul_mat.outline_pfor.cond.us.us.ls1( +; CHECK: pfor.cond.us.us.preheader.ls1: +; CHECK-NEXT: %[[NESTED_SPAWN_SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +; CHECK-NEXT: %[[LOOP_DAC_SYNCREG:.+]] = {{.*}}call token @llvm.syncregion.start() +; CHECK: br label 
%[[LOOP_DAC_HEADER:.+]] + +; CHECK: [[LOOP_DAC_HEADER]]: + +; CHECK: detach within %[[LOOP_DAC_SYNCREG]], label %[[DAC_SPAWN:.+]], label %[[DAC_SPAWN_CONT:.+]] + +; CHECK: [[DAC_SPAWN]]: +; CHECK-NEXT: call {{.*}}void @ggml_compute_forward_mul_mat.outline_pfor.cond.us.us.ls1( +; CHECK-NEXT: reattach within %[[LOOP_DAC_SYNCREG]], label %[[DAC_SPAWN_CONT]] + +; CHECK: [[DAC_SPAWN_CONT]]: +; CHECK: br label %[[LOOP_DAC_HEADER]] + +; Check for a newly introduced taskframe that contains the static alloca. +; CHECK: %[[NEW_TF:.+]] = call token @llvm.taskframe.create() +; CHECK-NEXT: %[[TMP:.+]] = alloca [32 x float] +; CHECK-NEXT: br label %[[NESTED_LOOP_HEADER:.+]] + +; CHECK: [[NESTED_LOOP_HEADER]]: +; CHECK: br i1 %{{.*}}, label %[[NESTED_LOOP_EPIL:.+]], label %[[NESTED_LOOP_SPAWN:.+]] + +; Check for the nested spawn +; CHECK: [[NESTED_LOOP_SPAWN]]: +; CHECK-NEXT: detach within %[[NESTED_SPAWN_SYNCREG]], label %[[NESTED_SPAWN:.+]], label %[[NESTED_LOOP_EPIL]] + +; CHECK: [[NESTED_LOOP_EPIL]]: +; CHECK: sync within %[[NESTED_SPAWN_SYNCREG]], label %[[NESTED_LOOP_INC:.+]] + +; CHECK: [[NESTED_LOOP_INC]]: +; CHECK: br i1 %{{.*}}, label %[[NESTED_LOOP_EXIT:.+]], label %[[NESTED_LOOP_HEADER]] + +; CHECK: [[NESTED_LOOP_EXIT]]: +; CHECK-NEXT: call void @llvm.taskframe.end(token %[[NEW_TF]]) +; CHECK-NEXT: sync within %[[LOOP_DAC_SYNCREG]], label %{{.*}} + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1 + +declare zeroext i1 @ggml_is_contiguous(ptr noundef) local_unnamed_addr #2 + +declare i64 @ggml_row_size(i32 noundef, i64 noundef) local_unnamed_addr #2 + +; Function Attrs: mustprogress nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #3 + +; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) +declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1 + +; 
Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none) +declare i64 @llvm.smin.i64(i64, i64) #4 + +; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) +declare void @llvm.assume(i1 noundef) #5 + +; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite) +declare void @llvm.memcpy.p0.p0.i64(ptr noalias nocapture writeonly, ptr noalias nocapture readonly, i64, i1 immarg) #6 + +attributes #0 = { nounwind ssp uwtable(sync) "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) } +attributes #2 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="apple-m1" "target-features"="+aes,+crc,+dotprod,+fp-armv8,+fp16fml,+fullfp16,+lse,+neon,+ras,+rcpc,+rdm,+sha2,+sha3,+v8.1a,+v8.2a,+v8.3a,+v8.4a,+v8.5a,+v8a,+zcm,+zcz" } +attributes #3 = { mustprogress nounwind willreturn memory(argmem: readwrite) } +attributes #4 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) } +attributes #5 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) } +attributes #6 = { nocallback nofree nounwind willreturn memory(argmem: readwrite) } +attributes #7 = { nounwind } + +!llvm.module.flags = !{!0, !1, !2, !3, !4} +!llvm.ident = !{!5} + +!0 = !{i32 2, !"SDK Version", [2 x i32] [i32 14, i32 2]} +!1 = !{i32 1, !"wchar_size", i32 4} +!2 = !{i32 8, !"PIC Level", i32 2} +!3 = !{i32 7, !"uwtable", i32 1} +!4 = !{i32 7, !"frame-pointer", i32 1} +!5 = !{!"clang version 17.0.6 (git@github.com:OpenCilk/opencilk-project.git c85f242a46d579145a8538338c78acd94c43c5f4)"} +!6 = !{!7, !7, i64 0} +!7 
= !{!"any pointer", !8, i64 0} +!8 = !{!"omnipotent char", !9, i64 0} +!9 = !{!"Simple C/C++ TBAA"} +!10 = !{!11, !11, i64 0} +!11 = !{!"long long", !8, i64 0} +!12 = !{!13, !13, i64 0} +!13 = !{!"long", !8, i64 0} +!14 = distinct !{!14, !15, !16, !17} +!15 = !{!"llvm.loop.mustprogress"} +!16 = !{!"tapir.loop.spawn.strategy", i32 1} +!17 = !{!"tapir.loop.grainsize", i32 1} +!18 = distinct !{!18, !15, !19, !20} +!19 = !{!"llvm.loop.unroll.disable"} +!20 = !{!"llvm.loop.fromtapirloop"} +!21 = distinct !{!21, !15, !16, !19, !17} +!22 = !{!23, !7, i64 280} +!23 = !{!"ggml_tensor", !8, i64 0, !8, i64 4, !7, i64 8, !8, i64 16, !8, i64 48, !8, i64 80, !8, i64 84, !24, i64 148, !7, i64 152, !8, i64 160, !24, i64 240, !11, i64 248, !11, i64 256, !7, i64 264, !13, i64 272, !7, i64 280, !8, i64 288, !7, i64 352, !8, i64 360} +!24 = !{!"int", !8, i64 0} +!25 = distinct !{!25, !15} +!26 = distinct !{!26, !20, !15, !19} diff --git a/llvm/test/Transforms/Tapir/nested-loop-spawning-with-exceptions.ll b/llvm/test/Transforms/Tapir/nested-loop-spawning-with-exceptions.ll index daa158c4df57..29790e0a1410 100644 --- a/llvm/test/Transforms/Tapir/nested-loop-spawning-with-exceptions.ll +++ b/llvm/test/Transforms/Tapir/nested-loop-spawning-with-exceptions.ll @@ -1083,8 +1083,8 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1 ; CHECK-LABEL: define internal fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach29.ls2(i64 ; CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() -; CHECK: detach within %[[SYNCREG]], label %.split, label %{{.+}} unwind label %[[DUNWIND:.+]] -; CHECK: {{^.split}}: +; CHECK: detach within %[[SYNCREG]], label %pfor.detach29.preheader.ls2.dac.detach, label %{{.+}} unwind label %[[DUNWIND:.+]] +; CHECK: {{^pfor.detach29.preheader.ls2.dac.detach}}: ; CHECK-NEXT: invoke fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach29.ls2(i64 ; CHECK-NEXT: to label %[[INVOKECONT:.+]] 
unwind label %[[LSUNWIND:.+]] ; CHECK: sync within %[[SYNCREG]] @@ -1100,8 +1100,8 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1 ; CHECK-LABEL: define internal fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach.ls1(i64 ; CHECK: %[[SYNCREG:.+]] = tail call token @llvm.syncregion.start() -; CHECK: detach within %[[SYNCREG]], label %.split, label %{{.+}} unwind label %[[DUNWIND:.+]] -; CHECK: {{^.split:}} +; CHECK: detach within %[[SYNCREG]], label %pfor.detach.lr.ph.ls1.dac.detach, label %{{.+}} unwind label %[[DUNWIND:.+]] +; CHECK: {{^pfor.detach.lr.ph.ls1.dac.detach:}} ; CHECK-NEXT: invoke fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach.ls1(i64 ; CHECK-NEXT: to label %[[INVOKECONT:.+]] unwind label %[[LSUNWIND:.+]] ; CHECK: {{^pfor.detach29.preheader.ls1:}} diff --git a/llvm/test/Transforms/Tapir/nested-loop-spawning.ll b/llvm/test/Transforms/Tapir/nested-loop-spawning.ll index 5fcecc316bd6..ca64e96a807c 100644 --- a/llvm/test/Transforms/Tapir/nested-loop-spawning.ll +++ b/llvm/test/Transforms/Tapir/nested-loop-spawning.ll @@ -845,14 +845,14 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1 ; CHECK-LABEL: define internal fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach21.ls2(i64 ; CHECK: %[[SYNCREG:.+]] = call token @llvm.syncregion.start() -; CHECK: detach within %[[SYNCREG]], label %.split, label %{{.+}} -; CHECK: {{^.split}}: +; CHECK: detach within %[[SYNCREG]], label %pfor.detach21.preheader.ls2.dac.detach, label %{{.+}} +; CHECK: {{^pfor.detach21.preheader.ls2.dac.detach}}: ; CHECK-NEXT: call fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach21.ls2(i64 ; CHECK-LABEL: define internal fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach.ls1(i64 ; CHECK: %[[SYNCREG:.+]] = tail call token @llvm.syncregion.start() -; CHECK: detach within 
%[[SYNCREG]], label %.split, label %{{.+}} -; CHECK: {{^.split:}} +; CHECK: detach within %[[SYNCREG]], label %pfor.detach.lr.ph.ls1.dac.detach, label %{{.+}} +; CHECK: {{^pfor.detach.lr.ph.ls1.dac.detach:}} ; CHECK-NEXT: call fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach.ls1(i64 ; CHECK: {{^pfor.detach21.preheader.ls1:}} ; CHECK: call fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach21.ls2(i64 0, diff --git a/llvm/test/Transforms/Tapir/nested-outline-vector-width.ll b/llvm/test/Transforms/Tapir/nested-outline-vector-width.ll index 1ffcaff2e85d..a5b0746f9b64 100644 --- a/llvm/test/Transforms/Tapir/nested-outline-vector-width.ll +++ b/llvm/test/Transforms/Tapir/nested-outline-vector-width.ll @@ -3209,7 +3209,7 @@ pfor.preattach.3: ; preds = %if.then.3, %pfor.pr ; CHECK-DAG: <8 x i32> %broadcast.splat1309.ls3 ; CHECK: ) unnamed_addr #[[ATTRIBUTES:[0-9]+]] -; CHECK-LABEL: define {{.*}}void @_Z19suffixArrayInternalPhl.outline_pfor.cond284.strpm.outer.ls3.outline_.split.otd1( +; CHECK-LABEL: define {{.*}}void @_Z19suffixArrayInternalPhl.outline_pfor.cond284.strpm.outer.ls3.outline_pfor.cond284.strpm.detachloop.entry.ls3.dac.detach.otd1( ; CHECK-DAG: <8 x i64> %broadcast.splat1311.ls3.otd1 ; CHECK-DAG: <8 x i32> %broadcast.splat1309.ls3.otd1 ; CHECK: ) unnamed_addr #[[ATTRIBUTES2:[0-9]+]] diff --git a/llvm/test/Transforms/Tapir/sret-param.ll b/llvm/test/Transforms/Tapir/sret-param.ll index 1cfc4062ecba..4bcc771bf4a5 100644 --- a/llvm/test/Transforms/Tapir/sret-param.ll +++ b/llvm/test/Transforms/Tapir/sret-param.ll @@ -800,7 +800,7 @@ declare void @llvm.memset.p0i8.i64(i8* nocapture writeonly, i8, i64, i32, i1) #1 ; LS-LABEL: define internal fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach.ls1(ptr noalias sret(%"class.std::vector.0") align 8 %agg.result.ls1, ; LS: [[IVTYPE]] {{.+}}, [[IVTYPE]] {{.+}}, [[IVTYPE]] {{.*}}%[[GRAINSIZE:.+]], -; LS: {{^.split:}} +; LS: 
{{^pfor.detach.lr.ph.ls1.dac.detach:}} ; LS-NEXT: call fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.detach.ls1(ptr %agg.result.ls1, [[IVTYPE]] {{.+}}, [[IVTYPE]] {{.+}}, [[IVTYPE]] {{.*}}[[GRAINSIZE]], ; TT-LABEL: define internal fastcc void @_Z14func_with_sretidRSt6vectorI6paramsSaIS0_EE.outline_pfor.body.otd1(ptr {{.*}}sret(%"class.std::vector.0") {{.*}}%agg.result.otd1, From ebff1ee26c783e3e5c38def4f676ffab6edea6ec Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Sun, 3 Mar 2024 08:31:59 -0500 Subject: [PATCH 4/7] [Tapir][TapirTaskInfo] Cleanup code using clang-tidy. --- llvm/lib/Analysis/TapirTaskInfo.cpp | 66 +++--- .../Instrumentation/CilkSanitizer.cpp | 216 +++++++++--------- .../ComprehensiveStaticInstrumentation.cpp | 82 +++---- llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp | 6 +- llvm/lib/Transforms/Tapir/LoopStripMine.cpp | 61 +++-- .../Transforms/Tapir/LoopStripMinePass.cpp | 15 +- llvm/lib/Transforms/Tapir/LoweringUtils.cpp | 111 +++++---- llvm/lib/Transforms/Tapir/Outline.cpp | 20 +- .../Transforms/Tapir/SerializeSmallTasks.cpp | 2 +- llvm/lib/Transforms/Tapir/Tapir.cpp | 5 - llvm/lib/Transforms/Tapir/TapirLoopInfo.cpp | 6 +- llvm/lib/Transforms/Tapir/TapirToTarget.cpp | 2 +- llvm/lib/Transforms/Utils/InlineFunction.cpp | 2 +- llvm/lib/Transforms/Utils/TapirUtils.cpp | 40 ++-- .../lib/Transforms/Utils/TaskCanonicalize.cpp | 2 +- 15 files changed, 308 insertions(+), 328 deletions(-) diff --git a/llvm/lib/Analysis/TapirTaskInfo.cpp b/llvm/lib/Analysis/TapirTaskInfo.cpp index 665ce309c0b6..8d08b9ceb823 100644 --- a/llvm/lib/Analysis/TapirTaskInfo.cpp +++ b/llvm/lib/Analysis/TapirTaskInfo.cpp @@ -14,14 +14,12 @@ #include "llvm/Analysis/TapirTaskInfo.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/ScopeExit.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/LoopInfo.h" #include 
"llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/Constants.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/IRPrintingPasses.h" @@ -34,7 +32,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include using namespace llvm; @@ -214,11 +211,9 @@ bool Spindle::succInSameTask(const Spindle *Succ) const { // properly contained in ParentTask, return true. if (getParentTask()->contains(Succ)) return true; - else { - // Otherwise, check if Succ is a shared EH spindle tracked by the parent of - // ParentTask. - return getParentTask()->isSharedEHExit(Succ); - } + // Otherwise, check if Succ is a shared EH spindle tracked by the parent of + // ParentTask. + return getParentTask()->isSharedEHExit(Succ); } /// Return true if the successor spindle Succ is in a subtask of the task @@ -284,14 +279,14 @@ void Task::getSharedEHExits(SmallVectorImpl &SpindleVec) const { // themselves detach. It's not clear how this case could arise in practice, // however. SmallPtrSet Ancestors; - bool tracksSharedEHSpindles = false; + bool TracksSharedEHSpindles = false; const Task *Parent = this; do { Parent = Parent->getParentTask(); Ancestors.insert(Parent); - tracksSharedEHSpindles |= Parent->tracksSharedEHSpindles(); + TracksSharedEHSpindles |= Parent->tracksSharedEHSpindles(); } while (!Parent->isRootTask()); - if (!tracksSharedEHSpindles) return; + if (!TracksSharedEHSpindles) return; // Scan the successors of the spindles in this task to find shared EH exits. SmallVector WorkList; @@ -327,14 +322,14 @@ bool Task::isSharedEHExit(const Spindle *SharedEH) const { // themselves detach. It's not clear how this case could arise in practice, // however. 
SmallPtrSet Ancestors; - bool tracksSharedEHSpindles = false; + bool TracksSharedEHSpindles = false; const Task *Parent = this; do { Parent = Parent->getParentTask(); Ancestors.insert(Parent); - tracksSharedEHSpindles |= Parent->tracksSharedEHSpindles(); + TracksSharedEHSpindles |= Parent->tracksSharedEHSpindles(); } while (!Parent->isRootTask()); - if (!tracksSharedEHSpindles) return false; + if (!TracksSharedEHSpindles) return false; Task *SharedEHParent = SharedEH->getParentTask(); if (!Ancestors.contains(SharedEHParent)) @@ -375,9 +370,8 @@ bool Task::isSharedEHExit(const Spindle *SharedEH) const { // Add the unassociated spindles to the task T in order of a DFS CFG traversal // starting at the entry block of T. -static void -AssociateWithTask(TaskInfo *TI, Task *T, - SmallPtrSetImpl &UnassocSpindles) { +static void associateWithTask(TaskInfo *TI, Task *T, + SmallPtrSetImpl &UnassocSpindles) { SmallVector WorkList; SmallPtrSet Visited; // Add the successor spindles of the entry block of T to the worklist. @@ -423,9 +417,8 @@ AssociateWithTask(TaskInfo *TI, Task *T, // Add the unassociated blocks to the spindle S in order of a DFS CFG traversal // starting at the entry block of S. -static void -AssociateWithSpindle(TaskInfo *TI, Spindle *S, - SmallPtrSetImpl &UnassocBlocks) { +static void associateWithSpindle(TaskInfo *TI, Spindle *S, + SmallPtrSetImpl &UnassocBlocks) { SmallVector WorkList; SmallPtrSet Visited; // Add the successor blocks of the entry of S to the worklist. @@ -482,7 +475,7 @@ static void computeSpindleEdges(TaskInfo *TI) { // Search the PHI nodes in BB for a user of Val. Return Val if no PHI node in // BB uses Val. 
-static Value *FindUserAmongPHIs(Value *Val, BasicBlock *BB) { +static Value *findUserAmongPHIs(Value *Val, BasicBlock *BB) { for (PHINode &PN : BB->phis()) { if (Val->getType() != PN.getType()) continue; @@ -526,7 +519,7 @@ static void recordContinuationSpindles(TaskInfo *TI) { "Unwind destination of detach has many successors, but belongs to " "the same spindle as the detach."); Unwind = Unwind->getUniqueSuccessor(); - LPadVal = FindUserAmongPHIs(LPadVal, Unwind); + LPadVal = findUserAmongPHIs(LPadVal, Unwind); } // Set the exceptional continuation spindle for this task. Spindle *UnwindSpindle = TI->getSpindleFor(Unwind); @@ -777,7 +770,7 @@ void TaskInfo::analyze(Function &F, DominatorTree &DomTree) { SmallVector FoundSpindles; SmallVector FoundTFCreates; SmallVector UnassocTasks; - for (auto DomNode : post_order(DomTree.getRootNode())) { + for (auto *DomNode : post_order(DomTree.getRootNode())) { BasicBlock *BB = DomNode->getBlock(); // If a basic block is not a spindle entry, mark it found and continue. if (!getSpindleFor(BB)) { @@ -810,7 +803,7 @@ void TaskInfo::analyze(Function &F, DominatorTree &DomTree) { // Associate the unassociated blocks with spindle S. if (!UnassocBlocks.empty()) - AssociateWithSpindle(this, S, UnassocBlocks); + associateWithSpindle(this, S, UnassocBlocks); } // Mark taskframe.create spindles found. @@ -851,7 +844,7 @@ void TaskInfo::analyze(Function &F, DominatorTree &DomTree) { } // Associate the unassociated spindles with task T. if (!UnassocSpindles.empty()) - AssociateWithTask(this, T, UnassocSpindles); + associateWithTask(this, T, UnassocSpindles); } // If the last task is dominated by this task, add the unassociated tasks as @@ -964,14 +957,13 @@ void TaskInfo::findTaskFrameTreeHelper( // Recur into the new taskframe. 
findTaskFrameTreeHelper(SubTF, WorkList, SubTFVisited); continue; - } else { - LLVM_DEBUG({ - if (!TFSpindle->SubTaskFrames.count(SuccEdge.first)) - dbgs() << "Search encountered subtask@" - << SubT->getEntry()->getName() << " with taskframe " - << "before that subtask's taskframe.create."; - }); } + LLVM_DEBUG({ + if (!TFSpindle->SubTaskFrames.count(SuccEdge.first)) + dbgs() << "Search encountered subtask@" + << SubT->getEntry()->getName() << " with taskframe " + << "before that subtask's taskframe.create."; + }); } } @@ -1090,7 +1082,7 @@ void TaskInfo::findTaskFrameTree() { /// These are blocks which lead to uses. Knowing this allows us to avoid /// inserting PHI nodes into blocks which don't lead to uses (thus, the inserted /// phi nodes would be dead). -static void ComputeLiveInBlocks( +static void computeLiveInBlocks( const AllocaInst *AI, const SmallVectorImpl &UsingBlocks, const SmallPtrSetImpl &DefBlocks, @@ -1223,7 +1215,7 @@ bool TaskInfo::isAllocaParallelPromotable(const AllocaInst *AIP) const { // Determine which blocks the value is live in. These are blocks which lead // to uses. SmallPtrSet LiveInBlocks; - ComputeLiveInBlocks(AI, UsingBlocks, DefBlocks, LiveInBlocks); + computeLiveInBlocks(AI, UsingBlocks, DefBlocks, LiveInBlocks); // Filter out live-in blocks that are not dominated by the alloca. if (AI->getParent() != DomTree.getRoot()) { SmallVector LiveInToRemove; @@ -1289,9 +1281,9 @@ bool IsSyncedState::evaluate(const Spindle *S, unsigned EvalNum) { if (!EvalNum && !SyncedState.count(Pred)) { SyncedState[S] = setIncomplete(SyncedState[S]); continue; - } else - assert(SyncedState.count(Pred) && - "All predecessors should have synced states after first eval."); + } + assert(SyncedState.count(Pred) && + "All predecessors should have synced states after first eval."); // If we find an unsynced predecessor that is not terminated by a sync // instruction, then we must be unsynced. 
diff --git a/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp b/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp index ed6833dda581..cf63c0db9add 100644 --- a/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp +++ b/llvm/lib/Transforms/Instrumentation/CilkSanitizer.cpp @@ -44,9 +44,9 @@ #include "llvm/InitializePasses.h" #include "llvm/ProfileData/InstrProf.h" #include "llvm/Support/ModRef.h" +#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Instrumentation.h" #include "llvm/Transforms/Instrumentation/CSI.h" -#include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" #include "llvm/Transforms/Utils/Local.h" @@ -202,12 +202,12 @@ struct CilkSanitizerImpl : public CSIImpl { : CilkSanImpl(CilkSanImpl), TI(TI), LI(LI), DT(DT), DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy), TLI(TLI) {} - bool InstrumentSimpleInstructions( + bool instrumentSimpleInstructions( SmallVectorImpl &Instructions); - bool InstrumentAnyMemIntrinsics( + bool instrumentAnyMemIntrinsics( SmallVectorImpl &MemIntrinsics); - bool InstrumentCalls(SmallVectorImpl &Calls); - bool InstrumentAncillaryInstructions( + bool instrumentCalls(SmallVectorImpl &Calls); + bool instrumentAncillaryInstructions( SmallPtrSetImpl &Allocas, SmallPtrSetImpl &AllocationFnCalls, SmallPtrSetImpl &FreeCalls, @@ -235,26 +235,26 @@ struct CilkSanitizerImpl : public CSIImpl { : CilkSanImpl(CilkSanImpl), RI(RI), TI(TI), LI(LI), DT(DT), DTU(DT, DomTreeUpdater::UpdateStrategy::Lazy), TLI(TLI) {} - void InsertArgMAAPs(Function &F, Value *FuncId); - bool InstrumentSimpleInstructions( + void insertArgMAAPs(Function &F, Value *FuncId); + bool instrumentSimpleInstructions( SmallVectorImpl &Instructions); - bool InstrumentAnyMemIntrinsics( + bool instrumentAnyMemIntrinsics( SmallVectorImpl &MemIntrinsics); - bool InstrumentCalls(SmallVectorImpl &Calls); - void GetDetachesForCoalescedInstrumentation( + bool 
instrumentCalls(SmallVectorImpl &Calls); + void getDetachesForCoalescedInstrumentation( SmallPtrSetImpl &LoopInstToHoist, SmallPtrSetImpl &LoopInstToSink); - bool InstrumentAncillaryInstructions( + bool instrumentAncillaryInstructions( SmallPtrSetImpl &Allocas, SmallPtrSetImpl &AllocationFnCalls, SmallPtrSetImpl &FreeCalls, DenseMap &SyncRegNums, DenseMap &SRCounters, const DataLayout &DL); - bool InstrumentLoops(SmallPtrSetImpl &LoopInstToHoist, + bool instrumentLoops(SmallPtrSetImpl &LoopInstToHoist, SmallPtrSetImpl &LoopInstToSink, SmallPtrSetImpl &TapirLoops, ScalarEvolution *); - bool PerformDelayedInstrumentation(); + bool performDelayedInstrumentation(); private: void getDetachesForInstruction(Instruction *I); @@ -273,7 +273,7 @@ struct CilkSanitizerImpl : public CSIImpl { ModRef = Mod | Ref, NoAlias = 4, }; - static unsigned RaceTypeToFlagVal(RaceInfo::RaceType RT); + static unsigned raceTypeToFlagVal(RaceInfo::RaceType RT); // Get the MAAP value for specific instruction and operand. Value *getMAAPValue(Instruction *I, IRBuilder<> &IRB, unsigned OperandNum = static_cast(-1), @@ -425,7 +425,7 @@ struct CilkSanitizerImpl : public CSIImpl { // Initialize custom hooks for CilkSanitizer void initializeCsanHooks(); - Value *GetCalleeFuncID(const Function *Callee, IRBuilder<> &IRB); + Value *getCalleeFuncID(const Function *Callee, IRBuilder<> &IRB); // Helper function for prepareToInstrumentFunction that chooses loads and // stores in a basic block to instrument. 
@@ -535,9 +535,9 @@ struct CilkSanitizerImpl : public CSIImpl { DenseMap> DetachToSync; - bool LocalBaseObj(const Value *Addr, LoopInfo *LI, + bool localBaseObj(const Value *Addr, LoopInfo *LI, const TargetLibraryInfo *TLI) const; - bool PossibleRaceByCapture(const Value *Addr, const TaskInfo &TI, + bool possibleRaceByCapture(const Value *Addr, const TaskInfo &TI, LoopInfo *LI) const; bool unknownObjectUses(const Value *Addr, LoopInfo *LI, const TargetLibraryInfo *TLI) const; @@ -557,12 +557,12 @@ struct CilkSanitizerImpl : public CSIImpl { return BaseObjects[Addr]; } - bool MightHaveDetachedUse(const Value *Addr, const TaskInfo &TI) const; + bool mightHaveDetachedUse(const Value *Addr, const TaskInfo &TI) const; // // Cached results of calls to MightHaveDetachedUse. // using DetachedUseMapTy = DenseMap; // mutable DetachedUseMapTy DetachedUseCache; bool lookupMightHaveDetachedUse(const Value *Addr, const TaskInfo &TI) const { - return MightHaveDetachedUse(Addr, TI); + return mightHaveDetachedUse(Addr, TI); // if (!DetachedUseCache.count(Addr)) // DetachedUseCache[Addr] = MightHaveDetachedUse(Addr, TI); // return DetachedUseCache[Addr]; @@ -1091,7 +1091,7 @@ void CilkSanitizerImpl::initializeCsanHooks() { CsiAfterAllocaFn->setDoesNotThrow(); } -static BasicBlock *SplitOffPreds( +static BasicBlock *splitOffPreds( BasicBlock *BB, SmallVectorImpl &Preds, DominatorTree *DT, LoopInfo *LI) { if (BB->isLandingPad()) { @@ -1186,42 +1186,42 @@ static void setupBlock(BasicBlock *BB, DominatorTree *DT, LoopInfo *LI, BasicBlock *BBToSplit = BB; // Split off the predecessors of each type. 
if (!SyncPreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, SyncPreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, SyncPreds, DT, LI); NumPredTypes--; } if (!SyncUnwindPreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, SyncUnwindPreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, SyncUnwindPreds, DT, LI); NumPredTypes--; } if (!AllocFnPreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, AllocFnPreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, AllocFnPreds, DT, LI); NumPredTypes--; } if (!FreeFnPreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, FreeFnPreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, FreeFnPreds, DT, LI); NumPredTypes--; } if (!LibCallPreds.empty() && NumPredTypes > NumPredTypesRequired) { for (auto KeyVal : LibCallPreds) { if (NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, KeyVal.second, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, KeyVal.second, DT, LI); NumPredTypes--; } } } if (!InvokePreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, InvokePreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, InvokePreds, DT, LI); NumPredTypes--; } if (!TFResumePreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, TFResumePreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, TFResumePreds, DT, LI); NumPredTypes--; } // We handle detach and detached.rethrow predecessors at the end to preserve // invariants on the CFG structure about the deadness of basic blocks after // detached-rethrows. 
if (!DetachPreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, DetachPreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, DetachPreds, DT, LI); NumPredTypes--; } } @@ -1282,7 +1282,7 @@ static bool shouldInstrumentReadWriteFromAddress(const Module *M, Value *Addr) { /// Returns true if Addr can only refer to a locally allocated base object, that /// is, an object created via an AllocaInst or an AllocationFn. -bool CilkSanitizerImpl::LocalBaseObj(const Value *Addr, LoopInfo *LI, +bool CilkSanitizerImpl::localBaseObj(const Value *Addr, LoopInfo *LI, const TargetLibraryInfo *TLI) const { // If we don't have an address, give up. if (!Addr) @@ -1315,7 +1315,7 @@ bool CilkSanitizerImpl::LocalBaseObj(const Value *Addr, LoopInfo *LI, // Examine the uses of a Instruction AI to determine if it is used in a subtask. // This method assumes that AI is an allocation instruction, i.e., either an // AllocaInst or an AllocationFn. -bool CilkSanitizerImpl::MightHaveDetachedUse(const Value *V, +bool CilkSanitizerImpl::mightHaveDetachedUse(const Value *V, const TaskInfo &TI) const { // Get the task for this allocation. const Task *AllocTask = nullptr; @@ -1379,7 +1379,7 @@ bool CilkSanitizerImpl::MightHaveDetachedUse(const Value *V, } /// Returns true if accesses on Addr could race due to pointer capture. -bool CilkSanitizerImpl::PossibleRaceByCapture(const Value *Addr, +bool CilkSanitizerImpl::possibleRaceByCapture(const Value *Addr, const TaskInfo &TI, LoopInfo *LI) const { if (isa(Addr)) @@ -1484,8 +1484,8 @@ void CilkSanitizerImpl::chooseInstructionsToInstrument( Value *Addr = isa(*I) ? 
cast(I)->getPointerOperand() : cast(I)->getPointerOperand(); - if (LocalBaseObj(Addr, &LI, TLI) && - !PossibleRaceByCapture(Addr, TI, &LI)) { + if (localBaseObj(Addr, &LI, TLI) && + !possibleRaceByCapture(Addr, TI, &LI)) { // The variable is addressable but not captured, so it cannot be // referenced from a different thread and participate in a data race // (see llvm/Analysis/CaptureTracking.h for details). @@ -1539,7 +1539,7 @@ bool CilkSanitizerImpl::shouldIgnoreCall(const Instruction &I) { // Helper function to get the ID of a function being called. These IDs are // stored in separate global variables in the program. This method will create // a new global variable for the Callee's ID if necessary. -Value *CilkSanitizerImpl::GetCalleeFuncID(const Function *Callee, +Value *CilkSanitizerImpl::getCalleeFuncID(const Function *Callee, IRBuilder<> &IRB) { if (!Callee) // Unknown targets (i.e., indirect calls) are always unknown. @@ -1567,7 +1567,7 @@ Value *CilkSanitizerImpl::GetCalleeFuncID(const Function *Callee, // SimpleInstrumentor methods, which do not do static race detection. 
//------------------------------------------------------------------------------ -bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentSimpleInstructions( +bool CilkSanitizerImpl::SimpleInstrumentor::instrumentSimpleInstructions( SmallVectorImpl &Instructions) { bool Result = false; for (Instruction *I : Instructions) { @@ -1589,7 +1589,7 @@ bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentSimpleInstructions( return Result; } -bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentAnyMemIntrinsics( +bool CilkSanitizerImpl::SimpleInstrumentor::instrumentAnyMemIntrinsics( SmallVectorImpl &MemIntrinsics) { bool Result = false; for (Instruction *I : MemIntrinsics) { @@ -1612,7 +1612,7 @@ bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentAnyMemIntrinsics( return Result; } -bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentCalls( +bool CilkSanitizerImpl::SimpleInstrumentor::instrumentCalls( SmallVectorImpl &Calls) { bool Result = false; for (Instruction *I : Calls) { @@ -1639,7 +1639,7 @@ bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentCalls( return Result; } -bool CilkSanitizerImpl::SimpleInstrumentor::InstrumentAncillaryInstructions( +bool CilkSanitizerImpl::SimpleInstrumentor::instrumentAncillaryInstructions( SmallPtrSetImpl &Allocas, SmallPtrSetImpl &AllocationFnCalls, SmallPtrSetImpl &FreeCalls, @@ -1745,7 +1745,7 @@ void CilkSanitizerImpl::Instrumentor::getDetachesForInstruction( } } -unsigned CilkSanitizerImpl::Instrumentor::RaceTypeToFlagVal( +unsigned CilkSanitizerImpl::Instrumentor::raceTypeToFlagVal( RaceInfo::RaceType RT) { unsigned FlagVal = static_cast(MAAPValue::NoAccess); if (RaceInfo::isLocalRace(RT) || RaceInfo::isOpaqueRace(RT)) @@ -1762,7 +1762,7 @@ static Value *getMAAPIRValue(IRBuilder<> &IRB, unsigned MV) { } // Insert per-argument MAAPs for this function -void CilkSanitizerImpl::Instrumentor::InsertArgMAAPs(Function &F, +void CilkSanitizerImpl::Instrumentor::insertArgMAAPs(Function &F, Value *FuncId) { if (!MAAPChecks) return; 
@@ -1780,7 +1780,7 @@ void CilkSanitizerImpl::Instrumentor::InsertArgMAAPs(Function &F, Value *FinalMV; // If this function is main, then it has no ancestors that can create races. if (F.getName() == "main") { - FinalMV = getMAAPIRValue(IRB, RaceTypeToFlagVal(RaceInfo::None)); + FinalMV = getMAAPIRValue(IRB, raceTypeToFlagVal(RaceInfo::None)); IRB.CreateStore(FinalMV, NewFlag); } else { // Call the runtime function to set the value of this flag. @@ -1827,7 +1827,7 @@ void CilkSanitizerImpl::Instrumentor::InsertArgMAAPs(Function &F, } } -bool CilkSanitizerImpl::Instrumentor::InstrumentSimpleInstructions( +bool CilkSanitizerImpl::Instrumentor::instrumentSimpleInstructions( SmallVectorImpl &Instructions) { bool Result = false; for (Instruction *I : Instructions) { @@ -1866,7 +1866,7 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentSimpleInstructions( return Result; } -bool CilkSanitizerImpl::Instrumentor::InstrumentAnyMemIntrinsics( +bool CilkSanitizerImpl::Instrumentor::instrumentAnyMemIntrinsics( SmallVectorImpl &MemIntrinsics) { bool Result = false; for (Instruction *I : MemIntrinsics) { @@ -1911,7 +1911,7 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentAnyMemIntrinsics( return Result; } -bool CilkSanitizerImpl::Instrumentor::InstrumentCalls( +bool CilkSanitizerImpl::Instrumentor::instrumentCalls( SmallVectorImpl &Calls) { bool Result = false; for (Instruction *I : Calls) { @@ -2021,7 +2021,7 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentCalls( ++OpIdx; } - Value *CalleeID = CilkSanImpl.GetCalleeFuncID(CB->getCalledFunction(), IRB); + Value *CalleeID = CilkSanImpl.getCalleeFuncID(CB->getCalledFunction(), IRB); // We set the MAAPs in reverse order to support stack-like access of the // MAAPs by in-order calls to GetMAAP in the callee. 
for (Value *MAAPVal : reverse(MAAPVals)) @@ -2082,14 +2082,14 @@ static MemoryLocation getMemoryLocation(Instruction *I, unsigned OperandNum, return MemoryLocation::getForSource(MT); } return MemoryLocation::getForDest(MI); - } else if (OperandNum == static_cast(-1)) { + } + if (OperandNum == static_cast(-1)) { return MemoryLocation::get(I); - } else { - assert(isa(I) && - "Unknown instruction and operand ID for getting MemoryLocation."); - CallBase *CB = cast(I); - return MemoryLocation::getForArgument(CB, OperandNum, TLI); } + assert(isa(I) && + "Unknown instruction and operand ID for getting MemoryLocation."); + CallBase *CB = cast(I); + return MemoryLocation::getForArgument(CB, OperandNum, TLI); } // Evaluate the noalias value in the MAAP for Obj, and intersect that result @@ -2275,7 +2275,7 @@ Value *CilkSanitizerImpl::Instrumentor::getMAAPValue(Instruction *I, Value *FlagLoad = readMAAPVal(LocalMAAPs[Obj], IRB); Value *FlagCheck = IRB.CreateAnd( - FlagLoad, getMAAPIRValue(IRB, RaceTypeToFlagVal(RD.Type))); + FlagLoad, getMAAPIRValue(IRB, raceTypeToFlagVal(RD.Type))); MV = IRB.CreateOr(MV, FlagCheck); // Get the dynamic no-alias bit from the MAAP value. @@ -2367,7 +2367,7 @@ Value *CilkSanitizerImpl::Instrumentor::getMAAPValue(Instruction *I, IRB.CreateOr(NoAliasCheck, ArgNoAliasCheck), getMAAPIRValue(IRB, 0), IRB.CreateAnd(FlagLoad, - getMAAPIRValue(IRB, RaceTypeToFlagVal(RD.Type)))); + getMAAPIRValue(IRB, raceTypeToFlagVal(RD.Type)))); MV = IRB.CreateOr(MV, FlagCheck); } } @@ -2446,7 +2446,7 @@ Value *CilkSanitizerImpl::Instrumentor::getMAAPCheck(Instruction *I, // not disable checking of local races. 
Value *LocalCheck; Value *FlagCheck = IRB.CreateAnd( - FlagLoad, getMAAPIRValue(IRB, RaceTypeToFlagVal(RD.Type))); + FlagLoad, getMAAPIRValue(IRB, raceTypeToFlagVal(RD.Type))); LLVM_DEBUG(dbgs() << " FlagCheck " << *FlagCheck << "\n"); LocalCheck = IRB.CreateICmpEQ(getMAAPIRValue(IRB, 0), FlagCheck); LLVM_DEBUG(dbgs() << " LocalCheck " << *LocalCheck << "\n"); @@ -2486,7 +2486,7 @@ Value *CilkSanitizerImpl::Instrumentor::getMAAPCheck(Instruction *I, LLVM_DEBUG(dbgs() << " FlagLoad " << *FlagLoad << "\n"); Value *LocalCheck; Value *FlagCheck = IRB.CreateAnd( - FlagLoad, getMAAPIRValue(IRB, RaceTypeToFlagVal(RD.Type))); + FlagLoad, getMAAPIRValue(IRB, raceTypeToFlagVal(RD.Type))); LLVM_DEBUG(dbgs() << " FlagCheck " << *FlagCheck << "\n"); LocalCheck = IRB.CreateICmpEQ(getMAAPIRValue(IRB, 0), FlagCheck); LLVM_DEBUG(dbgs() << " LocalCheck " << *LocalCheck << "\n"); @@ -2528,7 +2528,7 @@ Value *CilkSanitizerImpl::Instrumentor::getMAAPCheck(Instruction *I, Value *FlagLoad = readMAAPVal(LocalMAAPs[&Arg], IRB); Value *FlagCheck; FlagCheck = IRB.CreateAnd( - FlagLoad, getMAAPIRValue(IRB, RaceTypeToFlagVal(RD.Type))); + FlagLoad, getMAAPIRValue(IRB, raceTypeToFlagVal(RD.Type))); Value *LocalCheck = IRB.CreateICmpEQ(getMAAPIRValue(IRB, 0), FlagCheck); Value *ArgNoAliasFlag = IRB.CreateAnd( @@ -2545,7 +2545,7 @@ Value *CilkSanitizerImpl::Instrumentor::getMAAPCheck(Instruction *I, return MAAPChk; } -bool CilkSanitizerImpl::Instrumentor::PerformDelayedInstrumentation() { +bool CilkSanitizerImpl::Instrumentor::performDelayedInstrumentation() { bool Result = false; // Handle delayed simple instructions for (Instruction *I : DelayedSimpleInsts) { @@ -2600,7 +2600,7 @@ bool CilkSanitizerImpl::Instrumentor::PerformDelayedInstrumentation() { // Helper function to walk the hierarchy of tasks containing BasicBlock BB to // get the top-level task in loop L that contains BB. 
-static Task *GetTopLevelTaskFor(BasicBlock *BB, Loop *L, TaskInfo &TI) { +static Task *getTopLevelTaskFor(BasicBlock *BB, Loop *L, TaskInfo &TI) { Task *T = TI.getTaskFor(BB); // Return null if we don't find a task for BB contained in L. if (!T || !L->contains(T->getEntry())) @@ -2614,7 +2614,7 @@ static Task *GetTopLevelTaskFor(BasicBlock *BB, Loop *L, TaskInfo &TI) { return T; } -void CilkSanitizerImpl::Instrumentor::GetDetachesForCoalescedInstrumentation( +void CilkSanitizerImpl::Instrumentor::getDetachesForCoalescedInstrumentation( SmallPtrSetImpl &LoopInstToHoist, SmallPtrSetImpl &LoopInstToSink) { // Determine detaches to instrument for the coalesced instrumentation. @@ -2629,7 +2629,7 @@ void CilkSanitizerImpl::Instrumentor::GetDetachesForCoalescedInstrumentation( SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); for (BasicBlock *ExitBB : ExitBlocks) { - if (GetTopLevelTaskFor(ExitBB, L, TI)) + if (getTopLevelTaskFor(ExitBB, L, TI)) // Skip any exit blocks in a Tapir task inside the loop. These exit // blocks lie on exception-handling paths, and to handle these blocks, // it suffices to insert instrumentation in the unwind destination of @@ -2643,7 +2643,7 @@ void CilkSanitizerImpl::Instrumentor::GetDetachesForCoalescedInstrumentation( } } -bool CilkSanitizerImpl::Instrumentor::InstrumentAncillaryInstructions( +bool CilkSanitizerImpl::Instrumentor::instrumentAncillaryInstructions( SmallPtrSetImpl &Allocas, SmallPtrSetImpl &AllocationFnCalls, SmallPtrSetImpl &FreeCalls, @@ -2779,19 +2779,17 @@ static Instruction *getLoopBlockInsertPt(BasicBlock *BB, FunctionCallee LoopHook // with respect to it. 
if (AfterHook) return &*CB->getIterator()->getNextNode(); - else - return CB; + return CB; } if (AfterHook) return &*BB->getFirstInsertionPt(); - else - return BB->getTerminator(); + return BB->getTerminator(); } // TODO: Maybe to avoid confusion with CilkSanImpl.Options.InstrumentLoops // (which is unrelated to this), rename this to involve the word "hoist" or something. -bool CilkSanitizerImpl::Instrumentor::InstrumentLoops( +bool CilkSanitizerImpl::Instrumentor::instrumentLoops( SmallPtrSetImpl &LoopInstToHoist, SmallPtrSetImpl &LoopInstToSink, SmallPtrSetImpl &TapirLoops, ScalarEvolution *SE) { @@ -2927,7 +2925,7 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentLoops( SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); for (BasicBlock *ExitBB : ExitBlocks) { - if (GetTopLevelTaskFor(ExitBB, L, TI)) + if (getTopLevelTaskFor(ExitBB, L, TI)) // Skip any exit blocks in a Tapir task inside the loop. These exit // blocks lie on exception-handling paths, and to handle these blocks, // it suffices to insert instrumentation in the unwind destination of @@ -3056,7 +3054,7 @@ bool CilkSanitizerImpl::Instrumentor::InstrumentLoops( SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); for (BasicBlock *ExitBB : ExitBlocks) { - if (GetTopLevelTaskFor(ExitBB, L, TI)) + if (getTopLevelTaskFor(ExitBB, L, TI)) // Skip any exit blocks in a Tapir task inside the loop. 
These exit // blocks lie on exception-handling paths, and to handle these blocks, // it suffices to insert instrumentation in the unwind destination of @@ -3120,7 +3118,7 @@ bool CilkSanitizerImpl::instrumentLoadOrStoreHoisted(Instruction *I, return true; } -static bool CheckSanitizeCilkAttr(Function &F) { +static bool checkSanitizeCilkAttr(Function &F) { if (IgnoreSanitizeCilkAttr) return true; return F.hasFnAttribute(Attribute::SanitizeCilk); @@ -3128,7 +3126,7 @@ static bool CheckSanitizeCilkAttr(Function &F) { bool CilkSanitizerImpl::setupFunction(Function &F, bool NeedToSetupCalls) { if (F.empty() || shouldNotInstrumentFunction(F) || - LinkedFromBitcode.count(&F) || !CheckSanitizeCilkAttr(F)) { + LinkedFromBitcode.count(&F) || !checkSanitizeCilkAttr(F)) { LLVM_DEBUG({ dbgs() << "Skipping " << F.getName() << "\n"; if (F.empty()) @@ -3137,7 +3135,7 @@ bool CilkSanitizerImpl::setupFunction(Function &F, bool NeedToSetupCalls) { dbgs() << " Function should not be instrumented\n"; else if (LinkedFromBitcode.count(&F)) dbgs() << " Function from linked-in bitcode\n"; - else if (!CheckSanitizeCilkAttr(F)) + else if (!checkSanitizeCilkAttr(F)) dbgs() << " Function lacks sanitize_cilk attribute\n"; }); return false; @@ -3182,14 +3180,14 @@ static void setInstrumentationDebugLoc(Function &Instrumented, bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { if (F.empty() || shouldNotInstrumentFunction(F) || - !CheckSanitizeCilkAttr(F)) { + !checkSanitizeCilkAttr(F)) { LLVM_DEBUG({ dbgs() << "Skipping " << F.getName() << "\n"; if (F.empty()) dbgs() << " Empty function\n"; else if (shouldNotInstrumentFunction(F)) dbgs() << " Function should not be instrumented\n"; - else if (!CheckSanitizeCilkAttr(F)) + else if (!checkSanitizeCilkAttr(F)) dbgs() << " Function lacks sanitize_cilk attribute\n";}); return false; } @@ -3263,7 +3261,8 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { << "\n Local race with opaque racer.\n"); OtherRace = true; break; - 
} else if (LI.getLoopFor(RD.Racer.I->getParent()) == L) { + } + if (LI.getLoopFor(RD.Racer.I->getParent()) == L) { LLVM_DEBUG(dbgs() << "Can't hoist or sink instrumentation for " << Inst << "\n Local race with racer in same loop: " @@ -3290,10 +3289,10 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { // SE.isKnownNonNegative(Diff) will be false. Diff = SE.getAddExpr(Size, Stride); } - bool isTapirLoop = static_cast(getTaskIfTapirLoop(L, &TI)); - if (isTapirLoop) + bool IsTapirLoop = static_cast(getTaskIfTapirLoop(L, &TI)); + if (IsTapirLoop) TapirLoops.insert(L); - const SCEV *TripCount = getRuntimeTripCount(*L, &SE, isTapirLoop); + const SCEV *TripCount = getRuntimeTripCount(*L, &SE, IsTapirLoop); if (SE.isKnownNonNegative(Diff)) { if (!isa(TripCount) && @@ -3416,39 +3415,39 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { bool Result = false; if (!EnableStaticRaceDetection) { SimpleInstrumentor FuncI(*this, TI, LI, DT, TLI); - Result |= FuncI.InstrumentSimpleInstructions(AllLoadsAndStores); - Result |= FuncI.InstrumentSimpleInstructions(AtomicAccesses); - Result |= FuncI.InstrumentAnyMemIntrinsics(MemIntrinCalls); - Result |= FuncI.InstrumentCalls(IntrinsicCalls); - Result |= FuncI.InstrumentCalls(LibCalls); - Result |= FuncI.InstrumentCalls(Callsites); + Result |= FuncI.instrumentSimpleInstructions(AllLoadsAndStores); + Result |= FuncI.instrumentSimpleInstructions(AtomicAccesses); + Result |= FuncI.instrumentAnyMemIntrinsics(MemIntrinCalls); + Result |= FuncI.instrumentCalls(IntrinsicCalls); + Result |= FuncI.instrumentCalls(LibCalls); + Result |= FuncI.instrumentCalls(Callsites); // Instrument ancillary instructions including allocas, allocation-function // calls, free calls, detaches, and syncs. 
- Result |= FuncI.InstrumentAncillaryInstructions(Allocas, AllocationFnCalls, + Result |= FuncI.instrumentAncillaryInstructions(Allocas, AllocationFnCalls, FreeCalls, SyncRegNums, SRCounters, DL); } else { Instrumentor FuncI(*this, RI, TI, LI, DT, TLI); // Insert MAAP flags for each function argument. - FuncI.InsertArgMAAPs(F, FuncId); + FuncI.insertArgMAAPs(F, FuncId); - Result |= FuncI.InstrumentSimpleInstructions(AllLoadsAndStores); - Result |= FuncI.InstrumentSimpleInstructions(AtomicAccesses); - Result |= FuncI.InstrumentAnyMemIntrinsics(MemIntrinCalls); - Result |= FuncI.InstrumentCalls(IntrinsicCalls); - Result |= FuncI.InstrumentCalls(LibCalls); - Result |= FuncI.InstrumentCalls(Callsites); + Result |= FuncI.instrumentSimpleInstructions(AllLoadsAndStores); + Result |= FuncI.instrumentSimpleInstructions(AtomicAccesses); + Result |= FuncI.instrumentAnyMemIntrinsics(MemIntrinCalls); + Result |= FuncI.instrumentCalls(IntrinsicCalls); + Result |= FuncI.instrumentCalls(LibCalls); + Result |= FuncI.instrumentCalls(Callsites); // Find detaches that need to be instrumented for loop instructions whose // instrumentation will be coalesced. - FuncI.GetDetachesForCoalescedInstrumentation(LoopInstToHoist, + FuncI.getDetachesForCoalescedInstrumentation(LoopInstToHoist, LoopInstToSink); // Instrument ancillary instructions including allocas, allocation-function // calls, free calls, detaches, and syncs. 
- Result |= FuncI.InstrumentAncillaryInstructions(Allocas, AllocationFnCalls, + Result |= FuncI.instrumentAncillaryInstructions(Allocas, AllocationFnCalls, FreeCalls, SyncRegNums, SRCounters, DL); @@ -3456,12 +3455,12 @@ bool CilkSanitizerImpl::instrumentFunctionUsingRI(Function &F) { // not just Tapir loops) // Also inserts MAAP checks for hoisted/sinked instrumentation Result |= - FuncI.InstrumentLoops(LoopInstToHoist, LoopInstToSink, TapirLoops, &SE); + FuncI.instrumentLoops(LoopInstToHoist, LoopInstToSink, TapirLoops, &SE); // Once we have handled ancillary instructions, we've done the necessary // analysis on this function. We now perform delayed instrumentation, which // can involve changing the CFG and thereby violating some analyses. - Result |= FuncI.PerformDelayedInstrumentation(); + Result |= FuncI.performDelayedInstrumentation(); } if (Result) { @@ -3706,7 +3705,7 @@ FunctionCallee CilkSanitizerImpl::getOrInsertSynthesizedHook(StringRef Name, // Check if we need to spill a value of this type onto the stack to pass it to a // hook. -static bool NeedToSpillType(const Type *T) { +static bool needToSpillType(const Type *T) { return T->isVectorTy() || T->isStructTy(); } @@ -3728,7 +3727,7 @@ bool CilkSanitizerImpl::instrumentIntrinsicCall( LLVMContext &Ctx = IRB.getContext(); uint64_t LocalId = CallsiteFED.add(*I); Value *CallsiteId = CallsiteFED.localToGlobalId(LocalId, IRB); - Value *FuncId = GetCalleeFuncID(Called, IRB); + Value *FuncId = getCalleeFuncID(Called, IRB); assert(FuncId != NULL); Value *NumMVVal = IRB.getInt8(0); @@ -3767,7 +3766,7 @@ bool CilkSanitizerImpl::instrumentIntrinsicCall( const DataLayout &DL = M.getDataLayout(); for (Value *Arg : CB->args()) { Type *ArgTy = Arg->getType(); - if (!NeedToSpillType(ArgTy)) { + if (!needToSpillType(ArgTy)) { // We can simply pass the argument directly to the hook. 
BeforeHookParamTys.push_back(ArgTy); BeforeHookParamVals.push_back(Arg); @@ -3824,7 +3823,7 @@ bool CilkSanitizerImpl::instrumentIntrinsicCall( const DataLayout &DL = M.getDataLayout(); if (!Called->getReturnType()->isVoidTy()) { Type *RetTy = Called->getReturnType(); - if (!NeedToSpillType(RetTy)) { + if (!needToSpillType(RetTy)) { // We can simply pass the return value directly to the hook. AfterHookParamTys.push_back(RetTy); AfterHookParamVals.push_back(CB); @@ -3847,7 +3846,7 @@ bool CilkSanitizerImpl::instrumentIntrinsicCall( } for (Value *Arg : CB->args()) { Type *ArgTy = Arg->getType(); - if (!NeedToSpillType(ArgTy)) { + if (!needToSpillType(ArgTy)) { // We can simply pass the argument directly to the hook. AfterHookParamTys.push_back(ArgTy); AfterHookParamVals.push_back(Arg); @@ -3918,7 +3917,7 @@ bool CilkSanitizerImpl::instrumentLibCall(Instruction *I, uint64_t LocalId = CallsiteFED.add(*I); Value *DefaultID = getDefaultID(IRB); Value *CallsiteId = CallsiteFED.localToGlobalId(LocalId, IRB); - Value *FuncId = GetCalleeFuncID(Called, IRB); + Value *FuncId = getCalleeFuncID(Called, IRB); assert(FuncId != NULL); Value *NumMVVal = IRB.getInt8(0); @@ -4017,7 +4016,7 @@ bool CilkSanitizerImpl::instrumentCallsite(Instruction *I, uint64_t LocalId = CallsiteFED.add(*I); Value *DefaultID = getDefaultID(IRB); Value *CallsiteId = CallsiteFED.localToGlobalId(LocalId, IRB); - Value *FuncId = GetCalleeFuncID(Called, IRB); + Value *FuncId = getCalleeFuncID(Called, IRB); assert(FuncId != NULL); Value *NumMVVal = IRB.getInt8(0); @@ -4097,12 +4096,12 @@ bool CilkSanitizerImpl::suppressCallsite(Instruction *I) { return true; } -static bool IsMemTransferDstOperand(unsigned OperandNum) { +static bool isMemTransferDstOperand(unsigned OperandNum) { // This check should be kept in sync with TapirRaceDetect::GetGeneralAccesses. 
return (OperandNum == 0); } -static bool IsMemTransferSrcOperand(unsigned OperandNum) { +static bool isMemTransferSrcOperand(unsigned OperandNum) { // This check should be kept in sync with TapirRaceDetect::GetGeneralAccesses. return (OperandNum == 1); } @@ -4116,7 +4115,7 @@ bool CilkSanitizerImpl::instrumentAnyMemIntrinAcc(Instruction *I, // necessary. bool Instrumented = false; - if (IsMemTransferDstOperand(OperandNum)) { + if (isMemTransferDstOperand(OperandNum)) { // Only insert instrumentation if requested if (!(InstrumentationSet & SHADOWMEMORY)) return true; @@ -4142,7 +4141,7 @@ bool CilkSanitizerImpl::instrumentAnyMemIntrinAcc(Instruction *I, Instrumented = true; } - if (IsMemTransferSrcOperand(OperandNum)) { + if (isMemTransferSrcOperand(OperandNum)) { // Only insert instrumentation if requested if (!(InstrumentationSet & SHADOWMEMORY)) return true; @@ -4168,7 +4167,8 @@ bool CilkSanitizerImpl::instrumentAnyMemIntrinAcc(Instruction *I, Instrumented = true; } return Instrumented; - } else if (AnyMemIntrinsic *M = dyn_cast(I)) { + } + if (AnyMemIntrinsic *M = dyn_cast(I)) { // Only insert instrumentation if requested if (!(InstrumentationSet & SHADOWMEMORY)) return true; @@ -4564,7 +4564,7 @@ bool CilkSanitizerImpl::instrumentAllocFnLibCall(Instruction *I, Value *DefaultID = getDefaultID(IRB); uint64_t LocalId = AllocFnFED.add(*I); Value *AllocFnId = AllocFnFED.localToGlobalId(LocalId, IRB); - Value *FuncId = GetCalleeFuncID(Called, IRB); + Value *FuncId = getCalleeFuncID(Called, IRB); assert(FuncId != NULL); // Get the ID for the corresponding heap object diff --git a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp index 41749ae9a0b7..73482313d5ae 100644 --- a/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp +++ b/llvm/lib/Transforms/Instrumentation/ComprehensiveStaticInstrumentation.cpp @@ -135,12 +135,12 @@ static cl::opt 
"csi-split-blocks-at-calls", cl::init(true), cl::Hidden, cl::desc("Split basic blocks at function calls.")); -static size_t numPassRuns = 0; -bool IsFirstRun() { return numPassRuns == 0; } +static size_t NumPassRuns = 0; +bool isFirstRun() { return NumPassRuns == 0; } namespace { -static CSIOptions OverrideFromCL(CSIOptions Options) { +static CSIOptions overrideFromCL(CSIOptions Options) { Options.InstrumentFuncEntryExit = ClInstrumentFuncEntryExit; Options.InstrumentLoops = ClInstrumentLoops; Options.InstrumentBasicBlocks = ClInstrumentBasicBlocks; @@ -162,7 +162,7 @@ struct ComprehensiveStaticInstrumentationLegacyPass : public ModulePass { static char ID; // Pass identification, replacement for typeid. ComprehensiveStaticInstrumentationLegacyPass( - const CSIOptions &Options = OverrideFromCL(CSIOptions())) + const CSIOptions &Options = overrideFromCL(CSIOptions())) : ModulePass(ID), Options(Options) { initializeComprehensiveStaticInstrumentationLegacyPassPass( *PassRegistry::getPassRegistry()); @@ -322,7 +322,7 @@ bool CSIImpl::run() { finalizeCsi(); - if (IsFirstRun() && Options.jitMode) { + if (isFirstRun() && Options.jitMode) { llvm::sys::DynamicLibrary::LoadLibraryPermanently(ClToolLibrary.c_str()); } // Link the tool bitcode a second time, for definitions of used functions. @@ -759,7 +759,7 @@ void CSIImpl::setupCalls(Function &F) { promoteCallsInTasksToInvokes(F, "csi.cleanup"); } -static BasicBlock *SplitOffPreds(BasicBlock *BB, +static BasicBlock *splitOffPreds(BasicBlock *BB, SmallVectorImpl &Preds, DominatorTree *DT, LoopInfo *LI) { if (BB->isLandingPad()) { @@ -842,34 +842,34 @@ static void setupBlock(BasicBlock *BB, const TargetLibraryInfo *TLI, BasicBlock *BBToSplit = BB; // Split off the predecessors of each type. 
if (!SyncPreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, SyncPreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, SyncPreds, DT, LI); NumPredTypes--; } if (!SyncUnwindPreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, SyncUnwindPreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, SyncUnwindPreds, DT, LI); NumPredTypes--; } if (!AllocFnPreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, AllocFnPreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, AllocFnPreds, DT, LI); NumPredTypes--; } if (!FreeFnPreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, FreeFnPreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, FreeFnPreds, DT, LI); NumPredTypes--; } if (!InvokePreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, InvokePreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, InvokePreds, DT, LI); NumPredTypes--; } if (!TFResumePreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, TFResumePreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, TFResumePreds, DT, LI); NumPredTypes--; } // We handle detach and detached.rethrow predecessors at the end to preserve // invariants on the CFG structure about the deadness of basic blocks after // detached-rethrows. if (!DetachPreds.empty() && NumPredTypes > NumPredTypesRequired) { - BBToSplit = SplitOffPreds(BBToSplit, DetachPreds, DT, LI); + BBToSplit = splitOffPreds(BBToSplit, DetachPreds, DT, LI); NumPredTypes--; } } @@ -1124,7 +1124,8 @@ bool CSIImpl::instrumentMemIntrinsic(Instruction *I) { setInstrumentationDebugLoc(I, Call); I->eraseFromParent(); return true; - } else if (MemTransferInst *M = dyn_cast(I)) { + } + if (MemTransferInst *M = dyn_cast(I)) { Instruction *Call = IRB.CreateCall( isa(M) ? 
MemcpyFn : MemmoveFn, {IRB.CreatePointerCast(M->getArgOperand(0), IRB.getInt8PtrTy()), @@ -1139,8 +1140,8 @@ bool CSIImpl::instrumentMemIntrinsic(Instruction *I) { void CSIImpl::instrumentBasicBlock(BasicBlock &BB, const TaskInfo &TI) { IRBuilder<> IRB(&*BB.getFirstInsertionPt()); - bool isEntry = isEntryBlock(BB, TI); - if (isEntry) + bool IsEntry = isEntryBlock(BB, TI); + if (IsEntry) IRB.SetInsertPoint(getEntryBBInsertPt(BB)); uint64_t LocalId = BasicBlockFED.add(BB); uint64_t BBSizeId = BBSize.add(BB, GetTTI ? @@ -1158,7 +1159,7 @@ void CSIImpl::instrumentBasicBlock(BasicBlock &BB, const TaskInfo &TI) { CallInst *Call = insertHookCall(TermI, CsiBBExit, {CsiId, PropVal}); // If this is an entry block and the insert point is the terminator, make the // BBExit hook be the insert point instead. - if (isEntry && getEntryBBInsertPt(BB) == TermI) + if (IsEntry && getEntryBBInsertPt(BB) == TermI) EntryBBInsertPt[&BB] = Call; } @@ -1269,18 +1270,18 @@ void CSIImpl::instrumentCallsite(Instruction *I, DominatorTree *DT) { else if (InvokeInst *II = dyn_cast(I)) Called = II->getCalledFunction(); - bool shouldInstrumentBefore = true; - bool shouldInstrumentAfter = true; + bool ShouldInstrumentBefore = true; + bool ShouldInstrumentAfter = true; // Does this call require instrumentation before or after? 
if (Called) { - shouldInstrumentBefore = Config->DoesFunctionRequireInstrumentationForPoint( + ShouldInstrumentBefore = Config->DoesFunctionRequireInstrumentationForPoint( Called->getName(), InstrumentationPoint::INSTR_BEFORE_CALL); - shouldInstrumentAfter = Config->DoesFunctionRequireInstrumentationForPoint( + ShouldInstrumentAfter = Config->DoesFunctionRequireInstrumentationForPoint( Called->getName(), InstrumentationPoint::INSTR_AFTER_CALL); } - if (!shouldInstrumentAfter && !shouldInstrumentBefore) + if (!ShouldInstrumentAfter && !ShouldInstrumentBefore) return; IRBuilder<> IRB(I); @@ -1311,11 +1312,11 @@ void CSIImpl::instrumentCallsite(Instruction *I, DominatorTree *DT) { Value *DefaultPropVal = Prop.getValue(IRB); Prop.setIsIndirect(!Called); Value *PropVal = Prop.getValue(IRB); - if (shouldInstrumentBefore) + if (ShouldInstrumentBefore) insertHookCall(I, CsiBeforeCallsite, {CallsiteId, FuncId, PropVal}); BasicBlock::iterator Iter(I); - if (shouldInstrumentAfter) { + if (ShouldInstrumentAfter) { if (IsInvoke) { // There are two "after" positions for invokes: the normal block and the // exception block. @@ -1347,13 +1348,13 @@ void CSIImpl::interposeCall(Instruction *I) { // Should we interpose this call? if (Called && Called->getName().size() > 0) { - bool shouldInterpose = + bool ShouldInterpose = Config->DoesFunctionRequireInterposition(Called->getName()); - if (shouldInterpose) { - Function *interpositionFunction = getInterpositionFunction(Called); - assert(interpositionFunction != nullptr); - CB->setCalledFunction(interpositionFunction); + if (ShouldInterpose) { + Function *InterpositionFunction = getInterpositionFunction(Called); + assert(InterpositionFunction != nullptr); + CB->setCalledFunction(InterpositionFunction); } } } @@ -1960,8 +1961,8 @@ void CSIImpl::generateInitCallsiteToFunction() { // Traverse the map of function name -> function local id. Generate // a store of each function's global ID to the corresponding weak // global variable. 
- for (const auto &it : FuncOffsetMap) { - std::string GVName = CsiFuncIdVariablePrefix + it.first.str(); + for (const auto &It : FuncOffsetMap) { + std::string GVName = CsiFuncIdVariablePrefix + It.first.str(); GlobalVariable *GV = nullptr; if ((GV = M.getGlobalVariable(GVName)) == nullptr) { GV = new GlobalVariable(M, IRB.getInt64Ty(), false, @@ -1970,7 +1971,7 @@ void CSIImpl::generateInitCallsiteToFunction() { IRB.getInt64(CsiCallsiteUnknownTargetId), GVName); } assert(GV); - IRB.CreateStore(IRB.CreateAdd(LI, IRB.getInt64(it.second)), GV); + IRB.CreateStore(IRB.CreateAdd(LI, IRB.getInt64(It.second)), GV); } } @@ -2325,7 +2326,8 @@ void CSIImpl::linkInToolFromBitcode(const std::string &BitcodePath) { if (GVName == "llvm.global_ctors") { BitcodeAddsCtors = true; continue; - } else if (GVName == "llvm.global_dtors") { + } + if (GVName == "llvm.global_dtors") { BitcodeAddsDtors = true; continue; } @@ -2774,9 +2776,9 @@ void CSIImpl::instrumentFunction(Function &F) { // Do this work in a separate loop after copying the iterators so that we // aren't modifying the list as we're iterating. if (Options.InstrumentMemoryAccesses) - for (std::pair p : + for (std::pair P : LoadAndStoreProperties) - instrumentLoadOrStore(p.first, p.second); + instrumentLoadOrStore(P.first, P.second); // Instrument atomic memory accesses in any case (they can be used to // implement synchronization). 
@@ -2884,18 +2886,18 @@ bool ComprehensiveStaticInstrumentationLegacyPass::runOnModule(Module &M) { return this->getAnalysis(F).getTaskInfo(); }; - bool res = CSIImpl(M, CG, GetDomTree, GetLoopInfo, GetTaskInfo, GetTLI, GetSE, + bool Res = CSIImpl(M, CG, GetDomTree, GetLoopInfo, GetTaskInfo, GetTLI, GetSE, GetTTI, Options) .run(); verifyModule(M, &llvm::errs()); - numPassRuns++; + NumPassRuns++; - return res; + return Res; } -CSISetupPass::CSISetupPass() : Options(OverrideFromCL(CSIOptions())) {} +CSISetupPass::CSISetupPass() : Options(overrideFromCL(CSIOptions())) {} CSISetupPass::CSISetupPass(const CSIOptions &Options) : Options(Options) {} @@ -2907,7 +2909,7 @@ PreservedAnalyses CSISetupPass::run(Module &M, ModuleAnalysisManager &AM) { } ComprehensiveStaticInstrumentationPass::ComprehensiveStaticInstrumentationPass() - : Options(OverrideFromCL(CSIOptions())) {} + : Options(overrideFromCL(CSIOptions())) {} ComprehensiveStaticInstrumentationPass::ComprehensiveStaticInstrumentationPass( const CSIOptions &Options) diff --git a/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp b/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp index 8c25f142444b..42d99e88c82c 100644 --- a/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp +++ b/llvm/lib/Transforms/Tapir/LoopSpawningTI.cpp @@ -35,10 +35,8 @@ #include "llvm/IR/Verifier.h" #include "llvm/InitializePasses.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" -#include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Transforms/Utils/Local.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/IndVarSimplify.h" #include "llvm/Transforms/Scalar/LoopDeletion.h" @@ -47,7 +45,9 @@ #include "llvm/Transforms/Tapir/Outline.h" #include "llvm/Transforms/Tapir/TapirLoopInfo.h" #include "llvm/Transforms/Utils.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/EscapeEnumerator.h" +#include 
"llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/TapirUtils.h" diff --git a/llvm/lib/Transforms/Tapir/LoopStripMine.cpp b/llvm/lib/Transforms/Tapir/LoopStripMine.cpp index 02e04f4fefbf..f1f15475938d 100644 --- a/llvm/lib/Transforms/Tapir/LoopStripMine.cpp +++ b/llvm/lib/Transforms/Tapir/LoopStripMine.cpp @@ -19,11 +19,9 @@ #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/TapirTaskInfo.h" -#include "llvm/Transforms/Utils/Local.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/Dominators.h" -#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Metadata.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -31,6 +29,7 @@ #include "llvm/Transforms/Tapir/TapirLoopInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" +#include "llvm/Transforms/Utils/Local.h" #include "llvm/Transforms/Utils/LoopSimplify.h" #include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" @@ -59,10 +58,10 @@ static cl::opt StripMineUnrollRemainder( namespace StripMineConstants { /// Default coarsening factor for strpimined Tapir loops. const unsigned DefaultCoarseningFactor = 2048; -} +} // namespace StripMineConstants /// The function chooses which type of stripmine (epilog or prolog) is more -/// profitabale. +/// profitable. /// Epilog stripmine is more profitable when there is PHI that starts from /// constant. In this case epilog will leave PHI start from constant, /// but prolog will convert it to non-constant. @@ -169,7 +168,7 @@ TargetTransformInfo::StripMiningPreferences llvm::gatherStripMiningPreferences( // If loop has an grainsize pragma return the (necessarily positive) value from // the pragma for stripmining. Otherwise return 0. 
-static unsigned StripMineCountPragmaValue(const Loop *L) { +static unsigned stripMineCountPragmaValue(const Loop *L) { TapirLoopHints Hints(L); return Hints.getGrainsize(); } @@ -189,7 +188,7 @@ bool llvm::computeStripMineCount( } // 2nd priority is stripmine count set by pragma. - unsigned PragmaCount = StripMineCountPragmaValue(L); + unsigned PragmaCount = stripMineCountPragmaValue(L); if (PragmaCount > 0) { SMP.Count = PragmaCount; SMP.AllowExpensiveTripCount = true; @@ -346,11 +345,11 @@ static Task *getTapirLoopForStripMining(const Loop *L, TaskInfo &TI, /// 'extra' iterations if the run-time trip count modulo the /// stripmine count is non-zero. /// -/// This function performs the following: +/// This function does the following: /// - Update PHI operands in the epilog loop by the new PHI nodes /// - Branch around the epilog loop if extra iters (ModVal) is zero. /// -static void ConnectEpilog(TapirLoopInfo &TL, Value *EpilStartIter, +static void connectEpilog(TapirLoopInfo &TL, Value *EpilStartIter, Value *ModVal, BasicBlock *LoopDet, BasicBlock *LoopEnd, BasicBlock *NewExit, BasicBlock *Exit, BasicBlock *Preheader, @@ -416,7 +415,7 @@ static void ConnectEpilog(TapirLoopInfo &TL, Value *EpilStartIter, /// new loop exit. /// Return the new cloned loop that is created when CreateRemainderLoop is true. static Loop * -CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, +cloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, const bool UseEpilogRemainder, const bool UnrollRemainder, BasicBlock *InsertTop, BasicBlock *InsertBot, BasicBlock *Preheader, std::vector &NewBlocks, @@ -424,7 +423,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, SmallVectorImpl &ExtraTaskBlocks, SmallVectorImpl &SharedEHTaskBlocks, ValueToValueMapTy &VMap, DominatorTree *DT, LoopInfo *LI) { - StringRef suffix = UseEpilogRemainder ? "epil" : "prol"; + StringRef Suffix = UseEpilogRemainder ? 
"epil" : "prol"; BasicBlock *Header = L->getHeader(); BasicBlock *Latch = L->getLoopLatch(); Function *F = Header->getParent(); @@ -439,7 +438,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, // For each block in the original loop, create a new copy, // and update the value map with the newly created values. for (LoopBlocksDFS::RPOIterator BB = BlockBegin; BB != BlockEnd; ++BB) { - BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + suffix, F); + BasicBlock *NewBB = CloneBasicBlock(*BB, VMap, "." + Suffix, F); NewBlocks.push_back(NewBB); // Add the cloned block to loop info. @@ -474,7 +473,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, Builder.CreateBr(InsertBot); } else { PHINode *NewIdx = PHINode::Create(NewIter->getType(), 2, - suffix + ".iter", + Suffix + ".iter", FirstLoopBB->getFirstNonPHI()); Value *IdxSub = Builder.CreateSub(NewIdx, ConstantInt::get(NewIdx->getType(), 1), @@ -493,7 +492,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, // Create new copies of the EH blocks to clone. We can handle these blocks // more simply than the loop blocks. for (BasicBlock *BB : ExtraTaskBlocks) { - BasicBlock *NewBB = CloneBasicBlock(BB, VMap, "." + suffix, F); + BasicBlock *NewBB = CloneBasicBlock(BB, VMap, "." + Suffix, F); NewBlocks.push_back(NewBB); // Add the cloned block to loop info. 
@@ -564,22 +563,22 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, PHINode *NewPHI = cast(VMap[&*I]); if (!CreateRemainderLoop) { if (UseEpilogRemainder) { - unsigned idx = NewPHI->getBasicBlockIndex(Preheader); - NewPHI->setIncomingBlock(idx, InsertTop); + unsigned Idx = NewPHI->getBasicBlockIndex(Preheader); + NewPHI->setIncomingBlock(Idx, InsertTop); NewPHI->removeIncomingValue(Latch, false); } else { VMap[&*I] = NewPHI->getIncomingValueForBlock(Preheader); NewPHI->eraseFromParent(); } } else { - unsigned idx = NewPHI->getBasicBlockIndex(Preheader); - NewPHI->setIncomingBlock(idx, InsertTop); + unsigned Idx = NewPHI->getBasicBlockIndex(Preheader); + NewPHI->setIncomingBlock(Idx, InsertTop); BasicBlock *NewLatch = cast(VMap[Latch]); - idx = NewPHI->getBasicBlockIndex(Latch); - Value *InVal = NewPHI->getIncomingValue(idx); - NewPHI->setIncomingBlock(idx, NewLatch); + Idx = NewPHI->getBasicBlockIndex(Latch); + Value *InVal = NewPHI->getIncomingValue(Idx); + NewPHI->setIncomingBlock(Idx, NewLatch); if (Value *V = VMap.lookup(InVal)) - NewPHI->setIncomingValue(idx, V); + NewPHI->setIncomingValue(Idx, V); } } @@ -608,8 +607,7 @@ CloneLoopBlocks(Loop *L, Value *NewIter, const bool CreateRemainderLoop, // NewLoop->setLoopAlreadyUnrolled(); return NewLoop; } - else - return nullptr; + return nullptr; } // Helper function to get the basic-block predecessors of the given exceptional @@ -647,7 +645,7 @@ static void getEHContPredecessors(BasicBlock *BB, Task *T, // Helper method to nest the exception-handling code of a task with exceptional // continuation EHCont within a new parent task. 
-static BasicBlock *NestDetachUnwindPredecessors( +static BasicBlock *nestDetachUnwindPredecessors( BasicBlock *EHCont, Value *EHContLPad, ArrayRef Preds, BasicBlock *NewDetachBB, const char *Suffix1, const char *Suffix2, LandingPadInst *OrigLPad, Value *SyncReg, Module *M, DominatorTree *DT, @@ -957,12 +955,9 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, // Branch to either remainder (extra iterations) loop or stripmined loop. B.CreateCondBr(BranchVal, RemainderLoopBB, StripminedLoopBB); PreheaderBR->eraseFromParent(); - if (DT) { - // if (UseEpilogRemainder) - DT->changeImmediateDominator(NewExit, Preheader); - // else - // DT->changeImmediateDominator(PrologExit, Preheader); - } + if (DT) + DT->changeImmediateDominator(NewExit, Preheader); + Function *F = Header->getParent(); // Get an ordered list of blocks in the loop to help with the ordering of the // cloned blocks in the prolog/epilog code @@ -1040,7 +1035,7 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, BasicBlock *InsertBot = LatchExit; BasicBlock *InsertTop = EpilogPreheader; *RemainderLoop = - CloneLoopBlocks(L, ModVal, CreateRemainderLoop, true, UnrollRemainder, + cloneLoopBlocks(L, ModVal, CreateRemainderLoop, true, UnrollRemainder, InsertTop, InsertBot, NewPreheader, NewBlocks, LoopBlocks, ExtraTaskBlocks, SharedEHTaskBlocks, VMap, DT, LI); @@ -1161,7 +1156,7 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, PN.addIncoming(UndefValue::get(PN.getType()), LoopDetach); // Nest the exceptional code in the original task into the new task. 
- /* BasicBlock *OuterUD = */ NestDetachUnwindPredecessors( + /* BasicBlock *OuterUD = */ nestDetachUnwindPredecessors( EHCont, EHContLPadVal, UDPreds, LoopDetach, ".strpm", ".strpm.detachloop.unwind", DI->getUnwindDest()->getLandingPadInst(), SyncReg, M, DT, LI, nullptr, PreserveLCSSA); @@ -1509,7 +1504,7 @@ Loop *llvm::StripMineLoop(Loop *L, unsigned Count, bool AllowExpensiveTripCount, ConstantInt::get(TripCount->getType(), Count)); if (Instruction *ESIInst = dyn_cast(EpilStartIter)) ESIInst->copyIRFlags(PrimaryInc); - ConnectEpilog(TL, EpilStartIter, ModVal, EpilogPred, LoopReattach, NewExit, + connectEpilog(TL, EpilStartIter, ModVal, EpilogPred, LoopReattach, NewExit, LatchExit, Preheader, EpilogPreheader, VMap, DT, LI, SE, DL, PreserveLCSSA); diff --git a/llvm/lib/Transforms/Tapir/LoopStripMinePass.cpp b/llvm/lib/Transforms/Tapir/LoopStripMinePass.cpp index 2d55d6387eab..6dc00239f45b 100644 --- a/llvm/lib/Transforms/Tapir/LoopStripMinePass.cpp +++ b/llvm/lib/Transforms/Tapir/LoopStripMinePass.cpp @@ -26,7 +26,6 @@ #include "llvm/IR/PassManager.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/Casting.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -89,7 +88,7 @@ createMissedAnalysis(StringRef RemarkName, const Loop *TheLoop, /// Approximate the work of the body of the loop L. Returns several relevant /// properties of loop L via by-reference arguments. 
-static InstructionCost ApproximateLoopCost( +static InstructionCost approximateLoopCost( const Loop *L, unsigned &NumCalls, bool &NotDuplicatable, bool &Convergent, bool &IsRecursive, bool &UnknownSize, const TargetTransformInfo &TTI, LoopInfo *LI, ScalarEvolution &SE, @@ -148,14 +147,14 @@ static bool tryToStripMineLoop( CodeMetrics::collectEphemeralValues(L, &AC, EphValues); InstructionCost LoopCost = - ApproximateLoopCost(L, NumCalls, NotDuplicatable, Convergent, IsRecursive, + approximateLoopCost(L, NumCalls, NotDuplicatable, Convergent, IsRecursive, UnknownSize, TTI, LI, SE, EphValues, TLI); // Determine the iteration count of the eventual stripmined the loop. - bool explicitCount = computeStripMineCount(L, TTI, LoopCost, SMP); + bool ExplicitCount = computeStripMineCount(L, TTI, LoopCost, SMP); // If the loop size is unknown, then we cannot compute a stripmining count for // it. - if (!explicitCount && UnknownSize) { + if (!ExplicitCount && UnknownSize) { LLVM_DEBUG(dbgs() << " Not stripmining loop with unknown size.\n"); ORE.emit(createMissedAnalysis("UnknownSize", L) << "Cannot stripmine loop with unknown size."); @@ -165,7 +164,7 @@ static bool tryToStripMineLoop( // If the loop size is enormous, then we might want to use a stripmining count // of 1 for it. LLVM_DEBUG(dbgs() << " Loop Cost = " << LoopCost << "\n"); - if (!explicitCount && InstructionCost::getMax() == LoopCost) { + if (!ExplicitCount && InstructionCost::getMax() == LoopCost) { LLVM_DEBUG(dbgs() << " Not stripmining loop with very large size.\n"); if (Hints.getGrainsize() == 1) return false; @@ -179,7 +178,7 @@ static bool tryToStripMineLoop( } // If the loop is recursive, set the stripmine factor to be 1. 
- if (!explicitCount && IsRecursive) { + if (!ExplicitCount && IsRecursive) { LLVM_DEBUG(dbgs() << " Not stripmining loop that recursively calls the " << "containing function.\n"); if (Hints.getGrainsize() == 1) @@ -215,7 +214,7 @@ static bool tryToStripMineLoop( // If the loop contains potentially expensive function calls, then we don't // want to stripmine it. - if (NumCalls > 0 && !explicitCount && !StripMiningRequested) { + if (NumCalls > 0 && !ExplicitCount && !StripMiningRequested) { LLVM_DEBUG(dbgs() << " Skipping loop with expensive function calls.\n"); ORE.emit(createMissedAnalysis("ExpensiveCalls", L) << "Not stripmining loop with potentially expensive calls."); diff --git a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp index 319083f81e1e..6b6503f2f53a 100644 --- a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp +++ b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp @@ -11,11 +11,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/TapirTaskInfo.h" -#include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/Dominators.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/IR/IRBuilder.h" #include "llvm/Support/Timer.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" #include "llvm/Transforms/Tapir/CilkABI.h" @@ -361,7 +360,7 @@ void llvm::findAllTaskFrameInputs( std::pair llvm::createTaskArgsStruct(const ValueSet &Inputs, Task *T, Instruction *StorePt, Instruction *LoadPt, - bool staticStruct, ValueToValueMapTy &InputsMap, + bool StaticStruct, ValueToValueMapTy &InputsMap, Loop *TapirL) { assert(T && T->getParentTask() && "Expected spawned task."); SmallPtrSet TaskFrameBlocks; @@ -416,7 +415,7 @@ llvm::createTaskArgsStruct(const ValueSet &Inputs, Task *T, AllocaInst *Closure; StructType *ST = StructType::get(T->getEntry()->getContext(), StructIT); LLVM_DEBUG(dbgs() << "Closure struct type " << *ST << 
"\n"); - if (staticStruct) { + if (StaticStruct) { Spindle *ParentTF = T->getEntrySpindle()->getTaskFrameParent(); BasicBlock *AllocaInsertBlk = ParentTF ? ParentTF->getEntry() : T->getParentTask()->getEntry(); @@ -444,8 +443,8 @@ llvm::createTaskArgsStruct(const ValueSet &Inputs, Task *T, // values. IRBuilder<> B2(LoadPt); for (unsigned i = 0; i < StructInputs.size(); ++i) { - auto STGEP = cast(B2.CreateConstGEP2_32(ST, Closure, 0, i)); - auto STLoad = B2.CreateLoad(StructIT[i], STGEP); + auto *STGEP = cast(B2.CreateConstGEP2_32(ST, Closure, 0, i)); + auto *STLoad = B2.CreateLoad(StructIT[i], STGEP); InputsMap[StructInputs[i]] = STLoad; // Update all uses of the struct inputs in the loop body. @@ -521,12 +520,12 @@ void llvm::fixupInputSet(Function &F, const ValueSet &Inputs, ValueSet &Fixed) { Instruction *llvm::fixupHelperInputs( Function &F, Task *T, ValueSet &TaskInputs, ValueSet &HelperArgs, Instruction *StorePt, Instruction *LoadPt, - TapirTarget::ArgStructMode useArgStruct, + TapirTarget::ArgStructMode UseArgStruct, ValueToValueMapTy &InputsMap, Loop *TapirL) { - if (TapirTarget::ArgStructMode::None != useArgStruct) { + if (TapirTarget::ArgStructMode::None != UseArgStruct) { std::pair ArgsStructInfo = createTaskArgsStruct(TaskInputs, T, StorePt, LoadPt, - TapirTarget::ArgStructMode::Static == useArgStruct, + TapirTarget::ArgStructMode::Static == UseArgStruct, InputsMap, TapirL); HelperArgs.insert(ArgsStructInfo.first); return ArgsStructInfo.second; @@ -967,10 +966,10 @@ Function *llvm::createHelperForTaskFrame( TaskOutlineInfo llvm::outlineTaskFrame( Spindle *TF, ValueSet &Inputs, SmallVectorImpl &HelperInputs, Module *DestM, ValueToValueMapTy &VMap, - TapirTarget::ArgStructMode useArgStruct, Type *ReturnType, + TapirTarget::ArgStructMode UseArgStruct, Type *ReturnType, ValueToValueMapTy &InputMap, OutlineAnalysis &OA) { if (Task *T = TF->getTaskFromTaskFrame()) - return outlineTask(T, Inputs, HelperInputs, DestM, VMap, useArgStruct, + return outlineTask(T, 
Inputs, HelperInputs, DestM, VMap, UseArgStruct, ReturnType, InputMap, OA); Function &F = *TF->getEntry()->getParent(); @@ -1030,31 +1029,30 @@ Instruction *llvm::replaceTaskFrameWithCallToOutline( // Replace the detach with an unconditional branch to its continuation. ReplaceInstWithInst(ToReplace, BranchInst::Create(Out.ReplRet)); return TopCall; - } else { - // The detach might catch an exception from the task. Replace the detach - // with an invoke of the outline. - InvokeInst *TopCall; - // Create invoke instruction. The ordinary return of the invoke is the - // detach's continuation, and the unwind return is the detach's unwind. - TopCall = InvokeInst::Create(Out.Outline, Out.ReplRet, Out.ReplUnwind, - OutlineInputs, "", ToReplace->getParent()); - if (TFResumeBB) { - // Update PHI nodes in the unwind destination of TFResumeBB. - for (PHINode &PN : Out.ReplUnwind->phis()) - PN.replaceIncomingBlockWith(TFResumeBB, ToReplace->getParent()); - // Replace the terminator of TFResumeBB with an unreachable. - IRBuilder<> B(TFResumeBB->getTerminator()); - B.CreateUnreachable()->setDebugLoc( - TFResumeBB->getTerminator()->getDebugLoc()); - TFResumeBB->getTerminator()->eraseFromParent(); - } - // Use a fast calling convention for the outline. - TopCall->setCallingConv(Out.Outline->getCallingConv()); - TopCall->setDebugLoc(ToReplace->getDebugLoc()); - // Remove the detach. The invoke serves as a replacement terminator. - ToReplace->eraseFromParent(); - return TopCall; } + // The detach might catch an exception from the task. Replace the detach + // with an invoke of the outline. + InvokeInst *TopCall; + // Create invoke instruction. The ordinary return of the invoke is the + // detach's continuation, and the unwind return is the detach's unwind. + TopCall = InvokeInst::Create(Out.Outline, Out.ReplRet, Out.ReplUnwind, + OutlineInputs, "", ToReplace->getParent()); + if (TFResumeBB) { + // Update PHI nodes in the unwind destination of TFResumeBB. 
+ for (PHINode &PN : Out.ReplUnwind->phis()) + PN.replaceIncomingBlockWith(TFResumeBB, ToReplace->getParent()); + // Replace the terminator of TFResumeBB with an unreachable. + IRBuilder<> B(TFResumeBB->getTerminator()); + B.CreateUnreachable()->setDebugLoc( + TFResumeBB->getTerminator()->getDebugLoc()); + TFResumeBB->getTerminator()->eraseFromParent(); + } + // Use a fast calling convention for the outline. + TopCall->setCallingConv(Out.Outline->getCallingConv()); + TopCall->setDebugLoc(ToReplace->getDebugLoc()); + // Remove the detach. The invoke serves as a replacement terminator. + ToReplace->eraseFromParent(); + return TopCall; } /// Outlines a task \p T into a helper function that accepts the inputs \p @@ -1064,7 +1062,7 @@ Instruction *llvm::replaceTaskFrameWithCallToOutline( TaskOutlineInfo llvm::outlineTask( Task *T, ValueSet &Inputs, SmallVectorImpl &HelperInputs, Module *DestM, ValueToValueMapTy &VMap, - TapirTarget::ArgStructMode useArgStruct, Type *ReturnType, + TapirTarget::ArgStructMode UseArgStruct, Type *ReturnType, ValueToValueMapTy &InputMap, OutlineAnalysis &OA) { assert(!T->isRootTask() && "Cannot outline the root task."); Function &F = *T->getEntry()->getParent(); @@ -1091,7 +1089,7 @@ TaskOutlineInfo llvm::outlineTask( // Convert the inputs of the task to inputs to the helper. ValueSet HelperArgs; Instruction *ArgsStart = fixupHelperInputs(F, T, Inputs, HelperArgs, StorePt, - LoadPt, useArgStruct, InputMap); + LoadPt, UseArgStruct, InputMap); for (Value *V : HelperArgs) HelperInputs.push_back(V); @@ -1204,27 +1202,26 @@ Instruction *llvm::replaceLoopWithCallToOutline( L->getHeader()->removePredecessor(Out.ReplCall->getParent()); ReplaceInstWithInst(Out.ReplCall, BranchInst::Create(Out.ReplRet)); return TopCall; - } else { - // The detach might catch an exception from the task. Replace the detach - // with an invoke of the outline. - InvokeInst *TopCall; - - // Create invoke instruction. 
The ordinary return of the invoke is the - // detach's continuation, and the unwind return is the detach's unwind. - TopCall = InvokeInst::Create(Out.Outline, Out.ReplRet, Out.ReplUnwind, - OutlineInputs); - // Use a fast calling convention for the outline. - TopCall->setCallingConv(Out.Outline->getCallingConv()); - TopCall->setDebugLoc(TL->getDebugLoc()); - // Replace the loop with the invoke. - L->getHeader()->removePredecessor(Out.ReplCall->getParent()); - ReplaceInstWithInst(Out.ReplCall, TopCall); - // Add invoke parent as a predecessor for all Phi nodes in ReplUnwind. - for (PHINode &Phi : Out.ReplUnwind->phis()) - Phi.addIncoming(Phi.getIncomingValueForBlock(L->getHeader()), - TopCall->getParent()); - return TopCall; } + // The detach might catch an exception from the task. Replace the detach + // with an invoke of the outline. + InvokeInst *TopCall; + + // Create invoke instruction. The ordinary return of the invoke is the + // detach's continuation, and the unwind return is the detach's unwind. + TopCall = InvokeInst::Create(Out.Outline, Out.ReplRet, Out.ReplUnwind, + OutlineInputs); + // Use a fast calling convention for the outline. + TopCall->setCallingConv(Out.Outline->getCallingConv()); + TopCall->setDebugLoc(TL->getDebugLoc()); + // Replace the loop with the invoke. + L->getHeader()->removePredecessor(Out.ReplCall->getParent()); + ReplaceInstWithInst(Out.ReplCall, TopCall); + // Add invoke parent as a predecessor for all Phi nodes in ReplUnwind. 
+ for (PHINode &Phi : Out.ReplUnwind->phis()) + Phi.addIncoming(Phi.getIncomingValueForBlock(L->getHeader()), + TopCall->getParent()); + return TopCall; } bool TapirTarget::shouldProcessFunction(const Function &F) const { diff --git a/llvm/lib/Transforms/Tapir/Outline.cpp b/llvm/lib/Transforms/Tapir/Outline.cpp index 952a72f82be7..06323ca5408a 100644 --- a/llvm/lib/Transforms/Tapir/Outline.cpp +++ b/llvm/lib/Transforms/Tapir/Outline.cpp @@ -17,8 +17,6 @@ #include "llvm/IR/AttributeMask.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/IntrinsicInst.h" -#include "llvm/Support/ModRef.h" #include "llvm/Support/Timer.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Cloning.h" @@ -291,28 +289,28 @@ Function *llvm::CreateHelper( if (Type::getVoidTy(Header->getContext()) == RetTy) VoidRet = true; - std::vector paramTy; + std::vector ParamTy; // Add the types of the input values to the function's argument list - for (Value *value : Inputs) { - LLVM_DEBUG(dbgs() << "value used in func: " << *value << "\n"); - paramTy.push_back(value->getType()); + for (Value *Value : Inputs) { + LLVM_DEBUG(dbgs() << "value used in func: " << *Value << "\n"); + ParamTy.push_back(Value->getType()); } // Add the types of the output values to the function's argument list. 
- for (Value *output : Outputs) { - LLVM_DEBUG(dbgs() << "instr used in func: " << *output << "\n"); - paramTy.push_back(PointerType::getUnqual(output->getType())); + for (Value *Output : Outputs) { + LLVM_DEBUG(dbgs() << "instr used in func: " << *Output << "\n"); + ParamTy.push_back(PointerType::getUnqual(Output->getType())); } LLVM_DEBUG({ dbgs() << "Function type: " << *RetTy << " f("; - for (Type *i : paramTy) + for (Type *i : ParamTy) dbgs() << *i << ", "; dbgs() << ")\n"; }); - FunctionType *FTy = FunctionType::get(RetTy, paramTy, false); + FunctionType *FTy = FunctionType::get(RetTy, ParamTy, false); // Create the new function Function *NewFunc = Function::Create( diff --git a/llvm/lib/Transforms/Tapir/SerializeSmallTasks.cpp b/llvm/lib/Transforms/Tapir/SerializeSmallTasks.cpp index 52c866c30554..3750dd9b1a1c 100644 --- a/llvm/lib/Transforms/Tapir/SerializeSmallTasks.cpp +++ b/llvm/lib/Transforms/Tapir/SerializeSmallTasks.cpp @@ -115,7 +115,7 @@ struct SerializeSmallTasks : public FunctionPass { AU.addPreserved(); } }; -} +} // namespace char SerializeSmallTasks::ID = 0; INITIALIZE_PASS_BEGIN(SerializeSmallTasks, "serialize-small-tasks", diff --git a/llvm/lib/Transforms/Tapir/Tapir.cpp b/llvm/lib/Transforms/Tapir/Tapir.cpp index bc4e056fc9ab..2ab648cd61d9 100644 --- a/llvm/lib/Transforms/Tapir/Tapir.cpp +++ b/llvm/lib/Transforms/Tapir/Tapir.cpp @@ -12,13 +12,8 @@ // //===----------------------------------------------------------------------===// -#include "llvm-c/Transforms/Tapir.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/IR/LegacyPassManager.h" #include "llvm/InitializePasses.h" -#include "llvm/Pass.h" #include "llvm/PassRegistry.h" -#include "llvm/Transforms/Tapir.h" using namespace llvm; diff --git a/llvm/lib/Transforms/Tapir/TapirLoopInfo.cpp b/llvm/lib/Transforms/Tapir/TapirLoopInfo.cpp index 1846e2a782ad..5b2b98171d74 100644 --- a/llvm/lib/Transforms/Tapir/TapirLoopInfo.cpp +++ b/llvm/lib/Transforms/Tapir/TapirLoopInfo.cpp @@ -562,8 +562,8 
@@ Value *llvm::emitTransformedIndex( const InductionDescriptor &ID) { SCEVExpander Exp(*SE, DL, "induction"); - auto Step = ID.getStep(); - auto StartValue = ID.getStartValue(); + const auto *Step = ID.getStep(); + auto *StartValue = ID.getStartValue(); assert(Index->getType() == Step->getType() && "Index type does not match StepValue type"); @@ -615,7 +615,7 @@ Value *llvm::emitTransformedIndex( } case InductionDescriptor::IK_FpInduction: { assert(Step->getType()->isFloatingPointTy() && "Expected FP Step value"); - auto InductionBinOp = ID.getInductionBinOp(); + auto *InductionBinOp = ID.getInductionBinOp(); assert(InductionBinOp && (InductionBinOp->getOpcode() == Instruction::FAdd || InductionBinOp->getOpcode() == Instruction::FSub) && diff --git a/llvm/lib/Transforms/Tapir/TapirToTarget.cpp b/llvm/lib/Transforms/Tapir/TapirToTarget.cpp index d632c7113af3..0e87a441fd18 100644 --- a/llvm/lib/Transforms/Tapir/TapirToTarget.cpp +++ b/llvm/lib/Transforms/Tapir/TapirToTarget.cpp @@ -23,9 +23,9 @@ #include "llvm/InitializePasses.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Timer.h" -#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Tapir.h" #include "llvm/Transforms/Tapir/LoweringUtils.h" +#include "llvm/Transforms/Utils/Cloning.h" #include "llvm/Transforms/Utils/TapirUtils.h" #define DEBUG_TYPE "tapir2target" diff --git a/llvm/lib/Transforms/Utils/InlineFunction.cpp b/llvm/lib/Transforms/Utils/InlineFunction.cpp index f77184a08456..5c3afaba6b93 100644 --- a/llvm/lib/Transforms/Utils/InlineFunction.cpp +++ b/llvm/lib/Transforms/Utils/InlineFunction.cpp @@ -2951,7 +2951,7 @@ llvm::InlineResult llvm::InlineFunction(CallBase &CB, InlineFunctionInfo &IFI, if (InlinedFunctionInfo.ContainsDetach && (InlinedFunctionInfo.ContainsDynamicAllocas || MayBeUnsyncedAtCall)) { Module *M = Caller->getParent(); - // Get the two intrinsics we care about. + // Get the taskframe.create intrinsic. 
Function *TFCreateFn = Intrinsic::getDeclaration(M, Intrinsic::taskframe_create); diff --git a/llvm/lib/Transforms/Utils/TapirUtils.cpp b/llvm/lib/Transforms/Utils/TapirUtils.cpp index 2746d45a648a..c15b5a39ebff 100644 --- a/llvm/lib/Transforms/Utils/TapirUtils.cpp +++ b/llvm/lib/Transforms/Utils/TapirUtils.cpp @@ -1193,8 +1193,7 @@ const BasicBlock *llvm::GetDetachedCtx(const BasicBlock *BB) { // Return the current block, which is the entry of this detached // sub-CFG. return CurrBB; - else if (const Value *SubTaskFrame = - getTaskFrameUsed(DI->getDetached())) + if (const Value *SubTaskFrame = getTaskFrameUsed(DI->getDetached())) // Ignore this tasks's taskframe, if it has one. TaskFramesToIgnore.insert(SubTaskFrame); } @@ -1355,7 +1354,8 @@ void llvm::GetDetachedCFG(const DetachInst &DI, const DominatorTree &DT, "Reattach terminating detached CFG has nonmatching sync region."); TaskReturns.insert(BB); continue; - } else if (DetachInst *NestedDI = dyn_cast(Term)) { + } + if (DetachInst *NestedDI = dyn_cast(Term)) { assert(NestedDI != &DI && "Found recursive Detach"); // Add the successors of the nested detach instruction for searching. Todo.push_back(NestedDI->getDetached()); @@ -1363,15 +1363,17 @@ void llvm::GetDetachedCFG(const DetachInst &DI, const DominatorTree &DT, if (NestedDI->hasUnwindDest()) Todo.push_back(NestedDI->getUnwindDest()); continue; - } else if (SyncInst *SI = dyn_cast(Term)) { + } + if (SyncInst *SI = dyn_cast(Term)) { // A sync instruction should only apply to nested detaches within this // task. Hence it can be treated like a branch. 
assert(SI->getSyncRegion() != SyncRegion && "Sync in detached task applies to parent parallel context."); Todo.push_back(SI->getSuccessor(0)); continue; - } else if (isa(Term) || isa(Term) || - isa(Term)) { + } + if (isa(Term) || isa(Term) || + isa(Term)) { if (isDetachedRethrow(Term, SyncRegion)) { // A detached rethrow terminates this task and is included in the set of // exception-handling blocks that might not be unique to this task. @@ -1395,14 +1397,14 @@ void llvm::GetDetachedCFG(const DetachInst &DI, const DominatorTree &DT, } } continue; - } else if (isa(Term)) { + } + if (isa(Term)) { // We don't bother cloning unreachable exits from the detached CFG at this // point. We're cloning the entire detached CFG anyway when we outline // the function. continue; - } else { - llvm_unreachable("Detached task does not absolutely terminate in reattach"); } + llvm_unreachable("Detached task does not absolutely terminate in reattach"); } // Find the exception-handling exit blocks. @@ -1955,7 +1957,7 @@ BasicBlock *llvm::CreateSubTaskUnwindEdge(Intrinsic::ID TermFunc, Value *Token, return NewUnwindEdge; } -static BasicBlock *MaybePromoteCallInBlock(BasicBlock *BB, +static BasicBlock *maybePromoteCallInBlock(BasicBlock *BB, BasicBlock *UnwindEdge, const Value *TaskFrame) { for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { @@ -1999,7 +2001,7 @@ static BasicBlock *MaybePromoteCallInBlock(BasicBlock *BB, return nullptr; } -static Instruction *GetTaskFrameInstructionInBlock(BasicBlock *BB, +static Instruction *getTaskFrameInstructionInBlock(BasicBlock *BB, const Value *TaskFrame) { for (BasicBlock::iterator BBI = BB->begin(), E = BB->end(); BBI != E;) { Instruction *I = &*BBI++; @@ -2022,7 +2024,7 @@ static Instruction *GetTaskFrameInstructionInBlock(BasicBlock *BB, } // Recursively handle inlined tasks. 
-static void PromoteCallsInTasksHelper( +static void promoteCallsInTasksHelper( BasicBlock *EntryBlock, BasicBlock *UnwindEdge, BasicBlock *Unreachable, Value *CurrentTaskFrame, SmallVectorImpl *ParentWorklist, @@ -2041,10 +2043,10 @@ static void PromoteCallsInTasksHelper( // Promote any calls in the block to invokes. while (BasicBlock *NewBB = - MaybePromoteCallInBlock(BB, UnwindEdge, CurrentTaskFrame)) + maybePromoteCallInBlock(BB, UnwindEdge, CurrentTaskFrame)) BB = cast(NewBB->getTerminator())->getNormalDest(); - Instruction *TFI = GetTaskFrameInstructionInBlock(BB, CurrentTaskFrame); + Instruction *TFI = getTaskFrameInstructionInBlock(BB, CurrentTaskFrame); if (TFI && isTapirIntrinsic(Intrinsic::taskframe_create, TFI)) { Processed.insert(BB); Instruction *TFCreate = TFI; @@ -2062,7 +2064,7 @@ static void PromoteCallsInTasksHelper( Unreachable, TFCreate); // Recursively check all blocks - PromoteCallsInTasksHelper(NewBB, TaskFrameUnwindEdge, Unreachable, + promoteCallsInTasksHelper(NewBB, TaskFrameUnwindEdge, Unreachable, TFCreate, &Worklist, Processed); // Remove the unwind edge for the taskframe if it is not needed. @@ -2118,7 +2120,7 @@ static void PromoteCallsInTasksHelper( Intrinsic::detached_rethrow, DI->getSyncRegion(), UnwindEdge, Unreachable, DI); // Recursively check all blocks in the detached task. - PromoteCallsInTasksHelper(DI->getDetached(), SubTaskUnwindEdge, + promoteCallsInTasksHelper(DI->getDetached(), SubTaskUnwindEdge, Unreachable, CurrentTaskFrame, &Worklist, Processed); // If the new unwind edge is not used, remove it. 
@@ -2178,7 +2180,7 @@ void llvm::promoteCallsInTasksToInvokes(Function &F, const Twine Name) { SmallVector ToProcess; ToProcess.push_back(&F.getEntryBlock()); for (BasicBlock &BB : F) { - Instruction *TFI = GetTaskFrameInstructionInBlock(&BB, nullptr); + Instruction *TFI = getTaskFrameInstructionInBlock(&BB, nullptr); if (TFI && isTapirIntrinsic(Intrinsic::taskframe_create, TFI)) ToProcess.push_back(&BB); @@ -2203,7 +2205,7 @@ void llvm::promoteCallsInTasksToInvokes(Function &F, const Twine Name) { SmallPtrSet Processed; for (BasicBlock *BB : ToProcess) { if (!Processed.contains(BB)) - PromoteCallsInTasksHelper(BB, CleanupBB, UnreachableBlk, nullptr, nullptr, + promoteCallsInTasksHelper(BB, CleanupBB, UnreachableBlk, nullptr, nullptr, Processed); } @@ -2301,7 +2303,7 @@ void llvm::TapirLoopHints::setHint(StringRef Name, Metadata *Arg) { unsigned Val = C->getZExtValue(); Hint *Hints[] = {&Strategy, &Grainsize}; - for (auto H : Hints) { + for (auto *H : Hints) { if (Name == H->Name) { if (H->validate(Val)) H->Value = Val; diff --git a/llvm/lib/Transforms/Utils/TaskCanonicalize.cpp b/llvm/lib/Transforms/Utils/TaskCanonicalize.cpp index c73344d52a90..a8bf0c6e11e4 100644 --- a/llvm/lib/Transforms/Utils/TaskCanonicalize.cpp +++ b/llvm/lib/Transforms/Utils/TaskCanonicalize.cpp @@ -33,7 +33,7 @@ struct TaskCanonicalize : public FunctionPass { AU.addPreserved(); } }; -} +} // namespace char TaskCanonicalize::ID = 0; INITIALIZE_PASS_BEGIN(TaskCanonicalize, "task-canonicalize", From 2230c48ed739c819b9b73259f442f36e98b18d46 Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Wed, 24 Apr 2024 20:32:51 -0400 Subject: [PATCH 5/7] [LoweringUtils] Handle outlining and remapping of unreachable successors of a parallel task when those unreachable blocks are shared with the task parent. 
--- .../llvm/Transforms/Tapir/LoweringUtils.h | 4 +- llvm/lib/Transforms/Tapir/LoweringUtils.cpp | 17 +++++-- .../Tapir/outline-shared-unreachable.ll | 50 +++++++++++++++++++ 3 files changed, 64 insertions(+), 7 deletions(-) create mode 100644 llvm/test/Transforms/Tapir/outline-shared-unreachable.ll diff --git a/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h b/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h index c600a04d4582..d8ba4f0e3d56 100644 --- a/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h +++ b/llvm/include/llvm/Transforms/Tapir/LoweringUtils.h @@ -14,7 +14,6 @@ #define LOWERING_UTILS_H_ #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/MapVector.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/IR/Instructions.h" @@ -506,6 +505,7 @@ void getTaskBlocks(Task *T, std::vector &TaskBlocks, SmallPtrSetImpl &ReattachBlocks, SmallPtrSetImpl &TaskResumeBlocks, SmallPtrSetImpl &SharedEHEntries, + SmallPtrSetImpl &UnreachableExits, const DominatorTree *DT); /// Outlines the content of task \p T in function \p F into a new helper @@ -563,6 +563,6 @@ Instruction *replaceLoopWithCallToOutline( TapirLoopInfo *TL, TaskOutlineInfo &Out, SmallVectorImpl &OutlineInputs); -} // end namepsace llvm +} // namespace llvm #endif diff --git a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp index 6b6503f2f53a..51771f6e00ae 100644 --- a/llvm/lib/Transforms/Tapir/LoweringUtils.cpp +++ b/llvm/lib/Transforms/Tapir/LoweringUtils.cpp @@ -14,6 +14,7 @@ #include "llvm/IR/Dominators.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/Timer.h" #include "llvm/Transforms/IPO/FunctionAttrs.h" @@ -554,6 +555,7 @@ void llvm::getTaskBlocks(Task *T, std::vector &TaskBlocks, SmallPtrSetImpl &ReattachBlocks, SmallPtrSetImpl &TaskResumeBlocks, SmallPtrSetImpl &SharedEHEntries, + SmallPtrSetImpl 
&UnreachableExits, const DominatorTree *DT) { NamedRegionTimer NRT("getTaskBlocks", "Get task blocks", TimerGroupName, TimerGroupDescription, TimePassesIsEnabled); @@ -639,8 +641,12 @@ void llvm::getTaskBlocks(Task *T, std::vector &TaskBlocks, // Record the blocks terminated by reattaches and detached rethrows. if (isa(B->getTerminator())) ReattachBlocks.insert(B); - if (isDetachedRethrow(B->getTerminator())) + else if (isDetachedRethrow(B->getTerminator())) TaskResumeBlocks.insert(B); + else if (S->isSpindleExiting(B)) + for (BasicBlock *Succ : successors(B)) + if (isa(Succ->getFirstNonPHIOrDbg())) + UnreachableExits.insert(Succ); } } } @@ -658,11 +664,12 @@ Function *llvm::createHelperForTask( // handling. SmallPtrSet ReattachBlocks; SmallPtrSet TaskResumeBlocks; + SmallPtrSet UnreachableExits; // Entry blocks of shared-EH spindles may contain PHI nodes that need to be // rewritten in the cloned helper. SmallPtrSet SharedEHEntries; getTaskBlocks(T, TaskBlocks, ReattachBlocks, TaskResumeBlocks, - SharedEHEntries, &OA.DT); + SharedEHEntries, UnreachableExits, &OA.DT); SmallVector Returns; // Ignore returns cloned. 
ValueSet Outputs; @@ -687,8 +694,8 @@ Function *llvm::createHelperForTask( Helper = CreateHelper( Args, Outputs, TaskBlocks, Header, Entry, DI->getContinue(), VMap, DestM, F.getSubprogram() != nullptr, Returns, NameSuffix.str(), - &ReattachBlocks, &TaskResumeBlocks, &SharedEHEntries, nullptr, nullptr, - ReturnType, nullptr, nullptr, Mat.get()); + &ReattachBlocks, &TaskResumeBlocks, &SharedEHEntries, nullptr, + &UnreachableExits, ReturnType, nullptr, nullptr, Mat.get()); } assert(Returns.empty() && "Returns cloned when cloning detached CFG."); @@ -760,7 +767,7 @@ static void unlinkTaskEHFromParent(Task *T) { getDetachUnwindPHIUses(DI, UnwindPHIs); SmallVector ToRemove; - // Look through PHI's that use the landing pad of the detach's unwind, and + // Look through PHI's that use the landingpad of the detach's unwind, and // update those PHI's to not refer to task T. for (BasicBlock *BB : UnwindPHIs) { for (BasicBlock *Pred : predecessors(BB)) { diff --git a/llvm/test/Transforms/Tapir/outline-shared-unreachable.ll b/llvm/test/Transforms/Tapir/outline-shared-unreachable.ll new file mode 100644 index 000000000000..ec0224e66939 --- /dev/null +++ b/llvm/test/Transforms/Tapir/outline-shared-unreachable.ll @@ -0,0 +1,50 @@ +; Check that Tapir lowering handles branches to unreachable blocks +; when those blocks are shared with the parent spawner. 
+; +; RUN: opt < %s -passes="tapir2target" -use-opencilk-runtime-bc=false -debug-abi-calls -S | FileCheck %s + +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx13.0.0" + +define void @_ZN9LAMMPS_NS6Verlet14run_stencil_mdEiRNSt3__13mapIiNS1_6vectorIiNS1_9allocatorIiEEEENS1_4lessIiEENS4_INS1_4pairIKiS6_EEEEEESE_RNS2_IiiS8_NS4_INS9_ISA_iEEEEEESI_PPdSK_() personality ptr null { +entry: + br i1 false, label %entry.unreachable_crit_edge, label %pfor.detach + +entry.unreachable_crit_edge: ; preds = %entry + br label %unreachable + +pfor.detach: ; preds = %pfor.detach, %entry + detach within none, label %pfor.body, label %pfor.detach unwind label %lpad714.loopexit + +pfor.body: ; preds = %pfor.detach + br label %unreachable + +lpad714.loopexit: ; preds = %pfor.detach + %lpad.loopexit = landingpad { ptr, i32 } + cleanup + ret void + +unreachable: ; preds = %entry.unreachable_crit_edge, %pfor.body + unreachable + +; uselistorder directives + uselistorder label %unreachable, { 1, 0 } +} + +; CHECK: define void @_ZN9LAMMPS_NS6Verlet14run_stencil_mdEiRNSt3__13mapIiNS1_6vectorIiNS1_9allocatorIiEEEENS1_4lessIiEENS4_INS1_4pairIKiS6_EEEEEESE_RNS2_IiiS8_NS4_INS9_ISA_iEEEEEESI_PPdSK_() +; CHECK: pfor.detach: +; CHECK: invoke fastcc void @_ZN9LAMMPS_NS6Verlet14run_stencil_mdEiRNSt3__13mapIiNS1_6vectorIiNS1_9allocatorIiEEEENS1_4lessIiEENS4_INS1_4pairIKiS6_EEEEEESE_RNS2_IiiS8_NS4_INS9_ISA_iEEEEEESI_PPdSK_.outline_pfor.body.otd1() +; CHECK: to label %pfor.detach unwind label %lpad714.loopexit + +; CHECK: define internal fastcc void @_ZN9LAMMPS_NS6Verlet14run_stencil_mdEiRNSt3__13mapIiNS1_6vectorIiNS1_9allocatorIiEEEENS1_4lessIiEENS4_INS1_4pairIKiS6_EEEEEESE_RNS2_IiiS8_NS4_INS9_ISA_iEEEEEESI_PPdSK_.outline_pfor.body.otd1() +; CHECK: pfor.detach.otd1: +; CHECK: br label %pfor.body.otd1 + +; CHECK: pfor.body.otd1: +; CHECK-NEXT: br label %unreachable.otd1 + +; CHECK: unreachable.otd1: +; CHECK-NEXT: unreachable + +; uselistorder directives 
+uselistorder ptr null, { 1, 2, 0 } From c299a04d3562751af7801ead89495e631ece738c Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Wed, 24 Apr 2024 21:53:48 -0400 Subject: [PATCH 6/7] [TapirTaskInfo] Fix the check for when a block is simply enclosed by a task to handle unreachable blocks. --- llvm/include/llvm/Analysis/TapirTaskInfo.h | 6 +- .../Tapir/unlink-unreachable-detach-unwind.ll | 81 +++++++++++++++++++ 2 files changed, 86 insertions(+), 1 deletion(-) create mode 100644 llvm/test/Transforms/Tapir/unlink-unreachable-detach-unwind.ll diff --git a/llvm/include/llvm/Analysis/TapirTaskInfo.h b/llvm/include/llvm/Analysis/TapirTaskInfo.h index 88066904dce8..a5a1cc114fd3 100644 --- a/llvm/include/llvm/Analysis/TapirTaskInfo.h +++ b/llvm/include/llvm/Analysis/TapirTaskInfo.h @@ -859,7 +859,11 @@ class Task { /// Returns true if this task encloses basic block BB simply, that is, without /// checking any shared EH exits of this task. bool simplyEncloses(const BasicBlock *BB) const { - return DomTree.dominates(getEntry(), BB); + // DomTree.dominates(getEntry(), BB) will return true if BB is not reachable + // and getEntry() is reachable. This method should return that BB is not + // simply enclosed in that case. + return DomTree.isReachableFromEntry(BB) && + DomTree.dominates(getEntry(), BB); } /// Return true if specified task encloses basic block BB. diff --git a/llvm/test/Transforms/Tapir/unlink-unreachable-detach-unwind.ll b/llvm/test/Transforms/Tapir/unlink-unreachable-detach-unwind.ll new file mode 100644 index 000000000000..53a67a632edf --- /dev/null +++ b/llvm/test/Transforms/Tapir/unlink-unreachable-detach-unwind.ll @@ -0,0 +1,81 @@ +; Check that unlinking a task from its unwind destination handles +; unreachable blocks correctly. 
+; +; RUN: opt < %s -passes="tapir2target" -use-opencilk-runtime-bc=false -debug-abi-calls -S | FileCheck %s +target datalayout = "e-m:o-i64:64-i128:128-n32:64-S128" +target triple = "arm64-apple-macosx13.0.0" + +define void @_ZN9LAMMPS_NS6Verlet14run_stencil_mdEiPNSt3__16vectorIiNS1_9allocatorIiEEEES6_PiS7_PPdS9_() personality ptr null { +entry: + %syncreg = call token @llvm.syncregion.start() + %syncreg359 = call token @llvm.syncregion.start() + %syncreg461 = call token @llvm.syncregion.start() + %syncreg783 = call token @llvm.syncregion.start() + %syncreg1089 = call token @llvm.syncregion.start() + br label %pfor.detach470 + +for.body87: ; No predecessors! + %call116 = invoke i64 null(ptr null, ptr null) + to label %invoke.cont115 unwind label %lpad651 + +invoke.cont115: ; preds = %for.body87 + store i64 %call116, ptr null, align 8 + br label %for.cond131 + +for.cond131: ; preds = %for.cond131, %invoke.cont115 + br label %for.cond131 + +pfor.body.entry: ; preds = %pfor.inc273 + %syncreg223 = call token @llvm.syncregion.start() + reattach within %syncreg, label %pfor.inc273 + +pfor.inc273: ; preds = %pfor.inc273, %pfor.body.entry + detach within %syncreg, label %pfor.body.entry, label %pfor.inc273 unwind label %lpad651 + +pfor.detach470: ; preds = %pfor.detach470, %entry + detach within none, label %pfor.body.entry472, label %pfor.detach470 unwind label %lpad651 + +pfor.body.entry472: ; preds = %pfor.detach470 + %syncreg481 = call token @llvm.syncregion.start() + br label %pfor.detach490 + +pfor.detach490: ; preds = %pfor.detach490, %pfor.body.entry472 + br label %pfor.detach490 + +lpad651: ; preds = %pfor.inc907, %pfor.detach470, %pfor.inc273, %for.body87 + %0 = landingpad { ptr, i32 } + cleanup + resume { ptr, i32 } zeroinitializer + +pfor.body.entry799: ; preds = %pfor.inc907 + %syncreg843 = call token @llvm.syncregion.start() + reattach within none, label %pfor.inc907 + +pfor.inc907: ; preds = %pfor.inc907, %pfor.body.entry799 + detach within none, label 
%pfor.body.entry799, label %pfor.inc907 unwind label %lpad651 +} + +; CHECK: define void @_ZN9LAMMPS_NS6Verlet14run_stencil_mdEiPNSt3__16vectorIiNS1_9allocatorIiEEEES6_PiS7_PPdS9_() + +; Outlining the task detached at pfor.detach470 should not delete for.body87 and leave the use of %call116 in invoke.cont115 without its definition. + +; CHECK: for.body87: +; CHECK-NEXT: %call116 = invoke i64 null(ptr null, ptr null) +; CHECK-NEXT: to label %invoke.cont115 unwind label %lpad651 + +; CHECK: invoke.cont115: +; CHECK-NEXT: store i64 %call116, ptr null, align 8 + +; CHECK: pfor.detach470: +; CHECK-NOT: detach within +; CHECK: invoke fastcc void @_ZN9LAMMPS_NS6Verlet14run_stencil_mdEiPNSt3__16vectorIiNS1_9allocatorIiEEEES6_PiS7_PPdS9_.outline_pfor.body.entry472.otd1() +; CHECK-NEXT: to label %pfor.detach470 unwind label %lpad651 + +; Function Attrs: nounwind willreturn memory(argmem: readwrite) +declare token @llvm.syncregion.start() #0 + +; uselistorder directives +uselistorder ptr null, { 1, 2, 3, 4, 5, 6, 0 } +uselistorder ptr @llvm.syncregion.start, { 7, 6, 5, 4, 3, 2, 1, 0 } + +attributes #0 = { nounwind willreturn memory(argmem: readwrite) } From 490d34a5e00074c55ed3442dc53124752916348a Mon Sep 17 00:00:00 2001 From: TB Schardl Date: Sun, 3 Mar 2024 11:49:56 -0500 Subject: [PATCH 7/7] [Darwin] Use per-architecture LLVM bitcode files for the OpenCilk runtime. 
--- clang/lib/Driver/ToolChains/Darwin.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/clang/lib/Driver/ToolChains/Darwin.cpp b/clang/lib/Driver/ToolChains/Darwin.cpp index dc6f8cdeeac0..81e473a487a6 100644 --- a/clang/lib/Driver/ToolChains/Darwin.cpp +++ b/clang/lib/Driver/ToolChains/Darwin.cpp @@ -3426,6 +3426,8 @@ void DarwinClang::AddOpenCilkABIBitcode(const ArgList &Args, : "libopencilk-abi"); BitcodeFilename += "_"; BitcodeFilename += getOSLibraryNameSuffix(); + BitcodeFilename += "-"; + BitcodeFilename += getMachOArchName(Args); BitcodeFilename += ".bc"; for (auto RuntimePath : getOpenCilkRuntimePaths(Args)) {