diff --git a/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel index b4d566946643..63776506746a 100644 --- a/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/Common/BUILD.bazel @@ -122,6 +122,7 @@ iree_compiler_cc_library( "MaterializeEncodingIntoNop.cpp", "MaterializeEncodingIntoPackUnPack.cpp", "MemrefCopyToLinalg.cpp", + "NormalizeLoopBounds.cpp", "OptimizeTensorInsertExtractSlices.cpp", "OptimizeVectorTransferPass.cpp", "PadDynamicAlloc.cpp", diff --git a/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt index 611920389c54..ee3a6204582b 100644 --- a/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/Common/CMakeLists.txt @@ -113,6 +113,7 @@ iree_cc_library( "MaterializeEncodingIntoNop.cpp" "MaterializeEncodingIntoPackUnPack.cpp" "MemrefCopyToLinalg.cpp" + "NormalizeLoopBounds.cpp" "OptimizeTensorInsertExtractSlices.cpp" "OptimizeVectorTransferPass.cpp" "PadDynamicAlloc.cpp" diff --git a/compiler/src/iree/compiler/Codegen/Common/NormalizeLoopBounds.cpp b/compiler/src/iree/compiler/Codegen/Common/NormalizeLoopBounds.cpp new file mode 100644 index 000000000000..61f02e6cbe03 --- /dev/null +++ b/compiler/src/iree/compiler/Codegen/Common/NormalizeLoopBounds.cpp @@ -0,0 +1,199 @@ +// Copyright 2024 The IREE Authors +// +// Licensed under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+
+#include "iree/compiler/Codegen/Common/PassDetail.h"
+#include "iree/compiler/Codegen/Common/Passes.h"
+#include "mlir/Dialect/Affine/IR/AffineOps.h"
+#include "mlir/Dialect/Arith/Utils/Utils.h"
+#include "mlir/Dialect/SCF/IR/SCF.h"
+#include "mlir/Dialect/Utils/StaticValueUtils.h"
+#include "mlir/IR/AffineExpr.h"
+#include "mlir/IR/OpDefinition.h"
+#include "mlir/Support/LogicalResult.h"
+
+#define DEBUG_TYPE "iree-codegen-normalize-loop-bounds"
+
+namespace mlir::iree_compiler {
+
+/// Builds the trip count `ceildiv(ub - lb, step)` of a loop as a composed
+/// (and, where possible, folded) affine apply. This is the upper bound of the
+/// equivalent loop that starts at 0 and advances by 1.
+static OpFoldResult emitNormalizedUpperBound(RewriterBase &rewriter,
+                                             Location loc, OpFoldResult lb,
+                                             OpFoldResult ub,
+                                             OpFoldResult step) {
+  AffineExpr d0, d1, d2;
+  bindDims(rewriter.getContext(), d0, d1, d2);
+  // Operand order matches the dims: d0 = ub, d1 = lb, d2 = step.
+  return affine::makeComposedFoldedAffineApply(
+      rewriter, loc, (d0 - d1).ceilDiv(d2), {ub, lb, step});
+}
+
+namespace {
+/// Helper structure for storing the newly computed loop bounds, with one
+/// entry per loop dimension.
+struct LoopRanges {
+  SmallVector<OpFoldResult> lowerBounds;
+  SmallVector<OpFoldResult> upperBounds;
+  SmallVector<OpFoldResult> steps;
+};
+} // namespace
+
+/// Computes the normalized range (lower bound 0, step 1) of every loop
+/// dimension and rewrites the uses of each induction variable inside `body`
+/// to the denormalized value `iv * step + lb`.
+///
+/// Ops computing the new upper bounds are created at the rewriter's current
+/// insertion point; the denormalizing `affine.apply` ops are created at the
+/// start of `body`. The caller is responsible for installing the returned
+/// ranges on the loop.
+///
+/// Returns failure, without modifying the IR, unless every step is a constant
+/// that is strictly greater than zero.
+static FailureOr<LoopRanges>
+emitNormalizedLoopBounds(RewriterBase &rewriter, Location loc, Block *body,
+                         ValueRange ivs, ArrayRef<OpFoldResult> lbs,
+                         ArrayRef<OpFoldResult> ubs,
+                         ArrayRef<OpFoldResult> steps) {
+  // Validate all steps before emitting anything. Bailing out half-way through
+  // the loop below would leave the induction variables of the dimensions
+  // already visited rewritten against bounds that are never installed.
+  for (OpFoldResult step : steps) {
+    std::optional<int64_t> stepInt = getConstantIntValue(step);
+    // Bail out on non-constant, zero and negative steps.
+    if (!stepInt || stepInt.value() <= 0) {
+      return failure();
+    }
+  }
+
+  Attribute zero = rewriter.getIndexAttr(0);
+  Attribute one = rewriter.getIndexAttr(1);
+  SmallVector<OpFoldResult> newLbs;
+  SmallVector<OpFoldResult> newUbs;
+  SmallVector<OpFoldResult> newSteps;
+  for (auto &&[iv, lb, ub, step] : llvm::zip(ivs, lbs, ubs, steps)) {
+    // The lower bound and step of a normalized loop are always zero/one.
+    newLbs.push_back(zero);
+    newSteps.push_back(one);
+
+    // Compute the normalized upper bound.
+    OpFoldResult newUb = emitNormalizedUpperBound(rewriter, loc, lb, ub, step);
+    newUbs.push_back(newUb);
+
+    // Compute the denormalized induction value at the top of the loop body
+    // and redirect the existing uses to it. The guard restores the caller's
+    // insertion point for the next dimension.
+    {
+      OpBuilder::InsertionGuard g(rewriter);
+      rewriter.setInsertionPointToStart(body);
+      AffineExpr idx, stepExpr, lbExpr;
+      bindDims(rewriter.getContext(), idx, stepExpr, lbExpr);
+      affine::AffineApplyOp denormalizedIV = affine::makeComposedAffineApply(
+          rewriter, loc, idx * stepExpr + lbExpr, {iv, step, lb});
+      // The new apply op consumes `iv` itself and must keep doing so.
+      SmallPtrSet<Operation *, 2> preserve = {iv.getDefiningOp(),
+                                              denormalizedIV};
+      rewriter.replaceAllUsesExcept(iv, denormalizedIV.getResult(), preserve);
+    }
+  }
+  return LoopRanges{newLbs, newUbs, newSteps};
+}
+
+/// Transform a `scf.for` loop with a strictly positive step
+///   for %i = %lb to %ub step %s
+/// into a 0-based loop with step 1
+///   for %ii = 0 to ceildiv(%ub - %lb, %s) step 1
+/// Insert an `affine.apply` operation to compute the denormalized index value.
+///
+/// Returns success if the loop is normalized afterwards (including when it
+/// already was), and failure if it was left untouched.
+static LogicalResult normalizeLoopBounds(RewriterBase &rewriter,
+                                         scf::ForOp forOp) {
+  OpBuilder::InsertionGuard g(rewriter);
+  // Return if already normalized.
+  std::optional<int64_t> lbInt = getConstantIntValue(forOp.getLowerBound());
+  std::optional<int64_t> stepInt = getConstantIntValue(forOp.getStep());
+  if (lbInt && stepInt && lbInt.value() == 0 && stepInt.value() == 1) {
+    return success();
+  }
+
+  // Bail out on non-index types because the affine applies that are generated
+  // require it.
+  if (!isa<IndexType>(forOp.getInductionVar().getType())) {
+    return failure();
+  }
+
+  Location loc = forOp.getLoc();
+
+  // New bound computations go right before the loop they feed.
+  rewriter.setInsertionPoint(forOp);
+  FailureOr<LoopRanges> newLoopParams = emitNormalizedLoopBounds(
+      rewriter, loc, forOp.getBody(), forOp.getInductionVar(),
+      getAsOpFoldResult(forOp.getLowerBound()),
+      getAsOpFoldResult(forOp.getUpperBound()),
+      getAsOpFoldResult(forOp.getStep()));
+  if (failed(newLoopParams)) {
+    return failure();
+  }
+
+  assert(newLoopParams->lowerBounds.size() == 1 &&
+         newLoopParams->upperBounds.size() == 1 &&
+         newLoopParams->steps.size() == 1 &&
+         "expected single range for scf.for");
+
+  // `scf.for` takes its bounds as SSA values, so materialize constants for
+  // any bound that folded to an attribute.
+  rewriter.modifyOpInPlace(forOp, [&]() {
+    forOp.setLowerBound(getValueOrCreateConstantIndexOp(
+        rewriter, loc, newLoopParams->lowerBounds.front()));
+    forOp.setUpperBound(getValueOrCreateConstantIndexOp(
+        rewriter, loc, newLoopParams->upperBounds.front()));
+    forOp.setStep(getValueOrCreateConstantIndexOp(
+        rewriter, loc, newLoopParams->steps.front()));
+  });
+  return success();
+}
+
+/// Transform a `scf.forall` loop with strictly positive steps
+///   forall (%i, %j) = (%lb0, %lb1) to (%ub0, %ub1) step (%s0, %s1)
+/// into a 0-based loop with step 1 (normalized)
+///   forall (%i, %j) in (ceildiv(%ub0 - %lb0, %s0), ceildiv(%ub1 - %lb1, %s1))
+/// Insert `affine.apply` operations to compute the denormalized index values.
+///
+/// Returns success if the loop is normalized afterwards (including when it
+/// already was), and failure if it was left untouched.
+static LogicalResult normalizeLoopBounds(RewriterBase &rewriter,
+                                         scf::ForallOp forallOp) {
+  OpBuilder::InsertionGuard g(rewriter);
+  if (forallOp.isNormalized())
+    return success();
+
+  // `scf.forall` requires that all lbs/ubs/steps/ivs are index type so no need
+  // to check here.
+
+  Location loc = forallOp.getLoc();
+
+  rewriter.setInsertionPoint(forallOp);
+  FailureOr<LoopRanges> newLoopParams = emitNormalizedLoopBounds(
+      rewriter, loc, forallOp.getBody(), forallOp.getInductionVars(),
+      forallOp.getMixedLowerBound(), forallOp.getMixedUpperBound(),
+      forallOp.getMixedStep());
+  if (failed(newLoopParams)) {
+    return failure();
+  }
+
+  // Build the replacement with the location of the loop it replaces so that
+  // diagnostics and debug info keep pointing at the original loop.
+  rewriter.setInsertionPointAfter(forallOp);
+  auto newLoop = rewriter.create<scf::ForallOp>(
+      loc, newLoopParams->lowerBounds, newLoopParams->upperBounds,
+      newLoopParams->steps, forallOp.getOutputs(), forallOp.getMapping());
+  // Drop the terminator of the freshly built loop before merging in the
+  // original body, which carries its own.
+  rewriter.eraseOp(newLoop.getTerminator());
+  rewriter.mergeBlocks(forallOp.getBody(), newLoop.getBody(),
+                       newLoop.getBody()->getArguments());
+  rewriter.replaceOp(forallOp, newLoop);
+
+  return success();
+}
+
+namespace {
+/// Pass that walks the operation it is run on and normalizes the bounds of
+/// the nested `scf.for` and/or `scf.forall` loops. Loops that cannot be
+/// normalized are left as they are.
+struct NormalizeLoopBoundsPass
+    : public NormalizeLoopBoundsPassBase<NormalizeLoopBoundsPass> {
+  NormalizeLoopBoundsPass(bool nFor, bool nForall) {
+    // Store the flags in the `normalizeFor` / `normalizeForall` options that
+    // Passes.td declares for this pass instead of in separate fields: a field
+    // with the same name would hide the option, and values parsed from a pass
+    // pipeline string would never be read.
+    this->normalizeFor = nFor;
+    this->normalizeForall = nForall;
+  }
+  void runOnOperation() override {
+    Operation *op = getOperation();
+    IRRewriter rewriter(op);
+    if (normalizeFor) {
+      op->walk([&](scf::ForOp forOp) {
+        // Best effort: a failure means this loop stays unnormalized.
+        (void)normalizeLoopBounds(rewriter, forOp);
+      });
+    }
+    if (normalizeForall) {
+      op->walk([&](scf::ForallOp forallOp) {
+        // Best effort: a failure means this loop stays unnormalized.
+        (void)normalizeLoopBounds(rewriter, forallOp);
+      });
+    }
+  }
+};
+} // namespace
+
+/// Creates the loop bound normalization pass. `normalizeFor` and
+/// `normalizeForall` select which of the two loop kinds are rewritten.
+std::unique_ptr<Pass> createNormalizeLoopBoundsPass(bool normalizeFor,
+                                                    bool normalizeForall) {
+  return std::make_unique<NormalizeLoopBoundsPass>(normalizeFor,
+                                                   normalizeForall);
+}
+
+} // namespace mlir::iree_compiler
diff --git a/compiler/src/iree/compiler/Codegen/Common/Passes.h b/compiler/src/iree/compiler/Codegen/Common/Passes.h
index 577d7aa66b11..81f70cb6a1b7 100644
--- a/compiler/src/iree/compiler/Codegen/Common/Passes.h
+++ b/compiler/src/iree/compiler/Codegen/Common/Passes.h
@@ -226,6 +226,12 @@ createMemrefCopyToLinalgPass();
 /// Extracts lowering configs and translation info from user configs.
std::unique_ptr> createMaterializeUserConfigsPass(); +/// Normalizes `scf.for` and `scf.forall` loops to iterate over [0, ub) with +/// step 1; the two flags select which loop kinds are rewritten. +std::unique_ptr +createNormalizeLoopBoundsPass(bool normalizeFor = true, + bool normalizeForall = true); + /// Pass to optimize vector transfer_read and transfer_write. std::unique_ptr> createOptimizeVectorTransferPass(bool flatten = false); diff --git a/compiler/src/iree/compiler/Codegen/Common/Passes.td b/compiler/src/iree/compiler/Codegen/Common/Passes.td index 29283e0401d5..357104176da9 100644 --- a/compiler/src/iree/compiler/Codegen/Common/Passes.td +++ b/compiler/src/iree/compiler/Codegen/Common/Passes.td @@ -391,6 +391,22 @@ def MemrefCopyToLinalgPass : "mlir::iree_compiler::createMemrefCopyToLinalgPass()"; } +def NormalizeLoopBoundsPass : + Pass<"iree-codegen-normalize-loop-bounds", ""> { + let summary = "Normalize the loop bounds of `scf.for` and `scf.forall`"; + let constructor = "mlir::iree_compiler::createNormalizeLoopBoundsPass()"; + let options = [ + Option<"normalizeFor", "normalize-for", "bool", "true", + "Enable normalization for `scf.for` loops">, + Option<"normalizeForall", "normalize-forall", "bool", "true", + "Enable normalization for `scf.forall` loops">, + ]; + let dependentDialects = [ + "affine::AffineDialect", + "arith::ArithDialect" + ]; +} + def OptimizeVectorTransfer : InterfacePass<"iree-codegen-optimize-vector-transfer", "mlir::FunctionOpInterface"> { let summary = diff --git a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel index 94b7f4f50990..ba278b7f4a08 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel +++ b/compiler/src/iree/compiler/Codegen/Common/test/BUILD.bazel @@ -51,6 +51,7 @@ iree_lit_test_suite( "lower_ukernel_to_calls.mlir", "materialize_encoding_into_nop.mlir", "materialize_user_configs.mlir", + "normalize_loop_bounds.mlir", "optimize_tensor_insert_extract_slices.mlir", 
"pad_dynamic_alloc.mlir", "polynomial_approximation.mlir", diff --git a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt index 63480138c979..d4fcfde3a85b 100644 --- a/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt +++ b/compiler/src/iree/compiler/Codegen/Common/test/CMakeLists.txt @@ -47,6 +47,7 @@ iree_lit_test_suite( "lower_ukernel_to_calls.mlir" "materialize_encoding_into_nop.mlir" "materialize_user_configs.mlir" + "normalize_loop_bounds.mlir" "optimize_tensor_insert_extract_slices.mlir" "pad_dynamic_alloc.mlir" "polynomial_approximation.mlir" diff --git a/compiler/src/iree/compiler/Codegen/Common/test/normalize_loop_bounds.mlir b/compiler/src/iree/compiler/Codegen/Common/test/normalize_loop_bounds.mlir new file mode 100644 index 000000000000..6435cbd26425 --- /dev/null +++ b/compiler/src/iree/compiler/Codegen/Common/test/normalize_loop_bounds.mlir @@ -0,0 +1,118 @@ + +// RUN: iree-opt --split-input-file --pass-pipeline="builtin.module(iree-codegen-normalize-loop-bounds, cse)" --allow-unregistered-dialect --verify-diagnostics %s | FileCheck %s +module { + func.func @for_normalize_step() { + %c0 = arith.constant 0 : index + %c2 = arith.constant 2 : index + %c8 = arith.constant 8 : index + scf.for %arg0 = %c0 to %c8 step %c2 { + "iree.keep"(%arg0) : (index) -> () + } + return + } +} + +// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0 * 2)> +// CHECK-LABEL: func.func @for_normalize_step +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[C4:.+]] = arith.constant 4 : index +// CHECK: scf.for %[[ARG:.+]] = %[[C0]] to %[[C4]] step %[[C1]] +// CHECK-NEXT: affine.apply #[[$MAP]](%[[ARG]]) + +// ----- + +module { + func.func @for_normalize_lowerbound() { + %c1 = arith.constant 1 : index + %c2 = arith.constant 2 : index + %c8 = arith.constant 8 : index + scf.for %arg0 = %c2 to %c8 step %c1 { + "iree.keep"(%arg0) 
: (index) -> () + } + return + } +} + +// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0 + 2)> +// CHECK-LABEL: func.func @for_normalize_lowerbound +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[C6:.+]] = arith.constant 6 : index +// CHECK: scf.for %[[ARG:.+]] = %[[C0]] to %[[C6]] step %[[C1]] +// CHECK-NEXT: affine.apply #[[$MAP]](%[[ARG]]) + +// ----- + +module { + func.func @for_normalize_lowerbound_and_step() { + %c1 = arith.constant 1 : index + %c4 = arith.constant 4 : index + %c13 = arith.constant 13 : index + scf.for %arg0 = %c1 to %c13 step %c4 { + "iree.keep"(%arg0) : (index) -> () + } + return + } +} + +// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0 * 4 + 1)> +// CHECK-LABEL: func.func @for_normalize_lowerbound_and_step +// CHECK-DAG: %[[C0:.+]] = arith.constant 0 : index +// CHECK-DAG: %[[C1:.+]] = arith.constant 1 : index +// CHECK-DAG: %[[C3:.+]] = arith.constant 3 : index +// CHECK: scf.for %[[ARG:.+]] = %[[C0]] to %[[C3]] step %[[C1]] +// CHECK-NEXT: affine.apply #[[$MAP]](%[[ARG]]) + +// ----- + +module { + func.func @forall_normalize_step() { + scf.forall (%arg0, %arg1) = (0, 0) to (8, 16) step (8, 8) { + "iree.keep"(%arg0, %arg1) : (index, index) -> () + } + return + } +} + +// CHECK: #[[$MAP:.+]] = affine_map<(d0) -> (d0 * 8)> +// CHECK-LABEL: func.func @forall_normalize_step +// CHECK: scf.forall (%[[ARG0:.+]], %[[ARG1:.+]]) in (1, 2) +// CHECK-DAG: affine.apply #[[$MAP]](%[[ARG0]]) +// CHECK-DAG: affine.apply #[[$MAP]](%[[ARG1]]) + +// ----- + +module { + func.func @forall_normalize_lowerbound() { + scf.forall (%arg0, %arg1) = (2, 4) to (8, 16) step (1, 1) { + "iree.keep"(%arg0, %arg1) : (index, index) -> () + } + return + } +} + +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0) -> (d0 + 4)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0) -> (d0 + 2)> +// CHECK-LABEL: func.func @forall_normalize_lowerbound +// CHECK: scf.forall (%[[ARG0:.+]], %[[ARG1:.+]]) in (6, 12) +// 
CHECK-DAG: affine.apply #[[$MAP1]](%[[ARG0]]) +// CHECK-DAG: affine.apply #[[$MAP0]](%[[ARG1]]) + +// ----- + +module { + func.func @forall_normalize_lowerbound_and_step() { + scf.forall (%arg0, %arg1) = (2, 4) to (8, 16) step (2, 4) { + "iree.keep"(%arg0, %arg1) : (index, index) -> () + } + return + } +} + +// CHECK-DAG: #[[$MAP0:.+]] = affine_map<(d0) -> (d0 * 4 + 4)> +// CHECK-DAG: #[[$MAP1:.+]] = affine_map<(d0) -> (d0 * 2 + 2)> +// CHECK-LABEL: func.func @forall_normalize_lowerbound_and_step +// CHECK: scf.forall (%[[ARG0:.+]], %[[ARG1:.+]]) in (3, 3) +// CHECK-DAG: affine.apply #[[$MAP1]](%[[ARG0]]) +// CHECK-DAG: affine.apply #[[$MAP0]](%[[ARG1]])