diff --git a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp
index 21f988fe359e..9725432ec63e 100644
--- a/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/CPU/CPUMaterializeEncodings.cpp
@@ -466,7 +466,7 @@ materializeFuncOpEncodings(FunctionOpInterface funcOp,
   auto materializeEncodingValueFn = getMaterializeEncodingValueFn(targetAttr);
   populateMaterializeEncodingIntoPackUnPackPatterns(
       materializeEncodingPattern, typeConverter, materializeEncodingValueFn);
-  populateIREEMaterializeEncodingIntoPackUnPackPatterns(
+  populateShapeIndependentMaterializeEncodingPatterns(
       materializeEncodingPattern, target, typeConverter,
       materializeEncodingValueFn);
diff --git a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
index bfe190262afe..b7d75c9516e5 100644
--- a/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
+++ b/compiler/src/iree/compiler/Codegen/Common/EncodingUtils.h
@@ -129,9 +129,10 @@ void populateMaterializeEncodingIntoPackUnPackPatterns(
     MaterializeEncodingTypeConverter &typeConverter,
     MaterializeEncodingValueFn materializeEncodingValueFn);
 
-/// Pouplates the set of patterns that lowers IREE dialect (e.g., Flow, Hal,
-/// etc) ops with encoding types to pack/unpack ops.
-void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
+/// Populates the set of patterns that lower shape-like operations (e.g., Flow
+/// ops, Hal ops, tensor.empty, linalg.fill, etc.) with encoding types to the
+/// same ops with materialized shapes.
+void populateShapeIndependentMaterializeEncodingPatterns(
     RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target,
     MaterializeEncodingTypeConverter &typeConverter,
     MaterializeEncodingValueFn materializeEncodingValueFn);
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp
index feb55536add5..9275d4e7d224 100644
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/GPUMaterializeEncoding.cpp
@@ -489,7 +489,7 @@ void GPUMaterializeDeviceEncodingPass::runOnOperation() {
   MaterializeEncodingValueFn materializeEncodingValueFn =
       [](RankedTensorType, OpBuilder,
         Location) -> FailureOr<MaterializeEncodingValueInfo> { return {}; };
-  populateIREEMaterializeEncodingIntoPackUnPackPatterns(
+  populateShapeIndependentMaterializeEncodingPatterns(
       patterns, target, typeConverter, materializeEncodingValueFn);
   patterns.insert<GPUSetEncodingOpLoweringConversion,
diff --git a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding.mlir b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding.mlir
--- a/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding.mlir
+++ b/compiler/src/iree/compiler/Codegen/Common/GPU/test/gpu_materialize_encoding.mlir
+#encoding = #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f32, f32],
+                                    user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
+                                    round_dims_to = array<i64: 16, 16, 16>>
+#pipeline_layout = #hal.pipeline.layout<bindings = [
+  #hal.pipeline.binding<storage_buffer>,
+  #hal.pipeline.binding<storage_buffer>
+]>
+func.func @empty_fill_encoding_unroll8x8x4_MFMA_F32_16x16x4_F32() {
+  %c0 = arith.constant 0 : index
+  %0 = hal.interface.binding.subspan layout(#pipeline_layout) binding(1) alignment(64) offset(%c0) : !flow.dispatch.tensor<writeonly:tensor<255x513xf32, #encoding>>
+  %cst = arith.constant 0.0 : f32
+  %1 = tensor.empty() : tensor<255x513xf32, #encoding>
+  %2 = linalg.fill ins(%cst : f32) outs(%1 : tensor<255x513xf32, #encoding>) -> tensor<255x513xf32, #encoding>
+  flow.dispatch.tensor.store %2, %0, offsets = [0, 0], sizes = [255, 513], strides = [1, 1] : tensor<255x513xf32, #encoding> -> !flow.dispatch.tensor<writeonly:tensor<255x513xf32, #encoding>>
+  return
+}
+// CHECK-LABEL: func.func @empty_fill_encoding_unroll8x8x4_MFMA_F32_16x16x4_F32
+// CHECK:         %[[EMPTY:.+]] = tensor.empty() : tensor<2x33x8x4x16x4xf32>
+// CHECK:         %{{.+}} = linalg.fill ins({{.+}}) outs(%[[EMPTY]]
+
+// -----
+
 #encoding = #iree_encoding.encoding<operand_index = 2 : index, op_type = matmul, element_types = [f32, f32, f32],
                                     user_indexing_maps = [affine_map<(d0, d1, d2) -> (d0, d2)>, affine_map<(d0, d1, d2) -> (d2, d1)>, affine_map<(d0, d1, d2) -> (d0, d1)>],
                                     round_dims_to = array<i64: 16, 16, 16>>
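All of the pass changes in this patch (CPU, GPU, and the Nop fallback below) converge on the same two-call wiring. The condensed sketch below shows that wiring in one place; the surrounding pass boilerplate and the helper name `materializeEncodings` are hypothetical, while the two populate calls and their signatures come from the EncodingUtils.h hunk above.

```cpp
#include "iree/compiler/Codegen/Common/EncodingUtils.h"
#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;
using namespace mlir::iree_compiler;

// Hypothetical driver mirroring the call sites in this diff.
static LogicalResult
materializeEncodings(Operation *root,
                     MaterializeEncodingTypeConverter &typeConverter,
                     MaterializeEncodingValueFn materializeEncodingValueFn) {
  MLIRContext *context = root->getContext();
  RewritePatternSet patterns(context);
  MaterializeEncodingConversionTarget target(*context);
  // Op-specific lowerings that introduce tensor.pack/tensor.unpack.
  populateMaterializeEncodingIntoPackUnPackPatterns(
      patterns, typeConverter, materializeEncodingValueFn);
  // Shape-like ops (Flow/HAL ops, tensor.empty, linalg.fill) are rewritten in
  // place; only their tensor shapes are materialized.
  populateShapeIndependentMaterializeEncodingPatterns(
      patterns, target, typeConverter, materializeEncodingValueFn);
  return applyPartialConversion(root, target, std::move(patterns));
}
```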
diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp
index 8024a7a71edf..6a7230d2cc7f 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoNop.cpp
@@ -49,7 +49,7 @@ struct MaterializeEncodingIntoNopPass final
     MaterializeEncodingConversionTarget target(*context);
     populateMaterializeEncodingIntoPackUnPackPatterns(
         materializeEncodingPattern, typeConverter, materializeEncodingValueFn);
-    populateIREEMaterializeEncodingIntoPackUnPackPatterns(
+    populateShapeIndependentMaterializeEncodingPatterns(
        materializeEncodingPattern, target, typeConverter,
        materializeEncodingValueFn);
diff --git a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
index 852183289304..d487e95f907d 100644
--- a/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
+++ b/compiler/src/iree/compiler/Codegen/Common/MaterializeEncodingIntoPackUnPack.cpp
@@ -31,6 +31,33 @@ namespace mlir::iree_compiler {
 // Utility methods
 //===---------------------------------------------------------------------===//
 
+// Utility to apply a tile-swizzling to a packed shape.
+static SmallVector<OpFoldResult>
+getSwizzledShape(ArrayRef<OpFoldResult> packedShape,
+                 MaterializeEncodingInfo encodingInfo) {
+  if (packedShape.empty() || !encodingInfo.swizzle) {
+    return SmallVector<OpFoldResult>(packedShape);
+  }
+
+  int64_t srcRank = packedShape.size() - encodingInfo.innerTileSizes.size();
+  SmallVector<int64_t> perm = llvm::to_vector(llvm::seq<int64_t>(0, srcRank));
+  for (auto i : encodingInfo.swizzle->permutation) {
+    perm.push_back(i + srcRank);
+  }
+
+  SmallVector<OpFoldResult> newShape(packedShape.take_front(srcRank));
+  SmallVector<int64_t> expandedTileShape =
+      getExpandedTileShape(encodingInfo.swizzle->expandShape);
+  MLIRContext *ctx = packedShape[0].getContext();
+  Builder b(ctx);
+  for (int64_t d : expandedTileShape) {
+    newShape.push_back(b.getIndexAttr(d));
+  }
+  applyPermutationToVector(newShape, perm);
+
+  return newShape;
+}
+
 static Operation *dropEncodingAndCloneOp(OpBuilder &builder, Operation *op,
                                          ValueRange convertedInputOperands,
                                          ValueRange convertedOutputOperands) {
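To make the swizzle arithmetic concrete, here is a self-contained sketch of the same computation on plain integers. The 128x16 inner tile, the expandShape groups, and the permutation are hypothetical values chosen only so the result reproduces the tensor<2x33x8x4x16x4xf32> shape checked in the new GPU test; the real values come from the target-provided MaterializeEncodingInfo swizzle.

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

int main() {
  // Packed shape from tensor.pack: two outer dims, then two inner tile dims.
  // Hypothetical mirror of the GPU test: 255x513 packed into 2x33 tiles of
  // 128x16.
  std::vector<int64_t> packedShape = {2, 33, 128, 16};
  // Hypothetical swizzle: each inner tile dim expands into factors, and the
  // expanded tile dims are then permuted (128 -> 8x16, 16 -> 4x4).
  std::vector<std::vector<int64_t>> expandShape = {{8, 16}, {4, 4}};
  std::vector<int64_t> tilePerm = {0, 2, 1, 3};

  // srcRank = rank minus the number of inner tiles, as in getSwizzledShape.
  std::size_t srcRank = packedShape.size() - expandShape.size();

  // Outer dims are untouched: the permutation is the identity over them.
  std::vector<int64_t> newShape(packedShape.begin(),
                                packedShape.begin() + srcRank);

  // getExpandedTileShape: flatten the expansion groups -> [8, 16, 4, 4].
  std::vector<int64_t> expandedTile;
  for (const auto &group : expandShape)
    expandedTile.insert(expandedTile.end(), group.begin(), group.end());

  // Append the permuted tile dims -> [8, 4, 16, 4].
  for (int64_t p : tilePerm)
    newShape.push_back(expandedTile[p]);

  // Prints: 2 33 8 4 16 4
  for (int64_t d : newShape)
    std::cout << d << " ";
  std::cout << "\n";
  return 0;
}
```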
@@ -368,6 +395,7 @@ lowerOpWithEncoding(RewriterBase &rewriter, tensor::EmptyOp emptyOp,
   SmallVector<OpFoldResult> newShape = tensor::PackOp::getResultShape(
       rewriter, loc, sourceDims, *innerTileSizesOfr,
       encodingInfo->innerDimsPos, encodingInfo->outerDimsPerm);
+  newShape = getSwizzledShape(newShape, *encodingInfo);
   Operation *newEmptyOp = rewriter.create<tensor::EmptyOp>(
       loc, newShape, emptyType.getElementType());
   return newEmptyOp;
@@ -507,33 +535,6 @@ lowerOpWithEncoding(RewriterBase &rewriter, linalg::LinalgOp linalgOp,
       .Default([](Operation *op) { return failure(); });
 }
 
-// Utility to apply a tile-swizzling to a packed shape.
-static SmallVector<OpFoldResult>
-getSwizzledShape(ArrayRef<OpFoldResult> packedShape,
-                 MaterializeEncodingInfo encodingInfo) {
-  if (packedShape.empty() || !encodingInfo.swizzle) {
-    return SmallVector<OpFoldResult>(packedShape);
-  }
-
-  int64_t srcRank = packedShape.size() - encodingInfo.innerTileSizes.size();
-  SmallVector<int64_t> perm = llvm::to_vector(llvm::seq<int64_t>(0, srcRank));
-  for (auto i : encodingInfo.swizzle->permutation) {
-    perm.push_back(i + srcRank);
-  }
-
-  SmallVector<OpFoldResult> newShape(packedShape.take_front(srcRank));
-  SmallVector<int64_t> expandedTileShape =
-      getExpandedTileShape(encodingInfo.swizzle->expandShape);
-  MLIRContext *ctx = packedShape[0].getContext();
-  Builder b(ctx);
-  for (int64_t d : expandedTileShape) {
-    newShape.push_back(b.getIndexAttr(d));
-  }
-  applyPermutationToVector(newShape, perm);
-
-  return newShape;
-}
-
 /// For `dispatchTensorType` that bind a `RankedTensorType` with encoding,
 /// returns the materialized shape of the `dispatchTensorType`. The
 /// dynamic dimensions of the `dispatchTensorType` are provided in
@@ -818,6 +819,11 @@ struct UnsetEncodingOpToUnPackOpConversion
 };
 
 /// Generic pattern to convert operation that is in Destination Passing Style.
+/// TODO(hanchung): Implement a different pattern for non-elementwise
+/// operations, because they should implement their own patterns based on
+/// backends. The elementwise operations are just like shape-like ops in the
+/// data-tiling concept: they still perform the same computation, only on
+/// different shapes.
 template <typename OpTy>
 struct MaterializeDPSOperation : public OpMaterializeEncodingPattern<OpTy> {
   using OpMaterializeEncodingPattern<OpTy>::OpMaterializeEncodingPattern;
@@ -914,16 +920,14 @@ void populateMaterializeEncodingIntoPackUnPackPatterns(
     MaterializeEncodingTypeConverter &typeConverter,
     MaterializeEncodingValueFn materializeEncodingValueFn) {
   MLIRContext *context = patterns.getContext();
-  patterns.insert<MaterializeDPSOperation<linalg::FillOp>,
-                  MaterializeDPSOperation<linalg::GenericOp>,
-                  MaterializeOperation<tensor::EmptyOp>,
+  patterns.insert<MaterializeDPSOperation<linalg::GenericOp>,
                   MaterializeContractionOp, SetEncodingOpToPackOpConversion,
                   UnsetEncodingOpToUnPackOpConversion>(
       context, typeConverter, materializeEncodingValueFn);
   memref::populateResolveRankedShapedTypeResultDimsPatterns(patterns);
 }
 
-void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
+void populateShapeIndependentMaterializeEncodingPatterns(
     RewritePatternSet &patterns, MaterializeEncodingConversionTarget &target,
     MaterializeEncodingTypeConverter &typeConverter,
     MaterializeEncodingValueFn materializeEncodingValueFn) {
@@ -949,7 +953,9 @@ void populateIREEMaterializeEncodingIntoPackUnPackPatterns(
         return resultType == typeConverter.convertType(resultType);
       });
 
-  patterns.insert<MaterializeFlowDispatchTensorLoadOp,
+  patterns.insert<MaterializeDPSOperation<linalg::FillOp>,
+                  MaterializeOperation<tensor::EmptyOp>,
+                  MaterializeFlowDispatchTensorLoadOp,
                   MaterializeFlowDispatchTensorStoreOp,
                   MaterializeInterfaceBindingEncoding>(
       context, typeConverter, materializeEncodingValueFn);
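The context lines of the last hunk hint at why moving the linalg.fill and tensor.empty patterns here is safe: the shape-independent patterns are guarded by a dynamic legality rule on the conversion target, so an op only stays illegal while the type converter still changes its result type. A minimal sketch of that rule, with a hypothetical helper name and op list:

```cpp
#include "mlir/Dialect/Linalg/IR/Linalg.h"
#include "mlir/Dialect/Tensor/IR/Tensor.h"
#include "mlir/Transforms/DialectConversion.h"

using namespace mlir;

// Hypothetical helper: an op is legal once the encoding type converter maps
// its result type to itself, i.e. once the encoding is materialized away.
static void markShapeLikeOpsLegality(ConversionTarget &target,
                                     const TypeConverter &typeConverter) {
  target.addDynamicallyLegalOp<tensor::EmptyOp, linalg::FillOp>(
      [&typeConverter](Operation *op) {
        Type resultType = op->getResult(0).getType();
        return resultType == typeConverter.convertType(resultType);
      });
}
```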