-
Notifications
You must be signed in to change notification settings - Fork 663
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[CPU] Limit unrolling factors for generic ops. #17227
Changes from 3 commits
3a870c7
4ee8a86
f231dd8
f178fc1
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -289,6 +289,34 @@ static int64_t getVectorSize(mlir::FunctionOpInterface entryPointFn, | |
return getVectorSize(entryPointFn, byteWidth); | ||
} | ||
|
||
/// Returns true if the operation is a GenericOp implementing a supported | ||
/// transposition. | ||
static bool isSupportedTransposeOp(linalg::GenericOp genericOp) { | ||
// Check that the op has at least 2 dimensions. | ||
if (genericOp.getNumLoops() < 2) { | ||
return false; | ||
} | ||
|
||
// Check that the op has only one input and one output. | ||
// TODO(diegocaballero): Generalize to multiple inputs. | ||
if ((genericOp.getNumDpsInputs() != 1) || (genericOp.getNumDpsInits() != 1)) { | ||
return false; | ||
} | ||
|
||
// Check that all the iterators are parallel. | ||
if (genericOp.getNumParallelLoops() != genericOp.getNumLoops()) { | ||
return false; | ||
} | ||
|
||
// Check that the two indexing maps are a permutation of each other. | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The code seems to implement Also, this condition could be simplified to What role is played by the separate checks for There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. As same as above response, the code is quite old. I can help update it either in the PR or in a follow-up. |
||
auto indexingMaps = genericOp.getIndexingMapsArray(); | ||
return !indexingMaps[0].isEmpty() && !indexingMaps[1].isEmpty() && | ||
((indexingMaps[0].isIdentity() && !indexingMaps[1].isIdentity() && | ||
indexingMaps[1].isPermutation()) || | ||
(!indexingMaps[0].isIdentity() && indexingMaps[0].isPermutation() && | ||
indexingMaps[1].isIdentity())); | ||
} | ||
|
||
/// Returns minimum tiling sizes for each dimension. One dimension is possible | ||
/// to access at different element types. It determines the tiling sizes by | ||
/// looking into all the operands. | ||
|
@@ -325,21 +353,33 @@ getMinTilingSizesForEachDim(mlir::FunctionOpInterface entryPointFn, | |
|
||
// Limit unroll factor. For now, we assume the rightmost non-one tiled | ||
// dimension is for vectorization and any other non-one dimension is for | ||
// unrolling. | ||
// unrolling. The util limits the second rightmost non-one tiled dimension | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What is "the util" ? There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I meant the lambda function, let me update the comment. |
||
// to be not larger than `maxUnrollFactor` and others tiled dimension to 1. | ||
auto limitUnrollFactor = [&](int64_t maxUnrollFactor) { | ||
int vecDim; | ||
for (vecDim = minTileSizes.size() - 1; vecDim >= 0; --vecDim) { | ||
if (minTileSizes[vecDim] > 1) { | ||
break; | ||
} | ||
} | ||
bool seen = false; | ||
for (int unrollDim = vecDim - 1; unrollDim >= 0; --unrollDim) { | ||
if (minTileSizes[unrollDim] <= 1) { | ||
continue; | ||
} | ||
int64_t factor = seen ? 1LL : maxUnrollFactor; | ||
seen = true; | ||
LLVM_DEBUG(KD_DBGS() << "Adjusted min tile sizes: " | ||
<< minTileSizes[unrollDim] | ||
<< " with factor=" << factor << "\n"); | ||
minTileSizes[unrollDim] = | ||
std::min<int64_t>(minTileSizes[unrollDim], maxUnrollFactor); | ||
std::min<int64_t>(minTileSizes[unrollDim], factor); | ||
} | ||
}; | ||
|
||
if (linalgOpInfo.isTranspose()) { | ||
auto genericOp = dyn_cast<linalg::GenericOp>(op.getOperation()); | ||
if (linalgOpInfo.isTranspose() && genericOp && | ||
isSupportedTransposeOp(genericOp)) { | ||
// Limit unrolling on transpose operations. | ||
// TODO(dcaballe): Consider input and output transposes. | ||
limitUnrollFactor(targetMLTransInfo.defaultMaxTransposeUnrollFactor); | ||
|
@@ -1729,34 +1769,6 @@ static void setVectorTileSizes(linalg::LinalgOp op, | |
} | ||
} | ||
|
||
/// Returns true if the operation is a GenericOp implementing a supported | ||
/// transposition. | ||
static bool isSupportedTransposeOp(linalg::GenericOp genericOp) { | ||
// Check that the op has at least 2 dimensions. | ||
if (genericOp.getNumLoops() < 2) { | ||
return false; | ||
} | ||
|
||
// Check that the op has only one input and one output. | ||
// TODO(diegocaballero): Generalize to multiple inputs. | ||
if ((genericOp.getNumDpsInputs() != 1) || (genericOp.getNumDpsInits() != 1)) { | ||
return false; | ||
} | ||
|
||
// Check that all the iterators are parallel. | ||
if (genericOp.getNumParallelLoops() != genericOp.getNumLoops()) { | ||
return false; | ||
} | ||
|
||
// Check that the two indexing maps are a permutation of each other. | ||
auto indexingMaps = genericOp.getIndexingMapsArray(); | ||
return !indexingMaps[0].isEmpty() && !indexingMaps[1].isEmpty() && | ||
((indexingMaps[0].isIdentity() && !indexingMaps[1].isIdentity() && | ||
indexingMaps[1].isPermutation()) || | ||
(!indexingMaps[0].isIdentity() && indexingMaps[0].isPermutation() && | ||
indexingMaps[1].isIdentity())); | ||
} | ||
|
||
/// Sets the default lowering configuration for a generic op to use | ||
/// CPUDoubleTilingExpert pipeline. | ||
static LogicalResult | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I was curious what "a supported transposition" means: supported in what sense?
Looking at how this function is used, it seems to mean "is
defaultMaxTransposeUnrollFactor
should be used instead ofdefaultMaxUnrollFactor
for this op".If I get that right, I would suggest that this relatively minor nuance --- just which numeric threshold value is used in some logic, not a fundamental switch of what logic is applied to the op --- is not what a reader of this comment would guess from reading the above comment, which suggests a major difference instead ("supported" vs... "not supported" ? which sounds scary).
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This is sort of precondition for transpose op lowering. We have specialized codegen for transpose ops on x86 path. It only supports n-D transpose with single input and single output. Here I'm just moving the code from below to here, but I can update the comment and and rename it to
x86TransposeLoweringPrecondition
. What do you think?There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
SG!