Upgraded NNPA infra for handling dimension specific max sizes #2871

Merged · 6 commits · Jul 11, 2024
2 changes: 1 addition & 1 deletion docs/SupportedONNXOps-NNPA.md
@@ -10,7 +10,7 @@ Onnx-mlir currently supports ONNX operations targeting up to opset 20. Limitatio
* A * indicates onnx-mlir is compatible with the latest version of that operator available as of opset 20.


NNPA has hardware limitations in dimension index size and tensor size, which are described in [NNPALimit.h](../src/Accelerators/NNPA/Support/NNPALimit.h). They are large enough for normal use cases, but if your model exceeds the limitations, CPU is used instead of NNPA.
NNPA has hardware limitations in dimension index size and tensor size, which are described in [NNPALimit.hpp](../src/Accelerators/NNPA/Support/NNPALimit.hpp). They are large enough for normal use cases, but if your model exceeds the limitations, CPU is used instead of NNPA.


| Op |Supported Opsets (inclusive) |Limitations |Notes |
2 changes: 2 additions & 0 deletions src/Accelerators/NNPA/Conversion/ONNXToZHigh/CMakeLists.txt
@@ -37,6 +37,8 @@ add_onnx_mlir_library(OMRewriteONNXForZHigh
OMONNXOps
OMONNXToKrnl
OMZHighOps
OMLayoutHelper


ACCEL_INCLUDE_DIRS PRIVATE
${NNPA_INCLUDE_PATH}
@@ -15,7 +15,7 @@

#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.hpp"
#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.hpp"
#include "src/Accelerators/NNPA/Support/NNPALimit.h"
#include "src/Accelerators/NNPA/Support/NNPALimit.hpp"
#include "src/Compiler/CompilerOptions.hpp"
#include "src/Conversion/ONNXToKrnl/RNN/RNNBase.hpp"
#include "src/Dialect/ONNX/ONNXDimAnalysis.hpp"
@@ -46,29 +46,6 @@ bool onnxToZHighInCompatibilityReport(Operation *op) {
return onnxToZHighUnsupportedReport(op, message);
}

/// Convert the input NNPA level, ie. "z16", to a floating point value
/// representing the level, ie. "16.0".
float convertNNPALevel(std::string inputNNPALevel) {
float retNNPAFloat = 0;
try {
retNNPAFloat = std::strtof(
inputNNPALevel.substr(1, inputNNPALevel.size()).c_str(), NULL);
} catch (...) {
retNNPAFloat = 0;
}
return retNNPAFloat;
}

/// A function to check whether the input NNPA level, ie. "z16", is compatible
/// with the current NNPA level.
bool isCompatibleWithNNPALevel(std::string inputNNPALevel) {
float inLevel = convertNNPALevel(inputNNPALevel);
float mcpuLevel = convertNNPALevel(mcpu);
if (inLevel == 0 && mcpuLevel == 0)
return false;
return inLevel <= mcpuLevel;
}

/// A function to check whether a value's element type is valid for zAIU or not.
/// zAIU supports only F16, F32 and BFLOAT. Since MLIR does not support BFLOAT,
/// we check F16 and F32 here only. zAIU only supports rank in range of (0, 4].
@@ -4,7 +4,7 @@

//====----- ONNXToZHighCommon.cpp - Common functions to ZHigh lowering ----===//
//
// Copyright 2019-2020 The IBM Research Authors.
// Copyright 2019-2024 The IBM Research Authors.
//
// =============================================================================
//
@@ -40,25 +40,27 @@ Value emitONNXTransposeWithType(Location loc, PatternRewriter &rewriter,
}

/// Split a tensor along an axis in which each chunk has a size of
/// NNPA_MAXIMUM_DIMENSION_INDEX_SIZE and the last chunk can be smaller.
/// NNPAGetMaxForDim and the last chunk can be smaller.
ValueRange splitAlongAxis(
MultiDialectBuilder<OnnxBuilder> &create, Value X, int64_t axis) {
Type xType = X.getType();
ArrayRef<int64_t> xShape = getShape(xType);
int64_t xRank = xShape.size();
Type elementTy = getElementType(xType);

// Compute split sizes.
SmallVector<Type> splitTy;
SmallVector<int64_t> splitSizesI64;
SmallVector<int64_t> splitShape(xShape);
int64_t dimSize = xShape[axis];
// First splits have the same size of NNPA_MAXIMUM_DIMENSION_INDEX_SIZE.
while (dimSize > NNPA_MAXIMUM_DIMENSION_INDEX_SIZE) {
splitShape[axis] = NNPA_MAXIMUM_DIMENSION_INDEX_SIZE;
// First splits have the same size of NNPAGetMaxForDim.
int64_t maxSize = NNPAGetMaxForDim(axis, xRank);
while (dimSize > maxSize) {
splitShape[axis] = maxSize;
auto ty = RankedTensorType::get(splitShape, elementTy);
splitTy.emplace_back(ty);
splitSizesI64.emplace_back(NNPA_MAXIMUM_DIMENSION_INDEX_SIZE);
dimSize -= NNPA_MAXIMUM_DIMENSION_INDEX_SIZE;
splitSizesI64.emplace_back(maxSize);
dimSize -= maxSize;
}
// The last split.
splitShape[axis] = dimSize;
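To make the chunk computation in `splitAlongAxis` above concrete, here is a small standalone sketch of the same loop (plain C++ with illustrative names, not the onnx-mlir API; 32768 is the z16 per-dimension limit that `NNPAGetMaxForDim` returns for ranks of 4 or less):

```cpp
#include <cstdint>
#include <iostream>
#include <vector>

// Standalone sketch of the chunking logic in splitAlongAxis: full chunks of
// maxSize followed by a smaller remainder chunk at the end.
std::vector<int64_t> computeSplitSizes(int64_t dimSize, int64_t maxSize) {
  std::vector<int64_t> sizes;
  while (dimSize > maxSize) {
    sizes.push_back(maxSize);
    dimSize -= maxSize;
  }
  sizes.push_back(dimSize); // the last, possibly smaller, chunk
  return sizes;
}

int main() {
  // A dimension of 50257 (as in the lit tests below) with the z16 limit of
  // 32768 splits into chunks of 32768 and 17489.
  for (int64_t s : computeSplitSizes(50257, 32768))
    std::cout << s << "\n";
  return 0;
}
```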
18 changes: 10 additions & 8 deletions src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.hpp
@@ -4,7 +4,7 @@

//===---------- ONNXToZHigh.hpp - Common functions in ONNXToZHigh ---------===//
//
// Copyright 2019-2020 The IBM Research Authors.
// Copyright 2019-2024 The IBM Research Authors.
//
// =============================================================================
//
@@ -18,7 +18,7 @@

#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXLegalityCheck.hpp"
#include "src/Accelerators/NNPA/Support/LayoutHelper.hpp"
#include "src/Accelerators/NNPA/Support/NNPALimit.h"
#include "src/Accelerators/NNPA/Support/NNPALimit.hpp"
#include "src/Dialect/ONNX/ONNXDimAnalysis.hpp"

namespace onnx_mlir {
@@ -68,11 +68,13 @@ void addDynamicallyLegalOpFor(mlir::ConversionTarget *target,
mlir::dyn_cast<mlir::ShapedType>(operand.getType())) {
// Check if static dimension size exceeds zDNN limitations
llvm::ArrayRef<int64_t> valueShape = valueType.getShape();
if (llvm::any_of(valueShape, [](int64_t dim) {
return (!mlir::ShapedType::isDynamic(dim)) &&
(dim > NNPA_MAXIMUM_DIMENSION_INDEX_SIZE);
}))
return true;
int64_t valueRank = valueShape.size();
for (int64_t i = 0; i < valueRank; ++i) {
int64_t dim = valueShape[i];
if (!mlir::ShapedType::isDynamic(dim) &&
dim > NNPAGetMaxForDim(i, valueRank))
return true;
}
}
return false;
});
@@ -97,7 +99,7 @@ mlir::Value emitONNXTransposeWithType(mlir::Location loc,
mlir::ArrayRef<int64_t> perms);

/// Split a tensor along an axis in which each chunk has a size of
/// NNPA_MAXIMUM_DIMENSION_INDEX_SIZE and the last chucnk can be smaller.
/// NNPAGetMaxForDim and the last chunk can be smaller.
mlir::ValueRange splitAlongAxis(
onnx_mlir::MultiDialectBuilder<onnx_mlir::OnnxBuilder> &create,
mlir::Value X, int64_t axis);
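To illustrate the new per-dimension legality check in `addDynamicallyLegalOpFor` above, the following standalone sketch (illustrative names only, not the onnx-mlir API) applies the same rule to a plain shape vector; any static dimension over its limit keeps the op on the CPU path instead of lowering to NNPA:

```cpp
#include <cstdint>
#include <vector>

// Illustrative stand-in for mlir::ShapedType::kDynamic.
constexpr int64_t kDynamic = -1;

// Sketch of the per-dimension limit: 32768 per dimension on z16 for ranks up
// to 4, and 0 (nothing fits) for higher ranks, mirroring NNPAGetMaxForDim.
int64_t maxForDim(int64_t /*dim*/, int64_t rank) {
  return (rank <= 4) ? 32768 : 0;
}

// Returns true when some static dimension exceeds its limit; in the real
// callback this keeps the op dynamically legal, i.e. on the ONNX/CPU path.
bool exceedsNNPALimit(const std::vector<int64_t> &shape) {
  int64_t rank = static_cast<int64_t>(shape.size());
  for (int64_t i = 0; i < rank; ++i)
    if (shape[i] != kDynamic && shape[i] > maxForDim(i, rank))
      return true;
  return false;
}

// Example: exceedsNNPALimit({1, 40000, 768}) is true because dim 1 (40000)
// is over 32768, while exceedsNNPALimit({1, 30000, 768}) is false.
```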
@@ -4,7 +4,7 @@

//===--- RewriteONNXForZHigh.cpp - Rewrite ONNX ops for ZHigh lowering ----===//
//
// Copyright 2019-2023 The IBM Research Authors.
// Copyright 2019-2024 The IBM Research Authors.
//
// =============================================================================
//
@@ -28,7 +28,7 @@
#include "src/Accelerators/NNPA/Conversion/ONNXToZHigh/ONNXToZHighCommon.hpp"
#include "src/Accelerators/NNPA/Dialect/ZHigh/ZHighOps.hpp"
#include "src/Accelerators/NNPA/Pass/NNPAPasses.hpp"
#include "src/Accelerators/NNPA/Support/NNPALimit.h"
#include "src/Accelerators/NNPA/Support/NNPALimit.hpp"
#include "src/Conversion/ONNXToKrnl/ONNXToKrnlCommon.hpp"
#include "src/Dialect/ONNX/DialectBuilder.hpp"
#include "src/Dialect/ONNX/ElementsAttr/WideNum.hpp"
@@ -292,8 +292,7 @@ Type CreatePaddedXType(Value x, ArrayAttr pads) {

/// This pattern is to split a large MatMul into smaller ones that fit into
/// NNPA. Given (NxK) * (K*M), the pattern considers dimensions N and/or M to
/// split, if N and/or M is greater than NNPA_MAXIMUM_DIMENSION_INDEX_SIZE
/// (MDIS).
/// split, if N and/or M is greater than NNPAGetMaxForDim (MDIS).
/// For example, given A(NxK) * B(KxM), we will split A and B as follows.
// clang-format off
///
@@ -406,8 +405,8 @@ class SplitLargeMatMulPattern : public OpRewritePattern<ONNXMatMulOp> {
// Expect N or M exceeds NNPA limitation.
int64_t N = aShape[aRank - 2];
int64_t M = bShape[bRank - 1];
nExceeded = N > NNPA_MAXIMUM_DIMENSION_INDEX_SIZE;
mExceeded = M > NNPA_MAXIMUM_DIMENSION_INDEX_SIZE;
nExceeded = N > NNPAGetMaxForDim(aRank - 2, aRank);
mExceeded = M > NNPAGetMaxForDim(bRank - 1, bRank);
if (!(nExceeded || mExceeded))
return false;

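For a rough picture of what `SplitLargeMatMulPattern` produces for the shapes used in the lit tests further down, here is a sketch (plain C++, not the pattern's actual rewriter code) of the sub-MatMuls that result when only the N dimension of A(N x K) * B(K x M) exceeds the limit:

```cpp
#include <algorithm>
#include <cstdint>
#include <iostream>
#include <vector>

// Sketch: when N exceeds the per-dimension limit, A is split along N; each
// chunk Ai(Ni x K) is multiplied by the full B and the results are
// concatenated along N. (The real pattern can split M, or both, the same way.)
int main() {
  const int64_t N = 50257, K = 768, M = 1024, limit = 32768;
  std::vector<int64_t> nChunks;
  for (int64_t rest = N; rest > 0; rest -= limit)
    nChunks.push_back(std::min(rest, limit));
  for (int64_t Ni : nChunks)
    std::cout << "MatMul(" << Ni << "x" << K << ", " << K << "x" << M
              << ") -> " << Ni << "x" << M << "\n";
  // Prints two sub-MatMuls (32768x768 and 17489x768, each against 768x1024)
  // whose results concatenate back into the original 50257x1024.
  return 0;
}
```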
2 changes: 1 addition & 1 deletion src/Accelerators/NNPA/NNPAAccelerator.cpp
@@ -25,7 +25,7 @@
#include "src/Accelerators/NNPA/Dialect/ZLow/ZLowOps.hpp"
#include "src/Accelerators/NNPA/NNPAAccelerator.hpp"
#include "src/Accelerators/NNPA/Pass/NNPAPasses.hpp"
#include "src/Accelerators/NNPA/Support/NNPALimit.h"
#include "src/Accelerators/NNPA/Support/NNPALimit.hpp"
#include "src/Compiler/CompilerOptions.hpp"
#include "zdnn.h"

6 changes: 6 additions & 0 deletions src/Accelerators/NNPA/Support/CMakeLists.txt
@@ -2,12 +2,18 @@ add_subdirectory(Stickify)

add_onnx_mlir_library(OMLayoutHelper
LayoutHelper.cpp
NNPALimit.cpp

DEPENDS
libzdnn
OMCompilerOptions

LINK_LIBS PUBLIC
MLIRIR
OMCompilerOptions

INCLUDE_DIRS PUBLIC
${ONNX_MLIR_SRC_ROOT}/include

ACCEL_INCLUDE_DIRS PRIVATE
${NNPA_SRC_ROOT}
61 changes: 61 additions & 0 deletions src/Accelerators/NNPA/Support/NNPALimit.cpp
@@ -0,0 +1,61 @@
/*
* SPDX-License-Identifier: Apache-2.0
*/

//===----------------------- NNPALimit.cpp --------------------------------===//
//
// Copyright 2022-2024 The IBM Research Authors.
//
// =============================================================================
//
// The NNPA constant values.
//
//===----------------------------------------------------------------------===//

#include "src/Accelerators/NNPA/Support/NNPALimit.hpp"
#include "src/Compiler/CompilerOptions.hpp"

#include <assert.h>
#include <string>

//===----------------------------------------------------------------------===//
// Compatibility checks

/// Convert the input NNPA level, ie. "z16", to an integer value representing the
/// level, ie. "16". When unknown / out of bounds, returns 0.
int64_t convertNNPALevel(std::string inputNNPALevel) {
if (inputNNPALevel.size() != 3 || inputNNPALevel[0] != 'z')
return 0;
if (inputNNPALevel[1] == '1') {
if (inputNNPALevel[2] == '6')
return 16;
}
return 0;
}

/// A function to check whether the input NNPA level, ie. "z16", is compatible
/// with the current NNPA level.
bool isCompatibleWithNNPALevel(std::string inputNNPALevel) {
int64_t inLevel = convertNNPALevel(inputNNPALevel);
int64_t mcpuLevel = convertNNPALevel(onnx_mlir::mcpu);
if (inLevel == 0 && mcpuLevel == 0)
return false;
return inLevel <= mcpuLevel;
}

//===----------------------------------------------------------------------===//
// Max dimension checks

// The NNPA maximum supported dimension index size value, as returned by
// zdnn_get_nnpa_max_dim_idx_size(). This value depends on HW.
static constexpr int64_t NNPA_Z16_MAXIMUM_DIMENSION_INDEX_SIZE = 32768;

int64_t NNPAGetMaxForDim(int64_t dim, int64_t rank) {
assert(rank >= 0 && "expected positive rank");
assert(dim >= 0 && dim < rank && "dim outside range [0..rank)");
if (rank > 4)
return 0;
if (isCompatibleWithNNPALevel(NNPA_Z16))
return NNPA_Z16_MAXIMUM_DIMENSION_INDEX_SIZE;
return 0;
}
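A minimal usage sketch for the new helpers follows (an illustrative caller, not code from this PR; it assumes the compiler was invoked with --mcpu=z16 so the z16 limit applies, and uses the include path introduced above):

```cpp
#include "src/Accelerators/NNPA/Support/NNPALimit.hpp"

#include <cassert>

// With --mcpu=z16 (so isCompatibleWithNNPALevel(NNPA_Z16) holds), every
// dimension of a tensor of rank 4 or less gets a limit of 32768; any rank
// above 4 gets a limit of 0, meaning the value cannot be mapped to NNPA.
void checkLimitsExample() {
  int64_t maxDim0Rank3 = NNPAGetMaxForDim(/*dim=*/0, /*rank=*/3);
  int64_t maxDim0Rank5 = NNPAGetMaxForDim(/*dim=*/0, /*rank=*/5);
  assert(maxDim0Rank3 == 0 || maxDim0Rank3 == 32768); // 32768 on z16, else 0
  assert(maxDim0Rank5 == 0); // ranks above 4 are never supported
}
```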
@@ -2,9 +2,9 @@
* SPDX-License-Identifier: Apache-2.0
*/

//===----------------------- NNPALimit.h ----------------------------------===//
//===----------------------- NNPALimit.hpp --------------------------------===//
//
// Copyright 2022-2023 The IBM Research Authors.
// Copyright 2022-2024 The IBM Research Authors.
//
// =============================================================================
//
@@ -16,9 +16,11 @@

#include <stdint.h>

// The NNPA maximum supported dimension index size value by using
// zdnn_get_nnpa_max_dim_idx_size() This value depends on HW.
static constexpr int64_t NNPA_MAXIMUM_DIMENSION_INDEX_SIZE = 32768;
// Get the maximum number of elements for a given NNPA tensor dimension. Dim is
// a tensor/memref index (from 0 to rank-1), with dim=0 being the outermost
// dimension and dim=(rank-1) the innermost dimension. Returns 0 if the
// dimension is invalid. Asserts if dim is outside [0, rank) or rank is non-positive.
int64_t NNPAGetMaxForDim(int64_t dim, int64_t rank);

// The NNPA maximum supported tensor size (in bytes)
// by using zdnn_get_nnpa_max_tensor_size()
1 change: 1 addition & 0 deletions src/Accelerators/NNPA/Support/Stickify/CMakeLists.txt
@@ -7,6 +7,7 @@ add_onnx_mlir_library(OMStickify

LINK_LIBS PUBLIC
LLVMSupport
OMLayoutHelper

ACCEL_INCLUDE_DIRS PRIVATE
${NNPA_INCLUDE_PATH}
2 changes: 1 addition & 1 deletion src/Accelerators/NNPA/Support/Stickify/Convert.cpp
@@ -13,7 +13,7 @@
//===----------------------------------------------------------------------===//

#include "src/Accelerators/NNPA/Support/Stickify/Convert.hpp"
#include "src/Accelerators/NNPA/Support/NNPALimit.h"
#include "src/Accelerators/NNPA/Support/NNPALimit.hpp"
#include "src/Accelerators/NNPA/Support/Stickify/DLF16Conversion.hpp"

/// fp32 -> dlf16 conversion.
6 changes: 3 additions & 3 deletions src/Accelerators/NNPA/Support/Stickify/Stickify.cpp
@@ -4,7 +4,7 @@

//===------- stickify.cpp - Data Stickify ---------------------------------===//
//
// Copyright 2020-2022 The IBM Research Authors.
// Copyright 2020-2024 The IBM Research Authors.
//
// =============================================================================
//
@@ -21,7 +21,7 @@
#include <stdlib.h>
#include <string.h>

#include "src/Accelerators/NNPA/Support/NNPALimit.h"
#include "src/Accelerators/NNPA/Support/NNPALimit.hpp"
#include "src/Accelerators/NNPA/Support/Stickify/Convert.hpp"
#include "src/Accelerators/NNPA/Support/Stickify/Stickify.hpp"

@@ -412,7 +412,7 @@ zdnn_status verify_transformed_descriptor(const zdnn_tensor_desc *tfrmd_desc) {
// is the dimension above the limit or zero?
// transformed layout uses all dim* entries, so we'll check them all
for (int i = 0; i < ZDNN_MAX_DIMS; i++) {
if (!dims_ptr[i] || dims_ptr[i] > NNPA_MAXIMUM_DIMENSION_INDEX_SIZE) {
if (!dims_ptr[i] || dims_ptr[i] > NNPAGetMaxForDim(i, ZDNN_MAX_DIMS)) {
return ZDNN_INVALID_SHAPE;
}
}
@@ -23,7 +23,7 @@
#include "src/Accelerators/NNPA/Dialect/ZHigh/ZHighOps.hpp"
#include "src/Accelerators/NNPA/Dialect/ZHigh/ZHighOps/OpHelper.hpp"
#include "src/Accelerators/NNPA/Pass/NNPAPasses.hpp"
#include "src/Accelerators/NNPA/Support/NNPALimit.h"
#include "src/Accelerators/NNPA/Support/NNPALimit.hpp"
#include "src/Dialect/ONNX/DialectBuilder.hpp"
#include "src/Dialect/ONNX/ONNXOps.hpp"
#include "src/Dialect/ONNX/ONNXOps/OpHelper.hpp"
2 changes: 1 addition & 1 deletion test/accelerators/NNPA/backend/CMakeLists.txt
@@ -104,7 +104,7 @@ endif()
set(NNPA_TEST_LIST

# ==ARCH== NNPA
# ==ADDITIONAL_PARAGRAPH== NNPA has hardware limitations in dimension index size and tensor size, which are described in [NNPALimit.h](../src/Accelerators/NNPA/Support/NNPALimit.h). They are large enough for normal use cases, but if your model exceeds the limitations, CPU is used instead of NNPA.
# ==ADDITIONAL_PARAGRAPH== NNPA has hardware limitations in dimension index size and tensor size, which are described in [NNPALimit.hpp](../src/Accelerators/NNPA/Support/NNPALimit.hpp). They are large enough for normal use cases, but if your model exceeds the limitations, CPU is used instead of NNPA.

# ==OP== Add
# ==MIN== 6
@@ -485,7 +485,7 @@ func.func @test_matmul_unknown_batch_dim(%arg0: tensor<?x?x256x256xf32>) -> (ten

// -----

// Split MatMul because a dimension exceeds NNPA_MAXIMUM_DIMENSION_INDEX_SIZE = 32768.
// Split MatMul because a dimension exceeds NNPAGetMaxForDim = 32768.
func.func @test_matmul_splitting_A(%arg0: tensor<?x50257x768xf32>, %arg1: tensor<768x1024xf32>) -> (tensor<?x50257x1024xf32>) {
%0 = "onnx.MatMul"(%arg0, %arg1) : (tensor<?x50257x768xf32>, tensor<768x1024xf32>) -> tensor<?x50257x1024xf32>
return %0 : tensor<?x50257x1024xf32>
@@ -502,7 +502,7 @@
// CHECK: }
}

// Split MatMul because a dimension exceeds NNPA_MAXIMUM_DIMENSION_INDEX_SIZE = 32768.
// Split MatMul because a dimension exceeds NNPAGetMaxForDim = 32768.
func.func @test_matmul_splitting_B(%arg0: tensor<?x?x768xf32>, %arg1: tensor<768x50257xf32>) -> (tensor<?x?x50257xf32>) {
%0 = "onnx.MatMul"(%arg0, %arg1) : (tensor<?x?x768xf32>, tensor<768x50257xf32>) -> tensor<?x?x50257xf32>
return %0 : tensor<?x?x50257xf32>
Expand All @@ -521,7 +521,7 @@ func.func @test_matmul_splitting_B(%arg0: tensor<?x?x768xf32>, %arg1: tensor<768

// -----

// Split MatMul because a dimension exceeds NNPA_MAXIMUM_DIMENSION_INDEX_SIZE = 32768.
// Split MatMul because a dimension exceeds NNPAGetMaxForDim = 32768.
func.func @test_matmul_splitting_A_B(%arg0: tensor<?x50257x768xf32>, %arg1: tensor<768x50258xf32>) -> (tensor<?x50257x50258xf32>) {
%0 = "onnx.MatMul"(%arg0, %arg1) : (tensor<?x50257x768xf32>, tensor<768x50258xf32>) -> tensor<?x50257x50258xf32>
return %0 : tensor<?x50257x50258xf32>