From 70bd2368089a89b3edae145edc4bcae12f1e4d74 Mon Sep 17 00:00:00 2001
From: KeDengMS
Date: Wed, 25 Sep 2019 15:03:21 -0700
Subject: [PATCH] ReduceMax/Min performance improvements on CPU

On behalf of Tracy
---
 .../providers/cpu/reduction/reduction_ops.cc  | 31 ++++++++++++++-----
 1 file changed, 24 insertions(+), 7 deletions(-)

diff --git a/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc b/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc
index 1bbdb7473b101..78e4acae36e16 100644
--- a/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc
+++ b/onnxruntime/core/providers/cpu/reduction/reduction_ops.cc
@@ -102,8 +102,9 @@ bool PrepareForReduce(OpKernelContext* ctx,
 
   if (axes.empty()) {
     // This is the default case for non-arg kind reductions. Reduce on all dimensions.
-    for (size_t i = 0; i < ndim; i++)
+    for (size_t i = 0; i < ndim; i++) {
       axes.push_back(i);
+    }
   }
 
   std::sort(axes.begin(), axes.end());
@@ -320,12 +321,20 @@ Status ReduceMax<T>::Compute(OpKernelContext* ctx) const {
   int64_t block_size;
   int64_t blocks;
   Tensor* reduced;
-  PrepareForReduce<T>(ctx, transposedInputData, &reduced, block_size, blocks, axes_, keepdims_);
+  bool no_transpose = PrepareForReduce<T>(ctx, transposedInputData, &reduced, block_size, blocks, axes_, keepdims_, true);
 
   T* output_data = reduced->template MutableData<T>();
 
-  EigenVectorMap<T> out_vec(output_data, block_size);
-  out_vec = ConstEigenMatrixMap<T>(&transposedInputData[0], block_size, blocks).rowwise().maxCoeff();
+  if (no_transpose) {
+    const T* input_data = ctx->Input<Tensor>(0)->template Data<T>();
+
+    for (int64_t i = 0; i < block_size; ++i) {
+      output_data[i] = ConstEigenVectorMap<T>(input_data + (i * blocks), blocks).maxCoeff();
+    }
+  } else {
+    EigenVectorMap<T> out_vec(output_data, block_size);
+    out_vec = ConstEigenMatrixMap<T>(&transposedInputData[0], block_size, blocks).rowwise().maxCoeff();
+  }
 
   return Status::OK();
 }
@@ -363,12 +372,20 @@ Status ReduceMin<T>::Compute(OpKernelContext* ctx) const {
   int64_t block_size;
   int64_t blocks;
   Tensor* reduced;
-  PrepareForReduce<T>(ctx, transposedInputData, &reduced, block_size, blocks, axes_, keepdims_);
+  bool no_transpose = PrepareForReduce<T>(ctx, transposedInputData, &reduced, block_size, blocks, axes_, keepdims_, true);
 
   T* output_data = reduced->template MutableData<T>();
 
-  EigenVectorMap<T> out_vec(output_data, block_size);
-  out_vec = ConstEigenMatrixMap<T>(&transposedInputData[0], block_size, blocks).rowwise().minCoeff();
+  if (no_transpose) {
+    const T* input_data = ctx->Input<Tensor>(0)->template Data<T>();
+
+    for (int64_t i = 0; i < block_size; ++i) {
+      output_data[i] = ConstEigenVectorMap<T>(input_data + (i * blocks), blocks).minCoeff();
+    }
+  } else {
+    EigenVectorMap<T> out_vec(output_data, block_size);
+    out_vec = ConstEigenMatrixMap<T>(&transposedInputData[0], block_size, blocks).rowwise().minCoeff();
+  }
 
   return Status::OK();
 }