diff --git a/build-tools/code_generator/function_types.yaml b/build-tools/code_generator/function_types.yaml index c8c325353..cc33f9d7b 100644 --- a/build-tools/code_generator/function_types.yaml +++ b/build-tools/code_generator/function_types.yaml @@ -417,6 +417,9 @@ FixedPointQuantize: Pow2Quantize: float: [float] half: [Half] +MinMaxQuantize: + float: [float] + half: [Half] TopNError: float: [float, int] half: [Half, int] diff --git a/include/nbla/cuda/function/min_max_quantize.hpp b/include/nbla/cuda/function/min_max_quantize.hpp new file mode 100644 index 000000000..b19694d40 --- /dev/null +++ b/include/nbla/cuda/function/min_max_quantize.hpp @@ -0,0 +1,48 @@ +// Copyright (c) 2017 Sony Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#ifndef NBLA_CUDA_FUNCTION_MIN_MAX_QUANTIZE_HPP +#define NBLA_CUDA_FUNCTION_MIN_MAX_QUANTIZE_HPP + +#include +#include + +namespace nbla { + +template class MinMaxQuantizeCuda : public MinMaxQuantize { +public: + typedef typename CudaType::type Tcu; + + explicit MinMaxQuantizeCuda(const Context &ctx, float decay, bool train, + bool ema, bool ste_fine_grained, float eps) + : MinMaxQuantize(ctx, decay, train, ema, ste_fine_grained, eps), + device_(std::stoi(ctx.device_id)) {} + virtual ~MinMaxQuantizeCuda() {} + virtual string name() { return "MinMaxQuantizeCuda"; } + virtual vector allowed_array_classes() { + return SingletonManager::get()->array_classes(); + } + +protected: + int device_; + virtual void setup_impl(const Variables &inputs, const Variables &outputs); + NBLA_API virtual void nudge_range(Variable *qr_min, Variable *qr_max); + NBLA_API virtual void nudge_qr_min_max(Variable *qr_min, Variable *qr_max, + Variable *ql_min, Variable *ql_max, + Variable *scale, + Variable *qr_min_nudged, + Variable *qr_max_nudged); +}; +} +#endif diff --git a/src/nbla/cuda/function/generic/min_max_quantize.cu b/src/nbla/cuda/function/generic/min_max_quantize.cu new file mode 100644 index 000000000..fa3782f83 --- /dev/null +++ b/src/nbla/cuda/function/generic/min_max_quantize.cu @@ -0,0 +1,91 @@ +// Copyright (c) 2017 Sony Corporation. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include +#include + +namespace nbla { + +template +void MinMaxQuantizeCuda::setup_impl(const Variables &inputs, + const Variables &outputs) { + MinMaxQuantize::setup_impl(inputs, outputs); + cuda_set_device(this->device_); +} + +template +__global__ void kernel_nudge_range(const int size, float eps, + const T *qr_min_data, T *qr_max_data) { + NBLA_CUDA_KERNEL_LOOP(idx, size) { + if (qr_max_data[idx] - qr_min_data[idx] < eps) { + qr_max_data[idx] = qr_min_data[idx] + eps; + } + } +} + +template +__global__ void +kernel_nudge_qr_min_max(const int size, const T *qr_min_data, + const T *qr_max_data, const T *ql_min_data, + const T *ql_max_data, const T *scale_data, + T *qr_min_nudged_data, T *qr_max_nudged_data) { + T zero_point_nudged = T(0.0); + T zero_point_from_min = T(0.0); + NBLA_CUDA_KERNEL_LOOP(idx, size) { + zero_point_from_min = ql_min_data[idx] - qr_min_data[idx] / scale_data[idx]; + if (zero_point_from_min <= ql_min_data[idx]) { + zero_point_nudged = ql_min_data[idx]; + } else if (zero_point_from_min >= ql_max_data[idx]) { + zero_point_nudged = ql_max_data[idx]; + } else { + zero_point_nudged = round(zero_point_from_min); + } + qr_min_nudged_data[idx] = + (ql_min_data[idx] - zero_point_nudged) * scale_data[idx]; + qr_max_nudged_data[idx] = + (ql_max_data[idx] - zero_point_nudged) * scale_data[idx]; + } +} + +template +void MinMaxQuantizeCuda::nudge_range(Variable *qr_min, Variable *qr_max) { + const Tcu *qr_min_data = qr_min->get_data_pointer(this->ctx_); + Tcu *qr_max_data = qr_max->cast_data_and_get_pointer(this->ctx_); + NBLA_CUDA_LAUNCH_KERNEL_SIMPLE(kernel_nudge_range, qr_min->size(), + this->eps_, qr_min_data, qr_max_data); +} + +template +void MinMaxQuantizeCuda::nudge_qr_min_max(Variable *qr_min, Variable *qr_max, + Variable *ql_min, Variable *ql_max, + Variable *scale, + Variable *qr_min_nudged, + Variable *qr_max_nudged) { + const Tcu *qr_min_data = qr_min->get_data_pointer(this->ctx_); + const Tcu *qr_max_data = qr_max->get_data_pointer(this->ctx_); + const Tcu *ql_min_data = ql_min->get_data_pointer(this->ctx_); + const Tcu *ql_max_data = ql_max->get_data_pointer(this->ctx_); + const Tcu *scale_data = scale->get_data_pointer(this->ctx_); + Tcu *qr_min_nudged_data = + qr_min_nudged->cast_data_and_get_pointer(this->ctx_); + Tcu *qr_max_nudged_data = + qr_max_nudged->cast_data_and_get_pointer(this->ctx_); + NBLA_CUDA_LAUNCH_KERNEL_SIMPLE(kernel_nudge_qr_min_max, qr_min->size(), + qr_min_data, qr_max_data, ql_min_data, + ql_max_data, scale_data, qr_min_nudged_data, + qr_max_nudged_data); +} +}