This repository has been archived by the owner on Mar 21, 2024. It is now read-only.

Support __half in histogram #514

Merged · 2 commits · Jun 25, 2022
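In short, this change lets DeviceHistogram consume __half samples and __half level boundaries directly. A minimal usage sketch of what the PR enables, using the existing cub::DeviceHistogram::HistogramEven entry point; the bin count and level values are illustrative and error handling is omitted:

// Evenly-binned histogram over __half samples (illustrative sketch).
#include <cub/device/device_histogram.cuh>
#include <cuda_fp16.h>
#include <cuda_runtime.h>

void histogram_half_samples(const __half *d_samples, int num_samples,
                            int *d_histogram /* 8 counters */)
{
  int num_levels = 9;                      // 8 bins -> 9 level boundaries
  __half lower_level = __float2half(0.0f);
  __half upper_level = __float2half(8.0f);

  // First call: query temporary storage size.
  void *d_temp_storage = nullptr;
  size_t temp_storage_bytes = 0;
  cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes,
                                      d_samples, d_histogram, num_levels,
                                      lower_level, upper_level, num_samples);

  cudaMalloc(&d_temp_storage, temp_storage_bytes);

  // Second call: run the histogram.
  cub::DeviceHistogram::HistogramEven(d_temp_storage, temp_storage_bytes,
                                      d_samples, d_histogram, num_levels,
                                      lower_level, upper_level, num_samples);

  cudaFree(d_temp_storage);
}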
60 changes: 55 additions & 5 deletions cub/device/dispatch/dispatch_histogram.cuh
@@ -175,6 +175,26 @@ template <
typename OffsetT> ///< Signed integer type for global offsets
struct DispatchHistogram
{
private:
template <class T>
CUB_RUNTIME_FUNCTION
static T ComputeScale(T lower_level, T upper_level, int bins)
{
return static_cast<T>(upper_level - lower_level) / bins;
}

#if defined(__CUDA_FP16_TYPES_EXIST__)
// There are no host versions of arithmetic operations on `__half`, so
// all host-side arithmetic is done in `float`.
CUB_RUNTIME_FUNCTION
static __half ComputeScale(__half lower_level, __half upper_level, int bins)
{
return __float2half(
(__half2float(upper_level) - __half2float(lower_level)) / bins);
}
#endif

public:
//---------------------------------------------------------------------
// Types and constants
//---------------------------------------------------------------------
@@ -282,15 +302,45 @@ struct DispatchHistogram
this->scale = double(1.0) / scale_;
}

template <typename T>
static __device__ __forceinline__ void
BinSelectImpl(T sample, T min, T max, T scale, int &bin, bool valid)
{
if (valid && (sample >= min) && (sample < max))
{
bin = static_cast<int>((sample - min) / scale);
}
}

// Method for converting samples to bin-ids
template <CacheLoadModifier LOAD_MODIFIER, typename _SampleT>
__host__ __device__ __forceinline__ void BinSelect(_SampleT sample, int &bin, bool valid)
__host__ __device__ __forceinline__ void BinSelect(_SampleT sample,
int &bin,
bool valid)
{
LevelT level_sample = (LevelT) sample;
BinSelectImpl(static_cast<LevelT>(sample),
min,
max,
scale,
bin,
valid);
}

if (valid && (level_sample >= min) && (level_sample < max))
bin = (int) ((level_sample - min) / scale);
#if defined(__CUDA_FP16_TYPES_EXIST__)
template <CacheLoadModifier LOAD_MODIFIER>
__device__ __forceinline__ void BinSelect(__half sample, int &bin, bool valid)
{
NV_IF_TARGET(NV_PROVIDES_SM_53,
(BinSelectImpl<__half>(sample,
min, max, scale,
bin, valid);),
(BinSelectImpl<float>(__half2float(sample),
__half2float(min),
__half2float(max),
__half2float(scale),
bin, valid);));
}
#endif

// Method for converting samples to bin-ids (float specialization)
template <CacheLoadModifier LOAD_MODIFIER>
@@ -865,7 +915,7 @@ struct DispatchHistogram
for (int channel = 0; channel < NUM_ACTIVE_CHANNELS; ++channel)
{
int bins = num_output_levels[channel] - 1;
LevelT scale = static_cast<LevelT>((upper_level[channel] - lower_level[channel]) / bins);
LevelT scale = ComputeScale(lower_level[channel], upper_level[channel], bins);

privatized_decode_op[channel].Init(num_output_levels[channel], upper_level[channel], lower_level[channel], scale);

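Taken together, the hunks above keep __half out of host arithmetic (ComputeScale converts through float) and route device-side bin selection through BinSelectImpl, using native __half comparisons only on SM 5.3+ and a float fallback elsewhere. A standalone sketch of that even-bin mapping, independent of CUB's dispatch machinery; the helper name bin_of_half is illustrative only:

// Minimal sketch of the even-bin mapping: scale = (upper - lower) / bins,
// and a sample maps to bin (sample - lower) / scale when it lies in
// [lower, upper). All arithmetic is done in float, which is valid on every
// architecture and on the host, matching the pre-SM 5.3 fallback above.
#include <cuda_fp16.h>

__host__ __device__ inline int bin_of_half(__half sample,
                                           __half lower,
                                           __half upper,
                                           int bins)
{
  float s     = __half2float(sample);
  float lo    = __half2float(lower);
  float hi    = __half2float(upper);
  float scale = (hi - lo) / bins;

  if (s >= lo && s < hi)
  {
    return static_cast<int>((s - lo) / scale);
  }
  return -1; // out of range; the real kernel simply leaves `bin` untouched
}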
33 changes: 33 additions & 0 deletions cub/thread/thread_search.cuh
@@ -38,6 +38,8 @@
#include <cub/util_type.cuh>
#include <cub/config.cuh>

#include <nv/target>

CUB_NAMESPACE_BEGIN


@@ -146,7 +148,38 @@ __device__ __forceinline__ OffsetT UpperBound(
}


#if defined(__CUDA_FP16_TYPES_EXIST__)
template <
typename InputIteratorT,
typename OffsetT>
__device__ __forceinline__ OffsetT UpperBound(
InputIteratorT input, ///< [in] Input sequence
OffsetT num_items, ///< [in] Input sequence length
__half val) ///< [in] Search key
{
OffsetT retval = 0;
while (num_items > 0)
{
OffsetT half = num_items >> 1;

bool lt;
NV_IF_TARGET(NV_PROVIDES_SM_53,
(lt = val < input[retval + half];),
(lt = __half2float(val) < __half2float(input[retval + half]);));

if (lt)
{
num_items = half;
}
else
{
retval = retval + (half + 1);
num_items = num_items - (half + 1);
}
}

return retval;
}
#endif

CUB_NAMESPACE_END
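The new overload is the same binary search as the generic UpperBound, with the comparison routed through float on architectures without native __half ordering (below SM 5.3). For reference, a host-side analog in plain float (illustrative, not part of the patch) shows the invariant: it returns the index of the first element strictly greater than the key.

// Host-side analog of the UpperBound overload added above, written for
// float so it can run anywhere.
#include <cstddef>

static std::size_t upper_bound_float(const float *input,
                                     std::size_t num_items,
                                     float val)
{
  std::size_t retval = 0;
  while (num_items > 0)
  {
    std::size_t half = num_items >> 1;
    if (val < input[retval + half])
    {
      num_items = half;        // keep the left half, excluding the midpoint
    }
    else
    {
      retval += half + 1;      // discard the midpoint and everything before it
      num_items -= half + 1;
    }
  }
  return retval;               // first index i with input[i] > val
}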
14 changes: 14 additions & 0 deletions test/half.h
@@ -237,13 +237,27 @@ struct half_t
{
return half_t(float(*this) * float(other));
}

/// Divide
__host__ __device__ __forceinline__
half_t operator/(const half_t &other) const
{
return half_t(float(*this) / float(other));
}

/// Add
__host__ __device__ __forceinline__
half_t operator+(const half_t &other)
{
return half_t(float(*this) + float(other));
}

/// Sub
__host__ __device__ __forceinline__
half_t operator-(const half_t &other) const
{
return half_t(float(*this) - float(other));
}

/// Less-than
__host__ __device__ __forceinline__
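These operators let host test code do half_t arithmetic by round-tripping through float, which the histogram tests need when computing level boundaries and reference bins. A short usage sketch; the function name and values are illustrative:

// Reference bin computation with the half_t test wrapper. The new
// operator/, operator+ and operator- convert to float, compute, and convert
// back, so this runs on the host without native __half arithmetic.
#include "half.h"   // test/half.h from this repository

inline int reference_bin(half_t sample, half_t lower, half_t upper, int bins)
{
  half_t scale = (upper - lower) / half_t(static_cast<float>(bins));
  if (float(sample) >= float(lower) && float(sample) < float(upper))
  {
    return static_cast<int>(float((sample - lower) / scale));
  }
  return -1;  // out of range
}

// Example: reference_bin(half_t(3.0f), half_t(0.0f), half_t(8.0f), 4) == 1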