Skip to content

Commit

Permalink
Merge pull request #2085 from psychocrypt/topic-amdOptimizeDiv
Browse files Browse the repository at this point in the history
OpenCL: optimize cn-heavy div
  • Loading branch information
fireice-uk authored Nov 20, 2018
2 parents b7ffd6b + 9813e1c commit 1b2b4d3
Showing 1 changed file with 4 additions and 8 deletions.
12 changes: 4 additions & 8 deletions xmrstak/backend/amd/amd_gpu/opencl/fast_div_heavy.cl
Original file line number Diff line number Diff line change
Expand Up @@ -6,23 +6,19 @@ inline long fast_div_heavy(long _a, int _b)
/*
 * Approximates the signed quotient _a / _b without an integer divide.
 *
 * The quotient is assembled from three partial quotients (q1, q2, q3), each
 * estimated by multiplying a piece of the running remainder with a
 * single-precision reciprocal of |_b|; the sign is re-applied at the end.
 *
 * _a : 64-bit dividend.
 * _b : 32-bit divisor.
 * Returns q1 + q2 + q3, negated when _a and _b have opposite signs.
 *
 * NOTE(review): no guard for _b == 0 is visible here; presumably callers
 * guarantee a non-zero divisor -- confirm.
 * NOTE(review): this listing looks like a rendered diff whose +/- markers
 * were lost: q1 and q2 are each declared twice below. The first declaration
 * of each pair is presumably the removed (pre-change) line and must not be
 * kept together with its replacement in a real source file.
 */
{
// Work on magnitudes; the sign is handled only in the return statement.
long a = abs(_a);
int b = abs(_b);

// Fast reciprocal of the divisor magnitude as a float.
float rcp = native_recip(convert_float_rte(b));
// Adding 32 to the float's exponent field (bits 23 and up) scales rcp by 2^32,
// matching the weight of the upper 32-bit word of `a` used for q1.
float rcp2 = as_float(as_uint(rcp) + (32U << 23));

// NOTE(review): the next two lines both declare q1 -- apparently the old line
// followed by its replacement. They differ only in the conversion used:
// convert_ulong_rte (explicit round-to-nearest-even) vs. convert_ulong
// (no rounding suffix, i.e. the default float-to-integer rounding mode).
// q1: partial quotient estimated from the upper 32-bit word of `a`.
ulong q1 = convert_ulong_rte(convert_float_rte(as_int2(a).s1) * rcp2);
ulong q1 = convert_ulong(convert_float_rte(as_int2(a).s1) * rcp2);
// Remove the part of the dividend already accounted for by q1.
a -= q1 * as_uint(b);

// NOTE(review): q2 is also declared twice. The first line (apparently the old
// form) converts the whole 64-bit remainder to float. The three lines after it
// (apparently the new form) take the low 32-bit word of (a >> 12), multiply by
// rcp, then add 12 to the float exponent to undo the shift (scale by 2^12)
// before rounding to long.
long q2 = convert_long_rte(convert_float_rtn(a) * rcp);
float q2f = convert_float_rte(as_int2(a >> 12).s0) * rcp;
q2f = as_float(as_uint(q2f) + (12U << 23));
long q2 = convert_long_rte(q2f);
// 32-bit remainder after q2, computed on the low words only.
int a2 = as_int2(a).s0 - as_int2(q2).s0 * b;

// q3: final correction term estimated from the 32-bit remainder.
int q3 = convert_int_rte(convert_float_rte(a2) * rcp);
// (a2 - q3 * b) >> 31 is -1 when that residual is negative and 0 otherwise,
// so q3 is decremented by one when the rounded estimate overshoots.
q3 += (a2 - q3 * b) >> 31;

const long q = q1 + q2 + q3;
// The sign bit of (upper word of _a) XOR _b is set exactly when the operands
// have opposite signs; negate the magnitude quotient in that case.
return ((as_int2(_a).s1 ^ _b) < 0) ? -q : q;
}

#endif
)==="

0 comments on commit 1b2b4d3

Please sign in to comment.