Skip to content

Commit

Permalink
Merge pull request #1329 from martin-frbg/dsdot
Browse files: browse the repository at this point in the history
(Trivial) optimized dsdot implementation for HASWELL
  • Loading branch information
martin-frbg authored Oct 25, 2017
2 parents b71f4fe + a07807c commit ab87ee6
Show file tree
Hide file tree
Showing 3 changed files with 42 additions and 5 deletions.
2 changes: 2 additions & 0 deletions kernel/x86_64/KERNEL.HASWELL
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ DDOTKERNEL = ddot.c
CDOTKERNEL = cdot.c
ZDOTKERNEL = zdot.c

DSDOTKERNEL = sdot.c

SAXPYKERNEL = saxpy.c
DAXPYKERNEL = daxpy.c
CAXPYKERNEL = caxpy.c
Expand Down
43 changes: 38 additions & 5 deletions kernel/x86_64/sdot.c
Original file line number Diff line number Diff line change
Expand Up @@ -68,13 +68,22 @@ static void sdot_kernel_16(BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *d)

#endif

#if defined (DSDOT)
double CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#else
FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
#endif
{
BLASLONG i=0;
BLASLONG ix=0,iy=0;
double dot = 0.0 ;

#if defined (DSDOT)
double mydot = 0.0;
FLOAT asmdot = 0.0;
#else
FLOAT mydot=0.0;
#endif
BLASLONG n1;

if ( n <= 0 ) return(dot);
Expand All @@ -85,17 +94,35 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)
n1 = n & (BLASLONG)(-32);

if ( n1 )
#if defined(DSDOT)
{
FLOAT *x1=x;
FLOAT *y1=y;
BLASLONG n2 = 32;
while (i<n1) {
sdot_kernel_16(n2, x1, y1 , &asmdot );
mydot += (double)asmdot;
asmdot=0.;
x1+=32;
y1+=32;
i+=32;
}
}
#else
sdot_kernel_16(n1, x, y , &mydot );


#endif
i = n1;
while(i < n)
{

#if defined(DSDOT)
dot += (double)y[i] * (double)x[i] ;
#else
dot += y[i] * x[i] ;
#endif
i++ ;

}

dot+=mydot;
return(dot);

Expand All @@ -106,8 +133,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)

while(i < n1)
{

#if defined (DSDOT)
dot += (double)y[iy] * (double)x[ix] + (double)y[iy+inc_y] * (double)x[ix+inc_x];
#else
dot += y[iy] * x[ix] + y[iy+inc_y] * x[ix+inc_x];
#endif
ix += inc_x*2 ;
iy += inc_y*2 ;
i+=2 ;
Expand All @@ -116,8 +146,11 @@ FLOAT CNAME(BLASLONG n, FLOAT *x, BLASLONG inc_x, FLOAT *y, BLASLONG inc_y)

while(i < n)
{

#if defined (DSDOT)
dot += (double)y[iy] * (double)x[ix] ;
#else
dot += y[iy] * x[ix] ;
#endif
ix += inc_x ;
iy += inc_y ;
i++ ;
Expand Down
2 changes: 2 additions & 0 deletions kernel/x86_64/sdot_microk_haswell-2.c
Original file line number Diff line number Diff line change
Expand Up @@ -53,9 +53,11 @@ static void sdot_kernel_16( BLASLONG n, FLOAT *x, FLOAT *y, FLOAT *dot)
"vfmadd231ps 64(%3,%0,4), %%ymm14, %%ymm6 \n\t" // 2 * y
"vfmadd231ps 96(%3,%0,4), %%ymm15, %%ymm7 \n\t" // 2 * y

#ifndef DSDOT
"addq $32 , %0 \n\t"
"subq $32 , %1 \n\t"
"jnz 1b \n\t"
#endif

"vextractf128 $1 , %%ymm4 , %%xmm12 \n\t"
"vextractf128 $1 , %%ymm5 , %%xmm13 \n\t"
Expand Down

0 comments on commit ab87ee6

Please sign in to comment.