From 8d10fb37421355732d212955a2fe71e884024d0b Mon Sep 17 00:00:00 2001 From: "m.petschow" Date: Wed, 15 May 2013 15:46:36 +0000 Subject: [PATCH] bug: if multiple blocks initial approx in parallel --- SRC/LAPACK/odebz.f | 16 ---------------- SRC/mrrr.c | 3 ++- SRC/mrrr_val.c | 4 +++- 3 files changed, 5 insertions(+), 18 deletions(-) diff --git a/SRC/LAPACK/odebz.f b/SRC/LAPACK/odebz.f index 921f921..ff259ce 100644 --- a/SRC/LAPACK/odebz.f +++ b/SRC/LAPACK/odebz.f @@ -251,7 +251,6 @@ SUBROUTINE ODEBZ( IJOB, NITMAX, N, MMAX, MINP, NBMIN, ABSTOL, * Compute the number of eigenvalues in the initial intervals. * MOUT = 0 -*DIR$ NOVECTOR DO 30 JI = 1, MINP DO 20 JP = 1, 2 TMP1 = D( 1 ) - AB( JI, JP ) @@ -406,21 +405,6 @@ SUBROUTINE ODEBZ( IJOB, NITMAX, N, MMAX, MINP, NBMIN, ABSTOL, ITMP1 = 1 TMP2 = MIN( TMP2, -PIVMIN ) END IF -* -* A series of compiler directives to defeat vectorization -* for the next loop -* -*$PL$ CMCHAR=' ' -CDIR$ NEXTSCALAR -C$DIR SCALAR -CDIR$ NEXT SCALAR -CVD$L NOVECTOR -CDEC$ NOVECTOR -CVD$ NOVECTOR -*VDIR NOVECTOR -*VOCL LOOP,SCALAR -CIBM PREFER SCALAR -*$PL$ CMCHAR='*' * DO 90 J = 2, N TMP2 = D( J ) - E2( J-1 ) / TMP2 - TMP1 diff --git a/SRC/mrrr.c b/SRC/mrrr.c index b28ddfc..ec90a20 100644 --- a/SRC/mrrr.c +++ b/SRC/mrrr.c @@ -238,7 +238,8 @@ int mrrr(char *jobz, char *range, int *np, double *restrict D, tolstruct->rtol2 = fmax(tolstruct->rtol2, 4.0 * DBL_EPSILON); } /* LAPACK: tolstruct->bsrtol = sqrt(DBL_EPSILON); */ - tolstruct->bsrtol = fmin(tolstruct->rtol1, sqrt(DBL_EPSILON)); + // tolstruct->bsrtol = fmin(tolstruct->rtol1, sqrt(DBL_EPSILON)); + tolstruct->bsrtol = MIN_RELGAP; tolstruct->RQtol = 2.0 * DBL_EPSILON; /* Compute the desired eigenvalues */ diff --git a/SRC/mrrr_val.c b/SRC/mrrr_val.c index 0f12a0a..2950460 100644 --- a/SRC/mrrr_val.c +++ b/SRC/mrrr_val.c @@ -501,7 +501,9 @@ int find_eigval_approx(int max_nthreads, char *range, in_t *Dstruct, while (nthreads > 1 && nvals/nthreads < MIN_BISEC_CHUNK) nthreads--; - if (nthreads > 
1) { + /* Parallel execution is disabled when the matrix splits into multiple blocks: + it is not reliable in that case yet and needs to be reworked with more care */ + if (nthreads > 1 && nsplit == 1) { threads = (pthread_t *) malloc( nthreads * sizeof(pthread_t) ); assert(threads != NULL);