From 547d8b13b4b3a4beded9cf183db4be47252f7c94 Mon Sep 17 00:00:00 2001 From: Reese Baird Date: Mon, 28 Aug 2017 09:50:07 -0700 Subject: [PATCH] PATCH for https://github.com/xianyi/OpenBLAS/pull/1262 PATCH for https://github.com/xianyi/OpenBLAS/pull/1236 PATCH for https://github.com/xianyi/OpenBLAS/pull/1247 --- .../serial-libs/openblas/SOURCES/1236.patch | 221 ++++++++++++++++++ .../serial-libs/openblas/SOURCES/1247.patch | 144 ++++++++++++ .../serial-libs/openblas/SOURCES/1262.patch | 197 ++++++++++++++++ .../serial-libs/openblas/SPECS/openblas.spec | 9 + 4 files changed, 571 insertions(+) create mode 100644 components/serial-libs/openblas/SOURCES/1236.patch create mode 100644 components/serial-libs/openblas/SOURCES/1247.patch create mode 100644 components/serial-libs/openblas/SOURCES/1262.patch diff --git a/components/serial-libs/openblas/SOURCES/1236.patch b/components/serial-libs/openblas/SOURCES/1236.patch new file mode 100644 index 0000000000..f605429031 --- /dev/null +++ b/components/serial-libs/openblas/SOURCES/1236.patch @@ -0,0 +1,221 @@ +From 6497aae57c77253b2d717b01f5ec17e137954395 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 12 Jul 2017 20:43:09 +0200 +Subject: [PATCH] Use cpuid 4 with subleafs to query L1 cache size on Intel + processors + +--- + cpuid_x86.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------- + 1 file changed, 102 insertions(+), 15 deletions(-) + +diff --git a/cpuid_x86.c b/cpuid_x86.c +index ab2ecdcaf..73b4df6b3 100644 +--- a/cpuid_x86.c ++++ b/cpuid_x86.c +@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) + *edx = cpuInfo[3]; + } + ++void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx) ++{ ++ int cpuInfo[4] = {-1}; ++ __cpuidex(cpuInfo, op, count); ++ *eax = cpuInfo[0]; ++ *ebx = cpuInfo[1]; ++ *ecx = cpuInfo[2]; ++ *edx = cpuInfo[3]; ++} ++ + #else + + #ifndef CPUIDEMU + + #if defined(__APPLE__) && defined(__i386__) + void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); ++void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx); + #else + static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ + #if defined(__i386__) && defined(__PIC__) +@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ + ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc"); + #endif + } ++ ++static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){ ++#if defined(__i386__) && defined(__PIC__) ++ __asm__ __volatile__ ++ ("mov %%ebx, %%edi;" ++ "cpuid;" ++ "xchgl %%ebx, %%edi;" ++ : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc"); ++#else ++ __asm__ __volatile__ ++ ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc"); ++#endif ++} + #endif + + #else +@@ -312,9 +336,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + cpuid(0, &cpuid_level, &ebx, &ecx, &edx); + + if (cpuid_level > 1) { +- ++ int numcalls =0 ; + cpuid(2, &eax, &ebx, &ecx, &edx); +- ++ numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries + info[ 0] = BITMASK(eax, 8, 0xff); + info[ 1] = BITMASK(eax, 16, 0xff); + info[ 2] = BITMASK(eax, 24, 0xff); +@@ -335,7 +359,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + info[14] = BITMASK(edx, 24, 0xff); + + for (i = 0; i < 15; i++){ +- + switch (info[i]){ + + /* This table is from http://www.sandpile.org/ia32/cpuid.htm */ +@@ -637,12 +660,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + LD1.linesize = 64; + break; + case 0x63 : +- DTB.size = 2048; +- DTB.associative = 4; +- DTB.linesize = 32; +- LDTB.size = 4096; +- LDTB.associative= 4; +- LDTB.linesize = 32; ++ DTB.size = 2048; ++ DTB.associative = 4; ++ DTB.linesize = 32; ++ LDTB.size = 4096; ++ LDTB.associative= 4; ++ LDTB.linesize = 32; ++ break; + case 0x66 : + LD1.size = 8; + LD1.associative = 4; +@@ -675,12 +699,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + LC1.associative = 8; + break; + case 0x76 : +- ITB.size = 2048; +- ITB.associative = 0; +- ITB.linesize = 8; +- LITB.size = 4096; +- LITB.associative= 0; +- LITB.linesize = 8; ++ ITB.size = 2048; ++ ITB.associative = 0; ++ ITB.linesize = 8; ++ LITB.size = 4096; ++ LITB.associative= 0; ++ LITB.linesize = 8; ++ break; + case 0x77 : + LC1.size = 16; + LC1.associative = 4; +@@ -891,6 +916,68 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + } + + if (get_vendor() == VENDOR_INTEL) { ++ if(LD1.size<=0 || LC1.size<=0){ ++ //If we didn't detect L1 correctly before, ++ int count; ++ for (count=0;count <4;count++) { ++ cpuid_count(4, count, &eax, &ebx, &ecx, &edx); ++ switch (eax &0x1f) { ++ case 0: ++ continue; ++ case 1: ++ case 3: ++ { ++ switch ((eax >>5) &0x07) ++ { ++ case 1: ++ { ++// fprintf(stderr,"L1 data cache...\n"); ++ int sets = ecx+1; ++ int lines = (ebx & 0x0fff) +1; ++ ebx>>=12; ++ int part = (ebx&0x03ff)+1; ++ ebx >>=10; ++ int assoc = (ebx&0x03ff)+1; ++ LD1.size = (assoc*part*lines*sets)/1024; ++ LD1.associative = assoc; ++ LD1.linesize= lines; ++ break; ++ } ++ default: ++ break; ++ } ++ break; ++ } ++ case 2: ++ { ++ switch ((eax >>5) &0x07) ++ { ++ case 1: ++ { ++// fprintf(stderr,"L1 instruction cache...\n"); ++ int sets = ecx+1; ++ int lines = (ebx & 0x0fff) +1; ++ ebx>>=12; ++ int part = (ebx&0x03ff)+1; ++ ebx >>=10; ++ int assoc = (ebx&0x03ff)+1; ++ LC1.size = (assoc*part*lines*sets)/1024; ++ LC1.associative = assoc; ++ LC1.linesize= lines; ++ break; ++ } ++ default: ++ break; ++ } ++ break; ++ ++ } ++ default: ++ break; ++ } ++ } ++ } ++ + cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx); + if (cpuid_level >= 0x80000006) { + if(L2.size<=0){ + +From 00774b1105ad5dbfe0e6be671096d51ad4a97b2e Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 12 Jul 2017 21:56:23 +0200 +Subject: [PATCH] Add dummy implementation of cpuid_count for the CPUIDEMU case + +--- + cpuid_x86.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/cpuid_x86.c b/cpuid_x86.c +index 73b4df6b3..103128a33 100644 +--- a/cpuid_x86.c ++++ b/cpuid_x86.c +@@ -157,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int * + *edx = idlist[current].d; + } + ++void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { ++ return cpuid (op, eax, ebx, ecx, edx); ++} ++ + #endif + + #endif // _MSC_VER +@@ -977,7 +981,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ + } + } + } +- + cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx); + if (cpuid_level >= 0x80000006) { + if(L2.size<=0){ + diff --git a/components/serial-libs/openblas/SOURCES/1247.patch b/components/serial-libs/openblas/SOURCES/1247.patch new file mode 100644 index 0000000000..c44c4326e2 --- /dev/null +++ b/components/serial-libs/openblas/SOURCES/1247.patch @@ -0,0 +1,144 @@ +From 88a35ff457f55e527e0e8a503a0dc61976c1846d Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Tue, 25 Jul 2017 08:39:35 +0200 +Subject: [PATCH] Revert #1246, "honor cgroup/cpuset limits" for now + +Unsafe usage of the __GLIBC_PREREQ macro lead to build breakage on non-glibc systems +--- + driver/others/init.c | 49 +++++-------------------------------------------- + driver/others/memory.c | 37 ------------------------------------- + 2 files changed, 5 insertions(+), 81 deletions(-) + +diff --git a/driver/others/init.c b/driver/others/init.c +index 4c75d72e4..3e6176967 100644 +--- a/driver/others/init.c ++++ b/driver/others/init.c +@@ -778,11 +778,11 @@ static int initialized = 0; + void gotoblas_affinity_init(void) { + + int cpu, num_avail; +-#ifndef USE_OPENMP ++#ifndef USE_OPENMP + cpu_set_t cpu_mask; + #endif + int i; +- ++ + if (initialized) return; + + initialized = 1; +@@ -826,54 +826,15 @@ void gotoblas_affinity_init(void) { + common -> shmid = pshmid; + + if (common -> magic != SH_MAGIC) { +- cpu_set_t *cpusetp; +- int nums; +- int ret; +- + #ifdef DEBUG + fprintf(stderr, "Shared Memory Initialization.\n"); + #endif + + //returns the number of processors which are currently online +- +- nums = sysconf(_SC_NPROCESSORS_CONF); +- +-#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 3) +- common->num_procs = nums; +-#elif __GLIBC_PREREQ(2, 7) +- cpusetp = CPU_ALLOC(nums); +- if (cpusetp == NULL) { +- common->num_procs = nums; +- } else { +- size_t size; +- size = CPU_ALLOC_SIZE(nums); +- ret = sched_getaffinity(0,size,cpusetp); +- if (ret!=0) +- common->num_procs = nums; +- else +- common->num_procs = CPU_COUNT_S(size,cpusetp); +- } +- CPU_FREE(cpusetp); +-#else +- ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); +- if (ret!=0) { +- common->num_procs = nums; +- } else { +-#if !__GLIBC_PREREQ(2, 6) +- int i; +- int n = 0; +- for (i=0;inum_procs = n; +- } +-#else +- common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp); +-#endif +- +-#endif ++ common -> num_procs = sysconf(_SC_NPROCESSORS_CONF);; + + if(common -> num_procs > MAX_CPUS) { +- fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS); ++ fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS); + exit(1); + } + +@@ -886,7 +847,7 @@ void gotoblas_affinity_init(void) { + if (common -> num_nodes > 1) numa_mapping(); + + common -> final_num_procs = 0; +- for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number. ++ for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number. + + for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0; + +diff --git a/driver/others/memory.c b/driver/others/memory.c +index 38d063715..916950315 100644 +--- a/driver/others/memory.c ++++ b/driver/others/memory.c +@@ -175,44 +175,7 @@ int get_num_procs(void); + #else + int get_num_procs(void) { + static int nums = 0; +-cpu_set_t *cpusetp; +-size_t size; +-int ret; +-int i,n; +- + if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); +-#if !defined(OS_LINUX) +- return nums; +-#endif +- +-#if !defined(__GLIBC_PREREQ) +- return nums; +-#endif +-#if !__GLIBC_PREREQ(2, 3) +- return nums; +-#endif +- +-#if !__GLIBC_PREREQ(2, 7) +- ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); +- if (ret!=0) return nums; +- n=0; +-#if !__GLIBC_PREREQ(2, 6) +- for (i=0;i +Date: Wed, 2 Aug 2017 00:37:58 +0200 +Subject: [PATCH 1/2] Make sure that range_n of last thread never exceeds the + actual data size when splitting the workload + +--- + driver/level2/gbmv_thread.c | 2 ++ + driver/level2/sbmv_thread.c | 3 +++ + driver/level2/spmv_thread.c | 2 ++ + driver/level2/symv_thread.c | 4 +++- + driver/level2/tbmv_thread.c | 3 +++ + driver/level2/tpmv_thread.c | 4 +++- + driver/level2/trmv_thread.c | 4 +++- + 7 files changed, 19 insertions(+), 3 deletions(-) + +diff --git a/driver/level2/gbmv_thread.c b/driver/level2/gbmv_thread.c +index e86b565f8..9d374676e 100644 +--- a/driver/level2/gbmv_thread.c ++++ b/driver/level2/gbmv_thread.c +@@ -230,8 +230,10 @@ int CNAME(BLASLONG m, BLASLONG n, BLASLONG ku, BLASLONG kl, FLOAT *alpha, FLOAT + + #ifndef TRANSA + range_m[num_cpu] = num_cpu * ((m + 15) & ~15); ++ if (range_m[num_cpu] > m) range_m[num_cpu] = m; + #else + range_m[num_cpu] = num_cpu * ((n + 15) & ~15); ++ if (range_m[num_cpu] > n) range_m[num_cpu] = n; + #endif + + queue[num_cpu].mode = mode; +diff --git a/driver/level2/sbmv_thread.c b/driver/level2/sbmv_thread.c +index 5718c0ec9..ce841ee0e 100644 +--- a/driver/level2/sbmv_thread.c ++++ b/driver/level2/sbmv_thread.c +@@ -246,6 +246,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x + + range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; + range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); ++ if (range_n[num_cpu] > n) range_n[num_cpu] = n; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = sbmv_kernel; +@@ -285,6 +286,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x + + range_m[num_cpu + 1] = range_m[num_cpu] + width; + range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); ++ if (range_n[num_cpu] > n) range_n[num_cpu] = n; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = sbmv_kernel; +@@ -316,6 +318,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x + range_m[num_cpu + 1] = range_m[num_cpu] + width; + + range_n[num_cpu] = num_cpu * ((n + 15) & ~15); ++ if (range_n[num_cpu] > n) range_n[num_cpu] = n; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = sbmv_kernel; +diff --git a/driver/level2/spmv_thread.c b/driver/level2/spmv_thread.c +index 035300841..0b4087430 100644 +--- a/driver/level2/spmv_thread.c ++++ b/driver/level2/spmv_thread.c +@@ -246,6 +246,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y, + + range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; + range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); ++ if (range_n[num_cpu] > m) range_n[num_cpu] = m; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = spmv_kernel; +@@ -285,6 +286,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *y, + + range_m[num_cpu + 1] = range_m[num_cpu] + width; + range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); ++ if (range_n[num_cpu] > m) range_n[num_cpu] = m; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = spmv_kernel; +diff --git a/driver/level2/symv_thread.c b/driver/level2/symv_thread.c +index 6580178f1..8d4cd249c 100644 +--- a/driver/level2/symv_thread.c ++++ b/driver/level2/symv_thread.c +@@ -177,7 +177,8 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i + + range_m[num_cpu + 1] = range_m[num_cpu] + width; + range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); +- ++ if (range_n[num_cpu] > m) range_n[num_cpu] = m; ++ + queue[MAX_CPU_NUMBER - num_cpu - 1].mode = mode; + queue[MAX_CPU_NUMBER - num_cpu - 1].routine = symv_kernel; + queue[MAX_CPU_NUMBER - num_cpu - 1].args = &args; +@@ -225,6 +226,7 @@ int CNAME(BLASLONG m, FLOAT *alpha, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG i + + range_m[num_cpu + 1] = range_m[num_cpu] + width; + range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); ++ if (range_n[num_cpu] > m) range_n[num_cpu] = m; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = symv_kernel; +diff --git a/driver/level2/tbmv_thread.c b/driver/level2/tbmv_thread.c +index 226a922e9..aaf4958e2 100644 +--- a/driver/level2/tbmv_thread.c ++++ b/driver/level2/tbmv_thread.c +@@ -288,6 +288,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc + + range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; + range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); ++ if (range_n[num_cpu] > n) range_n[num_cpu] = n; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = trmv_kernel; +@@ -327,6 +328,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc + + range_m[num_cpu + 1] = range_m[num_cpu] + width; + range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); ++ if (range_n[num_cpu] > n) range_n[num_cpu] = n; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = trmv_kernel; +@@ -356,6 +358,7 @@ int CNAME(BLASLONG n, BLASLONG k, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG inc + + range_m[num_cpu + 1] = range_m[num_cpu] + width; + range_n[num_cpu] = num_cpu * (((n + 15) & ~15) + 16); ++ if (range_n[num_cpu] > n) range_n[num_cpu] = n; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = trmv_kernel; +diff --git a/driver/level2/tpmv_thread.c b/driver/level2/tpmv_thread.c +index c91b52775..79438ba29 100644 +--- a/driver/level2/tpmv_thread.c ++++ b/driver/level2/tpmv_thread.c +@@ -307,7 +307,8 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr + + range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; + range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); +- ++ if (range_n[num_cpu] > m) range_n[num_cpu] = m; ++ + queue[num_cpu].mode = mode; + queue[num_cpu].routine = tpmv_kernel; + queue[num_cpu].args = &args; +@@ -346,6 +347,7 @@ int CNAME(BLASLONG m, FLOAT *a, FLOAT *x, BLASLONG incx, FLOAT *buffer, int nthr + + range_m[num_cpu + 1] = range_m[num_cpu] + width; + range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); ++ if (range_n[num_cpu] > m) range_n[num_cpu] = m; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = tpmv_kernel; +diff --git a/driver/level2/trmv_thread.c b/driver/level2/trmv_thread.c +index 0a155366c..8b931a0e8 100644 +--- a/driver/level2/trmv_thread.c ++++ b/driver/level2/trmv_thread.c +@@ -346,7 +346,8 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu + + range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; + range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); +- ++ if (range_n[num_cpu] > m) range_n[num_cpu] = m; ++ + queue[num_cpu].mode = mode; + queue[num_cpu].routine = trmv_kernel; + queue[num_cpu].args = &args; +@@ -385,6 +386,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu + + range_m[num_cpu + 1] = range_m[num_cpu] + width; + range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); ++ if (range_n[num_cpu] > m) range_n[num_cpu] = m; + + queue[num_cpu].mode = mode; + queue[num_cpu].routine = trmv_kernel; + +From 0ba64cee60c90f2533b918bc026283f5d5288a89 Mon Sep 17 00:00:00 2001 +From: Martin Kroeker +Date: Wed, 2 Aug 2017 12:03:54 +0200 +Subject: [PATCH 2/2] Update trmv_thread.c + +--- + driver/level2/trmv_thread.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/driver/level2/trmv_thread.c b/driver/level2/trmv_thread.c +index 8b931a0e8..24b881a93 100644 +--- a/driver/level2/trmv_thread.c ++++ b/driver/level2/trmv_thread.c +@@ -347,7 +347,7 @@ int CNAME(BLASLONG m, FLOAT *a, BLASLONG lda, FLOAT *x, BLASLONG incx, FLOAT *bu + range_m[MAX_CPU_NUMBER - num_cpu - 1] = range_m[MAX_CPU_NUMBER - num_cpu] - width; + range_n[num_cpu] = num_cpu * (((m + 15) & ~15) + 16); + if (range_n[num_cpu] > m) range_n[num_cpu] = m; +- ++ + queue[num_cpu].mode = mode; + queue[num_cpu].routine = trmv_kernel; + queue[num_cpu].args = &args; + diff --git a/components/serial-libs/openblas/SPECS/openblas.spec b/components/serial-libs/openblas/SPECS/openblas.spec index d771566ac3..1ecfc4d9e7 100644 --- a/components/serial-libs/openblas/SPECS/openblas.spec +++ b/components/serial-libs/openblas/SPECS/openblas.spec @@ -52,6 +52,12 @@ Patch2: openblas-noexecstack.patch Patch3: openblas-gemv.patch # PATCH-FIX-UPSTREADM fix-arm64-cpuid-return.patch Patch4: fix-arm64-cpuid-return.patch +# PATCH for https://github.com/xianyi/OpenBLAS/pull/1262 +Patch5: 1262.patch +# PATCH for https://github.com/xianyi/OpenBLAS/pull/1236 +Patch6: 1236.patch +# PATCH for https://github.com/xianyi/OpenBLAS/pull/1247 +Patch7: 1247.patch ExclusiveArch: %ix86 ia64 ppc ppc64 x86_64 aarch64 %description @@ -69,6 +75,9 @@ OpenBLAS is an optimized BLAS library based on GotoBLAS2 1.13 BSD version. # karl.w.schulz@intel.com (9/19/16) - disabling patch3 for v0.2.19 #%patch3 -p1 %patch4 -p1 +%patch5 -p0 +%patch6 -p0 +%patch7 -p0 %build # OpenHPC compiler/mpi designation