Skip to content

Commit

Permalink
Browse files Browse the repository at this point in the history
  • Loading branch information
crbaird committed Aug 28, 2017
1 parent a060814 commit 547d8b1
Show file tree
Hide file tree
Showing 4 changed files with 571 additions and 0 deletions.
221 changes: 221 additions & 0 deletions components/serial-libs/openblas/SOURCES/1236.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
From 6497aae57c77253b2d717b01f5ec17e137954395 Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 12 Jul 2017 20:43:09 +0200
Subject: [PATCH] Use cpuid 4 with subleafs to query L1 cache size on Intel
processors

---
cpuid_x86.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++--------
1 file changed, 102 insertions(+), 15 deletions(-)

diff --git a/cpuid_x86.c b/cpuid_x86.c
index ab2ecdcaf..73b4df6b3 100644
--- a/cpuid_x86.c
+++ b/cpuid_x86.c
@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx)
*edx = cpuInfo[3];
}

+void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx) /* query CPUID leaf `op`, subleaf `count`; the four register results are stored through eax..edx */
+{
+ int cpuInfo[4] = {-1}; /* only element 0 is set to -1; the remaining elements are zero-initialized */
+ __cpuidex(cpuInfo, op, count); /* NOTE(review): presumably the MSVC intrinsic that runs CPUID with EAX=op, ECX=count - confirm */
+ *eax = cpuInfo[0]; /* results are copied out in eax, ebx, ecx, edx order */
+ *ebx = cpuInfo[1];
+ *ecx = cpuInfo[2];
+ *edx = cpuInfo[3];
+}
+
#else

#ifndef CPUIDEMU

#if defined(__APPLE__) && defined(__i386__)
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx);
+void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx);
#else
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
#if defined(__i386__) && defined(__PIC__)
@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc");
#endif
}
+
+static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){ /* run CPUID with eax=op and ecx=count; register results are stored through the four pointers */
+#if defined(__i386__) && defined(__PIC__) /* variant that leaves ebx unchanged across the asm; presumably because ebx is reserved for PIC on i386 - confirm */
+ __asm__ __volatile__
+ ("mov %%ebx, %%edi;" /* stash the incoming ebx in edi */
+ "cpuid;"
+ "xchgl %%ebx, %%edi;" /* edi receives cpuid's ebx result, ebx gets the stashed value back */
+ : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc"); /* "0"/"2" tie op to the eax operand and count to the ecx operand; *ebx is read from edi */
+#else
+ __asm__ __volatile__
+ ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc"); /* same operand tying; ebx is an ordinary output here */
+#endif
+}
#endif

#else
@@ -312,9 +336,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
cpuid(0, &cpuid_level, &ebx, &ecx, &edx);

if (cpuid_level > 1) {
-
+ int numcalls =0 ;
cpuid(2, &eax, &ebx, &ecx, &edx);
-
+ numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries
info[ 0] = BITMASK(eax, 8, 0xff);
info[ 1] = BITMASK(eax, 16, 0xff);
info[ 2] = BITMASK(eax, 24, 0xff);
@@ -335,7 +359,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
info[14] = BITMASK(edx, 24, 0xff);

for (i = 0; i < 15; i++){
-
switch (info[i]){

/* This table is from http://www.sandpile.org/ia32/cpuid.htm */
@@ -637,12 +660,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LD1.linesize = 64;
break;
case 0x63 :
- DTB.size = 2048;
- DTB.associative = 4;
- DTB.linesize = 32;
- LDTB.size = 4096;
- LDTB.associative= 4;
- LDTB.linesize = 32;
+ DTB.size = 2048;
+ DTB.associative = 4;
+ DTB.linesize = 32;
+ LDTB.size = 4096;
+ LDTB.associative= 4;
+ LDTB.linesize = 32;
+ break;
case 0x66 :
LD1.size = 8;
LD1.associative = 4;
@@ -675,12 +699,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
LC1.associative = 8;
break;
case 0x76 :
- ITB.size = 2048;
- ITB.associative = 0;
- ITB.linesize = 8;
- LITB.size = 4096;
- LITB.associative= 0;
- LITB.linesize = 8;
+ ITB.size = 2048;
+ ITB.associative = 0;
+ ITB.linesize = 8;
+ LITB.size = 4096;
+ LITB.associative= 0;
+ LITB.linesize = 8;
+ break;
case 0x77 :
LC1.size = 16;
LC1.associative = 4;
@@ -891,6 +916,68 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
}

if (get_vendor() == VENDOR_INTEL) {
+ if(LD1.size<=0 || LC1.size<=0){
+ //If we didn't detect L1 correctly before,
+ int count;
+ for (count=0;count <4;count++) {
+ cpuid_count(4, count, &eax, &ebx, &ecx, &edx);
+ switch (eax &0x1f) {
+ case 0:
+ continue;
+ case 1:
+ case 3:
+ {
+ switch ((eax >>5) &0x07)
+ {
+ case 1:
+ {
+// fprintf(stderr,"L1 data cache...\n");
+ int sets = ecx+1;
+ int lines = (ebx & 0x0fff) +1;
+ ebx>>=12;
+ int part = (ebx&0x03ff)+1;
+ ebx >>=10;
+ int assoc = (ebx&0x03ff)+1;
+ LD1.size = (assoc*part*lines*sets)/1024;
+ LD1.associative = assoc;
+ LD1.linesize= lines;
+ break;
+ }
+ default:
+ break;
+ }
+ break;
+ }
+ case 2:
+ {
+ switch ((eax >>5) &0x07)
+ {
+ case 1:
+ {
+// fprintf(stderr,"L1 instruction cache...\n");
+ int sets = ecx+1;
+ int lines = (ebx & 0x0fff) +1;
+ ebx>>=12;
+ int part = (ebx&0x03ff)+1;
+ ebx >>=10;
+ int assoc = (ebx&0x03ff)+1;
+ LC1.size = (assoc*part*lines*sets)/1024;
+ LC1.associative = assoc;
+ LC1.linesize= lines;
+ break;
+ }
+ default:
+ break;
+ }
+ break;
+
+ }
+ default:
+ break;
+ }
+ }
+ }
+
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level >= 0x80000006) {
if(L2.size<=0){

From 00774b1105ad5dbfe0e6be671096d51ad4a97b2e Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Wed, 12 Jul 2017 21:56:23 +0200
Subject: [PATCH] Add dummy implementation of cpuid_count for the CPUIDEMU case

---
cpuid_x86.c | 5 ++++-
1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/cpuid_x86.c b/cpuid_x86.c
index 73b4df6b3..103128a33 100644
--- a/cpuid_x86.c
+++ b/cpuid_x86.c
@@ -157,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int *
*edx = idlist[current].d;
}

+void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { /* dummy for the CPUIDEMU case: the subleaf `count` is ignored */
+ cpuid (op, eax, ebx, ecx, edx); /* forward to cpuid(); plain call because ISO C forbids `return expr;` in a void function */
+}
+
#endif

#endif // _MSC_VER
@@ -977,7 +981,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){
}
}
}
-
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx);
if (cpuid_level >= 0x80000006) {
if(L2.size<=0){

144 changes: 144 additions & 0 deletions components/serial-libs/openblas/SOURCES/1247.patch
Original file line number Diff line number Diff line change
@@ -0,0 +1,144 @@
From 88a35ff457f55e527e0e8a503a0dc61976c1846d Mon Sep 17 00:00:00 2001
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de>
Date: Tue, 25 Jul 2017 08:39:35 +0200
Subject: [PATCH] Revert #1246, "honor cgroup/cpuset limits" for now

Unsafe usage of the __GLIBC_PREREQ macro led to build breakage on non-glibc systems
---
driver/others/init.c | 49 +++++--------------------------------------------
driver/others/memory.c | 37 -------------------------------------
2 files changed, 5 insertions(+), 81 deletions(-)

diff --git a/driver/others/init.c b/driver/others/init.c
index 4c75d72e4..3e6176967 100644
--- a/driver/others/init.c
+++ b/driver/others/init.c
@@ -778,11 +778,11 @@ static int initialized = 0;
void gotoblas_affinity_init(void) {

int cpu, num_avail;
-#ifndef USE_OPENMP
+#ifndef USE_OPENMP
cpu_set_t cpu_mask;
#endif
int i;
-
+
if (initialized) return;

initialized = 1;
@@ -826,54 +826,15 @@ void gotoblas_affinity_init(void) {
common -> shmid = pshmid;

if (common -> magic != SH_MAGIC) {
- cpu_set_t *cpusetp;
- int nums;
- int ret;
-
#ifdef DEBUG
fprintf(stderr, "Shared Memory Initialization.\n");
#endif

//returns the number of processors which are currently online
-
- nums = sysconf(_SC_NPROCESSORS_CONF);
-
-#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 3)
- common->num_procs = nums;
-#elif __GLIBC_PREREQ(2, 7)
- cpusetp = CPU_ALLOC(nums);
- if (cpusetp == NULL) {
- common->num_procs = nums;
- } else {
- size_t size;
- size = CPU_ALLOC_SIZE(nums);
- ret = sched_getaffinity(0,size,cpusetp);
- if (ret!=0)
- common->num_procs = nums;
- else
- common->num_procs = CPU_COUNT_S(size,cpusetp);
- }
- CPU_FREE(cpusetp);
-#else
- ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp);
- if (ret!=0) {
- common->num_procs = nums;
- } else {
-#if !__GLIBC_PREREQ(2, 6)
- int i;
- int n = 0;
- for (i=0;i<nums;i++)
- if (CPU_ISSET(i,cpusetp)) n++;
- common->num_procs = n;
- }
-#else
- common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
-#endif
-
-#endif
+ common -> num_procs = sysconf(_SC_NPROCESSORS_CONF);;

if(common -> num_procs > MAX_CPUS) {
- fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
+ fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS);
exit(1);
}

@@ -886,7 +847,7 @@ void gotoblas_affinity_init(void) {
if (common -> num_nodes > 1) numa_mapping();

common -> final_num_procs = 0;
- for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.
+ for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number.

for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0;

diff --git a/driver/others/memory.c b/driver/others/memory.c
index 38d063715..916950315 100644
--- a/driver/others/memory.c
+++ b/driver/others/memory.c
@@ -175,44 +175,7 @@ int get_num_procs(void);
#else
int get_num_procs(void) {
static int nums = 0;
-cpu_set_t *cpusetp;
-size_t size;
-int ret;
-int i,n;
-
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF);
-#if !defined(OS_LINUX)
- return nums;
-#endif
-
-#if !defined(__GLIBC_PREREQ)
- return nums;
-#endif
-#if !__GLIBC_PREREQ(2, 3)
- return nums;
-#endif
-
-#if !__GLIBC_PREREQ(2, 7)
- ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp);
- if (ret!=0) return nums;
- n=0;
-#if !__GLIBC_PREREQ(2, 6)
- for (i=0;i<nums;i++)
- if (CPU_ISSET(i,cpusetp)) n++;
- nums=n;
-#else
- nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp);
-#endif
- return nums;
-#endif
-
- cpusetp = CPU_ALLOC(nums);
- if (cpusetp == NULL) return nums;
- size = CPU_ALLOC_SIZE(nums);
- ret = sched_getaffinity(0,size,cpusetp);
- if (ret!=0) return nums;
- nums = CPU_COUNT_S(size,cpusetp);
- CPU_FREE(cpusetp);
return nums;
}
#endif
Loading

0 comments on commit 547d8b1

Please sign in to comment.