-
Notifications
You must be signed in to change notification settings - Fork 195
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
PATCH for OpenMathLib/OpenBLAS#1236; PATCH for OpenMathLib/OpenBLAS#1247
- Loading branch information
Showing
4 changed files
with
571 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,221 @@ | ||
From 6497aae57c77253b2d717b01f5ec17e137954395 Mon Sep 17 00:00:00 2001 | ||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> | ||
Date: Wed, 12 Jul 2017 20:43:09 +0200 | ||
Subject: [PATCH] Use cpuid 4 with subleafs to query L1 cache size on Intel | ||
processors | ||
|
||
--- | ||
cpuid_x86.c | 117 ++++++++++++++++++++++++++++++++++++++++++++++++++++-------- | ||
1 file changed, 102 insertions(+), 15 deletions(-) | ||
|
||
diff --git a/cpuid_x86.c b/cpuid_x86.c | ||
index ab2ecdcaf..73b4df6b3 100644 | ||
--- a/cpuid_x86.c | ||
+++ b/cpuid_x86.c | ||
@@ -71,12 +71,23 @@ void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx) | ||
*edx = cpuInfo[3]; | ||
} | ||
|
||
+void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx) | ||
+{ | ||
+ int cpuInfo[4] = {-1}; | ||
+ __cpuidex(cpuInfo, op, count); | ||
+ *eax = cpuInfo[0]; | ||
+ *ebx = cpuInfo[1]; | ||
+ *ecx = cpuInfo[2]; | ||
+ *edx = cpuInfo[3]; | ||
+} | ||
+ | ||
#else | ||
|
||
#ifndef CPUIDEMU | ||
|
||
#if defined(__APPLE__) && defined(__i386__) | ||
void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx); | ||
+void cpuid_count(int op, int count, int *eax, int *ebx, int *ecx, int *edx); | ||
#else | ||
static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ | ||
#if defined(__i386__) && defined(__PIC__) | ||
@@ -90,6 +101,19 @@ static C_INLINE void cpuid(int op, int *eax, int *ebx, int *ecx, int *edx){ | ||
("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "a" (op) : "cc"); | ||
#endif | ||
} | ||
+ | ||
+static C_INLINE void cpuid_count(int op, int count ,int *eax, int *ebx, int *ecx, int *edx){ | ||
+#if defined(__i386__) && defined(__PIC__) | ||
+ __asm__ __volatile__ | ||
+ ("mov %%ebx, %%edi;" | ||
+ "cpuid;" | ||
+ "xchgl %%ebx, %%edi;" | ||
+ : "=a" (*eax), "=D" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc"); | ||
+#else | ||
+ __asm__ __volatile__ | ||
+ ("cpuid": "=a" (*eax), "=b" (*ebx), "=c" (*ecx), "=d" (*edx) : "0" (op), "2" (count) : "cc"); | ||
+#endif | ||
+} | ||
#endif | ||
|
||
#else | ||
@@ -312,9 +336,9 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | ||
cpuid(0, &cpuid_level, &ebx, &ecx, &edx); | ||
|
||
if (cpuid_level > 1) { | ||
- | ||
+ int numcalls =0 ; | ||
cpuid(2, &eax, &ebx, &ecx, &edx); | ||
- | ||
+ numcalls = BITMASK(eax, 0, 0xff); //FIXME some systems may require repeated calls to read all entries | ||
info[ 0] = BITMASK(eax, 8, 0xff); | ||
info[ 1] = BITMASK(eax, 16, 0xff); | ||
info[ 2] = BITMASK(eax, 24, 0xff); | ||
@@ -335,7 +359,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | ||
info[14] = BITMASK(edx, 24, 0xff); | ||
|
||
for (i = 0; i < 15; i++){ | ||
- | ||
switch (info[i]){ | ||
|
||
/* This table is from http://www.sandpile.org/ia32/cpuid.htm */ | ||
@@ -637,12 +660,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | ||
LD1.linesize = 64; | ||
break; | ||
case 0x63 : | ||
- DTB.size = 2048; | ||
- DTB.associative = 4; | ||
- DTB.linesize = 32; | ||
- LDTB.size = 4096; | ||
- LDTB.associative= 4; | ||
- LDTB.linesize = 32; | ||
+ DTB.size = 2048; | ||
+ DTB.associative = 4; | ||
+ DTB.linesize = 32; | ||
+ LDTB.size = 4096; | ||
+ LDTB.associative= 4; | ||
+ LDTB.linesize = 32; | ||
+ break; | ||
case 0x66 : | ||
LD1.size = 8; | ||
LD1.associative = 4; | ||
@@ -675,12 +699,13 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | ||
LC1.associative = 8; | ||
break; | ||
case 0x76 : | ||
- ITB.size = 2048; | ||
- ITB.associative = 0; | ||
- ITB.linesize = 8; | ||
- LITB.size = 4096; | ||
- LITB.associative= 0; | ||
- LITB.linesize = 8; | ||
+ ITB.size = 2048; | ||
+ ITB.associative = 0; | ||
+ ITB.linesize = 8; | ||
+ LITB.size = 4096; | ||
+ LITB.associative= 0; | ||
+ LITB.linesize = 8; | ||
+ break; | ||
case 0x77 : | ||
LC1.size = 16; | ||
LC1.associative = 4; | ||
@@ -891,6 +916,68 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | ||
} | ||
|
||
if (get_vendor() == VENDOR_INTEL) { | ||
+ if(LD1.size<=0 || LC1.size<=0){ | ||
+ //If we didn't detect L1 correctly before, query it via the cpuid leaf 4 subleafs | ||
+ int count; | ||
+ for (count=0;count <4;count++) { | ||
+ cpuid_count(4, count, &eax, &ebx, &ecx, &edx); | ||
+ switch (eax &0x1f) { | ||
+ case 0: | ||
+ continue; | ||
+ case 1: | ||
+ case 3: | ||
+ { | ||
+ switch ((eax >>5) &0x07) | ||
+ { | ||
+ case 1: | ||
+ { | ||
+// fprintf(stderr,"L1 data cache...\n"); | ||
+ int sets = ecx+1; | ||
+ int lines = (ebx & 0x0fff) +1; | ||
+ ebx>>=12; | ||
+ int part = (ebx&0x03ff)+1; | ||
+ ebx >>=10; | ||
+ int assoc = (ebx&0x03ff)+1; | ||
+ LD1.size = (assoc*part*lines*sets)/1024; | ||
+ LD1.associative = assoc; | ||
+ LD1.linesize= lines; | ||
+ break; | ||
+ } | ||
+ default: | ||
+ break; | ||
+ } | ||
+ break; | ||
+ } | ||
+ case 2: | ||
+ { | ||
+ switch ((eax >>5) &0x07) | ||
+ { | ||
+ case 1: | ||
+ { | ||
+// fprintf(stderr,"L1 instruction cache...\n"); | ||
+ int sets = ecx+1; | ||
+ int lines = (ebx & 0x0fff) +1; | ||
+ ebx>>=12; | ||
+ int part = (ebx&0x03ff)+1; | ||
+ ebx >>=10; | ||
+ int assoc = (ebx&0x03ff)+1; | ||
+ LC1.size = (assoc*part*lines*sets)/1024; | ||
+ LC1.associative = assoc; | ||
+ LC1.linesize= lines; | ||
+ break; | ||
+ } | ||
+ default: | ||
+ break; | ||
+ } | ||
+ break; | ||
+ | ||
+ } | ||
+ default: | ||
+ break; | ||
+ } | ||
+ } | ||
+ } | ||
+ | ||
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx); | ||
if (cpuid_level >= 0x80000006) { | ||
if(L2.size<=0){ | ||
|
||
From 00774b1105ad5dbfe0e6be671096d51ad4a97b2e Mon Sep 17 00:00:00 2001 | ||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> | ||
Date: Wed, 12 Jul 2017 21:56:23 +0200 | ||
Subject: [PATCH] Add dummy implementation of cpuid_count for the CPUIDEMU case | ||
|
||
--- | ||
cpuid_x86.c | 5 ++++- | ||
1 file changed, 4 insertions(+), 1 deletion(-) | ||
|
||
diff --git a/cpuid_x86.c b/cpuid_x86.c | ||
index 73b4df6b3..103128a33 100644 | ||
--- a/cpuid_x86.c | ||
+++ b/cpuid_x86.c | ||
@@ -157,6 +157,10 @@ void cpuid(unsigned int op, unsigned int *eax, unsigned int *ebx, unsigned int * | ||
*edx = idlist[current].d; | ||
} | ||
|
||
+void cpuid_count (unsigned int op, unsigned int count, unsigned int *eax, unsigned int *ebx, unsigned int *ecx, unsigned int *edx) { | ||
+ return cpuid (op, eax, ebx, ecx, edx); | ||
+} | ||
+ | ||
#endif | ||
|
||
#endif // _MSC_VER | ||
@@ -977,7 +981,6 @@ int get_cacheinfo(int type, cache_info_t *cacheinfo){ | ||
} | ||
} | ||
} | ||
- | ||
cpuid(0x80000000, &cpuid_level, &ebx, &ecx, &edx); | ||
if (cpuid_level >= 0x80000006) { | ||
if(L2.size<=0){ | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
From 88a35ff457f55e527e0e8a503a0dc61976c1846d Mon Sep 17 00:00:00 2001 | ||
From: Martin Kroeker <martin@ruby.chemie.uni-freiburg.de> | ||
Date: Tue, 25 Jul 2017 08:39:35 +0200 | ||
Subject: [PATCH] Revert #1246, "honor cgroup/cpuset limits" for now | ||
|
||
Unsafe usage of the __GLIBC_PREREQ macro led to build breakage on non-glibc systems | ||
--- | ||
driver/others/init.c | 49 +++++-------------------------------------------- | ||
driver/others/memory.c | 37 ------------------------------------- | ||
2 files changed, 5 insertions(+), 81 deletions(-) | ||
|
||
diff --git a/driver/others/init.c b/driver/others/init.c | ||
index 4c75d72e4..3e6176967 100644 | ||
--- a/driver/others/init.c | ||
+++ b/driver/others/init.c | ||
@@ -778,11 +778,11 @@ static int initialized = 0; | ||
void gotoblas_affinity_init(void) { | ||
|
||
int cpu, num_avail; | ||
-#ifndef USE_OPENMP | ||
+#ifndef USE_OPENMP | ||
cpu_set_t cpu_mask; | ||
#endif | ||
int i; | ||
- | ||
+ | ||
if (initialized) return; | ||
|
||
initialized = 1; | ||
@@ -826,54 +826,15 @@ void gotoblas_affinity_init(void) { | ||
common -> shmid = pshmid; | ||
|
||
if (common -> magic != SH_MAGIC) { | ||
- cpu_set_t *cpusetp; | ||
- int nums; | ||
- int ret; | ||
- | ||
#ifdef DEBUG | ||
fprintf(stderr, "Shared Memory Initialization.\n"); | ||
#endif | ||
|
||
//returns the number of processors which are currently online | ||
- | ||
- nums = sysconf(_SC_NPROCESSORS_CONF); | ||
- | ||
-#if !defined(__GLIBC_PREREQ) || !__GLIBC_PREREQ(2, 3) | ||
- common->num_procs = nums; | ||
-#elif __GLIBC_PREREQ(2, 7) | ||
- cpusetp = CPU_ALLOC(nums); | ||
- if (cpusetp == NULL) { | ||
- common->num_procs = nums; | ||
- } else { | ||
- size_t size; | ||
- size = CPU_ALLOC_SIZE(nums); | ||
- ret = sched_getaffinity(0,size,cpusetp); | ||
- if (ret!=0) | ||
- common->num_procs = nums; | ||
- else | ||
- common->num_procs = CPU_COUNT_S(size,cpusetp); | ||
- } | ||
- CPU_FREE(cpusetp); | ||
-#else | ||
- ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); | ||
- if (ret!=0) { | ||
- common->num_procs = nums; | ||
- } else { | ||
-#if !__GLIBC_PREREQ(2, 6) | ||
- int i; | ||
- int n = 0; | ||
- for (i=0;i<nums;i++) | ||
- if (CPU_ISSET(i,cpusetp)) n++; | ||
- common->num_procs = n; | ||
- } | ||
-#else | ||
- common->num_procs = CPU_COUNT(sizeof(cpu_set_t),cpusetp); | ||
-#endif | ||
- | ||
-#endif | ||
+ common -> num_procs = sysconf(_SC_NPROCESSORS_CONF);; | ||
|
||
if(common -> num_procs > MAX_CPUS) { | ||
- fprintf(stderr, "\nOpenBLAS Warning : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS); | ||
+ fprintf(stderr, "\nOpenBLAS Warining : The number of CPU/Cores(%d) is beyond the limit(%d). Terminated.\n", common->num_procs, MAX_CPUS); | ||
exit(1); | ||
} | ||
|
||
@@ -886,7 +847,7 @@ void gotoblas_affinity_init(void) { | ||
if (common -> num_nodes > 1) numa_mapping(); | ||
|
||
common -> final_num_procs = 0; | ||
- for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number. | ||
+ for(i = 0; i < common -> avail_count; i++) common -> final_num_procs += rcount(common -> avail[i]) + 1; //Make the max cpu number. | ||
|
||
for (cpu = 0; cpu < common -> final_num_procs; cpu ++) common -> cpu_use[cpu] = 0; | ||
|
||
diff --git a/driver/others/memory.c b/driver/others/memory.c | ||
index 38d063715..916950315 100644 | ||
--- a/driver/others/memory.c | ||
+++ b/driver/others/memory.c | ||
@@ -175,44 +175,7 @@ int get_num_procs(void); | ||
#else | ||
int get_num_procs(void) { | ||
static int nums = 0; | ||
-cpu_set_t *cpusetp; | ||
-size_t size; | ||
-int ret; | ||
-int i,n; | ||
- | ||
if (!nums) nums = sysconf(_SC_NPROCESSORS_CONF); | ||
-#if !defined(OS_LINUX) | ||
- return nums; | ||
-#endif | ||
- | ||
-#if !defined(__GLIBC_PREREQ) | ||
- return nums; | ||
-#endif | ||
-#if !__GLIBC_PREREQ(2, 3) | ||
- return nums; | ||
-#endif | ||
- | ||
-#if !__GLIBC_PREREQ(2, 7) | ||
- ret = sched_getaffinity(0,sizeof(cpu_set_t), cpusetp); | ||
- if (ret!=0) return nums; | ||
- n=0; | ||
-#if !__GLIBC_PREREQ(2, 6) | ||
- for (i=0;i<nums;i++) | ||
- if (CPU_ISSET(i,cpusetp)) n++; | ||
- nums=n; | ||
-#else | ||
- nums = CPU_COUNT(sizeof(cpu_set_t),cpusetp); | ||
-#endif | ||
- return nums; | ||
-#endif | ||
- | ||
- cpusetp = CPU_ALLOC(nums); | ||
- if (cpusetp == NULL) return nums; | ||
- size = CPU_ALLOC_SIZE(nums); | ||
- ret = sched_getaffinity(0,size,cpusetp); | ||
- if (ret!=0) return nums; | ||
- nums = CPU_COUNT_S(size,cpusetp); | ||
- CPU_FREE(cpusetp); | ||
return nums; | ||
} | ||
#endif |
Oops, something went wrong.