From 783ba8058fbc6d5f0a56d27bc368b659448b1fb1 Mon Sep 17 00:00:00 2001 From: maomao194313 Date: Mon, 4 Mar 2019 16:30:50 +0800 Subject: [PATCH 1/6] HiSilicon tsv110 CPUs optimization branch add HiSilicon tsv110 CPUs optimization branch --- kernel/arm64/KERNEL.TSV110 | 175 +++++++++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) create mode 100644 kernel/arm64/KERNEL.TSV110 diff --git a/kernel/arm64/KERNEL.TSV110 b/kernel/arm64/KERNEL.TSV110 new file mode 100644 index 0000000000..04d6940d7a --- /dev/null +++ b/kernel/arm64/KERNEL.TSV110 @@ -0,0 +1,175 @@ +SAMINKERNEL = ../arm/amin.c +DAMINKERNEL = ../arm/amin.c +CAMINKERNEL = ../arm/zamin.c +ZAMINKERNEL = ../arm/zamin.c + +SMAXKERNEL = ../arm/max.c +DMAXKERNEL = ../arm/max.c + +SMINKERNEL = ../arm/min.c +DMINKERNEL = ../arm/min.c + +ISAMINKERNEL = ../arm/iamin.c +IDAMINKERNEL = ../arm/iamin.c +ICAMINKERNEL = ../arm/izamin.c +IZAMINKERNEL = ../arm/izamin.c + +ISMAXKERNEL = ../arm/imax.c +IDMAXKERNEL = ../arm/imax.c + +ISMINKERNEL = ../arm/imin.c +IDMINKERNEL = ../arm/imin.c + +STRMMKERNEL = ../generic/trmmkernel_4x4.c +DTRMMKERNEL = ../generic/trmmkernel_2x2.c +CTRMMKERNEL = ../generic/ztrmmkernel_2x2.c +ZTRMMKERNEL = ../generic/ztrmmkernel_2x2.c + +STRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +STRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +STRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +STRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +DTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +DTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +DTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +DTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +CTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +CTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +CTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +CTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +ZTRSMKERNEL_LN = ../generic/trsm_kernel_LN.c +ZTRSMKERNEL_LT = ../generic/trsm_kernel_LT.c +ZTRSMKERNEL_RN = ../generic/trsm_kernel_RN.c +ZTRSMKERNEL_RT = ../generic/trsm_kernel_RT.c + +SAMAXKERNEL = amax.S +DAMAXKERNEL = amax.S +CAMAXKERNEL = zamax.S +ZAMAXKERNEL = zamax.S + +ISAMAXKERNEL = iamax.S +IDAMAXKERNEL = iamax.S +ICAMAXKERNEL = izamax.S +IZAMAXKERNEL = izamax.S + +SASUMKERNEL = asum.S +DASUMKERNEL = asum.S +CASUMKERNEL = casum.S +ZASUMKERNEL = zasum.S + +SAXPYKERNEL = axpy.S +DAXPYKERNEL = axpy.S +CAXPYKERNEL = zaxpy.S +ZAXPYKERNEL = zaxpy.S + +SCOPYKERNEL = copy.S +DCOPYKERNEL = copy.S +CCOPYKERNEL = copy.S +ZCOPYKERNEL = copy.S + +SDOTKERNEL = dot.S +DDOTKERNEL = dot.S +CDOTKERNEL = zdot.S +ZDOTKERNEL = zdot.S +DSDOTKERNEL = dot.S + +SNRM2KERNEL = nrm2.S +DNRM2KERNEL = nrm2.S +CNRM2KERNEL = znrm2.S +ZNRM2KERNEL = znrm2.S + +SROTKERNEL = rot.S +DROTKERNEL = rot.S +CROTKERNEL = zrot.S +ZROTKERNEL = zrot.S + +SSCALKERNEL = scal.S +DSCALKERNEL = scal.S +CSCALKERNEL = zscal.S +ZSCALKERNEL = zscal.S + +SSWAPKERNEL = swap.S +DSWAPKERNEL = swap.S +CSWAPKERNEL = swap.S +ZSWAPKERNEL = swap.S + +SGEMVNKERNEL = gemv_n.S +DGEMVNKERNEL = gemv_n.S +CGEMVNKERNEL = zgemv_n.S +ZGEMVNKERNEL = zgemv_n.S + +SGEMVTKERNEL = gemv_t.S +DGEMVTKERNEL = gemv_t.S +CGEMVTKERNEL = zgemv_t.S +ZGEMVTKERNEL = zgemv_t.S + +SGEMMKERNEL = sgemm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S +STRMMKERNEL = strmm_kernel_$(SGEMM_UNROLL_M)x$(SGEMM_UNROLL_N).S +ifneq ($(SGEMM_UNROLL_M), $(SGEMM_UNROLL_N)) +SGEMMINCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_M).c +SGEMMITCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_M).c +SGEMMINCOPYOBJ = sgemm_incopy$(TSUFFIX).$(SUFFIX) +SGEMMITCOPYOBJ = sgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif +SGEMMONCOPY = ../generic/gemm_ncopy_$(SGEMM_UNROLL_N).c +SGEMMOTCOPY = ../generic/gemm_tcopy_$(SGEMM_UNROLL_N).c +SGEMMONCOPYOBJ = sgemm_oncopy$(TSUFFIX).$(SUFFIX) +SGEMMOTCOPYOBJ = sgemm_otcopy$(TSUFFIX).$(SUFFIX) + +DGEMMKERNEL = dgemm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S +DTRMMKERNEL = dtrmm_kernel_$(DGEMM_UNROLL_M)x$(DGEMM_UNROLL_N).S + +ifneq ($(DGEMM_UNROLL_M), $(DGEMM_UNROLL_N)) + +ifeq ($(DGEMM_UNROLL_M), 8) +DGEMMINCOPY = dgemm_ncopy_$(DGEMM_UNROLL_M).S +DGEMMITCOPY = dgemm_tcopy_$(DGEMM_UNROLL_M).S +else +DGEMMINCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_M).c +DGEMMITCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_M).c +endif + +DGEMMINCOPYOBJ = dgemm_incopy$(TSUFFIX).$(SUFFIX) +DGEMMITCOPYOBJ = dgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif + +ifeq ($(DGEMM_UNROLL_N), 4) +DGEMMONCOPY = dgemm_ncopy_$(DGEMM_UNROLL_N).S +DGEMMOTCOPY = dgemm_tcopy_$(DGEMM_UNROLL_N).S +else +DGEMMONCOPY = ../generic/gemm_ncopy_$(DGEMM_UNROLL_N).c +DGEMMOTCOPY = ../generic/gemm_tcopy_$(DGEMM_UNROLL_N).c +endif + +DGEMMONCOPYOBJ = dgemm_oncopy$(TSUFFIX).$(SUFFIX) +DGEMMOTCOPYOBJ = dgemm_otcopy$(TSUFFIX).$(SUFFIX) + +CGEMMKERNEL = cgemm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S +CTRMMKERNEL = ctrmm_kernel_$(CGEMM_UNROLL_M)x$(CGEMM_UNROLL_N).S +ifneq ($(CGEMM_UNROLL_M), $(CGEMM_UNROLL_N)) +CGEMMINCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_M).c +CGEMMITCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_M).c +CGEMMINCOPYOBJ = cgemm_incopy$(TSUFFIX).$(SUFFIX) +CGEMMITCOPYOBJ = cgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif +CGEMMONCOPY = ../generic/zgemm_ncopy_$(CGEMM_UNROLL_N).c +CGEMMOTCOPY = ../generic/zgemm_tcopy_$(CGEMM_UNROLL_N).c +CGEMMONCOPYOBJ = cgemm_oncopy$(TSUFFIX).$(SUFFIX) +CGEMMOTCOPYOBJ = cgemm_otcopy$(TSUFFIX).$(SUFFIX) + +ZGEMMKERNEL = zgemm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S +ZTRMMKERNEL = ztrmm_kernel_$(ZGEMM_UNROLL_M)x$(ZGEMM_UNROLL_N).S +ifneq ($(ZGEMM_UNROLL_M), $(ZGEMM_UNROLL_N)) +ZGEMMINCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_M).c +ZGEMMITCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_M).c +ZGEMMINCOPYOBJ = zgemm_incopy$(TSUFFIX).$(SUFFIX) +ZGEMMITCOPYOBJ = zgemm_itcopy$(TSUFFIX).$(SUFFIX) +endif +ZGEMMONCOPY = ../generic/zgemm_ncopy_$(ZGEMM_UNROLL_N).c +ZGEMMOTCOPY = ../generic/zgemm_tcopy_$(ZGEMM_UNROLL_N).c +ZGEMMONCOPYOBJ = zgemm_oncopy$(TSUFFIX).$(SUFFIX) +ZGEMMOTCOPYOBJ = zgemm_otcopy$(TSUFFIX).$(SUFFIX) + From 53f482ee72e56b31ace7860199c8fb3027af5303 Mon Sep 17 00:00:00 2001 From: maomao194313 Date: Mon, 4 Mar 2019 16:41:21 +0800 Subject: [PATCH 2/6] add TARGET support for HiSilicon tsv110 CPUs --- Makefile.arm64 | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Makefile.arm64 b/Makefile.arm64 index cd16dbfaed..4d10ff6844 100644 --- a/Makefile.arm64 +++ b/Makefile.arm64 @@ -38,3 +38,8 @@ ifeq ($(CORE), THUNDERX2T99) CCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 FCOMMON_OPT += -march=armv8.1-a -mtune=thunderx2t99 endif + +ifeq ($(CORE), TSV110) +CCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 +FCOMMON_OPT += -march=armv8.2-a -mtune=tsv110 +endif From 760842dda1fd8f0475216b46ca25fc016f671d05 Mon Sep 17 00:00:00 2001 From: maomao194313 Date: Mon, 4 Mar 2019 16:45:22 +0800 Subject: [PATCH 3/6] add TARGET support for HiSilicon tsv110 CPUs --- getarch.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/getarch.c b/getarch.c index 242d080044..ac58c82266 100644 --- a/getarch.c +++ b/getarch.c @@ -1065,6 +1065,23 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #else #endif +#ifdef FORCE_TSV110 +#define FORCE +#define ARCHITECTURE "ARM64" +#define SUBARCHITECTURE "TSV110" +#define SUBDIRNAME "arm64" +#define ARCHCONFIG "-DTSV110 " \ + "-DL1_CODE_SIZE=65536 -DL1_CODE_LINESIZE=64 -DL1_CODE_ASSOCIATIVE=4 " \ + "-DL1_DATA_SIZE=65536 -DL1_DATA_LINESIZE=64 -DL1_DATA_ASSOCIATIVE=4 " \ + "-DL2_SIZE=524288 -DL2_LINESIZE=64 -DL2_ASSOCIATIVE=8 " \ + "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 " \ + "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DARMV8" +#define LIBNAME "tsv110" +#define CORENAME "TSV110" +#else +#endif + + #ifdef FORCE_ZARCH_GENERIC #define FORCE #define ARCHITECTURE "ZARCH" From fb4dae71240be9ad1e55792a46b38f8e107cb70a Mon Sep 17 00:00:00 2001 From: maomao194313 Date: Mon, 4 Mar 2019 16:48:49 +0800 Subject: [PATCH 4/6] add TARGET support for HiSilicon tsv110 CPUs --- TargetList.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/TargetList.txt b/TargetList.txt index 3a5a322344..aebd0dd187 100644 --- a/TargetList.txt +++ b/TargetList.txt @@ -90,6 +90,7 @@ CORTEXA73 FALKOR THUNDERX THUNDERX2T99 +TSV110 9.System Z: ZARCH_GENERIC From f074d7d1463c15bbf838b2305f259160281dead3 Mon Sep 17 00:00:00 2001 From: maomao194313 Date: Tue, 12 Mar 2019 16:05:19 +0800 Subject: [PATCH 5/6] make DYNAMIC_ARCH=1 package work on TSV110. --- cpuid_arm64.c | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/cpuid_arm64.c b/cpuid_arm64.c index 5077d7b11c..a5e731d747 100644 --- a/cpuid_arm64.c +++ b/cpuid_arm64.c @@ -39,6 +39,8 @@ // Cavium #define CPU_THUNDERX 7 #define CPU_THUNDERX2T99 8 +//Hisilicon +#define CPU_TSV110 9 static char *cpuname[] = { "UNKNOWN", @@ -49,7 +51,8 @@ static char *cpuname[] = { "CORTEXA73", "FALKOR", "THUNDERX", - "THUNDERX2T99" + "THUNDERX2T99", + "TSV110" }; static char *cpuname_lower[] = { @@ -61,7 +64,8 @@ static char *cpuname_lower[] = { "cortexa73", "falkor", "thunderx", - "thunderx2t99" + "thunderx2t99", + "tsv110" }; int get_feature(char *search) @@ -145,6 +149,9 @@ int detect(void) return CPU_THUNDERX; else if (strstr(cpu_implementer, "0x43") && strstr(cpu_part, "0x0af")) return CPU_THUNDERX2T99; + // HiSilicon + else if (strstr(cpu_implementer, "0x48") && strstr(cpu_part, "0xd01")) + return CPU_TSV110; } p = (char *) NULL ; @@ -286,6 +293,21 @@ void get_cpuconfig(void) printf("#define DTB_DEFAULT_ENTRIES 64 \n"); printf("#define DTB_SIZE 4096 \n"); break; + + case CPU_TSV110: + printf("#define TSV110 \n"); + printf("#define L1_CODE_SIZE 65536 \n"); + printf("#define L1_CODE_LINESIZE 64 \n"); + printf("#define L1_CODE_ASSOCIATIVE 4 \n"); + printf("#define L1_DATA_SIZE 65536 \n"); + printf("#define L1_DATA_LINESIZE 64 \n"); + printf("#define L1_DATA_ASSOCIATIVE 4 \n"); + printf("#define L2_SIZE 524228 \n"); + printf("#define L2_LINESIZE 64 \n"); + printf("#define L2_ASSOCIATIVE 8 \n"); + printf("#define DTB_DEFAULT_ENTRIES 64 \n"); + printf("#define DTB_SIZE 4096 \n"); + break; } } From 7e3eb9b25d26ca9be337acf0b0fd2c647e353e0c Mon Sep 17 00:00:00 2001 From: maomao194313 Date: Tue, 12 Mar 2019 16:11:01 +0800 Subject: [PATCH 6/6] make DYNAMIC_ARCH=1 package work on TSV110 --- param.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/param.h b/param.h index 3cc400b548..79fb05380a 100644 --- a/param.h +++ b/param.h @@ -2591,7 +2591,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #if defined(CORTEXA53) || defined(CORTEXA57) || \ defined(CORTEXA72) || defined(CORTEXA73) || \ - defined(FALKOR) + defined(FALKOR) || defined(TSV110) #define SGEMM_DEFAULT_UNROLL_M 16 #define SGEMM_DEFAULT_UNROLL_N 4