Skip to content

Commit

Permalink
Merge pull request #47 from notaz/asm_rework
Browse files Browse the repository at this point in the history
Asm rework
  • Loading branch information
icculus committed May 9, 2016
2 parents 056d77f + ce46bd1 commit a8c6c77
Show file tree
Hide file tree
Showing 24 changed files with 1,080 additions and 1,296 deletions.
22 changes: 14 additions & 8 deletions Sources/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -188,13 +188,20 @@ else()
set(DEBUGSUFFIX "")
endif()

# This should not be needed anymore, but might be faster on 32bit x86
option(USE_I386_ASM "Use X86 ASM" FALSE)
# Master switch for all hand-written assembler code; enabled by default.
# Override at configure time with -DUSE_ASM=OFF.
option(USE_ASM "Use ASM code" TRUE)
if (USE_ASM)
# Assembler is only used where an implementation exists for the target;
# everything else is built from the C/C++ code.
MESSAGE(STATUS "Using assembler code (when available)")
else()
# Define USE_PORTABLE_C=1 on the compiler command line (directory scope),
# so the sources can test this macro and avoid their assembler paths.
add_definitions(-DUSE_PORTABLE_C=1)
MESSAGE(STATUS "Using portable C instead of all ASM")
endif()

if (USE_I386_ASM)
option(USE_I386_NASM_ASM "Use i386 nasm ASM code" FALSE)

if (USE_ASM AND USE_I386_NASM_ASM)
# You need the Netwide Assembler (NASM) to build this on Intel systems.
# http://nasm.sf.net/
add_definitions(-DUSE_I386_ASM=1)
add_definitions(-DUSE_I386_NASM_ASM=1)
if (MACOSX)
set(ASMOBJFMT "macho")
list(APPEND ASMFLAGS --prefix _)
Expand All @@ -203,10 +210,9 @@ if (USE_I386_ASM)
else()
set(ASMOBJFMT "elf")
endif()
MESSAGE(STATUS "Using i386 assembler")
MESSAGE(STATUS "Using i386 nasm ASM")
else()
add_definitions(-DUSE_PORTABLE_C=1)
MESSAGE(STATUS "Using portable C instead of ASM")
MESSAGE(STATUS "Not using i386 nasm ASM")
endif()

option(PANDORA "Compile for Pandora" FALSE)
Expand Down Expand Up @@ -655,7 +661,7 @@ add_dependencies(${SHADERSLIB} ParseEntities)
add_parser_and_scanner("Engine/Base/Parser" "Engine/Base/Scanner")
add_parser_and_scanner("Engine/Ska/smcPars" "Engine/Ska/smcScan")

if (USE_I386_ASM)
if (USE_I386_NASM_ASM)
add_custom_command(
OUTPUT "SoundMixer386.o"
MAIN_DEPENDENCY "${CMAKE_CURRENT_SOURCE_DIR}/Engine/Sound/SoundMixer386.asm"
Expand Down
4 changes: 1 addition & 3 deletions Sources/Engine/Base/Base.h
Original file line number Diff line number Diff line change
Expand Up @@ -65,9 +65,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#else
#warning "UNKNOWN PLATFORM IDENTIFIED!!!!"
#define PLATFORM_UNKNOWN 1
#warning "USING PORTABLE C!!!"
#define USE_PORTABLE_C
#endif
#endif

#if PLATFORM_LINUX || PLATFORM_MACOSX
#ifndef PLATFORM_UNIX
Expand Down
28 changes: 13 additions & 15 deletions Sources/Engine/Base/Profiling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,24 +21,13 @@ with this program; if not, write to the Free Software Foundation, Inc.,
template class CStaticArray<CProfileCounter>;
template class CStaticArray<CProfileTimer>;

#if (defined USE_PORTABLE_C)
#if (defined PLATFORM_UNIX) && !defined(__GNU_INLINE_X86_32__)
#include <sys/time.h>
#endif

static inline __int64 ReadTSC_profile(void)
{
#if (defined USE_PORTABLE_C)
#ifdef __arm__
struct timespec tv;
clock_gettime(CLOCK_MONOTONIC, &tv);
return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_nsec) / 1000000) );
#else
struct timeval tv;
gettimeofday(&tv, NULL);
return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_usec) / 1000) );
#endif

#elif (defined __MSVC_INLINE__)
#if (defined __MSVC_INLINE__)
__int64 mmRet;
__asm {
rdtsc
Expand All @@ -47,7 +36,7 @@ static inline __int64 ReadTSC_profile(void)
}
return mmRet;

#elif (defined __GNU_INLINE__)
#elif (defined __GNU_INLINE_X86_32__)
__int64 mmRet;
__asm__ __volatile__ (
"rdtsc \n\t"
Expand All @@ -60,7 +49,16 @@ static inline __int64 ReadTSC_profile(void)
return(mmRet);

#else
#error Please implement for your platform/compiler.
#ifdef __arm__
struct timespec tv;
clock_gettime(CLOCK_MONOTONIC, &tv);
return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_nsec) / 1000000) );
#else
struct timeval tv;
gettimeofday(&tv, NULL);
return( (((__int64) tv.tv_sec) * 1000) + (((__int64) tv.tv_usec) / 1000) );
#endif

#endif
}

Expand Down
4 changes: 2 additions & 2 deletions Sources/Engine/Base/Timer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ with this program; if not, write to the Free Software Foundation, Inc.,
#include <Engine/Base/Priority.inl>

// !!! FIXME: use SDL timer code instead and rdtsc never?
#if (USE_PORTABLE_C)
#if (defined PLATFORM_UNIX) && !defined(__GNU_INLINE_X86_32__)
#define USE_GETTIMEOFDAY 1
#endif

Expand Down Expand Up @@ -64,7 +64,7 @@ static inline __int64 ReadTSC(void)
}
return mmRet;

#elif (defined __GNU_INLINE__)
#elif (defined __GNU_INLINE_X86_32__)
__int64 mmRet;
__asm__ __volatile__ (
"rdtsc \n\t"
Expand Down
51 changes: 27 additions & 24 deletions Sources/Engine/Base/Types.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,30 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*));
#define ASMSYM(x) #x
#endif

/*
 * Decide whether inline asm is enabled, and which flavor.
 *
 * When USE_PORTABLE_C is defined this whole block is skipped and none of
 * the macros below are defined by it.  Otherwise at most one of
 * __MSVC_INLINE__, __GNU_INLINE_X86_32__ or __GNU_INLINE_X86_64__ is
 * selected: a value already defined by the build system wins, and only if
 * neither of the first two is pre-defined is the flavor derived from the
 * compiler's predefined macros.
 */
#ifndef USE_PORTABLE_C
#if defined(__MSVC_INLINE__)
/* the build system selected __MSVC_INLINE__ */
#elif defined(__GNU_INLINE_X86_32__)
/* the build system selected __GNU_INLINE_X86_32__ */
#elif defined(_MSC_VER) && defined(_M_IX86)
/* MSVC: only the 32-bit x86 target (_M_IX86) is matched here */
#define __MSVC_INLINE__
#elif defined (__GNUC__) && defined(__i386)
/* GNU-compatible compiler targeting 32-bit x86 */
#define __GNU_INLINE_X86_32__
#elif defined (__GNUC__) && defined(__x86_64__)
/* GNU-compatible compiler targeting x86-64 */
#define __GNU_INLINE_X86_64__
#endif

/* umbrella macro: set for either GNU x86 flavor (32-bit or 64-bit) */
#if defined(__GNU_INLINE_X86_32__) || defined(__GNU_INLINE_X86_64__)
#define __GNU_INLINE_X86__
#endif

/*
 * Comma-separated register-name string lists for the x87 FPU stack and the
 * MMX registers.
 * NOTE(review): presumably meant for the clobber lists of GNU extended asm
 * statements -- confirm at the use sites.
 */
#if defined(__GNU_INLINE_X86__)
#define FPU_REGS "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
#define MMX_REGS "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
#endif
#endif

#ifdef PLATFORM_UNIX /* rcg10042001 */
#include <stdio.h>
#include <string.h>
Expand All @@ -134,25 +158,6 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*));
#endif
#endif

#if ((defined __GNUC__) && (!defined __GNU_INLINE__))
#define __GNU_INLINE__
#endif

#if (defined __INTEL_COMPILER)
#if ((!defined __GNU_INLINE__) && (!defined __MSVC_INLINE__))
#error Please define __GNU_INLINE__ or __MSVC_INLINE__ with Intel C++.
#endif

#if ((defined __GNU_INLINE__) && (defined __MSVC_INLINE__))
#error Define either __GNU_INLINE__ or __MSVC_INLINE__ with Intel C++.
#endif
#endif

#if defined(__GNU_INLINE__) && defined(__i386__)
#define FPU_REGS "st", "st(1)", "st(2)", "st(3)", "st(4)", "st(5)", "st(6)", "st(7)"
#define MMX_REGS "mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
#endif

#ifndef PAGESIZE
#define PAGESIZE 4096
#endif
Expand Down Expand Up @@ -230,10 +235,7 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*));

inline ULONG _rotl(ULONG ul, int bits)
{
#if (defined USE_PORTABLE_C)
// DG: according to http://blog.regehr.org/archives/1063 this is fast
return (ul<<bits) | (ul>>(-bits&31));
#elif (defined __GNU_INLINE__)
#if (defined __GNU_INLINE_X86_32__)
// This, on the other hand, is wicked fast. :)
__asm__ __volatile__ (
"roll %%cl, %%eax \n\t"
Expand All @@ -255,7 +257,8 @@ MY_STATIC_ASSERT(size_tSize, sizeof(size_t) == sizeof(void*));
return(ul);

#else
#error need inline asm for your platform.
// DG: according to http://blog.regehr.org/archives/1063 this is fast
return (ul<<bits) | (ul>>(-bits&31));
#endif
}

Expand Down
70 changes: 34 additions & 36 deletions Sources/Engine/Engine.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -125,14 +125,10 @@ BOOL APIENTRY DllMain( HANDLE hModule, DWORD ul_reason_for_call, LPVOID lpReser

static void DetectCPU(void)
{
#if (defined USE_PORTABLE_C) // rcg10072001
CPrintF(TRANSV(" (No CPU detection in this binary.)\n"));

#else
char strVendor[12+1];
char strVendor[12+1] = { 0 };
strVendor[12] = 0;
ULONG ulTFMS;
ULONG ulFeatures;
ULONG ulTFMS = 0;
ULONG ulFeatures = 0;

#if (defined __MSVC_INLINE__)
// test MMX presence and update flag
Expand All @@ -148,43 +144,47 @@ static void DetectCPU(void)
mov dword ptr [ulFeatures], edx
}

#elif (defined __GNU_INLINE__)
#elif (defined __GNU_INLINE_X86__)
ULONG eax, ebx, ecx, edx;
// test MMX presence and update flag
__asm__ __volatile__ (
"pushl %%ebx \n\t"
"xorl %%eax,%%eax \n\t" // request for basic id
#if (defined __GNU_INLINE_X86_64__)
"cpuid \n\t"
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
#else
"movl %%ebx, %%esi \n\t"
"cpuid \n\t"
"movl %%ebx, (%%esi) \n\t"
"movl %%edx, 4(%%esi) \n\t"
"movl %%ecx, 8(%%esi) \n\t"
"popl %%ebx \n\t"
: // no specific outputs.
: "S" (strVendor)
: "eax", "ecx", "edx", "memory"
"xchgl %%ebx, %%esi \n\t"
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)
#endif
: "a" (0) // request for basic id
);

// need to break this into a separate asm block, since I'm clobbering
// too many registers. There's something to be said for letting MSVC
// figure out where on the stack your locals are resting, but yeah,
// I know, that's x86-specific anyhow...
// !!! FIXME: can probably do this right with modern GCC.
memcpy(strVendor + 0, &ebx, 4);
memcpy(strVendor + 4, &edx, 4);
memcpy(strVendor + 8, &ecx, 4);

__asm__ __volatile__ (
"pushl %%ebx \n\t"
"movl $1, %%eax \n\t" // request for TFMS feature flags
"cpuid \n\t"
"mov %%eax, (%%esi) \n\t" // remember type, family, model and stepping
"mov %%edx, (%%edi) \n\t"
"popl %%ebx \n\t"
: // no specific outputs.
: "S" (&ulTFMS), "D" (&ulFeatures)
: "eax", "ecx", "edx", "memory"
#if (defined __GNU_INLINE_X86_64__)
"cpuid \n\t"
: "=a" (eax), "=b" (ebx), "=c" (ecx), "=d" (edx)
#else
"movl %%ebx, %%esi \n\t"
"cpuid \n\t"
"xchgl %%ebx, %%esi \n\t"
: "=a" (eax), "=S" (ebx), "=c" (ecx), "=d" (edx)
#endif
: "a" (1) // request for TFMS feature flags
);
ulTFMS = eax;
ulFeatures = edx;

#else
#error Please implement for your platform or define USE_PORTABLE_C.
#endif

if (ulTFMS == 0) {
CPrintF(TRANSV(" (No CPU detection in this binary.)\n"));
return;
}

INDEX iType = (ulTFMS>>12)&0x3;
INDEX iFamily = (ulTFMS>> 8)&0xF;
INDEX iModel = (ulTFMS>> 4)&0xF;
Expand Down Expand Up @@ -215,8 +215,6 @@ static void DetectCPU(void)
sys_iCPUMHz = INDEX(_pTimer->tm_llCPUSpeedHZ/1E6);

if( !bMMX) FatalError( TRANS("MMX support required but not present!"));

#endif // defined USE_PORTABLE_C
}

static void DetectCPUWrapper(void)
Expand Down
Loading

0 comments on commit a8c6c77

Please sign in to comment.