From 0e00a03561c08a5598dccf9044618d54ad22deec Mon Sep 17 00:00:00 2001 From: Anders Jenbo Date: Tue, 1 Jan 2019 06:30:29 +0100 Subject: [PATCH 1/3] Use __asm instead of C++ only functions --- Source/control.cpp | 32 +++++++++++++++++++++++++++++++- Source/engine.cpp | 24 ++++++++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/Source/control.cpp b/Source/control.cpp index 975e36f84..da599cbe5 100644 --- a/Source/control.cpp +++ b/Source/control.cpp @@ -231,9 +231,22 @@ void __fastcall DrawSpellCel(int xp, int yp, char *Trans, int nCel, int w) _EAX = *(_WORD *)v6; v6 += 2; ASM_XLAT(_EAX, _EBX); +#ifndef _MSC_VER _EAX = __ROR2__(_EAX, 8); +#else + __asm mov eax, _EAX + __asm ror ax, 8 + __asm mov _EAX, eax +#endif ASM_XLAT(_EAX, _EBX); +#ifndef _MSC_VER *(_WORD *)v7 = __ROR2__(_EAX, 8); +#else + __asm mov eax, _EAX + __asm ror ax, 8 + __asm mov _EAX, eax +#endif + *(_WORD *)v7 = _EAX; v7 += 2; if (v15) { LABEL_15: @@ -241,13 +254,30 @@ void __fastcall DrawSpellCel(int xp, int yp, char *Trans, int nCel, int w) _EAX = *(_DWORD *)v6; v6 += 4; ASM_XLAT(_EAX, _EBX); +#ifndef _MSC_VER _EAX = __ROR4__(_EAX, 8); +#else + __asm ror _EAX, 8 +#endif ASM_XLAT(_EAX, _EBX); +#ifndef _MSC_VER _EAX = __ROR4__(_EAX, 8); +#else + __asm ror _EAX, 8 +#endif ASM_XLAT(_EAX, _EBX); +#ifndef _MSC_VER _EAX = __ROR4__(_EAX, 8); +#else + __asm ror _EAX, 8 +#endif ASM_XLAT(_EAX, _EBX); - *(_DWORD *)v7 = __ROR4__(_EAX, 8); +#ifndef _MSC_VER + _EAX = __ROR4__(_EAX, 8); +#else + __asm ror _EAX, 8 +#endif + *(_DWORD *)v7 = _EAX; v7 += 4; --v15; } while (v15); diff --git a/Source/engine.cpp b/Source/engine.cpp index 0af35e7c9..d38a3ef12 100644 --- a/Source/engine.cpp +++ b/Source/engine.cpp @@ -260,7 +260,11 @@ void __fastcall CelDecDatLightTrans(char *pDecodeTo, char *pRLEBytes, int frame_ ASM_XLAT(_EAX, _EBX); *(_BYTE *)v5 = _EAX; v25 = (_BYTE *)(v5 + 2); +#ifndef _MSC_VER _EAX = __ROR4__(_EAX, 16); +#else + __asm ror _EAX, 16 +#endif ASM_XLAT(_EAX, _EBX); *v25 = 
_EAX; v5 = (int)(v25 + 2); @@ -295,10 +299,18 @@ void __fastcall CelDecDatLightTrans(char *pDecodeTo, char *pRLEBytes, int frame_ _EAX = *(_DWORD *)v4; v4 += 4; v18 = (_BYTE *)(v5 + 1); +#ifndef _MSC_VER _EAX = __ROR4__(_EAX, 8); +#else + __asm ror _EAX, 8 +#endif ASM_XLAT(_EAX, _EBX); *v18 = _EAX; +#ifndef _MSC_VER _EAX = __ROR4__(_EAX, 16); +#else + __asm ror _EAX, 16 +#endif v18 += 2; ASM_XLAT(_EAX, _EBX); *v18 = _EAX; @@ -740,7 +752,11 @@ void __fastcall Cel2DecDatLightTrans(char *pDecodeTo, char *pRLEBytes, int frame ASM_XLAT(_EAX, _EBX); *(_BYTE *)v5 = _EAX; v25 = (_BYTE *)(v5 + 2); +#ifndef _MSC_VER _EAX = __ROR4__(_EAX, 16); +#else + __asm ror _EAX, 16 +#endif ASM_XLAT(_EAX, _EBX); *v25 = _EAX; v5 = (unsigned int)(v25 + 2); @@ -775,10 +791,18 @@ void __fastcall Cel2DecDatLightTrans(char *pDecodeTo, char *pRLEBytes, int frame _EAX = *(_DWORD *)v4; v4 += 4; v18 = (_BYTE *)(v5 + 1); +#ifndef _MSC_VER _EAX = __ROR4__(_EAX, 8); +#else + __asm ror _EAX, 8 +#endif ASM_XLAT(_EAX, _EBX); *v18 = _EAX; +#ifndef _MSC_VER _EAX = __ROR4__(_EAX, 16); +#else + __asm ror _EAX, 16 +#endif v18 += 2; ASM_XLAT(_EAX, _EBX); *v18 = _EAX; From 047bca1bb1ed63c2442fc71672acc51037e1ffca Mon Sep 17 00:00:00 2001 From: Anders Jenbo Date: Tue, 1 Jan 2019 06:30:45 +0100 Subject: [PATCH 2/3] Compile control.cpp as C --- MakefileVC | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MakefileVC b/MakefileVC index 783c77d99..d8a405f5a 100644 --- a/MakefileVC +++ b/MakefileVC @@ -58,7 +58,7 @@ DIABLO_SRC += Source/render.cpp OBJS=$(DIABLO_SRC:.cpp=.obj) # hardcoded list of src files, because only some of them are ready to be compiled as C instead of C++ # once all files compile as C we should use the /TC flag abd DIABLO_SRC instead -DIABLO_SRC_FIXED = Source/appfat.cpp /TcSource/automap.cpp /TcSource/capture.cpp /TcSource/codec.cpp Source/control.cpp /TcSource/cursor.cpp /TcSource/dead.cpp /TcSource/debug.cpp /TcSource/diablo.cpp /TcSource/doom.cpp /TcSource/drlg_l1.cpp 
/TcSource/drlg_l2.cpp Source/drlg_l3.cpp /TcSource/drlg_l4.cpp Source/dthread.cpp Source/dx.cpp /TcSource/effects.cpp /TcSource/encrypt.cpp Source/engine.cpp /TcSource/error.cpp Source/fault.cpp /TcSource/gamemenu.cpp /TcSource/gendung.cpp /TcSource/gmenu.cpp /TcSource/help.cpp /TcSource/init.cpp /TcSource/interfac.cpp /TcSource/inv.cpp Source/items.cpp Source/lighting.cpp /TcSource/loadsave.cpp Source/logging.cpp /TcSource/mainmenu.cpp /TcSource/minitext.cpp /TcSource/missiles.cpp Source/monster.cpp /TcSource/movie.cpp Source/mpqapi.cpp /TcSource/msg.cpp Source/msgcmd.cpp /TcSource/multi.cpp Source/nthread.cpp Source/objects.cpp /TcSource/pack.cpp /TcSource/palette.cpp /TcSource/path.cpp /TcSource/pfile.cpp /TcSource/player.cpp /TcSource/plrmsg.cpp /TcSource/portal.cpp /TcSource/quests.cpp /TcSource/restrict.cpp /TcSource/scrollrt.cpp /TcSource/setmaps.cpp /TcSource/sha.cpp /TcSource/sound.cpp /TcSource/spells.cpp Source/stores.cpp /TcSource/sync.cpp /TcSource/textdat.cpp /TcSource/themes.cpp /TcSource/tmsg.cpp /TcSource/town.cpp /TcSource/towners.cpp /TcSource/track.cpp /TcSource/trigs.cpp /TcSource/wave.cpp Source/render.cpp +DIABLO_SRC_FIXED = Source/appfat.cpp /TcSource/automap.cpp /TcSource/capture.cpp /TcSource/codec.cpp /TcSource/control.cpp /TcSource/cursor.cpp /TcSource/dead.cpp /TcSource/debug.cpp /TcSource/diablo.cpp /TcSource/doom.cpp /TcSource/drlg_l1.cpp /TcSource/drlg_l2.cpp Source/drlg_l3.cpp /TcSource/drlg_l4.cpp Source/dthread.cpp Source/dx.cpp /TcSource/effects.cpp /TcSource/encrypt.cpp Source/engine.cpp /TcSource/error.cpp Source/fault.cpp /TcSource/gamemenu.cpp /TcSource/gendung.cpp /TcSource/gmenu.cpp /TcSource/help.cpp /TcSource/init.cpp /TcSource/interfac.cpp /TcSource/inv.cpp Source/items.cpp Source/lighting.cpp /TcSource/loadsave.cpp Source/logging.cpp /TcSource/mainmenu.cpp /TcSource/minitext.cpp /TcSource/missiles.cpp Source/monster.cpp /TcSource/movie.cpp Source/mpqapi.cpp /TcSource/msg.cpp Source/msgcmd.cpp /TcSource/multi.cpp 
Source/nthread.cpp Source/objects.cpp /TcSource/pack.cpp /TcSource/palette.cpp /TcSource/path.cpp /TcSource/pfile.cpp /TcSource/player.cpp /TcSource/plrmsg.cpp /TcSource/portal.cpp /TcSource/quests.cpp /TcSource/restrict.cpp /TcSource/scrollrt.cpp /TcSource/setmaps.cpp /TcSource/sha.cpp /TcSource/sound.cpp /TcSource/spells.cpp Source/stores.cpp /TcSource/sync.cpp /TcSource/textdat.cpp /TcSource/themes.cpp /TcSource/tmsg.cpp /TcSource/town.cpp /TcSource/towners.cpp /TcSource/track.cpp /TcSource/trigs.cpp /TcSource/wave.cpp Source/render.cpp PKWARE_SRC=$(sort $(wildcard 3rdParty/PKWare/*.cpp)) PKWARE_OBJS=$(PKWARE_SRC:.cpp=.obj) From c2b7cfa75dc74fa49ecbca43d6f38e0f93f638b2 Mon Sep 17 00:00:00 2001 From: Anders Jenbo Date: Fri, 4 Jan 2019 21:03:02 +0100 Subject: [PATCH 3/3] Use stdlib _rotr instead of inline asm This also makes qmemcpy available to the C compiler --- Source/control.cpp | 38 ++++---------------------------------- Source/engine.cpp | 36 ++++++------------------------------ defs.h | 35 +++++++---------------------------- 3 files changed, 17 insertions(+), 92 deletions(-) diff --git a/Source/control.cpp b/Source/control.cpp index da599cbe5..4bed7f96d 100644 --- a/Source/control.cpp +++ b/Source/control.cpp @@ -231,22 +231,9 @@ void __fastcall DrawSpellCel(int xp, int yp, char *Trans, int nCel, int w) _EAX = *(_WORD *)v6; v6 += 2; ASM_XLAT(_EAX, _EBX); -#ifndef _MSC_VER _EAX = __ROR2__(_EAX, 8); -#else - __asm mov eax, _EAX - __asm ror ax, 8 - __asm mov _EAX, eax -#endif ASM_XLAT(_EAX, _EBX); -#ifndef _MSC_VER *(_WORD *)v7 = __ROR2__(_EAX, 8); -#else - __asm mov eax, _EAX - __asm ror ax, 8 - __asm mov _EAX, eax -#endif - *(_WORD *)v7 = _EAX; v7 += 2; if (v15) { LABEL_15: @@ -254,30 +241,13 @@ void __fastcall DrawSpellCel(int xp, int yp, char *Trans, int nCel, int w) _EAX = *(_DWORD *)v6; v6 += 4; ASM_XLAT(_EAX, _EBX); -#ifndef _MSC_VER - _EAX = __ROR4__(_EAX, 8); -#else - __asm ror _EAX, 8 -#endif + _EAX = _rotr(_EAX, 8); ASM_XLAT(_EAX, _EBX); -#ifndef 
_MSC_VER - _EAX = __ROR4__(_EAX, 8); -#else - __asm ror _EAX, 8 -#endif + _EAX = _rotr(_EAX, 8); ASM_XLAT(_EAX, _EBX); -#ifndef _MSC_VER - _EAX = __ROR4__(_EAX, 8); -#else - __asm ror _EAX, 8 -#endif + _EAX = _rotr(_EAX, 8); ASM_XLAT(_EAX, _EBX); -#ifndef _MSC_VER - _EAX = __ROR4__(_EAX, 8); -#else - __asm ror _EAX, 8 -#endif - *(_DWORD *)v7 = _EAX; + *(_DWORD *)v7 = _rotr(_EAX, 8); v7 += 4; --v15; } while (v15); diff --git a/Source/engine.cpp b/Source/engine.cpp index d38a3ef12..74fff114c 100644 --- a/Source/engine.cpp +++ b/Source/engine.cpp @@ -260,11 +260,7 @@ void __fastcall CelDecDatLightTrans(char *pDecodeTo, char *pRLEBytes, int frame_ ASM_XLAT(_EAX, _EBX); *(_BYTE *)v5 = _EAX; v25 = (_BYTE *)(v5 + 2); -#ifndef _MSC_VER - _EAX = __ROR4__(_EAX, 16); -#else - __asm ror _EAX, 16 -#endif + _EAX = _rotr(_EAX, 16); ASM_XLAT(_EAX, _EBX); *v25 = _EAX; v5 = (int)(v25 + 2); @@ -299,18 +295,10 @@ void __fastcall CelDecDatLightTrans(char *pDecodeTo, char *pRLEBytes, int frame_ _EAX = *(_DWORD *)v4; v4 += 4; v18 = (_BYTE *)(v5 + 1); -#ifndef _MSC_VER - _EAX = __ROR4__(_EAX, 8); -#else - __asm ror _EAX, 8 -#endif + _EAX = _rotr(_EAX, 8); ASM_XLAT(_EAX, _EBX); *v18 = _EAX; -#ifndef _MSC_VER - _EAX = __ROR4__(_EAX, 16); -#else - __asm ror _EAX, 16 -#endif + _EAX = _rotr(_EAX, 16); v18 += 2; ASM_XLAT(_EAX, _EBX); *v18 = _EAX; @@ -752,11 +740,7 @@ void __fastcall Cel2DecDatLightTrans(char *pDecodeTo, char *pRLEBytes, int frame ASM_XLAT(_EAX, _EBX); *(_BYTE *)v5 = _EAX; v25 = (_BYTE *)(v5 + 2); -#ifndef _MSC_VER - _EAX = __ROR4__(_EAX, 16); -#else - __asm ror _EAX, 16 -#endif + _EAX = _rotr(_EAX, 16); ASM_XLAT(_EAX, _EBX); *v25 = _EAX; v5 = (unsigned int)(v25 + 2); @@ -791,18 +775,10 @@ void __fastcall Cel2DecDatLightTrans(char *pDecodeTo, char *pRLEBytes, int frame _EAX = *(_DWORD *)v4; v4 += 4; v18 = (_BYTE *)(v5 + 1); -#ifndef _MSC_VER - _EAX = __ROR4__(_EAX, 8); -#else - __asm ror _EAX, 8 -#endif + _EAX = _rotr(_EAX, 8); ASM_XLAT(_EAX, _EBX); *v18 = _EAX; -#ifndef 
_MSC_VER - _EAX = __ROR4__(_EAX, 16); -#else - __asm ror _EAX, 16 -#endif + _EAX = _rotr(_EAX, 16); v18 += 2; ASM_XLAT(_EAX, _EBX); *v18 = _EAX; diff --git a/defs.h b/defs.h index 8ac319b42..e8c9d0a9d 100644 --- a/defs.h +++ b/defs.h @@ -136,12 +136,10 @@ typedef __int64 int64; // Helper functions to represent some assembly instructions. -#ifdef __cplusplus - #ifdef FAST_MEMCPY #define qmemcpy memcpy #else -inline void *qmemcpy(void *dst, const void *src, size_t cnt) +__inline void *qmemcpy(void *dst, const void *src, size_t cnt) { char *out = (char *)dst; const char *in = (const char *)src; @@ -153,31 +151,15 @@ inline void *qmemcpy(void *dst, const void *src, size_t cnt) } #endif -// rotate left -template -T __ROL__(T value, int count) +// rotate right +__inline WORD __ROR2__(WORD value, DWORD count) { - const unsigned int nbits = sizeof(T) * 8; - - if (count > 0) { - count %= nbits; - T high = value >> (nbits - count); - if (T(-1) < 0) // signed value - high &= ~((T(-1) << count)); - value <<= count; - value |= high; - } else { - count = -count % nbits; - T low = value << (nbits - count); - value >>= count; - value |= low; - } - return value; -} + count %= 16; -inline unsigned short __ROR2__(unsigned short value, int count) { return __ROL__((unsigned short)value, -count); } -inline unsigned int __ROR4__(unsigned int value, int count) { return __ROL__((unsigned int)value, -count); } + return value >> count | value << (16 - count); +} +#ifdef __cplusplus // sign flag template char __SETS__(T x) @@ -205,9 +187,6 @@ char __OFSUB__(T x, U y) return (sx ^ __SETS__(y2)) & (sx ^ __SETS__(x - y2)); } } - -#else -#define qmemcpy memcpy #endif #endif /* IDA_GARBAGE */