Skip to content

Commit

Permalink
Optimize TLUT loading and hashing
Browse files Browse the repository at this point in the history
  • Loading branch information
aglab2 committed Dec 15, 2024
1 parent 2055a77 commit a1a231d
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 17 deletions.
2 changes: 1 addition & 1 deletion projects/msvc/GLideN64.vcxproj
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@
<InlineFunctionExpansion>AnySuitable</InlineFunctionExpansion>
<IntrinsicFunctions>true</IntrinsicFunctions>
<FavorSizeOrSpeed>Speed</FavorSizeOrSpeed>
<OmitFramePointers>false</OmitFramePointers>
<OmitFramePointers>true</OmitFramePointers>
<PreprocessorDefinitions>NDEBUG;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<StringPooling>true</StringPooling>
<FunctionLevelLinking>true</FunctionLevelLinking>
Expand Down
12 changes: 9 additions & 3 deletions src/CRC.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,19 @@ static inline u32 CRC_Calculate(u32 crc, const void* buffer, u32 count)
return (u32)XXH3_64bits_withSeed(buffer, count, crc);
}

static inline u32 CRC_CalculatePalette(u32 crc, const void* buffer, u32 count)
static inline u32 CRC_CalculatePalette(u32 crc, const void* buffer)
{
u8 combined[32];

int count = 16;
u8* p = (u8*)buffer;
u8* o = combined;
while (count--) {
crc = (u32)XXH3_64bits_withSeed(p, 2, crc);
__builtin_memcpy(o, p, 2);
p += 8;
o += 2;
}
return crc;

return (u32)XXH3_64bits_withSeed(combined, 32, crc);
}

2 changes: 1 addition & 1 deletion src/N64.h
Original file line number Diff line number Diff line change
Expand Up @@ -55,7 +55,7 @@ extern TMEMCacheHashEntry TMEMCacheHash;

static inline void tmemCacheHashInvalidate()
{
TMEMCacheHash.off = 0;
TMEMCacheHash.off = -1;
}

static inline void tmemCacheHashSet(u32 off, u32 size, u32 hash)
Expand Down
58 changes: 47 additions & 11 deletions src/gDP.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -708,7 +708,7 @@ static void tmemAddCacheEntry(uint32_t address, uint32_t tmemIdx, uint16_t qword
#else
__movsd((unsigned long*)tmem.data, (unsigned long*)&TMEM[tmemIdx], qwords << 1);
#endif
tmemCacheHashSet(tmemIdx, qwords >> 3, entry.crc);
tmemCacheHashSet(tmemIdx, qwords << 3, entry.crc);
}

static Tmem* tmemFindCacheEntry(uint32_t address, uint16_t qwords, uint16_t dxt)
Expand Down Expand Up @@ -841,6 +841,14 @@ void gDPLoadBlock(u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt)
DebugMsg( DEBUG_NORMAL, "gDPLoadBlock( %i, %i, %i, %i, %i );\n", tile, uls, ult, lrs, dxt );
}

// Compiler will play dumb if I don't do this (and it still plays a bit dumb)
static void copyFastPalette(const u8* __restrict src, u16* __restrict dst)
{
for (u16 j = 0; j < 16; ++j) {
dst[j * 4] = swapword(*(const u16*)(src + ((j * 2) ^ 2)));
}
}

void gDPLoadTLUT( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt )
{
gDPSetTileSize( tile, uls, ult, lrs, lrt );
Expand All @@ -852,20 +860,48 @@ void gDPLoadTLUT( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt )
u32 address = gDP.textureImage.address + gDP.tiles[tile].ult * gDP.textureImage.bpl + (gDP.tiles[tile].uls << gDP.textureImage.size >> 1);
u16 pal = (u16)((gDP.tiles[tile].tmem - 256) >> 4);
u16 * dest = reinterpret_cast<u16*>(TMEM);
u32 destIdx = gDP.tiles[tile].tmem << 2;
u32 destIdx = 0x400 | (gDP.tiles[tile].tmem << 2);

tmemCacheHashInvalidate();
if (TMEMCacheHash.off > 0x400)
tmemCacheHashInvalidate();

if ((0 == (address & 0x3)) && (destIdx < 0x800 - count * 4))
{
int i = 0;
while (i < count) {
if (i + 16 < count)
{
copyFastPalette(RDRAM + address, dest + destIdx);
i += 16;
address += 32;
destIdx += 64;
}
else
{
for (u16 j = 0; (j < 16) && (i < count); ++j, ++i) {
dest[destIdx] = swapword(*(u16*)(RDRAM + (address ^ 2)));
address += 2;
destIdx += 4;
}
}

int i = 0;
while (i < count) {
for (u16 j = 0; (j < 16) && (i < count); ++j, ++i) {
dest[(destIdx | 0x0400) & 0x07FF] = swapword(*(u16*)(RDRAM + (address ^ 2)));
address += 2;
destIdx += 4;
gDP.paletteCRC16[pal] = CRC_CalculatePalette(0xFFFFFFFF, &TMEM[256 + (pal << 4)]);
pal = (pal + 1) & 0x0F;
}
}
else
{
int i = 0;
while (i < count) {
for (u16 j = 0; (j < 16) && (i < count); ++j, ++i) {
dest[(destIdx | 0x0400) & 0x07FF] = swapword(*(u16*)(RDRAM + (address ^ 2)));
address += 2;
destIdx += 4;
}

gDP.paletteCRC16[pal] = CRC_CalculatePalette(0xFFFFFFFF, &TMEM[256 + (pal << 4)], 16);
pal = (pal + 1) & 0x0F;
gDP.paletteCRC16[pal] = CRC_CalculatePalette(0xFFFFFFFF, &TMEM[256 + (pal << 4)]);
pal = (pal + 1) & 0x0F;
}
}

gDP.paletteCRC256 = CRC_Calculate(0xFFFFFFFF, gDP.paletteCRC16, 64);
Expand Down
2 changes: 1 addition & 1 deletion src/windows/ZilmarAPIImpl_windows.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ void PluginAPI::GetDllInfo(PLUGIN_INFO * PluginInfo)
{
PluginInfo->Version = 0x103;
PluginInfo->Type = PLUGIN_TYPE_GFX;
sprintf(PluginInfo->Name, "ANGLE %s v4.3.12", pluginName, PLUGIN_REVISION);
sprintf(PluginInfo->Name, "ANGLE %s v4.3.14", pluginName, PLUGIN_REVISION);
PluginInfo->NormalMemory = FALSE;
PluginInfo->MemoryBswaped = TRUE;
}

0 comments on commit a1a231d

Please sign in to comment.