Skip to content

Commit

Permalink
Merge pull request #16132 from unknownbrackets/softgpu-blend
Browse files Browse the repository at this point in the history
softjit: Fix dst blend shift
  • Loading branch information
hrydgard authored Sep 30, 2022
2 parents a72a425 + 6468e0f commit 89dab44
Show file tree
Hide file tree
Showing 6 changed files with 61 additions and 18 deletions.
34 changes: 32 additions & 2 deletions GPU/GPUCommon.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -780,6 +780,9 @@ void GPUCommon::ResetMatrices() {
matrixVisible.proj[i] = toFloat24(gstate.projMatrix[i]);
for (size_t i = 0; i < ARRAY_SIZE(gstate.tgenMatrix); ++i)
matrixVisible.tgen[i] = toFloat24(gstate.tgenMatrix[i]);

// Assume all the matrices changed, so dirty things related to them.
gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_PROJMATRIX | DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BONE_UNIFORMS);
}

u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head) {
Expand Down Expand Up @@ -1341,7 +1344,8 @@ void GPUCommon::Execute_Iaddr(u32 op, u32 diff) {
}

// Records the current list's pc in gstate_c.offsetAddr.
// Neither op nor diff is read by this handler.
void GPUCommon::Execute_Origin(u32 op, u32 diff) {
	// currentList may be null when this runs, so it must be checked before
	// any dereference. With no list there is no pc to record, and offsetAddr
	// is left untouched.
	if (currentList)
		gstate_c.offsetAddr = currentList->pc;
}

void GPUCommon::Execute_Jump(u32 op, u32 diff) {
Expand Down Expand Up @@ -2175,6 +2179,11 @@ void GPUCommon::Execute_BlockTransferStart(u32 op, u32 diff) {
}

void GPUCommon::Execute_WorldMtxNum(u32 op, u32 diff) {
if (!currentList) {
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (op & 0xF);
return;
}

// This is almost always followed by GE_CMD_WORLDMATRIXDATA.
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
u32 *dst = (u32 *)(gstate.worldMatrix + (op & 0xF));
Expand Down Expand Up @@ -2225,6 +2234,11 @@ void GPUCommon::Execute_WorldMtxData(u32 op, u32 diff) {
}

void GPUCommon::Execute_ViewMtxNum(u32 op, u32 diff) {
if (!currentList) {
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (op & 0xF);
return;
}

// This is almost always followed by GE_CMD_VIEWMATRIXDATA.
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
u32 *dst = (u32 *)(gstate.viewMatrix + (op & 0xF));
Expand Down Expand Up @@ -2273,6 +2287,11 @@ void GPUCommon::Execute_ViewMtxData(u32 op, u32 diff) {
}

void GPUCommon::Execute_ProjMtxNum(u32 op, u32 diff) {
if (!currentList) {
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (op & 0xF);
return;
}

// This is almost always followed by GE_CMD_PROJMATRIXDATA.
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
u32 *dst = (u32 *)(gstate.projMatrix + (op & 0xF));
Expand Down Expand Up @@ -2322,6 +2341,11 @@ void GPUCommon::Execute_ProjMtxData(u32 op, u32 diff) {
}

void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) {
if (!currentList) {
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (op & 0xF);
return;
}

// This is almost always followed by GE_CMD_TGENMATRIXDATA.
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF));
Expand All @@ -2339,7 +2363,8 @@ void GPUCommon::Execute_TgenMtxNum(u32 op, u32 diff) {
if (dst[i] != newVal) {
Flush();
dst[i] = newVal;
gstate_c.Dirty(DIRTY_TEXMATRIX);
// We check the matrix to see if we need projection.
gstate_c.Dirty(DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE);
}
if (++i >= end) {
break;
Expand Down Expand Up @@ -2370,6 +2395,11 @@ void GPUCommon::Execute_TgenMtxData(u32 op, u32 diff) {
}

void GPUCommon::Execute_BoneMtxNum(u32 op, u32 diff) {
if (!currentList) {
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (op & 0x7F);
return;
}

// This is almost always followed by GE_CMD_BONEMATRIXDATA.
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
u32 *dst = (u32 *)(gstate.boneMatrix + (op & 0x7F));
Expand Down
24 changes: 11 additions & 13 deletions GPU/GPUState.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -127,9 +127,6 @@ void GPUgstate::Reset() {
memset(gstate.tgenMatrix, 0, sizeof(gstate.tgenMatrix));
memset(gstate.boneMatrix, 0, sizeof(gstate.boneMatrix));

if (gpu)
gpu->ResetMatrices();

savedContextVersion = 1;
}

Expand All @@ -143,6 +140,7 @@ void GPUgstate::Save(u32_le *ptr) {
u32_le *cmds = ptr + 17;
for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) {
for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) {
// We'll run ReapplyGfxState after this to process dirtying.
*cmds++ = cmdmem[n];
}
}
Expand Down Expand Up @@ -171,11 +169,11 @@ void GPUgstate::Save(u32_le *ptr) {
cmds = SaveMatrix(cmds, GE_MTX_PROJECTION, ARRAY_SIZE(projMatrix), GE_CMD_PROJMATRIXNUMBER, GE_CMD_PROJMATRIXDATA);
cmds = SaveMatrix(cmds, GE_MTX_TEXGEN, ARRAY_SIZE(tgenMatrix), GE_CMD_TGENMATRIXNUMBER, GE_CMD_TGENMATRIXDATA);

*cmds++ = boneMatrixNumber;
*cmds++ = worldmtxnum;
*cmds++ = viewmtxnum;
*cmds++ = projmtxnum;
*cmds++ = texmtxnum;
*cmds++ = boneMatrixNumber & 0xFF00007F;
*cmds++ = worldmtxnum & 0xFF00000F;
*cmds++ = viewmtxnum & 0xFF00000F;
*cmds++ = projmtxnum & 0xFF00000F;
*cmds++ = texmtxnum & 0xFF00000F;
*cmds++ = GE_CMD_END << 24;
}
}
Expand Down Expand Up @@ -251,11 +249,11 @@ void GPUgstate::Restore(u32_le *ptr) {
cmds = LoadMatrix(cmds, projMatrix, ARRAY_SIZE(projMatrix));
cmds = LoadMatrix(cmds, tgenMatrix, ARRAY_SIZE(tgenMatrix));

boneMatrixNumber = *cmds++;
worldmtxnum = *cmds++;
viewmtxnum = *cmds++;
projmtxnum = *cmds++;
texmtxnum = *cmds++;
boneMatrixNumber = (*cmds++) & 0xFF00007F;
worldmtxnum = (*cmds++) & 0xFF00000F;
viewmtxnum = (*cmds++) & 0xFF00000F;
projmtxnum = (*cmds++) & 0xFF00000F;
texmtxnum = (*cmds++) & 0xFF00000F;
}

if (gpu)
Expand Down
2 changes: 1 addition & 1 deletion GPU/Software/DrawPixelX86.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1159,7 +1159,7 @@ bool PixelJitCache::Jit_AlphaBlend(const PixelFuncID &id) {
if (id.AlphaBlendEq() == GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE)
PXOR(dstReg, R(dstReg));
} else if (id.AlphaBlendDst() == PixelBlendFactor::ONE) {
if (blendState.dstColorAsFactor)
if (blendState.dstColorAsFactor || blendState.usesDstAlpha)
PSRLW(dstReg, 4);
}

Expand Down
6 changes: 6 additions & 0 deletions GPU/Software/Rasterizer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,12 @@ void ComputeRasterizerState(RasterizerState *state) {
state->minFilt = gstate.isMinifyFilteringEnabled();
state->magFilt = gstate.isMagnifyFilteringEnabled();
state->textureProj = gstate.getUVGenMode() == GE_TEXMAP_TEXTURE_MATRIX;
if (state->textureProj) {
// We may be able to optimize this off. This is actually kinda common.
if (gstate.tgenMatrix[2] == 0.0f && gstate.tgenMatrix[5] == 0.0f && gstate.tgenMatrix[8] == 0.0f && gstate.tgenMatrix[11] == 1.0f) {
state->textureProj = false;
}
}
}

state->shadeGouraud = gstate.getShadeMode() == GE_SHADE_GOURAUD;
Expand Down
12 changes: 10 additions & 2 deletions GPU/Software/SoftGpu.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1127,8 +1127,11 @@ void SoftGPU::Execute_TgenMtxData(u32 op, u32 diff) {
if (num < 12) {
u32 *target = (u32 *)&gstate.tgenMatrix[num];
u32 newVal = op << 8;
// No dirtying, read during vertex read.
*target = newVal;
if (newVal != *target) {
*target = newVal;
// This is mainly used in vertex read, but also affects if we enable texture projection.
dirtyFlags_ |= SoftDirty::RAST_TEX;
}
}

// Doesn't wrap to any other matrix.
Expand Down Expand Up @@ -1196,6 +1199,11 @@ bool SoftGPU::GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) {
return true;
}

// Resets matrices via the GPUCommon implementation, then flags the software
// renderer's own matrix-dependent state as dirty.
void SoftGPU::ResetMatrices() {
	// Run the shared reset first; the dirty bits below are added afterwards.
	GPUCommon::ResetMatrices();

	// Both the transform matrix state and the texture rasterizer state are
	// marked dirty after a reset.
	const auto matrixDependentState = SoftDirty::TRANSFORM_MATRIX | SoftDirty::RAST_TEX;
	dirtyFlags_ |= matrixDependentState;
}

void SoftGPU::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
GPUCommon::Execute_ImmVertexAlphaPrim(op, diff);
// We won't flush as often as hardware renderers, so we want to flush right away.
Expand Down
1 change: 1 addition & 0 deletions GPU/Software/SoftGpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,7 @@ class SoftGPU : public GPUCommon {
void Execute_BoneMtxData(u32 op, u32 diff);

bool GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) override;
void ResetMatrices() override;

void Execute_ImmVertexAlphaPrim(u32 op, u32 diff);

Expand Down

0 comments on commit 89dab44

Please sign in to comment.