mirror of
https://github.com/blawar/GLideN64.git
synced 2024-07-02 09:03:37 +00:00
Correct load to TMEM functions: wrap tmem address in case of overflow.
Necessary for issue #571
This commit is contained in:
parent
a13e6322d9
commit
27149064e0
|
@ -243,7 +243,7 @@ void GBIInfo::loadMicrocode(u32 uc_start, u32 uc_dstart, u16 uc_dsize)
|
|||
|
||||
// See if we can identify it by text
|
||||
char uc_data[2048];
|
||||
UnswapCopy( &RDRAM[uc_dstart & 0x1FFFFFFF], uc_data, 2048 );
|
||||
UnswapCopyWrap(RDRAM, uc_dstart & 0x1FFFFFFF, (u8*)uc_data, 0, 0x7FF, 2048);
|
||||
char uc_str[256];
|
||||
strcpy(uc_str, "Not Found");
|
||||
|
||||
|
|
|
@ -3,7 +3,7 @@
|
|||
u8 *HEADER;
|
||||
u8 *DMEM;
|
||||
u8 *IMEM;
|
||||
u64 TMEM[TMEM_SIZE];
|
||||
u64 TMEM[512];
|
||||
u8 *RDRAM;
|
||||
|
||||
u32 RDRAMSize;
|
||||
|
|
|
@ -5,12 +5,6 @@
|
|||
|
||||
#define MI_INTR_DP 0x20 // Bit 5: DP intr
|
||||
|
||||
// Actual TMEM size is 512 QWORDS. However, some load operations load more data than TMEM can take.
|
||||
// We can either cut the surplus data, or increase the buffer and load everything.
|
||||
// The second option is simpler and safer. The actual texture load will use the correct TMEM size.
|
||||
#define TMEM_SIZE 1024
|
||||
#define TMEM_SIZE_BYTES 8192
|
||||
|
||||
struct N64Regs
|
||||
{
|
||||
u32 *MI_INTR;
|
||||
|
@ -45,7 +39,7 @@ extern u8 *HEADER;
|
|||
extern u8 *DMEM;
|
||||
extern u8 *IMEM;
|
||||
extern u8 *RDRAM;
|
||||
extern u64 TMEM[TMEM_SIZE];
|
||||
extern u64 TMEM[512];
|
||||
extern u32 RDRAMSize;
|
||||
extern bool ConfigOpen;
|
||||
|
||||
|
|
|
@ -753,7 +753,7 @@ void TextureCache::_loadBackground(CachedTexture *pTexture)
|
|||
numBytes = bpl * gSP.bgImage.height;
|
||||
pSwapped = (u8*)malloc(numBytes);
|
||||
assert(pSwapped != NULL);
|
||||
UnswapCopy(&RDRAM[gSP.bgImage.address], pSwapped, numBytes);
|
||||
UnswapCopyWrap(RDRAM, gSP.bgImage.address, pSwapped, 0, RDRAMSize, numBytes);
|
||||
pDest = (u32*)malloc(pTexture->textureBytes);
|
||||
assert(pDest != NULL);
|
||||
|
||||
|
|
153
src/convert.h
153
src/convert.h
|
@ -102,148 +102,57 @@ const volatile unsigned char One2Eight[2] =
|
|||
255, // 1 = 11111111
|
||||
};
|
||||
|
||||
static inline void UnswapCopy( void *src, void *dest, u32 numBytes )
|
||||
static inline void UnswapCopyWrap(const u8 *src, u32 srcIdx, u8 *dest, u32 destIdx, u32 destMask, u32 numBytes)
|
||||
{
|
||||
#ifdef WIN32_ASM
|
||||
__asm
|
||||
{
|
||||
mov ecx, 0
|
||||
mov esi, dword ptr [src]
|
||||
mov edi, dword ptr [dest]
|
||||
|
||||
mov ebx, esi
|
||||
and ebx, 3 // ebx = number of leading bytes
|
||||
|
||||
cmp ebx, 0
|
||||
jz StartDWordLoop
|
||||
neg ebx
|
||||
add ebx, 4
|
||||
|
||||
cmp ebx, [numBytes]
|
||||
jle NotGreater
|
||||
mov ebx, [numBytes]
|
||||
NotGreater:
|
||||
mov ecx, ebx
|
||||
xor esi, 3
|
||||
LeadingLoop: // Copies leading bytes, in reverse order (un-swaps)
|
||||
mov al, byte ptr [esi]
|
||||
mov byte ptr [edi], al
|
||||
sub esi, 1
|
||||
add edi, 1
|
||||
loop LeadingLoop
|
||||
add esi, 5
|
||||
|
||||
StartDWordLoop:
|
||||
mov ecx, dword ptr [numBytes]
|
||||
sub ecx, ebx // Don't copy what's already been copied
|
||||
|
||||
mov ebx, ecx
|
||||
and ebx, 3
|
||||
// add ecx, 3 // Round up to nearest dword
|
||||
shr ecx, 2
|
||||
|
||||
cmp ecx, 0 // If there's nothing to do, don't do it
|
||||
jle StartTrailingLoop
|
||||
|
||||
// Copies from source to destination, bswap-ing first
|
||||
DWordLoop:
|
||||
mov eax, dword ptr [esi]
|
||||
bswap eax
|
||||
mov dword ptr [edi], eax
|
||||
add esi, 4
|
||||
add edi, 4
|
||||
loop DWordLoop
|
||||
StartTrailingLoop:
|
||||
cmp ebx, 0
|
||||
jz Done
|
||||
mov ecx, ebx
|
||||
xor esi, 3
|
||||
|
||||
TrailingLoop:
|
||||
mov al, byte ptr [esi]
|
||||
mov byte ptr [edi], al
|
||||
sub esi, 1
|
||||
add edi, 1
|
||||
loop TrailingLoop
|
||||
Done:
|
||||
}
|
||||
# else // WIN32_ASM
|
||||
// copy leading bytes
|
||||
int leadingBytes = ((long)src) & 3;
|
||||
if (leadingBytes != 0)
|
||||
{
|
||||
leadingBytes = 4-leadingBytes;
|
||||
if ((unsigned int)leadingBytes > numBytes)
|
||||
u32 leadingBytes = srcIdx & 3;
|
||||
if (leadingBytes != 0) {
|
||||
leadingBytes = 4 - leadingBytes;
|
||||
if ((u32)leadingBytes > numBytes)
|
||||
leadingBytes = numBytes;
|
||||
numBytes -= leadingBytes;
|
||||
|
||||
src = (void *)((long)src ^ 3);
|
||||
for (int i = 0; i < leadingBytes; i++)
|
||||
{
|
||||
*(u8 *)(dest) = *(u8 *)(src);
|
||||
dest = (void *)((long)dest+1);
|
||||
src = (void *)((long)src -1);
|
||||
srcIdx ^= 3;
|
||||
for (int i = 0; i < leadingBytes; i++) {
|
||||
dest[destIdx&destMask] = src[srcIdx];
|
||||
++destIdx;
|
||||
--srcIdx;
|
||||
}
|
||||
src = (void *)((long)src+5);
|
||||
srcIdx += 5;
|
||||
}
|
||||
|
||||
// copy dwords
|
||||
int numDWords = numBytes >> 2;
|
||||
while (numDWords--)
|
||||
{
|
||||
u32 dword = *(u32 *)src;
|
||||
#ifdef ARM_ASM
|
||||
asm("rev %0, %0" : "+r"(dword)::);
|
||||
#else
|
||||
dword = ((dword<<24)|((dword<<8)&0x00FF0000)|((dword>>8)&0x0000FF00)|(dword>>24));
|
||||
#endif
|
||||
*(u32 *)dest = dword;
|
||||
dest = (void *)((long)dest+4);
|
||||
src = (void *)((long)src +4);
|
||||
while (numDWords--) {
|
||||
dest[(destIdx + 3) & destMask] = src[srcIdx++];
|
||||
dest[(destIdx + 2) & destMask] = src[srcIdx++];
|
||||
dest[(destIdx + 1) & destMask] = src[srcIdx++];
|
||||
dest[(destIdx + 0) & destMask] = src[srcIdx++];
|
||||
destIdx += 4;
|
||||
}
|
||||
|
||||
// copy trailing bytes
|
||||
int trailingBytes = numBytes & 3;
|
||||
if (trailingBytes)
|
||||
{
|
||||
src = (void *)((long)src ^ 3);
|
||||
for (int i = 0; i < trailingBytes; i++)
|
||||
{
|
||||
*(u8 *)(dest) = *(u8 *)(src);
|
||||
dest = (void *)((long)dest+1);
|
||||
src = (void *)((long)src -1);
|
||||
if (trailingBytes) {
|
||||
srcIdx ^= 3;
|
||||
for (int i = 0; i < trailingBytes; i++) {
|
||||
dest[destIdx&destMask] = src[srcIdx];
|
||||
++destIdx;
|
||||
--srcIdx;
|
||||
}
|
||||
}
|
||||
#endif // WIN32_ASM
|
||||
}
|
||||
|
||||
static inline void DWordInterleave( void *mem, u32 numDWords )
|
||||
static inline void DWordInterleaveWrap(u32 *src, u32 srcIdx, u32 srcMask, u32 numQWords)
|
||||
{
|
||||
#ifdef WIN32_ASM
|
||||
__asm {
|
||||
mov esi, dword ptr [mem]
|
||||
mov edi, dword ptr [mem]
|
||||
add edi, 4
|
||||
mov ecx, dword ptr [numDWords]
|
||||
DWordInterleaveLoop:
|
||||
mov eax, dword ptr [esi]
|
||||
mov ebx, dword ptr [edi]
|
||||
mov dword ptr [esi], ebx
|
||||
mov dword ptr [edi], eax
|
||||
add esi, 8
|
||||
add edi, 8
|
||||
loop DWordInterleaveLoop
|
||||
u32 tmp;
|
||||
while (numQWords--) {
|
||||
tmp = src[srcIdx & srcMask];
|
||||
src[srcIdx & srcMask] = src[(srcIdx + 1) & srcMask];
|
||||
++srcIdx;
|
||||
src[srcIdx & srcMask] = tmp;
|
||||
++srcIdx;
|
||||
}
|
||||
#else // WIN32_ASM
|
||||
int tmp;
|
||||
while( numDWords-- )
|
||||
{
|
||||
tmp = *(int *)((long)mem + 0);
|
||||
*(int *)((long)mem + 0) = *(int *)((long)mem + 4);
|
||||
*(int *)((long)mem + 4) = tmp;
|
||||
mem = (void *)((long)mem + 8);
|
||||
}
|
||||
#endif // WIN32_ASM
|
||||
}
|
||||
|
||||
inline u16 swapword( u16 value )
|
||||
|
|
41
src/gDP.cpp
41
src/gDP.cpp
|
@ -486,21 +486,11 @@ void gDPLoadTile(u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt)
|
|||
if (gDP.loadTile->line == 0)
|
||||
return;
|
||||
|
||||
const u32 address = gDP.textureImage.address + gDP.loadTile->ult * gDP.textureImage.bpl + (gDP.loadTile->uls << gDP.textureImage.size >> 1);
|
||||
u32 address = gDP.textureImage.address + gDP.loadTile->ult * gDP.textureImage.bpl + (gDP.loadTile->uls << gDP.textureImage.size >> 1);
|
||||
if ((address + height * gDP.textureImage.bpl) > RDRAMSize)
|
||||
return;
|
||||
|
||||
const u32 bpl = gDP.loadTile->line << 3;
|
||||
if (((gDP.loadTile->tmem << 3) + height * bpl) > TMEM_SIZE_BYTES) // Stay within TMEM
|
||||
{
|
||||
#ifdef DEBUG
|
||||
DebugMsg( DEBUG_HIGH | DEBUG_ERROR | DEBUG_TEXTURE, "// Attempting to load texture tile out of range\n" );
|
||||
DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadTile( %i, %i, %i, %i, %i );\n",
|
||||
tile, gDP.loadTile->uls, gDP.loadTile->ult, gDP.loadTile->lrs, gDP.loadTile->lrt );
|
||||
#endif
|
||||
height = (TMEM_SIZE_BYTES - (gDP.loadTile->tmem << 3)) / bpl;
|
||||
}
|
||||
|
||||
u32 bpl2 = bpl;
|
||||
if (gDP.loadTile->lrs > gDP.textureImage.width)
|
||||
bpl2 = (gDP.textureImage.width - gDP.loadTile->uls);
|
||||
|
@ -513,15 +503,15 @@ void gDPLoadTile(u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt)
|
|||
if (gDP.loadTile->size == G_IM_SIZ_32b)
|
||||
gDPLoadTile32b(gDP.loadTile->uls, gDP.loadTile->ult, gDP.loadTile->lrs, gDP.loadTile->lrt);
|
||||
else {
|
||||
u64 * dest = &TMEM[gDP.loadTile->tmem];
|
||||
u8 * src = &RDRAM[address];
|
||||
u32 tmemAddr = gDP.loadTile->tmem;
|
||||
const u32 line = gDP.loadTile->line;
|
||||
for (u32 y = 0; y < height; ++y) {
|
||||
UnswapCopy(src, dest, bpl);
|
||||
if (y & 1) DWordInterleave(dest, line);
|
||||
UnswapCopyWrap(RDRAM, address, (u8*)TMEM, tmemAddr << 3, 0xFFF, bpl);
|
||||
if (y & 1)
|
||||
DWordInterleaveWrap((u32*)TMEM, tmemAddr << 1, 0x3FF, line);
|
||||
|
||||
src += gDP.textureImage.bpl;
|
||||
dest += line;
|
||||
address += gDP.textureImage.bpl;
|
||||
tmemAddr += line;
|
||||
}
|
||||
}
|
||||
#ifdef DEBUG
|
||||
|
@ -606,8 +596,6 @@ void gDPLoadBlock(u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt)
|
|||
info.loadType = LOADTYPE_BLOCK;
|
||||
|
||||
u32 bytes = (lrs - uls + 1) << gDP.loadTile->size >> 1;
|
||||
if (((gDP.loadTile->tmem << 3) + bytes) > TMEM_SIZE_BYTES) // Stay within TMEM
|
||||
bytes = TMEM_SIZE_BYTES - (gDP.loadTile->tmem << 3);
|
||||
if ((bytes & 7) != 0)
|
||||
bytes = (bytes & (~7)) + 8;
|
||||
u32 address = gDP.textureImage.address + ult * gDP.textureImage.bpl + (uls << gDP.textureImage.size >> 1);
|
||||
|
@ -629,8 +617,7 @@ void gDPLoadBlock(u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt)
|
|||
else if (gDP.loadTile->format == G_IM_FMT_YUV)
|
||||
memcpy(TMEM, &RDRAM[address], bytes); // HACK!
|
||||
else {
|
||||
u64* src = (u64*)&RDRAM[address];
|
||||
u64* dest = &TMEM[gDP.loadTile->tmem];
|
||||
u32 tmemAddr = gDP.loadTile->tmem;
|
||||
|
||||
if (dxt > 0) {
|
||||
u32 line = (2047 + dxt) / dxt;
|
||||
|
@ -638,14 +625,14 @@ void gDPLoadBlock(u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt)
|
|||
u32 height = bytes / bpl;
|
||||
|
||||
for (u32 y = 0; y < height; ++y) {
|
||||
UnswapCopy(src, dest, bpl);
|
||||
if (y & 1) DWordInterleave(dest, line);
|
||||
|
||||
src += line;
|
||||
dest += line;
|
||||
UnswapCopyWrap(RDRAM, address, (u8*)TMEM, tmemAddr << 3, 0xFFF, bpl);
|
||||
if (y & 1)
|
||||
DWordInterleaveWrap((u32*)TMEM, tmemAddr << 1, 0x3FF, line);
|
||||
address += bpl;
|
||||
tmemAddr += line;
|
||||
}
|
||||
} else
|
||||
UnswapCopy(src, dest, bytes);
|
||||
UnswapCopyWrap(RDRAM, address, (u8*)TMEM, tmemAddr << 3, 0xFFF, bytes);
|
||||
}
|
||||
#ifdef DEBUG
|
||||
DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadBlock( %i, %i, %i, %i, %i );\n",
|
||||
|
|
Loading…
Reference in New Issue
Block a user