1
0
mirror of https://github.com/blawar/GLideN64.git synced 2024-07-02 09:03:37 +00:00

Correct the load-to-TMEM functions: wrap the TMEM address in case of overflow.

Necessary for issue #571
This commit is contained in:
Sergey Lipskiy 2015-06-17 13:22:21 +06:00
parent a13e6322d9
commit 27149064e0
6 changed files with 49 additions and 159 deletions

View File

@ -243,7 +243,7 @@ void GBIInfo::loadMicrocode(u32 uc_start, u32 uc_dstart, u16 uc_dsize)
// See if we can identify it by text
char uc_data[2048];
UnswapCopy( &RDRAM[uc_dstart & 0x1FFFFFFF], uc_data, 2048 );
UnswapCopyWrap(RDRAM, uc_dstart & 0x1FFFFFFF, (u8*)uc_data, 0, 0x7FF, 2048);
char uc_str[256];
strcpy(uc_str, "Not Found");

View File

@ -3,7 +3,7 @@
u8 *HEADER;
u8 *DMEM;
u8 *IMEM;
u64 TMEM[TMEM_SIZE];
u64 TMEM[512];
u8 *RDRAM;
u32 RDRAMSize;

View File

@ -5,12 +5,6 @@
#define MI_INTR_DP 0x20 // Bit 5: DP intr
// Actual TMEM size is 512 QWORDS. However, some load operations load more data than TMEM can hold.
// We can either cut off the surplus data, or enlarge the buffer and load everything.
// The second option is simpler and safer. The actual texture load will use the correct TMEM size.
#define TMEM_SIZE 1024
#define TMEM_SIZE_BYTES 8192
struct N64Regs
{
u32 *MI_INTR;
@ -45,7 +39,7 @@ extern u8 *HEADER;
extern u8 *DMEM;
extern u8 *IMEM;
extern u8 *RDRAM;
extern u64 TMEM[TMEM_SIZE];
extern u64 TMEM[512];
extern u32 RDRAMSize;
extern bool ConfigOpen;

View File

@ -753,7 +753,7 @@ void TextureCache::_loadBackground(CachedTexture *pTexture)
numBytes = bpl * gSP.bgImage.height;
pSwapped = (u8*)malloc(numBytes);
assert(pSwapped != NULL);
UnswapCopy(&RDRAM[gSP.bgImage.address], pSwapped, numBytes);
UnswapCopyWrap(RDRAM, gSP.bgImage.address, pSwapped, 0, RDRAMSize, numBytes);
pDest = (u32*)malloc(pTexture->textureBytes);
assert(pDest != NULL);

View File

@ -102,148 +102,57 @@ const volatile unsigned char One2Eight[2] =
255, // 1 = 11111111
};
static inline void UnswapCopy( void *src, void *dest, u32 numBytes )
static inline void UnswapCopyWrap(const u8 *src, u32 srcIdx, u8 *dest, u32 destIdx, u32 destMask, u32 numBytes)
{
#ifdef WIN32_ASM
__asm
{
mov ecx, 0
mov esi, dword ptr [src]
mov edi, dword ptr [dest]
mov ebx, esi
and ebx, 3 // ebx = number of leading bytes
cmp ebx, 0
jz StartDWordLoop
neg ebx
add ebx, 4
cmp ebx, [numBytes]
jle NotGreater
mov ebx, [numBytes]
NotGreater:
mov ecx, ebx
xor esi, 3
LeadingLoop: // Copies leading bytes, in reverse order (un-swaps)
mov al, byte ptr [esi]
mov byte ptr [edi], al
sub esi, 1
add edi, 1
loop LeadingLoop
add esi, 5
StartDWordLoop:
mov ecx, dword ptr [numBytes]
sub ecx, ebx // Don't copy what's already been copied
mov ebx, ecx
and ebx, 3
// add ecx, 3 // Round up to nearest dword
shr ecx, 2
cmp ecx, 0 // If there's nothing to do, don't do it
jle StartTrailingLoop
// Copies from source to destination, bswap-ing first
DWordLoop:
mov eax, dword ptr [esi]
bswap eax
mov dword ptr [edi], eax
add esi, 4
add edi, 4
loop DWordLoop
StartTrailingLoop:
cmp ebx, 0
jz Done
mov ecx, ebx
xor esi, 3
TrailingLoop:
mov al, byte ptr [esi]
mov byte ptr [edi], al
sub esi, 1
add edi, 1
loop TrailingLoop
Done:
}
# else // WIN32_ASM
// copy leading bytes
int leadingBytes = ((long)src) & 3;
if (leadingBytes != 0)
{
leadingBytes = 4-leadingBytes;
if ((unsigned int)leadingBytes > numBytes)
u32 leadingBytes = srcIdx & 3;
if (leadingBytes != 0) {
leadingBytes = 4 - leadingBytes;
if ((u32)leadingBytes > numBytes)
leadingBytes = numBytes;
numBytes -= leadingBytes;
src = (void *)((long)src ^ 3);
for (int i = 0; i < leadingBytes; i++)
{
*(u8 *)(dest) = *(u8 *)(src);
dest = (void *)((long)dest+1);
src = (void *)((long)src -1);
srcIdx ^= 3;
for (int i = 0; i < leadingBytes; i++) {
dest[destIdx&destMask] = src[srcIdx];
++destIdx;
--srcIdx;
}
src = (void *)((long)src+5);
srcIdx += 5;
}
// copy dwords
int numDWords = numBytes >> 2;
while (numDWords--)
{
u32 dword = *(u32 *)src;
#ifdef ARM_ASM
asm("rev %0, %0" : "+r"(dword)::);
#else
dword = ((dword<<24)|((dword<<8)&0x00FF0000)|((dword>>8)&0x0000FF00)|(dword>>24));
#endif
*(u32 *)dest = dword;
dest = (void *)((long)dest+4);
src = (void *)((long)src +4);
while (numDWords--) {
dest[(destIdx + 3) & destMask] = src[srcIdx++];
dest[(destIdx + 2) & destMask] = src[srcIdx++];
dest[(destIdx + 1) & destMask] = src[srcIdx++];
dest[(destIdx + 0) & destMask] = src[srcIdx++];
destIdx += 4;
}
// copy trailing bytes
int trailingBytes = numBytes & 3;
if (trailingBytes)
{
src = (void *)((long)src ^ 3);
for (int i = 0; i < trailingBytes; i++)
{
*(u8 *)(dest) = *(u8 *)(src);
dest = (void *)((long)dest+1);
src = (void *)((long)src -1);
if (trailingBytes) {
srcIdx ^= 3;
for (int i = 0; i < trailingBytes; i++) {
dest[destIdx&destMask] = src[srcIdx];
++destIdx;
--srcIdx;
}
}
#endif // WIN32_ASM
}
static inline void DWordInterleave( void *mem, u32 numDWords )
static inline void DWordInterleaveWrap(u32 *src, u32 srcIdx, u32 srcMask, u32 numQWords)
{
#ifdef WIN32_ASM
__asm {
mov esi, dword ptr [mem]
mov edi, dword ptr [mem]
add edi, 4
mov ecx, dword ptr [numDWords]
DWordInterleaveLoop:
mov eax, dword ptr [esi]
mov ebx, dword ptr [edi]
mov dword ptr [esi], ebx
mov dword ptr [edi], eax
add esi, 8
add edi, 8
loop DWordInterleaveLoop
u32 tmp;
while (numQWords--) {
tmp = src[srcIdx & srcMask];
src[srcIdx & srcMask] = src[(srcIdx + 1) & srcMask];
++srcIdx;
src[srcIdx & srcMask] = tmp;
++srcIdx;
}
#else // WIN32_ASM
int tmp;
while( numDWords-- )
{
tmp = *(int *)((long)mem + 0);
*(int *)((long)mem + 0) = *(int *)((long)mem + 4);
*(int *)((long)mem + 4) = tmp;
mem = (void *)((long)mem + 8);
}
#endif // WIN32_ASM
}
inline u16 swapword( u16 value )

View File

@ -486,21 +486,11 @@ void gDPLoadTile(u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt)
if (gDP.loadTile->line == 0)
return;
const u32 address = gDP.textureImage.address + gDP.loadTile->ult * gDP.textureImage.bpl + (gDP.loadTile->uls << gDP.textureImage.size >> 1);
u32 address = gDP.textureImage.address + gDP.loadTile->ult * gDP.textureImage.bpl + (gDP.loadTile->uls << gDP.textureImage.size >> 1);
if ((address + height * gDP.textureImage.bpl) > RDRAMSize)
return;
const u32 bpl = gDP.loadTile->line << 3;
if (((gDP.loadTile->tmem << 3) + height * bpl) > TMEM_SIZE_BYTES) // Stay within TMEM
{
#ifdef DEBUG
DebugMsg( DEBUG_HIGH | DEBUG_ERROR | DEBUG_TEXTURE, "// Attempting to load texture tile out of range\n" );
DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadTile( %i, %i, %i, %i, %i );\n",
tile, gDP.loadTile->uls, gDP.loadTile->ult, gDP.loadTile->lrs, gDP.loadTile->lrt );
#endif
height = (TMEM_SIZE_BYTES - (gDP.loadTile->tmem << 3)) / bpl;
}
u32 bpl2 = bpl;
if (gDP.loadTile->lrs > gDP.textureImage.width)
bpl2 = (gDP.textureImage.width - gDP.loadTile->uls);
@ -513,15 +503,15 @@ void gDPLoadTile(u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt)
if (gDP.loadTile->size == G_IM_SIZ_32b)
gDPLoadTile32b(gDP.loadTile->uls, gDP.loadTile->ult, gDP.loadTile->lrs, gDP.loadTile->lrt);
else {
u64 * dest = &TMEM[gDP.loadTile->tmem];
u8 * src = &RDRAM[address];
u32 tmemAddr = gDP.loadTile->tmem;
const u32 line = gDP.loadTile->line;
for (u32 y = 0; y < height; ++y) {
UnswapCopy(src, dest, bpl);
if (y & 1) DWordInterleave(dest, line);
UnswapCopyWrap(RDRAM, address, (u8*)TMEM, tmemAddr << 3, 0xFFF, bpl);
if (y & 1)
DWordInterleaveWrap((u32*)TMEM, tmemAddr << 1, 0x3FF, line);
src += gDP.textureImage.bpl;
dest += line;
address += gDP.textureImage.bpl;
tmemAddr += line;
}
}
#ifdef DEBUG
@ -606,8 +596,6 @@ void gDPLoadBlock(u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt)
info.loadType = LOADTYPE_BLOCK;
u32 bytes = (lrs - uls + 1) << gDP.loadTile->size >> 1;
if (((gDP.loadTile->tmem << 3) + bytes) > TMEM_SIZE_BYTES) // Stay within TMEM
bytes = TMEM_SIZE_BYTES - (gDP.loadTile->tmem << 3);
if ((bytes & 7) != 0)
bytes = (bytes & (~7)) + 8;
u32 address = gDP.textureImage.address + ult * gDP.textureImage.bpl + (uls << gDP.textureImage.size >> 1);
@ -629,8 +617,7 @@ void gDPLoadBlock(u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt)
else if (gDP.loadTile->format == G_IM_FMT_YUV)
memcpy(TMEM, &RDRAM[address], bytes); // HACK!
else {
u64* src = (u64*)&RDRAM[address];
u64* dest = &TMEM[gDP.loadTile->tmem];
u32 tmemAddr = gDP.loadTile->tmem;
if (dxt > 0) {
u32 line = (2047 + dxt) / dxt;
@ -638,14 +625,14 @@ void gDPLoadBlock(u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt)
u32 height = bytes / bpl;
for (u32 y = 0; y < height; ++y) {
UnswapCopy(src, dest, bpl);
if (y & 1) DWordInterleave(dest, line);
src += line;
dest += line;
UnswapCopyWrap(RDRAM, address, (u8*)TMEM, tmemAddr << 3, 0xFFF, bpl);
if (y & 1)
DWordInterleaveWrap((u32*)TMEM, tmemAddr << 1, 0x3FF, line);
address += bpl;
tmemAddr += line;
}
} else
UnswapCopy(src, dest, bytes);
UnswapCopyWrap(RDRAM, address, (u8*)TMEM, tmemAddr << 3, 0xFFF, bytes);
}
#ifdef DEBUG
DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadBlock( %i, %i, %i, %i, %i );\n",