1
0
mirror of https://github.com/blawar/GLideN64.git synced 2024-07-02 09:03:37 +00:00

Implement partial buffer copy.

This commit is contained in:
Sergey Lipskiy 2015-11-23 19:03:33 +06:00
parent ab54f787c4
commit 5d8ea5dda1
2 changed files with 263 additions and 86 deletions

View File

@ -23,7 +23,11 @@ class FrameBufferToRDRAM
{
public:
FrameBufferToRDRAM() :
m_FBO(0), m_pTexture(NULL), m_curIndex(0)
m_FBO(0),
m_pTexture(nullptr),
m_pCurFrameBuffer(nullptr),
m_curIndex(-1),
m_frameCount(0)
{
m_PBO[0] = m_PBO[1] = m_PBO[2] = 0;
}
@ -31,7 +35,8 @@ public:
void Init();
void Destroy();
void CopyToRDRAM(u32 _address, bool _sync);
void copyToRDRAM(u32 _address, bool _sync);
void copyChunkToRDRAM(u32 _address);
private:
union RGBA {
@ -41,9 +46,19 @@ private:
u32 raw;
};
bool _prepareCopy(u32 _address);
void _copy(u32 _startAddress, u32 _endAddress, bool _sync);
// Convert pixel from video memory to N64 buffer format.
static u8 _RGBAtoR8(u8 _c);
static u16 _RGBAtoRGBA16(u32 _c);
static u32 _RGBAtoRGBA32(u32 _c);
GLuint m_FBO;
CachedTexture * m_pTexture;
FrameBuffer * m_pCurFrameBuffer;
u32 m_curIndex;
u32 m_frameCount;
GLuint m_PBO[3];
};
@ -51,19 +66,33 @@ class DepthBufferToRDRAM
{
public:
DepthBufferToRDRAM() :
m_FBO(0), m_PBO(0), m_pColorTexture(NULL), m_pDepthTexture(NULL)
m_FBO(0),
m_PBO(0),
m_frameCount(-1),
m_pColorTexture(nullptr),
m_pDepthTexture(nullptr),
m_pCurDepthBuffer(nullptr)
{}
void Init();
void Destroy();
bool CopyToRDRAM( u32 address );
bool copyToRDRAM(u32 _address);
bool copyChunkToRDRAM(u32 _address);
private:
bool _prepareCopy(u32 _address);
bool _copy(u32 _startAddress, u32 _endAddress);
// Convert pixel from video memory to N64 depth buffer format.
static u16 _FloatToUInt16(f32 _z);
GLuint m_FBO;
GLuint m_PBO;
u32 m_frameCount;
CachedTexture * m_pColorTexture;
CachedTexture * m_pDepthTexture;
DepthBuffer * m_pCurDepthBuffer;
};
#endif // GLES2
@ -995,39 +1024,40 @@ void FrameBufferToRDRAM::Destroy() {
m_PBO[0] = m_PBO[1] = m_PBO[2] = 0;
}
void FrameBufferToRDRAM::CopyToRDRAM(u32 _address, bool _sync)
bool FrameBufferToRDRAM::_prepareCopy(u32 _address)
{
if (VI.width == 0 || frameBufferList().getCurrent() == NULL)
return;
return false;
FrameBuffer *pBuffer = frameBufferList().findBuffer(_address);
if (pBuffer == NULL || pBuffer->m_isOBScreen)
return;
m_pCurFrameBuffer = frameBufferList().findBuffer(_address);
if (m_pCurFrameBuffer == NULL || m_pCurFrameBuffer->m_isOBScreen)
return false;
const u32 numPixels = pBuffer->m_width * pBuffer->m_height;
const u32 numPixels = m_pCurFrameBuffer->m_width * m_pCurFrameBuffer->m_height;
if (numPixels == 0)
return;
return false;
const u32 stride = pBuffer->m_width << pBuffer->m_size >> 1;
const u32 height = _cutHeight(_address, pBuffer->m_height, stride);
const u32 stride = m_pCurFrameBuffer->m_width << m_pCurFrameBuffer->m_size >> 1;
const u32 height = _cutHeight(_address, m_pCurFrameBuffer->m_height, stride);
if (height == 0)
return;
return false;
if ((config.generalEmulation.hacks & hack_subscreen) != 0 && pBuffer->m_width == VI.width && pBuffer->m_height == VI.height) {
copyWhiteToRDRAM(pBuffer);
return;
if ((config.generalEmulation.hacks & hack_subscreen) != 0 && m_pCurFrameBuffer->m_width == VI.width && m_pCurFrameBuffer->m_height == VI.height) {
copyWhiteToRDRAM(m_pCurFrameBuffer);
return false;
}
_address = pBuffer->m_startAddress;
_address = m_pCurFrameBuffer->m_startAddress;
if (config.video.multisampling != 0) {
pBuffer->resolveMultisampledTexture();
glBindFramebuffer(GL_READ_FRAMEBUFFER, pBuffer->m_resolveFBO);
} else
glBindFramebuffer(GL_READ_FRAMEBUFFER, pBuffer->m_FBO);
m_pCurFrameBuffer->resolveMultisampledTexture();
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_pCurFrameBuffer->m_resolveFBO);
}
else
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_pCurFrameBuffer->m_FBO);
if (pBuffer->m_scaleX > 1.0f) {
if (m_pCurFrameBuffer->m_scaleX > 1.0f) {
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_FBO);
glScissor(0, 0, pBuffer->m_pTexture->realWidth, pBuffer->m_pTexture->realHeight);
glScissor(0, 0, m_pCurFrameBuffer->m_pTexture->realWidth, m_pCurFrameBuffer->m_pTexture->realHeight);
glBlitFramebuffer(
0, 0, video().getWidth(), video().getHeight(),
0, 0, VI.width, VI.height,
@ -1036,58 +1066,131 @@ void FrameBufferToRDRAM::CopyToRDRAM(u32 _address, bool _sync)
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frameBufferList().getCurrent()->m_FBO);
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_FBO);
}
return true;
}
// Convert pixels from a source image and store them to a destination buffer,
// flipping the image vertically: destination row 0 is taken from the last
// source row, destination row 1 from the one before it, and so on.
//
// _src           source pixels, _width pixels per row, _height rows.
// _dst           destination; points at the pixel located at _startAddress.
// converter      converts one source pixel to the destination format.
// _testValue     source pixels equal to this value are skipped, the
//                corresponding destination element is left untouched.
// _xor           value XOR-ed with the destination index before each store.
// _width         row length in pixels.
// _height        number of rows available in _src.
// _numPixels     maximal number of pixels to process.
// _startAddress  address of the first pixel to store.
// _bufferAddress address of the buffer the chunk belongs to.
// _bufferSize    pixel size code; (_bufferSize - 1) is used as the shift that
//                turns a byte offset into a pixel offset.
template <typename TSrc, typename TDst>
void _writeToRdram(TSrc* _src, TDst* _dst, TDst(*converter)(TSrc _c), TSrc _testValue, u32 _xor, u32 _width, u32 _height, u32 _numPixels, u32 _startAddress, u32 _bufferAddress, u32 _bufferSize)
{
	// Nothing to copy; also protects the modulo and the (_height - 1) below.
	if (_width == 0 || _height == 0)
		return;

	// Column of the first requested pixel inside its row.
	u32 chunkStart = ((_startAddress - _bufferAddress) >> (_bufferSize - 1)) % _width;
	if (chunkStart % 2 != 0) {
		// Start one pixel earlier to begin on an even column.
		// The destination moves back together with the column, and the pixel
		// budget grows by one so the last requested pixel is still stored.
		--chunkStart;
		--_dst;
		++_numPixels;
	}

	u32 numStored = 0;
	u32 y = 0;
	TSrc c;
	if (chunkStart > 0) {
		// Partial first row. _dst already points at column chunkStart,
		// so the destination index is counted from zero, not from x.
		for (u32 x = chunkStart; x < _width && numStored < _numPixels; ++x) {
			c = _src[x + (_height - 1)*_width];
			if (c != _testValue)
				_dst[numStored ^ _xor] = converter(c);
			++numStored;
		}
		++y;
		// Continue from the beginning of the next destination row.
		_dst += numStored;
	}

	// Remaining full rows, limited by the pixel budget.
	u32 dsty = 0;
	for (; y < _height; ++y) {
		for (u32 x = 0; x < _width && numStored < _numPixels; ++x) {
			c = _src[x + (_height - y - 1)*_width];
			if (c != _testValue)
				_dst[(x + dsty*_width) ^ _xor] = converter(c);
			++numStored;
		}
		++dsty;
	}
}
// Converter for 8-bit pixels: the value is passed through unchanged.
u8 FrameBufferToRDRAM::_RGBAtoR8(u8 _c)
{
	const u8 converted = _c;
	return converted;
}
// Pack a 32-bit pixel into 16 bits: the r, g and b components are shifted
// right by 3 and placed at bits 11, 6 and 1; bit 0 is set when alpha is non-zero.
u16 FrameBufferToRDRAM::_RGBAtoRGBA16(u32 _c)
{
	RGBA pixel;
	pixel.raw = _c;

	const auto red = pixel.r >> 3;
	const auto green = pixel.g >> 3;
	const auto blue = pixel.b >> 3;
	const int alphaBit = (pixel.a != 0) ? 1 : 0;

	return (red << 11) | (green << 6) | (blue << 1) | alphaBit;
}
// Reorder the components of a 32-bit pixel: r goes to bits 31..24,
// g to bits 23..16, b to bits 15..8 and a to bits 7..0.
u32 FrameBufferToRDRAM::_RGBAtoRGBA32(u32 _c)
{
	RGBA c;
	c.raw = _c;
	// Widen every component to u32 before shifting. A narrow component is
	// otherwise promoted to signed int, and shifting a value >= 0x80 left by 24
	// moves a bit into the sign position.
	const u32 r = static_cast<u32>(c.r);
	const u32 g = static_cast<u32>(c.g);
	const u32 b = static_cast<u32>(c.b);
	const u32 a = static_cast<u32>(c.a);
	return (r << 24) | (g << 16) | (b << 8) | a;
}
void FrameBufferToRDRAM::_copy(u32 _startAddress, u32 _endAddress, bool _sync)
{
const u32 stride = m_pCurFrameBuffer->m_width << m_pCurFrameBuffer->m_size >> 1;
const u32 max_height = _cutHeight(_startAddress, m_pCurFrameBuffer->m_height, stride);
u32 numPixels = (_endAddress - _startAddress) >> (m_pCurFrameBuffer->m_size - 1);
if (numPixels / m_pCurFrameBuffer->m_width > max_height) {
_endAddress = _startAddress + (max_height * stride);
numPixels = (_endAddress - _startAddress) >> (m_pCurFrameBuffer->m_size - 1);
}
const GLsizei width = m_pCurFrameBuffer->m_width;
const GLsizei height = numPixels / width + ((numPixels % width) > 0 ? 1 : 0);
const GLint x0 = 0;
const GLint y0 = max_height - (_startAddress - m_pCurFrameBuffer->m_startAddress) / width - height;
GLenum colorFormat, colorType, colorFormatBytes;
if (m_pCurFrameBuffer->m_size > G_IM_SIZ_8b) {
colorFormat = fboFormats.colorFormat;
colorType = fboFormats.colorType;
colorFormatBytes = fboFormats.colorFormatBytes;
}
else {
colorFormat = fboFormats.monochromeFormat;
colorType = fboFormats.monochromeType;
colorFormatBytes = fboFormats.monochromeFormatBytes;
}
#ifndef GLES2
// If Sync, read pixels from the buffer, copy them to RDRAM.
// If not Sync, read pixels from the buffer, copy pixels from the previous buffer to RDRAM.
if (!_sync) {
m_curIndex ^= 1;
const u32 nextIndex = m_curIndex^1;
const u32 nextIndex = m_curIndex ^ 1;
glBindBuffer(GL_PIXEL_PACK_BUFFER, m_PBO[m_curIndex]);
glReadPixels(0, 0, pBuffer->m_width, pBuffer->m_height, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glReadPixels(x0, y0, width, height, colorFormat, colorType, 0);
glBindBuffer(GL_PIXEL_PACK_BUFFER, m_PBO[nextIndex]);
} else {
}
else {
glBindBuffer(GL_PIXEL_PACK_BUFFER, m_PBO[2]);
glReadPixels(0, 0, pBuffer->m_width, pBuffer->m_height, GL_RGBA, GL_UNSIGNED_BYTE, 0);
glReadPixels(x0, y0, width, height, colorFormat, colorType, 0);
}
GLubyte* pixelData = (GLubyte*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, numPixels * 4, GL_MAP_READ_BIT);
GLubyte* pixelData = (GLubyte*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, width * height * colorFormatBytes, GL_MAP_READ_BIT);
if (pixelData == NULL)
return;
#else
GLubyte* pixelData = (GLubyte* )malloc(numPixels * 4);
GLubyte* pixelData = (GLubyte*)malloc(width * height * colorFormatBytes);
if (pixelData == NULL)
return;
glReadPixels(0, 0, VI.width, VI.height, GL_RGBA, GL_UNSIGNED_BYTE, pixelData);
glReadPixels(x0, y0, width, height, colorFormat, colorType, pixelData);
#endif // GLES2
if (pBuffer->m_size == G_IM_SIZ_32b) {
u32 *ptr_dst = (u32*)(RDRAM + _address);
if (m_pCurFrameBuffer->m_size == G_IM_SIZ_32b) {
u32 *ptr_src = (u32*)pixelData;
RGBA c;
for (u32 y = 0; y < height; ++y) {
for (u32 x = 0; x < pBuffer->m_width; ++x) {
c.raw = ptr_src[x + (height - y - 1)*pBuffer->m_width];
if (c.raw != 0)
ptr_dst[(x + y*pBuffer->m_width)] = (c.r << 24) | (c.g << 16) | (c.b << 8) | c.a;
}
}
} else if (pBuffer->m_size == G_IM_SIZ_16b) {
u16 *ptr_dst = (u16*)(RDRAM + _address);
u32 * ptr_src = (u32*)pixelData;
RGBA c;
for (u32 y = 0; y < height; ++y) {
for (u32 x = 0; x < pBuffer->m_width; ++x) {
c.raw = ptr_src[x + (height - y - 1)*pBuffer->m_width];
if (c.raw != 0)
ptr_dst[(x + y*pBuffer->m_width) ^ 1] = ((c.r >> 3) << 11) | ((c.g >> 3) << 6) | ((c.b >> 3) << 1) | (c.a == 0 ? 0 : 1);
}
}
u32 *ptr_dst = (u32*)(RDRAM + _startAddress);
_writeToRdram<u32, u32>(ptr_src, ptr_dst, &FrameBufferToRDRAM::_RGBAtoRGBA32, 0, 0, width, height, numPixels, _startAddress, m_pCurFrameBuffer->m_startAddress, m_pCurFrameBuffer->m_size);
} else if (m_pCurFrameBuffer->m_size == G_IM_SIZ_16b) {
u32 *ptr_src = (u32*)pixelData;
u16 *ptr_dst = (u16*)(RDRAM + _startAddress);
_writeToRdram<u32, u16>(ptr_src, ptr_dst, &FrameBufferToRDRAM::_RGBAtoRGBA16, 0, 1, width, height, numPixels, _startAddress, m_pCurFrameBuffer->m_startAddress, m_pCurFrameBuffer->m_size);
} else if (m_pCurFrameBuffer->m_size == G_IM_SIZ_8b) {
u8 *ptr_src = (u8*)pixelData;
u8 *ptr_dst = RDRAM + _startAddress;
_writeToRdram<u8, u8>(ptr_src, ptr_dst, &FrameBufferToRDRAM::_RGBAtoR8, 0, 3, width, height, numPixels, _startAddress, m_pCurFrameBuffer->m_startAddress, m_pCurFrameBuffer->m_size);
}
pBuffer->m_copiedToRdram = true;
pBuffer->copyRdram();
pBuffer->m_cleared = false;
m_pCurFrameBuffer->m_copiedToRdram = true;
m_pCurFrameBuffer->copyRdram();
m_pCurFrameBuffer->m_cleared = false;
#ifndef GLES2
glUnmapBuffer(GL_PIXEL_PACK_BUFFER);
glBindBuffer(GL_PIXEL_PACK_BUFFER, 0);
@ -1097,12 +1200,26 @@ void FrameBufferToRDRAM::CopyToRDRAM(u32 _address, bool _sync)
glBindFramebuffer(GL_READ_FRAMEBUFFER, 0);
gDP.changed |= CHANGED_SCISSOR;
}
// Copy the whole frame buffer found for _address, from its start address
// to its end address. Does nothing when _prepareCopy() rejects the request.
void FrameBufferToRDRAM::copyToRDRAM(u32 _address, bool _sync)
{
	if (_prepareCopy(_address)) {
		const u32 firstAddress = m_pCurFrameBuffer->m_startAddress;
		const u32 lastAddress = m_pCurFrameBuffer->m_endAddress;
		_copy(firstAddress, lastAddress, _sync);
	}
}
// Copy a 0x1000 byte chunk of the frame buffer, starting at _address.
// The copy is always requested in synchronous mode.
// Does nothing when _prepareCopy() rejects the request.
void FrameBufferToRDRAM::copyChunkToRDRAM(u32 _address)
{
	if (_prepareCopy(_address)) {
		const u32 chunkEnd = _address + 0x1000;
		_copy(_address, chunkEnd, true);
	}
}
#endif // GLES2
void FrameBuffer_CopyToRDRAM(u32 _address, bool _sync)
{
#ifndef GLES2
g_fbToRDRAM.CopyToRDRAM(_address, _sync);
g_fbToRDRAM.copyToRDRAM(_address, _sync);
#else
if ((config.generalEmulation.hacks & hack_subscreen) == 0)
return;
@ -1115,6 +1232,13 @@ void FrameBuffer_CopyToRDRAM(u32 _address, bool _sync)
#endif
}
// Free-function entry point for the partial color buffer copy.
// Forwards _address to g_fbToRDRAM.copyChunkToRDRAM().
// When GLES2 is defined the body is empty and the call does nothing.
void FrameBuffer_CopyChunkToRDRAM(u32 _address)
{
#ifndef GLES2
g_fbToRDRAM.copyChunkToRDRAM(_address);
#endif
}
#ifndef GLES2
void DepthBufferToRDRAM::Init()
{
@ -1195,7 +1319,12 @@ void DepthBufferToRDRAM::Destroy() {
}
}
bool DepthBufferToRDRAM::CopyToRDRAM( u32 _address) {
bool DepthBufferToRDRAM::_prepareCopy(u32 _address)
{
const u32 curFrame = video().getBuffersSwapCount();
if (m_frameCount == curFrame)
return true;
const u32 numPixels = VI.width * VI.height;
if (numPixels == 0) // Incorrect buffer size. Don't copy
return false;
@ -1203,19 +1332,19 @@ bool DepthBufferToRDRAM::CopyToRDRAM( u32 _address) {
if (pBuffer == NULL || pBuffer->m_width < VI.width || pBuffer->m_pDepthBuffer == NULL || !pBuffer->m_pDepthBuffer->m_cleared)
return false;
DepthBuffer * pDepthBuffer = pBuffer->m_pDepthBuffer;
const u32 address = pDepthBuffer->m_address;
m_pCurDepthBuffer = pBuffer->m_pDepthBuffer;
const u32 address = m_pCurDepthBuffer->m_address;
if (address + numPixels * 2 > RDRAMSize)
return false;
const u32 height = _cutHeight(address, min(VI.height, pDepthBuffer->m_lry), pBuffer->m_width * 2);
const u32 height = _cutHeight(address, min(VI.height, m_pCurDepthBuffer->m_lry), pBuffer->m_width * 2);
if (height == 0)
return false;
if (config.video.multisampling == 0)
glBindFramebuffer(GL_READ_FRAMEBUFFER, pBuffer->m_FBO);
else {
pDepthBuffer->resolveDepthBufferTexture(pBuffer);
m_pCurDepthBuffer->resolveDepthBufferTexture(pBuffer);
glBindFramebuffer(GL_READ_FRAMEBUFFER, pBuffer->m_resolveFBO);
}
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, m_FBO);
@ -1226,33 +1355,51 @@ bool DepthBufferToRDRAM::CopyToRDRAM( u32 _address) {
GL_DEPTH_BUFFER_BIT, GL_NEAREST
);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, frameBufferList().getCurrent()->m_FBO);
m_frameCount = curFrame;
return true;
}
// Convert a floating point depth value to a 16-bit value via the lookup
// table returned by depthBufferList().getZLUT().
// Values below 1.0 are scaled by 262144, rounded to the nearest integer and
// clamped to 0x3FFFF; every other value uses the last table index, 0x3FFFF.
u16 DepthBufferToRDRAM::_FloatToUInt16(f32 _z)
{
	// The table pointer is fetched once, on the first call.
	static const u16 * const lut = depthBufferList().getZLUT();
	const u32 lastIndex = 0x3FFFFU;

	if (!(_z < 1.0f))
		return lut[lastIndex];

	const f32 scaled = _z * 262144.0f;
	const u32 rounded = u32(floorf(scaled + 0.5f));
	return lut[min(lastIndex, rounded)];
}
bool DepthBufferToRDRAM::_copy(u32 _startAddress, u32 _endAddress)
{
const u32 stride = m_pCurDepthBuffer->m_width << 1;
const u32 max_height = _cutHeight(_startAddress, min(VI.height, m_pCurDepthBuffer->m_lry), stride);
u32 numPixels = (_endAddress - _startAddress) >> 1;
if (numPixels / m_pCurDepthBuffer->m_width > max_height) {
_endAddress = _startAddress + (max_height * stride);
numPixels = (_endAddress - _startAddress) >> 1;
}
const GLsizei width = m_pCurDepthBuffer->m_width;
const GLsizei height = numPixels / width + ((numPixels % width) > 0 ? 1 : 0);
const GLint x0 = 0;
const GLint y0 = max_height - (_startAddress - m_pCurDepthBuffer->m_address) / stride - height;
PBOBinder binder(GL_PIXEL_PACK_BUFFER, m_PBO);
glBindFramebuffer(GL_READ_FRAMEBUFFER, m_FBO);
glReadPixels(0, 0, VI.width, VI.height, GL_DEPTH_COMPONENT, GL_FLOAT, 0);
glReadPixels(x0, y0, width, height, fboFormats.depthFormat, fboFormats.depthType, 0);
GLubyte* pixelData = (GLubyte*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, numPixels * 4, GL_MAP_READ_BIT);
if(pixelData == NULL)
GLubyte* pixelData = (GLubyte*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, width * height * fboFormats.depthFormatBytes, GL_MAP_READ_BIT);
if (pixelData == NULL)
return false;
f32 * ptr_src = (f32*)pixelData;
u16 *ptr_dst = (u16*)(RDRAM + address);
const u16 * const zLUT = depthBufferList().getZLUT();
u16 *ptr_dst = (u16*)(RDRAM + _startAddress);
_writeToRdram<f32, u16>(ptr_src, ptr_dst, &DepthBufferToRDRAM::_FloatToUInt16, 2.0f, 1, width, height, numPixels, _startAddress, m_pCurDepthBuffer->m_address, G_IM_SIZ_16b);
for (u32 y = pDepthBuffer->m_uly; y < height; ++y) {
for (u32 x = 0; x < VI.width; ++x) {
f32 z = ptr_src[x + (height - y - 1)*VI.width];
u32 idx = 0x3FFFF;
if (z < 1.0f) {
z *= 262144.0f;
idx = min(0x3FFFFU, u32(floorf(z + 0.5f)));
}
ptr_dst[(x + y*VI.width) ^ 1] = zLUT[idx];
}
}
pDepthBuffer->m_cleared = false;
pBuffer = frameBufferList().findBuffer(pDepthBuffer->m_address);
m_pCurDepthBuffer->m_cleared = false;
FrameBuffer * pBuffer = frameBufferList().findBuffer(m_pCurDepthBuffer->m_address);
if (pBuffer != NULL)
pBuffer->m_cleared = false;
@ -1261,19 +1408,47 @@ bool DepthBufferToRDRAM::CopyToRDRAM( u32 _address) {
gDP.changed |= CHANGED_SCISSOR;
return true;
}
// Copy the current depth buffer, from its address up to
// min(VI.height, m_lry) rows of m_width 16-bit values.
// Returns false when _prepareCopy() rejects the request,
// otherwise the result of _copy().
bool DepthBufferToRDRAM::copyToRDRAM(u32 _address)
{
	if (_prepareCopy(_address)) {
		const auto rows = min(VI.height, m_pCurDepthBuffer->m_lry);
		const u32 endAddress = m_pCurDepthBuffer->m_address + (rows * m_pCurDepthBuffer->m_width * 2);
		return _copy(m_pCurDepthBuffer->m_address, endAddress);
	}
	return false;
}
// Copy a 0x1000 byte chunk of the depth buffer, starting at _address.
// Returns false when _prepareCopy() rejects the request,
// otherwise the result of _copy().
bool DepthBufferToRDRAM::copyChunkToRDRAM(u32 _address)
{
	if (_prepareCopy(_address)) {
		const u32 chunkEnd = _address + 0x1000;
		return _copy(_address, chunkEnd);
	}
	return false;
}
#endif // GLES2
bool FrameBuffer_CopyDepthBuffer( u32 address ) {
bool FrameBuffer_CopyDepthBuffer( u32 address )
{
#ifndef GLES2
FrameBuffer * pCopyBuffer = frameBufferList().getCopyBuffer();
if (pCopyBuffer != NULL) {
// This code is mainly to emulate Zelda MM camera.
g_fbToRDRAM.CopyToRDRAM(pCopyBuffer->m_startAddress, true);
g_fbToRDRAM.copyToRDRAM(pCopyBuffer->m_startAddress, true);
pCopyBuffer->m_RdramCopy.resize(0); // To disable validity check by RDRAM content. CPU may change content of the buffer for some unknown reason.
frameBufferList().setCopyBuffer(NULL);
return true;
} else
return g_dbToRDRAM.CopyToRDRAM(address);
return g_dbToRDRAM.copyToRDRAM(address);
#else
return false;
#endif
}
bool FrameBuffer_CopyDepthBufferChunk(u32 address)
{
#ifndef GLES2
return g_dbToRDRAM.copyChunkToRDRAM(address);
#else
return false;
#endif

View File

@ -116,8 +116,10 @@ FrameBufferList & frameBufferList()
void FrameBuffer_Init();
void FrameBuffer_Destroy();
void FrameBuffer_CopyToRDRAM( u32 _address , bool _sync );
void FrameBuffer_CopyFromRDRAM( u32 address, bool bUseAlpha );
void FrameBuffer_CopyChunkToRDRAM(u32 _address);
void FrameBuffer_CopyFromRDRAM(u32 address, bool bUseAlpha);
bool FrameBuffer_CopyDepthBuffer( u32 address );
bool FrameBuffer_CopyDepthBufferChunk(u32 address);
void FrameBuffer_ActivateBufferTexture(s16 t, FrameBuffer *pBuffer);
void FrameBuffer_ActivateBufferTextureBG(s16 t, FrameBuffer *pBuffer);