From dca2c29edf8ffecca0b2fc0e27a192745093772a Mon Sep 17 00:00:00 2001 From: Francisco Zurita Date: Tue, 24 May 2016 01:18:56 -0400 Subject: [PATCH] When in CopyColorToRDRAM async mode, we will now use the OES_EGL_image_external OpenGL ES extension to copy the color buffer into system memory. This provides large performance improvements on many Android devices. --- src/FrameBuffer.cpp | 183 ++++++++++++++++++++++------- src/mupenplus/OpenGL_mupenplus.cpp | 9 ++ 2 files changed, 149 insertions(+), 43 deletions(-) diff --git a/src/FrameBuffer.cpp b/src/FrameBuffer.cpp index ac5a10d8..4ea08831 100644 --- a/src/FrameBuffer.cpp +++ b/src/FrameBuffer.cpp @@ -18,6 +18,19 @@ #include "Debug.h" #include "PostProcessor.h" #include "FrameBufferInfo.h" +#include "Log.h" + +#ifdef ANDROID +#include "ui/GraphicBuffer.h" +#include +#include +#include + +typedef void (GL_APIENTRY* PFNGLEGLIMAGETARGETTEXTURE2DOESPROC) (GLenum target, EGLImageKHR image); +typedef void (GL_APIENTRY* PFNGLEGLIMAGETARGETRENDERBUFFERSTORAGEOESPROC) (GLenum target, EGLImageKHR image); + +using namespace android; +#endif using namespace std; @@ -51,6 +64,9 @@ private: }; bool _prepareCopy(u32 _startAddress); + + GLubyte* getPixels(GLint _x0, GLint _y0, GLsizei _width, GLsizei _height, bool _sync); + void cleanUpPixels(GLubyte* pixelData); void _copy(u32 _startAddress, u32 _endAddress, bool _sync); // Convert pixel from video memory to N64 buffer format. @@ -65,6 +81,12 @@ private: u32 m_frameCount; u32 m_startAddress; GLuint m_PBO[3]; + +#ifdef ANDROID + GraphicBuffer* m_window; + EGLImageKHR m_image; + PFNGLEGLIMAGETARGETTEXTURE2DOESPROC m_glEGLImageTargetTexture2DOES; +#endif }; class DepthBufferToRDRAM @@ -680,7 +702,7 @@ void FrameBufferList::saveBuffer(u32 _address, u16 _format, u16 _size, u16 _widt //Also, before making any adjustments, make sure gDP.colorImage.height has a valid value.
if((!m_pCurrent->isAuxiliary() || m_pCurrent->m_needHeightCorrection) && gDP.colorImage.height != 0) { - m_pCurrent->m_endAddress = min(RDRAMSize, m_pCurrent->m_startAddress + (((m_pCurrent->m_width * gDP.colorImage.height) << m_pCurrent->m_size >> 1) - 1)); + m_pCurrent->m_endAddress = min(RDRAMSize, m_pCurrent->m_startAddress + (((m_pCurrent->m_width * gDP.colorImage.height) << m_pCurrent->m_size >> 1) - 1)); } if (!m_pCurrent->_isMarioTennisScoreboard() && !m_pCurrent->m_isDepthBuffer && !m_pCurrent->m_copiedToRdram && !m_pCurrent->m_cfb && !m_pCurrent->m_cleared && m_pCurrent->m_RdramCopy.empty() && gDP.colorImage.height > 1) { @@ -1172,6 +1194,16 @@ void FrameBufferToRDRAM::Init() assert(checkFBO()); glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); +#ifdef ANDROID + m_window = new GraphicBuffer(m_pTexture->realWidth, m_pTexture->realHeight, + PIXEL_FORMAT_RGBA_8888, GraphicBuffer::USAGE_SW_READ_OFTEN | GraphicBuffer::USAGE_HW_TEXTURE); + + EGLint eglImgAttrs[] = { EGL_IMAGE_PRESERVED_KHR, EGL_TRUE, EGL_NONE, EGL_NONE }; + m_image = eglCreateImageKHR(eglGetDisplay(EGL_DEFAULT_DISPLAY), EGL_NO_CONTEXT, + EGL_NATIVE_BUFFER_ANDROID, (EGLClientBuffer)m_window->getNativeBuffer(), eglImgAttrs); + + m_glEGLImageTargetTexture2DOES = (PFNGLEGLIMAGETARGETTEXTURE2DOESPROC)eglGetProcAddress("glEGLImageTargetTexture2DOES"); +#else // Generate and initialize Pixel Buffer Objects glGenBuffers(3, m_PBO); for (u32 i = 0; i < 3; ++i) { @@ -1180,6 +1212,7 @@ void FrameBufferToRDRAM::Init() } glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); m_curIndex = 0; +#endif } void FrameBufferToRDRAM::Destroy() { @@ -1192,8 +1225,13 @@ void FrameBufferToRDRAM::Destroy() { textureCache().removeFrameBufferTexture(m_pTexture); m_pTexture = NULL; } + +#ifdef ANDROID + eglDestroyImageKHR(eglGetDisplay(EGL_DEFAULT_DISPLAY), m_image); +#else glDeleteBuffers(3, m_PBO); m_PBO[0] = m_PBO[1] = m_PBO[2] = 0; +#endif } bool FrameBufferToRDRAM::_prepareCopy(u32 _startAddress) @@ -1318,23 +1356,57 @@ u32 
FrameBufferToRDRAM::_RGBAtoRGBA32(u32 _c) { return (c.r << 24) | (c.g << 16) | (c.b << 8) | c.a; } -void FrameBufferToRDRAM::_copy(u32 _startAddress, u32 _endAddress, bool _sync) +#ifdef ANDROID +GLubyte* FrameBufferToRDRAM::getPixels(GLint _x0, GLint _y0, GLsizei _width, GLsizei _height, bool _sync) { - const u32 stride = m_pCurFrameBuffer->m_width << m_pCurFrameBuffer->m_size >> 1; - const u32 max_height = _cutHeight(_startAddress, m_pCurFrameBuffer->m_height, stride); - - u32 numPixels = (_endAddress - _startAddress) >> (m_pCurFrameBuffer->m_size - 1); - if (numPixels / m_pCurFrameBuffer->m_width > max_height) { - _endAddress = _startAddress + (max_height * stride); - numPixels = (_endAddress - _startAddress) >> (m_pCurFrameBuffer->m_size - 1); + GLenum colorFormat, colorType, colorFormatBytes; + if (m_pCurFrameBuffer->m_size > G_IM_SIZ_8b) { + colorFormat = fboFormats.colorFormat; + colorType = fboFormats.colorType; + colorFormatBytes = fboFormats.colorFormatBytes; + } + else { + colorFormat = fboFormats.monochromeFormat; + colorType = fboFormats.monochromeType; + colorFormatBytes = fboFormats.monochromeFormatBytes; } - const GLsizei width = m_pCurFrameBuffer->m_width; - const GLint x0 = 0; - const GLint y0 = max_height - (_endAddress - m_pCurFrameBuffer->m_startAddress) / stride; - const GLint y1 = max_height - (_startAddress - m_pCurFrameBuffer->m_startAddress) / stride; - const GLsizei height = std::min(max_height, 1u + y1 - y0); + GLubyte* pixelData = (GLubyte*)malloc(m_pTexture->realWidth * m_pTexture->realHeight * colorFormatBytes); + if (!_sync) { + void* ptr; + + glBindTexture(GL_TEXTURE_2D, m_pTexture->glName); + m_glEGLImageTargetTexture2DOES(GL_TEXTURE_2D, m_image); + glBindTexture(GL_TEXTURE_2D, 0); + + m_window->lock(GraphicBuffer::USAGE_SW_READ_OFTEN, &ptr); + memcpy(pixelData, ptr, m_pTexture->realWidth * m_pTexture->realHeight * colorFormatBytes); + m_window->unlock(); + + int widthBytes = _width*colorFormatBytes; + int strideBytes = 
m_pTexture->realWidth*colorFormatBytes; + for(unsigned int lnIndex = 0; lnIndex < _height; ++lnIndex) + { + memmove(pixelData + lnIndex*widthBytes, pixelData+((lnIndex+_y0)*strideBytes), widthBytes); + } + } + else { + glReadPixels(_x0, _y0, _width, _height, colorFormat, colorType, pixelData); + } + + return pixelData; +} + +void FrameBufferToRDRAM::cleanUpPixels(GLubyte* pixelData) +{ + free(pixelData); +} + +#else + +GLubyte* FrameBufferToRDRAM::getPixels(GLint _x0, GLint _y0, GLsizei _width, GLsizei _height, bool _sync) +{ GLenum colorFormat, colorType, colorFormatBytes; if (m_pCurFrameBuffer->m_size > G_IM_SIZ_8b) { colorFormat = fboFormats.colorFormat; @@ -1354,53 +1426,78 @@ void FrameBufferToRDRAM::_copy(u32 _startAddress, u32 _endAddress, bool _sync) m_curIndex ^= 1; const u32 nextIndex = m_curIndex ^ 1; glBindBuffer(GL_PIXEL_PACK_BUFFER, m_PBO[m_curIndex]); - glReadPixels(x0, y0, width, height, colorFormat, colorType, 0); + glReadPixels(_x0, _y0, _width, _height, colorFormat, colorType, 0); glBindBuffer(GL_PIXEL_PACK_BUFFER, m_PBO[nextIndex]); } else { glBindBuffer(GL_PIXEL_PACK_BUFFER, m_PBO[2]); - glReadPixels(x0, y0, width, height, colorFormat, colorType, 0); + glReadPixels(_x0, _y0, _width, _height, colorFormat, colorType, 0); } - GLubyte* pixelData = (GLubyte*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, width * height * colorFormatBytes, GL_MAP_READ_BIT); + GLubyte* pixelData = (GLubyte*)glMapBufferRange(GL_PIXEL_PACK_BUFFER, 0, _width * _height * colorFormatBytes, GL_MAP_READ_BIT); if (pixelData == NULL) - return; + return NULL; #else - GLubyte* pixelData = (GLubyte*)malloc(width * height * colorFormatBytes); + GLubyte* pixelData = (GLubyte*)malloc(_width * _height * colorFormatBytes); if (pixelData == NULL) - return; - glReadPixels(x0, y0, width, height, colorFormat, colorType, pixelData); + return NULL; + glReadPixels(_x0, _y0, _width, _height, colorFormat, colorType, pixelData); #endif // GLES2 - if (m_pCurFrameBuffer->m_size == G_IM_SIZ_32b) { - 
u32 *ptr_src = (u32*)pixelData; - u32 *ptr_dst = (u32*)(RDRAM + _startAddress); - std::vector srcBuf(width * height); - memcpy(srcBuf.data(), ptr_src, width * height * sizeof(u32)); - _writeToRdram(srcBuf.data(), ptr_dst, &FrameBufferToRDRAM::_RGBAtoRGBA32, 0, 0, width, height, numPixels, _startAddress, m_pCurFrameBuffer->m_startAddress, m_pCurFrameBuffer->m_size); - } else if (m_pCurFrameBuffer->m_size == G_IM_SIZ_16b) { - u32 *ptr_src = (u32*)pixelData; - u16 *ptr_dst = (u16*)(RDRAM + _startAddress); - std::vector srcBuf(width * height); - memcpy(srcBuf.data(), ptr_src, width * height * sizeof(u32)); - _writeToRdram(srcBuf.data(), ptr_dst, &FrameBufferToRDRAM::_RGBAtoRGBA16, 0, 1, width, height, numPixels, _startAddress, m_pCurFrameBuffer->m_startAddress, m_pCurFrameBuffer->m_size); - } else if (m_pCurFrameBuffer->m_size == G_IM_SIZ_8b) { - u8 *ptr_src = (u8*)pixelData; - u8 *ptr_dst = RDRAM + _startAddress; - std::vector srcBuf(width * height); - memcpy(srcBuf.data(), ptr_src, width * height * sizeof(u8)); - _writeToRdram(srcBuf.data(), ptr_dst, &FrameBufferToRDRAM::_RGBAtoR8, 0, 3, width, height, numPixels, _startAddress, m_pCurFrameBuffer->m_startAddress, m_pCurFrameBuffer->m_size); - } + return pixelData; +} - m_pCurFrameBuffer->m_copiedToRdram = true; - m_pCurFrameBuffer->copyRdram(); - m_pCurFrameBuffer->m_cleared = false; +void FrameBufferToRDRAM::cleanUpPixels(GLubyte* pixelData) +{ #ifndef GLES2 glUnmapBuffer(GL_PIXEL_PACK_BUFFER); glBindBuffer(GL_PIXEL_PACK_BUFFER, 0); #else free(pixelData); #endif +} +#endif //ANDROID + + +void FrameBufferToRDRAM::_copy(u32 _startAddress, u32 _endAddress, bool _sync) +{ + const u32 stride = m_pCurFrameBuffer->m_width << m_pCurFrameBuffer->m_size >> 1; + const u32 max_height = _cutHeight(_startAddress, m_pCurFrameBuffer->m_height, stride); + + u32 numPixels = (_endAddress - _startAddress) >> (m_pCurFrameBuffer->m_size - 1); + if (numPixels / m_pCurFrameBuffer->m_width > max_height) { + _endAddress = _startAddress + 
(max_height * stride); + numPixels = (_endAddress - _startAddress) >> (m_pCurFrameBuffer->m_size - 1); + } + + const GLsizei width = m_pCurFrameBuffer->m_width; + const GLint x0 = 0; + const GLint y0 = max_height - (_endAddress - m_pCurFrameBuffer->m_startAddress) / stride; + const GLint y1 = max_height - (_startAddress - m_pCurFrameBuffer->m_startAddress) / stride; + const GLsizei height = std::min(max_height, 1u + y1 - y0); + + GLubyte* pixelData = getPixels(x0, y0, width, height, _sync); + + if (m_pCurFrameBuffer->m_size == G_IM_SIZ_32b) { + u32 *ptr_src = (u32*)pixelData; + u32 *ptr_dst = (u32*)(RDRAM + _startAddress); + _writeToRdram(ptr_src, ptr_dst, &FrameBufferToRDRAM::_RGBAtoRGBA32, 0, 0, width, height, numPixels, _startAddress, m_pCurFrameBuffer->m_startAddress, m_pCurFrameBuffer->m_size); + } else if (m_pCurFrameBuffer->m_size == G_IM_SIZ_16b) { + u32 *ptr_src = (u32*)pixelData; + u16 *ptr_dst = (u16*)(RDRAM + _startAddress); + _writeToRdram(ptr_src, ptr_dst, &FrameBufferToRDRAM::_RGBAtoRGBA16, 0, 1, width, height, numPixels, _startAddress, m_pCurFrameBuffer->m_startAddress, m_pCurFrameBuffer->m_size); + } else if (m_pCurFrameBuffer->m_size == G_IM_SIZ_8b) { + u8 *ptr_src = (u8*)pixelData; + u8 *ptr_dst = RDRAM + _startAddress; + _writeToRdram(ptr_src, ptr_dst, &FrameBufferToRDRAM::_RGBAtoR8, 0, 3, width, height, numPixels, _startAddress, m_pCurFrameBuffer->m_startAddress, m_pCurFrameBuffer->m_size); + } + + m_pCurFrameBuffer->m_copiedToRdram = true; + m_pCurFrameBuffer->copyRdram(); + m_pCurFrameBuffer->m_cleared = false; + + cleanUpPixels(pixelData); + gDP.changed |= CHANGED_SCISSOR; } diff --git a/src/mupenplus/OpenGL_mupenplus.cpp b/src/mupenplus/OpenGL_mupenplus.cpp index ce380915..8e6621d5 100644 --- a/src/mupenplus/OpenGL_mupenplus.cpp +++ b/src/mupenplus/OpenGL_mupenplus.cpp @@ -44,6 +44,7 @@ OGLVideo & OGLVideo::get() void OGLVideoMupenPlus::_setAttributes() { + #ifdef GLES2 CoreVideo_GL_SetAttribute(M64P_GL_CONTEXT_MAJOR_VERSION, 2); 
CoreVideo_GL_SetAttribute(M64P_GL_CONTEXT_MINOR_VERSION, 0); @@ -60,6 +61,14 @@ void OGLVideoMupenPlus::_setAttributes() #else // Do nothing #endif + +#ifndef GLES2 + CoreVideo_GL_SetAttribute(M64P_GL_RED_SIZE, 8); + CoreVideo_GL_SetAttribute(M64P_GL_GREEN_SIZE, 8); + CoreVideo_GL_SetAttribute(M64P_GL_BLUE_SIZE, 8); + CoreVideo_GL_SetAttribute(M64P_GL_ALPHA_SIZE, 8); +#endif + CoreVideo_GL_SetAttribute(M64P_GL_DOUBLEBUFFER, 1); CoreVideo_GL_SetAttribute(M64P_GL_SWAP_CONTROL, config.video.verticalSync); CoreVideo_GL_SetAttribute(M64P_GL_BUFFER_SIZE, 32);