From 663a0d76f1cc359e58ef5fd343146bee45c1a7ea Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Wed, 6 May 2015 18:36:07 +0600 Subject: [PATCH] Implement multisampled textures fetch in shaders. Thanks mudlord for the idea. Fixed Perfect Dark frame buffer effects do not work when FSAA is on. #244 --- src/FrameBuffer.cpp | 4 +- src/GLSLCombiner.cpp | 62 +++++++++++++++++++++----- src/GLSLCombiner.h | 6 +-- src/Shaders.h | 30 +++++++++++++ src/Textures.cpp | 103 ++++++++++++++++++++++++++++++------------- src/Textures.h | 6 +++ 6 files changed, 164 insertions(+), 47 deletions(-) diff --git a/src/FrameBuffer.cpp b/src/FrameBuffer.cpp index daf64b5f..6db7d96e 100644 --- a/src/FrameBuffer.cpp +++ b/src/FrameBuffer.cpp @@ -777,7 +777,7 @@ void FrameBuffer_ActivateBufferTexture(s16 t, FrameBuffer *pBuffer) if (pBuffer == NULL || pBuffer->m_pTexture == NULL) return; - CachedTexture *pTexture = pBuffer->getTexture(); + CachedTexture *pTexture = pBuffer->m_pTexture; pTexture->scaleS = pBuffer->m_scaleX / (float)pTexture->realWidth; pTexture->scaleT = pBuffer->m_scaleY / (float)pTexture->realHeight; @@ -819,7 +819,7 @@ void FrameBuffer_ActivateBufferTextureBG(s16 t, FrameBuffer *pBuffer ) if (pBuffer == NULL || pBuffer->m_pTexture == NULL) return; - CachedTexture *pTexture = pBuffer->getTexture(); + CachedTexture *pTexture = pBuffer->m_pTexture; pTexture->scaleS = video().getScaleX() / (float)pTexture->realWidth; pTexture->scaleT = video().getScaleY() / (float)pTexture->realHeight; diff --git a/src/GLSLCombiner.cpp b/src/GLSLCombiner.cpp index 02de1061..ad1625af 100644 --- a/src/GLSLCombiner.cpp +++ b/src/GLSLCombiner.cpp @@ -46,8 +46,7 @@ bool checkShaderCompileStatus(GLuint obj) { GLint status; glGetShaderiv(obj, GL_COMPILE_STATUS, &status); - if(status == GL_FALSE) - { + if(status == GL_FALSE) { GLchar shader_log[nShaderLogSize]; GLsizei nLogSize = nShaderLogSize; glGetShaderInfoLog(obj, nShaderLogSize, &nLogSize, shader_log); @@ -62,8 +61,7 @@ bool 
checkProgramLinkStatus(GLuint obj) { GLint status; glGetProgramiv(obj, GL_LINK_STATUS, &status); - if(status == GL_FALSE) - { + if(status == GL_FALSE) { GLsizei nLogSize = nShaderLogSize; GLchar shader_log[nShaderLogSize]; glGetProgramInfoLog(obj, nShaderLogSize, &nLogSize, shader_log); @@ -73,7 +71,6 @@ bool checkProgramLinkStatus(GLuint obj) return true; } -static const GLuint noiseTexIndex = 2; class NoiseTexture { public: @@ -159,7 +156,7 @@ void NoiseTexture::update() glUnmapBuffer(GL_PIXEL_UNPACK_BUFFER); // release the mapped buffer #endif - glActiveTexture(GL_TEXTURE0 + noiseTexIndex); + glActiveTexture(GL_TEXTURE0 + g_noiseTexIndex); glBindTexture(GL_TEXTURE_2D, m_pTexture->glName); #ifndef GLES2 glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, VI.width, VI.height, GL_RED, GL_UNSIGNED_BYTE, 0); @@ -528,10 +525,29 @@ ShaderCombiner::ShaderCombiner(Combiner & _color, Combiner & _alpha, const gDPCo strFragmentShader.append(" lowp vec4 readtex0, readtex1; \n"); strFragmentShader.append(" lowp float lod_frac = mipmap(readtex0, readtex1); \n"); } else { +#ifdef GL_MULTISAMPLING_SUPPORT + if (usesT0()) { + if (config.video.multisampling > 0) { + strFragmentShader.append(" lowp vec4 readtex0; \n"); + strFragmentShader.append(" if (uMSTex0Enabled == 0) readtex0 = readTex(uTex0, vTexCoord0, uFb8Bit == 1 || uFb8Bit == 3, uFbFixedAlpha == 1 || uFbFixedAlpha == 3); \n"); + strFragmentShader.append(" else readtex0 = readTexMS(uMSTex0, vTexCoord0, uFb8Bit == 1 || uFb8Bit == 3, uFbFixedAlpha == 1 || uFbFixedAlpha == 3); \n"); + } else + strFragmentShader.append(" lowp vec4 readtex0 = readTex(uTex0, vTexCoord0, uFb8Bit == 1 || uFb8Bit == 3, uFbFixedAlpha == 1 || uFbFixedAlpha == 3); \n"); + } + if (usesT1()) { + if (config.video.multisampling > 0) { + strFragmentShader.append(" lowp vec4 readtex1; \n"); + strFragmentShader.append(" if (uMSTex1Enabled == 0) readtex1 = readTex(uTex1, vTexCoord1, uFb8Bit == 2 || uFb8Bit == 3, uFbFixedAlpha == 2 || uFbFixedAlpha == 3); \n"); + 
strFragmentShader.append(" else readtex1 = readTexMS(uMSTex1, vTexCoord1, uFb8Bit == 2 || uFb8Bit == 3, uFbFixedAlpha == 2 || uFbFixedAlpha == 3); \n"); /* texture 1 is selected by bit 2 of the uFb8Bit / uFbFixedAlpha masks, same as the readTex() branch */ + } else + strFragmentShader.append(" lowp vec4 readtex1 = readTex(uTex1, vTexCoord1, uFb8Bit == 2 || uFb8Bit == 3, uFbFixedAlpha == 2 || uFbFixedAlpha == 3); \n"); + } +#else if (usesT0()) strFragmentShader.append(" lowp vec4 readtex0 = readTex(uTex0, vTexCoord0, uFb8Bit == 1 || uFb8Bit == 3, uFbFixedAlpha == 1 || uFbFixedAlpha == 3); \n"); if (usesT1()) strFragmentShader.append(" lowp vec4 readtex1 = readTex(uTex1, vTexCoord1, uFb8Bit == 2 || uFb8Bit == 3, uFbFixedAlpha == 2 || uFbFixedAlpha == 3); \n"); +#endif // GL_MULTISAMPLING_SUPPORT } const bool bUseHWLight = config.generalEmulation.enableHWLighting != 0 && GBI.isHWLSupported() && usesShadeColor(); if (bUseHWLight) @@ -692,6 +708,15 @@ void ShaderCombiner::_locateUniforms() { LocateUniform(uScreenScale); LocateUniform(uDepthScale); LocateUniform(uFogScale); + +#ifdef GL_MULTISAMPLING_SUPPORT + LocateUniform(uMSTex0); + LocateUniform(uMSTex1); + LocateUniform(uMSTex0Enabled); + LocateUniform(uMSTex1Enabled); + LocateUniform(uMSAASamples); + LocateUniform(uMSAAScale); +#endif } void ShaderCombiner::_locate_attributes() const { @@ -706,12 +731,22 @@ void ShaderCombiner::update(bool _bForce) { glUseProgram(m_program); if (_bForce) { - _setIUniform(m_uniforms.uTex0, 0, _bForce); - _setIUniform(m_uniforms.uTex1, 1, _bForce); - _setIUniform(m_uniforms.uTexNoise, noiseTexIndex, _bForce); + _setIUniform(m_uniforms.uTexNoise, g_noiseTexIndex, true); + if (usesTex()) { + _setIUniform(m_uniforms.uTex0, 0, true); + _setIUniform(m_uniforms.uTex1, 1, true); +#ifdef GL_MULTISAMPLING_SUPPORT + _setIUniform(m_uniforms.uMSTex0, g_MSTex0Index + 0, true); + _setIUniform(m_uniforms.uMSTex1, g_MSTex0Index + 1, true); + _setIUniform(m_uniforms.uMSAASamples, config.video.multisampling, true); + _setFUniform(m_uniforms.uMSAAScale, 1.0f / (float)config.video.multisampling, true); + 
_setIUniform(m_uniforms.uMSTex0Enabled, 0, true); + _setIUniform(m_uniforms.uMSTex1Enabled, 0, true); +#endif + } - updateFBInfo(_bForce); - updateRenderState(_bForce); + updateFBInfo(true); + updateRenderState(true); } updateGammaCorrection(_bForce); @@ -858,6 +893,7 @@ void ShaderCombiner::updateFBInfo(bool _bForce) { return; int nFb8bitMode = 0, nFbFixedAlpha = 0; + int nMSTex0Enabled = 0, nMSTex1Enabled = 0; TextureCache & cache = textureCache(); if (cache.current[0] != NULL && cache.current[0]->frameBufferTexture == TRUE) { if (cache.current[0]->size == G_IM_SIZ_8b) { @@ -865,6 +901,7 @@ void ShaderCombiner::updateFBInfo(bool _bForce) { if (gDP.otherMode.imageRead == 0) nFbFixedAlpha |= 1; } + nMSTex0Enabled = config.video.multisampling; } if (cache.current[1] != NULL && cache.current[1]->frameBufferTexture == TRUE) { if (cache.current[1]->size == G_IM_SIZ_8b) { @@ -872,9 +909,12 @@ void ShaderCombiner::updateFBInfo(bool _bForce) { if (gDP.otherMode.imageRead == 0) nFbFixedAlpha |= 2; } + nMSTex1Enabled = config.video.multisampling; } _setIUniform(m_uniforms.uFb8Bit, nFb8bitMode, _bForce); _setIUniform(m_uniforms.uFbFixedAlpha, nFbFixedAlpha, _bForce); + _setIUniform(m_uniforms.uMSTex0Enabled, nMSTex0Enabled, _bForce); + _setIUniform(m_uniforms.uMSTex1Enabled, nMSTex1Enabled, _bForce); gDP.changed &= ~CHANGED_FB_TEXTURE; } diff --git a/src/GLSLCombiner.h b/src/GLSLCombiner.h index 3106a80c..d7adc437 100644 --- a/src/GLSLCombiner.h +++ b/src/GLSLCombiner.h @@ -39,14 +39,14 @@ private: struct UniformLocation { - iUniform uTex0, uTex1, uTexNoise, uTlutImage, uZlutImage, uDepthImage, + iUniform uTex0, uTex1, uMSTex0, uMSTex1, uMSTex0Enabled, uMSTex1Enabled, uTexNoise, uTlutImage, uZlutImage, uDepthImage, uFogMode, uFogUsage, uEnableLod, uEnableAlphaTest, uEnableDepth, uEnableDepthCompare, uEnableDepthUpdate, uDepthMode, uDepthSource, uFb8Bit, uFbFixedAlpha, uRenderState, uSpecialBlendMode, - uMaxTile, uTextureDetail, uTexturePersp, uTextureFilterMode, + uMaxTile, 
uTextureDetail, uTexturePersp, uTextureFilterMode, uMSAASamples, uAlphaCompareMode, uAlphaDitherMode, uColorDitherMode, uGammaCorrectionEnabled; - fUniform uFogAlpha, uPrimitiveLod, uMinLod, uDeltaZ, uAlphaTestValue; + fUniform uFogAlpha, uPrimitiveLod, uMinLod, uDeltaZ, uAlphaTestValue, uMSAAScale; fv2Uniform uScreenScale, uDepthScale, uFogScale; }; diff --git a/src/Shaders.h b/src/Shaders.h index 25b47e20..0e358d5b 100644 --- a/src/Shaders.h +++ b/src/Shaders.h @@ -163,6 +163,12 @@ static const char* fragment_shader_header_common_variables = SHADER_VERSION "uniform sampler2D uTex0; \n" "uniform sampler2D uTex1; \n" +#ifdef GL_MULTISAMPLING_SUPPORT +"uniform sampler2DMS uMSTex0; \n" +"uniform sampler2DMS uMSTex1; \n" +"uniform lowp int uMSTex0Enabled; \n" +"uniform lowp int uMSTex1Enabled; \n" +#endif "layout (std140) uniform ColorsBlock {\n" " lowp vec4 uFogColor; \n" " lowp vec4 uCenterColor; \n" @@ -236,6 +242,9 @@ static const char* fragment_shader_header_common_functions = "void calc_light(in lowp float fLights, in lowp vec3 input_color, out lowp vec3 output_color);\n" "mediump float mipmap(out lowp vec4 readtex0, out lowp vec4 readtex1); \n" "lowp vec4 readTex(in sampler2D tex, in mediump vec2 texCoord, in bool fb8bit, in bool fbFixedAlpha); \n" +#ifdef GL_MULTISAMPLING_SUPPORT +"lowp vec4 readTexMS(in sampler2DMS mstex, in mediump vec2 texCoord, in bool fb8bit, in bool fbFixedAlpha); \n" +#endif // GL_MULTISAMPLING_SUPPORT "bool depth_compare(); \n" "void colorNoiseDither(in float _noise, inout vec3 _color); \n" "void alphaNoiseDither(in float _noise, inout float _alpha);\n" @@ -469,6 +478,27 @@ SHADER_VERSION " if (fbFixedAlpha) texColor.a = 0.825; \n" " return texColor; \n" "} \n" +#ifdef GL_MULTISAMPLING_SUPPORT +"uniform lowp int uMSAASamples; \n" +"uniform lowp float uMSAAScale; \n" +"lowp vec4 sampleMS(in sampler2DMS mstex, in mediump ivec2 ipos) \n" +"{ \n" +" lowp vec4 texel = vec4(0.0); \n" +" for (int i = 0; i < uMSAASamples; ++i) \n" +" texel += 
texelFetch(mstex, ipos, i); \n" +" return texel * uMSAAScale; \n" +"} \n" +" \n" +"lowp vec4 readTexMS(in sampler2DMS mstex, in mediump vec2 texCoord, in bool fb8bit, in bool fbFixedAlpha) \n" +"{ \n" +" mediump vec2 msTexSize = vec2(textureSize(mstex)); \n" +" mediump ivec2 itexCoord = ivec2(msTexSize * texCoord); \n" +" lowp vec4 texColor = sampleMS(mstex, itexCoord); \n" +" if (fb8bit) texColor = vec4(texColor.r); \n" +" if (fbFixedAlpha) texColor.a = 0.825; \n" +" return texColor; \n" +"} \n" +#endif // GL_MULTISAMPLING_SUPPORT ; static const char* fragment_shader_noise = diff --git a/src/Textures.cpp b/src/Textures.cpp index b2fb1579..a100fff3 100644 --- a/src/Textures.cpp +++ b/src/Textures.cpp @@ -19,6 +19,9 @@ using namespace std; +const GLuint g_noiseTexIndex = 2; +const GLuint g_MSTex0Index = g_noiseTexIndex + 1; + typedef u32 (*GetTexelFunc)( u64 *src, u16 x, u16 i, u8 palette ); inline u32 GetNone( u64 *src, u16 x, u16 i, u8 palette ) @@ -436,45 +439,61 @@ TextureCache & TextureCache::get() { return cache; } +void TextureCache::_initDummyTexture(CachedTexture * _pDummy) +{ + _pDummy->address = 0; + _pDummy->clampS = 1; + _pDummy->clampT = 1; + _pDummy->clampWidth = 2; + _pDummy->clampHeight = 2; + _pDummy->crc = 0; + _pDummy->format = 0; + _pDummy->size = 0; + _pDummy->frameBufferTexture = FALSE; + _pDummy->width = 2; + _pDummy->height = 2; + _pDummy->realWidth = 2; + _pDummy->realHeight = 2; + _pDummy->maskS = 0; + _pDummy->maskT = 0; + _pDummy->scaleS = 0.5f; + _pDummy->scaleT = 0.5f; + _pDummy->shiftScaleS = 1.0f; + _pDummy->shiftScaleT = 1.0f; + _pDummy->textureBytes = 2 * 2 * 4; + _pDummy->tMem = 0; +} + void TextureCache::init() { - u32 dummyTexture[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; - m_maxBytes = config.texture.maxBytes; - m_pDummy = addFrameBufferTexture(); // we don't want to remove dummy texture - - m_pDummy->address = 0; - m_pDummy->clampS = 1; - m_pDummy->clampT = 1; - m_pDummy->clampWidth = 2; - m_pDummy->clampHeight 
= 2; - m_pDummy->crc = 0; - m_pDummy->format = 0; - m_pDummy->size = 0; - m_pDummy->frameBufferTexture = FALSE; - m_pDummy->width = 2; - m_pDummy->height = 2; - m_pDummy->realWidth = 2; - m_pDummy->realHeight = 2; - m_pDummy->maskS = 0; - m_pDummy->maskT = 0; - m_pDummy->scaleS = 0.5f; - m_pDummy->scaleT = 0.5f; - m_pDummy->shiftScaleS = 1.0f; - m_pDummy->shiftScaleT = 1.0f; - m_pDummy->textureBytes = 2*2*4; - m_pDummy->tMem = 0; - glGetIntegerv(GL_UNPACK_ALIGNMENT, &m_curUnpackAlignment); - glBindTexture( GL_TEXTURE_2D, m_pDummy->glName ); + u32 dummyTexture[16] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 }; + + m_pDummy = addFrameBufferTexture(); // we don't want to remove dummy texture + _initDummyTexture(m_pDummy); + + glBindTexture(GL_TEXTURE_2D, m_pDummy->glName); glTexImage2D( GL_TEXTURE_2D, 0, GL_RGBA, 2, 2, 0, GL_RGBA, GL_UNSIGNED_BYTE, dummyTexture ); m_cachedBytes = m_pDummy->textureBytes; activateDummy( 0 ); activateDummy( 1 ); current[0] = current[1] = NULL; + +#ifdef GL_MULTISAMPLING_SUPPORT + m_pMSDummy = addFrameBufferTexture(); // we don't want to remove dummy texture + _initDummyTexture(m_pMSDummy); + + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, m_pMSDummy->glName); + glTexImage2DMultisample(GL_TEXTURE_2D_MULTISAMPLE, config.video.multisampling, GL_RGBA8, m_pMSDummy->realWidth, m_pMSDummy->realHeight, false); + activateMSDummy(0); + activateMSDummy(1); +#else + m_pMSDummy = NULL; +#endif } void TextureCache::destroy() @@ -1053,10 +1072,18 @@ u32 _calculateCRC(u32 t, const TextureParams & _params) void TextureCache::activateTexture(u32 _t, CachedTexture *_pTexture) { - glActiveTexture(GL_TEXTURE0 + _t); - - // Bind the cached texture - glBindTexture( GL_TEXTURE_2D, _pTexture->glName ); +#ifdef GL_MULTISAMPLING_SUPPORT + if (config.video.multisampling > 0 && _pTexture->frameBufferTexture == TRUE) { + glActiveTexture(GL_TEXTURE0 + g_MSTex0Index + _t); + // Bind the cached texture + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, _pTexture->glName); + } 
else +#endif + { + glActiveTexture(GL_TEXTURE0 + _t); + // Bind the cached texture + glBindTexture(GL_TEXTURE_2D, _pTexture->glName); + } const bool bUseBilinear = (gDP.otherMode.textureFilter | (gSP.objRendermode&G_OBJRM_BILERP)) != 0; @@ -1118,6 +1145,20 @@ void TextureCache::activateDummy(u32 _t) glTexParameteri( GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST ); } +void TextureCache::activateMSDummy(u32 _t) +{ +#ifdef GL_MULTISAMPLING_SUPPORT + glActiveTexture(GL_TEXTURE0 + g_MSTex0Index + _t); + + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, m_pMSDummy->glName); + + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST); + glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST); + + glBindTexture(GL_TEXTURE_2D_MULTISAMPLE, 0); +#endif +} + void TextureCache::_updateBackground() { u32 numBytes = gSP.bgImage.width * gSP.bgImage.height << gSP.bgImage.size >> 1; diff --git a/src/Textures.h b/src/Textures.h index bd8af766..221fbb1e 100644 --- a/src/Textures.h +++ b/src/Textures.h @@ -6,6 +6,9 @@ #include "CRC.h" #include "convert.h" +extern const GLuint g_noiseTexIndex; +extern const GLuint g_MSTex0Index; + struct CachedTexture { CachedTexture(GLuint _glName) : glName(_glName), max_level(0) {} @@ -48,6 +51,7 @@ struct TextureCache void removeFrameBufferTexture(CachedTexture * _pTexture); void activateTexture(u32 _t, CachedTexture *_pTexture); void activateDummy(u32 _t); + void activateMSDummy(u32 _t); void update(u32 _t); static TextureCache & get(); @@ -69,11 +73,13 @@ private: bool _loadHiresBackground(CachedTexture *_pTexture); void _updateBackground(); void _clear(); + void _initDummyTexture(CachedTexture * _pDummy); typedef std::map Textures; Textures m_textures; Textures m_fbTextures; CachedTexture * m_pDummy; + CachedTexture * m_pMSDummy; u32 m_hits, m_misses; u32 m_maxBytes; u32 m_cachedBytes;