diff --git a/FrameBuffer.cpp b/FrameBuffer.cpp index 1f9896c7..da55a403 100644 --- a/FrameBuffer.cpp +++ b/FrameBuffer.cpp @@ -18,7 +18,7 @@ bool g_bCopyToRDRAM = false; bool g_bCopyFromRDRAM = false; -bool g_bUseFloatDepthTexture = false; +bool g_bUseFloatDepthTexture = true; static const GLint depthTextureInternalFormat = g_bUseFloatDepthTexture ? GL_R32F : GL_R16; static const GLenum depthTextureType = g_bUseFloatDepthTexture ? GL_FLOAT : GL_UNSIGNED_INT; diff --git a/GLSLCombiner.cpp b/GLSLCombiner.cpp index e5a37b99..e5ad2f55 100644 --- a/GLSLCombiner.cpp +++ b/GLSLCombiner.cpp @@ -9,6 +9,7 @@ # include // malloc() #endif #include +#include "N64.h" #include "OpenGL.h" #include "Combiner.h" #include "GLSLCombiner.h" @@ -23,6 +24,16 @@ static GLhandleARB g_calc_depth_shader_object; static GLhandleARB g_test_alpha_shader_object; static GLuint g_zlut_tex = 0; +static GLhandleARB g_shadow_map_vertex_shader_object; +static GLhandleARB g_shadow_map_fragment_shader_object; +static GLhandleARB g_draw_shadow_map_program; +GLuint g_tlut_tex = 0; + +static const GLuint ZlutImageUnit = 0; +static const GLuint TlutImageUnit = 1; +static const GLuint depthImageUnit = 2; + + static void display_warning(const char *text, ...) { @@ -40,6 +51,7 @@ void display_warning(const char *text, ...) } } +static void InitZlutTexture() { u16 * zLUT = new u16[0x40000]; @@ -66,12 +78,75 @@ void InitZlutTexture() delete[] zLUT; } +static void DestroyZlutTexture() { if (g_zlut_tex > 0) glDeleteTextures(1, &g_zlut_tex); } +static +void InitShadowMapShader() +{ + glGenTextures(1, &g_tlut_tex); + glBindTexture(GL_TEXTURE_1D, g_tlut_tex); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + glTexParameteri(GL_TEXTURE_1D, GL_TEXTURE_WRAP_S, GL_CLAMP); + glTexImage1D(GL_TEXTURE_1D, 0, GL_R16, 256, 0, GL_RED, GL_UNSIGNED_SHORT, NULL); + + g_shadow_map_vertex_shader_object = glCreateShaderObjectARB(GL_VERTEX_SHADER_ARB); + glShaderSourceARB(g_shadow_map_vertex_shader_object, 1, &shadow_map_vertex_shader, NULL); + glCompileShaderARB(g_shadow_map_vertex_shader_object); + + g_shadow_map_fragment_shader_object = glCreateShaderObjectARB(GL_FRAGMENT_SHADER_ARB); + if (g_bUseFloatDepthTexture) + glShaderSourceARB(g_shadow_map_fragment_shader_object, 1, &shadow_map_fragment_shader_float, NULL); + else + glShaderSourceARB(g_shadow_map_fragment_shader_object, 1, &shadow_map_fragment_shader_int, NULL); + glCompileShaderARB(g_shadow_map_fragment_shader_object); + + g_draw_shadow_map_program = glCreateProgramObjectARB(); + glAttachObjectARB(g_draw_shadow_map_program, g_shadow_map_vertex_shader_object); + glAttachObjectARB(g_draw_shadow_map_program, g_shadow_map_fragment_shader_object); + glLinkProgramARB(g_draw_shadow_map_program); + +#ifdef _DEBUG + int log_length; + glGetObjectParameterivARB(g_draw_shadow_map_program, GL_OBJECT_LINK_STATUS_ARB , &log_length); + if(!log_length) + { + const int nLogSize = 1024; + char shader_log[nLogSize]; + glGetInfoLogARB(g_shadow_map_fragment_shader_object, + nLogSize, &log_length, shader_log); + if(log_length) + display_warning(shader_log); + glGetInfoLogARB(g_shadow_map_vertex_shader_object, nLogSize, &log_length, shader_log); + if(log_length) + display_warning(shader_log); + glGetInfoLogARB(g_draw_shadow_map_program, + nLogSize, &log_length, shader_log); + if(log_length) + display_warning(shader_log); + } +#endif +} + +static +void DestroyShadowMapShader() +{ + if (g_tlut_tex > 0) + glDeleteTextures(1, &g_tlut_tex); + /* + glDetachShader(g_draw_shadow_map_program, g_shadow_map_vertex_shader_object); + glDetachShader(g_draw_shadow_map_program, g_shadow_map_fragment_shader_object); + glDeleteShader(g_shadow_map_vertex_shader_object); + glDeleteShader(g_shadow_map_fragment_shader_object); + glDeleteProgram(g_draw_shadow_map_program); + */ +} + void InitGLSLCombiner() { glActiveTextureARB(GL_TEXTURE0_ARB); @@ -107,6 +182,7 @@ void InitGLSLCombiner() glCompileShaderARB(g_calc_depth_shader_object); InitZlutTexture(); + InitShadowMapShader(); /* const char* base_vertex_shader = @@ -148,6 +224,7 @@ const char* base_vertex_shader = void DestroyGLSLCombiner() { ogl_glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); DestroyZlutTexture(); + DestroyShadowMapShader(); } const char *ColorInput_1cycle[] = { @@ -356,6 +433,7 @@ GLSLCombiner::GLSLCombiner(Combiner *_color, Combiner *_alpha) { #endif strcat(fragment_shader, " if (fog_enabled > 0) \n"); strcat(fragment_shader, " gl_FragColor = vec4(mix(gl_Fog.color.rgb, gl_FragColor.rgb, gl_FogFragCoord), gl_FragColor.a); \n"); + strcat(fragment_shader, fragment_shader_end); #ifdef USE_TOONIFY @@ -519,8 +597,6 @@ void GLSLCombiner::UpdateDepthInfo() { glUniform1iARB(depth_polygon_offset, iPlygonOffset); } - const GLuint ZlutImageUnit = 0; - const GLuint depthImageUnit = 1; GLuint texture = frameBuffer.top->depth_texture->glName; glBindImageTexture(ZlutImageUnit, g_zlut_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16UI); GLenum depthTexFormat = g_bUseFloatDepthTexture ? GL_R32F : GL_R16UI; @@ -650,3 +726,19 @@ void GLSL_RenderDepth() { gDP.changed |= CHANGED_COMBINE; #endif } + +void GLS_SetShadowMapCombiner() { + /* + glBindTexture(GL_TEXTURE_1D, g_tlut_tex); + u16 *pData = (u16*)&TMEM[256]; + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RED, GL_UNSIGNED_SHORT, pData); + glBindTexture(GL_TEXTURE_1D, 0); + */ + glUseProgramObjectARB(g_draw_shadow_map_program); + + glBindImageTexture(TlutImageUnit, g_tlut_tex, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R16UI); + GLenum depthTexFormat = g_bUseFloatDepthTexture ? GL_R32F : GL_R16UI; + GLuint texture = frameBuffer.top->depth_texture->glName; + glBindImageTexture(depthImageUnit, texture, 0, GL_FALSE, 0, GL_READ_ONLY, depthTexFormat); + gDP.changed |= CHANGED_COMBINE; +} diff --git a/OpenGL.cpp b/OpenGL.cpp index 53349733..3939ac2c 100644 --- a/OpenGL.cpp +++ b/OpenGL.cpp @@ -890,8 +890,9 @@ void OGL_DrawTriangles() Combiner_UpdateCombineDepthInfo(); glDrawArrays( GL_TRIANGLES, 0, OGL.numVertices ); glBindImageTexture(0, 0, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R16UI); + glBindImageTexture(1, 0, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R16UI); GLenum depthTexFormat = g_bUseFloatDepthTexture ? GL_R32F : GL_R16UI; - glBindImageTexture(1, 0, 0, GL_FALSE, 0, GL_READ_WRITE, depthTexFormat); + glBindImageTexture(2, 0, 0, GL_FALSE, 0, GL_READ_WRITE, depthTexFormat); OGL.numTriangles = OGL.numVertices = 0; } @@ -972,6 +973,7 @@ void OGL_DrawRect( int ulx, int uly, int lrx, int lry, float *color ) glEnable( GL_DEPTH_TEST ); } +void GLS_SetShadowMapCombiner(); void OGL_DrawTexturedRect( float ulx, float uly, float lrx, float lry, float uls, float ult, float lrs, float lrt, bool flip ) { GLVertex rect[2] = @@ -982,6 +984,10 @@ void OGL_DrawTexturedRect( float ulx, float uly, float lrx, float lry, float uls OGL_UpdateStates(); +// if ((gDP.otherMode.l >> 16) == 0x3c18 && gDP.combine.muxs0 == 0x00ffffff && gDP.combine.muxs1 == 0xfffff238) //depth image based fog + if (gDP.textureImage.address >= gDP.depthImageAddress && gDP.textureImage.address < (gDP.depthImageAddress + gDP.colorImage.width*gDP.colorImage.width*6/4)) + GLS_SetShadowMapCombiner(); + glDisable( GL_CULL_FACE ); glMatrixMode( GL_PROJECTION ); glLoadIdentity(); @@ -1144,6 +1150,10 @@ void OGL_DrawTexturedRect( float ulx, float uly, float lrx, float lry, float uls glLoadIdentity(); OGL_UpdateCullFace(); OGL_UpdateViewport(); + + glBindImageTexture(0, 0, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R16UI); + glBindImageTexture(1, 0, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R16UI); + glBindImageTexture(2, 0, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R16UI); } void OGL_ClearDepthBuffer() diff --git a/OpenGL.h b/OpenGL.h index 40ad961d..fc3305b7 100644 --- a/OpenGL.h +++ b/OpenGL.h @@ -220,4 +220,6 @@ GLenum ogl_glCheckFramebufferStatus (GLenum target); void ogl_glBlitFramebuffer(GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter); bool checkFBO(); +extern GLuint g_tlut_tex; + #endif diff --git a/Shaders.h b/Shaders.h index b426f1a7..4dd36c69 100644 --- a/Shaders.h +++ b/Shaders.h @@ -1,7 +1,7 @@ static const char* vertex_shader = "uniform float time; \n" "varying vec2 noiseCoord2D; \n" -"varying vec4 secondary_color; \n" +"varying vec4 secondary_color; \n" "void main() \n" "{ \n" " gl_Position = ftransform(); \n" @@ -154,13 +154,13 @@ static const char* depth_compare_shader_int = "uniform int depthUpdateEnabled; \n" "uniform unsigned int depthPolygonOffset; \n" "layout(binding = 0, r16ui) uniform readonly uimage2D zlut_image;\n" -"layout(binding = 1, r16ui) uniform restrict uimage2D depth_image;\n" +"layout(binding = 2, r16ui) uniform restrict uimage2D depth_image;\n" "bool depth_compare() \n" "{ \n" " if (depthEnabled == 0) return true; \n" " ivec2 coord = ivec2(gl_FragCoord.xy); \n" " highp uvec4 depth = imageLoad(depth_image,coord); \n" -" highp unsigned int bufZ = depth.r; \n" +" highp unsigned int bufZ = depth.r; \n" " highp int iZ = int(gl_FragCoord.z*262143.0); \n" " int y0 = iZ / 512; \n" " int x0 = iZ - 512*y0; \n" @@ -184,18 +184,19 @@ static const char* depth_compare_shader_float = "uniform int depthUpdateEnabled; \n" "uniform float depthPolygonOffset; \n" "layout(binding = 0, r16ui) uniform readonly uimage2D zlut_image;\n" -"layout(binding = 1, r32f) uniform restrict image2D depth_image;\n" +"layout(binding = 2, r32f) uniform restrict image2D depth_image;\n" "bool depth_compare() \n" "{ \n" " if (depthEnabled == 0) return true; \n" " ivec2 coord = ivec2(gl_FragCoord.xy); \n" " highp vec4 depth = imageLoad(depth_image,coord); \n" " highp float bufZ = depth.r; \n" -" highp int iZ = int(gl_FragCoord.z*262143.0); \n" -" int y0 = iZ / 512; \n" +" highp int iZ = max(0, int((gl_FragCoord.z-0.005)*262143.0)); \n" +" int y0 = clamp(iZ / 512, 0, 511); \n" " int x0 = iZ - 512*y0; \n" " unsigned int icurZ = imageLoad(zlut_image,ivec2(x0,y0)).r;\n" -" highp float curZ = float(icurZ)/262143.0 - depthPolygonOffset; \n" +//" highp float curZ = clamp(float(icurZ)/65535.0 - depthPolygonOffset, 0.0, 1.0); \n" +" highp float curZ = clamp(float(icurZ)/65532.0, 0.0, 1.0); \n" " if (depthUpdateEnabled > 0 && curZ < depth.r) { \n" " depth.r = curZ; \n" " imageStore(depth_image,coord,depth); \n" @@ -203,7 +204,6 @@ static const char* depth_compare_shader_float = " memoryBarrier(); \n" " if (depthCompareEnabled > 0) \n" " return curZ <= bufZ; \n" - " return true; \n" "} \n" ; @@ -218,3 +218,50 @@ static const char* alpha_test_fragment_shader = " return alphaValue > 0.0; \n" "} \n" ; + +static const char* shadow_map_vertex_shader = +"void main() \n" +"{ \n" +" gl_Position = ftransform(); \n" +" gl_FrontColor = gl_Color; \n" +"} \n" +; + +static const char* shadow_map_fragment_shader_float = +"#version 420 core \n" +"layout(binding = 1, r16ui) uniform readonly uimage1D tlut_image;\n" +"layout(binding = 2, r32f) uniform readonly image2D depth_image;\n" +"float get_alpha() \n" +"{ \n" +" ivec2 coord = ivec2(gl_FragCoord.xy); \n" +" float bufZ = imageLoad(depth_image,coord).r; \n" +" int index = min(255, int(bufZ*255.0)); \n" +" unsigned int iAlpha = imageLoad(tlut_image,index).r; \n" +" memoryBarrier(); \n" +" return float(iAlpha/256)/255.0; \n" +"} \n" +"void main() \n" +"{ \n" +" gl_FragColor = vec4(gl_Fog.color.rgb, get_alpha()); \n" +"} \n" +; + +static const char* shadow_map_fragment_shader_int = +"#version 420 core \n" +"layout(binding = 1, r16ui) uniform readonly uimage1D tlut_image;\n" +"layout(binding = 2, r16ui) uniform readonly uimage2D depth_image;\n" +"float get_alpha() \n" +"{ \n" +" ivec2 coord = ivec2(gl_FragCoord.xy); \n" +" unsigned int bufZ = imageLoad(depth_image,coord).r; \n" +" int index = min(255, int(bufZ/256)); \n" +" index += 80; \n" +" unsigned int iAlpha = imageLoad(tlut_image,index).r; \n" +" memoryBarrier(); \n" +" return float(iAlpha/256)/255.0; \n" +"} \n" +"void main() \n" +"{ \n" +" gl_FragColor = vec4(gl_Fog.color.rgb, get_alpha()); \n" +"} \n" +; diff --git a/VI.cpp b/VI.cpp index 027236a3..ef24d4a3 100644 --- a/VI.cpp +++ b/VI.cpp @@ -71,8 +71,6 @@ void VI_UpdateScreen() } if (g_bCopyToRDRAM && !bCFB) FrameBuffer_CopyToRDRAM( *REG.VI_ORIGIN, false ); -void GLSL_RenderDepth(); -GLSL_RenderDepth(); FrameBuffer_RenderBuffer( *REG.VI_ORIGIN ); gDP.colorImage.changed = FALSE; diff --git a/gDP.cpp b/gDP.cpp index b88bf74a..4339381f 100644 --- a/gDP.cpp +++ b/gDP.cpp @@ -676,6 +676,21 @@ void gDPLoadBlock( u32 tile, u32 uls, u32 ult, u32 lrs, u32 dxt ) #endif } +void load_palette (u32 addr) +{ + if (g_tlut_tex == 0) + return; + u16 *pPal = gDP.palette; + for (u32 i = 0; i < 256; ++i) { + *(pPal++) = *(u16*)(RDRAM + (addr^2)); + addr += 2; + } + glBindTexture(GL_TEXTURE_1D, g_tlut_tex); + glTexSubImage1D(GL_TEXTURE_1D, 0, 0, 256, GL_RED, GL_UNSIGNED_SHORT, gDP.palette); + glBindTexture(GL_TEXTURE_1D, 0); +} + + void gDPLoadTLUT( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt ) { gDPSetTileSize( tile, uls, ult, lrs, lrt ); @@ -711,6 +726,9 @@ void gDPLoadTLUT( u32 tile, u32 uls, u32 ult, u32 lrs, u32 lrt ) gDP.changed |= CHANGED_TMEM; + if (count == 256) + load_palette(gDP.textureImage.address); + #ifdef DEBUG DebugMsg( DEBUG_HIGH | DEBUG_HANDLED | DEBUG_TEXTURE, "gDPLoadTLUT( %i, %i, %i, %i, %i );\n", tile, gDP.tiles[tile].uls, gDP.tiles[tile].ult, gDP.tiles[tile].lrs, gDP.tiles[tile].lrt ); diff --git a/gDP.h b/gDP.h index b355eb99..66515953 100644 --- a/gDP.h +++ b/gDP.h @@ -228,7 +228,7 @@ struct gDPInfo u32 changed; - //u16 palette[256]; + u16 palette[256]; u32 paletteCRC16[16]; u32 paletteCRC256; u32 half_1, half_2;