From 6af6e2c17fe7df4b997510c8bd3d5ee0a37be79b Mon Sep 17 00:00:00 2001 From: Sergey Lipskiy Date: Sat, 26 Nov 2016 19:31:50 +0700 Subject: [PATCH] Rewrite lighting. Fixed chopper attack wrong textures #99 Thanks Gillou68310 for detection of the problem's origin. --- src/3DMath.cpp | 37 ++++--- src/3DMath.h | 1 + src/3DMathNeon.cpp | 9 ++ src/Combiner.cpp | 10 +- src/GLES2/Shaders_gles2.h | 3 +- src/GLUniforms/UniformBlock.cpp | 2 +- src/GLUniforms/UniformSet.cpp | 2 +- src/OGL3X/Shaders_ogl3x.h | 3 +- src/OpenGL.cpp | 2 +- src/gSP.cpp | 167 ++++++++++++++++++-------------- src/gSP.h | 12 ++- src/gSPNeon.cpp | 85 ---------------- 12 files changed, 140 insertions(+), 193 deletions(-) diff --git a/src/3DMath.cpp b/src/3DMath.cpp index bda882c8..3bc6f3df 100644 --- a/src/3DMath.cpp +++ b/src/3DMath.cpp @@ -15,27 +15,24 @@ void MultMatrix(float m0[4][4], float m1[4][4], float dest[4][4]) void TransformVectorNormalize(float vec[3], float mtx[4][4]) { - float len; - float vres[3]; - vres[0] = mtx[0][0] * vec[0] - + mtx[1][0] * vec[1] - + mtx[2][0] * vec[2]; - vres[1] = mtx[0][1] * vec[0] - + mtx[1][1] * vec[1] - + mtx[2][1] * vec[2]; - vres[2] = mtx[0][2] * vec[0] - + mtx[1][2] * vec[1] - + mtx[2][2] * vec[2]; - memcpy(vec, vres, sizeof(float)*3); - len = vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2]; - if (len != 0.0) - { - len = sqrtf(len); - vec[0] /= len; - vec[1] /= len; - vec[2] /= len; - } + vres[0] = mtx[0][0] * vec[0] + mtx[1][0] * vec[1] + mtx[2][0] * vec[2]; + vres[1] = mtx[0][1] * vec[0] + mtx[1][1] * vec[1] + mtx[2][1] * vec[2]; + vres[2] = mtx[0][2] * vec[0] + mtx[1][2] * vec[1] + mtx[2][2] * vec[2]; + vec[0] = vres[0]; + vec[1] = vres[1]; + vec[2] = vres[2]; + + Normalize(vec); +} + +void InverseTransformVectorNormalize(float src[3], float dst[3], float mtx[4][4]) +{ + dst[0] = mtx[0][0] * src[0] + mtx[0][1] * src[1] + mtx[0][2] * src[2]; + dst[1] = mtx[1][0] * src[0] + mtx[1][1] * src[1] + mtx[1][2] * src[2]; + dst[2] = mtx[2][0] * src[0] + mtx[2][1] * src[1] + mtx[2][2] * src[2]; + + Normalize(dst); } void Normalize(float v[3]) diff --git a/src/3DMath.h b/src/3DMath.h index 8cc6d6a5..90c2b376 100644 --- a/src/3DMath.h +++ b/src/3DMath.h @@ -5,6 +5,7 @@ void MultMatrix( float m0[4][4], float m1[4][4], float dest[4][4]); void TransformVectorNormalize(float vec[3], float mtx[4][4]); +void InverseTransformVectorNormalize(float src[3], float dst[3], float mtx[4][4]); void Normalize(float v[3]); float DotProduct(const float v0[3], const float v1[3]); diff --git a/src/3DMathNeon.cpp b/src/3DMathNeon.cpp index 5db7d47d..f881bd65 100644 --- a/src/3DMathNeon.cpp +++ b/src/3DMathNeon.cpp @@ -77,6 +77,15 @@ void TransformVectorNormalize(float vec[3], float mtx[4][4]) ); } +void InverseTransformVectorNormalize(float src[3], float dst[3], float mtx[4][4]) +{ + dst[0] = mtx[0][0] * src[0] + mtx[0][1] * src[1] + mtx[0][2] * src[2]; + dst[1] = mtx[1][0] * src[0] + mtx[1][1] * src[1] + mtx[1][2] * src[2]; + dst[2] = mtx[2][0] * src[0] + mtx[2][1] * src[1] + mtx[2][2] * src[2]; + + Normalize(dst); +} + void Normalize(float v[3]) { asm volatile ( diff --git a/src/Combiner.cpp b/src/Combiner.cpp index b904a7c1..e07059ee 100644 --- a/src/Combiner.cpp +++ b/src/Combiner.cpp @@ -326,9 +326,11 @@ void CombinerInfo::updateTextureParameters() void CombinerInfo::updateLightParameters() { - if (m_pUniformCollection != nullptr) - m_pUniformCollection->updateLightParameters(); - gSP.changed &= ~CHANGED_LIGHT; + if (config.generalEmulation.enableHWLighting != 0) { + if (m_pUniformCollection != nullptr) + m_pUniformCollection->updateLightParameters(); + } + gSP.changed ^= CHANGED_HW_LIGHT; } void CombinerInfo::updateParameters(OGLRender::RENDER_STATE _renderState) @@ -384,7 +386,7 @@ Storage format: uint32 - number of shaders shaders in binary form */ -static const u32 ShaderStorageFormatVersion = 0x0CU; +static const u32 ShaderStorageFormatVersion = 0x0DU; void CombinerInfo::_saveShadersStorage() const { if (m_shadersLoaded >= m_combiners.size()) diff --git a/src/GLES2/Shaders_gles2.h b/src/GLES2/Shaders_gles2.h index 7ef45312..29539e3b 100644 --- a/src/GLES2/Shaders_gles2.h +++ b/src/GLES2/Shaders_gles2.h @@ -241,9 +241,8 @@ static const char* fragment_shader_calc_light = " return; \n" " output_color = uLightColor[nLights]; \n" " mediump float intensity; \n" -" mediump vec3 n = normalize(input_color); \n" " for (int i = 0; i < nLights; i++) { \n" -" intensity = max(dot(n, uLightDirection[i]), 0.0); \n" +" intensity = max(dot(input_color, uLightDirection[i]), 0.0);\n" " output_color += intensity*uLightColor[i]; \n" " }; \n" " output_color = clamp(output_color, 0.0, 1.0); \n" diff --git a/src/GLUniforms/UniformBlock.cpp b/src/GLUniforms/UniformBlock.cpp index 9a2e2755..b47b88be 100644 --- a/src/GLUniforms/UniformBlock.cpp +++ b/src/GLUniforms/UniformBlock.cpp @@ -243,7 +243,7 @@ void UniformBlock::updateLightParameters() GLbyte * pData = m_lightBlockData.data(); const u32 arraySize = m_lightBlock.m_offsets[luLightColor] / 8; for (s32 i = 0; i <= gSP.numLights; ++i) { - memcpy(pData + m_lightBlock.m_offsets[luLightDirection] + arraySize*i, &gSP.lights[i].x, arraySize); + memcpy(pData + m_lightBlock.m_offsets[luLightDirection] + arraySize*i, &gSP.lights[i].ix, arraySize); memcpy(pData + m_lightBlock.m_offsets[luLightColor] + arraySize*i, &gSP.lights[i].r, arraySize); } if (m_currentBuffer != m_lightBlock.m_buffer) { diff --git a/src/GLUniforms/UniformSet.cpp b/src/GLUniforms/UniformSet.cpp index 8253f9d5..be225bc9 100644 --- a/src/GLUniforms/UniformSet.cpp +++ b/src/GLUniforms/UniformSet.cpp @@ -122,7 +122,7 @@ void UniformSet::_updateTextureSize(UniformSetLocation & _location, bool _bUsesT void UniformSet::_updateLightUniforms(UniformSetLocation & _location, bool _bForce) { for (s32 i = 0; i <= gSP.numLights; ++i) { - _location.uLightDirection[i].set(&gSP.lights[i].x, _bForce); + _location.uLightDirection[i].set(&gSP.lights[i].ix, _bForce); _location.uLightColor[i].set(&gSP.lights[i].r, _bForce); } } diff --git a/src/OGL3X/Shaders_ogl3x.h b/src/OGL3X/Shaders_ogl3x.h index 6ef34312..4b9e9e01 100644 --- a/src/OGL3X/Shaders_ogl3x.h +++ b/src/OGL3X/Shaders_ogl3x.h @@ -305,9 +305,8 @@ AUXILIARY_SHADER_VERSION " return; \n" " output_color = uLightColor[nLights]; \n" " mediump float intensity; \n" -" mediump vec3 n = normalize(input_color); \n" " for (int i = 0; i < nLights; i++) { \n" -" intensity = max(dot(n, uLightDirection[i]), 0.0); \n" +" intensity = max(dot(input_color, uLightDirection[i]), 0.0);\n" " output_color += intensity*uLightColor[i]; \n" " }; \n" " output_color = clamp(output_color, 0.0, 1.0); \n" diff --git a/src/OpenGL.cpp b/src/OpenGL.cpp index e4788c6c..9c5d5f46 100644 --- a/src/OpenGL.cpp +++ b/src/OpenGL.cpp @@ -1175,7 +1175,7 @@ void OGLRender::_updateStates(RENDER_STATE _renderState) const if (gSP.changed & CHANGED_VIEWPORT) _updateViewport(); - if (gSP.changed & CHANGED_LIGHT) + if (gSP.changed & CHANGED_HW_LIGHT) cmbInfo.updateLightParameters(); if ((gSP.changed & CHANGED_TEXTURE) || diff --git a/src/gSP.cpp b/src/gSP.cpp index d7258e1d..7b1f6702 100644 --- a/src/gSP.cpp +++ b/src/gSP.cpp @@ -89,6 +89,7 @@ void gSP4Triangles(const s32 v00, const s32 v01, const s32 v02, gSPInfo gSP; +static f32 identityMatrix[4][4] = { { 1.0f, 0.0f, 0.0f, 0.0f }, @@ -115,32 +116,8 @@ static void gSPTransformVertex4_default(u32 v, float mtx[4][4]) } } -static void gSPTransformNormal4_default(u32 v, float mtx[4][4]) -{ - float len, x, y, z; - OGLRender & render = video().getRender(); - for (int i = 0; i < 4; ++i) { - SPVertex & vtx = render.getVertex(v+i); - x = vtx.nx; - y = vtx.ny; - z = vtx.nz; - - vtx.nx = mtx[0][0]*x + mtx[1][0]*y + mtx[2][0]*z; - vtx.ny = mtx[0][1]*x + mtx[1][1]*y + mtx[2][1]*z; - vtx.nz = mtx[0][2]*x + mtx[1][2]*y + mtx[2][2]*z; - len = vtx.nx*vtx.nx + vtx.ny*vtx.ny + vtx.nz*vtx.nz; - if (len != 0.0f) { - len = sqrtf(len); - vtx.nx /= len; - vtx.ny /= len; - vtx.nz /= len; - } - } -} - static void gSPLightVertex4_default(u32 v) { - gSPTransformNormal4(v, gSP.matrix.modelView[gSP.matrix.modelViewi]); OGLRender & render = video().getRender(); if (!config.generalEmulation.enableHWLighting) { for(int j = 0; j < 4; ++j) { @@ -151,7 +128,7 @@ static void gSPLightVertex4_default(u32 v) vtx.HWLight = 0; for (int i = 0; i < gSP.numLights; ++i) { - f32 intensity = DotProduct( &vtx.nx, &gSP.lights[i].x ); + f32 intensity = DotProduct( &vtx.nx, &gSP.lights[i].ix ); if (intensity < 0.0f) intensity = 0.0f; vtx.r += gSP.lights[i].r * intensity; @@ -176,7 +153,6 @@ static void gSPLightVertex4_default(u32 v) static void gSPPointLightVertex4_default(u32 v, float _vPos[4][3]) { assert(_vPos != nullptr); - gSPTransformNormal4(v, gSP.matrix.modelView[gSP.matrix.modelViewi]); OGLRender & render = video().getRender(); for(int j = 0; j < 4; ++j) { SPVertex & vtx = render.getVertex(v+j); @@ -211,7 +187,6 @@ static void gSPPointLightVertex4_default(u32 v, float _vPos[4][3]) static void gSPLightVertex4_CBFD(u32 v) { - gSPTransformNormal4(v, gSP.matrix.modelView[gSP.matrix.modelViewi]); OGLRender & render = video().getRender(); for(int j = 0; j < 4; ++j) { SPVertex & vtx = render.getVertex(v+j); @@ -246,7 +221,6 @@ static void gSPLightVertex4_CBFD(u32 v) static void gSPPointLightVertex4_CBFD(u32 v, float _vPos[4][3]) { - gSPTransformNormal4(v, gSP.matrix.modelView[gSP.matrix.modelViewi]); OGLRender & render = video().getRender(); for(int j = 0; j < 4; ++j) { SPVertex & vtx = render.getVertex(v+j); @@ -275,7 +249,7 @@ static void gSPPointLightVertex4_CBFD(u32 v, float _vPos[4][3]) b += light.b * intensity; } const SPLight & light = gSP.lights[gSP.numLights-1]; - intensity = DotProduct( &vtx.nx, &light.x ); + intensity = DotProduct( &vtx.nx, &light.ix ); if ((light.r != 0.0 || light.g != 0.0 || light.b != 0.0) && intensity > 0) { r += light.r * intensity; g += light.g * intensity; @@ -369,8 +343,8 @@ void gSPProcessVertex4(u32 v) f32 fLightDir[3] = {vtx.nx, vtx.ny, vtx.nz}; f32 x, y; if (gSP.lookatEnable) { - x = DotProduct(&gSP.lookat[0].x, fLightDir); - y = DotProduct(&gSP.lookat[1].x, fLightDir); + x = DotProduct(&gSP.lookat[0].ix, fLightDir); + y = DotProduct(&gSP.lookat[1].ix, fLightDir); } else { x = fLightDir[0]; y = fLightDir[1]; @@ -416,7 +390,7 @@ static void gSPLightVertex_default(SPVertex & _vtx) _vtx.g = gSP.lights[gSP.numLights].g; _vtx.b = gSP.lights[gSP.numLights].b; for (int i = 0; i < gSP.numLights; ++i){ - f32 intensity = DotProduct( &_vtx.nx, &gSP.lights[i].x ); + f32 intensity = DotProduct( &_vtx.nx, &gSP.lights[i].ix ); if (intensity < 0.0f) intensity = 0.0f; _vtx.r += gSP.lights[i].r * intensity; @@ -522,7 +496,7 @@ static void gSPPointLightVertex_CBFD(SPVertex & _vtx, float * /*_vPos*/) b += light.b * intensity; } const SPLight & light = gSP.lights[gSP.numLights-1]; - intensity = DotProduct( &_vtx.nx, &light.x ); + intensity = DotProduct( &_vtx.nx, &light.ix ); if ((light.r != 0.0 || light.g != 0.0 || light.b != 0.0) && intensity > 0) { r += light.r * intensity; g += light.g * intensity; @@ -590,7 +564,6 @@ void gSPProcessVertex(u32 v) vtx.modify = 0; if (gSP.geometryMode & G_LIGHTING) { - TransformVectorNormalize( &vtx.nx, gSP.matrix.modelView[gSP.matrix.modelViewi] ); if (gSP.geometryMode & G_POINT_LIGHTING) gSPPointLightVertex(vtx, vPos); else @@ -600,8 +573,8 @@ void gSPProcessVertex(u32 v) f32 fLightDir[3] = {vtx.nx, vtx.ny, vtx.nz}; f32 x, y; if (gSP.lookatEnable) { - x = DotProduct(&gSP.lookat[0].x, fLightDir); - y = DotProduct(&gSP.lookat[1].x, fLightDir); + x = DotProduct(&gSP.lookat[0].ix, fLightDir); + y = DotProduct(&gSP.lookat[1].ix, fLightDir); } else { x = fLightDir[0]; y = fLightDir[1]; @@ -679,6 +652,7 @@ void gSPMatrix( u32 matrix, u8 param ) CopyMatrix( gSP.matrix.modelView[gSP.matrix.modelViewi], mtx ); else MultMatrix2( gSP.matrix.modelView[gSP.matrix.modelViewi], mtx ); + gSP.changed |= CHANGED_LIGHT | CHANGED_LOOKAT; } gSP.changed |= CHANGED_MATRIX; @@ -832,8 +806,7 @@ void gSPLight( u32 l, s32 n ) gSP.lights[n].qa = (float)(RDRAM[(addrByte + 14) ^ 3]) / 8.0f; } - if (config.generalEmulation.enableHWLighting != 0) - gSP.changed |= CHANGED_LIGHT; + gSP.changed |= CHANGED_LIGHT; #ifdef DEBUG DebugMsg( DEBUG_DETAIL | DEBUG_HANDLED, "// x = %2.6f y = %2.6f z = %2.6f\n", @@ -878,8 +851,7 @@ void gSPLightCBFD( u32 l, s32 n ) gSP.lights[n].ca = (float)(RDRAM[(addrByte + 12) ^ 3]) / 16.0f; } - if (config.generalEmulation.enableHWLighting != 0) - gSP.changed |= CHANGED_LIGHT; + gSP.changed |= CHANGED_LIGHT; #ifdef DEBUG DebugMsg( DEBUG_DETAIL | DEBUG_HANDLED, "// x = %2.6f y = %2.6f z = %2.6f\n", @@ -914,15 +886,44 @@ void gSPLookAt( u32 _l, u32 _n ) gSP.lookatEnable = (_n == 0) || (_n == 1 && (light->x != 0 || light->y != 0)); Normalize(&gSP.lookat[_n].x); + gSP.changed |= CHANGED_LOOKAT; } -void gSPVertex( u32 a, u32 n, u32 v0 ) +static +void gSPUpdateLightVectors() +{ + for (u32 l = 0; l < gSP.numLights; ++l) + InverseTransformVectorNormalize(&gSP.lights[l].x, &gSP.lights[l].ix, gSP.matrix.modelView[gSP.matrix.modelViewi]); + gSP.changed ^= CHANGED_LIGHT; + gSP.changed |= CHANGED_HW_LIGHT; +} + +static +void gSPUpdateLookatVectors() +{ + if (gSP.lookatEnable) { + for (u32 l = 0; l < 2; ++l) + InverseTransformVectorNormalize(&gSP.lookat[l].x, &gSP.lookat[l].ix, gSP.matrix.modelView[gSP.matrix.modelViewi]); + } + gSP.changed ^= CHANGED_LOOKAT; +} + +void gSPVertex(u32 a, u32 n, u32 v0) { u32 address = RSP_SegmentToPhysical(a); if ((address + sizeof( Vertex ) * n) > RDRAMSize) return; + if ((gSP.geometryMode & G_LIGHTING) != 0) { + + if ((gSP.changed & CHANGED_LIGHT) != 0) + gSPUpdateLightVectors(); + + if (((gSP.geometryMode & G_TEXTURE_GEN) != 0) && ((gSP.changed & CHANGED_LOOKAT) != 0)) + gSPUpdateLookatVectors(); + } + Vertex *vertex = (Vertex*)&RDRAM[address]; OGLRender & render = video().getRender(); @@ -940,9 +941,9 @@ void gSPVertex( u32 a, u32 n, u32 v0 ) vtx.s = _FIXED2FLOAT( vertex->s, 5 ); vtx.t = _FIXED2FLOAT( vertex->t, 5 ); if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = vertex->normal.x; - vtx.ny = vertex->normal.y; - vtx.nz = vertex->normal.z; + vtx.nx = _FIXED2FLOAT( vertex->normal.x, 7 ); + vtx.ny = _FIXED2FLOAT( vertex->normal.y, 7 ); + vtx.nz = _FIXED2FLOAT( vertex->normal.z, 7 ); vtx.a = vertex->color.a * 0.0039215689f; } else { vtx.r = vertex->color.r * 0.0039215689f; @@ -964,9 +965,9 @@ void gSPVertex( u32 a, u32 n, u32 v0 ) vtx.s = _FIXED2FLOAT( vertex->s, 5 ); vtx.t = _FIXED2FLOAT( vertex->t, 5 ); if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = vertex->normal.x; - vtx.ny = vertex->normal.y; - vtx.nz = vertex->normal.z; + vtx.nx = _FIXED2FLOAT(vertex->normal.x, 7); + vtx.ny = _FIXED2FLOAT(vertex->normal.y, 7); + vtx.nz = _FIXED2FLOAT(vertex->normal.z, 7); vtx.a = vertex->color.a * 0.0039215689f; } else { vtx.r = vertex->color.r * 0.0039215689f; @@ -990,6 +991,15 @@ void gSPCIVertex( u32 a, u32 n, u32 v0 ) if ((address + sizeof( PDVertex ) * n) > RDRAMSize) return; + if ((gSP.geometryMode & G_LIGHTING) != 0) { + + if ((gSP.changed & CHANGED_LIGHT) != 0) + gSPUpdateLightVectors(); + + if (((gSP.geometryMode & G_TEXTURE_GEN) != 0) && ((gSP.changed & CHANGED_LOOKAT) != 0)) + gSPUpdateLookatVectors(); + } + PDVertex *vertex = (PDVertex*)&RDRAM[address]; OGLRender & render = video().getRender(); @@ -1008,9 +1018,9 @@ void gSPCIVertex( u32 a, u32 n, u32 v0 ) u8 *color = &RDRAM[gSP.vertexColorBase + (vertex->ci & 0xff)]; if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = (s8)color[3]; - vtx.ny = (s8)color[2]; - vtx.nz = (s8)color[1]; + vtx.nx = _FIXED2FLOAT((s8)color[3], 7); + vtx.ny = _FIXED2FLOAT((s8)color[2], 7); + vtx.nz = _FIXED2FLOAT((s8)color[1], 7); vtx.a = color[0] * 0.0039215689f; } else { vtx.r = color[3] * 0.0039215689f; @@ -1034,9 +1044,9 @@ void gSPCIVertex( u32 a, u32 n, u32 v0 ) u8 *color = &RDRAM[gSP.vertexColorBase + (vertex->ci & 0xff)]; if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = (s8)color[3]; - vtx.ny = (s8)color[2]; - vtx.nz = (s8)color[1]; + vtx.nx = _FIXED2FLOAT((s8)color[3], 7); + vtx.ny = _FIXED2FLOAT((s8)color[2], 7); + vtx.nz = _FIXED2FLOAT((s8)color[1], 7); vtx.a = color[0] * 0.0039215689f; } else { vtx.r = color[3] * 0.0039215689f; @@ -1061,6 +1071,15 @@ void gSPDMAVertex( u32 a, u32 n, u32 v0 ) if ((address + 10 * n) > RDRAMSize) return; + if ((gSP.geometryMode & G_LIGHTING) != 0) { + + if ((gSP.changed & CHANGED_LIGHT) != 0) + gSPUpdateLightVectors(); + + if (((gSP.geometryMode & G_TEXTURE_GEN) != 0) && ((gSP.changed & CHANGED_LOOKAT) != 0)) + gSPUpdateLookatVectors(); + } + OGLRender & render = video().getRender(); if ((n + v0) <= INDEXMAP_SIZE) { u32 i = v0; @@ -1074,9 +1093,9 @@ void gSPDMAVertex( u32 a, u32 n, u32 v0 ) vtx.z = *(s16*)&RDRAM[(address + 4) ^ 2]; if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = *(s8*)&RDRAM[(address + 6) ^ 3]; - vtx.ny = *(s8*)&RDRAM[(address + 7) ^ 3]; - vtx.nz = *(s8*)&RDRAM[(address + 8) ^ 3]; + vtx.nx = _FIXED2FLOAT(*(s8*)&RDRAM[(address + 6) ^ 3], 7); + vtx.ny = _FIXED2FLOAT(*(s8*)&RDRAM[(address + 7) ^ 3], 7); + vtx.nz = _FIXED2FLOAT(*(s8*)&RDRAM[(address + 8) ^ 3], 7); vtx.a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f; } else { vtx.r = *(u8*)&RDRAM[(address + 6) ^ 3] * 0.0039215689f; @@ -1097,9 +1116,9 @@ void gSPDMAVertex( u32 a, u32 n, u32 v0 ) vtx.z = *(s16*)&RDRAM[(address + 4) ^ 2]; if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = *(s8*)&RDRAM[(address + 6) ^ 3]; - vtx.ny = *(s8*)&RDRAM[(address + 7) ^ 3]; - vtx.nz = *(s8*)&RDRAM[(address + 8) ^ 3]; + vtx.nx = _FIXED2FLOAT(*(s8*)&RDRAM[(address + 6) ^ 3], 7); + vtx.ny = _FIXED2FLOAT(*(s8*)&RDRAM[(address + 7) ^ 3], 7); + vtx.nz = _FIXED2FLOAT(*(s8*)&RDRAM[(address + 8) ^ 3], 7); vtx.a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f; } else { vtx.r = *(u8*)&RDRAM[(address + 6) ^ 3] * 0.0039215689f; @@ -1123,6 +1142,15 @@ void gSPCBFDVertex( u32 a, u32 n, u32 v0 ) if ((address + sizeof( Vertex ) * n) > RDRAMSize) return; + if ((gSP.geometryMode & G_LIGHTING) != 0) { + + if ((gSP.changed & CHANGED_LIGHT) != 0) + gSPUpdateLightVectors(); + + if (((gSP.geometryMode & G_TEXTURE_GEN) != 0) && ((gSP.changed & CHANGED_LOOKAT) != 0)) + gSPUpdateLookatVectors(); + } + Vertex *vertex = (Vertex*)&RDRAM[address]; OGLRender & render = video().getRender(); @@ -1140,9 +1168,9 @@ void gSPCBFDVertex( u32 a, u32 n, u32 v0 ) vtx.t = _FIXED2FLOAT( vertex->t, 5 ); if (gSP.geometryMode & G_LIGHTING) { const u32 normaleAddrOffset = ((v+j)<<1); - vtx.nx = (float)(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 0)^3]); - vtx.ny = (float)(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 1)^3]); - vtx.nz = (float)((s8)(vertex->flag&0xFF)); + vtx.nx = _FIXED2FLOAT(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 0) ^ 3], 7); + vtx.ny = _FIXED2FLOAT(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 1) ^ 3], 7); + vtx.nz = _FIXED2FLOAT((s8)(vertex->flag & 0xFF), 7); } vtx.r = vertex->color.r * 0.0039215689f; vtx.g = vertex->color.g * 0.0039215689f; @@ -1163,9 +1191,9 @@ void gSPCBFDVertex( u32 a, u32 n, u32 v0 ) vtx.t = _FIXED2FLOAT( vertex->t, 5 ); if (gSP.geometryMode & G_LIGHTING) { const u32 normaleAddrOffset = (v<<1); - vtx.nx = (float)(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 0)^3]); - vtx.ny = (float)(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 1)^3]); - vtx.nz = (float)((s8)(vertex->flag&0xFF)); + vtx.nx = _FIXED2FLOAT(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 0) ^ 3], 7); + vtx.ny = _FIXED2FLOAT(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 1) ^ 3], 7); + vtx.nz = _FIXED2FLOAT((s8)(vertex->flag & 0xFF), 7); } vtx.r = vertex->color.r * 0.0039215689f; vtx.g = vertex->color.g * 0.0039215689f; @@ -1565,8 +1593,7 @@ void gSPNumLights( s32 n ) { if (n <= 12) { gSP.numLights = n; - if (config.generalEmulation.enableHWLighting != 0) - gSP.changed |= CHANGED_LIGHT; + gSP.changed |= CHANGED_LIGHT; } #ifdef DEBUG else @@ -1588,8 +1615,7 @@ void gSPLightColor( u32 lightNum, u32 packedColor ) gSP.lights[lightNum].r = _SHIFTR( packedColor, 24, 8 ) * 0.0039215689f; gSP.lights[lightNum].g = _SHIFTR( packedColor, 16, 8 ) * 0.0039215689f; gSP.lights[lightNum].b = _SHIFTR( packedColor, 8, 8 ) * 0.0039215689f; - if (config.generalEmulation.enableHWLighting != 0) - gSP.changed |= CHANGED_LIGHT; + gSP.changed |= CHANGED_HW_LIGHT; } #ifdef DEBUG DebugMsg( DEBUG_HIGH | DEBUG_HANDLED, "gSPLightColor( %i, 0x%08X );\n", @@ -2455,18 +2481,15 @@ void gSPObjRendermode(u32 _mode) #ifdef __NEON_OPT void gSPTransformVertex4NEON(u32 v, float mtx[4][4]); -void gSPTransformNormal4NEON(u32 v, float mtx[4][4]); void gSPBillboardVertex4NEON(u32 v); #endif //__NEON_OPT #ifdef __VEC4_OPT #ifndef __NEON_OPT void (*gSPTransformVertex4)(u32 v, float mtx[4][4]) = gSPTransformVertex4_default; -void (*gSPTransformNormal4)(u32 v, float mtx[4][4]) = gSPTransformNormal4_default; void (*gSPBillboardVertex4)(u32 v) = gSPBillboardVertex4_default; #else void (*gSPTransformVertex4)(u32 v, float mtx[4][4]) = gSPTransformVertex4NEON; -void (*gSPTransformNormal4)(u32 v, float mtx[4][4]) = gSPTransformNormal4NEON; void (*gSPBillboardVertex4)(u32 v) = gSPBillboardVertex4NEON; #endif diff --git a/src/gSP.h b/src/gSP.h index 273d580c..279c8e56 100644 --- a/src/gSP.h +++ b/src/gSP.h @@ -7,11 +7,13 @@ #define CHANGED_VIEWPORT 0x01 #define CHANGED_MATRIX 0x02 +#define CHANGED_TEXTURE 0x04 #define CHANGED_GEOMETRYMODE 0x08 -#define CHANGED_TEXTURE 0x10 -#define CHANGED_FOGPOSITION 0x20 -#define CHANGED_LIGHT 0x40 +#define CHANGED_FOGPOSITION 0x10 +#define CHANGED_LIGHT 0x20 +#define CHANGED_LOOKAT 0x40 #define CHANGED_TEXTURESCALE 0x80 +#define CHANGED_HW_LIGHT 0x100 #define CLIP_X 0x03 #define CLIP_NEGX 0x01 @@ -55,6 +57,7 @@ struct SPLight { f32 r, g, b; f32 x, y, z; + f32 ix, iy, iz; f32 posx, posy, posz, posw; f32 ca, la, qa; }; @@ -85,6 +88,7 @@ struct gSPInfo SPLight lights[12]; SPLight lookat[2]; + s32 numLights; bool lookatEnable; struct @@ -115,8 +119,6 @@ struct gSPInfo } bgImage; u32 geometryMode; - s32 numLights; - u32 changed; struct { diff --git a/src/gSPNeon.cpp b/src/gSPNeon.cpp index b27125a3..c5abd07d 100644 --- a/src/gSPNeon.cpp +++ b/src/gSPNeon.cpp @@ -76,91 +76,6 @@ void gSPTransformVertex4NEON(u32 v, float mtx[4][4]) ); } -//4x Transform normal and normalize -void gSPTransformNormal4NEON(u32 v, float mtx[4][4]) -{ - OGLRender & render = video().getRender(); - SPVertex & vtx = render.getVertex(v); - void *ptr = &vtx.nx; - - asm volatile ( - "vld1.32 {d0, d1}, [%1] \n\t" //q0 = {x,y,z,w} - "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} - "vld1.32 {d2, d3}, [%1] \n\t" //q1 = {x,y,z,w} - "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} - "vld1.32 {d4, d5}, [%1] \n\t" //q2 = {x,y,z,w} - "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} - "vld1.32 {d6, d7}, [%1] \n\t" //q3 = {x,y,z,w} - "sub %1, %1, %3 \n\t" //q0 = {x,y,z,w} - - "vld1.32 {d18, d19}, [%0]! \n\t" //q9 = m - "vld1.32 {d20, d21}, [%0]! \n\t" //q10 = m+16 - "vld1.32 {d22, d23}, [%0] \n\t" //q11 = m+32 - - "vmul.f32 q12, q9, d0[0] \n\t" //q12 = q9*d0[0] - "vmul.f32 q13, q9, d2[0] \n\t" //q13 = q9*d2[0] - "vmul.f32 q14, q9, d4[0] \n\t" //q14 = q9*d4[0] - "vmul.f32 q15, q9, d6[0] \n\t" //q15 = q9*d6[0] - - "vmla.f32 q12, q10, d0[1] \n\t" //q12 += q10*q0[1] - "vmla.f32 q13, q10, d2[1] \n\t" //q13 += q10*q2[1] - "vmla.f32 q14, q10, d4[1] \n\t" //q14 += q10*q4[1] - "vmla.f32 q15, q10, d6[1] \n\t" //q15 += q10*q6[1] - - "vmla.f32 q12, q11, d1[0] \n\t" //q12 += q11*d1[0] - "vmla.f32 q13, q11, d3[0] \n\t" //q13 += q11*d3[0] - "vmla.f32 q14, q11, d5[0] \n\t" //q14 += q11*d5[0] - "vmla.f32 q15, q11, d7[0] \n\t" //q15 += q11*d7[0] - - "vmul.f32 q0, q12, q12 \n\t" //q0 = q12*q12 - "vmul.f32 q1, q13, q13 \n\t" //q1 = q13*q13 - "vmul.f32 q2, q14, q14 \n\t" //q2 = q14*q14 - "vmul.f32 q3, q15, q15 \n\t" //q3 = q15*q15 - - "vpadd.f32 d0, d0 \n\t" //d0[0] = d0[0] + d0[1] - "vpadd.f32 d2, d2 \n\t" //d2[0] = d2[0] + d2[1] - "vpadd.f32 d4, d4 \n\t" //d4[0] = d4[0] + d4[1] - "vpadd.f32 d6, d6 \n\t" //d6[0] = d6[0] + d6[1] - - "vmov.f32 s1, s2 \n\t" //d0[1] = d1[0] - "vmov.f32 s5, s6 \n\t" //d2[1] = d3[0] - "vmov.f32 s9, s10 \n\t" //d4[1] = d5[0] - "vmov.f32 s13, s14 \n\t" //d6[1] = d7[0] - - "vpadd.f32 d0, d0, d2 \n\t" //d0 = {d0[0] + d0[1], d2[0] + d2[1]} - "vpadd.f32 d1, d4, d6 \n\t" //d1 = {d4[0] + d4[1], d6[0] + d6[1]} - - "vmov.f32 q1, q0 \n\t" //q1 = q0 - "vrsqrte.f32 q0, q0 \n\t" //q0 = ~ 1.0 / sqrt(q0) - "vmul.f32 q2, q0, q1 \n\t" //q2 = q0 * q1 - "vrsqrts.f32 q3, q2, q0 \n\t" //q3 = (3 - q0 * q2) / 2 - "vmul.f32 q0, q0, q3 \n\t" //q0 = q0 * q3 - "vmul.f32 q2, q0, q1 \n\t" //q2 = q0 * q1 - "vrsqrts.f32 q3, q2, q0 \n\t" //q3 = (3 - q0 * q2) / 2 - "vmul.f32 q0, q0, q3 \n\t" //q0 = q0 * q3 - - "vmul.f32 q3, q15, d1[1] \n\t" //q3 = q15*d1[1] - "vmul.f32 q2, q14, d1[0] \n\t" //q2 = q14*d1[0] - "vmul.f32 q1, q13, d0[1] \n\t" //q1 = q13*d0[1] - "vmul.f32 q0, q12, d0[0] \n\t" //q0 = q12*d0[0] - - "vst1.32 {d0, d1}, [%1] \n\t" //d0={nx,ny,nz,pad} - "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} - "vst1.32 {d2, d3}, [%1] \n\t" //d2={nx,ny,nz,pad} - "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} - "vst1.32 {d4, d5}, [%1] \n\t" //d4={nx,ny,nz,pad} - "add %1, %1, %2 \n\t" //q0 = {x,y,z,w} - "vst1.32 {d6, d7}, [%1] \n\t" //d6={nx,ny,nz,pad} - - : "+&r"(mtx), "+&r"(ptr) - : "I"(sizeof(SPVertex)), "I"(3 * sizeof(SPVertex)) - : "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", - "d16","d17", "d18","d19", "d20", "d21", "d22", - "d23", "d24", "d25", "d26", "d27", "d28", "d29", - "d30", "d31", "memory" - ); -} - void gSPBillboardVertex4NEON(u32 v) { int i = 0;