diff --git a/projects/msvc12/GLideN64.vcxproj b/projects/msvc12/GLideN64.vcxproj index 0b7ab8f3..d1b88551 100644 --- a/projects/msvc12/GLideN64.vcxproj +++ b/projects/msvc12/GLideN64.vcxproj @@ -224,7 +224,7 @@ copy /Y "$(ProjectDir)$(OutDir)$(TargetName).*" "$(Mupen64PluginsDir)" true Speed true - NDEBUG;UNICODE;GL_USE_UNIFORMBLOCK;TXFILTER_LIB;WIN32_ASM;OS_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + __VEC4_OPT;NDEBUG;UNICODE;GL_USE_UNIFORMBLOCK;TXFILTER_LIB;WIN32_ASM;OS_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) true MultiThreaded true @@ -267,7 +267,7 @@ copy /Y "$(ProjectDir)$(OutDir)$(TargetName).*" "$(N64PluginsDir)" true Speed true - NDEBUG;UNICODE;GL_USE_UNIFORMBLOCK;TXFILTER_LIB;MUPENPLUSAPI;WIN32;WIN32_ASM;OS_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + __VEC4_OPT;NDEBUG;UNICODE;GL_USE_UNIFORMBLOCK;TXFILTER_LIB;MUPENPLUSAPI;WIN32;WIN32_ASM;OS_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) true MultiThreaded true diff --git a/projects/msvc15/GLideN64.vcxproj b/projects/msvc15/GLideN64.vcxproj index 371f3a52..872ab263 100644 --- a/projects/msvc15/GLideN64.vcxproj +++ b/projects/msvc15/GLideN64.vcxproj @@ -223,7 +223,7 @@ copy /Y "$(ProjectDir)$(OutDir)$(TargetName).*" "$(Mupen64PluginsDir)" true Speed true - NDEBUG;UNICODE;GL_USE_UNIFORMBLOCK;TXFILTER_LIB;WIN32_ASM;OS_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + __VEC4_OPT;NDEBUG;UNICODE;GL_USE_UNIFORMBLOCK;TXFILTER_LIB;WIN32_ASM;OS_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) true MultiThreaded true @@ -264,7 +264,7 @@ copy /Y "$(ProjectDir)$(OutDir)$(TargetName).*" "$(N64PluginsDir)" true Speed true - NDEBUG;UNICODE;GL_USE_UNIFORMBLOCK;TXFILTER_LIB;MUPENPLUSAPI;WIN32;WIN32_ASM;OS_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) + __VEC4_OPT;NDEBUG;UNICODE;GL_USE_UNIFORMBLOCK;TXFILTER_LIB;MUPENPLUSAPI;WIN32;WIN32_ASM;OS_WINDOWS;_USRDLL;_CRT_SECURE_NO_WARNINGS;%(PreprocessorDefinitions) true MultiThreaded true diff --git a/src/GraphicsDrawer.h b/src/GraphicsDrawer.h index ec00d883..60afa34f 100644 --- a/src/GraphicsDrawer.h +++ b/src/GraphicsDrawer.h @@ -135,6 +135,8 @@ public: SPVertex & getVertex(u32 _v) { return triangles.vertices[_v]; } + SPVertex * getVertexPtr(u32 _v) { return triangles.vertices.data() + _v; } + void setDMAVerticesSize(u32 _size) { if (m_dmaVertices.size() < _size) m_dmaVertices.resize(_size); } SPVertex * getDMAVerticesData() { return m_dmaVertices.data(); } diff --git a/src/Neon/gSPNeon.cpp b/src/Neon/gSPNeon.cpp index b11afc71..cb3dbff7 100644 --- a/src/Neon/gSPNeon.cpp +++ b/src/Neon/gSPNeon.cpp @@ -116,7 +116,7 @@ void gSPBillboardVertex4NEON(u32 v) ); } -void gSPTransformVertex_NEON(float vtx[4], float mtx[4][4]) +void gSPTransformVector_NEON(float vtx[4], float mtx[4][4]) { // Load vtx float32x4_t _vtx = vld1q_f32(vtx); @@ -244,12 +244,11 @@ void DotProductMax4FullNeon( float v0[3], float v1[4][3], float _lights[4][3], f ); } -void gSPLightVertex4_NEON(u32 v) +void gSPLightVertex4_NEON(u32 v, SPVertex * spVtx) { - GraphicsDrawer & drawer = dwnd().getDrawer(); if (!config.generalEmulation.enableHWLighting) { for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); + SPVertex & vtx = spVtx[v + j]; vtx.r = gSP.lights.rgb[gSP.numLights][R]; vtx.g = gSP.lights.rgb[gSP.numLights][G]; vtx.b = gSP.lights.rgb[gSP.numLights][B]; @@ -280,7 +279,7 @@ void gSPLightVertex4_NEON(u32 v) } } else { for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); + SPVertex & vtx = spVtx[v + j]; vtx.HWLight = gSP.numLights; vtx.r = vtx.nx; vtx.g = vtx.ny; diff --git a/src/gSP.cpp b/src/gSP.cpp index 30fb73f7..c5a99ded 100644 --- a/src/gSP.cpp +++ b/src/gSP.cpp @@ -30,6 +30,14 @@ using namespace graphics; #define INDEXMAP_SIZE 80U +#ifdef __VEC4_OPT +#define VEC_OPT 4U +#else +#define VEC_OPT 1U +#endif + +static bool g_ConkerUcode; + void gSPFlushTriangles() { if ((gSP.geometryMode & G_SHADING_SMOOTH) == 0) { @@ -117,600 +125,6 @@ f32 identityMatrix[4][4] = { 0.0f, 0.0f, 0.0f, 1.0f } }; -#ifdef __VEC4_OPT -static void gSPTransformVertex4_default(u32 v, float mtx[4][4]) -{ - float x, y, z; - GraphicsDrawer & drawer = dwnd().getDrawer(); - for (int i = 0; i < 4; ++i) { - SPVertex & vtx = drawer.getVertex(v+i); - x = vtx.x; - y = vtx.y; - z = vtx.z; - vtx.x = x * mtx[0][0] + y * mtx[1][0] + z * mtx[2][0] + mtx[3][0]; - vtx.y = x * mtx[0][1] + y * mtx[1][1] + z * mtx[2][1] + mtx[3][1]; - vtx.z = x * mtx[0][2] + y * mtx[1][2] + z * mtx[2][2] + mtx[3][2]; - vtx.w = x * mtx[0][3] + y * mtx[1][3] + z * mtx[2][3] + mtx[3][3]; - } -} - -static void gSPLightVertex4_default(u32 v) -{ - GraphicsDrawer & drawer = dwnd().getDrawer(); - if (!config.generalEmulation.enableHWLighting) { - for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); - vtx.r = gSP.lights.rgb[gSP.numLights][R]; - vtx.g = gSP.lights.rgb[gSP.numLights][G]; - vtx.b = gSP.lights.rgb[gSP.numLights][B]; - vtx.HWLight = 0; - - for (int i = 0; i < gSP.numLights; ++i) { - f32 intensity = DotProduct( &vtx.nx, gSP.lights.i_xyz[i] ); - if (intensity < 0.0f) - intensity = 0.0f; - vtx.r += gSP.lights.rgb[i][R] * intensity; - vtx.g += gSP.lights.rgb[i][G] * intensity; - vtx.b += gSP.lights.rgb[i][B] * intensity; - } - vtx.r = min(1.0f, vtx.r); - vtx.g = min(1.0f, vtx.g); - vtx.b = min(1.0f, vtx.b); - } - } else { - for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); - vtx.HWLight = gSP.numLights; - vtx.r = vtx.nx; - vtx.g = vtx.ny; - vtx.b = vtx.nz; - } - } -} - -static void gSPPointLightVertex4_default(u32 v, float _vPos[4][3]) -{ - assert(_vPos != nullptr); - GraphicsDrawer & drawer = dwnd().getDrawer(); - for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); - float light_intensity = 0.0f; - vtx.HWLight = 0; - vtx.r = gSP.lights.rgb[gSP.numLights][R]; - vtx.g = gSP.lights.rgb[gSP.numLights][G]; - vtx.b = gSP.lights.rgb[gSP.numLights][B]; - for (u32 l=0; l < gSP.numLights; ++l) { - if (gSP.lights.ca[l] != 0.0f) { - float lvec[3] = {gSP.lights.pos_xyzw[l][X], gSP.lights.pos_xyzw[l][Y], gSP.lights.pos_xyzw[l][Z]}; - lvec[0] -= _vPos[j][0]; - lvec[1] -= _vPos[j][1]; - lvec[2] -= _vPos[j][2]; - const float light_len2 = (lvec[0] * lvec[0] + lvec[1] * lvec[1] + lvec[2] * lvec[2]) / 65535.0f; - const float light_len = sqrtf(light_len2); - const float at = gSP.lights.ca[l] + light_len*gSP.lights.la[l] + light_len2*gSP.lights.qa[l]; - if (at > 0.0f) - light_intensity = 1/at; - else - light_intensity = 0.0f; - if (light_intensity > 0.0f) { - vtx.r += gSP.lights.rgb[l][R] * light_intensity; - vtx.g += gSP.lights.rgb[l][G] * light_intensity; - vtx.b += gSP.lights.rgb[l][B] * light_intensity; - } - } else { - f32 intensity = DotProduct(&vtx.nx, gSP.lights.i_xyz[l]); - if (intensity < 0.0f) - intensity = 0.0f; - vtx.r += gSP.lights.rgb[l][R] * intensity; - vtx.g += gSP.lights.rgb[l][G] * intensity; - vtx.b += gSP.lights.rgb[l][B] * intensity; - } - } - if (vtx.r > 1.0f) vtx.r = 1.0f; - if (vtx.g > 1.0f) vtx.g = 1.0f; - if (vtx.b > 1.0f) vtx.b = 1.0f; - } -} - -static void gSPLightVertex4_CBFD(u32 v) -{ - GraphicsDrawer & drawer = dwnd().getDrawer(); - for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); - f32 r = gSP.lights.rgb[gSP.numLights][R]; - f32 g = gSP.lights.rgb[gSP.numLights][G]; - f32 b = gSP.lights.rgb[gSP.numLights][B]; - - for (u32 l = 0; l < gSP.numLights; ++l) { - const f32 vx = (vtx.x + gSP.vertexCoordMod[ 8])*gSP.vertexCoordMod[12] - gSP.lights.pos_xyzw[l][X]; - const f32 vy = (vtx.y + gSP.vertexCoordMod[ 9])*gSP.vertexCoordMod[13] - gSP.lights.pos_xyzw[l][Y]; - const f32 vz = (vtx.z + gSP.vertexCoordMod[10])*gSP.vertexCoordMod[14] - gSP.lights.pos_xyzw[l][Z]; - const f32 vw = (vtx.w + gSP.vertexCoordMod[11])*gSP.vertexCoordMod[15] - gSP.lights.pos_xyzw[l][W]; - const f32 len = (vx*vx+vy*vy+vz*vz+vw*vw)/65536.0f; - f32 intensity = gSP.lights.ca[l] / len; - if (intensity > 1.0f) intensity = 1.0f; - r += gSP.lights.rgb[l][R] * intensity; - g += gSP.lights.rgb[l][G] * intensity; - b += gSP.lights.rgb[l][B] * intensity; - } - - r = min(1.0f, r); - g = min(1.0f, g); - b = min(1.0f, b); - - vtx.r *= r; - vtx.g *= g; - vtx.b *= b; - vtx.HWLight = 0; - } -} - -static void gSPPointLightVertex4_CBFD(u32 v, float _vPos[4][3]) -{ - GraphicsDrawer & drawer = dwnd().getDrawer(); - for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); - f32 r = gSP.lights.rgb[gSP.numLights][R]; - f32 g = gSP.lights.rgb[gSP.numLights][G]; - f32 b = gSP.lights.rgb[gSP.numLights][B]; - - f32 intensity = 0.0f; - for (u32 l = 0; l < gSP.numLights-1; ++l) { - intensity = DotProduct( &vtx.nx, gSP.lights.xyz[l] ); - if ((gSP.lights.rgb[l][R] == 0.0f && gSP.lights.rgb[l][G] == 0.0f && gSP.lights.rgb[l][B] == 0.0f) || intensity < 0.0f) - continue; - if (gSP.lights.ca[l] > 0.0f) { - const f32 vx = (vtx.x + gSP.vertexCoordMod[ 8])*gSP.vertexCoordMod[12] - gSP.lights.pos_xyzw[l][X]; - const f32 vy = (vtx.y + gSP.vertexCoordMod[ 9])*gSP.vertexCoordMod[13] - gSP.lights.pos_xyzw[l][Y]; - const f32 vz = (vtx.z + gSP.vertexCoordMod[10])*gSP.vertexCoordMod[14] - gSP.lights.pos_xyzw[l][Z]; - const f32 vw = (vtx.w + gSP.vertexCoordMod[11])*gSP.vertexCoordMod[15] - gSP.lights.pos_xyzw[l][W]; - const f32 len = (vx*vx+vy*vy+vz*vz+vw*vw)/65536.0f; - float p_i = gSP.lights.ca[l] / len; - if (p_i > 1.0f) p_i = 1.0f; - intensity *= p_i; - } - r += gSP.lights.rgb[l][R] * intensity; - g += gSP.lights.rgb[l][G] * intensity; - b += gSP.lights.rgb[l][B] * intensity; - } - - intensity = DotProduct( &vtx.nx, gSP.lights.i_xyz[gSP.numLights-1] ); - if ((gSP.lights.i_xyz[gSP.numLights-1][R] != 0.0 || gSP.lights.i_xyz[gSP.numLights-1][G] != 0.0 || gSP.lights.i_xyz[gSP.numLights-1][B] != 0.0) && intensity > 0) { - r += gSP.lights.rgb[gSP.numLights-1][R] * intensity; - g += gSP.lights.rgb[gSP.numLights-1][G] * intensity; - b += gSP.lights.rgb[gSP.numLights-1][B] * intensity; - } - - r = min(1.0f, r); - g = min(1.0f, g); - b = min(1.0f, b); - - vtx.r *= r; - vtx.g *= g; - vtx.b *= b; - vtx.HWLight = 0; - } -} - -static void gSPBillboardVertex4_default(u32 v) -{ - GraphicsDrawer & drawer = dwnd().getDrawer(); - int i = 0; - SPVertex & vtx0 = drawer.getVertex(i); - for (int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); - vtx.x += vtx0.x; - vtx.y += vtx0.y; - vtx.z += vtx0.z; - vtx.w += vtx0.w; - } -} - -void gSPClipVertex4(u32 v) -{ - GraphicsDrawer & drawer = dwnd().getDrawer(); - for(int i = 0; i < 4; ++i) { - SPVertex & vtx = drawer.getVertex(v+i); - vtx.clip = 0; - if (vtx.x > +vtx.w) vtx.clip |= CLIP_POSX; - if (vtx.x < -vtx.w) vtx.clip |= CLIP_NEGX; - if (vtx.y > +vtx.w) vtx.clip |= CLIP_POSY; - if (vtx.y < -vtx.w) vtx.clip |= CLIP_NEGY; - if (vtx.w < 0.01f) vtx.clip |= CLIP_W; - } -} - -void gSPProcessVertex4(u32 v) -{ - if (gSP.changed & CHANGED_MATRIX) - _gSPCombineMatrices(); - - DisplayWindow & wnd = dwnd(); - GraphicsDrawer & drawer = wnd.getDrawer(); - float vPos[4][3]; - for(int i = 0; i < 4; ++i) { - SPVertex & vtx = drawer.getVertex(v+i); - vPos[i][0] = vtx.x; - vPos[i][1] = vtx.y; - vPos[i][2] = vtx.z; - vtx.modify = 0; - } - gSPTransformVertex4(v, gSP.matrix.combined ); - - if (wnd.isAdjustScreen() && (gDP.colorImage.width > VI.width * 98 / 100)) { - for(int i = 0; i < 4; ++i) { - SPVertex & vtx = drawer.getVertex(v+i); - vtx.x *= wnd.getAdjustScale(); - if (gSP.matrix.projection[3][2] == -1.f) - vtx.w *= wnd.getAdjustScale(); - } - } - - if (gSP.viewport.vscale[0] < 0) { - for(int i = 0; i < 4; ++i) { - SPVertex & vtx = drawer.getVertex(v+i); - vtx.x = -vtx.x; - } - } - if (gSP.viewport.vscale[1] < 0) { - for(int i = 0; i < 4; ++i) { - SPVertex & vtx = drawer.getVertex(v+i); - vtx.y = -vtx.y; - } - } - - if (gSP.matrix.billboard) - gSPBillboardVertex4(v); - - if (gSP.geometryMode & G_LIGHTING) { - if (gSP.geometryMode & G_POINT_LIGHTING) - gSPPointLightVertex4(v, vPos); - else - gSPLightVertex4(v); - - if ((gSP.geometryMode & G_TEXTURE_GEN) != 0) { - if (GBI.getMicrocodeType() != F3DFLX2) { - for(int i = 0; i < 4; ++i) { - SPVertex & vtx = drawer.getVertex(v+i); - f32 fLightDir[3] = {vtx.nx, vtx.ny, vtx.nz}; - f32 x, y; - if (gSP.lookatEnable) { - x = DotProduct(gSP.lookat.i_xyz[0], fLightDir); - y = DotProduct(gSP.lookat.i_xyz[1], fLightDir); - } else { - fLightDir[0] *= 128.0f; - fLightDir[1] *= 128.0f; - fLightDir[2] *= 128.0f; - TransformVectorNormalize(fLightDir, gSP.matrix.modelView[gSP.matrix.modelViewi]); - x = fLightDir[0]; - y = fLightDir[1]; - } - if (gSP.geometryMode & G_TEXTURE_GEN_LINEAR) { - vtx.s = acosf(-x) * 325.94931f; - vtx.t = acosf(-y) * 325.94931f; - } else { // G_TEXTURE_GEN - vtx.s = (x + 1.0f) * 512.0f; - vtx.t = (y + 1.0f) * 512.0f; - } - } - } else { - for(int i = 0; i < 4; ++i) { - SPVertex & vtx = drawer.getVertex(v+i); - const f32 intensity = DotProduct(gSP.lookat.i_xyz[0], &vtx.nx) * 128.0f; - const s16 index = static_cast(intensity); - vtx.a = _FIXED2FLOAT(RDRAM[(gSP.DMAIO_address + 128 + index) ^ 3], 8); - } - } - } - } else { - for(int i = 0; i < 4; ++i) - drawer.getVertex(v+i).HWLight = 0; - } - - gSPClipVertex4(v); -} - -#endif //__VEC4_OPT - -static void gSPLightVertex_default(SPVertex & _vtx) -{ - if (config.generalEmulation.enableHWLighting == 0) { - _vtx.HWLight = 0; - _vtx.r = gSP.lights.rgb[gSP.numLights][R]; - _vtx.g = gSP.lights.rgb[gSP.numLights][G]; - _vtx.b = gSP.lights.rgb[gSP.numLights][B]; - for (int i = 0; i < gSP.numLights; ++i){ - f32 intensity = DotProduct( &_vtx.nx, gSP.lights.i_xyz[i] ); - if (intensity < 0.0f) - intensity = 0.0f; - _vtx.r += gSP.lights.rgb[i][R] * intensity; - _vtx.g += gSP.lights.rgb[i][G] * intensity; - _vtx.b += gSP.lights.rgb[i][B] * intensity; - } - _vtx.r = min(1.0f, _vtx.r); - _vtx.g = min(1.0f, _vtx.g); - _vtx.b = min(1.0f, _vtx.b); - } else { - _vtx.HWLight = gSP.numLights; - _vtx.r = _vtx.nx; - _vtx.g = _vtx.ny; - _vtx.b = _vtx.nz; - } -} - -static -void gSPTransformVertex_default(float vtx[4], float mtx[4][4]) -{ - const float x = vtx[0]; - const float y = vtx[1]; - const float z = vtx[2]; - - vtx[0] = x * mtx[0][0] + y * mtx[1][0] + z * mtx[2][0] + mtx[3][0]; - vtx[1] = x * mtx[0][1] + y * mtx[1][1] + z * mtx[2][1] + mtx[3][1]; - vtx[2] = x * mtx[0][2] + y * mtx[1][2] + z * mtx[2][2] + mtx[3][2]; - vtx[3] = x * mtx[0][3] + y * mtx[1][3] + z * mtx[2][3] + mtx[3][3]; -} - -static -void gSPInverseTransformVector_default(float vec[3], float mtx[4][4]) -{ - const float x = vec[0]; - const float y = vec[1]; - const float z = vec[2]; - - vec[0] = mtx[0][0] * x + mtx[0][1] * y + mtx[0][2] * z; - vec[1] = mtx[1][0] * x + mtx[1][1] * y + mtx[1][2] * z; - vec[2] = mtx[2][0] * x + mtx[2][1] * y + mtx[2][2] * z; -} - -static void gSPPointLightVertex_default(SPVertex & _vtx, f32 * _vecPos) -{ - assert(_vecPos != nullptr); - _vtx.HWLight = 0; - _vtx.r = gSP.lights.rgb[gSP.numLights][R]; - _vtx.g = gSP.lights.rgb[gSP.numLights][G]; - _vtx.b = gSP.lights.rgb[gSP.numLights][B]; - gSPTransformVertex(_vecPos, gSP.matrix.modelView[gSP.matrix.modelViewi]); - - f32 intensity = 0.0f; - for (u32 l=0; l < gSP.numLights; ++l) { - if (gSP.lights.ca[l] != 0.0f) { - // Point lighting - f32 lvec[3] = {gSP.lights.pos_xyzw[l][X], gSP.lights.pos_xyzw[l][Y], gSP.lights.pos_xyzw[l][Z]}; - lvec[0] -= _vecPos[0]; - lvec[1] -= _vecPos[1]; - lvec[2] -= _vecPos[2]; - - const f32 K = (lvec[0] * lvec[0] + lvec[1] * lvec[1] + lvec[2] * lvec[2] * 2.0f); - const f32 KS = sqrtf(K); - const f32 L = (1.0f / KS) / 2.0f; - - gSPInverseTransformVector(lvec, gSP.matrix.modelView[gSP.matrix.modelViewi]); - - for (u32 i = 0; i < 3; ++i) { - lvec[i] = (lvec[i] * L * 4.0f * 2.0f); - if (lvec[i] < -1.0f) - lvec[i] = -1.0f; - if (lvec[i] > 1.0f) - lvec[i] = 1.0f; - } - - f32 V = lvec[0] * _vtx.nx + lvec[1] * _vtx.ny + lvec[2] * _vtx.nz; - if (V < -1.0f) - V = -1.0f; - if (V > 1.0f) - V = 1.0f; - - const f32 KSF = floorf(KS); - const f32 D = (KSF * gSP.lights.la[l] * 2.0f + KSF * KSF * gSP.lights.qa[l] / 8.0f) / 65536.0f + 1.0f; - intensity = V / D; - } else { - // Standard lighting - intensity = DotProduct(&_vtx.nx, gSP.lights.i_xyz[l]); - } - if (intensity > 0.0f) { - _vtx.r += gSP.lights.rgb[l][R] * intensity; - _vtx.g += gSP.lights.rgb[l][G] * intensity; - _vtx.b += gSP.lights.rgb[l][B] * intensity; - } - } - if (_vtx.r > 1.0f) _vtx.r = 1.0f; - if (_vtx.g > 1.0f) _vtx.g = 1.0f; - if (_vtx.b > 1.0f) _vtx.b = 1.0f; -} - -static void gSPLightVertex_CBFD(SPVertex & _vtx) -{ - f32 r = gSP.lights.rgb[gSP.numLights][R]; - f32 g = gSP.lights.rgb[gSP.numLights][G]; - f32 b = gSP.lights.rgb[gSP.numLights][B]; - - for (u32 l = 0; l < gSP.numLights; ++l) { - const f32 vx = (_vtx.x + gSP.vertexCoordMod[ 8])*gSP.vertexCoordMod[12] - gSP.lights.pos_xyzw[l][X]; - const f32 vy = (_vtx.y + gSP.vertexCoordMod[ 9])*gSP.vertexCoordMod[13] - gSP.lights.pos_xyzw[l][Y]; - const f32 vz = (_vtx.z + gSP.vertexCoordMod[10])*gSP.vertexCoordMod[14] - gSP.lights.pos_xyzw[l][Z]; - const f32 vw = (_vtx.w + gSP.vertexCoordMod[11])*gSP.vertexCoordMod[15] - gSP.lights.pos_xyzw[l][W]; - const f32 len = (vx*vx+vy*vy+vz*vz+vw*vw)/65536.0f; - f32 intensity = gSP.lights.ca[l] / len; - if (intensity > 1.0f) intensity = 1.0f; - r += gSP.lights.rgb[l][R] * intensity; - g += gSP.lights.rgb[l][G] * intensity; - b += gSP.lights.rgb[l][B] * intensity; - } - - r = min(1.0f, r); - g = min(1.0f, g); - b = min(1.0f, b); - - _vtx.r *= r; - _vtx.g *= g; - _vtx.b *= b; - _vtx.HWLight = 0; -} - -static void gSPPointLightVertex_CBFD(SPVertex & _vtx, float * /*_vPos*/) -{ - f32 r = gSP.lights.rgb[gSP.numLights][R]; - f32 g = gSP.lights.rgb[gSP.numLights][G]; - f32 b = gSP.lights.rgb[gSP.numLights][B]; - - f32 intensity = 0.0f; - for (u32 l = 0; l < gSP.numLights-1; ++l) { - intensity = DotProduct( &_vtx.nx, gSP.lights.rgb[l] ); - if ((gSP.lights.rgb[l][R] == 0.0f && gSP.lights.rgb[l][G] == 0.0f && gSP.lights.rgb[l][B] == 0.0f) || intensity < 0.0f) - continue; - if (gSP.lights.ca[l] > 0.0f) { - const f32 vx = (_vtx.x + gSP.vertexCoordMod[ 8])*gSP.vertexCoordMod[12] - gSP.lights.pos_xyzw[l][X]; - const f32 vy = (_vtx.y + gSP.vertexCoordMod[ 9])*gSP.vertexCoordMod[13] - gSP.lights.pos_xyzw[l][Y]; - const f32 vz = (_vtx.z + gSP.vertexCoordMod[10])*gSP.vertexCoordMod[14] - gSP.lights.pos_xyzw[l][Z]; - const f32 vw = (_vtx.w + gSP.vertexCoordMod[11])*gSP.vertexCoordMod[15] - gSP.lights.pos_xyzw[l][W]; - const f32 len = (vx*vx+vy*vy+vz*vz+vw*vw)/65536.0f; - float p_i = gSP.lights.ca[l] / len; - if (p_i > 1.0f) p_i = 1.0f; - intensity *= p_i; - } - r += gSP.lights.rgb[l][R] * intensity; - g += gSP.lights.rgb[l][G] * intensity; - b += gSP.lights.rgb[l][B] * intensity; - } - intensity = DotProduct( &_vtx.nx, gSP.lights.i_xyz[gSP.numLights-1] ); - if ((gSP.lights.rgb[gSP.numLights-1][R] != 0.0 || gSP.lights.rgb[gSP.numLights-1][G] != 0.0 || gSP.lights.rgb[gSP.numLights-1][B] != 0.0) && intensity > 0) { - r += gSP.lights.rgb[gSP.numLights-1][R] * intensity; - g += gSP.lights.rgb[gSP.numLights-1][G] * intensity; - b += gSP.lights.rgb[gSP.numLights-1][B] * intensity; - } - - r = min(1.0f, r); - g = min(1.0f, g); - b = min(1.0f, b); - - _vtx.r *= r; - _vtx.g *= g; - _vtx.b *= b; - _vtx.HWLight = 0; -} - -static -void gSPPointLightVertex_Acclaim(SPVertex & _vtx) -{ - _vtx.HWLight = 0; - - for (u32 l = 2; l < 10; ++l) { - if (gSP.lights.ca[l] < 0) - continue; - - const f32 dX = fabsf(gSP.lights.pos_xyzw[l][X] - _vtx.x); - const f32 dY = fabsf(gSP.lights.pos_xyzw[l][Y] - _vtx.y); - const f32 dZ = fabsf(gSP.lights.pos_xyzw[l][Z] - _vtx.z); - const f32 distance = dX + dY + dZ - gSP.lights.ca[l]; - if (distance >= 0.0f) - continue; - - const f32 light_intensity = -distance * gSP.lights.la[l]; - _vtx.r += gSP.lights.rgb[l][R] * light_intensity; - _vtx.g += gSP.lights.rgb[l][G] * light_intensity; - _vtx.b += gSP.lights.rgb[l][B] * light_intensity; - } - - if (_vtx.r > 1.0f) _vtx.r = 1.0f; - if (_vtx.g > 1.0f) _vtx.g = 1.0f; - if (_vtx.b > 1.0f) _vtx.b = 1.0f; -} - -static void gSPBillboardVertex_default(u32 v, u32 i) -{ - GraphicsDrawer & drawer = dwnd().getDrawer(); - SPVertex & vtx0 = drawer.getVertex(i); - SPVertex & vtx = drawer.getVertex(v); - vtx.x += vtx0.x; - vtx.y += vtx0.y; - vtx.z += vtx0.z; - vtx.w += vtx0.w; -} - -void gSPClipVertex(u32 v) -{ - SPVertex & vtx = dwnd().getDrawer().getVertex(v); - vtx.clip = 0; - if (vtx.x > +vtx.w) vtx.clip |= CLIP_POSX; - if (vtx.x < -vtx.w) vtx.clip |= CLIP_NEGX; - if (vtx.y > +vtx.w) vtx.clip |= CLIP_POSY; - if (vtx.y < -vtx.w) vtx.clip |= CLIP_NEGY; - if (vtx.w < 0.01f) vtx.clip |= CLIP_W; -} - -void gSPProcessVertex(u32 v) -{ - if (gSP.changed & CHANGED_MATRIX) - _gSPCombineMatrices(); - - DisplayWindow & wnd = dwnd(); - GraphicsDrawer & drawer = wnd.getDrawer(); - SPVertex & vtx = drawer.getVertex(v); - f32 vPos[4] = {vtx.x, vtx.y, vtx.z, 0.0f}; - gSPTransformVertex(&vtx.x, gSP.matrix.combined); - - if (wnd.isAdjustScreen() && (gDP.colorImage.width > VI.width * 98 / 100)) { - vtx.x *= wnd.getAdjustScale(); - if (gSP.matrix.projection[3][2] == -1.f) - vtx.w *= wnd.getAdjustScale(); - } - - if (gSP.viewport.vscale[0] < 0) - vtx.x = -vtx.x; - if (gSP.viewport.vscale[1] < 0) - vtx.y = -vtx.y; - - if (gSP.matrix.billboard) { - int i = 0; - gSPBillboardVertex(v, i); - } - - gSPClipVertex(v); - vtx.modify = 0; - - if (gSP.geometryMode & G_LIGHTING) { - if (gSP.geometryMode & G_POINT_LIGHTING) - gSPPointLightVertex(vtx, vPos); - else - gSPLightVertex(vtx); - - if ((gSP.geometryMode & G_TEXTURE_GEN) != 0) { - if (GBI.getMicrocodeType() != F3DFLX2) { - f32 fLightDir[3] = {vtx.nx, vtx.ny, vtx.nz}; - f32 x, y; - if (gSP.lookatEnable) { - x = DotProduct(gSP.lookat.i_xyz[0], fLightDir); - y = DotProduct(gSP.lookat.i_xyz[1], fLightDir); - } else { - fLightDir[0] *= 128.0f; - fLightDir[1] *= 128.0f; - fLightDir[2] *= 128.0f; - TransformVectorNormalize(fLightDir, gSP.matrix.modelView[gSP.matrix.modelViewi]); - x = fLightDir[0]; - y = fLightDir[1]; - } - if (gSP.geometryMode & G_TEXTURE_GEN_LINEAR) { - vtx.s = acosf(-x) * 325.94931f; - vtx.t = acosf(-y) * 325.94931f; - } else { // G_TEXTURE_GEN - vtx.s = (x + 1.0f) * 512.0f; - vtx.t = (y + 1.0f) * 512.0f; - } - } else { - const f32 intensity = DotProduct(gSP.lookat.i_xyz[0], &vtx.nx) * 128.0f; - const s16 index = static_cast(intensity); - vtx.a = _FIXED2FLOAT(RDRAM[(gSP.DMAIO_address + 128 + index) ^ 3], 8); - } - } - } else if (gSP.geometryMode & G_ACCLAIM_LIGHTING) { - gSPPointLightVertex_Acclaim(vtx); - } else { - vtx.HWLight = 0; - } - -} - void gSPLoadUcodeEx( u32 uc_start, u32 uc_dstart, u16 uc_dsize ) { gSP.matrix.modelViewi = 0; @@ -1028,17 +442,480 @@ void gSPUpdateLookatVectors() gSP.changed ^= CHANGED_LOOKAT; } +/*---------------------------------Vertex Load------------------------------------*/ + +static +void gSPTransformVector_default(float vtx[4], float mtx[4][4]) +{ + const float x = vtx[0]; + const float y = vtx[1]; + const float z = vtx[2]; + + vtx[0] = x * mtx[0][0] + y * mtx[1][0] + z * mtx[2][0] + mtx[3][0]; + vtx[1] = x * mtx[0][1] + y * mtx[1][1] + z * mtx[2][1] + mtx[3][1]; + vtx[2] = x * mtx[0][2] + y * mtx[1][2] + z * mtx[2][2] + mtx[3][2]; + vtx[3] = x * mtx[0][3] + y * mtx[1][3] + z * mtx[2][3] + mtx[3][3]; +} + +static +void gSPInverseTransformVector_default(float vec[3], float mtx[4][4]) +{ + const float x = vec[0]; + const float y = vec[1]; + const float z = vec[2]; + + vec[0] = mtx[0][0] * x + mtx[0][1] * y + mtx[0][2] * z; + vec[1] = mtx[1][0] * x + mtx[1][1] * y + mtx[1][2] * z; + vec[2] = mtx[2][0] * x + mtx[2][1] * y + mtx[2][2] * z; +} + +template +void gSPLightVertexStandard(u32 v, SPVertex * spVtx) +{ +#ifndef __NEON_OPT + if (config.generalEmulation.enableHWLighting == 0) { + for(int j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[v+j]; + vtx.r = gSP.lights.rgb[gSP.numLights][R]; + vtx.g = gSP.lights.rgb[gSP.numLights][G]; + vtx.b = gSP.lights.rgb[gSP.numLights][B]; + vtx.HWLight = 0; + + for (int i = 0; i < gSP.numLights; ++i) { + const f32 intensity = DotProduct( &vtx.nx, gSP.lights.i_xyz[i] ); + if (intensity > 0.0f) { + vtx.r += gSP.lights.rgb[i][R] * intensity; + vtx.g += gSP.lights.rgb[i][G] * intensity; + vtx.b += gSP.lights.rgb[i][B] * intensity; + } + } + vtx.r = min(1.0f, vtx.r); + vtx.g = min(1.0f, vtx.g); + vtx.b = min(1.0f, vtx.b); + } + } else { + for(int j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[v+j]; + vtx.HWLight = gSP.numLights; + vtx.r = vtx.nx; + vtx.g = vtx.ny; + vtx.b = vtx.nz; + } + } +#else + void gSPLightVertex_NEON(SPVertex & _vtx); + void gSPLightVertex4_NEON(u32 v, SPVertex * spVtx); + if (VNUM == 1) + gSPLightVertex_NEON(spVtx[v]); + else + gSPLightVertex4_NEON(v, spVtx); +#endif +} + +template +void gSPLightVertexCBFD(u32 v, SPVertex * spVtx) +{ + for (int j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[v + j]; + f32 r = gSP.lights.rgb[gSP.numLights][R]; + f32 g = gSP.lights.rgb[gSP.numLights][G]; + f32 b = gSP.lights.rgb[gSP.numLights][B]; + + for (u32 l = 0; l < gSP.numLights; ++l) { + const f32 vx = (vtx.x + gSP.vertexCoordMod[8])*gSP.vertexCoordMod[12] - gSP.lights.pos_xyzw[l][X]; + const f32 vy = (vtx.y + gSP.vertexCoordMod[9])*gSP.vertexCoordMod[13] - gSP.lights.pos_xyzw[l][Y]; + const f32 vz = (vtx.z + gSP.vertexCoordMod[10])*gSP.vertexCoordMod[14] - gSP.lights.pos_xyzw[l][Z]; + const f32 vw = (vtx.w + gSP.vertexCoordMod[11])*gSP.vertexCoordMod[15] - gSP.lights.pos_xyzw[l][W]; + const f32 len = (vx*vx + vy*vy + vz*vz + vw*vw) / 65536.0f; + f32 intensity = gSP.lights.ca[l] / len; + if (intensity > 1.0f) intensity = 1.0f; + r += gSP.lights.rgb[l][R] * intensity; + g += gSP.lights.rgb[l][G] * intensity; + b += gSP.lights.rgb[l][B] * intensity; + } + + r = min(1.0f, r); + g = min(1.0f, g); + b = min(1.0f, b); + + vtx.r *= r; + vtx.g *= g; + vtx.b *= b; + vtx.HWLight = 0; + } +} + +template +void gSPLightVertex(u32 _v, SPVertex * _spVtx) +{ + if (g_ConkerUcode) + gSPLightVertexCBFD(_v, _spVtx); + else + gSPLightVertexStandard(_v, _spVtx); +} + +void gSPLightVertex(SPVertex & _vtx) +{ + gSPLightVertex<1>(0, &_vtx); +} + +template +void gSPPointLightVertexZeldaMM(u32 v, float _vecPos[VNUM][4], SPVertex * spVtx) +{ + f32 intensity = 0.0f; + for (int j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[v + j]; + vtx.HWLight = 0; + vtx.r = gSP.lights.rgb[gSP.numLights][R]; + vtx.g = gSP.lights.rgb[gSP.numLights][G]; + vtx.b = gSP.lights.rgb[gSP.numLights][B]; + gSPTransformVector(_vecPos[j], gSP.matrix.modelView[gSP.matrix.modelViewi]); + + for (u32 l = 0; l < gSP.numLights; ++l) { + if (gSP.lights.ca[l] != 0.0f) { + // Point lighting + f32 lvec[3] = { gSP.lights.pos_xyzw[l][X], gSP.lights.pos_xyzw[l][Y], gSP.lights.pos_xyzw[l][Z] }; + lvec[0] -= _vecPos[j][0]; + lvec[1] -= _vecPos[j][1]; + lvec[2] -= _vecPos[j][2]; + + const f32 K = lvec[0] * lvec[0] + lvec[1] * lvec[1] + lvec[2] * lvec[2] * 2.0f; + const f32 KS = sqrtf(K); + + gSPInverseTransformVector(lvec, gSP.matrix.modelView[gSP.matrix.modelViewi]); + + for (u32 i = 0; i < 3; ++i) { + lvec[i] = (4.0f * lvec[i] / KS); + if (lvec[i] < -1.0f) + lvec[i] = -1.0f; + if (lvec[i] > 1.0f) + lvec[i] = 1.0f; + } + + f32 V = lvec[0] * vtx.nx + lvec[1] * vtx.ny + lvec[2] * vtx.nz; + if (V < -1.0f) + V = -1.0f; + if (V > 1.0f) + V = 1.0f; + + const f32 KSF = floorf(KS); + const f32 D = (KSF * gSP.lights.la[l] * 2.0f + KSF * KSF * gSP.lights.qa[l] / 8.0f) / 65536.0f + 1.0f; + intensity = V / D; + } else { + // Standard lighting + intensity = DotProduct(&vtx.nx, gSP.lights.i_xyz[l]); + } + if (intensity > 0.0f) { + vtx.r += gSP.lights.rgb[l][R] * intensity; + vtx.g += gSP.lights.rgb[l][G] * intensity; + vtx.b += gSP.lights.rgb[l][B] * intensity; + } + } + if (vtx.r > 1.0f) vtx.r = 1.0f; + if (vtx.g > 1.0f) vtx.g = 1.0f; + if (vtx.b > 1.0f) vtx.b = 1.0f; + } +} + +template +void gSPPointLightVertexCBFD(u32 v, SPVertex * spVtx) +{ + f32 intensity = 0.0f; + for (int j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[v + j]; + f32 r = gSP.lights.rgb[gSP.numLights][R]; + f32 g = gSP.lights.rgb[gSP.numLights][G]; + f32 b = gSP.lights.rgb[gSP.numLights][B]; + + for (u32 l = 0; l < gSP.numLights - 1; ++l) { + intensity = DotProduct(&vtx.nx, gSP.lights.xyz[l]); + if ((gSP.lights.rgb[l][R] == 0.0f && gSP.lights.rgb[l][G] == 0.0f && gSP.lights.rgb[l][B] == 0.0f) || intensity < 0.0f) + continue; + if (gSP.lights.ca[l] > 0.0f) { + const f32 vx = (vtx.x + gSP.vertexCoordMod[8])*gSP.vertexCoordMod[12] - gSP.lights.pos_xyzw[l][X]; + const f32 vy = (vtx.y + gSP.vertexCoordMod[9])*gSP.vertexCoordMod[13] - gSP.lights.pos_xyzw[l][Y]; + const f32 vz = (vtx.z + gSP.vertexCoordMod[10])*gSP.vertexCoordMod[14] - gSP.lights.pos_xyzw[l][Z]; + const f32 vw = (vtx.w + gSP.vertexCoordMod[11])*gSP.vertexCoordMod[15] - gSP.lights.pos_xyzw[l][W]; + const f32 len = (vx*vx + vy*vy + vz*vz + vw*vw) / 65536.0f; + float p_i = gSP.lights.ca[l] / len; + if (p_i > 1.0f) p_i = 1.0f; + intensity *= p_i; + } + r += gSP.lights.rgb[l][R] * intensity; + g += gSP.lights.rgb[l][G] * intensity; + b += gSP.lights.rgb[l][B] * intensity; + } + + intensity = DotProduct(&vtx.nx, gSP.lights.i_xyz[gSP.numLights - 1]); + if ((gSP.lights.i_xyz[gSP.numLights - 1][R] != 0.0 || gSP.lights.i_xyz[gSP.numLights - 1][G] != 0.0 || gSP.lights.i_xyz[gSP.numLights - 1][B] != 0.0) && intensity > 0) { + r += gSP.lights.rgb[gSP.numLights - 1][R] * intensity; + g += gSP.lights.rgb[gSP.numLights - 1][G] * intensity; + b += gSP.lights.rgb[gSP.numLights - 1][B] * intensity; + } + + r = min(1.0f, r); + g = min(1.0f, g); + b = min(1.0f, b); + + vtx.r *= r; + vtx.g *= g; + vtx.b *= b; + vtx.HWLight = 0; + } +} + +template +void gSPPointLightVertex(u32 _v, float _vecPos[VNUM][4], SPVertex * _spVtx) +{ + if (g_ConkerUcode) + gSPPointLightVertexCBFD(_v, _spVtx); + else + gSPPointLightVertexZeldaMM(_v, _vecPos, _spVtx); +} + +template +void gSPPointLightVertexAcclaim(u32 v, SPVertex * spVtx) +{ + for (int j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[v + j]; + vtx.HWLight = 0; + + for (u32 l = 2; l < 10; ++l) { + if (gSP.lights.ca[l] < 0) + continue; + + const f32 dX = fabsf(gSP.lights.pos_xyzw[l][X] - vtx.x); + const f32 dY = fabsf(gSP.lights.pos_xyzw[l][Y] - vtx.y); + const f32 dZ = fabsf(gSP.lights.pos_xyzw[l][Z] - vtx.z); + const f32 distance = dX + dY + dZ - gSP.lights.ca[l]; + if (distance >= 0.0f) + continue; + + const f32 light_intensity = -distance * gSP.lights.la[l]; + vtx.r += gSP.lights.rgb[l][R] * light_intensity; + vtx.g += gSP.lights.rgb[l][G] * light_intensity; + vtx.b += gSP.lights.rgb[l][B] * light_intensity; + } + + if (vtx.r > 1.0f) vtx.r = 1.0f; + if (vtx.g > 1.0f) vtx.g = 1.0f; + if (vtx.b > 1.0f) vtx.b = 1.0f; + } +} + +template +void gSPBillboardVertex(u32 v, SPVertex * spVtx) +{ +#ifndef __NEON_OPT + SPVertex & vtx0 = spVtx[0]; + for (u32 j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[v + j]; + vtx.x += vtx0.x; + vtx.y += vtx0.y; + vtx.z += vtx0.z; + vtx.w += vtx0.w; + } +#else + if (VNUM == 1) { + SPVertex & vtx0 = spVtx[0]; + SPVertex & vtx = spVtx[v]; + vtx.x += vtx0.x; + vtx.y += vtx0.y; + vtx.z += vtx0.z; + vtx.w += vtx0.w; + } else { + void gSPBillboardVertex4NEON(u32 v); + gSPBillboardVertex4NEON(v); + } +#endif //__NEON_OPT +} + +template +void gSPClipVertex(u32 v, SPVertex * spVtx) +{ + for (u32 j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[v+j]; + vtx.clip = 0; + if (vtx.x > +vtx.w) vtx.clip |= CLIP_POSX; + if (vtx.x < -vtx.w) vtx.clip |= CLIP_NEGX; + if (vtx.y > +vtx.w) vtx.clip |= CLIP_POSY; + if (vtx.y < -vtx.w) vtx.clip |= CLIP_NEGY; + if (vtx.w < 0.01f) vtx.clip |= CLIP_W; + } +} + +template +void gSPTransformVertex(u32 v, SPVertex * spVtx, float mtx[4][4]) +{ +#ifndef __NEON_OPT + float x, y, z; + for (int i = 0; i < VNUM; ++i) { + SPVertex & vtx = spVtx[v+i]; + x = vtx.x; + y = vtx.y; + z = vtx.z; + vtx.x = x * mtx[0][0] + y * mtx[1][0] + z * mtx[2][0] + mtx[3][0]; + vtx.y = x * mtx[0][1] + y * mtx[1][1] + z * mtx[2][1] + mtx[3][1]; + vtx.z = x * mtx[0][2] + y * mtx[1][2] + z * mtx[2][2] + mtx[3][2]; + vtx.w = x * mtx[0][3] + y * mtx[1][3] + z * mtx[2][3] + mtx[3][3]; + } +#else + void gSPTransformVector_NEON(float vtx[4], float mtx[4][4]); + void gSPTransformVertex4NEON(u32 v, float mtx[4][4]); + if (VNUM == 1) + gSPTransformVector_NEON(&spVtx[v].x, mtx); + else + gSPTransformVertex4NEON(v, mtx); +#endif //__NEON_OPT +} + +template +void gSPProcessVertex(u32 v, SPVertex * spVtx) +{ + if (gSP.changed & CHANGED_MATRIX) + _gSPCombineMatrices(); + + float vPos[VNUM][4]; + for(u32 i = 0; i < VNUM; ++i) { + SPVertex & vtx = spVtx[v+i]; + vPos[i][0] = vtx.x; + vPos[i][1] = vtx.y; + vPos[i][2] = vtx.z; + vPos[i][3] = 0.0f; + vtx.modify = 0; + } + + gSPTransformVertex(v, spVtx, gSP.matrix.combined ); + + if (dwnd().isAdjustScreen() && (gDP.colorImage.width > VI.width * 98 / 100)) { + const f32 adjustScale = dwnd().getAdjustScale(); + for(int i = 0; i < VNUM; ++i) { + SPVertex & vtx = spVtx[v+i]; + vtx.x *= adjustScale; + if (gSP.matrix.projection[3][2] == -1.f) + vtx.w *= adjustScale; + } + } + if (gSP.viewport.vscale[0] < 0) { + for(int i = 0; i < VNUM; ++i) { + SPVertex & vtx = spVtx[v+i]; + vtx.x = -vtx.x; + } + } + if (gSP.viewport.vscale[1] < 0) { + for(int i = 0; i < VNUM; ++i) { + SPVertex & vtx = spVtx[v+i]; + vtx.y = -vtx.y; + } + } + + if (gSP.matrix.billboard) + gSPBillboardVertex(v, spVtx); + + gSPClipVertex(v, spVtx); + + if (gSP.geometryMode & G_LIGHTING) { + if (gSP.geometryMode & G_POINT_LIGHTING) + gSPPointLightVertex(v, vPos, spVtx); + else + gSPLightVertex(v, spVtx); + + if ((gSP.geometryMode & G_TEXTURE_GEN) != 0) { + if (GBI.getMicrocodeType() != F3DFLX2) { + for(int i = 0; i < VNUM; ++i) { + SPVertex & vtx = spVtx[v+i]; + f32 fLightDir[3] = {vtx.nx, vtx.ny, vtx.nz}; + f32 x, y; + if (gSP.lookatEnable) { + x = DotProduct(gSP.lookat.i_xyz[0], fLightDir); + y = DotProduct(gSP.lookat.i_xyz[1], fLightDir); + } else { + fLightDir[0] *= 128.0f; + fLightDir[1] *= 128.0f; + fLightDir[2] *= 128.0f; + TransformVectorNormalize(fLightDir, gSP.matrix.modelView[gSP.matrix.modelViewi]); + x = fLightDir[0]; + y = fLightDir[1]; + } + if (gSP.geometryMode & G_TEXTURE_GEN_LINEAR) { + vtx.s = acosf(-x) * 325.94931f; + vtx.t = acosf(-y) * 325.94931f; + } else { // G_TEXTURE_GEN + vtx.s = (x + 1.0f) * 512.0f; + vtx.t = (y + 1.0f) * 512.0f; + } + } + } else { + for(int i = 0; i < VNUM; ++i) { + SPVertex & vtx = spVtx[v+i]; + const f32 intensity = DotProduct(gSP.lookat.i_xyz[0], &vtx.nx) * 128.0f; + const s16 index = static_cast(intensity); + vtx.a = _FIXED2FLOAT(RDRAM[(gSP.DMAIO_address + 128 + index) ^ 3], 8); + } + } + } + } else if (gSP.geometryMode & G_ACCLAIM_LIGHTING) { + gSPPointLightVertexAcclaim(v, spVtx); + } else { + for(u32 i = 0; i < VNUM; ++i) + spVtx[v].HWLight = 0; + } + + for(u32 i = 0; i < VNUM; ++i) { + SPVertex & vtx = spVtx[v+i]; + DebugMsg(DEBUG_DETAIL, "v%d - x: %f, y: %f, z: %f, w: %f, s: %f, t: %f, r=%02f, g=%02f, b=%02f, a=%02f\n", + i, vtx.x, vtx.y, vtx.z, vtx.w, vtx.s, vtx.t, vtx.r, vtx.g, vtx.b, vtx.a); + } +} + +template +u32 gSPLoadVertexData(const Vertex *orgVtx, SPVertex * spVtx, u32 v0, u32 vi, u32 n) +{ + const u32 end = n - (n%VNUM) + v0; + for (; vi < end; vi += VNUM) { + for(u32 j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[vi+j]; + vtx.x = orgVtx->x; + vtx.y = orgVtx->y; + vtx.z = orgVtx->z; + //vtx.flag = vertex->flag; + vtx.s = _FIXED2FLOAT( orgVtx->s, 5 ); + vtx.t = _FIXED2FLOAT( orgVtx->t, 5 ); + if (gSP.geometryMode & G_LIGHTING) { + vtx.nx = _FIXED2FLOAT( orgVtx->normal.x, 7 ); + vtx.ny = _FIXED2FLOAT( orgVtx->normal.y, 7 ); + vtx.nz = _FIXED2FLOAT( orgVtx->normal.z, 7 ); + vtx.a = orgVtx->color.a * 0.0039215689f; + } else { + vtx.r = orgVtx->color.r * 0.0039215689f; + vtx.g = orgVtx->color.g * 0.0039215689f; + vtx.b = orgVtx->color.b * 0.0039215689f; + vtx.a = orgVtx->color.a * 0.0039215689f; + } + ++orgVtx; + } + gSPProcessVertex(vi, spVtx); + } + return vi; +} + void gSPVertex(u32 a, u32 n, u32 v0) { - u32 address = RSP_SegmentToPhysical(a); + DebugMsg(DEBUG_NORMAL, "gSPVertex n = %i, v0 = %i, from %08x\n", n, v0, a); + + if ((n + v0) > INDEXMAP_SIZE) { + LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); + DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "//Using Vertex outside buffer v0 = %i, n = %i\n", v0, n); + return; + } + + const u32 address = RSP_SegmentToPhysical(a); if ((address + sizeof(Vertex)* n) > RDRAMSize) { DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "gSPVertex Using Vertex outside RDRAM n = %i, v0 = %i, from %08x\n", n, v0, a); return; } - DebugMsg(DEBUG_NORMAL, "gSPVertex n = %i, v0 = %i, from %08x\n", n, v0, a); - if ((gSP.geometryMode & G_LIGHTING) != 0) { if ((gSP.changed & CHANGED_LIGHT) != 0) @@ -1048,71 +925,56 @@ void gSPVertex(u32 a, u32 n, u32 v0) gSPUpdateLookatVectors(); } - Vertex *vertex = (Vertex*)&RDRAM[address]; + const Vertex *vertex = (Vertex*)&RDRAM[address]; + SPVertex * spVtx = dwnd().getDrawer().getVertexPtr(0); + u32 i = gSPLoadVertexData(vertex, spVtx, v0, v0, n); + if (i < n + v0) + gSPLoadVertexData<1>(vertex + (i - v0), spVtx, v0, i, n); +} + +template +u32 gSPLoadCIVertexData(const PDVertex *orgVtx, SPVertex * spVtx, u32 v0, u32 vi, u32 n) +{ + const u32 end = n - (n%VNUM) + v0; + for (; vi < end; vi += VNUM) { + for(u32 j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[vi+j]; + vtx.x = orgVtx->x; + vtx.y = orgVtx->y; + vtx.z = orgVtx->z; + vtx.s = _FIXED2FLOAT( orgVtx->s, 5 ); + vtx.t = _FIXED2FLOAT( orgVtx->t, 5 ); + u8 *color = &RDRAM[gSP.vertexColorBase + (orgVtx->ci & 0xff)]; - GraphicsDrawer & drawer = dwnd().getDrawer(); - if ((n + v0) <= INDEXMAP_SIZE) { - unsigned int i = v0; -#ifdef __VEC4_OPT - for (; i < n - (n%4) + v0; i += 4) { - u32 v = i; - for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); - vtx.x = vertex->x; - vtx.y = vertex->y; - vtx.z = vertex->z; - //vtx.flag = vertex->flag; - vtx.s = _FIXED2FLOAT( vertex->s, 5 ); - vtx.t = _FIXED2FLOAT( vertex->t, 5 ); - if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = _FIXED2FLOAT( vertex->normal.x, 7 ); - vtx.ny = _FIXED2FLOAT( vertex->normal.y, 7 ); - vtx.nz = _FIXED2FLOAT( vertex->normal.z, 7 ); - vtx.a = vertex->color.a * 0.0039215689f; - } else { - vtx.r = vertex->color.r * 0.0039215689f; - vtx.g = vertex->color.g * 0.0039215689f; - vtx.b = vertex->color.b * 0.0039215689f; - vtx.a = vertex->color.a * 0.0039215689f; - } - vertex++; - } - gSPProcessVertex4(v); - } -#endif - for (; i < n + v0; ++i) { - u32 v = i; - SPVertex & vtx = drawer.getVertex(v); - vtx.x = vertex->x; - vtx.y = vertex->y; - vtx.z = vertex->z; - vtx.s = _FIXED2FLOAT( vertex->s, 5 ); - vtx.t = _FIXED2FLOAT( vertex->t, 5 ); if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = _FIXED2FLOAT(vertex->normal.x, 7); - vtx.ny = _FIXED2FLOAT(vertex->normal.y, 7); - vtx.nz = _FIXED2FLOAT(vertex->normal.z, 7); - vtx.a = vertex->color.a * 0.0039215689f; + vtx.nx = _FIXED2FLOAT((s8)color[3], 7); + vtx.ny = _FIXED2FLOAT((s8)color[2], 7); + vtx.nz = _FIXED2FLOAT((s8)color[1], 7); + vtx.a = color[0] * 0.0039215689f; } else { - vtx.r = vertex->color.r * 0.0039215689f; - vtx.g = vertex->color.g * 0.0039215689f; - vtx.b = vertex->color.b * 0.0039215689f; - vtx.a = vertex->color.a * 0.0039215689f; + vtx.r = color[3] * 0.0039215689f; + vtx.g = color[2] * 0.0039215689f; + vtx.b = color[1] * 0.0039215689f; + vtx.a = color[0] * 0.0039215689f; } - gSPProcessVertex(v); - DebugMsg(DEBUG_DETAIL, "v%d - x: %f, y: %f, z: %f, w: %f, s: %f, t: %f, r=%02f, g=%02f, b=%02f, a=%02f\n", i, vtx.x, vtx.y, vtx.z, vtx.w, vtx.s, vtx.t, vtx.r, vtx.g, vtx.b, vtx.a); - vertex++; + ++orgVtx; } - } else { - LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); - DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "//Using Vertex outside buffer v0 = %i, n = %i\n", v0, n); + gSPProcessVertex(vi, spVtx); } + return vi; } void gSPCIVertex( u32 a, u32 n, u32 v0 ) { + DebugMsg(DEBUG_NORMAL, "gSPCIVertex n = %i, v0 = %i, from %08x\n", n, v0, a); - u32 address = RSP_SegmentToPhysical( a ); + if ((n + v0) > INDEXMAP_SIZE) { + LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); + DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "//Using Vertex outside buffer v0 = %i, n = %i\n", v0, n); + return; + } + + const u32 address = RSP_SegmentToPhysical( a ); if ((address + sizeof( PDVertex ) * n) > RDRAMSize) return; @@ -1126,118 +988,21 @@ void gSPCIVertex( u32 a, u32 n, u32 v0 ) gSPUpdateLookatVectors(); } - PDVertex *vertex = (PDVertex*)&RDRAM[address]; - - GraphicsDrawer & drawer = dwnd().getDrawer(); - if ((n + v0) <= INDEXMAP_SIZE) { - unsigned int i = v0; -#ifdef __VEC4_OPT - for (; i < n - (n%4) + v0; i += 4) { - u32 v = i; - for(unsigned int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v + j); - vtx.x = vertex->x; - vtx.y = vertex->y; - vtx.z = vertex->z; - vtx.s = _FIXED2FLOAT( vertex->s, 5 ); - vtx.t = _FIXED2FLOAT( vertex->t, 5 ); - u8 *color = &RDRAM[gSP.vertexColorBase + (vertex->ci & 0xff)]; - - if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = _FIXED2FLOAT((s8)color[3], 7); - vtx.ny = _FIXED2FLOAT((s8)color[2], 7); - vtx.nz = _FIXED2FLOAT((s8)color[1], 7); - vtx.a = color[0] * 0.0039215689f; - } else { - vtx.r = color[3] * 0.0039215689f; - vtx.g = color[2] * 0.0039215689f; - vtx.b = color[1] * 0.0039215689f; - vtx.a = color[0] * 0.0039215689f; - } - vertex++; - } - gSPProcessVertex4(v); - } -#endif - for(; i < n + v0; ++i) { - u32 v = i; - SPVertex & vtx = drawer.getVertex(v); - vtx.x = vertex->x; - vtx.y = vertex->y; - vtx.z = vertex->z; - vtx.s = _FIXED2FLOAT( vertex->s, 5 ); - vtx.t = _FIXED2FLOAT( vertex->t, 5 ); - u8 *color = &RDRAM[gSP.vertexColorBase + (vertex->ci & 0xff)]; - - if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = _FIXED2FLOAT((s8)color[3], 7); - vtx.ny = _FIXED2FLOAT((s8)color[2], 7); - vtx.nz = _FIXED2FLOAT((s8)color[1], 7); - vtx.a = color[0] * 0.0039215689f; - } else { - vtx.r = color[3] * 0.0039215689f; - vtx.g = color[2] * 0.0039215689f; - vtx.b = color[1] * 0.0039215689f; - vtx.a = color[0] * 0.0039215689f; - } - - gSPProcessVertex(v); - vertex++; - } - } else { - LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); - DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "//Using Vertex outside buffer v0 = %i, n = %i\n", v0, n); - } + const PDVertex *vertex = (PDVertex*)&RDRAM[address]; + SPVertex * spVtx = dwnd().getDrawer().getVertexPtr(0); + u32 i = gSPLoadCIVertexData(vertex, spVtx, v0, v0, n); + if (i < n + v0) + gSPLoadCIVertexData<1>(vertex + (i - v0), spVtx, v0, i, n); } -void gSPDMAVertex( u32 a, u32 n, u32 v0 ) + +template +u32 gSPLoadDMAVertexData(u32 address, SPVertex * spVtx, u32 v0, u32 vi, u32 n) { - - u32 address = gSP.DMAOffsets.vtx + RSP_SegmentToPhysical(a); - - if ((address + 10 * n) > RDRAMSize) - return; - - if ((gSP.geometryMode & G_LIGHTING) != 0) { - - if ((gSP.changed & CHANGED_LIGHT) != 0) - gSPUpdateLightVectors(); - - if (((gSP.geometryMode & G_TEXTURE_GEN) != 0) && ((gSP.changed & CHANGED_LOOKAT) != 0)) - gSPUpdateLookatVectors(); - } - - GraphicsDrawer & drawer = dwnd().getDrawer(); - if ((n + v0) <= INDEXMAP_SIZE) { - u32 i = v0; -#ifdef __VEC4_OPT - for (; i < n - (n%4) + v0; i += 4) { - u32 v = i; - for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v + j); - vtx.x = *(s16*)&RDRAM[address ^ 2]; - vtx.y = *(s16*)&RDRAM[(address + 2) ^ 2]; - vtx.z = *(s16*)&RDRAM[(address + 4) ^ 2]; - - if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = _FIXED2FLOAT(*(s8*)&RDRAM[(address + 6) ^ 3], 7); - vtx.ny = _FIXED2FLOAT(*(s8*)&RDRAM[(address + 7) ^ 3], 7); - vtx.nz = _FIXED2FLOAT(*(s8*)&RDRAM[(address + 8) ^ 3], 7); - vtx.a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f; - } else { - vtx.r = *(u8*)&RDRAM[(address + 6) ^ 3] * 0.0039215689f; - vtx.g = *(u8*)&RDRAM[(address + 7) ^ 3] * 0.0039215689f; - vtx.b = *(u8*)&RDRAM[(address + 8) ^ 3] * 0.0039215689f; - vtx.a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f; - } - address += 10; - } - gSPProcessVertex4(v); - } -#endif - for (; i < n + v0; ++i) { - u32 v = i; - SPVertex & vtx = drawer.getVertex(v); + const u32 end = n - (n%VNUM) + v0; + for (; vi < end; vi += VNUM) { + for(u32 j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[vi+j]; vtx.x = *(s16*)&RDRAM[address ^ 2]; vtx.y = *(s16*)&RDRAM[(address + 2) ^ 2]; vtx.z = *(s16*)&RDRAM[(address + 4) ^ 2]; @@ -1253,19 +1018,83 @@ void gSPDMAVertex( u32 a, u32 n, u32 v0 ) vtx.b = *(u8*)&RDRAM[(address + 8) ^ 3] * 0.0039215689f; vtx.a = *(u8*)&RDRAM[(address + 9) ^ 3] * 0.0039215689f; } - - gSPProcessVertex(v); address += 10; } - } else { + gSPProcessVertex(vi, spVtx); + } + return vi; +} + +void gSPDMAVertex( u32 a, u32 n, u32 v0 ) +{ + DebugMsg(DEBUG_NORMAL, "gSPDMAVertex n = %i, v0 = %i, from %08x\n", n, v0, a); + + if ((n + v0) > INDEXMAP_SIZE) { LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "//Using Vertex outside buffer v0 = %i, n = %i\n", v0, n); + return; } + + const u32 address = gSP.DMAOffsets.vtx + RSP_SegmentToPhysical(a); + + if ((address + 10 * n) > RDRAMSize) + return; + + if ((gSP.geometryMode & G_LIGHTING) != 0) { + + if ((gSP.changed & CHANGED_LIGHT) != 0) + gSPUpdateLightVectors(); + + if (((gSP.geometryMode & G_TEXTURE_GEN) != 0) && ((gSP.changed & CHANGED_LOOKAT) != 0)) + gSPUpdateLookatVectors(); + } + + SPVertex * spVtx = dwnd().getDrawer().getVertexPtr(0); + u32 i = gSPLoadDMAVertexData(address, spVtx, v0, v0, n); + if (i < n + v0) + gSPLoadDMAVertexData<1>(address + (i - v0) * 10, spVtx, v0, i, n); +} + +template +u32 gSPLoadCBFDVertexData(const Vertex *orgVtx, SPVertex * spVtx, u32 v0, u32 vi, u32 n) +{ + const u32 end = n - (n%VNUM) + v0; + for (; vi < end; vi += VNUM) { + for(u32 j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[vi+j]; + vtx.x = orgVtx->x; + vtx.y = orgVtx->y; + vtx.z = orgVtx->z; + vtx.s = _FIXED2FLOAT( orgVtx->s, 5 ); + vtx.t = _FIXED2FLOAT( orgVtx->t, 5 ); + if (gSP.geometryMode & G_LIGHTING) { + const u32 normaleAddrOffset = ((vi+j)<<1); + vtx.nx = _FIXED2FLOAT(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 0) ^ 3], 7); + vtx.ny = _FIXED2FLOAT(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 1) ^ 3], 7); + vtx.nz = _FIXED2FLOAT((s8)(orgVtx->flag & 0xFF), 7); + } + vtx.r = orgVtx->color.r * 0.0039215689f; + vtx.g = orgVtx->color.g * 0.0039215689f; + vtx.b = orgVtx->color.b * 0.0039215689f; + vtx.a = orgVtx->color.a * 0.0039215689f; + ++orgVtx; + } + gSPProcessVertex(vi, spVtx); + } + return vi; } void gSPCBFDVertex( u32 a, u32 n, u32 v0 ) { - u32 address = RSP_SegmentToPhysical(a); + DebugMsg(DEBUG_NORMAL, "gSPCBFDVertex n = %i, v0 = %i, from %08x\n", n, v0, a); + + if ((n + v0) > INDEXMAP_SIZE) { + LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); + DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "//Using Vertex outside buffer v0 = %i, n = %i\n", v0, n); + return; + } + + const u32 address = RSP_SegmentToPhysical(a); if ((address + sizeof( Vertex ) * n) > RDRAMSize) return; @@ -1279,61 +1108,127 @@ void gSPCBFDVertex( u32 a, u32 n, u32 v0 ) gSPUpdateLookatVectors(); } - Vertex *vertex = (Vertex*)&RDRAM[address]; + const Vertex *vertex = (Vertex*)&RDRAM[address]; + SPVertex * spVtx = dwnd().getDrawer().getVertexPtr(0); + u32 i = gSPLoadCBFDVertexData(vertex, spVtx, v0, v0, n); + if (i < n + v0) + gSPLoadCBFDVertexData<1>(vertex + (i - v0), spVtx, v0, i, n); +} - GraphicsDrawer & drawer = dwnd().getDrawer(); - if ((n + v0) <= INDEXMAP_SIZE) { - unsigned int i = v0; -#ifdef __VEC4_OPT - for (; i < n - (n%4) + v0; i += 4) { - u32 v = i; - for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); - vtx.x = vertex->x; - vtx.y = vertex->y; - vtx.z = vertex->z; - vtx.s = _FIXED2FLOAT( vertex->s, 5 ); - vtx.t = _FIXED2FLOAT( vertex->t, 5 ); - if (gSP.geometryMode & G_LIGHTING) { - const u32 normaleAddrOffset = ((v+j)<<1); - vtx.nx = _FIXED2FLOAT(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 0) ^ 3], 7); - vtx.ny = _FIXED2FLOAT(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 1) ^ 3], 7); - vtx.nz = _FIXED2FLOAT((s8)(vertex->flag & 0xFF), 7); - } - vtx.r = vertex->color.r * 0.0039215689f; - vtx.g = vertex->color.g * 0.0039215689f; - vtx.b = vertex->color.b * 0.0039215689f; - vtx.a = vertex->color.a * 0.0039215689f; - vertex++; - } - gSPProcessVertex4(v); - } -#endif - for (; i < n + v0; ++i) { - u32 v = i; - SPVertex & vtx = drawer.getVertex(v); - vtx.x = vertex->x; - vtx.y = vertex->y; - vtx.z = vertex->z; - vtx.s = _FIXED2FLOAT( vertex->s, 5 ); - vtx.t = _FIXED2FLOAT( vertex->t, 5 ); +static +void calcF3DAMTexCoords(const Vertex * _vertex, SPVertex & _vtx) +{ + const u32 s0 = (u32)_vertex->s; + const u32 t0 = (u32)_vertex->t; + const u32 acum_0 = ((_SHIFTR(gSP.textureCoordScaleOrg, 0, 16) * t0) << 1) + 0x8000; + const u32 acum_1 = ((_SHIFTR(gSP.textureCoordScale[1], 0, 16) * t0) << 1) + 0x8000; + const u32 sres = ((_SHIFTR(gSP.textureCoordScaleOrg, 16, 16) * s0) << 1) + acum_0; + const u32 tres = ((_SHIFTR(gSP.textureCoordScale[1], 16, 16) * s0) << 1) + acum_1; + const s16 s = _SHIFTR(sres, 16, 16) + _SHIFTR(gSP.textureCoordScale[0], 16, 16); + const s16 t = _SHIFTR(tres, 16, 16) + _SHIFTR(gSP.textureCoordScale[0], 0, 16); + + _vtx.s = _FIXED2FLOAT( s, 5 ); + _vtx.t = _FIXED2FLOAT( t, 5 ); +} + +template +u32 gSPLoadF3DAMVertexData(const Vertex *orgVtx, SPVertex * spVtx, u32 v0, u32 vi, u32 n) +{ + const u32 end = n - (n%VNUM) + v0; + for (; vi < end; vi += VNUM) { + for(u32 j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[vi+j]; + vtx.x = orgVtx->x; + vtx.y = orgVtx->y; + vtx.z = orgVtx->z; + //vtx.flag = orgVtx->flag; + calcF3DAMTexCoords(orgVtx, vtx); if (gSP.geometryMode & G_LIGHTING) { - const u32 normaleAddrOffset = (v<<1); - vtx.nx = _FIXED2FLOAT(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 0) ^ 3], 7); - vtx.ny = _FIXED2FLOAT(((s8*)RDRAM)[(gSP.vertexNormalBase + normaleAddrOffset + 1) ^ 3], 7); - vtx.nz = _FIXED2FLOAT((s8)(vertex->flag & 0xFF), 7); + vtx.nx = _FIXED2FLOAT( orgVtx->normal.x, 7 ); + vtx.ny = _FIXED2FLOAT( orgVtx->normal.y, 7 ); + vtx.nz = _FIXED2FLOAT( orgVtx->normal.z, 7 ); + vtx.a = orgVtx->color.a * 0.0039215689f; + } else { + vtx.r = orgVtx->color.r * 0.0039215689f; + vtx.g = orgVtx->color.g * 0.0039215689f; + vtx.b = orgVtx->color.b * 0.0039215689f; + vtx.a = orgVtx->color.a * 0.0039215689f; } - vtx.r = vertex->color.r * 0.0039215689f; - vtx.g = vertex->color.g * 0.0039215689f; - vtx.b = vertex->color.b * 0.0039215689f; - vtx.a = vertex->color.a * 0.0039215689f; - gSPProcessVertex(v); - vertex++; + ++orgVtx; } - } else { + gSPProcessVertex(vi, spVtx); + } + return vi; +} + +void gSPF3DAMVertex(u32 a, u32 n, u32 v0) +{ + DebugMsg(DEBUG_NORMAL, "gSPF3DAMVertex n = %i, v0 = %i, from %08x\n", n, v0, a); + + if ((n + v0) > INDEXMAP_SIZE) { LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "//Using Vertex outside buffer v0 = %i, n = %i\n", v0, n); + return; } + + const u32 address = RSP_SegmentToPhysical(a); + + if ((address + sizeof(Vertex)* n) > RDRAMSize) { + DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "gSPF3DAMVertex Using Vertex outside RDRAM n = %i, v0 = %i, from %08x\n", n, v0, a); + return; + } + + if ((gSP.geometryMode & G_LIGHTING) != 0) { + + if ((gSP.changed & CHANGED_LIGHT) != 0) + gSPUpdateLightVectors(); + + if (((gSP.geometryMode & G_TEXTURE_GEN) != 0) && ((gSP.changed & CHANGED_LOOKAT) != 0)) + gSPUpdateLookatVectors(); + } + + const Vertex *vertex = (Vertex*)&RDRAM[address]; + SPVertex * spVtx = dwnd().getDrawer().getVertexPtr(0); + u32 i = gSPLoadF3DAMVertexData(vertex, spVtx, v0, v0, n); + if (i < n + v0) + gSPLoadF3DAMVertexData<1>(vertex + (i - v0), spVtx, v0, i, n); +} + +template +u32 gSPLoadSWVertexData(const SWVertex *orgVtx, SPVertex * spVtx, u32 v0, u32 vi, u32 n) +{ + const u32 end = n - (n%VNUM) + v0; + for (; vi < end; vi += VNUM) { + for(u32 j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[vi+j]; + vtx.x = orgVtx->x; + vtx.y = orgVtx->y; + vtx.z = orgVtx->z; + ++orgVtx; + } + gSPProcessVertex(vi, spVtx); + for (u32 j = 0; j < VNUM; ++j) { + SPVertex & vtx = spVtx[vi+j]; + vtx.y = -vtx.y; + } + } + return vi; +} + +void gSPSWVertex(const SWVertex * vertex, u32 n, u32 v0) +{ + DebugMsg(DEBUG_NORMAL, "gSPSWVertex n = %i, v0 = %i\n", n, v0); + + if ((n + v0) > INDEXMAP_SIZE) { + LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); + DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "//Using Vertex outside buffer v0 = %i, n = %i\n", v0, n); + return; + } + + SPVertex * spVtx = dwnd().getDrawer().getVertexPtr(0); + u32 i = gSPLoadSWVertexData(vertex, spVtx, v0, v0, n); + if (i < n + v0) + gSPLoadSWVertexData<1>(vertex + (i - v0), spVtx, v0, i, n); } void gSPT3DUXVertex(u32 a, u32 n, u32 ci) @@ -1359,13 +1254,13 @@ void gSPT3DUXVertex(u32 a, u32 n, u32 ci) if ((address + sizeof(T3DUXVertex)* n) > RDRAMSize) return; - GraphicsDrawer & drawer = dwnd().getDrawer(); + SPVertex * spVtx = dwnd().getDrawer().getVertexPtr(0); u32 i = 0; #ifdef __VEC4_OPT for (; i < n - (n % 4); i += 4) { u32 v = i; for (int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v + j); + SPVertex & vtx = spVtx[v+j]; vtx.x = vertex->x; vtx.y = vertex->y; vtx.z = vertex->z; @@ -1378,11 +1273,11 @@ void gSPT3DUXVertex(u32 a, u32 n, u32 ci) vertex++; color++; } - gSPProcessVertex4(v); + gSPProcessVertex<4>(v, spVtx); } #endif for (; i < n; ++i) { - SPVertex & vtx = drawer.getVertex(i); + SPVertex & vtx = spVtx[i]; vtx.x = vertex->x; vtx.y = vertex->y; vtx.z = vertex->z; @@ -1392,148 +1287,12 @@ void gSPT3DUXVertex(u32 a, u32 n, u32 ci) vtx.g = _FIXED2FLOAT(color->g, 8); vtx.b = _FIXED2FLOAT(color->b, 8); vtx.a = _FIXED2FLOAT(color->a, 8); - gSPProcessVertex(i); + gSPProcessVertex<1>(i, spVtx); vertex++; color++; } } -static -void calcF3DAMTexCoords(Vertex * _vertex, SPVertex & _vtx) -{ - const u32 s0 = (u32)_vertex->s; - const u32 t0 = (u32)_vertex->t; - const u32 acum_0 = ((_SHIFTR(gSP.textureCoordScaleOrg, 0, 16) * t0) << 1) + 0x8000; - const u32 acum_1 = ((_SHIFTR(gSP.textureCoordScale[1], 0, 16) * t0) << 1) + 0x8000; - const u32 sres = ((_SHIFTR(gSP.textureCoordScaleOrg, 16, 16) * s0) << 1) + acum_0; - const u32 tres = ((_SHIFTR(gSP.textureCoordScale[1], 16, 16) * s0) << 1) + acum_1; - const s16 s = _SHIFTR(sres, 16, 16) + _SHIFTR(gSP.textureCoordScale[0], 16, 16); - const s16 t = _SHIFTR(tres, 16, 16) + _SHIFTR(gSP.textureCoordScale[0], 0, 16); - - _vtx.s = _FIXED2FLOAT( s, 5 ); - _vtx.t = _FIXED2FLOAT( t, 5 ); -} - -void gSPF3DAMVertex(u32 a, u32 n, u32 v0) -{ - u32 address = RSP_SegmentToPhysical(a); - - if ((address + sizeof(Vertex)* n) > RDRAMSize) { - DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "gSPF3DAMVertex Using Vertex outside RDRAM n = %i, v0 = %i, from %08x\n", n, v0, a); - return; - } - - DebugMsg(DEBUG_NORMAL, "gSPF3DAMVertex n = %i, v0 = %i, from %08x\n", n, v0, a); - - if ((gSP.geometryMode & G_LIGHTING) != 0) { - - if ((gSP.changed & CHANGED_LIGHT) != 0) - gSPUpdateLightVectors(); - - if (((gSP.geometryMode & G_TEXTURE_GEN) != 0) && ((gSP.changed & CHANGED_LOOKAT) != 0)) - gSPUpdateLookatVectors(); - } - - Vertex *vertex = (Vertex*)&RDRAM[address]; - - GraphicsDrawer & drawer = dwnd().getDrawer(); - if ((n + v0) <= INDEXMAP_SIZE) { - unsigned int i = v0; -#ifdef __VEC4_OPT - for (; i < n - (n%4) + v0; i += 4) { - u32 v = i; - for(int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v+j); - vtx.x = vertex->x; - vtx.y = vertex->y; - vtx.z = vertex->z; - //vtx.flag = vertex->flag; - calcF3DAMTexCoords(vertex, vtx); - if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = _FIXED2FLOAT( vertex->normal.x, 7 ); - vtx.ny = _FIXED2FLOAT( vertex->normal.y, 7 ); - vtx.nz = _FIXED2FLOAT( vertex->normal.z, 7 ); - vtx.a = vertex->color.a * 0.0039215689f; - } else { - vtx.r = vertex->color.r * 0.0039215689f; - vtx.g = vertex->color.g * 0.0039215689f; - vtx.b = vertex->color.b * 0.0039215689f; - vtx.a = vertex->color.a * 0.0039215689f; - } - vertex++; - } - gSPProcessVertex4(v); - } -#endif - for (; i < n + v0; ++i) { - u32 v = i; - SPVertex & vtx = drawer.getVertex(v); - vtx.x = vertex->x; - vtx.y = vertex->y; - vtx.z = vertex->z; - calcF3DAMTexCoords(vertex, vtx); - if (gSP.geometryMode & G_LIGHTING) { - vtx.nx = _FIXED2FLOAT(vertex->normal.x, 7); - vtx.ny = _FIXED2FLOAT(vertex->normal.y, 7); - vtx.nz = _FIXED2FLOAT(vertex->normal.z, 7); - vtx.a = vertex->color.a * 0.0039215689f; - } else { - vtx.r = vertex->color.r * 0.0039215689f; - vtx.g = vertex->color.g * 0.0039215689f; - vtx.b = vertex->color.b * 0.0039215689f; - vtx.a = vertex->color.a * 0.0039215689f; - } - gSPProcessVertex(v); - DebugMsg(DEBUG_DETAIL, "v%d - x: %f, y: %f, z: %f, w: %f, s: %f, t: %f, r=%02f, g=%02f, b=%02f, a=%02f\n", i, vtx.x, vtx.y, vtx.z, vtx.w, vtx.s, vtx.t, vtx.r, vtx.g, vtx.b, vtx.a); - vertex++; - } - } else { - LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); - DebugMsg(DEBUG_NORMAL | DEBUG_ERROR, "//Using Vertex outside buffer v0 = %i, n = %i\n", v0, n); - } -} - -void gSPSWVertex(const SWVertex * vertex, u32 n, u32 v0) -{ - DebugMsg(DEBUG_NORMAL, "gSPSWVertex n = %i, v0 = %i\n", n, v0); - - GraphicsDrawer & drawer = dwnd().getDrawer(); - if ((n + v0) <= INDEXMAP_SIZE) { - unsigned int i = v0; -#ifdef __VEC4_OPT - for (; i < n - (n % 4) + v0; i += 4) { - u32 v = i; - for (unsigned int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v + j); - vtx.x = vertex->x; - vtx.y = vertex->y; - vtx.z = vertex->z; - vertex++; - } - gSPProcessVertex4(v); - for (unsigned int j = 0; j < 4; ++j) { - SPVertex & vtx = drawer.getVertex(v + j); - vtx.y = -vtx.y; - } - } -#endif - for (; i < n + v0; ++i) { - u32 v = i; - SPVertex & vtx = drawer.getVertex(v); - vtx.x = vertex->x; - vtx.y = vertex->y; - vtx.z = vertex->z; - - gSPProcessVertex(v); - vtx.y = -vtx.y; - vertex++; - } - } else { - LOG(LOG_ERROR, "Using Vertex outside buffer v0=%i, n=%i\n", v0, n); - } -} - - void gSPDisplayList( u32 dl ) { u32 address = RSP_SegmentToPhysical( dl ); @@ -2862,70 +2621,15 @@ void gSPObjRendermode(u32 _mode) DebugMsg(DEBUG_NORMAL, "gSPObjRendermode(0x%08x)\n", _mode); } -#ifdef __NEON_OPT -void gSPTransformVertex4NEON(u32 v, float mtx[4][4]); -void gSPBillboardVertex4NEON(u32 v); -void gSPTransformVertex_NEON(float vtx[4], float mtx[4][4]); -void gSPLightVertex_NEON(SPVertex & _vtx); -void gSPLightVertex4_NEON(u32 v); +void(*gSPInverseTransformVector)(float vtx[4], float mtx[4][4]) = gSPInverseTransformVector_default; +#ifndef __NEON_OPT +void(*gSPTransformVector)(float vtx[4], float mtx[4][4]) = gSPTransformVector_default; +#else +void gSPTransformVector_NEON(float vtx[4], float mtx[4][4]); +void(*gSPTransformVector)(float vtx[4], float mtx[4][4]) = gSPTransformVector_NEON; #endif //__NEON_OPT -#ifdef __VEC4_OPT -#ifndef __NEON_OPT -void (*gSPTransformVertex4)(u32 v, float mtx[4][4]) = gSPTransformVertex4_default; -void (*gSPBillboardVertex4)(u32 v) = gSPBillboardVertex4_default; -void (*gSPLightVertex4)(u32 v) = gSPLightVertex4_default; -#else -void (*gSPTransformVertex4)(u32 v, float mtx[4][4]) = gSPTransformVertex4NEON; -void (*gSPBillboardVertex4)(u32 v) = gSPBillboardVertex4NEON; -void (*gSPLightVertex4)(u32 v) = gSPLightVertex4_NEON; -#endif - -void (*gSPPointLightVertex4)(u32 v, float _vPos[4][3]) = gSPPointLightVertex4_default; - -#endif - - -void (*gSPInverseTransformVector)(float vtx[4], float mtx[4][4]) = gSPInverseTransformVector_default; -#ifndef __NEON_OPT -void (*gSPTransformVertex)(float vtx[4], float mtx[4][4]) = - gSPTransformVertex_default; -void (*gSPLightVertex)(SPVertex & _vtx) = - gSPLightVertex_default; -#else -void (*gSPTransformVertex)(float vtx[4], float mtx[4][4]) = - gSPTransformVertex_NEON; -void (*gSPLightVertex)(SPVertex & _vtx) = - gSPLightVertex_NEON; -#endif -void (*gSPPointLightVertex)(SPVertex & _vtx, float * _vPos) = gSPPointLightVertex_default; -void (*gSPBillboardVertex)(u32 v, u32 i) = gSPBillboardVertex_default; - void gSPSetupFunctions() { - if (GBI.getMicrocodeType() != F3DEX2CBFD) { - -#ifdef __VEC4_OPT -#ifndef __NEON_OPT - gSPLightVertex4 = gSPLightVertex4_default; -#else - gSPLightVertex4 = gSPLightVertex4_NEON; -#endif - gSPPointLightVertex4 = gSPPointLightVertex4_default; -#endif - -#ifndef __NEON_OPT - gSPLightVertex = gSPLightVertex_default; -#else - gSPLightVertex = gSPLightVertex_NEON; -#endif - gSPPointLightVertex = gSPPointLightVertex_default; - return; - } -#ifdef __VEC4_OPT - gSPLightVertex4 = gSPLightVertex4_CBFD; - gSPPointLightVertex4 = gSPPointLightVertex4_CBFD; -#endif - gSPLightVertex = gSPLightVertex_CBFD; - gSPPointLightVertex = gSPPointLightVertex_CBFD; + g_ConkerUcode = GBI.getMicrocodeType() == F3DEX2CBFD; } diff --git a/src/gSP.h b/src/gSP.h index 4165fa48..90ff1954 100644 --- a/src/gSP.h +++ b/src/gSP.h @@ -221,18 +221,10 @@ void gSP4Triangles(const s32 v00, const s32 v01, const s32 v02, const s32 v20, const s32 v21, const s32 v22, const s32 v30, const s32 v31, const s32 v32 ); -#ifdef __VEC4_OPT -extern void (*gSPTransformVertex4)(u32 v, float mtx[4][4]); -extern void (*gSPTransformNormal4)(u32 v, float mtx[4][4]); -extern void (*gSPLightVertex4)(u32 v); -extern void (*gSPPointLightVertex4)(u32 v, float _vPos[4][3]); -extern void (*gSPBillboardVertex4)(u32 v); -#endif -extern void (*gSPTransformVertex)(float vtx[4], float mtx[4][4]); +void gSPLightVertex(SPVertex & _vtx); + +extern void (*gSPTransformVector)(float vtx[4], float mtx[4][4]); extern void (*gSPInverseTransformVector)(float vtx[4], float mtx[4][4]); -extern void (*gSPLightVertex)(SPVertex & _vtx); -extern void (*gSPPointLightVertex)(SPVertex & _vtx, float * _vPos); -extern void (*gSPBillboardVertex)(u32 v, u32 i); void gSPSetupFunctions(); void gSPFlushTriangles(); #endif