From 39d4b223c877ca8789efaa54c285c8366c1434f5 Mon Sep 17 00:00:00 2001 From: gizmo98 Date: Sun, 8 Oct 2017 15:30:33 +0200 Subject: [PATCH] arm neon: add gSPInverseTransformVector function Fix declaration of gSPInverseTransformVector as well. Replace vtx[4] with vec[3]. --- src/Neon/gSPNeon.cpp | 19 +++++++++++++++++++ src/gSP.cpp | 4 +++- src/gSP.h | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) diff --git a/src/Neon/gSPNeon.cpp b/src/Neon/gSPNeon.cpp index cb3dbff7..b5c8356b 100644 --- a/src/Neon/gSPNeon.cpp +++ b/src/Neon/gSPNeon.cpp @@ -136,6 +136,25 @@ void gSPTransformVector_NEON(float vtx[4], float mtx[4][4]) vst1q_f32(vtx, _mtx0); } +void gSPInverseTransformVector_NEON(float vec[3], float mtx[4][4]) +{ + float32x4x4_t _mtx = vld4q_f32(mtx[0]); // de-interleaving load: lane r of _mtx.val[c] = mtx[r][c] + + _mtx.val[0] = vmulq_n_f32(_mtx.val[0], vec[0]); // lane0 =mtx[0][0]*vec[0] + // lane1 =mtx[1][0]*vec[0] + // lane2 =mtx[2][0]*vec[0] + _mtx.val[0] = vmlaq_n_f32(_mtx.val[0], _mtx.val[1], vec[1]); // lane0+=mtx[0][1]*vec[1] + // lane1+=mtx[1][1]*vec[1] + // lane2+=mtx[2][1]*vec[1] + _mtx.val[0] = vmlaq_n_f32(_mtx.val[0], _mtx.val[2], vec[2]); // lane0+=mtx[0][2]*vec[2] + // lane1+=mtx[1][2]*vec[2] + // lane2+=mtx[2][2]*vec[2] + const float32x4_t _vec4 = _mtx.val[0]; + vec[0] = _vec4[0]; // store lane0 to vec[0] + vec[1] = _vec4[1]; // store lane1 to vec[1] + vec[2] = _vec4[2]; // store lane2 to vec[2] (lane3 is discarded) +} + void DotProductMax7FullNeon( float v0[3], float v1[7][3], float lights[7][3], float _vtx[3]) { asm volatile ( diff --git a/src/gSP.cpp b/src/gSP.cpp index c5a99ded..a721be6d 100644 --- a/src/gSP.cpp +++ b/src/gSP.cpp @@ -2621,11 +2621,13 @@ void gSPObjRendermode(u32 _mode) DebugMsg(DEBUG_NORMAL, "gSPObjRendermode(0x%08x)\n", _mode); } -void(*gSPInverseTransformVector)(float vtx[4], float mtx[4][4]) = gSPInverseTransformVector_default; #ifndef __NEON_OPT +void(*gSPInverseTransformVector)(float vec[3], float mtx[4][4]) = gSPInverseTransformVector_default;
void(*gSPTransformVector)(float vtx[4], float mtx[4][4]) = gSPTransformVector_default; #else +void gSPInverseTransformVector_NEON(float vec[3], float mtx[4][4]); void gSPTransformVector_NEON(float vtx[4], float mtx[4][4]); +void(*gSPInverseTransformVector)(float vec[3], float mtx[4][4]) = gSPInverseTransformVector_NEON; void(*gSPTransformVector)(float vtx[4], float mtx[4][4]) = gSPTransformVector_NEON; #endif //__NEON_OPT diff --git a/src/gSP.h b/src/gSP.h index 90ff1954..dfc24265 100644 --- a/src/gSP.h +++ b/src/gSP.h @@ -224,7 +224,7 @@ void gSP4Triangles(const s32 v00, const s32 v01, const s32 v02, void gSPLightVertex(SPVertex & _vtx); extern void (*gSPTransformVector)(float vtx[4], float mtx[4][4]); -extern void (*gSPInverseTransformVector)(float vtx[4], float mtx[4][4]); +extern void (*gSPInverseTransformVector)(float vec[3], float mtx[4][4]); void gSPSetupFunctions(); void gSPFlushTriangles(); #endif