diff --git a/src/3DMath.cpp b/src/3DMath.cpp
index e36d8bb5..720cd1c2 100644
--- a/src/3DMath.cpp
+++ b/src/3DMath.cpp
@@ -95,3 +95,51 @@ void InverseTransformVectorNormalizeN(float src[][3], float dst[][3], float mtx[
         InverseTransformVectorNormalize((float(*))src[i], (float(*))dst[i], mtx);
     }
 }
+
+void CopyMatrix( float m0[4][4], float m1[4][4] ) // Copies the 16 floats of m1 (source) into m0 (destination).
+{
+#ifdef WIN32_ASM // x86 inline-assembly path (__asm block)
+    __asm {
+        mov esi, [m1] // esi = source pointer (m1)
+        mov edi, [m0] // edi = destination pointer (m0)
+
+        mov eax, dword ptr [esi+00h] // dwords at 00h-0Ch: row 0 (with 4-byte floats)
+        mov dword ptr [edi+00h], eax
+        mov eax, dword ptr [esi+04h]
+        mov dword ptr [edi+04h], eax
+        mov eax, dword ptr [esi+08h]
+        mov dword ptr [edi+08h], eax
+        mov eax, dword ptr [esi+0Ch]
+        mov dword ptr [edi+0Ch], eax
+
+        mov eax, dword ptr [esi+10h] // dwords at 10h-1Ch: row 1
+        mov dword ptr [edi+10h], eax
+        mov eax, dword ptr [esi+14h]
+        mov dword ptr [edi+14h], eax
+        mov eax, dword ptr [esi+18h]
+        mov dword ptr [edi+18h], eax
+        mov eax, dword ptr [esi+1Ch]
+        mov dword ptr [edi+1Ch], eax
+
+        mov eax, dword ptr [esi+20h] // dwords at 20h-2Ch: row 2
+        mov dword ptr [edi+20h], eax
+        mov eax, dword ptr [esi+24h]
+        mov dword ptr [edi+24h], eax
+        mov eax, dword ptr [esi+28h]
+        mov dword ptr [edi+28h], eax
+        mov eax, dword ptr [esi+2Ch]
+        mov dword ptr [edi+2Ch], eax
+
+        mov eax, dword ptr [esi+30h] // dwords at 30h-3Ch: row 3
+        mov dword ptr [edi+30h], eax
+        mov eax, dword ptr [esi+34h]
+        mov dword ptr [edi+34h], eax
+        mov eax, dword ptr [esi+38h]
+        mov dword ptr [edi+38h], eax
+        mov eax, dword ptr [esi+3Ch]
+        mov dword ptr [edi+3Ch], eax
+    }
+#else // fallback when WIN32_ASM is not defined
+    memcpy( m0, m1, 16 * sizeof( float ) ); // copy all 16 floats m1 -> m0; memcpy requires that m0 and m1 do not overlap
+#endif // WIN32_ASM
+}
diff --git a/src/3DMath.h b/src/3DMath.h
index c611b1bb..83c6bc61 100644
--- a/src/3DMath.h
+++ b/src/3DMath.h
@@ -11,54 +11,7 @@ void InverseTransformVectorNormalize(float src[3], float dst[3], float mtx[4][4]
 void InverseTransformVectorNormalizeN(float src[][3], float dst[][3], float mtx[4][4], u32 count);
 void Normalize(float v[3]);
 float DotProduct(const float v0[3], const float v1[3]);
-
-inline void CopyMatrix( float m0[4][4], float m1[4][4] )
-{
-#ifdef WIN32_ASM
-    __asm {
-        mov esi, [m1]
-        mov edi, [m0]
-
-        mov eax, dword ptr [esi+00h]
-        mov dword ptr [edi+00h], eax
-        mov eax, dword ptr [esi+04h]
-        mov dword ptr [edi+04h], eax
-        mov eax, dword ptr [esi+08h]
-        mov dword ptr [edi+08h], eax
-        mov eax, dword ptr [esi+0Ch]
-        mov dword ptr [edi+0Ch], eax
-
-        mov eax, dword ptr [esi+10h]
-        mov dword ptr [edi+10h], eax
-        mov eax, dword ptr [esi+14h]
-        mov dword ptr [edi+14h], eax
-        mov eax, dword ptr [esi+18h]
-        mov dword ptr [edi+18h], eax
-        mov eax, dword ptr [esi+1Ch]
-        mov dword ptr [edi+1Ch], eax
-
-        mov eax, dword ptr [esi+20h]
-        mov dword ptr [edi+20h], eax
-        mov eax, dword ptr [esi+24h]
-        mov dword ptr [edi+24h], eax
-        mov eax, dword ptr [esi+28h]
-        mov dword ptr [edi+28h], eax
-        mov eax, dword ptr [esi+2Ch]
-        mov dword ptr [edi+2Ch], eax
-
-        mov eax, dword ptr [esi+30h]
-        mov dword ptr [edi+30h], eax
-        mov eax, dword ptr [esi+34h]
-        mov dword ptr [edi+34h], eax
-        mov eax, dword ptr [esi+38h]
-        mov dword ptr [edi+38h], eax
-        mov eax, dword ptr [esi+3Ch]
-        mov dword ptr [edi+3Ch], eax
-    }
-#else
-    memcpy( m0, m1, 16 * sizeof( float ) );
-#endif // WIN32_ASM
-}
+void CopyMatrix( float m0[4][4], float m1[4][4]); // Copies m1 into m0; defined out of line in 3DMath.cpp, with a NEON variant in Neon/3DMathNeon.cpp.
 
 inline float DotProduct(const float v0[3], const float v1[3])
 {
diff --git a/src/Neon/3DMathNeon.cpp b/src/Neon/3DMathNeon.cpp
index 146f06a4..7eb0f672 100644
--- a/src/Neon/3DMathNeon.cpp
+++ b/src/Neon/3DMathNeon.cpp
@@ -344,3 +344,13 @@ void InverseTransformVectorNormalizeN(float src[][3], float dst[][3], float mtx[
     "d20","d21", "d22","d23","d24","d25","d26","d27","d28","d29", "r4", "memory"
     );
 }
+
+void CopyMatrix( float m0[4][4], float m1[4][4] ) // NEON version: copies the 16 floats of m1 (source) into m0 (destination).
+{
+    asm volatile (
+    " vldm.32 %1, {q0-q3} \n\t" // load 16 floats from m1 (%1) into the four NEON quad registers q0-q3
+    " vstm.32 %0, {q0-q3} \n\t" // store those 16 floats from q0-q3 to m0 (%0)
+    : "+r"(m0), "+r"(m1) : // %0 = m0, %1 = m1, both declared read-write register operands; no input-only operands
+    : "d0","d1","d2","d3","d4","d5","d6","d7", "memory" // clobbers: d0-d7 (the double-word halves of q0-q3) and memory
+    );
+}