1
0
mirror of https://github.com/blawar/GLideN64.git synced 2024-07-02 09:03:37 +00:00

arm neon: add CopyMatrix function

This commit is contained in:
gizmo98 2017-07-08 21:30:17 +02:00
parent 72abac8a83
commit a986503133
3 changed files with 59 additions and 48 deletions

View File

@ -95,3 +95,51 @@ void InverseTransformVectorNormalizeN(float src[][3], float dst[][3], float mtx[
InverseTransformVectorNormalize((float(*))src[i], (float(*))dst[i], mtx);
}
}
// Copy the 4x4 float matrix m1 into m0 (destination first, source second).
//
// When WIN32_ASM is defined the copy is performed by MSVC-style x86 inline
// assembly that moves one 32-bit word at a time, in ascending address order,
// through eax. Otherwise memcpy moves the same 64 bytes.
void CopyMatrix( float m0[4][4], float m1[4][4] )
{
#if defined(WIN32_ASM)
	__asm {
		mov		esi, [m1]						// esi = source matrix
		mov		edi, [m0]						// edi = destination matrix

		// row 0: byte offsets 00h-0Ch
		mov		eax, dword ptr [esi+00h]
		mov		dword ptr [edi+00h], eax
		mov		eax, dword ptr [esi+04h]
		mov		dword ptr [edi+04h], eax
		mov		eax, dword ptr [esi+08h]
		mov		dword ptr [edi+08h], eax
		mov		eax, dword ptr [esi+0Ch]
		mov		dword ptr [edi+0Ch], eax

		// row 1: byte offsets 10h-1Ch
		mov		eax, dword ptr [esi+10h]
		mov		dword ptr [edi+10h], eax
		mov		eax, dword ptr [esi+14h]
		mov		dword ptr [edi+14h], eax
		mov		eax, dword ptr [esi+18h]
		mov		dword ptr [edi+18h], eax
		mov		eax, dword ptr [esi+1Ch]
		mov		dword ptr [edi+1Ch], eax

		// row 2: byte offsets 20h-2Ch
		mov		eax, dword ptr [esi+20h]
		mov		dword ptr [edi+20h], eax
		mov		eax, dword ptr [esi+24h]
		mov		dword ptr [edi+24h], eax
		mov		eax, dword ptr [esi+28h]
		mov		dword ptr [edi+28h], eax
		mov		eax, dword ptr [esi+2Ch]
		mov		dword ptr [edi+2Ch], eax

		// row 3: byte offsets 30h-3Ch
		mov		eax, dword ptr [esi+30h]
		mov		dword ptr [edi+30h], eax
		mov		eax, dword ptr [esi+34h]
		mov		dword ptr [edi+34h], eax
		mov		eax, dword ptr [esi+38h]
		mov		dword ptr [edi+38h], eax
		mov		eax, dword ptr [esi+3Ch]
		mov		dword ptr [edi+3Ch], eax
	}
#else
	// Portable path: one block copy of all 4*4 floats.
	memcpy( m0, m1, sizeof( float[4][4] ) );
#endif // WIN32_ASM
}

View File

@ -11,54 +11,7 @@ void InverseTransformVectorNormalize(float src[3], float dst[3], float mtx[4][4]
// Array form of InverseTransformVectorNormalize: takes arrays of 3-float
// vectors plus one 4x4 matrix. count is presumably the number of vectors
// to process -- confirm against the definition.
void InverseTransformVectorNormalizeN(float src[][3], float dst[][3], float mtx[4][4], u32 count);
// Takes one 3-float vector through a non-const parameter; presumably the
// vector is normalized in place -- confirm against the definition.
void Normalize(float v[3]);
// Returns a float computed from two read-only 3-float vectors.
float DotProduct(const float v0[3], const float v1[3]);
// Copies all 16 floats of the 4x4 matrix m1 into m0.
//   m0 - destination matrix
//   m1 - source matrix
// WIN32_ASM builds use MSVC x86 inline assembly (one dword per load/store
// pair, lowest address first); every other build uses memcpy.
inline void CopyMatrix( float m0[4][4], float m1[4][4] )
{
#if defined(WIN32_ASM)
	__asm {
		mov		esi, [m1]						// source pointer
		mov		edi, [m0]						// destination pointer

		// m0[0][0..3] = m1[0][0..3]
		mov		eax, dword ptr [esi+00h]
		mov		dword ptr [edi+00h], eax
		mov		eax, dword ptr [esi+04h]
		mov		dword ptr [edi+04h], eax
		mov		eax, dword ptr [esi+08h]
		mov		dword ptr [edi+08h], eax
		mov		eax, dword ptr [esi+0Ch]
		mov		dword ptr [edi+0Ch], eax

		// m0[1][0..3] = m1[1][0..3]
		mov		eax, dword ptr [esi+10h]
		mov		dword ptr [edi+10h], eax
		mov		eax, dword ptr [esi+14h]
		mov		dword ptr [edi+14h], eax
		mov		eax, dword ptr [esi+18h]
		mov		dword ptr [edi+18h], eax
		mov		eax, dword ptr [esi+1Ch]
		mov		dword ptr [edi+1Ch], eax

		// m0[2][0..3] = m1[2][0..3]
		mov		eax, dword ptr [esi+20h]
		mov		dword ptr [edi+20h], eax
		mov		eax, dword ptr [esi+24h]
		mov		dword ptr [edi+24h], eax
		mov		eax, dword ptr [esi+28h]
		mov		dword ptr [edi+28h], eax
		mov		eax, dword ptr [esi+2Ch]
		mov		dword ptr [edi+2Ch], eax

		// m0[3][0..3] = m1[3][0..3]
		mov		eax, dword ptr [esi+30h]
		mov		dword ptr [edi+30h], eax
		mov		eax, dword ptr [esi+34h]
		mov		dword ptr [edi+34h], eax
		mov		eax, dword ptr [esi+38h]
		mov		dword ptr [edi+38h], eax
		mov		eax, dword ptr [esi+3Ch]
		mov		dword ptr [edi+3Ch], eax
	}
#else
	// Non-assembly builds: single 64-byte block copy.
	memcpy( m0, m1, sizeof( float ) * 16 );
#endif // WIN32_ASM
}
// Copies the 16 floats of matrix m1 into m0 (the destination is the first argument).
void CopyMatrix( float m0[4][4], float m1[4][4]);
inline float DotProduct(const float v0[3], const float v1[3])
{

View File

@ -344,3 +344,13 @@ void InverseTransformVectorNormalizeN(float src[][3], float dst[][3], float mtx[
"d20","d21", "d22","d23","d24","d25","d26","d27","d28","d29", "r4", "memory"
);
}
void CopyMatrix( float m0[4][4], float m1[4][4] )
{
asm volatile (
" vldm.32 %1, {q0-q3} \n\t" //load 16 floats in four neon quad registers
" vstm.32 %0, {q0-q3} \n\t" //store 16 floats
: "+r"(m0), "+r"(m1) :
: "d0","d1","d2","d3","d4","d5","d6","d7", "memory"
);
}