mirror of
https://github.com/blawar/GLideN64.git
synced 2024-07-02 09:03:37 +00:00
arm neon: add CopyMatrix function
This commit is contained in:
parent
72abac8a83
commit
a986503133
|
@ -95,3 +95,51 @@ void InverseTransformVectorNormalizeN(float src[][3], float dst[][3], float mtx[
|
|||
InverseTransformVectorNormalize((float(*))src[i], (float(*))dst[i], mtx);
|
||||
}
|
||||
}
|
||||
|
||||
// Copy one 4x4 float matrix into another.
//   m0 - destination matrix (receives 16 floats)
//   m1 - source matrix (16 floats are read from it)
// With WIN32_ASM defined the copy is a hand-unrolled sequence of 32-bit
// moves written in MSVC inline assembler; otherwise it is a single memcpy.
// Both branches transfer exactly 16 * sizeof(float) bytes.
void CopyMatrix( float m0[4][4], float m1[4][4] )
{
#ifdef WIN32_ASM
	__asm {
		mov		esi, [m1]						// esi = source pointer
		mov		edi, [m0]						// edi = destination pointer

		// row 0 (byte offsets 00h..0Ch)
		mov		eax, dword ptr [esi+00h]
		mov		dword ptr [edi+00h], eax
		mov		eax, dword ptr [esi+04h]
		mov		dword ptr [edi+04h], eax
		mov		eax, dword ptr [esi+08h]
		mov		dword ptr [edi+08h], eax
		mov		eax, dword ptr [esi+0Ch]
		mov		dword ptr [edi+0Ch], eax

		// row 1 (byte offsets 10h..1Ch)
		mov		eax, dword ptr [esi+10h]
		mov		dword ptr [edi+10h], eax
		mov		eax, dword ptr [esi+14h]
		mov		dword ptr [edi+14h], eax
		mov		eax, dword ptr [esi+18h]
		mov		dword ptr [edi+18h], eax
		mov		eax, dword ptr [esi+1Ch]
		mov		dword ptr [edi+1Ch], eax

		// row 2 (byte offsets 20h..2Ch)
		mov		eax, dword ptr [esi+20h]
		mov		dword ptr [edi+20h], eax
		mov		eax, dword ptr [esi+24h]
		mov		dword ptr [edi+24h], eax
		mov		eax, dword ptr [esi+28h]
		mov		dword ptr [edi+28h], eax
		mov		eax, dword ptr [esi+2Ch]
		mov		dword ptr [edi+2Ch], eax

		// row 3 (byte offsets 30h..3Ch)
		mov		eax, dword ptr [esi+30h]
		mov		dword ptr [edi+30h], eax
		mov		eax, dword ptr [esi+34h]
		mov		dword ptr [edi+34h], eax
		mov		eax, dword ptr [esi+38h]
		mov		dword ptr [edi+38h], eax
		mov		eax, dword ptr [esi+3Ch]
		mov		dword ptr [edi+3Ch], eax
	}
#else
	// Portable path: copy all 16 floats in one call.
	memcpy( m0, m1, 16 * sizeof( float ) );
#endif // WIN32_ASM
}
|
||||
|
|
49
src/3DMath.h
49
src/3DMath.h
|
@ -11,54 +11,7 @@ void InverseTransformVectorNormalize(float src[3], float dst[3], float mtx[4][4]
|
|||
// Calls InverseTransformVectorNormalize on src[i] / dst[i] with mtx.
// NOTE(review): count is presumably the number of vectors processed; the loop bounds are not visible here - confirm.
void InverseTransformVectorNormalizeN(float src[][3], float dst[][3], float mtx[4][4], u32 count);
|
||||
void Normalize(float v[3]);
|
||||
float DotProduct(const float v0[3], const float v1[3]);
|
||||
|
||||
// Copy one 4x4 float matrix into another.
//   m0 - destination matrix (receives 16 floats)
//   m1 - source matrix (16 floats are read from it)
// With WIN32_ASM defined the copy is a hand-unrolled sequence of 32-bit
// moves written in MSVC inline assembler; otherwise it is a single memcpy.
// Both branches transfer exactly 16 * sizeof(float) bytes.
inline void CopyMatrix( float m0[4][4], float m1[4][4] )
{
#ifdef WIN32_ASM
	__asm {
		mov		esi, [m1]						// esi = source pointer
		mov		edi, [m0]						// edi = destination pointer

		// row 0 (byte offsets 00h..0Ch)
		mov		eax, dword ptr [esi+00h]
		mov		dword ptr [edi+00h], eax
		mov		eax, dword ptr [esi+04h]
		mov		dword ptr [edi+04h], eax
		mov		eax, dword ptr [esi+08h]
		mov		dword ptr [edi+08h], eax
		mov		eax, dword ptr [esi+0Ch]
		mov		dword ptr [edi+0Ch], eax

		// row 1 (byte offsets 10h..1Ch)
		mov		eax, dword ptr [esi+10h]
		mov		dword ptr [edi+10h], eax
		mov		eax, dword ptr [esi+14h]
		mov		dword ptr [edi+14h], eax
		mov		eax, dword ptr [esi+18h]
		mov		dword ptr [edi+18h], eax
		mov		eax, dword ptr [esi+1Ch]
		mov		dword ptr [edi+1Ch], eax

		// row 2 (byte offsets 20h..2Ch)
		mov		eax, dword ptr [esi+20h]
		mov		dword ptr [edi+20h], eax
		mov		eax, dword ptr [esi+24h]
		mov		dword ptr [edi+24h], eax
		mov		eax, dword ptr [esi+28h]
		mov		dword ptr [edi+28h], eax
		mov		eax, dword ptr [esi+2Ch]
		mov		dword ptr [edi+2Ch], eax

		// row 3 (byte offsets 30h..3Ch)
		mov		eax, dword ptr [esi+30h]
		mov		dword ptr [edi+30h], eax
		mov		eax, dword ptr [esi+34h]
		mov		dword ptr [edi+34h], eax
		mov		eax, dword ptr [esi+38h]
		mov		dword ptr [edi+38h], eax
		mov		eax, dword ptr [esi+3Ch]
		mov		dword ptr [edi+3Ch], eax
	}
#else
	// Portable path: copy all 16 floats in one call.
	memcpy( m0, m1, 16 * sizeof( float ) );
#endif // WIN32_ASM
}
|
||||
// Copies the 16 floats of the 4x4 matrix m1 into m0.
void CopyMatrix( float m0[4][4], float m1[4][4]);
|
||||
|
||||
inline float DotProduct(const float v0[3], const float v1[3])
|
||||
{
|
||||
|
|
|
@ -344,3 +344,13 @@ void InverseTransformVectorNormalizeN(float src[][3], float dst[][3], float mtx[
|
|||
"d20","d21", "d22","d23","d24","d25","d26","d27","d28","d29", "r4", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
// Copy one 4x4 float matrix into another.
//   m0 - destination matrix (receives 16 floats)
//   m1 - source matrix (16 floats are read from it)
// On 32-bit ARM the whole matrix is moved with one NEON multi-register
// load followed by one multi-register store. Other targets, where the
// A32 VLDM/VSTM forms cannot be assembled, use a plain element copy.
void CopyMatrix( float m0[4][4], float m1[4][4] )
{
#if defined(__arm__)
	asm volatile (
	"	vldm.32		%1, {q0-q3}			\n\t"	//load 16 floats in four neon quad registers
	"	vstm.32		%0, {q0-q3}			\n\t"	//store 16 floats
	:	// no register outputs: the result is written through memory
	: "r"(m0), "r"(m1)	// base pointers are only read (no '!' writeback), so they are inputs
	: "d0","d1","d2","d3","d4","d5","d6","d7", "memory"	// q0-q3 overlap d0-d7
	);
#else
	const float * from = &m1[0][0];
	float * to = &m0[0][0];
	for (int i = 0; i < 16; ++i)
		to[i] = from[i];
#endif
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user