1
0
mirror of https://github.com/blawar/GLideN64.git synced 2024-07-02 09:03:37 +00:00

Remove asm code from 3dMath

This commit is contained in:
Sergey Lipskiy 2014-03-21 15:48:30 +07:00
parent 3f72a7aefc
commit a41417ef6b
7 changed files with 62 additions and 234 deletions

44
3DMath.cpp Normal file
View File

@ -0,0 +1,44 @@
#include <math.h>
#include "3DMath.h"
// MultMatrix: dest = m0 * m1 for 4x4 column-major matrices, i.e.
// dest[r][c] = sum_k m0[k][c] * m1[r][k].  dest must not alias m0 or m1
// (use MultMatrix2 for the in-place variant).
void MultMatrix(float m0[4][4], float m1[4][4], float dest[4][4])
{
	for (int col = 0; col < 4; ++col) {
		// Hoist the four elements of this m0 column once per pass.
		const float a = m0[0][col];
		const float b = m0[1][col];
		const float c = m0[2][col];
		const float d = m0[3][col];
		dest[0][col] = a*m1[0][0] + b*m1[0][1] + c*m1[0][2] + d*m1[0][3];
		dest[1][col] = a*m1[1][0] + b*m1[1][1] + c*m1[1][2] + d*m1[1][3];
		dest[2][col] = a*m1[2][0] + b*m1[2][1] + c*m1[2][2] + d*m1[2][3];
		// Row 3 sums in reverse operand order, matching the original
		// code exactly (float addition is not associative).
		dest[3][col] = d*m1[3][3] + c*m1[3][2] + b*m1[3][1] + a*m1[3][0];
	}
}
// MultMatrix2: in-place multiply, m0 = m0 * m1.  Uses a scratch matrix
// because MultMatrix does not allow its destination to alias an input.
void MultMatrix2(float m0[4][4], float m1[4][4])
{
	float result[4][4];
	MultMatrix(m0, m1, result);
	memcpy(m0, result, sizeof(result));
}
// TransformVectorNormalize: transforms vec by the upper-left 3x3 of mtx
// (vec' = vec * M, no translation) and normalizes the result in place.
// A zero-length result is left untouched to avoid dividing by zero.
void TransformVectorNormalize(float vec[3], float mtx[4][4])
{
	// BUGFIX: copy the inputs first.  The previous code overwrote
	// vec[0] and vec[1], then read the overwritten values while
	// computing vec[1] and vec[2], producing a wrong transform for any
	// matrix with off-diagonal terms.
	const float x = vec[0];
	const float y = vec[1];
	const float z = vec[2];

	vec[0] = mtx[0][0] * x + mtx[1][0] * y + mtx[2][0] * z;
	vec[1] = mtx[0][1] * x + mtx[1][1] * y + mtx[2][1] * z;
	vec[2] = mtx[0][2] * x + mtx[1][2] * y + mtx[2][2] * z;

	float len = vec[0]*vec[0] + vec[1]*vec[1] + vec[2]*vec[2];
	if (len != 0.0f) {
		len = sqrtf(len);
		vec[0] /= len;
		vec[1] /= len;
		vec[2] /= len;
	}
}

242
3DMath.h
View File

@ -2,6 +2,10 @@
#define _3DMATH_H
#include <memory.h>
void MultMatrix( float m0[4][4], float m1[4][4], float dest[4][4]);
void MultMatrix2(float m0[4][4], float m1[4][4] );
void TransformVectorNormalize(float vec[3], float mtx[4][4]);
inline void CopyMatrix( float m0[4][4], float m1[4][4] )
{
#ifdef WIN32
@ -50,93 +54,6 @@ inline void CopyMatrix( float m0[4][4], float m1[4][4] )
#endif // WIN32
}
// Multiplies m0 in place by m1 (m0 = m0 x m1) for 4x4 column-major
// matrices.  WIN32 builds use a hand-written x87 FPU version; all other
// builds use the portable C fallback below.  The asm keeps one column of
// m0 on the FPU stack and accumulates the four dot products against the
// rows of m1, storing each result back into m0 as it is finished.
inline void MultMatrix( float m0[4][4], float m1[4][4] )
{
#ifdef WIN32
__asm {
mov esi, dword ptr [m0]
mov edi, dword ptr [m1]
// cx counts the four columns of m0; esi advances 4 bytes per pass, so
// each iteration processes one column (its elements are 10h apart).
mov cx, 4
MultMatrix_Loop:
// ST(5) ST(4) ST(3) ST(2) ST(1) ST
fld dword ptr [esi+30h] // m030
fld dword ptr [esi+20h] // m030 m020
fld dword ptr [esi+10h] // m030 m020 m010
fld dword ptr [esi] // m030 m020 m010 m000
fld dword ptr [edi] // m030 m020 m010 m000 m100
fmul ST, ST(1) // m030 m020 m010 m000 m000*m100
fld dword ptr [edi+04h] // m030 m020 m010 m000 m000*m100 m101
fmul ST, ST(3) // m030 m020 m010 m000 m000*m100 m010*m101
fadd // m030 m020 m010 m000 m000*m100+m010*m101
fld dword ptr [edi+08h] // m030 m020 m010 m000 m000*m100+m010*m101 m102
fmul ST, ST(4) // m030 m020 m010 m000 m000*m100+m010*m101 m020*m102
fadd // m030 m020 m010 m000 m000*m100+m010*m101+m020*m102
fld dword ptr [edi+0Ch] // m030 m020 m010 m000 m000*m100+m010*m101+m020*m102 m103
fmul ST, ST(5) // m030 m020 m010 m000 m000*m100+m010*m101+m020*m102 m030*m103
fadd // m030 m020 m010 m000 m000*m100+m010*m101+m020*m102+m030*m103
fstp dword ptr [esi] // m030 m020 m010 m000
fld dword ptr [edi+10h] // m030 m020 m010 m000 m110
fmul ST, ST(1) // m030 m020 m010 m000 m000*m110
fld dword ptr [edi+14h] // m030 m020 m010 m000 m000*m110 m111
fmul ST, ST(3) // m030 m020 m010 m000 m000*m110 m010*m111
fadd // m030 m020 m010 m000 m000*m110+m010*m111
fld dword ptr [edi+18h] // m030 m020 m010 m000 m000*m110+m010*m111 m112
fmul ST, ST(4) // m030 m020 m010 m000 m000*m110+m010*m111 m020*m112
fadd // m030 m020 m010 m000 m000*m110+m010*m111+m020*m112
fld dword ptr [edi+1Ch] // m030 m020 m010 m000 m000*m110+m010*m111+m020*m112 m113
fmul ST, ST(5) // m030 m020 m010 m000 m000*m110+m010*m111+m020*m112 m030*m113
fadd // m030 m020 m010 m000 m000*m110+m010*m111+m020*m112+m030*m113
fstp dword ptr [esi+10h] // m030 m020 m010 m000
fld dword ptr [edi+20h] // m030 m020 m010 m000 m120
fmul ST, ST(1) // m030 m020 m010 m000 m000*m120
fld dword ptr [edi+24h] // m030 m020 m010 m000 m000*m120 m121
fmul ST, ST(3) // m030 m020 m010 m000 m000*m120 m010*m121
fadd // m030 m020 m010 m000 m000*m120+m010*m121
fld dword ptr [edi+28h] // m030 m020 m010 m000 m000*m120+m010*m121 m122
fmul ST, ST(4) // m030 m020 m010 m000 m000*m120+m010*m121 m020*m122
fadd // m030 m020 m010 m000 m000*m120+m010*m121+m020*m122
fld dword ptr [edi+2Ch] // m030 m020 m010 m000 m000*m120+m010*m121+m020*m122 m123
fmul ST, ST(5) // m030 m020 m010 m000 m000*m120+m010*m121+m020*m122 m030*m123
fadd // m030 m020 m010 m000 m000*m120+m010*m121+m020*m122+m030*m123
fstp dword ptr [esi+20h] // m030 m020 m010 m000
// Last row uses fmulp to pop the cached column while consuming it.
fld dword ptr [edi+30h] // m030 m020 m010 m000 m130
fmulp ST(1), ST // m030 m020 m010 m000*m130
fld dword ptr [edi+34h] // m030 m020 m010 m000*m130 m131
fmulp ST(2), ST // m030 m020 m010*m131 m000*m130
fadd // m030 m020 m010*m131+m000*m130
fld dword ptr [edi+38h] // m030 m020 m010*m131+m000*m130 m132
fmulp ST(2), ST // m030 m020*m132 m010*m131+m000*m130
fadd // m030 m020*m132+m010*m131+m000*m130
fld dword ptr [edi+3Ch] // m030 m020*m132+m010*m131+m000*m130 m133
fmulp ST(2), ST // m030*m133 m020*m132+m010*m131+m000*m130
fadd // m030*m133+m020*m132+m010*m131+m000*m130
fstp dword ptr [esi+30h] //
add esi, 4
// NOTE(review): dec/jnz would suffice; the cmp only re-derives CF for
// the unsigned `ja` (dec leaves CF unchanged).
dec cx
cmp cx, 0
ja MultMatrix_Loop
}
#else // WIN32
// Portable fallback: dst[r][i] = sum_k m0[k][i] * m1[r][k], written to a
// scratch matrix first because the result replaces m0.
int i;
float dst[4][4];
for (i = 0; i < 4; i++)
{
dst[0][i] = m0[0][i]*m1[0][0] + m0[1][i]*m1[0][1] + m0[2][i]*m1[0][2] + m0[3][i]*m1[0][3];
dst[1][i] = m0[0][i]*m1[1][0] + m0[1][i]*m1[1][1] + m0[2][i]*m1[1][2] + m0[3][i]*m1[1][3];
dst[2][i] = m0[0][i]*m1[2][0] + m0[1][i]*m1[2][1] + m0[2][i]*m1[2][2] + m0[3][i]*m1[2][3];
dst[3][i] = m0[3][i]*m1[3][3] + m0[2][i]*m1[3][2] + m0[1][i]*m1[3][1] + m0[0][i]*m1[3][0];
}
memcpy( m0, dst, sizeof(float) * 16 );
#endif // WIN32
}
inline void Transpose3x3Matrix( float mtx[4][4] )
{
#ifdef WIN32
@ -176,150 +93,7 @@ inline void Transpose3x3Matrix( float mtx[4][4] )
#endif // WIN32
}
// Transforms vtx by mtx as a point: vtx' = (x, y, z, 1) * mtx for a
// column-major 4x4 matrix.  The input w component is ignored and
// overwritten.  The commented-out perspNorm multiplies are a disabled
// perspective-normalization step kept for reference.
inline void TransformVertex( float vtx[4], float mtx[4][4] )//, float perspNorm )
{
#ifdef WIN32
__asm {
mov esi, dword ptr [vtx]
mov ebx, dword ptr [mtx]
// ST(4) ST(3) ST(2) ST(1) ST
fld dword ptr [esi+8] // vtx2
fld dword ptr [esi+4] // vtx2 vtx1
fld dword ptr [esi] // vtx2 vtx1 vtx0
fld dword ptr [ebx] // vtx2 vtx1 vtx0 mtx00
fmul ST, ST(1) // vtx2 vtx1 vtx0 vtx0*mtx00
fld dword ptr [ebx+10h] // vtx2 vtx1 vtx0 vtx0*mtx00 mtx10
fmul ST, ST(3) // vtx2 vtx1 vtx0 vtx0*mtx00 vtx1*mtx10
fadd // vtx2 vtx1 vtx0 vtx0*mtx00+vtx1*mtx10
fld dword ptr [ebx+20h] // vtx2 vtx1 vtx0 vtx0*mtx00+vtx1*mtx10 mtx20
fmul ST, ST(4) // vtx2 vtx1 vtx0 vtx0*mtx00+vtx1*mtx10 vtx2*mtx20
fadd // vtx2 vtx1 vtx0 vtx0*mtx00+vtx1*mtx10+vtx2*mtx20
fadd dword ptr [ebx+30h] // vtx2 vtx1 vtx0 vtx0*mtx00+vtx1*mtx10+vtx2*mtx20+mtx30
// fmul dword ptr [perspNorm] // (vtx2*mtx23+vtx1*mtx13+vtx0*mtx03+mtx33)*perspNorm
fstp dword ptr [esi] // vtx2 vtx1 vtx[0]
fld dword ptr [ebx+04h] // vtx2 vtx1 vtx0 mtx01
fmul ST, ST(1) // vtx2 vtx1 vtx0 vtx0*mtx01
fld dword ptr [ebx+14h] // vtx2 vtx1 vtx0 vtx0*mtx01 mtx11
fmul ST, ST(3) // vtx2 vtx1 vtx0 vtx0*mtx01 vtx1*mtx11
fadd // vtx2 vtx1 vtx0 vtx0*mtx01+vtx1*mtx11
fld dword ptr [ebx+24h] // vtx2 vtx1 vtx0 vtx0*mtx01+vtx1*mtx11 mtx21
fmul ST, ST(4) // vtx2 vtx1 vtx0 vtx0*mtx01+vtx1*mtx11 vtx2*mtx21
fadd // vtx2 vtx1 vtx0 vtx0*mtx01+vtx1*mtx11+vtx2*mtx21
fadd dword ptr [ebx+34h] // vtx2 vtx1 vtx0 vtx0*mtx01+vtx1*mtx11+vtx2*mtx21+mtx31
// fmul dword ptr [perspNorm] // (vtx2*mtx23+vtx1*mtx13+vtx0*mtx03+mtx33)*perspNorm
fstp dword ptr [esi+04h] // vtx2 vtx1 vtx[0]
fld dword ptr [ebx+08h] // vtx2 vtx1 vtx0 mtx02
fmul ST, ST(1) // vtx2 vtx1 vtx0 vtx0*mtx02
fld dword ptr [ebx+18h] // vtx2 vtx1 vtx0 vtx0*mtx02 mtx12
fmul ST, ST(3) // vtx2 vtx1 vtx0 vtx0*mtx02 vtx1*mtx12
fadd // vtx2 vtx1 vtx0 vtx0*mtx02+vtx1*mtx12
fld dword ptr [ebx+28h] // vtx2 vtx1 vtx0 vtx0*mtx02+vtx1*mtx12 mtx22
fmul ST, ST(4) // vtx2 vtx1 vtx0 vtx0*mtx02+vtx1*mtx12 vtx2*mtx22
fadd // vtx2 vtx1 vtx0 vtx0*mtx02+vtx1*mtx12+vtx2*mtx22
fadd dword ptr [ebx+38h] // vtx2 vtx1 vtx0 vtx0*mtx02+vtx1*mtx12+vtx2*mtx22+mtx32
// fmul dword ptr [perspNorm] // (vtx2*mtx23+vtx1*mtx13+vtx0*mtx03+mtx33)*perspNorm
fstp dword ptr [esi+08h] // vtx2 vtx1 vtx0
fld dword ptr [ebx+0Ch] // vtx2 vtx1 vtx0 mtx03
fmulp ST(1), ST // vtx2 vtx1 vtx0*mtx03
fld dword ptr [ebx+1Ch] // vtx2 vtx1 vtx0*mtx03 mtx13
fmulp ST(2), ST // vtx2 vtx1*mtx13 vtx0*mtx03
fadd // vtx2 vtx1*mtx13+vtx0*mtx03
fld dword ptr [ebx+2Ch] // vtx2 vtx1*mtx13+vtx0*mtx03 mtx23
fmulp ST(2), ST // vtx2*mtx23 vtx1*mtx13+vtx0*mtx03
fadd // vtx2*mtx23+vtx1*mtx13+vtx0*mtx03
fadd dword ptr [ebx+3Ch] // vtx2*mtx23+vtx1*mtx13+vtx0*mtx03+mtx33
// fmul dword ptr [perspNorm] // (vtx2*mtx23+vtx1*mtx13+vtx0*mtx03+mtx33)*perspNorm
fstp dword ptr [esi+0Ch] //
}
#else // WIN32
// Copy the inputs first so the writes below cannot alias the reads.
// (The previous code also read vtx[3] into an unused local `w`; the
// input w is never part of the computation, so that dead load is gone.)
float x, y, z;
x = vtx[0];
y = vtx[1];
z = vtx[2];
vtx[0] = x * mtx[0][0] +
y * mtx[1][0] +
z * mtx[2][0];
vtx[1] = x * mtx[0][1] +
y * mtx[1][1] +
z * mtx[2][1];
vtx[2] = x * mtx[0][2] +
y * mtx[1][2] +
z * mtx[2][2];
vtx[3] = x * mtx[0][3] +
y * mtx[1][3] +
z * mtx[2][3];
// Translation row: the implicit input w is treated as 1.
vtx[0] += mtx[3][0];
vtx[1] += mtx[3][1];
vtx[2] += mtx[3][2];
vtx[3] += mtx[3][3];
#endif // WIN32
}
// Transforms vec by the upper-left 3x3 of mtx (a direction vector:
// no translation, no normalization).  vec' = vec * M3x3, column-major.
inline void TransformVector( float vec[3], float mtx[4][4] )
{
#ifdef WIN32
__asm {
mov esi, dword ptr [vec]
mov ebx, dword ptr [mtx]
// ST(4) ST(3) ST(2) ST(1) ST
fld dword ptr [esi+8] // vec2
fld dword ptr [esi+4] // vec2 vtx1
fld dword ptr [esi] // vec2 vec1 vec0
fld dword ptr [ebx] // vec2 vec1 vec0 mtx00
fmul ST, ST(1) // vec2 vec1 vec0 vec0*mtx00
fld dword ptr [ebx+10h] // vec2 vec1 vec0 vec0*mtx00 mtx10
fmul ST, ST(3) // vec2 vec1 vec0 vec0*mtx00 vec1*mtx10
fadd // vec2 vec1 vec0 vec0*mtx00+vec1*mtx10
fld dword ptr [ebx+20h] // vec2 vec1 vec0 vec0*mtx00+vec1*mtx10 mtx20
fmul ST, ST(4) // vec2 vec1 vec0 vec0*mtx00+vec1*mtx10 vec2*mtx20
fadd // vec2 vec1 vec0 vec0*mtx00+vec1*mtx10+vec2*mtx20
fstp dword ptr [esi] // vec2 vec1 vec[0]
fld dword ptr [ebx+04h] // vec2 vec1 vec0 mtx01
fmul ST, ST(1) // vec2 vec1 vec0 vec0*mtx01
fld dword ptr [ebx+14h] // vec2 vec1 vec0 vec0*mtx01 mtx11
fmul ST, ST(3) // vec2 vec1 vec0 vec0*mtx01 vec1*mtx11
fadd // vec2 vec1 vec0 vec0*mtx01+vec1*mtx11
fld dword ptr [ebx+24h] // vec2 vec1 vec0 vec0*mtx01+vec1*mtx11 mtx21
fmul ST, ST(4) // vec2 vec1 vec0 vec0*mtx01+vec1*mtx11 vec2*mtx21
fadd // vec2 vec1 vec0 vec0*mtx01+vec1*mtx11+vec2*mtx21
fstp dword ptr [esi+04h] // vec2 vec1 vec[0]
fld dword ptr [ebx+08h] // vec2 vec1 vec0 mtx02
fmulp ST(1), ST // vec2 vec1 vec0*mtx02
fld dword ptr [ebx+18h] // vec2 vec1 vec0*mtx02 mtx12
fmulp ST(2), ST // vec2 vec1*mtx12 vec0*mtx02
fadd // vec2 vec1*mtx12+vec0*mtx02
fld dword ptr [ebx+28h] // vec2 vec1*mtx12+vec0*mtx03 mtx22
fmulp ST(2), ST // vec2*mtx22 vec1*mtx12+vec0*mtx02
fadd // vec2*mtx22+vec1*mtx12+vec0*mtx02
fstp dword ptr [esi+08h] //
}
#else // WIN32
// BUGFIX: read the inputs into locals first.  The previous C fallback
// overwrote vec[0] and vec[1] and then used the overwritten values to
// compute vec[1] and vec[2].  The asm path above loads all three input
// components onto the FPU stack up front, so this also makes the two
// paths agree.
const float x = vec[0];
const float y = vec[1];
const float z = vec[2];
vec[0] = mtx[0][0] * x + mtx[1][0] * y + mtx[2][0] * z;
vec[1] = mtx[0][1] * x + mtx[1][1] * y + mtx[2][1] * z;
vec[2] = mtx[0][2] * x + mtx[1][2] * y + mtx[2][2] * z;
#endif // WIN32
}
inline void Normalize( float v[3] )
inline void Normalize(float v[3])
{
#ifdef WIN32
__asm {
@ -356,8 +130,7 @@ End:
float len;
len = (float)(v[0]*v[0] + v[1]*v[1] + v[2]*v[2]);
if (len != 0.0)
{
if (len != 0.0) {
len = (float)sqrt( len );
v[0] /= (float)len;
v[1] /= (float)len;
@ -366,7 +139,8 @@ End:
#endif // WIN32
}
inline float DotProduct( float v0[3], float v1[3] )
inline float DotProduct(float v0[3], float v1[3])
{
float dot;
#ifdef WIN32

View File

@ -14,6 +14,7 @@ LOCAL_C_INCLUDES := \
LOCAL_SRC_FILES := \
2xSAI.cpp \
3DMath.cpp \
Combiner.cpp \
Config_mupen.cpp \
CRC.cpp \

View File

@ -4,6 +4,7 @@ project( GLideN64 )
set(GLideN64_SOURCES
2xSAI.cpp
3DMath.cpp
Combiner.cpp
CRC.cpp
Debug.cpp

View File

@ -273,6 +273,10 @@
RelativePath="2xSAI.cpp"
>
</File>
<File
RelativePath=".\3DMath.cpp"
>
</File>
<File
RelativePath="Combiner.cpp"
>

View File

@ -143,6 +143,7 @@
</ItemDefinitionGroup>
<ItemGroup>
<ClCompile Include="2xSAI.cpp" />
<ClCompile Include="3DMath.cpp" />
<ClCompile Include="Combiner.cpp" />
<ClCompile Include="Config.cpp" />
<ClCompile Include="CRC.cpp" />

View File

@ -114,6 +114,9 @@
<ClCompile Include="GLSLCombiner.cpp">
<Filter>Source Files\Combiners</Filter>
</ClCompile>
<ClCompile Include="3DMath.cpp">
<Filter>Source Files</Filter>
</ClCompile>
</ItemGroup>
<ItemGroup>
<ClInclude Include="2xSAI.h">