mirror of
https://github.com/blawar/GLideN64.git
synced 2024-07-04 10:03:36 +00:00
arm neon: add gSPLightVertex and gSPLightVertex4 neon version
This commit is contained in:
parent
542d083840
commit
e52f39aebe
|
@ -23,6 +23,8 @@
|
|||
#include "DisplayWindow.h"
|
||||
#include <arm_neon.h>
|
||||
|
||||
using namespace std;
|
||||
|
||||
void gSPTransformVertex4NEON(u32 v, float mtx[4][4])
|
||||
{
|
||||
GraphicsDrawer & drawer = dwnd().getDrawer();
|
||||
|
@ -134,3 +136,193 @@ void gSPTransformVertex_NEON(float vtx[4], float mtx[4][4])
|
|||
// Store vtx
|
||||
vst1q_f32(vtx, _mtx0);
|
||||
}
|
||||
|
||||
void DotProductMax7FullNeon( float v0[3], float v1[7][3], float lights[7][3], float _vtx[3])
|
||||
{
|
||||
asm volatile (
|
||||
"pld [%0] \n\t" //preload lights
|
||||
"pld [%0, #64] \n\t"
|
||||
"pld [%3] \n\t" //preload vtx
|
||||
"vld3.32 {d2[0],d3[0],d4[0]}, [%1] \n\t" //load v0
|
||||
"vld3.32 {d6,d8,d10}, [%2]! \n\t" //load v1
|
||||
"vld3.32 {d7,d9,d11}, [%2]! \n\t"
|
||||
"vld3.32 {d18,d20,d22}, [%2]! \n\t"
|
||||
"vld3.32 {d19[0],d21[0],d23[0]}, [%2] \n\t"
|
||||
|
||||
"vmul.f32 q0, q3, d2[0] \n\t" //q0=v0[0]*v1[i][0]
|
||||
"vmul.f32 q6, q9, d2[0] \n\t" //q6=v0[0]*v1[i+4][0]
|
||||
|
||||
"vmla.f32 q0, q4, d3[0] \n\t" //q0+=v0[0]*v1[i][1]
|
||||
"vmla.f32 q6, q10, d3[0] \n\t" //q6+=v0[0]*v1[i+4][1]
|
||||
"vmov.f32 q15, #0.0 \n\t" //q15={0.0f,0.0f,0.0f,0.0f}
|
||||
"vmla.f32 q0, q5, d4[0] \n\t" //q0+=v0[0]*v1[i][2]
|
||||
"vmla.f32 q6, q11, d4[0] \n\t" //q6+=v0[0]*v1[i+4][2]
|
||||
"vld3.32 {d4,d6,d8}, [%0]! \n\t" //load lights
|
||||
"vld3.32 {d5,d7,d9}, [%0]! \n\t"
|
||||
|
||||
"vmax.f32 q0, q0, q15 \n\t" //q0=max(q0,q15)
|
||||
"vmov.f32 d11, #0.0 \n\t" //d11={0.0f,0.0f}
|
||||
"vmov.f32 d15, #0.0 \n\t" //d15={0.0f,0.0f}
|
||||
"vmax.f32 q1, q6, q15 \n\t" //q1=max(q6,q15)
|
||||
"vmov.f32 d13, #0.0 \n\t" //d13={0.0f,0.0f}
|
||||
|
||||
"vld3.32 {d10,d12,d14}, [%0]! \n\t" //d10={x1,y1}
|
||||
"vld3.32 {d11[0],d13[0],d15[0]}, [%0] \n\t" //d10={x1,y1}
|
||||
|
||||
"vmul.f32 q2, q2, q0 \n\t" //q2=light.x*intensity
|
||||
"vmul.f32 q3, q3, q0 \n\t" //q3=light.y*intensity
|
||||
"vmul.f32 q4, q4, q0 \n\t" //q4=light.z*intensity
|
||||
"vmul.f32 q5, q5, q1 \n\t" //q5=(light.x+4)*intensity
|
||||
"vmul.f32 q6, q6, q1 \n\t" //q6=(light.y+4)*intensity
|
||||
"vmul.f32 q7, q7, q1 \n\t" //q7=(light.z+4)*intensity
|
||||
"vld3.32 {d22[0],d23[0],d24[0]}, [%3] \n\t" //load vtx
|
||||
|
||||
"vadd.f32 d4,d4,d5 \n\t" //add everything to vtx
|
||||
"vadd.f32 d6,d6,d7 \n\t"
|
||||
"vadd.f32 d8,d8,d9 \n\t"
|
||||
|
||||
"vadd.f32 d10,d10,d11 \n\t"
|
||||
"vadd.f32 d12,d12,d13 \n\t"
|
||||
"vadd.f32 d14,d14,d15 \n\t"
|
||||
|
||||
"vpadd.f32 d4,d4,d4 \n\t"
|
||||
"vpadd.f32 d10,d10,d10 \n\t"
|
||||
"vpadd.f32 d5,d6,d6 \n\t"
|
||||
"vpadd.f32 d11,d12,d12 \n\t"
|
||||
"vpadd.f32 d6,d8,d8 \n\t"
|
||||
"vpadd.f32 d12,d14,d14 \n\t"
|
||||
|
||||
"vadd.f32 d4,d4,d10 \n\t"
|
||||
"vadd.f32 d5,d5,d11 \n\t"
|
||||
"vadd.f32 d6,d6,d12 \n\t"
|
||||
|
||||
"vadd.f32 d4,d4,d22 \n\t"
|
||||
"vadd.f32 d5,d5,d23 \n\t"
|
||||
"vadd.f32 d6,d6,d24 \n\t"
|
||||
|
||||
"vst3.32 {d4[0],d5[0],d6[0]}, [%3] \n\t"
|
||||
: "+r"(lights), "+r"(v0), "+r"(v1), "+r"(_vtx):
|
||||
: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "d14",
|
||||
"d15", "d16", "d17", "d18", "d19", "d20", "d21", "d22", "d23", "d24", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void DotProductMax4FullNeon( float v0[3], float v1[4][3], float _lights[4][3], float _vtx[3])
|
||||
{
|
||||
asm volatile (
|
||||
"vld3.32 {d0[0],d2[0],d4[0]}, [%1] \n\t" //load v0
|
||||
"vld3.32 {d6,d8,d10}, [%2]! \n\t" //load v1
|
||||
"vld3.32 {d7,d9,d11}, [%2] \n\t" //
|
||||
|
||||
"vmul.f32 q0, q3, d0[0] \n\t" //product=v0[0]*v1[0]
|
||||
"vld3.32 {d12,d14,d16}, [%0]! \n\t" //load lights
|
||||
"vmla.f32 q0, q4, d2[0] \n\t" //product+=v0[1]*v1[1]
|
||||
"vld3.32 {d13,d15,d17}, [%0] \n\t" //load lights +2
|
||||
"vmov.f32 q11, #0.0 \n\t"
|
||||
"vmla.f32 q0, q5, d4[0] \n\t" //product+=v0[2]*v1[2]
|
||||
|
||||
"vmax.f32 q0, q0, q11 \n\t" //product=max(product,0.0f)
|
||||
"vld3.32 {d18[0],d19[0],d20[0]}, [%3]\n\t" //load vtx
|
||||
"vmul.f32 q6, q6, q0 \n\t" //lights.r = lights.r * intensity
|
||||
"vmul.f32 q7, q7, q0 \n\t" //lights.g = lights.g * intensity
|
||||
"vmul.f32 q8, q8, q0 \n\t" //lights.b = lights.b * intensity
|
||||
|
||||
"vadd.f32 d12,d12,d13 \n\t" //add all values
|
||||
"vadd.f32 d14,d14,d15 \n\t"
|
||||
"vadd.f32 d16,d16,d17 \n\t"
|
||||
|
||||
"vpadd.f32 d12,d12,d12 \n\t"
|
||||
"vpadd.f32 d13,d14,d14 \n\t"
|
||||
"vpadd.f32 d14,d16,d16 \n\t"
|
||||
|
||||
"vadd.f32 d12,d12,d18 \n\t"
|
||||
"vadd.f32 d13,d13,d19 \n\t"
|
||||
"vadd.f32 d14,d14,d20 \n\t"
|
||||
|
||||
"vst3.32 {d12[0],d13[0],d14[0]}, [%3]\n\t" //store vtx
|
||||
: "+r"(_lights), "+r"(v0), "+r"(v1), "+r"(_vtx):
|
||||
: "d0", "d1", "d2", "d3", "d4", "d5", "d6", "d7", "d8", "d9", "d10", "d11", "d12", "d13", "memory"
|
||||
);
|
||||
}
|
||||
|
||||
void gSPLightVertex4_NEON(u32 v)
|
||||
{
|
||||
GraphicsDrawer & drawer = dwnd().getDrawer();
|
||||
if (!config.generalEmulation.enableHWLighting) {
|
||||
for(int j = 0; j < 4; ++j) {
|
||||
SPVertex & vtx = drawer.getVertex(v+j);
|
||||
vtx.r = gSP.lights.rgb[gSP.numLights][R];
|
||||
vtx.g = gSP.lights.rgb[gSP.numLights][G];
|
||||
vtx.b = gSP.lights.rgb[gSP.numLights][B];
|
||||
vtx.HWLight = 0;
|
||||
|
||||
s32 count = gSP.numLights-1;
|
||||
while (count >= 6) {
|
||||
DotProductMax7FullNeon(&vtx.nx,(float (*)[3])gSP.lights.i_xyz[gSP.numLights - count - 1],(float (*)[3])gSP.lights.rgb[gSP.numLights - count - 1],&vtx.r);
|
||||
count -= 7;
|
||||
}
|
||||
while (count >= 3) {
|
||||
DotProductMax4FullNeon(&vtx.nx,(float (*)[3])gSP.lights.i_xyz[gSP.numLights - count - 1],(float (*)[3])gSP.lights.rgb[gSP.numLights - count - 1],&vtx.r);
|
||||
count -= 4;
|
||||
}
|
||||
while (count >= 0)
|
||||
{
|
||||
f32 intensity = DotProduct( &vtx.nx, gSP.lights.i_xyz[gSP.numLights - count - 1] );
|
||||
if (intensity < 0.0f)
|
||||
intensity = 0.0f;
|
||||
vtx.r += gSP.lights.rgb[gSP.numLights - count - 1][R] * intensity;
|
||||
vtx.g += gSP.lights.rgb[gSP.numLights - count - 1][G] * intensity;
|
||||
vtx.b += gSP.lights.rgb[gSP.numLights - count - 1][B] * intensity;
|
||||
count -= 1;
|
||||
}
|
||||
vtx.r = min(1.0f, vtx.r);
|
||||
vtx.g = min(1.0f, vtx.g);
|
||||
vtx.b = min(1.0f, vtx.b);
|
||||
}
|
||||
} else {
|
||||
for(int j = 0; j < 4; ++j) {
|
||||
SPVertex & vtx = drawer.getVertex(v+j);
|
||||
vtx.HWLight = gSP.numLights;
|
||||
vtx.r = vtx.nx;
|
||||
vtx.g = vtx.ny;
|
||||
vtx.b = vtx.nz;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void gSPLightVertex_NEON(SPVertex & _vtx)
|
||||
{
|
||||
if (config.generalEmulation.enableHWLighting == 0) {
|
||||
_vtx.HWLight = 0;
|
||||
_vtx.r = gSP.lights.rgb[gSP.numLights][R];
|
||||
_vtx.g = gSP.lights.rgb[gSP.numLights][G];
|
||||
_vtx.b = gSP.lights.rgb[gSP.numLights][B];
|
||||
s32 count = gSP.numLights-1;
|
||||
while (count >= 6) {
|
||||
DotProductMax7FullNeon(&_vtx.nx,(float (*)[3])gSP.lights.i_xyz[gSP.numLights - count - 1],(float (*)[3])gSP.lights.rgb[gSP.numLights - count - 1],&_vtx.r);
|
||||
count -= 7;
|
||||
}
|
||||
while (count >= 3) {
|
||||
DotProductMax4FullNeon(&_vtx.nx,(float (*)[3])gSP.lights.i_xyz[gSP.numLights - count - 1],(float (*)[3])gSP.lights.rgb[gSP.numLights - count - 1],&_vtx.r);
|
||||
count -= 4;
|
||||
}
|
||||
while (count >= 0)
|
||||
{
|
||||
f32 intensity = DotProduct( &_vtx.nx, gSP.lights.i_xyz[gSP.numLights - count - 1] );
|
||||
if (intensity < 0.0f)
|
||||
intensity = 0.0f;
|
||||
_vtx.r += gSP.lights.rgb[gSP.numLights - count - 1][R] * intensity;
|
||||
_vtx.g += gSP.lights.rgb[gSP.numLights - count - 1][G] * intensity;
|
||||
_vtx.b += gSP.lights.rgb[gSP.numLights - count - 1][B] * intensity;
|
||||
count -= 1;
|
||||
}
|
||||
_vtx.r = min(1.0f, _vtx.r);
|
||||
_vtx.g = min(1.0f, _vtx.g);
|
||||
_vtx.b = min(1.0f, _vtx.b);
|
||||
} else {
|
||||
_vtx.HWLight = gSP.numLights;
|
||||
_vtx.r = _vtx.nx;
|
||||
_vtx.g = _vtx.ny;
|
||||
_vtx.b = _vtx.nz;
|
||||
}
|
||||
}
|
||||
|
|
19
src/gSP.cpp
19
src/gSP.cpp
|
@ -2567,18 +2567,21 @@ void gSPObjRendermode(u32 _mode)
|
|||
void gSPTransformVertex4NEON(u32 v, float mtx[4][4]);
|
||||
void gSPBillboardVertex4NEON(u32 v);
|
||||
void gSPTransformVertex_NEON(float vtx[4], float mtx[4][4]);
|
||||
void gSPLightVertex_NEON(SPVertex & _vtx);
|
||||
void gSPLightVertex4_NEON(u32 v);
|
||||
#endif //__NEON_OPT
|
||||
|
||||
#ifdef __VEC4_OPT
|
||||
#ifndef __NEON_OPT
|
||||
void (*gSPTransformVertex4)(u32 v, float mtx[4][4]) = gSPTransformVertex4_default;
|
||||
void (*gSPBillboardVertex4)(u32 v) = gSPBillboardVertex4_default;
|
||||
void (*gSPLightVertex4)(u32 v) = gSPLightVertex4_default;
|
||||
#else
|
||||
void (*gSPTransformVertex4)(u32 v, float mtx[4][4]) = gSPTransformVertex4NEON;
|
||||
void (*gSPBillboardVertex4)(u32 v) = gSPBillboardVertex4NEON;
|
||||
void (*gSPLightVertex4)(u32 v) = gSPLightVertex4_NEON;
|
||||
#endif
|
||||
|
||||
void (*gSPLightVertex4)(u32 v) = gSPLightVertex4_default;
|
||||
void (*gSPPointLightVertex4)(u32 v, float _vPos[4][3]) = gSPPointLightVertex4_default;
|
||||
|
||||
#endif
|
||||
|
@ -2587,11 +2590,14 @@ void (*gSPPointLightVertex4)(u32 v, float _vPos[4][3]) = gSPPointLightVertex4_de
|
|||
#ifndef __NEON_OPT
|
||||
void (*gSPTransformVertex)(float vtx[4], float mtx[4][4]) =
|
||||
gSPTransformVertex_default;
|
||||
void (*gSPLightVertex)(SPVertex & _vtx) =
|
||||
gSPLightVertex_default;
|
||||
#else
|
||||
void (*gSPTransformVertex)(float vtx[4], float mtx[4][4]) =
|
||||
gSPTransformVertex_NEON;
|
||||
void (*gSPLightVertex)(SPVertex & _vtx) =
|
||||
gSPLightVertex_NEON;
|
||||
#endif
|
||||
void (*gSPLightVertex)(SPVertex & _vtx) = gSPLightVertex_default;
|
||||
void (*gSPPointLightVertex)(SPVertex & _vtx, float * _vPos) = gSPPointLightVertex_default;
|
||||
void (*gSPBillboardVertex)(u32 v, u32 i) = gSPBillboardVertex_default;
|
||||
|
||||
|
@ -2600,10 +2606,19 @@ void gSPSetupFunctions()
|
|||
if (GBI.getMicrocodeType() != F3DEX2CBFD) {
|
||||
|
||||
#ifdef __VEC4_OPT
|
||||
#ifndef __NEON_OPT
|
||||
gSPLightVertex4 = gSPLightVertex4_default;
|
||||
#else
|
||||
gSPLightVertex4 = gSPLightVertex4_NEON;
|
||||
#endif
|
||||
gSPPointLightVertex4 = gSPPointLightVertex4_default;
|
||||
#endif
|
||||
|
||||
#ifndef __NEON_OPT
|
||||
gSPLightVertex = gSPLightVertex_default;
|
||||
#else
|
||||
gSPLightVertex = gSPLightVertex_NEON;
|
||||
#endif
|
||||
gSPPointLightVertex = gSPPointLightVertex_default;
|
||||
return;
|
||||
}
|
||||
|
|
Loading…
Reference in New Issue
Block a user