1
0
mirror of https://github.com/blawar/GLideN64.git synced 2024-07-07 03:13:49 +00:00

Fix InverseTransformVectorNormalize with NaN src numbers.

This commit is contained in:
Francisco Zurita 2016-12-04 15:04:21 -05:00
parent 9da9a86c51
commit 69ea7605f5
2 changed files with 44 additions and 36 deletions

View File

@ -1,4 +1,6 @@
#include "3DMath.h"
#include <cmath>
#include "Log.h"
void MultMatrix( float m0[4][4], float m1[4][4], float dest[4][4])
{
@ -72,52 +74,58 @@ void TransformVectorNormalize(float vec[3], float mtx[4][4])
"vst1.32 {d4}, [%1] \n\t" //Q4 = m+12
"fsts s10, [%1, #8] \n\t" //Q4 = m+12
: "+r"(mtx): "r"(vec)
: "+r"(mtx), "+r"(vec) :
: "d0","d1","d2","d3","d18","d19","d20","d21","d22", "d23", "memory"
);
}
// Computes dst[i] = dot(mtx[i], src) for i = 0..2 (each of the first three
// rows of mtx against the source vector) and then scales the result to unit
// length, using ARM NEON inline assembly.
//
//   src - 3 floats, read only by the code below.
//   dst - 3 floats, written.
//   mtx - 4x4 matrix; the first three rows (4 floats each) are loaded.
//
// When all three components of src are NaN the assembly is skipped and dst is
// filled with NAN instead. If only some components are NaN the assembly still
// runs.
//
// NOTE(review): this listing comes from a commit view and appears to carry
// both the removed and the added version of the body without +/- markers, so
// most instructions show up twice. The copy that starts at the `if` is
// presumably the post-commit one - confirm against the repository before
// compiling.
// NOTE(review): only s0..s2 are loaded from src; s3 (fourth lane of q0) is
// never written before the q0 multiplies, so lane 3 of each product uses
// whatever the register held. Looks harmless only while mtx[i][3] is 0 and s3
// is finite - confirm.
void InverseTransformVectorNormalize(float src[3], float dst[3], float mtx[4][4])
{
// First copy of the asm statement (presumably the pre-commit lines).
asm volatile (
"vld1.32 {d0}, [%1] \n\t" //d0 = src[0], src[1]
"flds s2, [%1, #8] \n\t" //s2 = src[2] (low lane of d1)
"vld1.32 {d18, d19}, [%0]! \n\t" //q9 = mtx[0][0..3], then advance %0 past the row
"vld1.32 {d20, d21}, [%0]! \n\t" //q10 = mtx[1][0..3], then advance %0 past the row
"vld1.32 {d22, d23}, [%0] \n\t" //q11 = mtx[2][0..3]
// Run the NEON path unless every component of src is NaN.
if(!(std::isnan(src[0]) && std::isnan(src[1]) && std::isnan(src[2]))){
asm volatile (
"vld1.32 {d0}, [%1] \n\t" //d0 = src[0], src[1]
"flds s2, [%1, #8] \n\t" //s2 = src[2] (low lane of d1)
"vld1.32 {d18, d19}, [%0]! \n\t" //q9 = mtx[0][0..3], then advance %0 past the row
"vld1.32 {d20, d21}, [%0]! \n\t" //q10 = mtx[1][0..3], then advance %0 past the row
"vld1.32 {d22, d23}, [%0] \n\t" //q11 = mtx[2][0..3]
"vmul.f32 q2, q0, q9 \n\t" //q2 = q0 * row 0, lane by lane
"vmul.f32 q3, q0, q10 \n\t" //q3 = q0 * row 1, lane by lane
"vmul.f32 q4, q0, q11 \n\t" //q4 = q0 * row 2, lane by lane
"vpadd.f32 d4, d4, d5 \n\t" //d4 = pairwise sums of q2
"vpadd.f32 d4, d4, d4 \n\t" //d4[0] = d4[1] = sum of all lanes of q2
"vpadd.f32 d8, d8, d9 \n\t" //d8 = pairwise sums of q4
"vpadd.f32 d5, d8, d8 \n\t" //d5[0] = d5[1] = sum of all lanes of q4
"vpadd.f32 d6, d6, d7 \n\t" //d6 = pairwise sums of q3
"vpadd.f32 d10, d6, d6 \n\t" //d10[0] = sum of all lanes of q3
"vmov.f32 s9, s20 \n\t" //d4[1] = d10[0] = sum of q3; q2 now holds the three dot products
"vmul.f32 q2, q0, q9 \n\t" //q2 = q0 * row 0, lane by lane
"vmul.f32 q3, q0, q10 \n\t" //q3 = q0 * row 1, lane by lane
"vmul.f32 q4, q0, q11 \n\t" //q4 = q0 * row 2, lane by lane
"vpadd.f32 d4, d4, d5 \n\t" //d4 = pairwise sums of q2
"vpadd.f32 d4, d4, d4 \n\t" //d4[0] = d4[1] = sum of all lanes of q2
"vpadd.f32 d8, d8, d9 \n\t" //d8 = pairwise sums of q4
"vpadd.f32 d5, d8, d8 \n\t" //d5[0] = d5[1] = sum of all lanes of q4
"vpadd.f32 d6, d6, d7 \n\t" //d6 = pairwise sums of q3
"vpadd.f32 d10, d6, d6 \n\t" //d10[0] = sum of all lanes of q3
"vmov.f32 s9, s20 \n\t" //d4[1] = d10[0] = sum of q3; q2 now holds the three dot products
"vmul.f32 d0, d4, d4 \n\t" //d0 = d4*d4 (squares of the first two results)
"vpadd.f32 d0, d0, d0 \n\t" //d0 = d0[0] + d0[1] in both lanes
"vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5 (squared length of the result)
"vmul.f32 d0, d4, d4 \n\t" //d0 = d4*d4 (squares of the first two results)
"vpadd.f32 d0, d0, d0 \n\t" //d0 = d0[0] + d0[1] in both lanes
"vmla.f32 d0, d5, d5 \n\t" //d0 = d0 + d5*d5 (squared length of the result)
"vmov.f32 d1, d0 \n\t" //d1 = d0 (keep the squared length)
"vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d2 * d0) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 (first refinement of the estimate)
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d2 * d0) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 (second refinement of the estimate)
"vmov.f32 d1, d0 \n\t" //d1 = d0 (keep the squared length)
"vrsqrte.f32 d0, d0 \n\t" //d0 = ~ 1.0 / sqrt(d0)
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d2 * d0) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 (first refinement of the estimate)
"vmul.f32 d2, d0, d1 \n\t" //d2 = d0 * d1
"vrsqrts.f32 d3, d2, d0 \n\t" //d3 = (3 - d2 * d0) / 2
"vmul.f32 d0, d0, d3 \n\t" //d0 = d0 * d3 (second refinement of the estimate)
"vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0] (scale the result by 1/length)
"vmul.f32 q2, q2, d0[0] \n\t" //q2 = q2 * d0[0] (scale the result by 1/length)
"vst1.32 {d4}, [%2] \n\t" //dst[0], dst[1] = d4
"fsts s10, [%2, #8] \n\t" //dst[2] = s10 (low lane of d5)
// This copy: mtx is read-write because of the post-incremented loads; src and dst are inputs.
: "+r"(mtx): "r"(src), "r"(dst)
: "d0","d1","d2","d3","d4","d5","d6","d7","d8","d9","d18","d19",
"d20","d21","d22", "d23", "memory"
);
"vst1.32 {d4}, [%2] \n\t" //dst[0], dst[1] = d4
"fsts s10, [%2, #8] \n\t" //dst[2] = s10 (low lane of d5)
// This copy: all three pointers are read-write operands, and d10 (written by the vpadd above) is in the clobber list.
: "+r"(mtx), "+r"(src), "+r"(dst) :
: "d0","d1","d2","d3","d4","d5","d6","d7","d8","d9","d10","d18","d19",
"d20","d21","d22", "d23", "memory"
);
}else{
// Every src component is NaN: skip the assembly and report NaN directly.
dst[0] = NAN;
dst[1] = NAN;
dst[2] = NAN;
}
}
void Normalize(float v[3])

View File

@ -101,7 +101,7 @@ ifeq ($(TARGET_ARCH_ABI), armeabi-v7a)
MY_LOCAL_SRC_FILES += $(SRCDIR)/3DMathNeon.cpp.neon
MY_LOCAL_SRC_FILES += $(SRCDIR)/gSPNeon.cpp.neon
MY_LOCAL_CFLAGS += -D__NEON_OPT
MY_LOCAL_CFLAGS += -D__VEC4_OPT -mfpu=neon -mfloat-abi=softfp -ftree-vectorize -mvectorize-with-neon-quad -ftree-vectorizer-verbose=2 -funsafe-math-optimizations
MY_LOCAL_CFLAGS += -D__VEC4_OPT -mfpu=neon -mfloat-abi=softfp -ftree-vectorize -mvectorize-with-neon-quad -ftree-vectorizer-verbose=2 -funsafe-math-optimizations -fno-finite-math-only
else ifeq ($(TARGET_ARCH_ABI), x86)
# MY_LOCAL_CFLAGS += -DX86_ASM