1
0
mirror of https://github.com/blawar/GLideN64.git synced 2024-07-02 09:03:37 +00:00

CRC.cpp: add ARMv8 crc intrinsics

Speedup:
CRC_Calculate() up to 11x
CRC_CalculatePalette() up to 4x

Will only be used if the right compiler flags are set.
rpi3: -march=armv8-a+crc -mtune=cortex-a53

http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0801f/pge
1427897662265.html

If CRC32 can be replaced with CRC32C, SSE4.2 CRC32C intrinsics could be
added as well.
https://github.com/gonetz/GLideN64/pull/1056#issuecomment-244530166
This commit is contained in:
gizmo98 2016-09-04 11:08:48 +02:00 committed by Sergey Lipskiy
parent 4049bfc53b
commit a471b130e7
3 changed files with 63 additions and 5 deletions

View File

@ -5,6 +5,7 @@ cmake [-DCMAKE_BUILD_TYPE=Debug] [-DVEC4_OPT=On] [-DCRC_OPT=On] [-DNEON_OPT=On]
-DCMAKE_BUILD_TYPE=Debug - optional parameter, if you want debug build. Default build type is Release
-DVEC4_OPT=On - optional parameter. set it if you want to enable additional VEC4 optimization (can cause additional bugs).
-DCRC_OPT=On - optional parameter. set it if you want to enable additional CRC optimization (can cause additional bugs).
-DCRC_ARMV8=On - optional parameter. set it if you want to enable armv8 hardware CRC. It will be ignored if -DCRC_OPT=On.
-DNEON_OPT=On - optional parameter. set it if you want to enable additional ARM NEON optimization (can cause additional bugs).
-DNOHQ=On - build without realtime texture enhancer library (GLideNHQ).
-DUSE_UNIFORMBLOCK=On - Use uniform blocks in shaders. May help to improve performance. Not supported by GLES2 hardware.

View File

@ -185,14 +185,20 @@ if(VEC4_OPT)
)
endif(VEC4_OPT)
if(CRC_OPT)
list(APPEND GLideN64_SOURCES
CRC_OPT.cpp
)
if(CRC_OPT OR CRC_ARMV8)
list(REMOVE_ITEM GLideN64_SOURCES
CRC.cpp
)
endif(CRC_OPT)
if(CRC_OPT)
list(APPEND GLideN64_SOURCES
CRC_OPT.cpp
)
elseif(CRC_ARMV8)
list(APPEND GLideN64_SOURCES
CRC_ARMV8.cpp
)
endif(CRC_OPT)
endif(CRC_OPT OR CRC_ARMV8)
if(NEON_OPT)
add_definitions(

51
src/CRC_ARMV8.cpp Normal file
View File

@ -0,0 +1,51 @@
#include "CRC.h"
#include <arm_acle.h>
#include <cstring>
// Intentionally empty: the CRC routines in this file compute their result
// with the __crc32* intrinsics and never read a lookup table, so there is
// nothing to build here.
// NOTE(review): presumably kept so that existing callers of CRC_BuildTable()
// still link when this file replaces the table-based implementation - confirm
// against CRC.h.
void CRC_BuildTable()
{
}
// Computes a CRC over `count` bytes starting at `buffer` using the ARMv8 CRC
// intrinsics (__crc32d, __crc32w, __crc32h and __crc32b use polynomial
// 0x04C11DB7).
//
// crc    - initial CRC value fed into the first intrinsic call.
// buffer - start of the data to checksum; no alignment is required.
// count  - number of bytes to process.
//
// Returns the accumulated CRC XORed with the initial `crc` value.
u32 CRC_Calculate( u32 crc, const void * buffer, u32 count )
{
	const u8 *p = static_cast<const u8*>(buffer);
	const u32 orig = crc;

	// Chunks are loaded with memcpy instead of dereferencing a casted pointer:
	// `buffer` carries no alignment guarantee and a type-punned load violates
	// strict aliasing. Compilers lower these fixed-size copies to plain loads.

	// Use the eight byte crc intrinsic __crc32d while count is high enough.
	while (count >= 8) {
		u64 chunk;
		std::memcpy(&chunk, p, sizeof(chunk));
		crc = __crc32d(crc, chunk);
		p += 8;
		count -= 8;
	}

	// At most 7 bytes remain: fold them in as one 4, one 2 and one 1 byte step.
	if (count >= 4) {
		u32 chunk;
		std::memcpy(&chunk, p, sizeof(chunk));
		crc = __crc32w(crc, chunk);
		p += 4;
		count -= 4;
	}

	if (count >= 2) {
		u16 chunk;
		std::memcpy(&chunk, p, sizeof(chunk));
		crc = __crc32h(crc, chunk);
		p += 2;
		count -= 2;
	}

	if (count == 1)
		crc = __crc32b(crc, *p);

	return crc ^ orig;
}
// Computes a CRC over `count` two-byte samples taken from `buffer`, one sample
// every 8 bytes, using the two byte intrinsic __crc32h.
//
// crc    - initial CRC value fed into the first intrinsic call.
// buffer - start of the data; only the first 2 bytes of each 8-byte step are
//          read, so the last sample read ends at byte offset (count - 1) * 8 + 2.
// count  - number of samples (not bytes) to process.
//
// Returns the accumulated CRC XORed with the initial `crc` value.
u32 CRC_CalculatePalette(u32 crc, const void * buffer, u32 count )
{
	const u8 *p = static_cast<const u8*>(buffer);
	const u32 orig = crc;

	while (count--) {
		// Load via memcpy rather than a casted pointer dereference to stay
		// clear of strict-aliasing and alignment assumptions.
		u16 entry;
		std::memcpy(&entry, p, sizeof(entry));
		crc = __crc32h(crc, entry);

		// Advance by 8 bytes per sample; the 6 bytes in between are skipped.
		p += 8;
	}

	return crc ^ orig;
}