mirror of
https://github.com/blawar/GLideN64.git
synced 2024-07-04 10:03:36 +00:00
CRC.cpp: add ARMv8 crc intrinsics
Speedup: CRC_Calculate() up to 11x, CRC_CalculatePalette() up to 4x. Will only be used if the right compiler flags are set. rpi3: -march=armv8-a+crc -mtune=cortex-a53 http://infocenter.arm.com/help/index.jsp?topic=/com.arm.doc.dui0801f/pge1427897662265.html If CRC32 can be replaced with CRC32C, SSE4.2 CRC32C intrinsics could be added as well. https://github.com/gonetz/GLideN64/pull/1056#issuecomment-244530166
This commit is contained in:
parent
4049bfc53b
commit
a471b130e7
|
@ -5,6 +5,7 @@ cmake [-DCMAKE_BUILD_TYPE=Debug] [-DVEC4_OPT=On] [-DCRC_OPT=On] [-DNEON_OPT=On]
|
|||
-DCMAKE_BUILD_TYPE=Debug - optional parameter, if you want debug build. Default build type is Release
|
||||
-DVEC4_OPT=On - optional parameter. set it if you want to enable additional VEC4 optimization (can cause additional bugs).
|
||||
-DCRC_OPT=On - optional parameter. set it if you want to enable additional CRC optimization (can cause additional bugs).
|
||||
-DCRC_ARMV8=On - optional parameter. set it if you want to enable armv8 hardware CRC. It will be ignored if -DCRC_OPT=On.
|
||||
-DNEON_OPT=On - optional parameter. set it if you want to enable additional ARM NEON optimization (can cause additional bugs).
|
||||
-DNOHQ=On - build without realtime texture enhancer library (GLideNHQ).
|
||||
-DUSE_UNIFORMBLOCK=On - Use uniform blocks in shaders. May help to improve performance. Not supported by GLES2 hardware.
|
||||
|
|
|
@ -185,14 +185,20 @@ if(VEC4_OPT)
|
|||
)
|
||||
endif(VEC4_OPT)
|
||||
|
||||
if(CRC_OPT OR CRC_ARMV8)
|
||||
list(REMOVE_ITEM GLideN64_SOURCES
|
||||
CRC.cpp
|
||||
)
|
||||
if(CRC_OPT)
|
||||
list(APPEND GLideN64_SOURCES
|
||||
CRC_OPT.cpp
|
||||
)
|
||||
list(REMOVE_ITEM GLideN64_SOURCES
|
||||
CRC.cpp
|
||||
elseif(CRC_ARMV8)
|
||||
list(APPEND GLideN64_SOURCES
|
||||
CRC_ARMV8.cpp
|
||||
)
|
||||
endif(CRC_OPT)
|
||||
endif(CRC_OPT OR CRC_ARMV8)
|
||||
|
||||
if(NEON_OPT)
|
||||
add_definitions(
|
||||
|
|
51
src/CRC_ARMV8.cpp
Normal file
51
src/CRC_ARMV8.cpp
Normal file
|
@ -0,0 +1,51 @@
|
|||
#include "CRC.h"
#include <arm_acle.h>
#include <cstring>
|
||||
|
||||
// Intentionally a no-op: the checksum routines in this file call the
// __crc32* hardware intrinsics and never read a lookup table, so there is
// nothing to precompute here.
// NOTE(review): presumably kept so callers written against the table-driven
// CRC implementation still link - confirm against CRC.h.
void CRC_BuildTable() {}
|
||||
|
||||
// Computes a checksum over `count` bytes of `buffer` with the ARMv8 CRC32
// instructions.
//
//   crc    - seed value fed into the first CRC instruction.
//   buffer - start of the data to checksum; no alignment is required.
//   count  - number of bytes to process.
//
// Returns the accumulated CRC XOR-ed with the original seed.
//
// __crc32d, __crc32w, __crc32h and __crc32b use polynomial 0x04C11DB7.
u32 CRC_Calculate( u32 crc, const void * buffer, u32 count )
{
	const u8 * p = static_cast<const u8*>(buffer);
	const u32 orig = crc;

	// Every multi-byte chunk is copied into a properly typed local before it
	// is handed to the intrinsic. Dereferencing a casted u64*/u32*/u16* would
	// be a misaligned, aliasing-violating load (undefined behaviour) because
	// `buffer` may point at data of any type and alignment.

	// Consume the bulk of the input eight bytes at a time.
	while (count >= 8) {
		u64 chunk;
		std::memcpy(&chunk, p, sizeof(chunk));
		crc = __crc32d(crc, chunk);
		p += sizeof(chunk);
		count -= sizeof(chunk);
	}

	// At most seven bytes remain: finish with one 4-, 2- and 1-byte step.
	if (count >= 4) {
		u32 chunk;
		std::memcpy(&chunk, p, sizeof(chunk));
		crc = __crc32w(crc, chunk);
		p += sizeof(chunk);
		count -= sizeof(chunk);
	}

	if (count >= 2) {
		u16 chunk;
		std::memcpy(&chunk, p, sizeof(chunk));
		crc = __crc32h(crc, chunk);
		p += sizeof(chunk);
		count -= sizeof(chunk);
	}

	if (count == 1)
		crc = __crc32b(crc, *p);

	return crc ^ orig;
}
|
||||
|
||||
// Computes a checksum over strided 16-bit values using the ARMv8 CRC32
// instructions.
//
//   crc    - seed value fed into the first CRC instruction.
//   buffer - start of the data; no alignment is required.
//   count  - number of iterations. Each iteration feeds the two bytes at the
//            current position into the CRC and then advances by eight bytes,
//            so the remaining six bytes of every stride are skipped.
//            NOTE(review): presumably one palette entry per 8-byte stride -
//            confirm against the caller.
//
// Returns the accumulated CRC XOR-ed with the original seed.
u32 CRC_CalculatePalette(u32 crc, const void * buffer, u32 count )
{
	const u8 * p = static_cast<const u8*>(buffer);
	const u32 orig = crc;

	while (count--) {
		// Copy the two bytes into a typed local instead of dereferencing a
		// casted u16*: the latter is a misaligned, aliasing-violating load
		// (undefined behaviour) for arbitrary `buffer` contents.
		u16 entry;
		std::memcpy(&entry, p, sizeof(entry));

		// use two byte intrinsic __crc32h
		crc = __crc32h(crc, entry);
		p += 8;
	}

	return crc ^ orig;
}
|
Loading…
Reference in New Issue
Block a user