Skip to content

Commit

Permalink
align whole array
Browse files: browse the repository at this point in the history
  • Loading branch information
DmitriyMusatkin committed Sep 12, 2024
1 parent cf9c5d1 commit 3a1d13a
Showing 1 changed file with 5 additions and 6 deletions.
11 changes: 5 additions & 6 deletions source/intel/intrin/crc32c_sse42_avx512.c
Original file line number | Diff line number | Diff line change
Expand Up @@ -17,8 +17,7 @@

# include <wmmintrin.h>

AWS_ALIGNED_TYPEDEF(const uint64_t, zalign_8, 64);
AWS_ALIGNED_TYPEDEF(const uint64_t, zalign_2, 16);
AWS_ALIGNED_TYPEDEF(const uint64_t, aligned_512_u64[8], 64);

// load_zmm(ptr): loads from `ptr` by calling _mm512_loadu_si512, the unaligned-load intrinsic.
// This macro uses casting to ensure the compiler actually uses the unaligned load instructions:
// `ptr` is converted to const void * and then to const uint8_t * before being passed on, so the
// argument can be any object pointer type (presumably this also keeps the pointer's original type
// from implying a stricter alignment -- confirm against the compilers this file targets).
// The argument is parenthesized in the expansion, so expressions such as `p + 64` are safe to pass.
# define load_zmm(ptr) _mm512_loadu_si512((const uint8_t *)(const void *)(ptr))
Expand Down Expand Up @@ -48,13 +47,13 @@ static uint32_t s_checksums_crc32c_avx512_impl(const uint8_t *input, int length,
* k6 = ( x ^ ( 128 - 32 ) mod P(x) << 32 )' << 1
*/

static zalign_8 k1k2[8] = {
static aligned_512_u64 k1k2 = {
0xdcb17aa4, 0xb9e02b86, 0xdcb17aa4, 0xb9e02b86, 0xdcb17aa4, 0xb9e02b86, 0xdcb17aa4, 0xb9e02b86};
static zalign_8 k3k4[8] = {
static aligned_512_u64 k3k4 = {
0x740eef02, 0x9e4addf8, 0x740eef02, 0x9e4addf8, 0x740eef02, 0x9e4addf8, 0x740eef02, 0x9e4addf8};
static zalign_8 k9k10[8] = {
static aligned_512_u64 k9k10 = {
0x6992cea2, 0x0d3b6092, 0x6992cea2, 0x0d3b6092, 0x6992cea2, 0x0d3b6092, 0x6992cea2, 0x0d3b6092};
static zalign_8 k1k4[8] = {
static aligned_512_u64 k1k4 = {
0x1c291d04, 0xddc0152b, 0x3da6d0cb, 0xba4fc28e, 0xf20c0dfe, 0x493c7d27, 0x00000000, 0x00000000};

__m512i x0, x1, x2, x3, x4, x5, x6, x7, x8, y5, y6, y7, y8;
Expand Down

0 comments on commit 3a1d13a

Please sign in to comment.