Commit

Try not telling ASAN quite so much info about the type and see if it still complains.
JonathanHenson committed Feb 1, 2024
1 parent 0d4f728 commit ef83ed9
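
The table bytes are unchanged; the commit only hides the [6][16] shape from the declarations the sanitizer instruments, exposing each 16-byte row as a plain const uint8_t * into one flat, 16-byte-aligned 96-byte array. A minimal standalone sketch of the idea being tried (the names table_2d, table_flat, pos_5, pos_3 are illustrative, not the library's, and C11 _Alignas stands in here for the library's AWS_ALIGNED_TYPEDEF):

#include <stdint.h>

/* Before: a 2D table. Expressions such as table_2d[5] - count index
 * backwards across a row boundary into table_2d[4]; a sanitizer that
 * knows the [6][16] type may object, even though every byte read is
 * inside the same 96-byte object. */
static uint8_t table_2d[6][16];

/* After: the same 96 bytes as one flat, aligned array, with each former
 * row exported only as a plain byte pointer. The pos_5 - count and
 * pos_3 + count arithmetic becomes ordinary pointer math over a single
 * array, so the declared types carry less information. */
static _Alignas(16) uint8_t table_flat[96];
static const uint8_t *pos_5 = table_flat + 80; /* was table_2d[5] */
static const uint8_t *pos_3 = table_flat + 48; /* was table_2d[3] */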
Showing 5 changed files with 35 additions and 21 deletions.
7 changes: 6 additions & 1 deletion include/aws/checksums/private/crc64_priv.h
@@ -41,8 +41,13 @@ typedef struct {
uint64_t trailing[15][2]; // Folding constants for 15 possible trailing input data lengths
} aws_checksums_crc64_constants_t;

extern uint8_t aws_checksums_masks_shifts[6][16];
extern aws_checksums_crc64_constants_t aws_checksums_crc64xz_constants;
extern const uint8_t *aws_checksums_masks_pos_5;
extern const uint8_t *aws_checksums_masks_pos_4;
extern const uint8_t *aws_checksums_masks_pos_3;
extern const uint8_t *aws_checksums_masks_pos_2;
extern const uint8_t *aws_checksums_masks_pos_1;
extern const uint8_t *aws_checksums_masks_pos_0;

AWS_EXTERN_C_END

8 changes: 4 additions & 4 deletions source/arm/crc64_arm.c
@@ -17,15 +17,15 @@
// Load a poly64x2_t neon register from a uint64_t pointer
# define load_p64(uint64_t_ptr) vreinterpretq_p64_u64(vld1q_u64((uint64_t_ptr)))
// Mask the bytes in a neon uint8x16_t register and preserve 0 to 15 least significant bytes.
# define mask_low_u8(u8, count) vandq_u8(u8, load_u8(aws_checksums_masks_shifts[5] - (intptr_t)(count)))
# define mask_low_u8(u8, count) vandq_u8(u8, load_u8(aws_checksums_masks_pos_5 - (intptr_t)(count)))
// Mask the bytes in a neon uint8x16_t register and preserve 0 to 15 most significant bytes.
# define mask_high_u8(u8, count) vandq_u8(u8, load_u8(aws_checksums_masks_shifts[3] + (intptr_t)(count)))
# define mask_high_u8(u8, count) vandq_u8(u8, load_u8(aws_checksums_masks_pos_3 + (intptr_t)(count)))
// Mask the bytes in a neon poly64x2_t register and preserve 0 to 15 most significant bytes.
# define mask_high_p64(poly, count) vreinterpretq_p64_u8(mask_high_u8(vreinterpretq_u8_p64(poly), count))
// Left shift bytes in a neon uint8x16_t register - shift count from 0 to 15.
# define left_shift_u8(u8, count) vqtbl1q_u8(u8, load_u8(aws_checksums_masks_shifts[1] - (intptr_t)(count)))
# define left_shift_u8(u8, count) vqtbl1q_u8(u8, load_u8(aws_checksums_masks_pos_1 - (intptr_t)(count)))
// Right shift bytes in a neon uint8x16_t register - shift count from 0 to 15.
# define right_shift_u8(u8, count) vqtbl1q_u8(u8, load_u8(aws_checksums_masks_shifts[1] + (intptr_t)(count)))
# define right_shift_u8(u8, count) vqtbl1q_u8(u8, load_u8(aws_checksums_masks_pos_1 + (intptr_t)(count)))
// Left shift bytes in a neon poly64x2_t register - shift count from 0 to 15.
# define left_shift_p64(poly, count) vreinterpretq_p64_u8(left_shift_u8(vreinterpretq_u8_p64(poly), count))
// Right shift a neon poly64x2_t register 0 to 15 bytes - imm must be an immediate constant
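
For reference, each of these macros loads a 16-byte window at a fixed offset from one of the row pointers defined in source/crc64.c below. As an illustration (not part of the commit), for count = 3:

  aws_checksums_masks_pos_3 + 3  reads 13 bytes of 0x00 then 3 bytes of 0xff,
                                 so the AND keeps only the 3 most significant bytes.
  aws_checksums_masks_pos_5 - 3  reads 3 bytes of 0xff then 13 bytes of 0x00,
                                 so the AND keeps only the 3 least significant bytes.
  aws_checksums_masks_pos_1 - 3  reads 0x80 0x80 0x80 then indices 0x00..0x0c;
                                 index bytes of 0x80 yield zero in both vqtbl1q_u8
                                 and PSHUFB, producing a 3-byte left shift.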
29 changes: 20 additions & 9 deletions source/crc64.c
@@ -7,18 +7,29 @@
#include <aws/checksums/private/crc64_priv.h>
#include <aws/common/cpuid.h>

//AWS_ALIGNED_TYPEDEF(uint8_t, checksums_maxks_shifts_type[6][16], 16);
AWS_ALIGNED_TYPEDEF(uint8_t, checksums_masks_shifts_type[96], 16);
// Intel PSHUFB / ARM VTBL patterns for left/right shifts and masks
uint8_t aws_checksums_masks_shifts[6][16] = {
{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, //
{0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f}, // left/right
checksums_masks_shifts_type aws_checksums_masks_shifts = {
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, //
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f, // left/right
// shifts
{0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80}, //
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, //
{0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff}, // byte masks
{0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00}, //
0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, //
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, // byte masks
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //
};

const uint8_t *aws_checksums_masks_pos_5 = (aws_checksums_masks_shifts + (intptr_t)80);
const uint8_t *aws_checksums_masks_pos_4 = (aws_checksums_masks_shifts + (intptr_t)64);
const uint8_t *aws_checksums_masks_pos_3 = (aws_checksums_masks_shifts + (intptr_t)48);
const uint8_t *aws_checksums_masks_pos_2 = (aws_checksums_masks_shifts + (intptr_t)32);
const uint8_t *aws_checksums_masks_pos_1 = (aws_checksums_masks_shifts + (intptr_t)16);
const uint8_t *aws_checksums_masks_pos_0 = (aws_checksums_masks_shifts);
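
Row k of the former uint8_t[6][16] table began at byte offset 16 * k, so these pointers address the same bytes as the old two-dimensional lookups (a sanity check, not part of the diff):

  aws_checksums_masks_pos_1 == aws_checksums_masks_shifts + 16   (old masks_shifts[1])
  aws_checksums_masks_pos_3 == aws_checksums_masks_shifts + 48   (old masks_shifts[3])
  aws_checksums_masks_pos_5 == aws_checksums_masks_shifts + 80   (old masks_shifts[5])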





AWS_ALIGNED_TYPEDEF(aws_checksums_crc64_constants_t, cheksums_constants, 16);

// Pre-computed bit-reflected constants for CRC64XZ
@@ -86,7 +97,7 @@ cheksums_constants aws_checksums_crc64xz_constants = {
static uint64_t (*s_crc64xz_fn_ptr)(const uint8_t *input, int length, uint64_t prev_crc64) = 0;

uint64_t aws_checksums_crc64xz(const uint8_t *input, int length, uint64_t prev_crc64) {

if (AWS_UNLIKELY(!s_crc64xz_fn_ptr)) {
#if defined(AWS_ARCH_INTEL_X64) && !(defined(_MSC_VER) && _MSC_VER < 1920)
# if defined(AWS_HAVE_AVX512_INTRINSICS)
3 changes: 1 addition & 2 deletions source/intel/intrin/crc64xz_avx512.c
@@ -13,8 +13,7 @@
# include <wmmintrin.h>

# define load_xmm(ptr) _mm_loadu_si128((const __m128i *)(const void *)(ptr))
# define mask_high_bytes(xmm, count) \
_mm_and_si128((xmm), load_xmm(aws_checksums_masks_shifts[3] + (intptr_t)(count)))
# define mask_high_bytes(xmm, count) _mm_and_si128((xmm), load_xmm(aws_checksums_masks_pos_3 + (intptr_t)(count)))
# define cmull_xmm_hi(xmm1, xmm2) _mm_clmulepi64_si128((xmm1), (xmm2), 0x11)
# define cmull_xmm_lo(xmm1, xmm2) _mm_clmulepi64_si128((xmm1), (xmm2), 0x00)
# define cmull_xmm_pair(xmm1, xmm2) _mm_xor_si128(cmull_xmm_hi((xmm1), (xmm2)), cmull_xmm_lo((xmm1), (xmm2)))
9 changes: 4 additions & 5 deletions source/intel/intrin/crc64xz_clmul.c
@@ -15,12 +15,11 @@

# define load_xmm(ptr) _mm_loadu_si128((const __m128i *)(const void *)(ptr))
# define left_shift_bytes(xmm, count) \
_mm_shuffle_epi8((xmm), load_xmm(aws_checksums_masks_shifts[1] - (intptr_t)(count)))
_mm_shuffle_epi8((xmm), load_xmm(aws_checksums_masks_pos_1 - (intptr_t)(count)))
# define right_shift_bytes(xmm, count) \
_mm_shuffle_epi8((xmm), load_xmm(aws_checksums_masks_shifts[1] + (intptr_t)(count)))
# define mask_high_bytes(xmm, count) \
_mm_and_si128((xmm), load_xmm(aws_checksums_masks_shifts[3] + (intptr_t)(count)))
# define mask_low_bytes(xmm, count) _mm_and_si128((xmm), load_xmm(aws_checksums_masks_shifts[5] - (intptr_t)(count)))
_mm_shuffle_epi8((xmm), load_xmm(aws_checksums_masks_pos_1 + (intptr_t)(count)))
# define mask_high_bytes(xmm, count) _mm_and_si128((xmm), load_xmm(aws_checksums_masks_pos_3 + (intptr_t)(count)))
# define mask_low_bytes(xmm, count) _mm_and_si128((xmm), load_xmm(aws_checksums_masks_pos_5 - (intptr_t)(count)))
# define cmull_xmm_hi(xmm1, xmm2) _mm_clmulepi64_si128((xmm1), (xmm2), 0x11)
# define cmull_xmm_lo(xmm1, xmm2) _mm_clmulepi64_si128((xmm1), (xmm2), 0x00)
# define cmull_xmm_pair(xmm1, xmm2) _mm_xor_si128(cmull_xmm_hi((xmm1), (xmm2)), cmull_xmm_lo((xmm1), (xmm2)))
