Skip to content

Commit

Permalink
ggml : fix 32-bit ARM compatibility
Browse files Browse the repository at this point in the history
  • Loading branch information
ggerganov committed Nov 3, 2023
1 parent 72c8697 commit db1093e
Show file tree
Hide file tree
Showing 2 changed files with 95 additions and 0 deletions.
83 changes: 83 additions & 0 deletions ggml-quants.c
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,13 @@ static inline float hsum_float_4x4(const __m128 a, const __m128 b, const __m128

// 64-bit compatibility

// vaddvq_s16
// vpaddq_s16
// vaddvq_s32
// vaddvq_f32
// vmaxvq_f32
// vcvtnq_s32_f32

inline static int32_t vaddvq_s16(int16x8_t v) {
return
(int32_t)vgetq_lane_s16(v, 0) + (int32_t)vgetq_lane_s16(v, 1) +
Expand Down Expand Up @@ -309,6 +316,82 @@ inline static int32x4_t vcvtnq_s32_f32(float32x4_t v) {
return res;
}

// vld1q_s16_x2
// vld1q_u8_x2
// vld1q_u8_x4
// vld1q_s8_x2
// vld1q_s8_x4
// TODO: double-check these work correctly

struct int16x8x2_t {
int16x8_t val[2];
};

inline static int16x8x2_t vld1q_s16_x2(const int16_t * ptr) {
int16x8x2_t res;

res.val[0] = vld1q_s16(ptr + 0);
res.val[1] = vld1q_s16(ptr + 8);

return res;
}

struct uint8x16x2_t {
uint8x16_t val[2];
};

inline static uint8x16x2_t vld1q_u8_x2(const uint8_t * ptr) {
uint8x16x2_t res;

res.val[0] = vld1q_u8(ptr + 0);
res.val[1] = vld1q_u8(ptr + 16);

return res;
}

struct uint8x16x4_t {
uint8x16_t val[4];
};

inline static uint8x16x4_t vld1q_u8_x4(const uint8_t * ptr) {
uint8x16x4_t res;

res.val[0] = vld1q_u8(ptr + 0);
res.val[1] = vld1q_u8(ptr + 16);
res.val[2] = vld1q_u8(ptr + 32);
res.val[3] = vld1q_u8(ptr + 48);

return res;
}

struct int8x16x2_t {
int8x16_t val[2];
};

inline static int8x16x2_t vld1q_s8_x2(const int8_t * ptr) {
int8x16x2_t res;

res.val[0] = vld1q_s8(ptr + 0);
res.val[1] = vld1q_s8(ptr + 16);

return res;
}

struct int8x16x4_t {
int8x16_t val[4];
};

inline static int8x16x4_t vld1q_s8_x4(const int8_t * ptr) {
int8x16x4_t res;

res.val[0] = vld1q_s8(ptr + 0);
res.val[1] = vld1q_s8(ptr + 16);
res.val[2] = vld1q_s8(ptr + 32);
res.val[3] = vld1q_s8(ptr + 48);

return res;
}

#endif
#endif

Expand Down
12 changes: 12 additions & 0 deletions ggml.c
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,18 @@ ggml_type_traits_t ggml_internal_get_type_traits(enum ggml_type type) {
// simd mappings
//

#if defined(__ARM_NEON)
#if !defined(__aarch64__)

// 64-bit compatibility

inline static float vaddvq_f32(float32x4_t v) {
return vgetq_lane_f32(v, 0) + vgetq_lane_f32(v, 1) + vgetq_lane_f32(v, 2) + vgetq_lane_f32(v, 3);
}

#endif
#endif

// we define a common set of C macros which map to specific intrinsics based on the current architecture
// we then implement the fundamental computation operations below using only these macros
// adding support for new architectures requires to define the corresponding SIMD macros
Expand Down

0 comments on commit db1093e

Please sign in to comment.