add basic tensor data validation function #6884

Merged: 8 commits, Apr 26, 2024
Changes from 6 commits
6 changes: 6 additions & 0 deletions common/common.cpp
@@ -1089,6 +1089,10 @@ bool gpt_params_find_arg(int argc, char ** argv, const std::string & arg, gpt_pa
params.n_print = std::stoi(argv[i]);
return true;
}
if (arg == "--check-tensors") {
params.check_tensors = true;
return true;
}
if (arg == "--ppl-output-type") {
if (++i >= argc) {
invalid_param = true;
@@ -1554,6 +1558,7 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
printf(" types: int, float, bool. example: --override-kv tokenizer.ggml.add_bos_token=bool:false\n");
printf(" -ptc N, --print-token-count N\n");
printf(" print token count every N tokens (default: %d)\n", params.n_print);
printf(" --check-tensors check model tensor data for invalid values\n");
printf("\n");
#ifndef LOG_DISABLE_LOGS
log_print_usage();
@@ -1774,6 +1779,7 @@ struct llama_model_params llama_model_params_from_gpt_params(const gpt_params &
mparams.tensor_split = params.tensor_split;
mparams.use_mmap = params.use_mmap;
mparams.use_mlock = params.use_mlock;
mparams.check_tensors = params.check_tensors;
if (params.kv_overrides.empty()) {
mparams.kv_overrides = NULL;
} else {
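
The common changes above simply plumb the new flag through to llama_model_params.check_tensors. As a rough sketch (not part of this PR), an application that bypasses the common argument parser could enable the same validation directly; the standalone program and the "model.gguf" path below are illustrative, and the llama.h calls shown are the existing public API rather than additions from this diff:

#include <stdio.h>
#include "llama.h"

int main(void) {
    llama_backend_init();

    struct llama_model_params mparams = llama_model_default_params();
    mparams.check_tensors = true; // ask the loader to validate tensor data

    // "model.gguf" is a placeholder path; llama_load_model_from_file returns NULL on failure
    struct llama_model * model = llama_load_model_from_file("model.gguf", mparams);
    if (model == NULL) {
        fprintf(stderr, "failed to load model\n");
        llama_backend_free();
        return 1;
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}

Examples built on common get the same behavior from the command line via --check-tensors.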
1 change: 1 addition & 0 deletions common/common.h
@@ -161,6 +161,7 @@ struct gpt_params {
bool dump_kv_cache = false; // dump the KV cache contents for debugging purposes
bool no_kv_offload = false; // disable KV offloading
bool warmup = true; // warmup run
bool check_tensors = false; // validate tensor data

std::string cache_type_k = "f16"; // KV cache data type for the K
std::string cache_type_v = "f16"; // KV cache data type for the V
284 changes: 284 additions & 0 deletions ggml-quants.c
@@ -12389,3 +12389,287 @@ void quantize_row_iq2_s(const float * restrict x, void * restrict vy, int64_t k)
block_iq2_s * restrict y = vy;
quantize_row_iq2_s_reference(x, y, k);
}

static bool validate_float(float f, size_t i) {
if (isinf(f)) {
fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
return false;
}

if (isnan(f)) {
fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
return false;
}

return true;
}

static bool isinf_fp16(ggml_fp16_t f) {
return (f & 0x7c00) == 0x7c00 && (f & 0x03ff) == 0; // exponent bits all set, mantissa zero
}

static bool isnan_fp16(ggml_fp16_t f) {
return (f & 0x7c00) == 0x7c00 && (f & 0x03ff) != 0; // exponent bits all set, mantissa non-zero
}

static bool validate_fp16(ggml_fp16_t f, size_t i) {
if (isinf_fp16(f)) {
fprintf(stderr, "ggml_validate_row_data: found inf value at block %zu\n", i);
return false;
}

if (isnan_fp16(f)) {
fprintf(stderr, "ggml_validate_row_data: found nan value at block %zu\n", i);
return false;
}

return true;
}

#define VALIDATE_ROW_DATA_D_F16_IMPL(type, data, nb) \
const type * q = (const type *) (data); \
for (size_t i = 0; i < (nb); ++i) { \
if (!validate_fp16(q[i].d, i)) { \
return false; \
} \
}

#define VALIDATE_ROW_DATA_DM_F16_IMPL(type, data, nb, d, m) \
const type * q = (const type *) (data); \
for (size_t i = 0; i < (nb); ++i) { \
if (!validate_fp16(q[i].d, i) || !validate_fp16(q[i].m, i)) { \
return false; \
} \
}
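
// For reference (sketch, not part of the diff): VALIDATE_ROW_DATA_D_F16_IMPL(block_q4_0, data, nb)
// expands to roughly the following loop over the per-block fp16 scale d:
//
//     const block_q4_0 * q = (const block_q4_0 *) (data);
//     for (size_t i = 0; i < (nb); ++i) {
//         if (!validate_fp16(q[i].d, i)) {
//             return false;
//         }
//     }
//
// The DM variant checks a second fp16 field as well, e.g. the min m of block_q4_1
// or the dmin of the K-quant blocks.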

bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes) {
if (type < 0 || type >= GGML_TYPE_COUNT) {
fprintf(stderr, "%s: invalid type %d\n", __func__, type);
return false;
}

if (nbytes % ggml_type_size(type) != 0) {
fprintf(stderr, "%s: invalid size %zu for type %d\n", __func__, nbytes, type);
return false;
}

const size_t nb = nbytes/ggml_type_size(type);

switch (type) {
case GGML_TYPE_F16:
{
const ggml_fp16_t * f = (const ggml_fp16_t *) data;
size_t i = 0;
#if defined(__AVX2__)
for (; i + 15 < nb; i += 16) {
__m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
__m256i vexp = _mm256_and_si256(v, _mm256_set1_epi16(0x7c00)); // isolate the fp16 exponent bits
__m256i cmp = _mm256_cmpeq_epi16(vexp, _mm256_set1_epi16(0x7c00)); // all-ones exponent means inf or nan
int mask = _mm256_movemask_epi8(cmp);
if (mask) {
for (size_t j = 0; j < 16; ++j) {
if (!validate_fp16(f[i + j], i + j)) {
return false;
}
}
GGML_UNREACHABLE();
}
}
#elif defined(__ARM_NEON)
for (; i + 7 < nb; i += 8) {
uint16x8_t v = vld1q_u16(f + i);
uint16x8_t vexp = vandq_u16(v, vdupq_n_u16(0x7c00));
uint16x8_t cmp = vceqq_u16(vexp, vdupq_n_u16(0x7c00));
uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(vshrn_n_u16(cmp, 4)), 0);
if (mask) {
for (size_t j = 0; j < 8; ++j) {
if (!validate_fp16(f[i + j], i + j)) {
return false;
}
}
GGML_UNREACHABLE();
}
}
#endif
for (; i < nb; ++i) {
if (!validate_fp16(f[i], i)) {
return false;
}
}
} break;
case GGML_TYPE_F32:
{
const float * f = (const float *) data;
size_t i = 0;
#if defined(__AVX2__)
for (; i + 7 < nb; i += 8) {
__m256i v = _mm256_loadu_si256((const __m256i *)(f + i));
__m256i vexp = _mm256_and_si256(v, _mm256_set1_epi32(0x7f800000));
__m256i cmp = _mm256_cmpeq_epi32(vexp, _mm256_set1_epi32(0x7f800000));
int mask = _mm256_movemask_epi8(cmp);
if (mask) {
for (size_t j = 0; j < 8; ++j) {
if (!validate_float(f[i + j], i + j)) {
return false;
}
}
GGML_UNREACHABLE();
}
}
#elif defined(__ARM_NEON)
for (; i + 3 < nb; i += 4) {
uint32x4_t v = vld1q_u32((const uint32_t *)f + i);
uint32x4_t vexp = vandq_u32(v, vdupq_n_u32(0x7f800000));
uint32x4_t cmp = vceqq_u32(vexp, vdupq_n_u32(0x7f800000));
uint64_t mask = vget_lane_u64(vreinterpret_u64_u16(vshrn_n_u32(cmp, 8)), 0);
if (mask) {
for (size_t j = 0; j < 4; ++j) {
if (!validate_float(f[i + j], i + j)) {
return false;
}
}
GGML_UNREACHABLE();
}
}
#endif
for (; i < nb; ++i) {
if (!validate_float(f[i], i)) {
return false;
}
}
} break;
case GGML_TYPE_F64:
{
const double * f = (const double *) data;
for (size_t i = 0; i < nb; ++i) {
if (!validate_float(f[i], i)) {
return false;
}
}
} break;
case GGML_TYPE_Q4_0:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_q4_0, data, nb);
} break;
case GGML_TYPE_Q4_1:
{
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_1, data, nb, d, m);
} break;
case GGML_TYPE_Q5_0:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_q5_0, data, nb);
} break;
case GGML_TYPE_Q5_1:
{
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_1, data, nb, d, m);
} break;
case GGML_TYPE_Q8_0:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_q8_0, data, nb);
} break;
case GGML_TYPE_Q2_K:
{
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q2_K, data, nb, d, dmin);
} break;
case GGML_TYPE_Q3_K:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_q3_K, data, nb);
} break;
case GGML_TYPE_Q4_K:
{
#ifdef GGML_QKK_64
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_K, data, nb, d[0], d[1]);
#else
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q4_K, data, nb, d, dmin);
#endif
} break;
case GGML_TYPE_Q5_K:
{
#ifdef GGML_QKK_64
VALIDATE_ROW_DATA_D_F16_IMPL(block_q5_K, data, nb);
#else
VALIDATE_ROW_DATA_DM_F16_IMPL(block_q5_K, data, nb, d, dmin);
#endif
} break;
case GGML_TYPE_Q6_K:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_q6_K, data, nb);
} break;
case GGML_TYPE_Q8_K:
{
const block_q8_K * q = (const block_q8_K *) data;
for (size_t i = 0; i < nb; ++i) {
if (!validate_float(q[i].d, i)) {
return false;
}
}
} break;
case GGML_TYPE_IQ1_S:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq1_s, data, nb);
} break;
case GGML_TYPE_IQ1_M:
{
const block_iq1_m * q = (const block_iq1_m *) data;
for (size_t i = 0; i < nb; ++i) {
#if QK_K == 64
if (!validate_fp16(q[i].d, i)) {
return false;
}
#else
iq1m_scale_t scale;
const uint16_t * sc = (const uint16_t *)q[i].scales;
scale.u16 = (sc[0] >> 12) | ((sc[1] >> 8) & 0x00f0) | ((sc[2] >> 4) & 0x0f00) | (sc[3] & 0xf000);
if (!validate_fp16(scale.f16, i)) {
return false;
}
#endif
}
} break;
case GGML_TYPE_IQ2_XXS:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xxs, data, nb);
} break;
case GGML_TYPE_IQ2_XS:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_xs, data, nb);
} break;
case GGML_TYPE_IQ2_S:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq2_s, data, nb);
} break;
case GGML_TYPE_IQ3_XXS:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_xxs, data, nb);
} break;

case GGML_TYPE_IQ3_S:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq3_s, data, nb);
} break;
case GGML_TYPE_IQ4_XS:
#if QK_K != 64
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_xs, data, nb);
} break;
#endif
// with QK_K == 64, iq4_xs is iq4_nl
case GGML_TYPE_IQ4_NL:
{
VALIDATE_ROW_DATA_D_F16_IMPL(block_iq4_nl, data, nb);
} break;
case GGML_TYPE_I8:
case GGML_TYPE_I16:
case GGML_TYPE_I32:
case GGML_TYPE_I64:
// nothing to validate
break;
default:
{
fprintf(stderr, "%s: invalid type %d\n", __func__, type);
return false;
}
}

return true;
}
2 changes: 2 additions & 0 deletions ggml.h
@@ -762,6 +762,8 @@ extern "C" {
// use this to compute the memory overhead of a tensor
GGML_API size_t ggml_tensor_overhead(void);

GGML_API bool ggml_validate_row_data(enum ggml_type type, const void * data, size_t nbytes);

// main

GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
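
The ggml_validate_row_data declaration above is the only new public API. A minimal caller sketch (not part of this PR): walk the tensors of a ggml context and validate each one's data. The helper name validate_all_tensors is illustrative, and the sketch assumes the tensor data is resident in host memory:

#include <stdio.h>
#include <stdbool.h>
#include "ggml.h"

// Illustrative helper: validate the data of every host-resident tensor in a context.
static bool validate_all_tensors(const struct ggml_context * ctx) {
    for (struct ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
        if (t->data == NULL) {
            continue; // data lives elsewhere (e.g. in a GPU backend), skip it here
        }
        if (!ggml_validate_row_data(t->type, t->data, ggml_nbytes(t))) {
            fprintf(stderr, "tensor '%s' contains invalid data\n", ggml_get_name(t));
            return false;
        }
    }
    return true;
}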