Fix prefast bugs: 1944959 1997925 1997926 1997927 1997928 (#13203)
zhanghuanrong authored Oct 5, 2022
1 parent 72076b1 commit dca9417
Showing 5 changed files with 30 additions and 30 deletions.
@@ -244,7 +244,7 @@ Status QOrderedAttention::ComputeInternal(OpKernelContext* context) const {
int64_t size_of_attention_scores = ((int64_t)batch_size) * num_heads_ * sequence_length * sequence_length;

// transposed qkv_layer, union(stacked, attention probs + attention scores)
- auto gemm_buffer_quantized = GetScratchBuffer<int8_t>(m * n + std::max((int64_t)m * n, 2 * size_of_attention_scores));
+ auto gemm_buffer_quantized = GetScratchBuffer<int8_t>((int64_t)m * n + std::max((int64_t)m * n, 2 * size_of_attention_scores));

int8_t* stacked_qkv_layers = gemm_buffer_quantized.get() + ((int64_t)m * n);
int8_t* tranposed_qkv_layers = gemm_buffer_quantized.get();
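The change above fixes a classic prefast arithmetic-overflow pattern: the leading `m * n` was computed in (presumably 32-bit) `int` arithmetic and widened only afterwards, so large shapes could overflow before the widening. A minimal sketch of the hazard and the fix, with hypothetical sizes that are not from the commit:

```cpp
#include <cstdint>
#include <iostream>

int main() {
  // Hypothetical sizes, chosen so that m * n no longer fits in 32 bits.
  int m = 65536;  // e.g. batch_size * sequence_length
  int n = 65536;  // e.g. 3 * hidden_size

  // Bug pattern: the multiply runs in 32-bit int and overflows
  // (undefined behavior) before the result is widened to int64_t.
  int64_t bad = m * n;

  // Fix from the commit: cast one operand first; the usual arithmetic
  // conversions then promote the other and the multiply runs in 64 bits.
  int64_t good = (int64_t)m * n;  // 4294967296, as intended

  std::cout << bad << " vs " << good << '\n';
  return 0;
}
```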
@@ -119,7 +119,7 @@ QOrderedLongformerAttention::ComputeInternal(OpKernelContext* context) const {

// TODO: only calculate once per model.
// Build Global Index
- auto global_index_buffer = GetScratchBuffer<int>(batch_size * sequence_length);
+ auto global_index_buffer = GetScratchBuffer<int>(static_cast<size_t>(batch_size) * static_cast<size_t>(sequence_length));
auto batch_global_num_buffer = GetScratchBuffer<int>(batch_size);

size_t global_scratch_bytes = GetGlobalScratchSize(sequence_length);
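Same overflow class at a call boundary: `batch_size * sequence_length` multiplied two (presumably 32-bit) values before the implicit conversion to the allocator's size parameter. Casting a single operand would already force wide arithmetic via the usual arithmetic conversions; casting both, as the commit does, just makes the intended width explicit. A hedged sketch, where `alloc_scratch` is a stand-in and not the real `GetScratchBuffer` signature:

```cpp
#include <cstddef>
#include <cstdlib>

// Stand-in allocator; assumes the real element-count parameter is size_t,
// as the commit's static_casts suggest.
static void* alloc_scratch(std::size_t count) {
  return std::calloc(count, sizeof(int));
}

// Assumes non-negative sizes, as real batch/sequence dimensions are.
void build_global_index(int batch_size, int sequence_length) {
  // Bug pattern: int * int can overflow before the implicit
  // conversion to size_t at the call boundary.
  // alloc_scratch(batch_size * sequence_length);

  // Casting one operand is already sufficient; the other is promoted:
  void* a = alloc_scratch(static_cast<std::size_t>(batch_size) * sequence_length);

  // The commit casts both operands -- equivalent, but explicit:
  void* b = alloc_scratch(static_cast<std::size_t>(batch_size) *
                          static_cast<std::size_t>(sequence_length));
  std::free(a);
  std::free(b);
}
```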
onnxruntime/core/providers/cuda/tensor/pad.cc (2 changes: 1 addition & 1 deletion)
@@ -104,7 +104,7 @@ Status Pad<T>::ComputeInternal(OpKernelContext* ctx) const {
ORT_ENFORCE(pads_size == 2 * static_cast<size_t>(dimension_count),
"Pads tensor size should be equal to twice the input dimension count ");

- pads.reserve(2 * dimension_count);
+ pads.reserve(2LL * dimension_count);
for (size_t i = 0; i < pads_size; ++i) {
pads.push_back(pads_tensor_raw_data[i]);
}
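The `pad.cc` fix uses a literal suffix instead of a cast: `2LL` is a `long long` constant, so the multiplication runs in 64-bit arithmetic before the implicit conversion to `std::vector::reserve`'s `size_t` parameter. A minimal sketch (the exact type of `dimension_count` and the warning number are assumptions):

```cpp
#include <cstdint>
#include <vector>

void reserve_pads(std::vector<int64_t>& pads, int dimension_count) {
  // Bug pattern (assumed to be prefast C26451): the multiply runs in
  // 32-bit int and is widened only afterwards for reserve's size_t.
  // pads.reserve(2 * dimension_count);

  // Fix: the LL suffix makes the literal long long, so the whole
  // multiplication is performed in 64-bit arithmetic.
  pads.reserve(2LL * dimension_count);
}
```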
onnxruntime/test/contrib_ops/qordered_attention_test.cc (26 changes: 13 additions & 13 deletions)
@@ -12,17 +12,17 @@
namespace onnxruntime {
namespace test {

- static const int64_t batch_size = 1;
- static const int64_t sequence_len = 16;
- static const int64_t input_hidden_size = 32;
- static const int64_t num_heads = 2;
- static const int64_t head_size = 16;
- static const int64_t hidden_size = num_heads * head_size;
+ static constexpr int64_t batch_size = 1;
+ static constexpr int64_t sequence_len = 16;
+ static constexpr int64_t input_hidden_size = 32;
+ static constexpr int64_t num_heads = 2;
+ static constexpr int64_t head_size = 16;
+ static constexpr int64_t hidden_size = num_heads * head_size;

static std::vector<int32_t> input_mask = { // [1, 16]
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0};

- static float input_scale = 0.025f;
+ static constexpr float input_scale = 0.025f;

static std::vector<int8_t> inputq = { // [1, 16, 32]
-33, 7, -54, 29, 14, 6, 14, 16, 1, 16, 22, 0, 16, 49, -14, -15, 68, 11, -18, -9, -42, 6, 6, 58, 22, 31, 0, -13, 42, 40, 4, 0,
@@ -180,13 +180,13 @@ static std::vector<float> v_bias = {
-1.5637541858090884f, 0.053171526292804416f, -1.5821961194911058f, -1.2062417346542489f, 0.23029741928149683f, -0.8920457050782132f, -0.06220760650838387f, 0.2942590084687021f,
-0.4362228349183151f, -0.2344379226413643f, -0.586149329261036f, -1.5243876669794532f, 0.22378084867382358f, -1.715499198175354f, -1.3795418183607775f, -1.2237706022285266f};

- static float qlayer_scale = 0.250f;
- static float klayer_scale = 0.250f;
- static float vlayer_scale = 0.125f;
+ static constexpr float qlayer_scale = 0.250f;
+ static constexpr float klayer_scale = 0.250f;
+ static constexpr float vlayer_scale = 0.125f;

- static float qk_scale = 0.5f;
- static float probs_scale = 0.0078125f;
- static float attn_out_scale = 0.05f;
+ static constexpr float qk_scale = 0.5f;
+ static constexpr float probs_scale = 0.0078125f;
+ static constexpr float attn_out_scale = 0.05f;

static std::vector<int8_t> attn_out_q8 = {
-39, 8, -75, 2, -69, -31, -42, -29, 44, 6, 0, -61, -102, 61, 28, 76,
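The test-file changes above and below are a different prefast category: file-scope `static const` values computable at compile time are flagged in favor of `constexpr` (likely Core Guidelines rule con.5 / warning C26814; the commit message lists only internal bug IDs). The difference matters most for the `float` constants, since only integral `const` variables qualify as constant expressions. A short sketch with illustrative names:

```cpp
#include <cstdint>

// For integral types, a const with a constant initializer is already
// usable in constant expressions, so constexpr mostly documents intent:
static const int64_t head_size_const = 16;
static constexpr int64_t head_size = 16;

// For floating-point types the difference is real: only the constexpr
// form is guaranteed to be evaluated at compile time.
static const float scale_const = 0.025f;  // may be runtime-initialized
static constexpr float scale = 0.025f;    // compile-time constant

// constexpr constants compose into further constant expressions...
static constexpr int64_t num_heads = 2;
static constexpr int64_t hidden_size = num_heads * head_size;

// ...and can be used where a constant expression is required:
static int8_t scratch[hidden_size];
```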
@@ -79,19 +79,19 @@ static void run_qordered_longformer_attention_op_test(
}

TEST(QOrderedTest, LongformerAttention_1x128x2x16_window_32) {
- const float scale_input = 1.0f / 32.0f;
- const float scale_weight = 1.0f / 64.0f;
- const float scale_bias = 1.0f / 8.0f;
- const float scale_qkv_gemm = 1.0f / 4.0f;
- const float scale_global_weight = 1.0f / 64.0f;
- const float scale_global_gemm = 1.0f / 4.0f;
- const float scale_output = 1.0f / 8.0f;
- const int64_t batch_size = 1;
- const int64_t sequence_len = 128;
- const int64_t num_heads = 2;
- const int64_t head_size = 16;
- const int64_t window = 32;
- const int64_t input_hidden_size = 0; // same as hidden_size
+ constexpr float scale_input = 1.0f / 32.0f;
+ constexpr float scale_weight = 1.0f / 64.0f;
+ constexpr float scale_bias = 1.0f / 8.0f;
+ constexpr float scale_qkv_gemm = 1.0f / 4.0f;
+ constexpr float scale_global_weight = 1.0f / 64.0f;
+ constexpr float scale_global_gemm = 1.0f / 4.0f;
+ constexpr float scale_output = 1.0f / 8.0f;
+ constexpr int64_t batch_size = 1;
+ constexpr int64_t sequence_len = 128;
+ constexpr int64_t num_heads = 2;
+ constexpr int64_t head_size = 16;
+ constexpr int64_t window = 32;
+ constexpr int64_t input_hidden_size = 0; // same as hidden_size

// Following code generate the input data vectors: (Keep it here in case)
// #include <iostream>
@@ -154,7 +154,7 @@ TEST(QOrderedTest, LongformerAttention_1x128x2x16_window_32) {
// debug_print(global_attention_mask.data(), batch_size, sequence_len, "global_attention_mask");
// float scale_output = 1.0f / 8.0f;



//========inputq : 128x32 ============
std::vector<int8_t> inputq = {
