Skip to content

Commit

Permalink
fix the audio size output for phi models (#886)
Browse files Browse the repository at this point in the history
  • Loading branch information
wenbingl authored Feb 6, 2025
1 parent 85e8032 commit e6777d5
Show file tree
Hide file tree
Showing 2 changed files with 4 additions and 1 deletion.
2 changes: 1 addition & 1 deletion shared/api/speech_features.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -638,7 +638,7 @@ class Phi4AudioEmbed {
return result
*/
auto embedded_size_data = embeded_size.Allocate({1});
- embedded_size_data[0] = std::ceil(static_cast<float>(ts_logmel.Shape()[1]) / audio_compression_rate_);
+ embedded_size_data[0] = std::ceil(static_cast<float>(ts_logmel.Shape()[0]) / audio_compression_rate_);
return status;
}

Expand Down
3 changes: 3 additions & 0 deletions test/pp_api_test/test_feature_extraction.cc
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ TEST(ExtractorTest, TestPhi4AudioFeatureExtraction) {
err = OrtxGetTensorData(tensor.get(), reinterpret_cast<const void**>(&data), &shape, &num_dims);
ASSERT_EQ(num_dims, 1);
ASSERT_EQ(std::vector<int64_t>(shape, shape + num_dims), std::vector<int64_t>({3}));
+ ASSERT_EQ(std::vector<int64_t>(reinterpret_cast<const int64_t*>(data),
+ reinterpret_cast<const int64_t*>(data) + 3),
+ std::vector<int64_t>({138, 167, 168}));
}

TEST(ExtractorTest, TestPhi4AudioFeatureExtraction8k) {
Expand Down

0 comments on commit e6777d5

Please sign in to comment.