Skip to content

Commit

Permalink
Attention mask
Browse files (browse the repository at this point in the history)
  • Loading branch information
baijumeswani committed Feb 27, 2025
1 parent 52d90fa commit cd10559
Show file tree
Hide file tree
Showing 4 changed files with 4 additions and 6 deletions.
4 changes: 2 additions & 2 deletions src/config.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -421,8 +421,8 @@ struct SpeechInputs_Element : JSON::Element {
void OnValue(std::string_view name, JSON::Value value) override {
if (name == "audio_embeds") {
v_.audio_embeds = JSON::Get<std::string_view>(value);
- } else if (name == "audio_attention_mask") {
- v_.audio_attention_mask = JSON::Get<std::string_view>(value);
+ } else if (name == "attention_mask") {
+ v_.attention_mask = JSON::Get<std::string_view>(value);
} else if (name == "audio_sizes") {
v_.audio_sizes = JSON::Get<std::string_view>(value);
} else if (name == "audio_projection_mode") {
Expand Down
2 changes: 1 addition & 1 deletion src/config.h
Original file line number Diff line number Diff line change
Expand Up @@ -131,7 +131,7 @@ struct Config {

struct Inputs {
std::string audio_embeds{Defaults::AudioEmbedsName};
- std::string audio_attention_mask{Defaults::AudioAttentionMaskName};
+ std::string attention_mask{Defaults::AudioAttentionMaskName};
std::string audio_sizes{Defaults::AudioSizesName};
std::string audio_projection_mode{Defaults::AudioProjectionModeName};
} inputs;
Expand Down
3 changes: 1 addition & 2 deletions src/models/phi_multimodal_processor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,6 @@ PhiMultiModalProcessor::PhiMultiModalProcessor(Config& config, const SessionInfo
: pixel_values_type_{session_info.GetInputDataType(config.model.vision.inputs.pixel_values)},
attention_mask_type_{session_info.GetInputDataType(config.model.vision.inputs.attention_mask)},
audio_features_type_{session_info.GetInputDataType(config.model.speech.inputs.audio_embeds)},
- // audio_attention_mask_type_{session_info.GetInputDataType(config.model.speech.inputs.audio_attention_mask)},
audio_sizes_type_{session_info.GetInputDataType(config.model.speech.inputs.audio_sizes)} {
const auto image_processor_config = (config.config_path / fs::path(config.model.vision.config_filename)).string();
CheckResult(OrtxCreateProcessor(image_processor_.ToBeAssigned(), image_processor_config.c_str()));
Expand All @@ -154,7 +153,7 @@ PhiMultiModalProcessor::PhiMultiModalProcessor(Config& config, const SessionInfo
config.AddMapping(std::string(Config::Defaults::ImageSizesName), config.model.vision.inputs.image_sizes);

config.AddMapping(std::string(Config::Defaults::AudioEmbedsName), config.model.speech.inputs.audio_embeds);
- config.AddMapping(std::string(Config::Defaults::AudioAttentionMaskName), config.model.speech.inputs.audio_attention_mask);
+ config.AddMapping(std::string(Config::Defaults::AudioAttentionMaskName), config.model.speech.inputs.attention_mask);
config.AddMapping(std::string(Config::Defaults::AudioSizesName), config.model.speech.inputs.audio_sizes);
}

Expand Down
1 change: 0 additions & 1 deletion src/models/phi_multimodal_processor.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@ struct PhiMultiModalProcessor : Processor {
ONNXTensorElementDataType pixel_values_type_;
ONNXTensorElementDataType attention_mask_type_;
ONNXTensorElementDataType audio_features_type_;
- ONNXTensorElementDataType audio_attention_mask_type_;
ONNXTensorElementDataType audio_sizes_type_;
};

Expand Down

0 comments on commit cd10559

Please sign in to comment.