Skip to content

Commit

Permalink
more changes to components (#3862)
Browse files Browse the repository at this point in the history
Co-authored-by: Matthias Blondeel <mablonde@microsoft.com>
  • Loading branch information
matthiasblondeel and Matthias Blondeel authored Feb 25, 2025
1 parent df4a6cc commit d74da53
Show file tree
Hide file tree
Showing 9 changed files with 226 additions and 222 deletions.
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
name: medimgage_adapter_finetune
version: 0.0.1.yesh1
version: 0.0.4
type: command

is_deterministic: True
Expand Down Expand Up @@ -57,12 +57,11 @@ inputs:
optional: true
description: Number of workers for the validation dataloader.

output_classes:
type: integer
min: 1
default: 5
optional: true
description: Number of output classes.
label_file:
type: uri_file
optional: false
description: Path to the label file.
mode: ro_mount

hidden_dimensions:
type: integer
Expand Down Expand Up @@ -102,11 +101,11 @@ command: >-
--task_name "AdapterTrain"
--train_data_path "${{inputs.train_data_path}}"
--validation_data_path "${{inputs.validation_data_path}}"
--label_file "${{inputs.label_file}}"
$[[--train_dataloader_batch_size "${{inputs.train_dataloader_batch_size}}"]]
$[[--validation_dataloader_batch_size "${{inputs.validation_dataloader_batch_size}}"]]
$[[--train_dataloader_workers "${{inputs.train_dataloader_workers}}"]]
$[[--validation_dataloader_workers "${{inputs.validation_dataloader_workers}}"]]
$[[--output_classes "${{inputs.output_classes}}"]]
$[[--hidden_dimensions "${{inputs.hidden_dimensions}}"]]
$[[--input_channels "${{inputs.input_channels}}"]]
$[[--learning_rate "${{inputs.learning_rate}}"]]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
type: command

version: 0.0.10
version: 0.0.12
name: medimage_embedding_adapter_merge
display_name: Merge Adapter Model with Embedding Generation Model
description: Import and integrate Adapter and Embedding Generation Model
Expand All @@ -25,11 +25,25 @@ inputs:
optional: true
description: Integrated Mlflow Model Asset.

configuration:
type: uri_folder
label_file:
type: uri_file
optional: false
description: Path to the label file.
mode: ro_mount

hidden_dimensions:
type: integer
min: 1
default: 512
optional: true
description: Configuration file for the model merging process.
description: Number of hidden dimensions.

input_channels:
type: integer
min: 1
default: 1024
optional: true
description: Number of input channels.

outputs:
output_dir:
Expand All @@ -40,5 +54,7 @@ command: >-
python medimage_model_merge.py
$[[--adapter_model ${{inputs.adapter_model}}]]
$[[--mlflow_model ${{inputs.mlflow_model}}]]
$[[--configuration ${{inputs.configuration}}]]
$[[--hidden_dimensions "${{inputs.hidden_dimensions}}"]]
$[[--input_channels "${{inputs.input_channels}}"]]
--label_file "${{inputs.label_file}}"
--output_dir ${{outputs.output_dir}}
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/pipelineComponent.schema.json
name: medimage_insight_ft_pipeline
version: 0.0.7
version: 0.0.14
type: pipeline
display_name: Medical Image Insight Embedding Generator and Classification Adapter Pipeline
description: Pipeline Component to finetune the medical image embedding model, train a classification adapter, and merge the adapter with the embedding model into a single MLflow model. The component supports optimizations such as LoRA, Deepspeed and ONNXRuntime for performance enhancement. See [docs](https://aka.ms/azureml/components/chat_completion_pipeline) to learn more.
Expand Down Expand Up @@ -142,7 +142,6 @@ inputs:
optional: true
description: Pin memory.


knn:
type: integer
min: 0
Expand Down Expand Up @@ -267,18 +266,6 @@ inputs:
compute is named 'FT-Cluster'. Special characters like \ and ' are invalid in the parameter value.
If compute cluster name is provided, instance_type field will be ignored and the respective cluster will be used

zeroshot_path:
type: uri_file
optional: false
description: Path to the zeroshot data file.
mode: rw_mount

test_train_split_csv_path:
type: uri_file
optional: false
description: Path to the CSV file containing test-train split information.
mode: rw_mount

train_dataloader_batch_size:
type: integer
min: 1
Expand Down Expand Up @@ -307,13 +294,6 @@ inputs:
optional: true
description: Number of workers for the validation dataloader.

output_classes:
type: integer
min: 1
default: 5
optional: true
description: Number of output classes.

hidden_dimensions:
type: integer
min: 1
Expand Down Expand Up @@ -341,11 +321,6 @@ inputs:
optional: true
description: Maximum number of epochs for training.

merge_configuration_folder:
type: uri_folder
description: Files required for merging the models.
mode: rw_mount

outputs:
save_dir:
type: uri_folder
Expand Down Expand Up @@ -401,17 +376,19 @@ jobs:
mlflow_model_folder: '${{parent.outputs.mlflow_model_folder}}'
medical_image_embedding_datapreprocessing:
type: command
component: azureml://registries/models-staging/components/medical_image_embedding_datapreprocessing/versions/0.0.1.yesh5
component: azureml://registries/mablonde-registry-101/components/medical_image_embedding_datapreprocessing/versions/0.0.9
compute: '${{parent.inputs.compute_preprocess}}'
resources:
instance_type: '${{parent.inputs.instance_type_preprocess}}'
inputs:
mlflow_model_path: '${{parent.jobs.medical_image_embedding_model_finetune.outputs.mlflow_model_folder}}'
zeroshot_path: '${{parent.inputs.zeroshot_path}}'
test_train_split_csv_path: '${{parent.inputs.test_train_split_csv_path}}'
eval_image_tsv: '${{parent.inputs.eval_image_tsv}}'
eval_text_tsv: '${{parent.inputs.eval_text_tsv}}'
image_tsv: '${{parent.inputs.image_tsv}}'
text_tsv: '${{parent.inputs.text_tsv}}'
medimgage_adapter_finetune:
type: command
component: azureml://registries/models-staging/components/medimgage_adapter_finetune/versions/0.0.1.yesh1
component: azureml://registries/mablonde-registry-101/components/medimgage_adapter_finetune/versions/0.0.4
compute: '${{parent.inputs.compute_finetune}}'
resources:
instance_type: '${{parent.inputs.instance_type_finetune}}'
Expand All @@ -422,20 +399,22 @@ jobs:
validation_dataloader_batch_size: '${{parent.inputs.validation_dataloader_batch_size}}'
train_dataloader_workers: '${{parent.inputs.train_dataloader_workers}}'
validation_dataloader_workers: '${{parent.inputs.validation_dataloader_workers}}'
output_classes: '${{parent.inputs.output_classes}}'
label_file: '${{parent.inputs.label_file}}'
hidden_dimensions: '${{parent.inputs.hidden_dimensions}}'
input_channels: '${{parent.inputs.input_channels}}'
learning_rate: '${{parent.inputs.learning_rate}}'
max_epochs: '${{parent.inputs.max_epochs}}'
medimage_embedding_adapter_merge:
type: command
component: azureml://registries/mablonde-registry-101/components/medimage_embedding_adapter_merge/versions/0.0.10
component: azureml://registries/mablonde-registry-101/components/medimage_embedding_adapter_merge/versions/0.0.12
compute: '${{parent.inputs.compute_finetune}}'
resources:
instance_type: '${{parent.inputs.instance_type_finetune}}'
inputs:
adapter_model: '${{parent.jobs.medimgage_adapter_finetune.outputs.output_model_path}}'
mlflow_model: '${{parent.jobs.medical_image_embedding_model_finetune.outputs.mlflow_model_folder}}'
configuration: '${{parent.inputs.merge_configuration_folder}}'
label_file: '${{parent.inputs.label_file}}'
hidden_dimensions: '${{parent.inputs.hidden_dimensions}}'
input_channels: '${{parent.inputs.input_channels}}'
outputs:
output_dir: '${{parent.outputs.merged_mlfow_model}}'
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
$schema: https://azuremlschemas.azureedge.net/latest/commandComponent.schema.json
name: medical_image_embedding_datapreprocessing
version: 0.0.1.yesh5
version: 0.0.9
type: command

is_deterministic: True
Expand All @@ -13,17 +13,29 @@ environment: azureml://registries/models-staging/environments/medimage-embedding
code: ../../../src/medimage_insight_adapter_preprocess

inputs:
zeroshot_path:
eval_image_tsv:
type: uri_file
optional: false
description: Path to the zeroshot data file.
mode: rw_mount
description: Path to the evaluation image TSV file.
mode: ro_mount

test_train_split_csv_path:
eval_text_tsv:
type: uri_file
optional: false
description: Path to the CSV file containing test-train split information.
mode: rw_mount
description: Path to the evaluation text TSV file.
mode: ro_mount

image_tsv:
type: uri_file
optional: false
description: Path to the image TSV file.
mode: ro_mount

text_tsv:
type: uri_file
optional: false
description: Path to the text TSV file.
mode: ro_mount

mlflow_model_path:
type: uri_folder
Expand All @@ -43,4 +55,12 @@ outputs:
mode: rw_mount

command: >-
python medimage_datapreprocess.py --task_name "MedEmbedding" --zeroshot_path "${{inputs.zeroshot_path}}" --test_train_split_csv_path "${{inputs.test_train_split_csv_path}}" --output_train_pkl "${{outputs.output_train_pkl}}" --output_validation_pkl "${{outputs.output_validation_pkl}}" --mlflow_model_path "${{inputs.mlflow_model_path}}"
python medimage_datapreprocess.py
--task_name "MedEmbedding"
--eval_image_tsv "${{inputs.eval_image_tsv}}"
--eval_text_tsv "${{inputs.eval_text_tsv}}"
--image_tsv "${{inputs.image_tsv}}"
--text_tsv "${{inputs.text_tsv}}"
--output_train_pkl "${{outputs.output_train_pkl}}"
--output_validation_pkl "${{outputs.output_validation_pkl}}"
--mlflow_model_path "${{inputs.mlflow_model_path}}"
Original file line number Diff line number Diff line change
Expand Up @@ -79,10 +79,9 @@ def get_parser():
help='Number of workers for the validation dataloader.'
)
parser.add_argument(
'--output_classes',
type=int,
required=True,
help='Number of output classes.'
'--label_file',
type=str,
help='Path to label file.'
)
parser.add_argument(
'--hidden_dimensions',
Expand Down Expand Up @@ -150,10 +149,13 @@ def initialize_model(args: argparse.Namespace) -> torch.nn.Module:
Returns:
torch.nn.Module: Initialized model.
"""
with open(args.label_file, "r") as f:
labels = [l.strip() for l in f.read().splitlines() if l.strip()]

return training.create_model(
in_channels=args.input_channels,
hidden_dim=args.hidden_dimensions,
num_class=args.output_classes
num_class=len(labels)
)


Expand Down
Loading

0 comments on commit d74da53

Please sign in to comment.