From 00a1f86da1a5bfdbbac44bfeda177de9439f4c73 Mon Sep 17 00:00:00 2001
From: Shay Aharon <80472096+shaydeci@users.noreply.github.com>
Date: Wed, 1 May 2024 15:57:08 +0300
Subject: [PATCH 1/5] Set average_best_models to False when save_model is false (#1976)

* average best models disabled when save_model is set to false

---
 src/super_gradients/training/sg_trainer/sg_trainer.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/super_gradients/training/sg_trainer/sg_trainer.py b/src/super_gradients/training/sg_trainer/sg_trainer.py
index 5f856da732..7ba08998ae 100755
--- a/src/super_gradients/training/sg_trainer/sg_trainer.py
+++ b/src/super_gradients/training/sg_trainer/sg_trainer.py
@@ -1421,6 +1421,14 @@ def get_finetune_lr_dict(self, lr: float) -> Dict[str, float]:
 
         self.ckpt_best_name = self.training_params.ckpt_best_name
 
+        if self.training_params.average_best_models and not self.training_params.save_model:
+            logger.warning(
+                "'training_params.average_best_models' is enabled, but 'training_params.save_model' is disabled. \n"
+                "Model averaging requires saving snapshot checkpoints to function properly. As a result, "
+                "'training_params.average_best_models' will be disabled. "
+            )
+            self.training_params.average_best_models = False
+
         self.max_train_batches = self.training_params.max_train_batches
         self.max_valid_batches = self.training_params.max_valid_batches
 

From 3548fd5e285ac01efe99bb67fee9c80755d8f431 Mon Sep 17 00:00:00 2001
From: Shay Aharon <80472096+shaydeci@users.noreply.github.com>
Date: Thu, 2 May 2024 11:00:21 +0300
Subject: [PATCH 2/5] Removed log_dir arg, which is None by default (#1977)

Co-authored-by: Ofri Masad
---
 src/super_gradients/training/utils/distributed_training_utils.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/super_gradients/training/utils/distributed_training_utils.py b/src/super_gradients/training/utils/distributed_training_utils.py
index c4dbd55e80..dda9913777 100755
--- a/src/super_gradients/training/utils/distributed_training_utils.py
+++ b/src/super_gradients/training/utils/distributed_training_utils.py
@@ -345,7 +345,6 @@ def restart_script_with_ddp(num_gpus: int = None):
         max_restarts=0,
         monitor_interval=5,
         start_method="spawn",
-        log_dir=None,
         redirects=Std.NONE,
         tee=Std.NONE,
         metrics_cfg={},

From 292e38cea4900523bb40ebd9ff60896c56c78eba Mon Sep 17 00:00:00 2001
From: Talhaa Hussain <73853725+talhaahussain@users.noreply.github.com>
Date: Thu, 2 May 2024 19:09:32 +0100
Subject: [PATCH 3/5] Fixed issue with saved pose images (#1972) (#1973)

Co-authored-by: Eugene Khvedchenya
---
 .../utils/predict/prediction_pose_estimation_results.py | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/src/super_gradients/training/utils/predict/prediction_pose_estimation_results.py b/src/super_gradients/training/utils/predict/prediction_pose_estimation_results.py
index 2019a5ba17..345c7d15ee 100644
--- a/src/super_gradients/training/utils/predict/prediction_pose_estimation_results.py
+++ b/src/super_gradients/training/utils/predict/prediction_pose_estimation_results.py
@@ -119,7 +119,14 @@ def save(
         :param show_confidence: Whether to show confidence scores on the image.
         :param box_thickness: (Optional) Thickness of bounding boxes. If None, will adapt to the box size.
""" - image = self.draw(box_thickness=box_thickness, show_confidence=show_confidence) + image = self.draw( + edge_colors=edge_colors, + joint_thickness=joint_thickness, + keypoint_colors=keypoint_colors, + keypoint_radius=keypoint_radius, + box_thickness=box_thickness, + show_confidence=show_confidence, + ) save_image(image=image, path=output_path) From 9e737925e7e9ac34eae4c9c21322dc9836da87a3 Mon Sep 17 00:00:00 2001 From: Shay Aharon <80472096+shaydeci@users.noreply.github.com> Date: Wed, 8 May 2024 11:02:09 +0300 Subject: [PATCH 4/5] more deprecated default args removed from LaunchConfig (#1982) --- .../training/utils/distributed_training_utils.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/super_gradients/training/utils/distributed_training_utils.py b/src/super_gradients/training/utils/distributed_training_utils.py index dda9913777..6d587466a1 100755 --- a/src/super_gradients/training/utils/distributed_training_utils.py +++ b/src/super_gradients/training/utils/distributed_training_utils.py @@ -10,7 +10,6 @@ from torch import distributed as dist from torch.cuda.amp import autocast from torch.distributed import get_rank, all_gather_object -from torch.distributed.elastic.multiprocessing import Std from torch.distributed.elastic.multiprocessing.errors import record from torch.distributed.launcher.api import LaunchConfig, elastic_launch @@ -345,8 +344,6 @@ def restart_script_with_ddp(num_gpus: int = None): max_restarts=0, monitor_interval=5, start_method="spawn", - redirects=Std.NONE, - tee=Std.NONE, metrics_cfg={}, ) From f8cc94a77e6eee520d82e76e18d9cfef6105403e Mon Sep 17 00:00:00 2001 From: Eugene Khvedchenya Date: Wed, 15 May 2024 13:38:15 +0300 Subject: [PATCH 5/5] Fixed issue of logging wrong config (#1988) --- src/super_gradients/training/sg_trainer/sg_trainer.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/super_gradients/training/sg_trainer/sg_trainer.py b/src/super_gradients/training/sg_trainer/sg_trainer.py index 7ba08998ae..566228bc64 100755 --- a/src/super_gradients/training/sg_trainer/sg_trainer.py +++ b/src/super_gradients/training/sg_trainer/sg_trainer.py @@ -239,13 +239,15 @@ def train_from_config(cls, cfg: Union[DictConfig, dict]) -> Tuple[nn.Module, Tup :return: the model and the output of trainer.train(...) (i.e results tuple) """ - # TODO: bind checkpoint_run_id setup_device( device=core_utils.get_param(cfg, "device"), multi_gpu=core_utils.get_param(cfg, "multi_gpu"), num_gpus=core_utils.get_param(cfg, "num_gpus"), ) + # Create resolved config before instantiation + recipe_logged_cfg = {"recipe_config": OmegaConf.to_container(cfg, resolve=True)} + # INSTANTIATE ALL OBJECTS IN CFG cfg = hydra.utils.instantiate(cfg) @@ -283,7 +285,6 @@ def train_from_config(cls, cfg: Union[DictConfig, dict]) -> Tuple[nn.Module, Tup test_loaders = maybe_instantiate_test_loaders(cfg) - recipe_logged_cfg = {"recipe_config": OmegaConf.to_container(cfg, resolve=True)} # TRAIN res = trainer.train( model=model,