From 793b13607bf622af259e175479e5d954cdfae51d Mon Sep 17 00:00:00 2001
From: Eunwoo Shin
Date: Wed, 13 Mar 2024 10:58:49 +0900
Subject: [PATCH] Fix a bug where an error is raised when training a model
 after HPO in a multi-XPU environment (#3081)

Fix HPO index error on XPU.

Co-authored-by: Emily
---
 src/otx/hpo/resource_manager.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/src/otx/hpo/resource_manager.py b/src/otx/hpo/resource_manager.py
index 3971613d170..3d23a0edee2 100644
--- a/src/otx/hpo/resource_manager.py
+++ b/src/otx/hpo/resource_manager.py
@@ -180,6 +180,10 @@ def _make_env_var_for_train(self, device_arr: List[int]) -> Dict[str, str]:
 class XPUResourceManager(AcceleratorManager):
     """Resource manager class for XPU."""
 
+    def __init__(self, num_devices_per_trial: int = 1, available_devices: Optional[str] = None):
+        super().__init__(num_devices_per_trial, available_devices)
+        torch.xpu.init()  # Avoid default_generators index error in multi XPU environment
+
     def _set_available_devices(self, available_devices: Optional[str] = None) -> List[int]:
         if available_devices is None:
             visible_devices = os.getenv("ONEAPI_DEVICE_SELECTOR", "").split(":")
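
For context, a minimal sketch of the failure mode this one-line fix guards against. It is not part of the patch and assumes an XPU-enabled PyTorch build (e.g. via intel_extension_for_pytorch) that exposes torch.xpu.init, torch.xpu.device_count, torch.xpu.set_device, and torch.xpu.manual_seed; the exact API surface varies by version.

    import torch
    import intel_extension_for_pytorch  # noqa: F401  # registers torch.xpu (assumption: IPEX build)

    # Without an explicit init, per-device XPU state (including the list of
    # per-device default random generators) may be populated lazily. Seeding
    # a non-zero device index before that lazy init can raise an IndexError
    # on hosts with more than one XPU, which is the error this patch avoids.
    torch.xpu.init()  # eagerly initialize runtime state for all visible XPUs

    for device_idx in range(torch.xpu.device_count()):
        torch.xpu.set_device(device_idx)
        torch.xpu.manual_seed(42)  # safe now: a generator exists for every device

Placing torch.xpu.init() in XPUResourceManager.__init__ means initialization runs as soon as HPO first touches XPU resources, so later per-trial seeding never races the lazy setup.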