Traceback (most recent call last):
  File "/workspace/StyleTTS2/train_finetune.py", line 707, in <module>
    main()
  File "/usr/local/lib/python3.10/dist-packages/click/core.py", line 1157, in __call__
    return self.main(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/click/core.py", line 1078, in main
    rv = self.invoke(ctx)
  File "/usr/local/lib/python3.10/dist-packages/click/core.py", line 1434, in invoke
    return ctx.invoke(self.callback, **ctx.params)
  File "/usr/local/lib/python3.10/dist-packages/click/core.py", line 783, in invoke
    return __callback(*args, **kwargs)
  File "/workspace/StyleTTS2/train_finetune.py", line 487, in main
    slm_out = slmadv(i,
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/workspace/StyleTTS2/Modules/slmadv.py", line 138, in forward
    y_pred = self.model.decoder(en, F0_fake, N_fake, sp[:, :128])
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1518, in _wrapped_call_impl
    return self._call_impl(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/modules/module.py", line 1527, in _call_impl
    return forward_call(*args, **kwargs)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/data_parallel.py", line 184, in forward
    replicas = self.replicate(self.module, self.device_ids[:len(inputs)])
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/data_parallel.py", line 189, in replicate
    return replicate(module, device_ids, not torch.is_grad_enabled())
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/replicate.py", line 110, in replicate
    param_copies = _broadcast_coalesced_reshape(params, devices, detach)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/replicate.py", line 83, in _broadcast_coalesced_reshape
    tensor_copies = Broadcast.apply(devices, *tensors)
  File "/usr/local/lib/python3.10/dist-packages/torch/autograd/function.py", line 539, in apply
    return super().apply(*args, **kwargs)  # type: ignore[misc]
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/_functions.py", line 23, in forward
    outputs = comm.broadcast_coalesced(inputs, ctx.target_gpus)
  File "/usr/local/lib/python3.10/dist-packages/torch/nn/parallel/comm.py", line 57, in broadcast_coalesced
    return torch._C._broadcast_coalesced(tensors, devices, buffer_size)
RuntimeError: NCCL Error 3: internal error - please report this issue to the NCCL developers
See #81
The problem is likely batch_percentage here: https://github.com/yl4579/StyleTTS2/blob/main/Configs/config_ft.yml#L106
You may have set the batch size too low relative to batch_percentage (for example, batch size 2 with batch_percentage 0.5), so SLM adversarial training only gets an effective batch size of 1. You have to either increase the batch size or increase the batch percentage if you want to continue with SLM adversarial training.
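For reference, here is a minimal sketch of the two settings involved, assuming the layout of Configs/config_ft.yml where batch_percentage lives under slmadv_params (the values below are illustrative, not the repository defaults):

```yaml
batch_size: 4            # top-level training batch size

slmadv_params:
  batch_percentage: 0.5  # fraction of the batch used for the SLM adversarial step;
                         # here the effective SLM batch is 4 * 0.5 = 2
```

The point is simply that the SLM adversarial step sees roughly batch_size * batch_percentage samples, so raise one of the two values if that product would drop to 1.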