Skip to content

Commit

Permalink
prevent child process from being terminated by forked main process
Browse files Browse the repository at this point in the history
  • Loading branch information
eunwoosh committed Dec 6, 2022
1 parent 88a23ad commit 916da4e
Showing 1 changed file with 6 additions and 3 deletions.
9 changes: 6 additions & 3 deletions otx/cli/tools/train.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,10 @@ def run_multigpu_child_process(rank: int, gpu_ids: List[int]):
multigpu_initilization(rank, gpu_ids)
main()

def terminate_signal_handler(signum, frame, processes: List[mp.Process]):
def terminate_signal_handler(signum, frame, processes: List[mp.Process], main_pid):
if main_pid != os.getpid(): # a forked copy of the main process that receives the signal only terminates itself
sys.exit()

for process in processes:
print(f"Kill child process {process.pid}")
try:
Expand All @@ -280,8 +283,8 @@ def run_multi_gpu_train(gpu_ids: List[int], output_path: str, optimized_hyper_pa
task_p.start()
processes.append(task_p)

signal.signal(signal.SIGINT, partial(terminate_signal_handler, processes=processes))
signal.signal(signal.SIGTERM, partial(terminate_signal_handler, processes=processes))
signal.signal(signal.SIGINT, partial(terminate_signal_handler, processes=processes, main_pid=os.getpid()))
signal.signal(signal.SIGTERM, partial(terminate_signal_handler, processes=processes, main_pid=os.getpid()))

multigpu_initilization(0, gpu_ids)

Expand Down

0 comments on commit 916da4e

Please sign in to comment.