diff --git a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts index 57e6686bd8..bc0f44fc58 100644 --- a/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts +++ b/src/nni_manager/training_service/remote_machine/remoteMachineTrainingService.ts @@ -442,7 +442,7 @@ class RemoteMachineTrainingService implements TrainingService { async (tick: number) => { const cmdresult: RemoteCommandResult = await SSHClientUtility.remoteExeCommand( `tail -n 1 ${unixPathJoin(remoteGpuScriptCollectorDir, 'gpu_metrics')}`, conn); - if (cmdresult !== undefined && cmdresult.stdout !== undefined) { + if (cmdresult !== undefined && cmdresult.stdout !== undefined && cmdresult.stdout.length > 0) { rmMeta.gpuSummary = JSON.parse(cmdresult.stdout); if (rmMeta.gpuSummary.gpuCount === 0) { this.log.warning(`No GPU found on remote machine ${rmMeta.ip}`); diff --git a/src/nni_manager/training_service/remote_machine/sshClientUtility.ts b/src/nni_manager/training_service/remote_machine/sshClientUtility.ts index 79af637dfb..d261ff46c4 100644 --- a/src/nni_manager/training_service/remote_machine/sshClientUtility.ts +++ b/src/nni_manager/training_service/remote_machine/sshClientUtility.ts @@ -101,10 +101,9 @@ export namespace SSHClientUtility { * @param sshClient SSH client */ export async function copyDirectoryToRemote(localDirectory: string, remoteDirectory: string, sshClient: Client, remoteOS: string): Promise { - const deferred: Deferred = new Deferred(); - const tmpTarName: string = `${uniqueString(10)}.tar.gz`; - const localTarPath: string = path.join(os.tmpdir(), tmpTarName); - const remoteTarPath: string = unixPathJoin(getRemoteTmpDir(remoteOS), tmpTarName); + const tmpSuffix: string = uniqueString(5); + const localTarPath: string = path.join(os.tmpdir(), `nni_tmp_local_${tmpSuffix}.tar.gz`); + const remoteTarPath: string = unixPathJoin(getRemoteTmpDir(remoteOS), `nni_tmp_remote_${tmpSuffix}.tar.gz`); // Compress files in local directory to experiment root directory await tarAdd(localTarPath, localDirectory); @@ -114,9 +113,6 @@ export namespace SSHClientUtility { // Decompress the remote compressed file in and delete it await remoteExeCommand(`tar -oxzf ${remoteTarPath} -C ${remoteDirectory}`, sshClient); await remoteExeCommand(`rm ${remoteTarPath}`, sshClient); - deferred.resolve(); - - return deferred.promise; } export function getRemoteFileContent(filePath: string, sshClient: Client): Promise {