Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fsxwindowsfileserver: fixing task concurrency issues #2690

Merged
merged 1 commit into from
Oct 27, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 0 additions & 6 deletions agent/api/task/task_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -225,18 +225,12 @@ func (task *Task) addFSxWindowsFileServerResource(
vol *TaskVolume,
fsxWindowsFileServerVol *fsxwindowsfileserver.FSxWindowsFileServerVolumeConfig,
) error {
hostPath, err := utils.FindUnusedDriveLetter()
if err != nil {
return err
}

fsxwindowsfileserverResource, err := fsxwindowsfileserver.NewFSxWindowsFileServerResource(
task.Arn,
cfg.AWSRegion,
vol.Name,
FSxWindowsFileServerVolumeType,
fsxWindowsFileServerVol,
hostPath,
task.ExecutionCredentialsID,
credentialsManager,
resourceFields.SSMClientCreator,
Expand Down
2 changes: 1 addition & 1 deletion agent/api/task/task_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -716,7 +716,7 @@ func TestPostUnmarshalTaskWithFSxWindowsFileServerVolumes(t *testing.T) {
"credentialsParameter": "arn",
"domain": "test"
},
"fsxWindowsFileServerHostPath": "Z:\\"
"fsxWindowsFileServerHostPath": ""
},
"createdAt": "0001-01-01T00:00:00Z",
"desiredStatus": "NONE",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,6 @@ func NewFSxWindowsFileServerResource(
name string,
volumeType string,
volumeConfig *FSxWindowsFileServerVolumeConfig,
hostPath string,
executionCredentialsID string,
credentialsManager credentials.Manager,
ssmClientCreator ssmfactory.SSMClientCreator,
Expand All @@ -121,7 +120,6 @@ func NewFSxWindowsFileServerResource(
FileSystemID: volumeConfig.FileSystemID,
RootDirectory: volumeConfig.RootDirectory,
AuthConfig: volumeConfig.AuthConfig,
HostPath: hostPath,
},
taskARN: taskARN,
region: region,
Expand Down Expand Up @@ -357,28 +355,26 @@ func (fv *FSxWindowsFileServerResource) SetRootDirectory(rootDirectory string) {
fv.VolumeConfig.RootDirectory = rootDirectory
}

var DriveLetterAvailable = utils.IsAvailableDriveLetter
// GetHostPath safely returns the volume config's host path
func (fv *FSxWindowsFileServerResource) GetHostPath() string {
fv.lock.RLock()
defer fv.lock.RUnlock()

return fv.VolumeConfig.HostPath
}

// SetHostPath safely updates volume config's host path
func (fv *FSxWindowsFileServerResource) SetHostPath(hostPath string) {
fv.lock.Lock()
defer fv.lock.Unlock()

fv.VolumeConfig.HostPath = hostPath
}

// Create is used to create all the fsxwindowsfileserver resources for a given task
func (fv *FSxWindowsFileServerResource) Create() error {
var err error

volumeConfig := fv.GetVolumeConfig()
localPath := volumeConfig.HostPath

// formatting to keep powershell happy
localPathArg := fmt.Sprintf("-LocalPath \"%s\"", strings.Trim(localPath, `\`))

// remove mount if localPath is not available
// handling case where agent crashes after mount and before resource status is updated
if !(DriveLetterAvailable(localPath)) {
err = fv.removeHostMount(localPathArg)
if err != nil {
seelog.Errorf("Failed to remove existing fsxwindowsfileserver resource mount on the container instance: %v", err)
fv.setTerminalReason(err.Error())
return err
}
}

var iamCredentials credentials.IAMRoleCredentials
executionCredentials, ok := fv.credentialsManager.GetTaskCredentials(fv.GetExecutionCredentialsID())
Expand Down Expand Up @@ -418,7 +414,7 @@ func (fv *FSxWindowsFileServerResource) Create() error {

password := creds.Password

err = fv.performHostMount(remotePath, localPathArg, username, password)
err = fv.performHostMount(remotePath, username, password)
if err != nil {
fv.setTerminalReason(err.Error())
return err
Expand Down Expand Up @@ -519,9 +515,37 @@ func (fv *FSxWindowsFileServerResource) retrieveFileSystemDNSName(fileSystemId s
return nil
}

var mountLock sync.Mutex
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do you need an additional lock while performing hostMount? Please add a comment to explain the rationale. Also, why is this outside the resource encapsulation?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll add a comment for this

var DriveLetterAvailable = utils.IsAvailableDriveLetter
var execCommand = exec.Command

func (fv *FSxWindowsFileServerResource) performHostMount(remotePath string, localPathArg string, username string, password string) error {
func (fv *FSxWindowsFileServerResource) performHostMount(remotePath string, username string, password string) error {
// mountLock is a package-level mutex lock.
// It is used to avoid a scenario where concurrent tasks get the same drive letter and perform host mount at the same time.
// A drive letter is free until host mount is performed, a lock prevents multiple tasks getting the same drive letter.
mountLock.Lock()
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you add a comment here documenting why a package level lock is needed here? so that we don't lose context on why this was added

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

does this delay the resource creation for fsx tasks?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ran 10 tasks together and noticed about half a second delay between tasks transitioning into running state.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I'll add a comment to add some context

defer mountLock.Unlock()

hostPath, err := utils.FindUnusedDriveLetter()
if err != nil {
return err
}
fv.SetHostPath(hostPath)

// formatting to keep powershell happy
localPathArg := fmt.Sprintf("-LocalPath \"%s\"", strings.Trim(hostPath, `\`))

// remove mount if localPath is not available
// handling case where agent crashes after mount and before resource status is updated
if !(DriveLetterAvailable(hostPath)) {
err = fv.removeHostMount(localPathArg)
if err != nil {
seelog.Errorf("Failed to remove existing fsxwindowsfileserver resource mount on the container instance: %v", err)
fv.setTerminalReason(err.Error())
return err
}
}

// formatting to keep powershell happy
creds := fmt.Sprintf("-Credential $(New-Object System.Management.Automation.PSCredential(\"%s\", $(ConvertTo-SecureString \"%s\" -AsPlainText -Force)))", username, password)
remotePathArg := fmt.Sprintf("-RemotePath \"%s\"", remotePath)
Expand All @@ -537,7 +561,7 @@ func (fv *FSxWindowsFileServerResource) performHostMount(remotePath string, loca
"-RequirePrivacy $true",
"-ErrorAction Stop")

_, err := cmd.CombinedOutput()
_, err = cmd.CombinedOutput()
if err != nil {
seelog.Errorf("Failed to map fsxwindowsfileserver resource on the container instance: %v", err)
fv.setTerminalReason(err.Error())
Expand Down Expand Up @@ -576,7 +600,7 @@ func (fv *FSxWindowsFileServerResource) removeHostMount(localPathArg string) err
}

func (fv *FSxWindowsFileServerResource) Cleanup() error {
localPath := fv.VolumeConfig.HostPath
localPath := fv.GetHostPath()
// formatting to keep powershell happy
localPathArg := fmt.Sprintf("-LocalPath \"%s\"", strings.Trim(localPath, `\`))
err := fv.removeHostMount(localPathArg)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ func TestPerformHostMount(t *testing.T) {
execCommand = fakeExecCommand
defer func() { execCommand = exec.Command }()

err := fv.performHostMount(`\\amznfsxfp8sdlcw.test.corp.com\share`, hostPath, `test\user`, `pass`)
err := fv.performHostMount(`\\amznfsxfp8sdlcw.test.corp.com\share`, `test\user`, `pass`)
assert.NoError(t, err)
}

Expand Down