From ab599881c74765372c1b7054a2c430b82ca829c3 Mon Sep 17 00:00:00 2001 From: Mike Frisch Date: Fri, 7 Feb 2025 11:50:00 -0500 Subject: [PATCH] aws: logging improvements Improve logging for clarity in context of asynchronous system. For exmaple, including instance ID/image name in all log related messages to facilitate easier (human) log tracing Signed-off-by: Mike Frisch --- src/cloud-providers/aws/provider.go | 66 ++++++++++++++++------------- 1 file changed, 36 insertions(+), 30 deletions(-) diff --git a/src/cloud-providers/aws/provider.go b/src/cloud-providers/aws/provider.go index 88bd45a28..7fe099505 100644 --- a/src/cloud-providers/aws/provider.go +++ b/src/cloud-providers/aws/provider.go @@ -22,8 +22,14 @@ import ( ) var ( - logger = log.New(log.Writer(), "[adaptor/cloud/aws] ", log.LstdFlags|log.Lmsgprefix) - errNotReady = errors.New("address not ready") + logger = log.New(log.Writer(), "[adaptor/cloud/aws] ", log.LstdFlags|log.Lmsgprefix) + + errNotReady = errors.New("address not ready") + errNoImageID = errors.New("ImageId is empty") + errNilPublicIPAddress = errors.New("public IP address is nil") + errEmptyPublicIPAddress = errors.New("public IP address is empty") + errImageDetailsFailed = errors.New("unable to get image details") + errDeviceNameEmpty = errors.New("empty device name") ) const ( @@ -120,7 +126,7 @@ func NewProvider(config *Config) (provider.Provider, error) { logger.Printf("RootDeviceName and RootVolumeSize of the image %s is %s, %d", config.ImageId, config.RootDeviceName, config.RootVolumeSize) } - if err = provider.updateInstanceTypeSpecList(); err != nil { + if err := provider.updateInstanceTypeSpecList(); err != nil { return nil, err } @@ -142,7 +148,7 @@ func getIPs(instance types.Instance) ([]netip.Addr, error) { } podNodeIPs = append(podNodeIPs, ip) - logger.Printf("podNodeIP[%d]=%s", i, ip.String()) + logger.Printf("instance %s: podNodeIP[%d]=%s", *instance.InstanceId, i, ip.String()) } return podNodeIPs, nil @@ -239,7 +245,6 @@ func (p *awsProvider) CreateInstance(ctx context.Context, podName, sandboxID str input.SubnetId = nil // Remove the security group IDs from the input input.SecurityGroupIds = nil - } // Ref: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/snp-work.html @@ -255,7 +260,6 @@ func (p *awsProvider) CreateInstance(ctx context.Context, podName, sandboxID str AmdSevSnp: types.AmdSevSnpSpecificationEnabled, } } - } // Add block device mappings to the instance to set the root volume size @@ -272,20 +276,20 @@ func (p *awsProvider) CreateInstance(ctx context.Context, podName, sandboxID str } } - logger.Printf("CreateInstance: name: %q", instanceName) + logger.Printf("Creating instance %s for sandbox %s", instanceName, sandboxID) result, err := p.ec2Client.RunInstances(ctx, input) if err != nil { - return nil, fmt.Errorf("Creating instance (%v) returned error: %s", result, err) + return nil, fmt.Errorf("creating instance %s (%v): %w", instanceName, result, err) } - logger.Printf("created an instance %s for sandbox %s", *result.Instances[0].PublicDnsName, sandboxID) - instanceID := *result.Instances[0].InstanceId + logger.Printf("Created instance %s (%s) for sandbox %s", instanceName, instanceID, sandboxID) + ips, err := getIPs(result.Instances[0]) if err != nil { - logger.Printf("failed to get IPs for the instance : %v ", err) + logger.Printf("Failed to get IPs for instance %s: %v ", instanceID, err) return nil, err } @@ -298,7 +302,6 @@ func (p *awsProvider) CreateInstance(ctx context.Context, podName, sandboxID str // Replace the first IP address with the public IP address ips[0] = publicIPAddr - } instance := &provider.Instance{ @@ -317,14 +320,16 @@ func (p *awsProvider) DeleteInstance(ctx context.Context, instanceID string) err }, } - logger.Printf("Deleting instance (%s)", instanceID) + logger.Printf("Deleting instance %s", instanceID) resp, err := p.ec2Client.TerminateInstances(ctx, terminateInput) if err != nil { - logger.Printf("failed to delete an instance: %v and the response is %v", err, resp) + logger.Printf("failed to delete instance: %v and the response is %v", err, resp) return err } - logger.Printf("deleted an instance %s", instanceID) + + logger.Printf("Deleted instance %s", instanceID) + return nil } @@ -333,15 +338,14 @@ func (p *awsProvider) Teardown() error { } func (p *awsProvider) ConfigVerifier() error { - ImageId := p.serviceConfig.ImageId - if len(ImageId) == 0 { - return fmt.Errorf("ImageId is empty") + if len(p.serviceConfig.ImageId) == 0 { + return errNoImageID } return nil } // Add SelectInstanceType method to select an instance type based on the memory and vcpu requirements -func (p *awsProvider) selectInstanceType(ctx context.Context, spec provider.InstanceTypeSpec) (string, error) { +func (p *awsProvider) selectInstanceType(_ context.Context, spec provider.InstanceTypeSpec) (string, error) { return provider.SelectInstanceTypeToUse(spec, p.serviceConfig.InstanceTypeSpecList, p.serviceConfig.InstanceTypes, p.serviceConfig.InstanceType) } @@ -374,6 +378,8 @@ func (p *awsProvider) updateInstanceTypeSpecList() error { return nil } +var errInstanceTypeNotFound = errors.New("instance type not found") + // Add a method to retrieve cpu, memory, and storage from the instance type func (p *awsProvider) getInstanceTypeInformation(instanceType string) (vcpu int64, memory int64, gpuCount int64, err error, @@ -405,7 +411,7 @@ func (p *awsProvider) getInstanceTypeInformation(instanceType string) (vcpu int6 return vcpu, memory, gpuCount, nil } - return 0, 0, 0, fmt.Errorf("instance type %s not found", instanceType) + return 0, 0, 0, errInstanceTypeNotFound } // Add a method to get public IP address of the instance @@ -420,7 +426,7 @@ func (p *awsProvider) getPublicIP(ctx context.Context, instanceID string) (netip // Wait for instance to be ready before getting the public IP address err := p.waiter.Wait(ctx, describeInstanceInput, maxWaitTime) if err != nil { - logger.Printf("failed to wait for the instance to be ready : %v ", err) + logger.Printf("failed to wait for instance %s to be ready: %v", instanceID, err) return netip.Addr{}, err } @@ -428,7 +434,7 @@ func (p *awsProvider) getPublicIP(ctx context.Context, instanceID string) (netip // Add describe instance output describeInstanceOutput, err := p.ec2Client.DescribeInstances(ctx, describeInstanceInput) if err != nil { - logger.Printf("failed to describe the instance : %v ", err) + logger.Printf("failed to describe instance %s: %v", instanceID, err) return netip.Addr{}, err } // Get the public IP address from InstanceNetworkInterfaceAssociation @@ -436,14 +442,14 @@ func (p *awsProvider) getPublicIP(ctx context.Context, instanceID string) (netip // Check if the public IP address is nil if publicIP == nil { - return netip.Addr{}, fmt.Errorf("public IP address is nil") + return netip.Addr{}, errNilPublicIPAddress } // If the public IP address is empty, return an error if *publicIP == "" { - return netip.Addr{}, fmt.Errorf("public IP address is empty") + return netip.Addr{}, errEmptyPublicIPAddress } - logger.Printf("public IP address of the instance %s is %s", instanceID, *publicIP) + logger.Printf("public IP address instance %s: %s", instanceID, *publicIP) // Parse the public IP address publicIPAddr, err := netip.ParseAddr(*publicIP) @@ -463,12 +469,12 @@ func (p *awsProvider) getDeviceNameAndSize(imageID string) (string, int32, error // Add describe images output describeImagesOutput, err := p.ec2Client.DescribeImages(context.Background(), describeImagesInput) if err != nil { - logger.Printf("failed to describe the image : %v ", err) + logger.Printf("failed to describe image %s: %v", imageID, err) return "", 0, err } if describeImagesOutput == nil || len(describeImagesOutput.Images) == 0 { - return "", 0, fmt.Errorf("Unable to get details for the image") + return "", 0, errImageDetailsFailed } // Get the device name @@ -476,18 +482,18 @@ func (p *awsProvider) getDeviceNameAndSize(imageID string) (string, int32, error // Check if the device name is empty if deviceName == nil || *deviceName == "" { - return "", 0, fmt.Errorf("device name is empty") + return "", 0, errDeviceNameEmpty } // Get the device size if it is set deviceSize := describeImagesOutput.Images[0].BlockDeviceMappings[0].Ebs.VolumeSize if deviceSize == nil { - logger.Printf("device size of the image %s is not set", imageID) + logger.Printf("image %s device size not set", imageID) return *deviceName, 0, nil } - logger.Printf("device name and size of the image %s is %s, %d", imageID, *deviceName, *deviceSize) + logger.Printf("image %s: device name=[%s], size=[%d]", imageID, *deviceName, *deviceSize) return *deviceName, *deviceSize, nil }