Skip to content

Commit

Permalink
aws: logging improvements
Browse files Browse the repository at this point in the history
Improve logging for clarity in context of asynchronous system. For exmaple, including instance ID/image name in all log related messages to facilitate easier (human) log tracing

Signed-off-by: Mike Frisch <mikef17@gmail.com>
  • Loading branch information
EmmEff committed Feb 7, 2025
1 parent 5d0ce71 commit ab59988
Showing 1 changed file with 36 additions and 30 deletions.
66 changes: 36 additions & 30 deletions src/cloud-providers/aws/provider.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,14 @@ import (
)

var (
logger = log.New(log.Writer(), "[adaptor/cloud/aws] ", log.LstdFlags|log.Lmsgprefix)
errNotReady = errors.New("address not ready")
logger = log.New(log.Writer(), "[adaptor/cloud/aws] ", log.LstdFlags|log.Lmsgprefix)

errNotReady = errors.New("address not ready")
errNoImageID = errors.New("ImageId is empty")
errNilPublicIPAddress = errors.New("public IP address is nil")
errEmptyPublicIPAddress = errors.New("public IP address is empty")
errImageDetailsFailed = errors.New("unable to get image details")
errDeviceNameEmpty = errors.New("empty device name")
)

const (
Expand Down Expand Up @@ -120,7 +126,7 @@ func NewProvider(config *Config) (provider.Provider, error) {
logger.Printf("RootDeviceName and RootVolumeSize of the image %s is %s, %d", config.ImageId, config.RootDeviceName, config.RootVolumeSize)
}

if err = provider.updateInstanceTypeSpecList(); err != nil {
if err := provider.updateInstanceTypeSpecList(); err != nil {
return nil, err
}

Expand All @@ -142,7 +148,7 @@ func getIPs(instance types.Instance) ([]netip.Addr, error) {
}
podNodeIPs = append(podNodeIPs, ip)

logger.Printf("podNodeIP[%d]=%s", i, ip.String())
logger.Printf("instance %s: podNodeIP[%d]=%s", *instance.InstanceId, i, ip.String())
}

return podNodeIPs, nil
Expand Down Expand Up @@ -239,7 +245,6 @@ func (p *awsProvider) CreateInstance(ctx context.Context, podName, sandboxID str
input.SubnetId = nil
// Remove the security group IDs from the input
input.SecurityGroupIds = nil

}

// Ref: https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/snp-work.html
Expand All @@ -255,7 +260,6 @@ func (p *awsProvider) CreateInstance(ctx context.Context, podName, sandboxID str
AmdSevSnp: types.AmdSevSnpSpecificationEnabled,
}
}

}

// Add block device mappings to the instance to set the root volume size
Expand All @@ -272,20 +276,20 @@ func (p *awsProvider) CreateInstance(ctx context.Context, podName, sandboxID str
}
}

logger.Printf("CreateInstance: name: %q", instanceName)
logger.Printf("Creating instance %s for sandbox %s", instanceName, sandboxID)

result, err := p.ec2Client.RunInstances(ctx, input)
if err != nil {
return nil, fmt.Errorf("Creating instance (%v) returned error: %s", result, err)
return nil, fmt.Errorf("creating instance %s (%v): %w", instanceName, result, err)
}

logger.Printf("created an instance %s for sandbox %s", *result.Instances[0].PublicDnsName, sandboxID)

instanceID := *result.Instances[0].InstanceId

logger.Printf("Created instance %s (%s) for sandbox %s", instanceName, instanceID, sandboxID)

ips, err := getIPs(result.Instances[0])
if err != nil {
logger.Printf("failed to get IPs for the instance : %v ", err)
logger.Printf("Failed to get IPs for instance %s: %v ", instanceID, err)
return nil, err
}

Expand All @@ -298,7 +302,6 @@ func (p *awsProvider) CreateInstance(ctx context.Context, podName, sandboxID str

// Replace the first IP address with the public IP address
ips[0] = publicIPAddr

}

instance := &provider.Instance{
Expand All @@ -317,14 +320,16 @@ func (p *awsProvider) DeleteInstance(ctx context.Context, instanceID string) err
},
}

logger.Printf("Deleting instance (%s)", instanceID)
logger.Printf("Deleting instance %s", instanceID)

resp, err := p.ec2Client.TerminateInstances(ctx, terminateInput)
if err != nil {
logger.Printf("failed to delete an instance: %v and the response is %v", err, resp)
logger.Printf("failed to delete instance: %v and the response is %v", err, resp)
return err
}
logger.Printf("deleted an instance %s", instanceID)

logger.Printf("Deleted instance %s", instanceID)

return nil
}

Expand All @@ -333,15 +338,14 @@ func (p *awsProvider) Teardown() error {
}

func (p *awsProvider) ConfigVerifier() error {
ImageId := p.serviceConfig.ImageId
if len(ImageId) == 0 {
return fmt.Errorf("ImageId is empty")
if len(p.serviceConfig.ImageId) == 0 {
return errNoImageID
}
return nil
}

// Add SelectInstanceType method to select an instance type based on the memory and vcpu requirements
func (p *awsProvider) selectInstanceType(ctx context.Context, spec provider.InstanceTypeSpec) (string, error) {
func (p *awsProvider) selectInstanceType(_ context.Context, spec provider.InstanceTypeSpec) (string, error) {
return provider.SelectInstanceTypeToUse(spec, p.serviceConfig.InstanceTypeSpecList, p.serviceConfig.InstanceTypes, p.serviceConfig.InstanceType)
}

Expand Down Expand Up @@ -374,6 +378,8 @@ func (p *awsProvider) updateInstanceTypeSpecList() error {
return nil
}

var errInstanceTypeNotFound = errors.New("instance type not found")

// Add a method to retrieve cpu, memory, and storage from the instance type
func (p *awsProvider) getInstanceTypeInformation(instanceType string) (vcpu int64, memory int64,
gpuCount int64, err error,
Expand Down Expand Up @@ -405,7 +411,7 @@ func (p *awsProvider) getInstanceTypeInformation(instanceType string) (vcpu int6

return vcpu, memory, gpuCount, nil
}
return 0, 0, 0, fmt.Errorf("instance type %s not found", instanceType)
return 0, 0, 0, errInstanceTypeNotFound
}

// Add a method to get public IP address of the instance
Expand All @@ -420,30 +426,30 @@ func (p *awsProvider) getPublicIP(ctx context.Context, instanceID string) (netip
// Wait for instance to be ready before getting the public IP address
err := p.waiter.Wait(ctx, describeInstanceInput, maxWaitTime)
if err != nil {
logger.Printf("failed to wait for the instance to be ready : %v ", err)
logger.Printf("failed to wait for instance %s to be ready: %v", instanceID, err)
return netip.Addr{}, err

}

// Add describe instance output
describeInstanceOutput, err := p.ec2Client.DescribeInstances(ctx, describeInstanceInput)
if err != nil {
logger.Printf("failed to describe the instance : %v ", err)
logger.Printf("failed to describe instance %s: %v", instanceID, err)
return netip.Addr{}, err
}
// Get the public IP address from InstanceNetworkInterfaceAssociation
publicIP := describeInstanceOutput.Reservations[0].Instances[0].NetworkInterfaces[0].Association.PublicIp

// Check if the public IP address is nil
if publicIP == nil {
return netip.Addr{}, fmt.Errorf("public IP address is nil")
return netip.Addr{}, errNilPublicIPAddress
}
// If the public IP address is empty, return an error
if *publicIP == "" {
return netip.Addr{}, fmt.Errorf("public IP address is empty")
return netip.Addr{}, errEmptyPublicIPAddress
}

logger.Printf("public IP address of the instance %s is %s", instanceID, *publicIP)
logger.Printf("public IP address instance %s: %s", instanceID, *publicIP)

// Parse the public IP address
publicIPAddr, err := netip.ParseAddr(*publicIP)
Expand All @@ -463,31 +469,31 @@ func (p *awsProvider) getDeviceNameAndSize(imageID string) (string, int32, error
// Add describe images output
describeImagesOutput, err := p.ec2Client.DescribeImages(context.Background(), describeImagesInput)
if err != nil {
logger.Printf("failed to describe the image : %v ", err)
logger.Printf("failed to describe image %s: %v", imageID, err)
return "", 0, err
}

if describeImagesOutput == nil || len(describeImagesOutput.Images) == 0 {
return "", 0, fmt.Errorf("Unable to get details for the image")
return "", 0, errImageDetailsFailed
}

// Get the device name
deviceName := describeImagesOutput.Images[0].RootDeviceName

// Check if the device name is empty
if deviceName == nil || *deviceName == "" {
return "", 0, fmt.Errorf("device name is empty")
return "", 0, errDeviceNameEmpty
}

// Get the device size if it is set
deviceSize := describeImagesOutput.Images[0].BlockDeviceMappings[0].Ebs.VolumeSize

if deviceSize == nil {
logger.Printf("device size of the image %s is not set", imageID)
logger.Printf("image %s device size not set", imageID)
return *deviceName, 0, nil
}

logger.Printf("device name and size of the image %s is %s, %d", imageID, *deviceName, *deviceSize)
logger.Printf("image %s: device name=[%s], size=[%d]", imageID, *deviceName, *deviceSize)

return *deviceName, *deviceSize, nil
}

0 comments on commit ab59988

Please sign in to comment.