Feat/ec2 fleet integration (LeanerCloud#471)

* Carve instanceManager from instances.go into instance_manager.go * Move instanceManager tests to instance_manager_test.go * Move read-only instance query functions to instance_queries.go * Move additional code to instance_queries.go and instance_queries_test.go * Move OD->Spot conversion helpers to instance_conversion.go and instance_actions.go * Fix linter issues in autoscaling_test.go * Fix linter issues in mock_test.go * Fix linter issues in region.go and spot_price.go * Convert RunInstances to instant EC2 Fleet API call - This allows us to implement support for allocation strategies, currently hardcoded to capacity-optimized-prioritized, but later it could be made configurable. - It requires the use of a temporary LaunchTemplate, which is created based on the data previously passed to the RunInstances API call and deleted immediately after the EC2 fleet API call. * Convert tests for createRunInstancesInput to createLaunchTemplateData * Small log message fix * Add additional EC2 mocks * Implement support for configurable Spot allocation strategies - Added global config with per-ASG tag overrides - Extended unit test coverage for the new logic - Improved tests for reading other configurations from tags - Added unit tests for EBS block device conversion logic - Converted PatchBeanstalkUserdata config flag to bool value * Move small utility functions to util.go * Fix codeclimate issue * Extract complex if condition into its own function, pass instance type information by reference * Further simplifications for codeclimate * Use latest version of golang and build for amd64 * Expose spot_allocation_strategy on CloudFormation * Ensure the AMI ID comes from the LaunchConfiguration/Template * Add required IAM permissions * Set priority for capacity-optimized-prioritized * Pass missing LaunchTemplate fields, such as UserData and KeyName * Expand test coverage * Document the capacity-optimized prioritized * Small readme changes
Samit-Maharjan · Oct 14, 2021 · ff96e8f · ff96e8f
1 parent 751b4cc
commit ff96e8f
Show file tree

Hide file tree

Showing 28 changed files with 5,489 additions and 3,915 deletions.
diff --git a/Dockerfile b/Dockerfile
@@ -1,8 +1,8 @@
-FROM golang:1.16-alpine as golang
+FROM golang:alpine as golang
 RUN apk add -U --no-cache ca-certificates git make
 COPY . /src
 WORKDIR /src
-RUN FLAVOR=nightly CGO_ENABLED=0 GOPROXY=direct make
+RUN GOARCH=amd64 FLAVOR=nightly CGO_ENABLED=0 GOPROXY=direct make
 
 FROM scratch
 COPY LICENSE BINARY_LICENSE THIRDPARTY /

diff --git a/Dockerfile.build b/Dockerfile.build
@@ -1,4 +1,4 @@
-FROM golang:1.16-alpine
+FROM golang:alpine
 
 ARG flavor
 

diff --git a/Dockerfile.marketplace b/Dockerfile.marketplace
@@ -1,10 +1,10 @@
-FROM golang:1.16-alpine as golang
+FROM golang:alpine as golang
 RUN apk add -U --no-cache ca-certificates git make
 COPY . /src
 WORKDIR /src
 RUN FLAVOR=stable CGO_ENABLED=0 GOPROXY=direct make
 
-FROM alpine:3.14.1
+FROM alpine:latest
 COPY LICENSE BINARY_LICENSE THIRDPARTY /
 COPY --from=golang /etc/ssl/certs/ca-certificates.crt /etc/ssl/certs/
 COPY --from=golang /src/AutoSpotting .

diff --git a/Makefile b/Makefile
@@ -19,6 +19,8 @@ BUILD := $(DOCKER_IMAGE_VERSION)-$(FLAVOR)-$(SHA)
 EXPIRATION := $(shell go run ./scripts/expiration_date.go)
 SAVINGS_CUT ?= 5
 
+GOARCH ?= amd64
+
 ifneq ($(FLAVOR), custom)
     LICENSE_FILES += BINARY_LICENSE
 endif

diff --git a/README.md b/README.md
@@ -23,11 +23,22 @@ It is usually set up to monitor existing long-running AutoScaling groups,
 replacing their instances with Spot instances with minimal configuration
 changes.
 
-Often all it needs is just tagging them with `spot-enabled=true`, but
-even that can be avoided in some cases, yielding the usual 70%-90% Spot cost
+Often all it needs is just tagging them with `spot-enabled=true` (in some cases
+even that can be avoided), yielding the usual 70%-90% Spot cost
 savings but in a better integrated and easier to adopt way
-than other alternative tools and solutions, especially if you run infrastructure
-that for whatever reasons you can't afford to update to Spot by other means.
+than other alternative tools and solutions.
+
+It is particularly useful if you have a large footprint that you want to migrate
+to Spot quickly due to management pressure but with minimal effort and configuration
+changes.
+
+## Guiding principles ##
+
+- Customer-focused, designed to maximize user benefits and reduce adoption friction
+- Safe and secure, hosted in your AWS account and with minimal required set of IAM permissions
+- Auditable OSS code base developed in the open
+- Inexpensive, easy to install and supported builds offered through the AWS Marketplace
+- Simple, minimalist implementation
 
 ## How does it work? ##
 
@@ -45,22 +56,20 @@ replaced with spot clones within seconds of being launched.
 
 If this fails temporarily due to insufficient spot capacity, AutoSpotting will
 continuously attempt to replace them every few minutes until successful after
-spot capacity becomes available again. When launching Spot instances, the
-compatible instance types are attempted in increasing order of their price,
-until one is successfully launched, lazily achieving diversification in case of
-temporary unavailability of certain instance types.
+spot capacity becomes available again.
+
+When launching Spot instances, the compatible instance types are chosen by
+default using the
+[capacity-optimized-prioritized](https://docs.amazonaws.cn/en_us/AWSEC2/latest/UserGuide/ec2-fleet-examples.html#ec2-fleet-config11)
+allocation strategy, which is given a list of instance types sorted by price. This
+configuration offers a good tradeoff between low cost and significantly reduced
+interruption rates. The lowest-price allocation strategy is still available as a
+configuration option.
 
 This process can partly be seen in action below, you can click to expand the animation:
 
 ![Workflow](https://autospotting.org/img/autospotting.gif)
 
-Additionally, it implements some advanced logic that is aware of spot and on
-demand prices, including for different spot products and configurable discounts
-for reserved instances or large volume customers. It also considers the specs of
-all instance types and automatically launches the cheapest available instance
-types based on flexible configuration set globally or overridden at the group
-level using additional tags, but these overrides are often not needed.
-
 A single installation can handle all enabled groups from an entire AWS account in
 parallel across all available AWS regions, but it can be restricted to fewer
 regions if desired in certain situations.
@@ -75,8 +84,6 @@ the traffic would automatically be drained on termination.
 The savings it generates are in the 60-90% range usually seen when using spot
 instances, but they may vary depending on region and instance type.
 
-![Savings](https://autospotting.org/img/savings.png)
-
 ## What's under the hood? ##
 
 The entire logic described above is implemented in a set of Lambda functions

diff --git a/autospotting.go b/autospotting.go
@@ -19,6 +19,8 @@ var conf autospotting.Config
 
 // Version represents the build version being used
 var Version = "number missing"
+
+// SavingsCut stores the savings percentage charged for the stable builds
 var SavingsCut = "0"
 
 // ExpirationDate represents the date at which the version will expire

diff --git a/cloudformation/stacks/AutoSpotting/template.yaml b/cloudformation/stacks/AutoSpotting/template.yaml
@@ -219,6 +219,19 @@
         bucket stored on another region, but it can process AutoScaling groups
         from any other regions. Example: 'us-east-1,eu-west-1'"
       Type: CommaDelimitedList
+    SpotAllocationStrategy:
+      Type: "String"
+      Description: >
+        "Controls the Spot allocation strategy for
+        launching Spot instances. Allowed options:
+        'capacity-optimized-prioritized' (default), 'capacity-optimized',
+        'lowest-price'. Further information on this is available at
+        https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/ec2-fleet-allocation-strategy.html"
+      AllowedValues:
+        - "capacity-optimized-prioritized"
+        - "capacity-optimized"
+        - "lowest-price"
+      Default: "capacity-optimized-prioritized"
     SpotPricePercentageBuffer:
       Default: "10.0"
       Description: >
@@ -382,6 +395,8 @@
               Fn::Join:
               - ","
               - Ref: "Regions"
+            SPOT_ALLOCATION_STRATEGY:
+              Ref: SpotAllocationStrategy
             SPOT_PRICE_BUFFER_PERCENTAGE:
               Ref: "SpotPricePercentageBuffer"
             SPOT_PRODUCT_DESCRIPTION:
@@ -435,6 +450,9 @@
                 - "aws-marketplace:RegisterUsage"
                 - "cloudformation:Describe*"
                 - "ec2:CreateTags"
+                - "ec2:CreateLaunchTemplate"
+                - "ec2:CreateFleet"
+                - "ec2:DeleteLaunchTemplate"
                 - "ec2:DeleteTags"
                 - "ec2:DescribeImages"
                 - "ec2:DescribeInstanceAttribute"

diff --git a/core/action.go b/core/action.go
@@ -42,7 +42,7 @@ type launchSpotReplacement struct {
 func (lsr launchSpotReplacement) run() {
 	spotInstanceID, err := lsr.target.onDemandInstance.launchSpotReplacement()
 	if err != nil {
-		log.Printf("Could not launch cheapest spot instance: %s", err)
+		log.Printf("Could not launch replacement spot instance: %s", err)
 		return
 	}
 	log.Printf("Successfully launched spot instance %s, exiting...", *spotInstanceID)

diff --git a/core/autoscaling.go b/core/autoscaling.go
@@ -23,7 +23,6 @@ type autoScalingGroup struct {
 	launchConfiguration *launchConfiguration
 	launchTemplate      *launchTemplate
 	instances           instances
-	minOnDemand         int64
 	config              AutoScalingConfig
 }
 
@@ -121,20 +120,20 @@ func (a *autoScalingGroup) loadLaunchTemplate() (*launchTemplate, error) {
 func (a *autoScalingGroup) needReplaceOnDemandInstances() (bool, int64) {
 	onDemandRunning, totalRunning := a.alreadyRunningInstanceCount(false, nil)
 	debug.Printf("onDemandRunning=%v totalRunning=%v a.minOnDemand=%v",
-		onDemandRunning, totalRunning, a.minOnDemand)
+		onDemandRunning, totalRunning, a.config.MinOnDemand)
 
 	if totalRunning == 0 {
 		log.Printf("The group %s is currently empty or in the process of launching new instances",
 			a.name)
 		return true, totalRunning
 	}
 
-	if onDemandRunning > a.minOnDemand {
+	if onDemandRunning > a.config.MinOnDemand {
 		log.Println("Currently more than enough OnDemand instances running")
 		return true, totalRunning
 	}
 
-	if onDemandRunning == a.minOnDemand {
+	if onDemandRunning == a.config.MinOnDemand {
 		log.Println("Currently OnDemand running equals to the required number, skipping run")
 		return false, totalRunning
 	}
@@ -150,7 +149,7 @@ func (a *autoScalingGroup) terminateRandomSpotInstanceIfHavingEnough(totalRunnin
 	}
 
 	if allInstancesAreRunning, onDemandRunning := a.allInstancesRunning(); allInstancesAreRunning {
-		if a.instances.count64() == *a.DesiredCapacity && onDemandRunning == a.minOnDemand {
+		if a.instances.count64() == *a.DesiredCapacity && onDemandRunning == a.config.MinOnDemand {
 			log.Println("Currently Spot running equals to the required number, skipping termination")
 			return nil
 		}