Skip to content

Commit

Permalink
Improve model adapter reconcile workflow stability (#260)
Browse files Browse the repository at this point in the history
* Remove self maintained pointer utils

* Refactor status update methods

* Improve model adapter stability
  • Loading branch information
Jeffwan authored Oct 3, 2024
1 parent 00e1673 commit cf9c4a1
Show file tree
Hide file tree
Showing 7 changed files with 251 additions and 247 deletions.
19 changes: 11 additions & 8 deletions api/model/v1alpha1/modeladapter_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -64,17 +64,21 @@ type ModelAdapterPhase string
const (
// ModelAdapterPending means the CR has been created and that's the initial status
ModelAdapterPending ModelAdapterPhase = "Pending"
// ModelAdapterScheduling means the ModelAdapter is pending scheduling
ModelAdapterScheduling ModelAdapterPhase = "Scheduling"
// ModelAdapterBinding means the controller loads ModelAdapter on a selected pod
ModelAdapterBinding ModelAdapterPhase = "Binding"
// ModelAdapterScheduled means the ModelAdapter is pending scheduling
ModelAdapterScheduled ModelAdapterPhase = "Scheduled"
// ModelAdapterBound means the controller loads ModelAdapter on a selected pod
ModelAdapterBound ModelAdapterPhase = "Bound"
// ModelAdapterResourceCreated means the model adapter owned resources have been created
ModelAdapterResourceCreated ModelAdapterPhase = "ResourceCreated"
// ModelAdapterRunning means ModelAdapter has been running on the pod
ModelAdapterRunning ModelAdapterPhase = "Running"
// ModelAdapterFailed means ModelAdapter has terminated in a failure
ModelAdapterFailed ModelAdapterPhase = "Failed"
// ModelAdapterScaling means ModelAdapter is scaling, could be scaling in or out. won't be enabled until we allow multiple replicas
// ModelAdapterUnknown means ModelAdapter clean up some stable resources
ModelAdapterUnknown ModelAdapterPhase = "Unknown"
// ModelAdapterScaled means ModelAdapter is scaled, could be scaling in or out. won't be enabled until we allow multiple replicas
// TODO: not implemented yet.
ModelAdapterScaling ModelAdapterPhase = "Scaling"
ModelAdapterScaled ModelAdapterPhase = "Scaled"
)

// ModelAdapterStatus defines the observed state of ModelAdapter
Expand All @@ -97,11 +101,10 @@ type ModelAdapterConditionType string

const (
ModelAdapterConditionTypeInitialized ModelAdapterConditionType = "Initialized"
ModelAdapterConditionTypeSelectorMatched ModelAdapterConditionType = "SelectorMatched"
ModelAdapterConditionTypeScheduled ModelAdapterConditionType = "Scheduled"
ModelAdapterConditionTypeBound ModelAdapterConditionType = "Bound"
ModelAdapterConditionTypeResourceCreated ModelAdapterConditionType = "ResourceCreated"
ModelAdapterConditionReady ModelAdapterConditionType = "Ready"
ModelAdapterConditionCleanup ModelAdapterConditionType = "Cleanup"
)

// +genclient
Expand Down
406 changes: 203 additions & 203 deletions pkg/controller/modeladapter/modeladapter_controller.go

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions pkg/controller/modeladapter/resources.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,10 @@ import (
discoveryv1 "k8s.io/api/discovery/v1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/util/intstr"
"k8s.io/utils/ptr"
)

func buildModelAdapterEndpointSlice(instance *modelv1alpha1.ModelAdapter, pod *corev1.Pod) (*discoveryv1.EndpointSlice, error) {
func buildModelAdapterEndpointSlice(instance *modelv1alpha1.ModelAdapter, pod *corev1.Pod) *discoveryv1.EndpointSlice {
serviceLabels := map[string]string{
"kubernetes.io/service-name": instance.Name,
}
Expand All @@ -37,9 +38,9 @@ func buildModelAdapterEndpointSlice(instance *modelv1alpha1.ModelAdapter, pod *c

ports := []discoveryv1.EndpointPort{
{
Name: stringPtr("http"),
Protocol: protocolPtr(corev1.ProtocolTCP),
Port: int32Ptr(8000),
Name: ptr.To("http"),
Protocol: ptr.To(corev1.ProtocolTCP),
Port: ptr.To(int32(8000)),
},
}

Expand All @@ -56,10 +57,10 @@ func buildModelAdapterEndpointSlice(instance *modelv1alpha1.ModelAdapter, pod *c
AddressType: discoveryv1.AddressTypeIPv4,
Endpoints: addresses,
Ports: ports,
}, nil
}
}

func buildModelAdapterService(instance *modelv1alpha1.ModelAdapter) (*corev1.Service, error) {
func buildModelAdapterService(instance *modelv1alpha1.ModelAdapter) *corev1.Service {
labels := map[string]string{
"model.aibrix.ai/name": instance.Spec.BaseModel,
"adapter.model.aibrix.ai/name": instance.Name,
Expand Down Expand Up @@ -94,5 +95,5 @@ func buildModelAdapterService(instance *modelv1alpha1.ModelAdapter) (*corev1.Ser
PublishNotReadyAddresses: true,
Ports: ports,
},
}, nil
}
}
10 changes: 2 additions & 8 deletions pkg/controller/modeladapter/resources_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,10 +43,7 @@ func TestBuildModelAdapterEndpointSlice(t *testing.T) {
}

// Call the function to test
endpointSlice, err := buildModelAdapterEndpointSlice(instance, pod)

// Assert no errors
assert.NoError(t, err)
endpointSlice := buildModelAdapterEndpointSlice(instance, pod)

// Check EndpointSlice metadata
assert.Equal(t, "test-instance", endpointSlice.Name)
Expand Down Expand Up @@ -83,10 +80,7 @@ func TestBuildModelAdapterService(t *testing.T) {
}

// Call the function to test
service, err := buildModelAdapterService(instance)

// Assert no errors
assert.NoError(t, err)
service := buildModelAdapterService(instance)

// Check Service metadata
assert.Equal(t, "test-instance", service.Name)
Expand Down
2 changes: 1 addition & 1 deletion pkg/controller/modeladapter/scheduling/leastadapters.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,6 @@ func (r leastAdapters) SelectPod(ctx context.Context, pods []v1.Pod) (*v1.Pod, e
}
}

klog.Infof("pod selected with least model adapters: %s", selectedPod.Name)
klog.InfoS("pod selected with least model adapters", "pod", klog.KObj(&selectedPod))
return &selectedPod, nil
}
43 changes: 24 additions & 19 deletions pkg/controller/modeladapter/utils.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,27 +23,10 @@ import (
"os"
"strings"

corev1 "k8s.io/api/core/v1"

modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

func stringPtr(s string) *string {
return &s
}

func protocolPtr(p corev1.Protocol) *corev1.Protocol {
return &p
}

func int32Ptr(i int32) *int32 {
return &i
}

func mapPtr(m map[string]string) *map[string]string {
return &m
}

func validateModelAdapter(instance *modelv1alpha1.ModelAdapter) error {
if instance.Spec.ArtifactURL == "" {
return fmt.Errorf("artifactURL is required")
Expand Down Expand Up @@ -128,11 +111,33 @@ func extractHuggingFacePath(artifactURL string) (string, error) {
return path, nil
}

func stringInSlice(slice []string, str string) bool {
func StringInSlice(slice []string, str string) bool {
for _, v := range slice {
if v == str {
return true
}
}
return false
}

// RemoveInstanceFromList removes a string from a slice of strings
func RemoveInstanceFromList(slice []string, strToRemove string) []string {
var result []string
for _, s := range slice {
if s != strToRemove {
result = append(result, s)
}
}
return result
}

// NewCondition creates a new condition.
func NewCondition(condType string, status metav1.ConditionStatus, reason, msg string) metav1.Condition {
return metav1.Condition{
Type: condType,
Status: status,
LastTransitionTime: metav1.Now(),
Reason: reason,
Message: msg,
}
}
3 changes: 2 additions & 1 deletion pkg/controller/modeladapter/utils_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (

"github.com/stretchr/testify/assert"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/utils/ptr"

modelv1alpha1 "github.com/aibrix/aibrix/api/model/v1alpha1"
)
Expand All @@ -36,7 +37,7 @@ func TestValidateModelAdapter(t *testing.T) {
PodSelector: &metav1.LabelSelector{
MatchLabels: map[string]string{"app": "test"},
},
Replicas: int32Ptr(1),
Replicas: ptr.To(int32(1)),
},
}

Expand Down

0 comments on commit cf9c4a1

Please sign in to comment.