Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve condition update and fix multiple endpoint ips issue #609

Merged
merged 1 commit into from
Jan 27, 2025
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 34 additions & 7 deletions pkg/controller/modeladapter/modeladapter_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -468,9 +468,15 @@ func (r *ModelAdapterReconciler) DoReconcile(ctx context.Context, req ctrl.Reque
return ctrl.Result{}, nil
}

func (r *ModelAdapterReconciler) updateStatus(ctx context.Context, instance *modelv1alpha1.ModelAdapter, condition metav1.Condition) error {
klog.InfoS("model adapter reconcile", "Update CR status", instance.Name, "status", instance.Status)
meta.SetStatusCondition(&instance.Status.Conditions, condition)
func (r *ModelAdapterReconciler) updateStatus(ctx context.Context, instance *modelv1alpha1.ModelAdapter, conditions ...metav1.Condition) error {
changed := false
for _, condition := range conditions {
if meta.SetStatusCondition(&instance.Status.Conditions, condition) {
changed = true
}
}
// TODO: sort the conditions based on LastTransitionTime if needed.
klog.InfoS("model adapter reconcile", "Update CR status", instance.Name, "changed", changed, "status", instance.Status, "conditions", conditions)
return r.Status().Update(ctx, instance)
}

Expand All @@ -482,7 +488,19 @@ func (r *ModelAdapterReconciler) clearModelAdapterInstanceList(ctx context.Conte
StableInstanceFoundReason,
fmt.Sprintf("Pod (%s/%s) is stale or invalid for model adapter (%s/%s), clean up the list", instance.GetNamespace(), stalePodName, instance.GetNamespace(), instance.Name))

if err := r.updateStatus(ctx, instance, condition); err != nil {
// We also need to update the scheduling and ready status to false
// When the pod get migrated, we need to update the status with latest LastTransitionTime.
// However, meta.SetStatusCondition won't update the status unless there's other change like Status as well.
// Here it also help trigger time update in later updates.
scheduleCondition := meta.FindStatusCondition(instance.Status.Conditions, string(modelv1alpha1.ModelAdapterConditionTypeScheduled))
scheduleCondition.Status = metav1.ConditionFalse
scheduleCondition.LastTransitionTime = metav1.Now()

readyCondition := meta.FindStatusCondition(instance.Status.Conditions, string(modelv1alpha1.ModelAdapterConditionReady))
readyCondition.Status = metav1.ConditionFalse
readyCondition.LastTransitionTime = metav1.Now()

if err := r.updateStatus(ctx, instance, condition, *scheduleCondition, *readyCondition); err != nil {
return err
}

Expand Down Expand Up @@ -860,9 +878,18 @@ func (r *ModelAdapterReconciler) reconcileEndpointSlice(ctx context.Context, ins

// Append the Pod IP to the EndpointSlice if it doesn't exist
if !alreadyExists {
found.Endpoints = append(found.Endpoints, discoveryv1.Endpoint{
Addresses: []string{podIP},
})
// TODO: come back when we start to support multi-instance.
//found.Endpoints = append(found.Endpoints, discoveryv1.Endpoint{
// Addresses: []string{podIP},
//})

// override the endpoint with only one pod id.
// TODO: We need to refactor the logic once we start to support multi instances later
found.Endpoints = []discoveryv1.Endpoint{
{
Addresses: []string{podIP},
},
}

if err := r.Update(ctx, found); err != nil {
klog.ErrorS(err, "Failed to update EndpointSlice", "EndpointSlice", found.Name)
Expand Down
Loading