Skip to content

Commit

Permalink
Add message with error to the plan status to ease debugging (#1050)
Browse files Browse the repository at this point in the history
  • Loading branch information
alenkacz authored Nov 19, 2019
1 parent 9eaf5e0 commit d049eed
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 49 deletions.
55 changes: 44 additions & 11 deletions pkg/apis/kudo/v1beta1/instance_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -78,22 +78,55 @@ type AggregatedStatus struct {
type PlanStatus struct {
Name string `json:"name,omitempty"`
Status ExecutionStatus `json:"status,omitempty"`
Message string `json:"message,omitempty"` // more verbose explanation of the status, e.g. a detailed error message
LastFinishedRun metav1.Time `json:"lastFinishedRun,omitempty"`
Phases []PhaseStatus `json:"phases,omitempty"`
UID apimachinerytypes.UID `json:"uid,omitempty"`
}

// PhaseStatus is representing status of a phase
type PhaseStatus struct {
Name string `json:"name,omitempty"`
Status ExecutionStatus `json:"status,omitempty"`
Steps []StepStatus `json:"steps,omitempty"`
Name string `json:"name,omitempty"`
Status ExecutionStatus `json:"status,omitempty"`
Message string `json:"message,omitempty"` // more verbose explanation of the status, e.g. a detailed error message
Steps []StepStatus `json:"steps,omitempty"`
}

// StepStatus is representing status of a step
type StepStatus struct {
Name string `json:"name,omitempty"`
Status ExecutionStatus `json:"status,omitempty"`
Name string `json:"name,omitempty"`
Message string `json:"message,omitempty"` // more verbose explanation of the status, e.g. a detailed error message
Status ExecutionStatus `json:"status,omitempty"`
}

func (s *StepStatus) Set(status ExecutionStatus) {
s.Status = status
s.Message = ""
}

func (s *StepStatus) SetWithMessage(status ExecutionStatus, message string) {
s.Status = status
s.Message = message
}

func (s *PhaseStatus) Set(status ExecutionStatus) {
s.Status = status
s.Message = ""
}

func (s *PhaseStatus) SetWithMessage(status ExecutionStatus, message string) {
s.Status = status
s.Message = message
}

func (s *PlanStatus) Set(status ExecutionStatus) {
s.Status = status
s.Message = ""
}

func (s *PlanStatus) SetWithMessage(status ExecutionStatus, message string) {
s.Status = status
s.Message = message
}

// ExecutionStatus captures the state of the rollout.
Expand Down Expand Up @@ -213,7 +246,7 @@ func (i *Instance) EnsurePlanStatusInitialized(ov *OperatorVersion) {

existingPlanStatus, planExists := i.Status.PlanStatus[planName]
if planExists {
planStatus.Status = existingPlanStatus.Status
planStatus.SetWithMessage(existingPlanStatus.Status, existingPlanStatus.Message)
}
for _, phase := range plan.Phases {
phaseStatus := &PhaseStatus{
Expand All @@ -227,7 +260,7 @@ func (i *Instance) EnsurePlanStatusInitialized(ov *OperatorVersion) {
if phase.Name == oldPhase.Name {
existingPhaseStatus = oldPhase
phaseExists = true
phaseStatus.Status = existingPhaseStatus.Status
phaseStatus.SetWithMessage(existingPhaseStatus.Status, existingPhaseStatus.Message)
}
}
}
Expand All @@ -239,7 +272,7 @@ func (i *Instance) EnsurePlanStatusInitialized(ov *OperatorVersion) {
if phaseExists {
for _, oldStep := range existingPhaseStatus.Steps {
if step.Name == oldStep.Name {
stepStatus.Status = oldStep.Status
stepStatus.SetWithMessage(oldStep.Status, oldStep.Message)
}
}
}
Expand All @@ -265,12 +298,12 @@ func (i *Instance) StartPlanExecution(planName string, ov *OperatorVersion) erro
// update plan status
notFound = false
planStatus := i.Status.PlanStatus[planIndex]
planStatus.Status = ExecutionPending
planStatus.Set(ExecutionPending)
planStatus.UID = uuid.NewUUID()
for j, p := range v.Phases {
planStatus.Phases[j].Status = ExecutionPending
planStatus.Phases[j].Set(ExecutionPending)
for k := range p.Steps {
i.Status.PlanStatus[planIndex].Phases[j].Steps[k].Status = ExecutionPending
i.Status.PlanStatus[planIndex].Phases[j].Steps[k].Set(ExecutionPending)
}
}

Expand Down
59 changes: 35 additions & 24 deletions pkg/engine/workflow/engine.go
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,8 @@ func (ap *ActivePlan) taskByName(name string) (*v1beta1.Task, bool) {
// └── Phase main [FATAL_ERROR]
// └── Step everything (FATAL_ERROR)
//
// In terms of Status Message, we don't propagate the message up for fatal errors
//
// Furthermore, a transient ERROR during a step execution, means that the next step may be executed if the step strategy
// is "parallel". In case of a fatal error, it is returned alongside with the new plan status and published on the event bus.
func Execute(pl *ActivePlan, em *engine.Metadata, c client.Client, enh renderer.Enhancer, currentTime time.Time) (*v1beta1.PlanStatus, error) {
Expand All @@ -67,16 +69,18 @@ func Execute(pl *ActivePlan, em *engine.Metadata, c client.Client, enh renderer.
}

planStatus := pl.PlanStatus.DeepCopy()
planStatus.Status = v1beta1.ExecutionInProgress
planStatus.Set(v1beta1.ExecutionInProgress)

phasesLeft := len(pl.Spec.Phases)
// --- 1. Iterate over plan phases ---
for _, ph := range pl.Spec.Phases {
phaseStatus := getPhaseStatus(ph.Name, planStatus)
if phaseStatus == nil {
planStatus.Status = v1beta1.ExecutionFatalError
err := fmt.Errorf("%s/%s %w missing phase status: %s.%s", em.InstanceNamespace, em.InstanceName, engine.ErrFatalExecution, pl.Name, ph.Name)

planStatus.SetWithMessage(v1beta1.ExecutionFatalError, err.Error())
return planStatus, engine.ExecutionError{
Err: fmt.Errorf("%s/%s %w missing phase status: %s.%s", em.InstanceNamespace, em.InstanceName, engine.ErrFatalExecution, pl.Name, ph.Name),
Err: err,
EventName: missingPhaseStatus,
}
}
Expand All @@ -86,7 +90,7 @@ func Execute(pl *ActivePlan, em *engine.Metadata, c client.Client, enh renderer.
phasesLeft = phasesLeft - 1
continue
} else if isInProgress(phaseStatus.Status) {
phaseStatus.Status = v1beta1.ExecutionInProgress
phaseStatus.Set(v1beta1.ExecutionInProgress)
} else {
break
}
Expand All @@ -96,10 +100,12 @@ func Execute(pl *ActivePlan, em *engine.Metadata, c client.Client, enh renderer.
for _, st := range ph.Steps {
stepStatus := getStepStatus(st.Name, phaseStatus)
if stepStatus == nil {
phaseStatus.Status = v1beta1.ExecutionFatalError
planStatus.Status = v1beta1.ExecutionFatalError
err := fmt.Errorf("%s/%s %w missing step status: %s.%s.%s", em.InstanceNamespace, em.InstanceName, engine.ErrFatalExecution, pl.Name, ph.Name, st.Name)

phaseStatus.SetWithMessage(v1beta1.ExecutionFatalError, err.Error())
planStatus.Set(v1beta1.ExecutionFatalError)
return planStatus, engine.ExecutionError{
Err: fmt.Errorf("%s/%s %w missing step status: %s.%s.%s", em.InstanceNamespace, em.InstanceName, engine.ErrFatalExecution, pl.Name, ph.Name, st.Name),
Err: err,
EventName: missingStepStatus,
}
}
Expand All @@ -109,7 +115,7 @@ func Execute(pl *ActivePlan, em *engine.Metadata, c client.Client, enh renderer.
delete(stepsLeft, stepStatus.Name)
continue
} else if isInProgress(stepStatus.Status) {
stepStatus.Status = v1beta1.ExecutionInProgress
stepStatus.Set(v1beta1.ExecutionInProgress)
} else {
// we are not in progress and not finished. An unexpected error occurred so that we can not proceed to the next phase
break
Expand All @@ -120,11 +126,13 @@ func Execute(pl *ActivePlan, em *engine.Metadata, c client.Client, enh renderer.
for _, tn := range st.Tasks {
t, ok := pl.taskByName(tn)
if !ok {
phaseStatus.Status = v1beta1.ExecutionFatalError
stepStatus.Status = v1beta1.ExecutionFatalError
planStatus.Status = v1beta1.ExecutionFatalError
err := fmt.Errorf("%s/%s %w missing task %s.%s.%s.%s", em.InstanceNamespace, em.InstanceName, engine.ErrFatalExecution, pl.Name, ph.Name, st.Name, tn)

phaseStatus.Set(v1beta1.ExecutionFatalError)
planStatus.Set(v1beta1.ExecutionFatalError)
stepStatus.SetWithMessage(v1beta1.ExecutionFatalError, err.Error())
return planStatus, engine.ExecutionError{
Err: fmt.Errorf("%s/%s %w missing task %s.%s.%s.%s ", em.InstanceNamespace, em.InstanceName, engine.ErrFatalExecution, pl.Name, ph.Name, st.Name, tn),
Err: err,
EventName: unknownTaskNameEventName,
}
}
Expand All @@ -141,11 +149,13 @@ func Execute(pl *ActivePlan, em *engine.Metadata, c client.Client, enh renderer.
// - 3.b build the engine task -
tt, err := task.Build(t)
if err != nil {
stepStatus.Status = v1beta1.ExecutionFatalError
phaseStatus.Status = v1beta1.ExecutionFatalError
planStatus.Status = v1beta1.ExecutionFatalError
err := fmt.Errorf("%s/%s %w failed to build task %s.%s.%s.%s: %v", em.InstanceNamespace, em.InstanceName, engine.ErrFatalExecution, pl.Name, ph.Name, st.Name, tn, err)

stepStatus.SetWithMessage(v1beta1.ExecutionFatalError, err.Error())
planStatus.Set(v1beta1.ExecutionFatalError)
phaseStatus.Set(v1beta1.ExecutionFatalError)
return planStatus, engine.ExecutionError{
Err: fmt.Errorf("%s/%s %w failed to build task %s.%s.%s.%s: %v", em.InstanceNamespace, em.InstanceName, engine.ErrFatalExecution, pl.Name, ph.Name, st.Name, tn, err),
Err: err,
EventName: unknownTaskKindEventName,
}
}
Expand All @@ -166,13 +176,14 @@ func Execute(pl *ActivePlan, em *engine.Metadata, c client.Client, enh renderer.
// stopped in the spirit of "fail-loud-and-proud".
switch {
case errors.Is(err, engine.ErrFatalExecution):
phaseStatus.Status = v1beta1.ExecutionFatalError
stepStatus.Status = v1beta1.ExecutionFatalError
planStatus.Status = v1beta1.ExecutionFatalError
phaseStatus.Set(v1beta1.ExecutionFatalError)
planStatus.Set(v1beta1.ExecutionFatalError)
stepStatus.SetWithMessage(v1beta1.ExecutionFatalError, err.Error())
return planStatus, err
case err != nil:
fmt.Printf("PlanExecution: A transient error when executing task %s.%s.%s.%s. Will retry. %v", pl.Name, ph.Name, st.Name, t.Name, err)
stepStatus.Status = v1beta1.ErrorStatus
message := fmt.Sprintf("A transient error when executing task %s.%s.%s.%s. Will retry. %v", pl.Name, ph.Name, st.Name, t.Name, err)
stepStatus.SetWithMessage(v1beta1.ErrorStatus, message)
fmt.Printf("PlanExecution: %s", message)
case done:
delete(tasksLeft, t.Name)
}
Expand All @@ -187,7 +198,7 @@ func Execute(pl *ActivePlan, em *engine.Metadata, c client.Client, enh renderer.
break
}
} else {
stepStatus.Status = v1beta1.ExecutionComplete
stepStatus.Set(v1beta1.ExecutionComplete)
delete(stepsLeft, stepStatus.Name)
}
}
Expand All @@ -201,15 +212,15 @@ func Execute(pl *ActivePlan, em *engine.Metadata, c client.Client, enh renderer.
break
}
} else {
phaseStatus.Status = v1beta1.ExecutionComplete
phaseStatus.Set(v1beta1.ExecutionComplete)
phasesLeft = phasesLeft - 1
}
}

// --- 7. Check if all PHASEs are finished ---
if phasesLeft == 0 {
log.Printf("PlanExecution: %s/%s all phases of the plan %s are ready", em.InstanceNamespace, em.InstanceName, pl.Name)
planStatus.Status = v1beta1.ExecutionComplete
planStatus.Set(v1beta1.ExecutionComplete)
planStatus.LastFinishedRun = v1.Time{Time: currentTime}
}

Expand Down
22 changes: 11 additions & 11 deletions pkg/engine/workflow/engine_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,7 @@ func TestExecutePlan(t *testing.T) {
expectedStatus: &v1beta1.PlanStatus{
Status: v1beta1.ExecutionInProgress,
Name: "test",
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionInProgress, Steps: []v1beta1.StepStatus{{Status: v1beta1.ErrorStatus, Name: "step"}}}},
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionInProgress, Steps: []v1beta1.StepStatus{{Status: v1beta1.ErrorStatus, Name: "step", Message: "A transient error when executing task test.phase.step.task. Will retry. dummy error"}}}},
},
enhancer: testEnhancer,
},
Expand Down Expand Up @@ -207,7 +207,7 @@ func TestExecutePlan(t *testing.T) {
expectedStatus: &v1beta1.PlanStatus{
Status: v1beta1.ExecutionFatalError,
Name: "test",
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{{Status: v1beta1.ExecutionFatalError, Name: "step"}}}},
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{{Status: v1beta1.ExecutionFatalError, Message: "Error during execution: fatal error: default/test-instance failed in test.phase.step.task: dummy error", Name: "step"}}}},
},
wantErr: true,
enhancer: testEnhancer,
Expand Down Expand Up @@ -240,7 +240,7 @@ func TestExecutePlan(t *testing.T) {
expectedStatus: &v1beta1.PlanStatus{
Status: v1beta1.ExecutionFatalError,
Name: "test",
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{{Status: v1beta1.ExecutionFatalError, Name: "step"}}}},
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{{Status: v1beta1.ExecutionFatalError, Message: "default/test-instance fatal error: missing task test.phase.step.fake-task", Name: "step"}}}},
},
wantErr: true,
enhancer: testEnhancer,
Expand Down Expand Up @@ -271,7 +271,7 @@ func TestExecutePlan(t *testing.T) {
expectedStatus: &v1beta1.PlanStatus{
Status: v1beta1.ExecutionFatalError,
Name: "test",
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{{Status: v1beta1.ExecutionFatalError, Name: "step"}}}},
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{{Status: v1beta1.ExecutionFatalError, Name: "step", Message: "default/test-instance fatal error: failed to build task test.phase.step.task: unknown task kind Unknown"}}}},
},
wantErr: true,
enhancer: testEnhancer,
Expand Down Expand Up @@ -319,7 +319,7 @@ func TestExecutePlan(t *testing.T) {
Status: v1beta1.ExecutionInProgress,
Name: "test",
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionInProgress, Steps: []v1beta1.StepStatus{
{Name: "stepOne", Status: v1beta1.ErrorStatus},
{Name: "stepOne", Status: v1beta1.ErrorStatus, Message: "A transient error when executing task test.phase.stepOne.taskOne. Will retry. dummy error"},
{Name: "stepTwo", Status: v1beta1.ExecutionInProgress},
}}},
},
Expand Down Expand Up @@ -367,7 +367,7 @@ func TestExecutePlan(t *testing.T) {
Status: v1beta1.ExecutionInProgress,
Name: "test",
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionInProgress, Steps: []v1beta1.StepStatus{
{Name: "stepOne", Status: v1beta1.ErrorStatus},
{Name: "stepOne", Status: v1beta1.ErrorStatus, Message: "A transient error when executing task test.phase.stepOne.taskOne. Will retry. dummy error"},
{Name: "stepTwo", Status: v1beta1.ExecutionComplete},
}}},
},
Expand Down Expand Up @@ -415,7 +415,7 @@ func TestExecutePlan(t *testing.T) {
Status: v1beta1.ExecutionFatalError,
Name: "test",
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{
{Name: "stepOne", Status: v1beta1.ExecutionFatalError},
{Name: "stepOne", Status: v1beta1.ExecutionFatalError, Message: "Error during execution: fatal error: default/test-instance failed in test.phase.stepOne.taskOne: dummy error"},
{Name: "stepTwo", Status: v1beta1.ExecutionInProgress},
}}},
},
Expand Down Expand Up @@ -463,7 +463,7 @@ func TestExecutePlan(t *testing.T) {
Status: v1beta1.ExecutionInProgress,
Name: "test",
Phases: []v1beta1.PhaseStatus{
{Name: "phaseOne", Status: v1beta1.ExecutionInProgress, Steps: []v1beta1.StepStatus{{Name: "step", Status: v1beta1.ErrorStatus}}},
{Name: "phaseOne", Status: v1beta1.ExecutionInProgress, Steps: []v1beta1.StepStatus{{Name: "step", Status: v1beta1.ErrorStatus, Message: "A transient error when executing task test.phaseOne.step.taskOne. Will retry. dummy error"}}},
{Name: "phaseTwo", Status: v1beta1.ExecutionInProgress, Steps: []v1beta1.StepStatus{{Name: "step", Status: v1beta1.ExecutionInProgress}}},
},
},
Expand Down Expand Up @@ -509,7 +509,7 @@ func TestExecutePlan(t *testing.T) {
Status: v1beta1.ExecutionInProgress,
Name: "test",
Phases: []v1beta1.PhaseStatus{
{Name: "phaseOne", Status: v1beta1.ExecutionInProgress, Steps: []v1beta1.StepStatus{{Name: "step", Status: v1beta1.ErrorStatus}}},
{Name: "phaseOne", Status: v1beta1.ExecutionInProgress, Steps: []v1beta1.StepStatus{{Name: "step", Status: v1beta1.ErrorStatus, Message: "A transient error when executing task test.phaseOne.step.taskOne. Will retry. dummy error"}}},
{Name: "phaseTwo", Status: v1beta1.ExecutionComplete, Steps: []v1beta1.StepStatus{{Name: "step", Status: v1beta1.ExecutionComplete}}},
},
},
Expand Down Expand Up @@ -555,7 +555,7 @@ func TestExecutePlan(t *testing.T) {
Status: v1beta1.ExecutionFatalError,
Name: "test",
Phases: []v1beta1.PhaseStatus{
{Name: "phaseOne", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{{Name: "step", Status: v1beta1.ExecutionFatalError}}},
{Name: "phaseOne", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{{Name: "step", Status: v1beta1.ExecutionFatalError, Message: "Error during execution: fatal error: default/test-instance failed in test.phaseOne.step.taskOne: dummy error"}}},
{Name: "phaseTwo", Status: v1beta1.ExecutionInProgress, Steps: []v1beta1.StepStatus{{Name: "step", Status: v1beta1.ExecutionInProgress}}},
},
},
Expand Down Expand Up @@ -592,7 +592,7 @@ func TestExecutePlan(t *testing.T) {
expectedStatus: &v1beta1.PlanStatus{
Status: v1beta1.ExecutionFatalError,
Name: "test",
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{{Status: v1beta1.ExecutionFatalError, Name: "step"}}}}},
Phases: []v1beta1.PhaseStatus{{Name: "phase", Status: v1beta1.ExecutionFatalError, Steps: []v1beta1.StepStatus{{Status: v1beta1.ExecutionFatalError, Name: "step", Message: "Error during execution: fatal error: default/test-instance failed in test.phase.step.task: dummy error"}}}}},
wantErr: true,
enhancer: testEnhancer,
},
Expand Down
13 changes: 10 additions & 3 deletions pkg/kudoctl/cmd/plan/plan_status.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,13 +110,13 @@ func planStatus(options *Options, settings *env.Settings) error {

for name, plan := range operator.Spec.Plans {
if name == lastPlanStatus.Name {
planDisplay := fmt.Sprintf("Plan %s (%s strategy) [%s]", name, plan.Strategy, lastPlanStatus.Status)
planDisplay := fmt.Sprintf("Plan %s (%s strategy) [%s]%s", name, plan.Strategy, lastPlanStatus.Status, printMessageIfAvailable(lastPlanStatus.Message))
planBranchName := rootBranchName.AddBranch(planDisplay)
for _, phase := range lastPlanStatus.Phases {
phaseDisplay := fmt.Sprintf("Phase %s [%s]", phase.Name, phase.Status)
phaseDisplay := fmt.Sprintf("Phase %s [%s]%s", phase.Name, phase.Status, printMessageIfAvailable(phase.Message))
phaseBranchName := planBranchName.AddBranch(phaseDisplay)
for _, steps := range phase.Steps {
stepsDisplay := fmt.Sprintf("Step %s (%s)", steps.Name, steps.Status)
stepsDisplay := fmt.Sprintf("Step %s (%s)%s", steps.Name, steps.Status, printMessageIfAvailable(steps.Message))
phaseBranchName.AddBranch(stepsDisplay)
}
}
Expand All @@ -143,3 +143,10 @@ func planStatus(options *Options, settings *env.Settings) error {

return nil
}

func printMessageIfAvailable(s string) string {
if s != "" {
return fmt.Sprintf(" (%s)", s)
}
return ""
}

0 comments on commit d049eed

Please sign in to comment.