Skip to content

Commit

Permalink
Fix TestKubernetesRestoreMultipleSMs test (#270)
Browse files Browse the repository at this point in the history
The TestKubernetesRestoreMultipleBackupGroups test performs multiple restore
operations in sequence, which increases the likelihood of a container being
reported as `CrashLoopBackOff`. This lengthens the test run because of the
container restart back-off and can cause the test to fail. Restart the database
pods manually after the restore has been requested. Database processes that are
not selected for restore may experience scheduling delays and start after the
restore coordinator SM has already restored the database. In that case the
"Waiting for database restore to complete" message won't appear in their logs.
  • Loading branch information
sivanov-nuodb authored Feb 7, 2022
1 parent d24e0f4 commit 9fce815
Show file tree
Hide file tree
Showing 2 changed files with 38 additions and 6 deletions.
22 changes: 16 additions & 6 deletions test/minikube/minikube_long_restore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -214,6 +214,10 @@ func TestKubernetesRestoreMultipleBackupGroups(t *testing.T) {
"database.te.logPersistence.enabled": "true",
"database.env[0].name": "NUODB_DEBUG",
"database.env[0].value": "debug",
// multiple restore operations with autoRestart=true may cause
// containers to be reported as "CrashLoopBackOff" although the
// engines will exit with zero return code
"restore.autoRestart": "false",
},
}

Expand Down Expand Up @@ -248,10 +252,12 @@ func TestKubernetesRestoreMultipleBackupGroups(t *testing.T) {
// restore database
databaseOptions.SetValues["restore.source"] = ":latest"
testlib.RestoreDatabase(t, namespaceName, admin0, &databaseOptions)
testlib.RestartDatabasePods(t, namespaceName, databaseChartName, &databaseOptions)
testlib.AwaitDatabaseUp(t, namespaceName, admin0, opt.DbName, opt.NrTePods+opt.NrSmPods)

// HCSM with ordinal 0 should not be selected for restore
require.GreaterOrEqual(t, testlib.GetStringOccurrenceInLog(t, namespaceName, hcSmPodName0,
"Waiting for database restore to complete", &corev1.PodLogOptions{}), 1)
require.Equal(t, 0, testlib.GetStringOccurrenceInLog(t, namespaceName, hcSmPodName0,
"Restoring ", &corev1.PodLogOptions{}))
// verify that the correct backupset is used to restore the archive of
// HCSM with ordinal 1
require.GreaterOrEqual(t, testlib.GetStringOccurrenceInLog(t, namespaceName, hcSmPodName1,
Expand All @@ -275,10 +281,12 @@ func TestKubernetesRestoreMultipleBackupGroups(t *testing.T) {
// restore database
databaseOptions.SetValues["restore.source"] = "cluster0-0:latest"
testlib.RestoreDatabase(t, namespaceName, admin0, &databaseOptions)
testlib.RestartDatabasePods(t, namespaceName, databaseChartName, &databaseOptions)
testlib.AwaitDatabaseUp(t, namespaceName, admin0, opt.DbName, opt.NrTePods+opt.NrSmPods)

// HCSM with ordinal 1 should not be selected for restore
require.GreaterOrEqual(t, testlib.GetStringOccurrenceInLog(t, namespaceName, hcSmPodName1,
"Waiting for database restore to complete", &corev1.PodLogOptions{}), 1)
require.Equal(t, 0, testlib.GetStringOccurrenceInLog(t, namespaceName, hcSmPodName1,
"Restoring ", &corev1.PodLogOptions{}))
// verify that the correct backupset is used to restore the archive of
// HCSM with ordinal 0
require.GreaterOrEqual(t, testlib.GetStringOccurrenceInLog(t, namespaceName, hcSmPodName0,
Expand All @@ -301,10 +309,12 @@ func TestKubernetesRestoreMultipleBackupGroups(t *testing.T) {
// restore database
databaseOptions.SetValues["restore.source"] = "cluster0-0:2"
testlib.RestoreDatabase(t, namespaceName, admin0, &databaseOptions)
testlib.RestartDatabasePods(t, namespaceName, databaseChartName, &databaseOptions)
testlib.AwaitDatabaseUp(t, namespaceName, admin0, opt.DbName, opt.NrTePods+opt.NrSmPods)

// HCSM with ordinal 1 should not be selected for restore
require.GreaterOrEqual(t, testlib.GetStringOccurrenceInLog(t, namespaceName, hcSmPodName1,
"Waiting for database restore to complete", &corev1.PodLogOptions{}), 1)
require.Equal(t, 0, testlib.GetStringOccurrenceInLog(t, namespaceName, hcSmPodName1,
"Restoring ", &corev1.PodLogOptions{}))
// verify that the correct backupset is used to restore the archive of
// HCSM with ordinal 0
require.GreaterOrEqual(t, testlib.GetStringOccurrenceInLog(t, namespaceName, hcSmPodName0,
Expand Down
22 changes: 22 additions & 0 deletions test/testlib/nuodb_database_utilities.go
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,28 @@ func EnsureDatabaseNotRunning(t *testing.T, adminPod string, opt ExtractedOption
k8s.RunKubectl(t, kubectlOptions, "exec", adminPod, "--", "nuocmd", "check", "database", "--db-name", opt.DbName, "--num-processes", "0", "--timeout", "30")
}

// RestartDatabasePods deletes all TE and SM pods that belong to the database
// described by options in the given Helm release, then waits for the expected
// number of replacement replicas to be scheduled.
//
// TE pods are discovered by name prefix and their count is asserted against
// opt.NrTePods. SM pod names are derived from the hotcopy and non-hotcopy name
// prefixes plus an ordinal suffix. Pods are deleted in the order: TEs, hotcopy
// SMs, non-hotcopy SMs.
func RestartDatabasePods(t *testing.T, namespaceName string, helmChartReleaseName string, options *helm.Options) {
	opt := GetExtractedOptions(options)

	// All database pod names share the "<release>-nuodb-<cluster>-<db>" part.
	baseName := fmt.Sprintf("%s-nuodb-%s-%s", helmChartReleaseName, opt.ClusterName, opt.DbName)
	tePrefix := "te-" + baseName
	smPrefix := "sm-" + baseName
	hcSmPrefix := smPrefix + "-hotcopy"

	tePods := GetPodNames(t, namespaceName, tePrefix)
	require.Equal(t, opt.NrTePods, len(tePods), "Unexpected number of TE Pods")

	// Collect the names of every pod to delete before deleting any of them.
	var pods []string
	pods = append(pods, tePods...)
	for ordinal := 0; ordinal < opt.NrSmHotCopyPods; ordinal++ {
		pods = append(pods, fmt.Sprintf("%s-%d", hcSmPrefix, ordinal))
	}
	for ordinal := 0; ordinal < opt.NrSmNoHotCopyPods; ordinal++ {
		pods = append(pods, fmt.Sprintf("%s-%d", smPrefix, ordinal))
	}

	for idx := range pods {
		DeletePod(t, namespaceName, "pod/"+pods[idx])
	}

	// The plain SM prefix is also a prefix of the hotcopy SM names, so the
	// expected SM count passed here is the total opt.NrSmPods.
	AwaitNrReplicasScheduled(t, namespaceName, tePrefix, opt.NrTePods)
	AwaitNrReplicasScheduled(t, namespaceName, smPrefix, opt.NrSmPods)
}

// DatabaseInstallationStep is a callback invoked with the test handle, the Helm
// options and the Helm chart release name. StartDatabaseTemplate accepts one as
// its installationStep parameter.
// NOTE(review): presumably it performs the actual chart install/upgrade —
// confirm against the StartDatabaseTemplate body.
type DatabaseInstallationStep func(t *testing.T, options *helm.Options, helmChartReleaseName string)

func StartDatabaseTemplate(t *testing.T, namespaceName string, adminPod string, options *helm.Options, installationStep DatabaseInstallationStep, awaitDatabase bool) (helmChartReleaseName string) {
Expand Down

0 comments on commit 9fce815

Please sign in to comment.