Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Account for gap between snapshot preparation and archive creation #361

Merged
merged 4 commits into from
Mar 25, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions stable/database/files/nuosm
Original file line number Diff line number Diff line change
Expand Up @@ -402,7 +402,7 @@ function checkBackupId() {
function loadFromSnapshot() {
local recreate_archives="false"

if [ ! -f "$DB_DIR/info.json" ]; then
if [ ! -f "$DB_DIR/info.json" ] && [ ! -f "$DB_DIR/restored.txt" ]; then
local archives="$(find /var/opt/nuodb/archive -name info.json)"
if [ -z "$archives" ] || [ "$(echo "$archives" | wc -l)" != 1 ]; then
# Relax check for archive snapshot for SMs other than ordinal 0. It is
Expand Down Expand Up @@ -467,9 +467,15 @@ function loadFromSnapshot() {
fi

if [ "$recreate_archives" == "true" ]; then
# Create restored.txt to signal that snapshot preparation is complete. This
# is needed in the absence of info.json, which is not created for the
# restored archive object until later.
echo "$BACKUP_ID" > "${DB_DIR}/restored.txt"
kontaras marked this conversation as resolved.
Show resolved Hide resolved

log "Removing metadata from snapshot archive"
rm "${DB_DIR}/info.json" "${DB_DIR}/backup.txt"
[ -e "${JOURNAL_DIR}/backup.txt" ] && rm "${JOURNAL_DIR}/backup.txt"
rm -f "${DB_DIR}/info.json"
rm -f "${DB_DIR}/backup.txt"
rm -f "${JOURNAL_DIR}/backup.txt"
fi
}

Expand Down
96 changes: 96 additions & 0 deletions test/minikube/minikube_base_restore_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -503,6 +503,94 @@ func TestKubernetesAutoRestore(t *testing.T) {
})
}

// TestSmRestartPartialSnapshotRestore checks that an SM started on an archive
// volume that contains restored.txt, but no info.json or backup.txt, comes up
// successfully and leaves restored.txt in place with the expected backup ID.
//
// The volume is prepared by a throwaway busybox pod that writes restored.txt
// into a fresh PVC; that PVC is then used as the archive data source for the
// database release.
func TestSmRestartPartialSnapshotRestore(t *testing.T) {
	testlib.AwaitTillerUp(t)
	defer testlib.VerifyTeardown(t)
	defer testlib.Teardown(testlib.TEARDOWN_ADMIN)
	// Create admin release
	adminRelease, namespaceName := testlib.StartAdmin(t, &helm.Options{}, 1, "")
	admin := fmt.Sprintf("%s-nuodb-cluster0", adminRelease)
	admin0 := fmt.Sprintf("%s-0", admin)

	// Create a PVC that has restored.txt in the archive directory, but no
	// info.json or backup.txt. This simulates a failure occurring between
	// preparation of the archive directory from the snapshot and creation of
	// the archive object and info.json file for the archive.
	tmpfile, err := os.CreateTemp("", "partial-restore.yaml")
	require.NoError(t, err)
	defer os.Remove(tmpfile.Name())
	_, writeErr := fmt.Fprintf(tmpfile, `
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  name: partial-restore
spec:
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: 1Gi
  storageClassName: %s
  volumeMode: Filesystem
---
apiVersion: v1
kind: Pod
metadata:
  name: partial-restore
spec:
  restartPolicy: Never
  volumes:
    - name: volume
      persistentVolumeClaim:
        claimName: partial-restore
  containers:
    - name: container
      image: busybox
      args:
        - sh
        - -c
        - mkdir -p /mnt/nuodb/demo && echo "abc123" > /mnt/nuodb/demo/restored.txt
      volumeMounts:
        - mountPath: "/mnt"
          name: volume
`, testlib.SNAPSHOTABLE_STORAGE_CLASS)
	// Close before handing the path to kubectl so the descriptor is released
	// regardless of whether the write succeeded; then fail on either error.
	closeErr := tmpfile.Close()
	require.NoError(t, writeErr)
	require.NoError(t, closeErr)

	kubectlOptions := k8s.NewKubectlOptions("", "", namespaceName)
	output, err := k8s.RunKubectlAndGetOutputE(t, kubectlOptions, "apply", "-f", tmpfile.Name())
	require.NoError(t, err, output)
	// Wait for pod to complete successfully
	output, err = k8s.RunKubectlAndGetOutputE(t, kubectlOptions, "wait", "--timeout=60s", "--for", "jsonpath={.status.phase}=Succeeded", "pod/partial-restore")
	require.NoError(t, err, output)

	// Create a database with the prepared PVC as a data source. When the SM
	// comes up, it should skip archive preparation from the snapshot and
	// proceed to creation of the archive object and info.json file.
	defer testlib.Teardown(testlib.TEARDOWN_DATABASE)
	options := &helm.Options{
		SetValues: map[string]string{
			"database.name":                         "demo",
			"database.sm.resources.requests.cpu":    "250m",
			"database.sm.resources.requests.memory": testlib.MINIMAL_VIABLE_ENGINE_MEMORY,
			"database.te.resources.requests.cpu":    "250m",
			"database.te.resources.requests.memory": testlib.MINIMAL_VIABLE_ENGINE_MEMORY,
			"database.sm.noHotCopy.journalPath.persistence.storageClass": testlib.SNAPSHOTABLE_STORAGE_CLASS,
			"database.persistence.storageClass":                          testlib.SNAPSHOTABLE_STORAGE_CLASS,
			"database.persistence.archiveDataSource.name":                "partial-restore",
			"database.persistence.archiveDataSource.kind":                "PersistentVolumeClaim",
			"database.persistence.archiveDataSource.apiGroup":            "",
			// Must match the ID the busybox pod wrote into restored.txt.
			"database.snapshotRestore.backupId": "abc123",
			"database.sm.noHotCopy.replicas":    "1",
			"database.sm.hotCopy.enablePod":     "false",
		},
	}
	dbRelease := testlib.StartDatabase(t, namespaceName, admin0, options)
	// Verify that the restored.txt file is found
	smPod := fmt.Sprintf("sm-%s-nuodb-cluster0-demo-0", dbRelease)
	output, err = k8s.RunKubectlAndGetOutputE(t, kubectlOptions, "exec", smPod, "-c", "engine", "--",
		"cat", "/var/opt/nuodb/archive/nuodb/demo/restored.txt")
	require.NoError(t, err, output)
	require.Equal(t, "abc123", strings.TrimSpace(output))
}

// Test exercising backup hooks and volume snapshot restore
func runTestKubernetesSnapshotRestore(t *testing.T, preprovisionVolumes bool, inPlaceRestore bool) {
testlib.AwaitTillerUp(t)
Expand Down Expand Up @@ -558,6 +646,7 @@ func runTestKubernetesSnapshotRestore(t *testing.T, preprovisionVolumes bool, in
if inPlaceRestore {
restoredDb = "demo"
// Delete database and archive objects from domain state
k8s.RunKubectl(t, kubectlOptions, "exec", admin0, "-c", "admin", "--", "nuocmd", "check", "database", "--db-name", "demo", "--num-processes", "0", "--timeout", "60")
k8s.RunKubectl(t, kubectlOptions, "exec", admin0, "-c", "admin", "--", "nuocmd", "delete", "database", "--db-name", "demo")
k8s.RunKubectl(t, kubectlOptions, "exec", admin0, "-c", "admin", "--", "nuocmd", "delete", "archive", "--archive-id", "0", "--purge")

Expand Down Expand Up @@ -585,6 +674,13 @@ func runTestKubernetesSnapshotRestore(t *testing.T, preprovisionVolumes bool, in
}
dbRelease := testlib.StartDatabase(t, namespaceName, admin0, options)

// Verify that the restored.txt file is found
restoredSmPod := fmt.Sprintf("sm-%s-nuodb-cluster0-%s-0", dbRelease, restoredDb)
output, err = k8s.RunKubectlAndGetOutputE(t, kubectlOptions, "exec", restoredSmPod, "-c", "engine", "--",
"cat", "/var/opt/nuodb/archive/nuodb/"+restoredDb+"/restored.txt")
require.NoError(t, err, output)
require.Equal(t, backupId, strings.TrimSpace(output))

// Make sure data written to clone is present
output, err = testlib.RunSQL(t, namespaceName, admin0, restoredDb, "SELECT id FROM testtbl")
require.NoError(t, err, output)
Expand Down