fix: data lost caused by Longhorn CSI plugin doing a wrong re-encrypt…

…ion of volume in rare race condition This happens with encrypted volume only While Longhorn CSI plugin is doing NodeStageVolume, if the Longhorn engine crash right before GetDiskFormat (https://github.com/longhorn/longhorn-manager/blob/ 5f9ec86/csi/node_server.go#L486) and recover quickly after that, the following race condition can happen: 1. When engine is crash, the engine removes the Longhorn device at /dev/longhorn/volume-name 2. Because the device path is gone, getDiskFormat returns empty value for diskFormat and nil error. Ref https://github.com/longhorn/longhorn-manager/blob/ 5f9ec86/vendor/k8s.io/ mount-utils/mount_linux.go#L687-L693 3. Now engine is recovered, so it recreate the device at /dev/longhorn/volume-name 4. However, due to diskFormat having empty value, Longhorn re-encrypts the volume which wipes out the all data. Ref https://github.com/longhorn/ longhorn-manager/blob/5f9ec86d18612afaa9f97c78cee3e34f7b653ad6/ csi/node_server.go#L513-L517 5. Finally, Longhorn cannot detect a filesystem for the volume any more so it reformat the filesystem RCA The root cause of the issue is that getDiskFormat is using blkid. The blkid command cannot differentiate 2 cases: the device doesn't exist VS the device doesn't have filesystem inside it Proposal Use cryptsetup isLuks instead. can differentiate between: 1. The device is already encrypted -> return exit code 0 2. The device is not encrypted -> return exit code 1 3. All other errors -> return exit code 4 Ref: https://gitlab.com/cryptsetup/cryptsetup/-/blob/main/FAQ.md?plain=1#L2848 Now we can safely make decision of only doing encryption in the 2nd case longhorn-10416 Signed-off-by: Phan Le <phan.le@suse.com> (cherry picked from commit 72c9173)
longhorn · Feb 14, 2025 · d79742b · d79742b
1 parent c9d945b
commit d79742b
Show file tree

Hide file tree

Showing 2 changed files with 41 additions and 3 deletions.
diff --git a/csi/crypto/crypto.go b/csi/crypto/crypto.go
@@ -8,11 +8,11 @@ import (
 	"github.com/pkg/errors"
 	"github.com/sirupsen/logrus"
 
-	"github.com/longhorn/longhorn-manager/types"
-
 	lhns "github.com/longhorn/go-common-libs/ns"
 	lhtypes "github.com/longhorn/go-common-libs/types"
 	longhorn "github.com/longhorn/longhorn-manager/k8s/pkg/apis/longhorn/v1beta2"
+
+	"github.com/longhorn/longhorn-manager/types"
 )
 
 const (
@@ -85,12 +85,21 @@ func VolumeMapper(volume, dataEngine string) string {
 
 // EncryptVolume encrypts provided device with LUKS.
 func EncryptVolume(devicePath, passphrase string, cryptoParams *EncryptParams) error {
+	isEncrypted, err := isDeviceEncrypted(devicePath)
+	if err != nil {
+		logrus.WithError(err).Warnf("Failed to check IsDeviceEncrypted before encrypting volume %v", devicePath)
+		return err
+	}
+	if isEncrypted {
+		logrus.Infof("The device %v is already encrypted. Skipping the encryption to avoid data lost", devicePath)
+		return nil
+	}
+
 	namespaces := []lhtypes.Namespace{lhtypes.NamespaceMnt, lhtypes.NamespaceIpc}
 	nsexec, err := lhns.NewNamespaceExecutor(lhtypes.ProcessNone, lhtypes.HostProcDirectory, namespaces)
 	if err != nil {
 		return err
 	}
-
 	logrus.Infof("Encrypting device %s with LUKS", devicePath)
 	if _, err := nsexec.LuksFormat(
 		devicePath, passphrase,
@@ -182,6 +191,15 @@ func IsDeviceOpen(device string) (bool, error) {
 	return mappedFile != "", err
 }
 
+func isDeviceEncrypted(devicePath string) (bool, error) {
+	namespaces := []lhtypes.Namespace{lhtypes.NamespaceMnt, lhtypes.NamespaceIpc}
+	nsexec, err := lhns.NewNamespaceExecutor(lhtypes.ProcessNone, lhtypes.HostProcDirectory, namespaces)
+	if err != nil {
+		return false, err
+	}
+	return nsexec.IsLuks(devicePath, lhtypes.LuksTimeout)
+}
+
 // DeviceEncryptionStatus looks to identify if the passed device is a LUKS mapping
 // and if so what the device is and the mapper name as used by LUKS.
 // If not, just returns the original device and an empty string.

diff --git a/vendor/github.com/longhorn/go-common-libs/ns/crypto.go b/vendor/github.com/longhorn/go-common-libs/ns/crypto.go